Example #1
    def add_element(self, element):
        """Add an element to a work specification if possible.

        Adding an element to a WorkSpec must preserve the validity of the workspec, which involves several checks.
        We do not yet check for element uniqueness beyond a string name.

        If an element is added that was previously in another WorkSpec, it must first be removed from the
        other WorkSpec.
        """
        if (hasattr(element, "namespace") and hasattr(element, "operation")
                and hasattr(element, "serialize")):
            if getattr(element, "name", None) is None or len(str(element.name)) < 1:
                raise exceptions.UsageError(
                    "Only named elements may be added to a WorkSpec.")
            if element.name in self.elements:
                raise exceptions.UsageError(
                    "Elements in WorkSpec must be uniquely identifiable.")
            if hasattr(element, "depends"):
                for dependency in element.depends:
                    if dependency not in self.elements:
                        raise exceptions.UsageError(
                            "Element dependencies must already be specified before an Element may be added."
                        )
            # Okay, it looks like we have an element we can add
            if (getattr(element, "workspec", None) is not None
                    and element.workspec is not self):
                raise exceptions.Error(
                    "Element must be removed from its current WorkSpec to be added to this WorkSpec, but element removal is not yet implemented."
                )
            self.elements[element.name] = element.serialize()
            element.workspec = self
        else:
            raise exceptions.ValueError(
                "Provided object does not appear to be compatible with gmx.workflow.WorkElement."
            )
        logger.info("Added element {} to workspec.".format(element.name))
Example #2
def from_tpr(input=None, **kwargs):
    """Create a WorkSpec from a (list of) tpr file(s).

    Generates a work specification based on the provided simulation input and returns a handle to the
    MD simulation element of the workflow. Keyword arguments can override simulation behavior from
    ``input``.

    If the MD operation discovers artifacts from a previous simulation that was launched from the same input,
    the simulation resumes from the last checkpointed step. If ``append_output`` is set ``False``, existing
    artifacts are kept separate from new output with the standard file naming convention,
    and new output begins from the last checkpointed step, if any.

    Setting ``end_time`` redefines the end point of the simulation trajectory from what was provided in
    ``input``. It is equivalent to changing the number of steps requested in the MDP (or TPR) input, but
    the time is provided in picoseconds instead of as a number of time steps.

    .. deprecated:: 0.0.7
        If ``steps=N`` is provided and N is an integer
        greater than or equal to 1, the MD operation advances the trajectory by ``N`` steps, regardless of the number
        of simulation steps specified in ``input`` or ``end_time``. For convenience, setting ``steps=None`` does not override
        ``input``.
        Note that when it is not ``None``, ``steps`` takes precedence over ``end_time`` and ``input``, but can still be
        superseded by a signal, such as if an MD plugin or other code has a simulation completion condition that occurs
        before ``N`` additional steps have run.

    Where keyword arguments correspond to ``gmx mdrun`` command line options, the corresponding flags are noted below.

    Keyword Arguments:
        input (str): *Required* string or list of strings giving the filename(s) of simulation input
        append_output (bool): Append output for continuous trajectories if True, truncate existing output data if False. (default True)
        end_time (float): Specify the final time in the simulation trajectory, overriding input read from TPR.
        grid (tuple): Domain decomposition grid divisions (nx, ny, nz). (-dd)
        max_hours (float): Terminate after 0.99 times this many hours if simulation is still running. (-maxh)
        pme_ranks (int): Number of separate ranks to be used for PME electrostatics. (-npme)
        pme_threads_per_rank (int): Number of OpenMP threads per PME rank. (-ntomp_pme)
        steps (int): Override input files and run for this many steps. (-nsteps; deprecated)
        threads (int): Total number of threads to start. (-nt)
        threads_per_rank (int): Number of OpenMP threads to start per MPI rank. (-ntomp)
        tmpi (int): Number of thread-MPI ranks to start. (-ntmpi)

    Returns:
        The MD simulation member (a WorkElement) of a new gmx.workflow.WorkSpec object

    Produces a WorkSpec with the following data::

        version: gmxapi_workspec_0_1
        elements:
            tpr_input:
                namespace: gromacs
                operation: load_tpr
                params: {'input': ['tpr_filename1', 'tpr_filename2', ...]}
            md_sim:
                namespace: gmxapi
                operation: md
                depends: ['tpr_input']
                params: {'kw1': arg1, 'kw2': arg2, ...}

    Bugs (as of version 0.0.6):
        * There is no way to programmatically check the current step number on disk.
          See https://github.com/kassonlab/gmxapi/issues/56 and https://github.com/kassonlab/gmxapi/issues/85
    """
    import os

    usage = "argument to from_tpr() should be a valid filename or list of filenames, followed by optional key word arguments."

    # Normalize to tuple input type.
    if isinstance(input, (list, tuple)):
        tpr_list = tuple(to_string(element) for element in input)
    else:
        try:
            tpr_list = (to_string(input), )
        except Exception:
            raise exceptions.UsageError(usage)

    # Check for valid filenames
    for arg in tpr_list:
        if not (os.path.exists(arg) and os.path.isfile(arg)):
            arg_path = os.path.abspath(arg)
            raise exceptions.UsageError(usage + " Got {}".format(arg_path))

    # Note: These are runner parameters, not MD parameters, and should be in the call to gmx.run() instead of here.
    # Reference https://github.com/kassonlab/gmxapi/issues/95
    params = {}
    for arg_key in kwargs:
        if arg_key == 'grid' or arg_key == 'dd':
            params['grid'] = tuple(kwargs[arg_key])
        elif arg_key == 'pme_ranks' or arg_key == 'npme':
            params['pme_ranks'] = int(kwargs[arg_key])
        elif arg_key == 'threads' or arg_key == 'nt':
            params['threads'] = int(kwargs[arg_key])
        elif arg_key == 'tmpi' or arg_key == 'ntmpi':
            params['tmpi'] = int(kwargs[arg_key])
        elif arg_key == 'threads_per_rank' or arg_key == 'ntomp':
            params['threads_per_rank'] = int(kwargs[arg_key])
        elif arg_key == 'pme_threads_per_rank' or arg_key == 'ntomp_pme':
            params['pme_threads_per_rank'] = int(kwargs[arg_key])
        elif arg_key == 'steps' or arg_key == 'nsteps':
            if kwargs[arg_key] is None:
                # None means "don't override the input", which is indicated by a parameter value of -2 in GROMACS 2019
                steps = -2
            else:
                # Otherwise we require steps to be a positive integer
                try:
                    steps = int(kwargs[arg_key])
                except (TypeError, ValueError):
                    # steps is not an integer.
                    raise exceptions.TypeError(
                        '"steps" could not be interpreted as an integer.')
                if steps < 1:
                    raise exceptions.ValueError(
                        'steps to run must be at least 1')
                # The "nsteps" command line flag will be removed in GROMACS 2020
                # and so "steps" is deprecated in gmxapi 0.0.7
                warnings.warn(
                    "`steps` keyword argument is deprecated. Consider `end_time` instead.",
                    DeprecationWarning)
            params['steps'] = steps
        elif arg_key == 'max_hours' or arg_key == 'maxh':
            params['max_hours'] = float(kwargs[arg_key])
        elif arg_key == 'append_output':
            # Try not to encourage confusion with the `mdrun` `-noappend` flag, which would be a confusing double negative if represented as a bool.
            params['append_output'] = bool(kwargs[arg_key])
        elif arg_key == 'end_time':
            params[arg_key] = float(kwargs[arg_key])
        else:
            raise exceptions.UsageError(
                "Invalid keyword argument: {}. {}".format(arg_key, usage))

    # Create an empty WorkSpec
    workspec = WorkSpec()

    # Create and add the Element for the tpr file(s)
    inputelement = WorkElement(namespace='gromacs',
                               operation='load_tpr',
                               params={'input': tpr_list})
    inputelement.name = 'tpr_input'
    if inputelement.name not in workspec.elements:
        # Operations such as this need to be replaced with accessors or properties that can check the validity of the WorkSpec
        workspec.elements[inputelement.name] = inputelement.serialize()
        inputelement.workspec = workspec

    # Create and add the simulation element
    # We can add smarter handling of the `depends` argument, but it is only critical to check when adding the element
    # to a WorkSpec.
    mdelement = WorkElement(operation='md',
                            depends=[inputelement.name],
                            params=params)
    mdelement.name = 'md_sim'
    # Check that the element has not already been added, but that its dependency has.
    workspec.add_element(mdelement)

    return mdelement
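A minimal, hypothetical call to ``from_tpr`` is sketched below. The filename and keyword values are illustrative only (the TPR file must exist on disk), and launching the resulting work (for example with ``gmx.run()``, referenced in the comment above) is outside this sketch.

from gmx.workflow import from_tpr

# Hypothetical simulation input; from_tpr() checks that the file exists.
md = from_tpr(['topol.tpr'], threads=4, append_output=False)

# The returned handle is the 'md_sim' WorkElement of a new WorkSpec.
assert md.name == 'md_sim'
assert set(md.workspec.elements) == {'tpr_input', 'md_sim'}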
Example #3
    def _chase_deps(self, source_set, name_list):
        """Helper to recursively generate dependencies before dependents.

        Given a set of WorkElement objects and a list of element names, generate WorkElements for
        the members of name_list plus their dependencies in an order such that dependencies are
        guaranteed to occur before their dependent elements.

        For example, to sequence an entire work specification into a reasonable order for instantiation, use

            >>> workspec._chase_deps(set(workspec.elements.keys()), list(workspec.elements.keys()))

        Note: as a member function of WorkSpec, we have access to the full WorkSpec elements data at all
        times, giving us extra flexibility in implementation and arguments.

        Args:
            source_set: a (super)set of element names from the current work spec (will be consumed)
            name_list: subset of *source_set* naming the elements to be sequenced

        Returns:
            Generator that yields WorkElement objects drawn from the names in *source_set*, dependencies before dependents.

        Requires that WorkElements named in *name_list* and any elements on which
        they depend are all named in *source_set* and available in the current
        work spec.

        Note: *source_set* is modified in place as elements are yielded.
        The caller should not re-use the object after calling _chase_deps().

        TODO: Separate out DAG topology operations from here and Context.__enter__()
        Our needs are simple enough that we probably don't need an external dependency
        like networkx...
        """
        # Recursively (depth-first) generate a topologically valid serialized DAG from source_set.
        assert isinstance(source_set, set)
        # Warning: This is not at all a rigorous check.
        # It is hard to check whether this is string-like or list-like in both Py 2.7 and 3.x
        if not isinstance(name_list, (list, tuple, set)):
            raise exceptions.ValueError(
                'Must disambiguate "name_list" by passing a list, tuple, or set.')
        # Make a copy of name_list in case the input reference is being used elsewhere during
        # iteration, such as for source_set, which is modified during the loop.
        for name in tuple(name_list):
            if name in source_set:
                source_set.remove(name)
                element = WorkElement.deserialize(self.elements[name],
                                                  name=name,
                                                  workspec=self)
                dependencies = element.depends
                # items in element.depends are either element names or ensembles of element names.
                for item in dependencies:
                    if isinstance(item, (list, tuple, set)):
                        dependency_list = item
                    else:
                        if not isinstance(item, str):
                            raise exceptions.ValueError(
                                'Dependencies should be a string or sequence of strings. Got {}'
                                .format(type(item)))
                        dependency_list = [item]
                    for dependency in dependency_list:
                        for recursive_dep in self._chase_deps(
                                source_set, (dependency, )):
                            yield recursive_dep
                yield element
            else:
                # Note: The user is responsible for ensuring that source_set is complete.
                # Otherwise, we would need to maintain a list of elements previously yielded.
                pass
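To make the generator's ordering concrete, here is a small sketch (building on the hypothetical ``from_tpr`` example above) that sequences an entire work spec; dependencies are yielded before their dependents.

from gmx.workflow import from_tpr

# Hypothetical input; produces a WorkSpec with 'tpr_input' and 'md_sim' elements.
md = from_tpr(['topol.tpr'])
workspec = md.workspec

# Sequence the whole work spec, as suggested in the docstring above.
ordered = [element.name
           for element in workspec._chase_deps(set(workspec.elements.keys()),
                                               list(workspec.elements.keys()))]
# 'tpr_input' is a dependency of 'md_sim', so it is yielded first.
assert ordered == ['tpr_input', 'md_sim']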