Example #1
0
    def __init__(self,
                 namespace="gmxapi",
                 operation=None,
                 params=None,
                 depends=()):
        """Set up the description of a single work element.

        Arguments:
            namespace: name space of the operation; stored after conversion with ``to_string()``.
            operation: name of the operation (required); stored after conversion with ``to_string()``.
            params: optional dictionary of keyword arguments; keys are converted with ``to_string()``.
            depends: iterable of dependencies. A list or tuple entry is stored as a list of ``str``,
                any other entry is stored as a single ``str``.

        Raises:
            exceptions.UsageError: if ``operation`` is None, or if ``params`` is neither None nor a dict.
        """
        self.namespace = to_string(namespace)

        # An operations submodule could validate this value in the future,
        # e.g. self.operation = gmx.workflow.operations.normalize(operation)
        if operation is None:
            raise exceptions.UsageError("Invalid argument type for operation.")
        self.operation = to_string(operation)

        # Note: attribute updates by assignment are not currently prevented after the element has been added to a
        # workspec. The protocol is to be clarified with https://github.com/kassonlab/gmxapi/issues/92
        if params is None:
            self.params = GmxMap()
        elif not isinstance(params, dict):
            raise exceptions.UsageError(
                "If provided, params must be a dictionary of keyword arguments"
            )
        else:
            self.params = GmxMap(
                {to_string(key): value
                 for key, value in params.items()})

        self.depends = []
        for dependency in depends:
            is_group = isinstance(dependency, (list, tuple))
            self.depends.append(
                [str(member) for member in dependency] if is_group else str(dependency))

        # Neither a name nor an owning WorkSpec is assigned at construction time.
        self._name = None
        self._workspec = None
Example #2
0
    def test_portability(self):
        """Serialize a WorkElement and check that deserializing restores its attributes."""
        namespace = "gromacs"
        depends = ()
        workspec = None
        name = "spam"
        operation = "load_tpr"
        params = {'input': ["filename1", "filename2"]}
        element = gmx.workflow.WorkElement(namespace=namespace,
                                           operation=operation,
                                           params=params)

        serialization = element.serialize()
        assert "namespace" in json.loads(to_string(serialization))

        # Two elements with the same name cannot exist in the same workspec, but this is not the case here.
        element = gmx.workflow.WorkElement.deserialize(serialization)
        # Identity comparison is the correct check against the None singleton.
        assert element.name is None
        element = gmx.workflow.WorkElement.deserialize(serialization,
                                                       name=name)
        assert element.name == name

        assert element.workspec == workspec
        assert element.namespace == namespace
        assert element.operation == operation
        # Note: zip() stops at the shorter sequence, so these loops compare only the common prefix.
        for a, b in zip(params['input'], element.params['input']):
            assert a == b
        for a, b in zip(depends, element.depends):
            assert a == b
Example #3
0
    def deserialize(cls, input, name=None, workspec=None):
        """Build a new element of type ``cls`` from its serialized (JSON) form.

        Arguments:
            input: a serialized WorkElement
            name: new element name (optional) (deprecated)
            workspec: an existing workspec to attach this element to (optional)

        The serialized record must provide the ``namespace``, ``operation``, ``params`` and ``depends`` keys.
        ``workspec`` is only considered when ``name`` is given, and the element is added to the workspec only
        if no element of that name is already present.

        When subclasses become distinct, this factory function will need to do additional dispatching to create
        an object of the correct type. Alternatively, instead of subclassing, a slightly heavier single class may
        suffice, or more flexible duck typing might be better.
        """
        import json
        record = json.loads(to_string(input))
        element = cls(namespace=record['namespace'],
                      operation=record['operation'],
                      params=record['params'],
                      depends=record['depends'])

        if name is None:
            # Only named elements can be added to a WorkSpec, so there is nothing more to do.
            return element

        element.name = name
        if workspec is not None:
            element.workspec = workspec
            already_present = element.name in workspec.elements
            if not already_present:
                workspec.add_element(element)
        return element
Example #4
0
    def serialize(self):
        """Produce the normalized JSON serialization of this work specification.

        The output is intended to be suitable for passing to any Context implementation. To make the
        serialization reproducible, the following rules are applied:

        1. Elements are visited in sorted key order and all JSON objects are written with sorted keys.
        2. Non-ASCII characters are escaped, so the document consists of ASCII characters only.
        3. The document is densely formatted (no whitespace around separators) and then utf-8 encoded.

        Returns:
            The utf-8 encoded JSON document (a ``bytes`` object in Python 3).

        The output should be explicitly converted to a string before it is passed to a JSON deserializer.

            >>> my_object = my_workspec.serialize()
            >>> my_data_structure = json.loads(my_object.decode('utf-8'))

        """
        import json
        version = self.version
        # Each stored element is parsed from its own serialized form so that it can be embedded as a
        # nested JSON object in the document.
        elements = {}
        for key in sorted(self.elements.keys()):
            elements[str(key)] = json.loads(to_string(self.elements[key]))
        document = {'version': version, 'elements': elements}
        text = json.dumps(document,
                          ensure_ascii=True,
                          sort_keys=True,
                          separators=(',', ':'))
        return text.encode('utf-8')
Example #5
0
    def __hash__(self):
        """Return a hash of the serialized work specification.

        This lets the spec be used as a dictionary key in Python. The value possibly depends on the Python
        implementation. It is not part of the gmxapi specification and should not be used outside of a single
        invocation of a script.
        """
        # The whole serialized spec is hashed as one string. The order of elements and their contents is not
        # guaranteed, but should be consistent within a script invocation.
        serialized_text = to_string(self.serialize())
        return hash(serialized_text)
Example #6
0
    def __str__(self):
        """Return a human-readable rendering for str() or print().

        The serialized spec is parsed and written out again as JSON with sorted keys and an indent of 4.
        The text should resemble the abstract schema for gmxapi_workspec_1_0, but the exact format is
        unspecified and may change in future versions.

        For consistent JSON output, use WorkSpec.serialize().
        """
        import json
        document = json.loads(to_string(self.serialize()))
        return str(json.dumps(document, indent=4, sort_keys=True))
Example #7
0
 def deserialize(serialized):
     """Create a WorkSpec from a serialized (JSON) work specification.

     Arguments:
         serialized: serialized work specification; converted with ``to_string()`` before JSON parsing.

     Returns:
         A new WorkSpec whose ``elements`` holds the entries of the input's ``elements`` mapping.

     Raises:
         exceptions.CompatibilityError: if the ``version`` in the input differs from the version of a
             newly created WorkSpec.
     """
     import json
     workspec = WorkSpec()
     document = json.loads(to_string(serialized))
     found_version = document['version']
     expected_version = workspec.version
     if found_version != expected_version:
         raise exceptions.CompatibilityError(
             "Expected work spec version {}. Got work spec version {}.".format(
                 expected_version, found_version))
     # NOTE(review): elements are stored here in parsed form, while from_tpr() stores the output of
     # element.serialize() -- confirm that consumers of workspec.elements accept both.
     for element_name, element_data in document['elements'].items():
         workspec.elements[element_name] = element_data
     return workspec
Example #8
0
def from_tpr(input=None, **kwargs):
    """Create a WorkSpec from a (list of) tpr file(s).

    Generates a work specification based on the provided simulation input and returns a handle to the
    MD simulation element of the workflow. Key word arguments can override simulation behavior from
    ``input``.

    If the MD operation discovers artifacts from a previous simulation that was launched from the same input,
    the simulation resumes from the last checkpointed step. If ``append_output`` is set ``False``, existing
    artifacts are kept separate from new output with the standard file naming convention,
    and new output begins from the last checkpointed step, if any.

    Setting ``end_time`` redefines the end point of the simulation trajectory from what was provided in
    ``input``. It is equivalent to changing the number of steps requested in the MDP (or TPR) input, but
    the time is provided as picoseconds instead of a number of time steps.

    .. deprecated:: 0.0.7
        If ``steps=N`` is provided and N is an integer
        greater than or equal to 1, the MD operation advances the trajectory by ``N`` steps, regardless of the number
        of simulation steps specified in ``input`` or ``end_time``. For convenience, setting ``steps=None`` does not override
        ``input``.
        Note that when it is not ``None``, ``steps`` takes precedence over ``end_time`` and ``input``, but can still be
        superseded by a signal, such as if an MD plugin or other code has a simulation completion condition that occurs
        before ``N`` additional steps have run.

    Where key word arguments correspond to ``gmx mdrun`` command line options, the corresponding flags are noted below.
    The flag names (without the leading dash) are also accepted as aliases for the key words.

    Keyword Arguments:
        input (str): *Required* string or list of strings giving the filename(s) of simulation input
        append_output (bool): Append output for continuous trajectories if True, truncate existing output data if False. (default True)
        end_time (float): Specify the final time in the simulation trajectory, overriding input read from TPR.
        grid (tuple): Domain decomposition grid divisions (nx, ny, nz). (-dd)
        max_hours (float): Terminate after 0.99 times this many hours if simulation is still running. (-maxh)
        pme_ranks (int): number of separate ranks to be used for PME electrostatics. (-npme)
        pme_threads_per_rank (int): Number of OpenMP threads per PME rank. (-ntomp_pme)
        steps (int): Override input files and run for this many steps. (-nsteps; deprecated)
        threads (int): Total number of threads to start. (-nt)
        threads_per_rank (int): number of OpenMP threads to start per MPI rank. (-ntomp)
        tmpi (int): number of thread-MPI ranks to start. (-ntmpi)

    Returns:
        simulation member of a gmx.workflow.WorkSpec object

    Raises:
        exceptions.UsageError: if ``input`` cannot be interpreted as file name(s), if a named file does not
            exist, or if an unrecognized key word argument is given.
        exceptions.TypeError: if ``steps`` is not None and cannot be interpreted as an integer.
        exceptions.ValueError: if ``steps`` is an integer less than 1.

        Errors raised by the ``int()``, ``float()`` or ``tuple()`` conversion of the other key word
        arguments are not translated and propagate to the caller.

    Produces a WorkSpec with the following data::

        version: gmxapi_workspec_0_1
        elements:
            tpr_input:
                namespace: gromacs
                operation: load_tpr
                params: {'input': ['tpr_filename1', 'tpr_filename2', ...]}
            md_sim:
                namespace: gmxapi
                operation: md
                depends: ['tpr_input']
                params: {'kw1': arg1, 'kw2': arg2, ...}

    Bugs: version 0.0.6
        * There is not a way to programmatically check the current step number on disk.
          See https://github.com/kassonlab/gmxapi/issues/56 and https://github.com/kassonlab/gmxapi/issues/85
    """
    import os

    usage = "argument to from_tpr() should be a valid filename or list of filenames, followed by optional key word arguments."

    # Normalize to tuple input type.
    if isinstance(input, (list, tuple)):
        tpr_list = tuple(to_string(element) for element in input)
    else:
        try:
            tpr_list = (to_string(input), )
        except Exception:
            # Catch only ordinary errors: a bare ``except`` would also trap KeyboardInterrupt and SystemExit.
            raise exceptions.UsageError(usage)

    # Check for valid filenames. os.path.isfile() is already False for paths that do not exist.
    for arg in tpr_list:
        if not os.path.isfile(arg):
            arg_path = os.path.abspath(arg)
            raise exceptions.UsageError(usage + " Got {}".format(arg_path))

    # Note: These are runner parameters, not MD parameters, and should be in the call to gmx.run() instead of here.
    # Reference https://github.com/kassonlab/gmxapi/issues/95

    # Accepted key words (including the mdrun-style aliases) mapped to the canonical parameter name.
    canonical_names = {
        'grid': 'grid',
        'dd': 'grid',
        'pme_ranks': 'pme_ranks',
        'npme': 'pme_ranks',
        'threads': 'threads',
        'nt': 'threads',
        'tmpi': 'tmpi',
        'ntmpi': 'tmpi',
        'threads_per_rank': 'threads_per_rank',
        'ntomp': 'threads_per_rank',
        'pme_threads_per_rank': 'pme_threads_per_rank',
        'ntomp_pme': 'pme_threads_per_rank',
        'steps': 'steps',
        'nsteps': 'steps',
        'max_hours': 'max_hours',
        'maxh': 'max_hours',
        # No `mdrun`-style alias: try not to encourage confusion with the `-noappend` flag, which would be a
        # confusing double negative if represented as a bool.
        'append_output': 'append_output',
        'end_time': 'end_time',
    }
    # Type conversion applied to each canonical parameter. 'steps' needs extra validation and is handled below.
    converters = {
        'grid': tuple,
        'pme_ranks': int,
        'threads': int,
        'tmpi': int,
        'threads_per_rank': int,
        'pme_threads_per_rank': int,
        'max_hours': float,
        'append_output': bool,
        'end_time': float,
    }

    params = {}
    for arg_key, arg_value in kwargs.items():
        param_name = canonical_names.get(arg_key)
        if param_name is None:
            raise exceptions.UsageError(
                "Invalid key word argument: {}. {}".format(arg_key, usage))

        if param_name != 'steps':
            params[param_name] = converters[param_name](arg_value)
            continue

        if arg_value is None:
            # None means "don't override the input" which is indicated by a parameter value of -2 in GROMACS 2019
            steps = -2
        else:
            # Otherwise we require steps to be a positive integer
            try:
                steps = int(arg_value)
            except (TypeError, ValueError):
                # steps is not an integer.
                raise exceptions.TypeError(
                    '"steps" could not be interpreted as an integer.')
            # The range check is kept outside of the ``try`` block so that the error raised here can never be
            # caught by the handler above and re-reported as a type error.
            if steps < 1:
                raise exceptions.ValueError(
                    'steps to run must be at least 1')
            # The "nsteps" command line flag will be removed in GROMACS 2020
            # and so "steps" is deprecated in gmxapi 0.0.7
            # stacklevel=2 attributes the warning to the caller of from_tpr() rather than to this line.
            warnings.warn(
                "`steps` keyword argument is deprecated. Consider `end_time` instead.",
                DeprecationWarning,
                stacklevel=2)
        params['steps'] = steps

    # Create an empty WorkSpec
    workspec = WorkSpec()

    # Create and add the Element for the tpr file(s)
    inputelement = WorkElement(namespace='gromacs',
                               operation='load_tpr',
                               params={'input': tpr_list})
    inputelement.name = 'tpr_input'
    if inputelement.name not in workspec.elements:
        # Operations such as this need to be replaced with accessors or properties that can check the validity of the WorkSpec
        workspec.elements[inputelement.name] = inputelement.serialize()
        inputelement.workspec = workspec

    # Create and add the simulation element
    # We can add smarter handling of the `depends` argument, but it is only critical to check when adding the element
    # to a WorkSpec.
    mdelement = WorkElement(operation='md',
                            depends=[inputelement.name],
                            params=params)
    mdelement.name = 'md_sim'
    # Check that the element has not already been added, but that its dependency has.
    workspec.add_element(mdelement)

    return mdelement
Example #9
0
 def name(self, new_name):
     """Store ``new_name`` as the element name after conversion with ``to_string()`` and ``str()``."""
     normalized = to_string(new_name)
     self._name = str(normalized)