Ejemplo n.º 1
0
    def __init__(self,
                 namespace="gmxapi",
                 operation=None,
                 params=None,
                 depends=()):
        """Initialize the description of a single work element.

        Args:
            namespace: Namespace of the operation; normalized with ``to_string`` and stored as ``str``.
            operation: Name of the operation (required); normalized the same way as ``namespace``.
            params: Optional ``dict`` of keyword arguments for the operation. Keys are passed
                through ``to_string``; values are stored unchanged in a ``GmxMap``.
            depends: Iterable of dependencies. A ``list``/``tuple`` entry is stored as a list
                of ``str``; any other entry is stored as a single ``str``.

        Raises:
            exceptions.UsageError: if ``operation`` is ``None``, or if ``params`` is provided
                but is not a ``dict``.
        """
        self._namespace = str(to_string(namespace))

        # An operations submodule could validate these in the future,
        # e.g. self.operation = gmx.workflow.operations.normalize(operation)
        if operation is None:
            raise exceptions.UsageError("Invalid argument type for operation.")
        self._operation = str(to_string(operation))

        # Note: nothing currently prevents attribute updates by assignment after the element has been
        # added to a workspec; see https://github.com/kassonlab/gmxapi/issues/92
        if params is None:
            self.params = GmxMap()
        elif not isinstance(params, dict):
            raise exceptions.UsageError(
                "If provided, params must be a dictionary of keyword arguments"
            )
        else:
            self.params = GmxMap(
                {to_string(key): value
                 for key, value in params.items()})

        self.depends = []
        for entry in depends:
            # A list/tuple entry is a group of names; anything else is a single name.
            is_group = isinstance(entry, (list, tuple))
            self.depends.append(
                [str(member) for member in entry] if is_group else str(entry))

        # The element is unnamed and not attached to any WorkSpec until these are assigned.
        self._name = None
        self._workspec = None
Ejemplo n.º 2
0
    def __init__(self, filename: str = None, mode: str = 'r'):
        """Create a handle description for a TPR run input file.

        Only read-only access (``mode='r'``) is accepted. Nothing is opened here:
        the constructor validates its arguments and records them.

        Args:
            filename (str): Path to a run input file (e.g. 'myfile.tpr'). Required.
            mode (str): File access mode; must be 'r'.

        Raises:
            exceptions.UsageError: if ``filename`` is ``None`` or ``mode`` is not 'r'.

        Note:
            Currently, TPR files are read-only from the Python interface.

        Example:

            >>> import gmxapi as gmx
            >>> filehandle = gmx.TprFile(filename, 'r')

        """
        read_only = 'r'
        if filename is None:
            raise exceptions.UsageError("TprFile objects must be associated with a file.")
        if not mode == read_only:
            raise exceptions.UsageError("TPR files only support read-only access.")
        # Assigned left to right: mode first, then filename.
        self.mode, self.filename = mode, filename
        # No underlying file handle exists yet.
        self._tprFileHandle = None
Ejemplo n.º 3
0
def read_tpr(tprfile: typing.Union[str, TprFile]):
    """
    Build a simulation input object from a TPR run input file.

    Arguments:
        tprfile : an existing ``TprFile`` object, or a file name from which a
            read-only ``TprFile`` is created (the name is passed through ``os.fsencode``).

    Returns:
         simulation input object wrapping the ``TprFile``

    Raises:
        exceptions.UsageError: if a ``TprFile`` cannot be created from ``tprfile``.
            The underlying exception is chained as the cause.

    The returned object may be inspected by the user. Simulation input parameters
    may be extracted through the `parameters` attribute.

    Example:
        >>> sim_input = gmx.fileio.read_tpr(tprfile=tprfilename)
        >>> params = sim_input.parameters.extract()
        >>> print(params['init-step'])
        0

    Supports the `read_tpr` gmxapi work graph operation. (not yet implemented)
    """
    # An existing handle is used as-is.
    if isinstance(tprfile, TprFile):
        return _SimulationInput(tprfile)

    # Otherwise treat the argument as a path and open it read-only.
    try:
        handle = TprFile(os.fsencode(tprfile), mode='r')
    except Exception as e:
        raise exceptions.UsageError("TPR object or file name is required.") from e
    return _SimulationInput(handle)
Ejemplo n.º 4
0
    def add_element(self, element):
        """Add an element to a work specification if possible.

        Adding an element to a WorkSpec must preserve the validity of the workspec, which involves several checks.
        We do not yet check for element uniqueness beyond a string name.

        If an element is added that was previously in another WorkSpec, it must first be removed from the
        other WorkSpec.

        On success the serialized element is stored in ``self.elements`` under ``element.name`` and
        ``element.workspec`` is set to this WorkSpec.

        Args:
            element: object providing ``namespace``, ``operation``, ``serialize`` and a non-empty ``name``.
                If it has a ``depends`` attribute, each entry is either a single element name or a
                list/tuple of element names; every named dependency must already be in this WorkSpec.

        Raises:
            exceptions.ValueError: if ``element`` lacks the ``namespace``, ``operation`` or
                ``serialize`` attributes.
            exceptions.UsageError: if the element is unnamed, its name is already present, or one of
                its dependencies has not been added yet.
            exceptions.Error: if the element currently belongs to a different WorkSpec.
        """
        looks_like_element = (hasattr(element, "namespace")
                              and hasattr(element, "operation")
                              and hasattr(element, "serialize"))
        if not looks_like_element:
            raise exceptions.ValueError(
                "Provided object does not appear to be compatible with gmx.workflow.WorkElement."
            )

        if not hasattr(element, "name") or element.name is None or len(
                str(element.name)) < 1:
            raise exceptions.UsageError(
                "Only named elements may be added to a WorkSpec.")
        if element.name in self.elements:
            raise exceptions.UsageError(
                "Elements in WorkSpec must be uniquely identifiable.")

        if hasattr(element, "depends"):
            for entry in element.depends:
                # A grouped entry (list/tuple of names) is not hashable, so it cannot be looked up
                # in self.elements directly; check each of its names instead.
                names = entry if isinstance(entry, (list, tuple)) else (entry, )
                for dependency in names:
                    if dependency not in self.elements:
                        raise exceptions.UsageError(
                            "Element dependencies must already be specified before an Element may be added."
                        )

        # Okay, it looks like we have an element we can add
        if hasattr(
                element, "workspec"
        ) and element.workspec is not None and element.workspec is not self:
            raise exceptions.Error(
                "Element must be removed from its current WorkSpec to be added to this WorkSpec, but element "
                "removal is not yet implemented.")

        self.elements[element.name] = element.serialize()
        element.workspec = self
        logger.info("Added element {} to workspec.".format(element.name))
Ejemplo n.º 5
0
def scoped_communicator(original_comm, requested_size: int = None):
    """Yield a communicator and free it when the generator is resumed or closed.

    This is a generator with a single ``yield``; it is presumably wrapped as a context
    manager by a decorator outside this view (TODO confirm).

    Args:
        original_comm: communicator to derive the yielded communicator from. When a size is
            requested it must provide ``Get_size()``.
        requested_size: if ``None``, the communicator is obtained with
            ``_acquire_communicator(communicator=original_comm)``; otherwise it is obtained with
            ``_get_ensemble_communicator(original_comm, requested_size)``.

    Yields:
        The acquired communicator. Its ``Free()`` method is always called afterwards.

    Raises:
        exceptions.UsageError: if a size is requested but ``original_comm`` is ``None`` or has no
            ``Get_size`` attribute.
        exceptions.FeatureNotAvailableError: if ``original_comm`` is smaller than ``requested_size``.
    """
    from gmxapi.simulation.context import _acquire_communicator, _get_ensemble_communicator

    if requested_size is not None:
        usable = original_comm is not None and hasattr(original_comm, 'Get_size')
        if not usable:
            raise exceptions.UsageError('A valid communicator must be provided when requesting a specific size.')
        available = original_comm.Get_size()
        if available < requested_size:
            raise exceptions.FeatureNotAvailableError(
                'Cannot produce a subcommunicator of size {} from a communicator of size {}.'.format(
                    requested_size,
                    available
                ))
        assert available >= requested_size
        communicator = _get_ensemble_communicator(original_comm, requested_size)
    else:
        communicator = _acquire_communicator(communicator=original_comm)

    try:
        yield communicator
    finally:
        # Release the communicator whether or not the consumer raised.
        communicator.Free()
Ejemplo n.º 6
0
def cli(command: NDArray,
        shell: bool,
        output: OutputCollectionDescription,
        stdin: str = ''):
    """Run a command line program in a subprocess and publish its status.

    Configure an executable in a subprocess. Executes when run in an execution
    Context, as part of a work graph or via gmx.run(). Runs in the current
    working directory.

    Shell processing is not enabled, but can be considered for a future version.
    This means that shell expansions such as environment variables, globbing (`*`),
    and other special symbols (like `~` for home directory) are not available.
    This allows a simpler and more robust implementation, as well as a better
    ability to uniquely identify the effects of a command line operation. If you
    think this disallows important use cases, please let us know.

    Arguments:
         command: a tuple (or list) to be the subprocess arguments, including `executable`.
            A single `str` or `bytes` value is treated as a one-element command.
         shell: unused (provides forward-compatibility); a true value is rejected.
         output: object that receives the `erroroutput` and `returncode` results.
         stdin (str): String input to send to STDIN (terminal input) of the executable.
            An empty string means no input is sent.

    Multi-line text sent to *stdin* should be joined into a single string
    (e.g. ``'\\n'.join(list_of_strings) + '\\n'``).
    If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
    and will run one operation for each provided string.

    Only string input (:py:func:str) to *stdin* is currently supported.
    If you have a use case that requires streaming input or binary input,
    please open an issue or contact the author(s).

    Arguments are iteratively added to the command line with standard Python
    iteration, so you should use a tuple or list even if you have only one parameter.
    I.e. If you provide a string with `arguments="asdf"` then it will be passed as
    `... "a" "s" "d" "f"`. To pass a single string argument, `arguments=("asdf",)`
    or `arguments=["asdf"]`.

    `input` and `output` should be a dictionary with string keys, where the keys
    name command line "flags" or options.

    Example:
        Execute a command named `exe` that takes a flagged option for file name
        (stored in a local Python variable `my_filename`) and an `origin` flag
        that uses the next three arguments to define a vector.

            >>> my_filename = "somefilename"
            >>> result = cli(('exe', '--origin', 1.0, 2.0, 3.0, '-f', my_filename), shell=False)
            >>> assert hasattr(result, 'file')
            >>> assert hasattr(result, 'erroroutput')
            >>> assert hasattr(result, 'returncode')

    Returns:
        A data structure with attributes for each of the results `file`, `erroroutput`, and `returncode`.
        This function itself returns nothing; it assigns `erroroutput` and `returncode` on `output`.

    Result object attributes:
        * `file`: the mapping of CLI flags to filename strings resulting from the `output` kwarg
        * `erroroutput`: A string of error output (if any) if the process failed.
        * `returncode`: return code of the subprocess.

    Raises:
        exceptions.UsageError: if `shell` is true.
        exceptions.ValueError: if the first element of `command` cannot be resolved to an executable.

    """
    # Note: we could make provisions for stdio filehandles in a future version. E.g.
    # * STDOUT is available if a consuming operation is bound to `output.stdout`.
    # * STDERR is available if a consuming operation is bound to `output.stderr`.
    # * Otherwise, STDOUT and/or STDERR is(are) closed when command is called.

    # In the operation implementation, we expect the `shell` parameter to be intercepted by the
    # wrapper and set to False.
    if shell:
        raise exceptions.UsageError(
            "Operation does not support shell processing.")

    # An empty string means "no input"; subprocess.run expects None in that case.
    stdin_text = None if stdin == '' else stdin

    # Normalize the command to a mutable list of arguments.
    argv = [command] if isinstance(command, (str, bytes)) else list(command)

    program = argv[0]
    resolved = shutil.which(program)
    if resolved is None:
        raise exceptions.ValueError(
            '"{}" is not found or not executable.'.format(program))
    argv[0] = resolved

    # TODO: (FR9) Can OS input/output filehandles be a responsibility of
    #  the code providing 'resources'?

    logger.debug('executing subprocess')
    try:
        # STDERR is merged into STDOUT, and the combined stream is captured as text.
        completed = subprocess.run(argv,
                                   shell=shell,
                                   input=stdin_text,
                                   check=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   encoding='utf-8',
                                   universal_newlines=True)
    except subprocess.CalledProcessError as failure:
        logger.info(
            "commandline operation had non-zero return status when calling {}".
            format(failure.cmd))
        erroroutput = failure.output
        returncode = failure.returncode
    else:
        erroroutput = ''
        returncode = completed.returncode
        # TODO: Resource management code should manage a safe data object for `output`.
        for line in completed.stdout.split('\n'):
            logger.debug(line)

    # Publish outputs.
    output.erroroutput = erroroutput
    output.returncode = returncode
Ejemplo n.º 7
0
def cli(command: NDArray, shell: bool, output: OutputCollectionDescription):
    """Execute a command line program in a subprocess.

    Configure an executable in a subprocess. Executes when run in an execution
    Context, as part of a work graph or via gmx.run(). Runs in the current
    working directory.

    Shell processing is not enabled, but can be considered for a future version.
    This means that shell expansions such as environment variables, globbing (`*`),
    and other special symbols (like `~` for home directory) are not available.
    This allows a simpler and more robust implementation, as well as a better
    ability to uniquely identify the effects of a command line operation. If you
    think this disallows important use cases, please let us know.

    Arguments:
         command : a tuple (or list) to be the subprocess arguments, including `executable`.
            A single `str` or `bytes` value is treated as a one-element command.
         output : object that receives the `erroroutput` and `returncode` results
         shell : unused (provides forward-compatibility); a true value is rejected

    Arguments are iteratively added to the command line with standard Python
    iteration, so you should use a tuple or list even if you have only one parameter.
    I.e. If you provide a string with `arguments="asdf"` then it will be passed as
    `... "a" "s" "d" "f"`. To pass a single string argument, `arguments=("asdf",)`
    or `arguments=["asdf"]`.

    `input` and `output` should be a dictionary with string keys, where the keys
    name command line "flags" or options.

    Example:
        Execute a command named `exe` that takes a flagged option for file name
        (stored in a local Python variable `my_filename`) and an `origin` flag
        that uses the next three arguments to define a vector.

            >>> my_filename = "somefilename"
            >>> result = cli(('exe', '--origin', 1.0, 2.0, 3.0, '-f', my_filename), shell=False)
            >>> assert hasattr(result, 'file')
            >>> assert hasattr(result, 'erroroutput')
            >>> assert hasattr(result, 'returncode')

    Returns:
        A data structure with attributes for each of the results `file`, `erroroutput`, and `returncode`.
        This function itself returns nothing; it assigns `erroroutput` and `returncode` on `output`.

    Result object attributes:
        * `file`: the mapping of CLI flags to filename strings resulting from the `output` kwarg
        * `erroroutput`: A string of error output (if any) if the process failed.
        * `returncode`: return code of the subprocess.

    Raises:
        exceptions.UsageError: if `shell` is true.
        exceptions.ValueError: if the first element of `command` cannot be resolved to an
            executable file path (including when it is simply not found).

    """
    # Note: we could make provisions for stdio filehandles in a future version. E.g.
    # * STDOUT is available if a consuming operation is bound to `output.stdout`.
    # * STDERR is available if a consuming operation is bound to `output.stderr`.
    # * Otherwise, STDOUT and/or STDERR is(are) closed when command is called.
    #
    # Warning:
    #     Commands relying on STDIN cannot be used: STDIN is connected to the null device.

    # In the operation implementation, we expect the `shell` parameter to be intercepted by the
    # wrapper and set to False.
    if shell:
        raise exceptions.UsageError("Operation does not support shell processing.")

    if isinstance(command, (str, bytes)):
        command = [command]
    command = list(command)

    unresolved_message = 'command argument could not be resolved to an executable file path.'
    try:
        executable = shutil.which(command[0])
    except Exception as e:
        # E.g. an empty command or an argument of an unsupported type.
        raise exceptions.ValueError(unresolved_message) from e
    # shutil.which() signals "not found" by returning None rather than by raising.
    if executable is None:
        raise exceptions.ValueError(unresolved_message)
    command[0] = executable

    # TODO: (FR9) Can OS input/output filehandles be a responsibility of
    #  the code providing 'resources'?

    erroroutput = ''
    logger.debug('executing subprocess')
    try:
        # TODO: If Python >=3.5 is required, switch to subprocess.run()
        command_output = subprocess.check_output(command,
                                                 shell=shell,
                                                 stdin=subprocess.DEVNULL,
                                                 stderr=subprocess.STDOUT,
                                                 )
        returncode = 0
        # TODO: Resource management code should manage a safe data object for `output`.
        # WARNING: We have no reason to assume the output is utf-8 encoded text, so
        # undecodable bytes are replaced instead of aborting the operation.
        for line in command_output.decode('utf-8', errors='replace').split('\n'):
            logger.debug(line)
    except subprocess.CalledProcessError as e:
        logger.info("commandline operation had non-zero return status when calling {}".format(e.cmd))
        erroroutput = e.output.decode('utf-8', errors='replace')
        returncode = e.returncode
    # resources.output.erroroutput.publish(erroroutput)
    # resources.output.returncode.publish(returncode)
    # `publish` is descriptive, but redundant. Access to the output data handler is
    # assumed to coincide with publishing, and we assume data is published when the
    # handler is released. A class with a single `publish` method is overly complex
    # since we can just use the assignment operator.
    output.erroroutput = erroroutput
    output.returncode = returncode
Ejemplo n.º 8
0
def from_tpr(input=None, **kwargs):
    """Create a WorkSpec from a (list of) tpr file(s).

    Generates a work specification based on the provided simulation input and returns a handle to the
    MD simulation element of the workflow. Key word arguments can override simulation behavior from
    ``input``.

    If the MD operation discovers artifacts from a previous simulation that was launched from the same input,
    the simulation resumes from the last checkpointed step. If ``append_output`` is set ``False``, existing
    artifacts are kept separate from new output with the standard file naming convention,
    and new output begins from the last checkpointed step, if any.

    Setting ``end_time`` redefines the end point of the simulation trajectory from what was provided in
    ``input``. It is equivalent to changing the number of steps requested in the MDP (or TPR) input, but
    the time is provided as picoseconds instead of a number of time steps.

    .. deprecated:: 0.0.7
        If ``steps=N`` is provided and N is an integer
        greater than or equal to 1, the MD operation advances the trajectory by ``N`` steps, regardless of the number
        of simulation steps specified in ``input`` or ``end_time``. For convenience, setting ``steps=None`` does not override
        ``input``.
        Note that when it is not ``None``, ``steps`` takes precedence over ``end_time`` and ``input``, but can still be
        superseded by a signal, such as if an MD plugin or other code has a simulation completion condition that occurs
        before ``N`` additional steps have run.

    Where key word arguments correspond to ``gmx mdrun`` command line options, the corresponding flags are noted below.
    The flag names without the leading dash (e.g. ``dd``, ``npme``, ``nt``) are accepted as aliases.

    Keyword Arguments:
        input (str): *Required* string or list of strings giving the filename(s) of simulation input
        append_output (bool): Append output for continuous trajectories if True, truncate existing output data if False. (default True)
        end_time (float): Specify the final time in the simulation trajectory, overriding input read from TPR.
        grid (tuple): Domain decomposition grid divisions (nx, ny, nz). (-dd)
        max_hours (float): Terminate after 0.99 times this many hours if simulation is still running. (-maxh)
        pme_ranks (int): number of separate ranks to be used for PME electrostatics. (-npme)
        threads_per_pme_rank (int): Number of OpenMP threads per PME rank. (-ntomp_pme)
        steps (int): Override input files and run for this many steps. (-nsteps; deprecated)
        threads (int): Total number of threads to start. (-nt)
        threads_per_rank (int): number of OpenMP threads to start per MPI rank. (-ntomp)
        tmpi (int): number of thread-MPI ranks to start. (-ntmpi)

    ..  versionchanged:: 0.1
        *pme_threads_per_rank* renamed to *threads_per_pme_rank*.

    Returns:
        simulation member of a gmx.workflow.WorkSpec object

    Raises:
        exceptions.UsageError: if ``input`` cannot be interpreted as a file name, a named file does
            not exist, or an unrecognized key word argument is given.
        exceptions.TypeError: if ``steps`` cannot be interpreted as an integer.
        exceptions.ValueError: if ``steps`` is an integer smaller than 1.

    Produces a WorkSpec with the following data::

        version: gmxapi_workspec_0_1
        elements:
            tpr_input:
                namespace: gromacs
                operation: load_tpr
                params: {'input': ['tpr_filename1', 'tpr_filename2', ...]}
            md_sim:
                namespace: gmxapi
                operation: md
                depends: ['tpr_input']
                params: {'kw1': arg1, 'kw2': arg2, ...}

    Bugs: version 0.0.6
        * There is not a way to programmatically check the current step number on disk.
          See https://github.com/kassonlab/gmxapi/issues/56 and https://github.com/kassonlab/gmxapi/issues/85
    """
    import os

    usage = "argument to from_tpr() should be a valid filename or list of filenames, followed by optional key word arguments."

    # Normalize to tuple input type.
    if isinstance(input, (list, tuple)):
        tpr_list = tuple(to_string(element) for element in input)
    else:
        try:
            tpr_list = (to_string(input), )
        except Exception:
            # Deliberately not a bare `except`: KeyboardInterrupt and SystemExit must propagate.
            raise exceptions.UsageError(usage)

    # Check for valid filenames
    for arg in tpr_list:
        if not os.path.isfile(arg):
            arg_path = os.path.abspath(arg)
            raise exceptions.UsageError(usage + " Got {}".format(arg_path))

    # Note: These are runner parameters, not MD parameters, and should be in the call to gmx.run() instead of here.
    # Reference https://github.com/kassonlab/gmxapi/issues/95
    #
    # Options that only need a type conversion: accepted key -> (parameter name, converter).
    simple_options = {
        'grid': ('grid', tuple),
        'dd': ('grid', tuple),
        'pme_ranks': ('pme_ranks', int),
        'npme': ('pme_ranks', int),
        'threads': ('threads', int),
        'nt': ('threads', int),
        'tmpi': ('tmpi', int),
        'ntmpi': ('tmpi', int),
        'threads_per_rank': ('threads_per_rank', int),
        'ntomp': ('threads_per_rank', int),
        'max_hours': ('max_hours', float),
        'maxh': ('max_hours', float),
        # Try not to encourage confusion with the `mdrun` `-noappend` flag, which would be a
        # confusing double negative if represented as a bool.
        'append_output': ('append_output', bool),
        'end_time': ('end_time', float),
    }

    params = {}
    for arg_key, value in kwargs.items():
        if arg_key in simple_options:
            param_name, convert = simple_options[arg_key]
            params[param_name] = convert(value)
        elif arg_key in ('pme_threads_per_rank', 'threads_per_pme_rank', 'ntomp_pme'):
            # TODO: Remove this temporary accommodation.
            assert not gmx.version.api_is_at_least(0, 2)
            if arg_key == 'pme_threads_per_rank':
                warnings.warn(
                    "Key word pme_threads_per_rank has been renamed to threads_per_pme_rank.",
                    DeprecationWarning,
                    stacklevel=2)
            params['threads_per_pme_rank'] = int(value)
        elif arg_key in ('steps', 'nsteps'):
            if value is None:
                # None means "don't override the input" which is indicated by a parameter value of -2 in GROMACS 2019
                steps = -2
            else:
                # Otherwise we require steps to be a positive integer
                try:
                    steps = int(value)
                except (TypeError, ValueError):
                    # steps is not an integer.
                    raise exceptions.TypeError(
                        '"steps" could not be interpreted as an integer.')
                # The range check lives outside the `try` so that it can never be
                # mistaken for (and re-reported as) a conversion failure.
                if steps < 1:
                    raise exceptions.ValueError(
                        'steps to run must be at least 1')
                # The "nsteps" command line flag will be removed in GROMACS 2020
                # and so "steps" is deprecated in gmxapi 0.0.7
                warnings.warn(
                    "`steps` keyword argument is deprecated. Consider `end_time` instead.",
                    DeprecationWarning,
                    stacklevel=2)
            params['steps'] = steps
        else:
            raise exceptions.UsageError(
                "Invalid key word argument: {}. {}".format(arg_key, usage))

    # Create an empty WorkSpec
    workspec = WorkSpec()

    # Create and add the Element for the tpr file(s)
    inputelement = WorkElement(namespace='gromacs',
                               operation='load_tpr',
                               params={'input': tpr_list})
    inputelement.name = 'tpr_input'
    if inputelement.name not in workspec.elements:
        # Operations such as this need to be replaced with accessors or properties that can check the validity of the WorkSpec
        workspec.elements[inputelement.name] = inputelement.serialize()
        inputelement.workspec = workspec

    # Create and add the simulation element
    # We can add smarter handling of the `depends` argument, but it is only critical to check when adding the element
    # to a WorkSpec.
    mdelement = WorkElement(operation='md',
                            depends=[inputelement.name],
                            params=params)
    mdelement.name = 'md_sim'
    # Check that the element has not already been added, but that its dependency has.
    workspec.add_element(mdelement)

    return mdelement
Ejemplo n.º 9
0
                def __init__(self, **kwargs):
                    """Define the inputs, and thereby the identity, of a work graph node.

                    Keyword arguments correspond to parameters of the wrapped function
                    (names found in ``self.signature.parameters``), plus an optional ``input``.

                    ``input`` may be ``None`` (ignored), an object with a ``run`` attribute
                    (recorded as an input dependency), or a parameter pack providing default
                    values through its ``items()``. Standalone keyword arguments override
                    values from the parameter pack.

                    Inputs that are handles to gmxapi operations or outputs induce data flow
                    dependencies that the framework promises to satisfy before the Operation
                    executes and produces output.

                    Raises:
                        exceptions.UsageError: for a keyword argument that is not a parameter
                            of the wrapped function.
                    """
                    #
                    # Define the unique identity and data flow constraints of this work graph node.
                    #
                    # TODO: (FR4) generalize
                    # TODO: Make allowed input strongly specified in the Operation definition.
                    # TODO: Resolve execution dependencies at run() and make non-data
                    #  execution `dependencies` just another input that takes the default
                    #  output of an operation and doesn't do anything with it.
                    upstream = []
                    merged_kwargs = {}

                    # If present, kwargs['input'] is treated as an input "pack" providing _default_ values.
                    # It is always removed from kwargs, whether or not it is None.
                    pack = kwargs.pop('input', None)
                    if pack is not None:
                        # Try to determine what 'input' is.
                        # TODO: (FR5+) handling should be related to Context.
                        #  The process of accepting input arguments includes resolving placement in
                        #  a work graph and resolving the Context responsibilities for graph nodes.
                        if hasattr(pack, 'run'):
                            upstream.append(pack)
                        else:
                            # Assume a parameter pack is provided.
                            merged_kwargs.update(pack.items())
                    assert 'input' not in kwargs
                    # A parameter pack must not itself carry an 'input' entry.
                    assert 'input' not in merged_kwargs

                    # Merge kwargs and kwargs['input'] (keyword parameters versus parameter pack):
                    # explicit keyword arguments win over pack defaults.
                    for name, value in kwargs.items():
                        if name not in self.signature.parameters:
                            raise exceptions.UsageError('Unexpected keyword argument: {}'.format(name))
                        merged_kwargs[name] = value

                    # TODO: (FR4) Check input types

                    self.__input = PyFuncInput(args=[],
                                               kwargs=merged_kwargs,
                                               dependencies=upstream)

                    # TODO: (FR5+) Split the definition of the resource structure
                    #  and the resource initialization.
                    # Resource structure definition logic can be moved to the level
                    # of the class definition. We need knowledge of the inputs to
                    # uniquely identify the resources for this operation instance.
                    # Implementation suggestion: Context-provided metaclass defines
                    # resource manager interface for this Operation. Factory function
                    # initializes compartmentalized resource management at object creation.
                    self.__resource_manager = get_resource_manager(self)