Ejemplo n.º 1
0
    def test_list(self):
        """Exercise gmx.concatenate_lists and gmx.join_arrays on small inputs.

        Covers a single native list, the same list given twice, a single
        gmx.ndarray, and a native list joined with a gmx.ndarray.
        """
        source = [1, 2, 3]

        # TODO: test input validation
        # A single sublist should come back with the same contents.
        joined = gmx.concatenate_lists(sublists=[source])
        assert joined.dtype == gmx.datamodel.NDArray
        # Both sides are converted to the built-in tuple type before comparing,
        # since equality may behave differently for other sequence types.
        assert tuple(joined.result()) == tuple(source)
        assert len(joined.result()) == len(source)

        # The same list supplied twice should double the length.
        joined = gmx.concatenate_lists([source, source])
        assert len(joined.result()) == 2 * len(source)
        assert tuple(joined.result()) == tuple(source * 2)

        # A gmx.ndarray is accepted as a sublist, too.
        wrapped = gmx.ndarray([42])

        joined = gmx.concatenate_lists(sublists=[wrapped])
        assert joined.result()[0] == 42

        # Join a native list (front) with a gmx.ndarray (back).
        joined = gmx.join_arrays(front=source, back=wrapped)
        assert len(joined.result()) == len(source) + 1
        assert tuple(joined.result()) == tuple([*source, 42])
Ejemplo n.º 2
0
def commandline_operation(executable=None,
                          arguments=(),
                          input_files: dict = None,
                          output_files: dict = None,
                          stdin: str = None,
                          **kwargs):
    """Wrap a command line executable as an operation in gmxapi data flow.

    A new Operation is defined for the given executable and input/output
    parameter set. A chain of operations is generated to process the named
    key word arguments and to handle input/output data dependencies.

    Arguments:
        executable: name of an executable on the path
        arguments: list of positional arguments to insert at ``argv[1]``.
            A single :py:func:`str` or :py:func:`bytes` value is treated as
            one argument.
        input_files: mapping of command-line flags to input file names
        output_files: mapping of command-line flags to output file names
        stdin (str): String input to send to STDIN (terminal input) of the
            executable (optional).
        **kwargs: additional key word arguments, passed along both to the
            wrapped ``cli()`` operation and to the operation that publishes
            the merged outputs.

    Multi-line text for *stdin* should be joined into a single string first.
    E.g.::

        commandline_operation(..., stdin='\\n'.join(list_of_strings) + '\\n')

    If multiple strings are provided to *stdin*, gmxapi will assume an ensemble,
    and will run one operation for each provided string.

    Only string input (:py:func:`str`) to *stdin* is currently supported.
    If you have a use case that requires streaming input or binary input,
    please open an issue or contact the author(s).

    Output:
        The output node of the resulting operation handle contains

        * ``file``: the mapping of CLI flags to filename strings resulting from the ``output_files`` kwarg
        * ``erroroutput``: A string of error output (if any) if the process failed.
        * ``returncode``: return code of the subprocess.

    """

    # Implementation note: a script calling this function gets back an
    # operation instance. Because inputs and outputs are specified dynamically,
    # every call may pay for defining new types that describe the data flow
    # topology, whatever the executable is. Should that cost become a problem,
    # consider exposing the intermediate stage at which the Operation is fully
    # specified, so that it can be reused.

    ##
    # 1. Define a new operation whose outputs are those of `cli()` plus `file`
    #    (taken from `output_files`).

    # `output_files` is basically passed straight through. Routing it through an
    # operation that also consumes the cli() outputs ensures it is not published
    # before the executable has run.

    # Warning: decorating a local function this way conflicts with the idea that
    # Operations are portable (importable, serializable/deserializable). The
    # overall design needs further thought.
    # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
    # Objects implementing gmxapi data interfaces may reasonably be resolvable
    # only by one particular Context, provided that Context can hand the results
    # to another Context on request. Re-instantiating Operations is just one way
    # to relocate Futures. Here, however, creating merged_ops dynamically does
    # not seem right; commandline_operation should probably be a proper
    # Operation.
    #
    # TODO: (FR4+) Characterize the `file` dictionary key type:
    #  explicitly sequences rather than maybe-string/maybe-sequence-of-strings
    @gmx.function_wrapper(output={
        'erroroutput': str,
        'returncode': int,
        'file': dict
    })
    def merged_ops(erroroutput: str = None,
                   returncode: int = None,
                   file: dict = None,
                   output: OutputCollectionDescription = None):
        # Every input must have been supplied by the time this body runs.
        assert erroroutput is not None
        assert returncode is not None
        assert file is not None
        assert output is not None
        # Republish the inputs unchanged on the output collection.
        output.file = file
        output.returncode = returncode
        output.erroroutput = erroroutput

    ##
    # 2. Prepare data flow.

    # Fall back to fresh empty mappings when no file maps were given.
    input_files = {} if input_files is None else input_files
    output_files = {} if output_files is None else output_files
    if isinstance(arguments, (str, bytes)):
        # A lone string is a single argument, not a sequence of characters.
        arguments = [arguments]

    # Command line layout: executable, positional arguments, input file flags,
    # then output file flags.
    command_parts = [
        [executable],
        arguments,
        filemap_to_flag_list(input_files),
        filemap_to_flag_list(output_files),
    ]
    command = gmx.concatenate_lists(command_parts)
    # Caller-supplied kwargs come last, so they take precedence on a key clash.
    cli_args = {'command': command, 'shell': gmx.make_constant(False), **kwargs}
    if stdin is not None:
        # NOTE(review): stdin is coerced with str() here; confirm how this fits
        # with the ensemble behavior described in the docstring.
        cli_args['stdin'] = str(stdin)

    ##
    # 3. Merge operations
    #
    # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
    # yield references to the same unique resource. Creating this handle
    # separately ought to be unnecessary, but the fingerprinting and Context
    # resource management required for that are not in place yet.
    # TODO: ``label`` kwarg
    # TODO: input fingerprinting
    cli_result = cli(**cli_args)

    # The returned object has an OutputCollection giving access to the outputs
    # of cli() and to output_files (as "file").
    return merged_ops(erroroutput=cli_result.output.erroroutput,
                      returncode=cli_result.output.returncode,
                      file=output_files,
                      **kwargs)
Ejemplo n.º 3
0
def commandline_operation(executable=None,
                          arguments=(),
                          input_files: dict = None,
                          output_files: dict = None,
                          **kwargs):
    """Wrap a command line executable as an operation in gmxapi data flow.

    A new Operation is defined for the given executable and input/output
    parameter set. A chain of operations is generated to process the named
    key word arguments and to handle input/output data dependencies.

    Arguments:
        executable : name of an executable on the path
        arguments : list of positional arguments to insert at argv[1].
            A single str or bytes value is treated as one argument.
        input_files : mapping of command-line flags to input file names
        output_files : mapping of command-line flags to output file names
        **kwargs : additional key word arguments, passed along both to the
            wrapped ``cli()`` operation and to the operation that publishes
            the merged outputs.

    Output:
        The output node of the resulting operation handle contains

        - file: the mapping of CLI flags to filename strings resulting from the ``output_files`` kwarg.
        - erroroutput: A string of error output (if any) if the process failed.
        - returncode: return code of the subprocess.

    Example:
        Terminal I/O is managed by the gmxapi execution Context. Standard input
        and output are not yet accessible to the user.

        >>> operation = gmx.commandline_operation(executable='echo',
        ...                                       arguments=['hi there'])
        >>> assert operation.output.returncode.result() == 0

    The main purpose of commandline_operation is to establish data flow
    constraints on command line programs such as the
    `GROMACS tools <http://manual.gromacs.org/current/user-guide/cmdline.html>`_.

    Key words may be omitted where there is no ambiguity. If you have already
    "sourced" your GMXRC (or the ``gmx`` executable is already on your PATH),
    then consider the following.

        >>> trjcat = gmx.commandline_operation(
        ...     'gmx',
        ...     'trjcat',
        ...     input_files={'-f': 'traj_comp.part0001.xtc'},
        ...     output_files={'-o': 'trjcat.xtc'})

    The snippet above defines an operation and produces a reference named
    ``trjcat``. When executed, the operation runs a command that looks like

        $ gmx trjcat -f traj_comp.part0001.xtc -o trjcat.xtc

    in a subprocess. Execution does not occur immediately, and nothing is
    executed at all unless something forces it. As with all gmxapi operations,
    execution is handled internally on an as-needed basis (1) when output data
    from one operation is needed by another, (2) when the caller (script or
    interactive user) requests a *result*, or (3) when explicitly requested
    with *run*.

    Example:
        Assume the above assignment to ``trjcat``. Then,

        >>> import os
        >>> outfile = trjcat.output.file['-o'].result()
        >>> assert os.path.exists(outfile)
        >>> trjcat.run()

        The command line is run when *result()* is called, producing the intended
        output file. If *result()* had not been called, the call to *run()* would
        have forced execution. But since the operation has already been performed,
        it is not re-executed.

    Warning:
        commandline_operation can only find executables on your PATH. In other
        words, if you couldn't execute it before starting Python, gmxapi cannot
        execute it either. This includes GROMACS command line tools, so don't
        forget to "source" your
        `GMXRC <http://manual.gromacs.org/documentation/current/install-guide/index.html?highlight=gmxrc>`_.
        See `issue 2961 <https://redmine.gromacs.org/issues/2961>`_

    """

    # Implementation note: a script calling this function gets back an
    # operation instance. Because inputs and outputs are specified dynamically,
    # every call may pay for defining new types that describe the data flow
    # topology, whatever the executable is. Should that cost become a problem,
    # consider exposing the intermediate stage at which the Operation is fully
    # specified, so that it can be reused.

    ##
    # 1. Define a new operation whose outputs are those of `cli()` plus `file`
    #    (taken from `output_files`).

    # `output_files` is basically passed straight through. Routing it through an
    # operation that also consumes the cli() outputs ensures it is not published
    # before the executable has run.

    # Warning: decorating a local function this way conflicts with the idea that
    # Operations are portable (importable, serializable/deserializable). The
    # overall design needs further thought.
    # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
    # Objects implementing gmxapi data interfaces may reasonably be resolvable
    # only by one particular Context, provided that Context can hand the results
    # to another Context on request. Re-instantiating Operations is just one way
    # to relocate Futures. Here, however, creating merged_ops dynamically does
    # not seem right; commandline_operation should probably be a proper
    # Operation.
    #
    # TODO: (FR4+) Characterize the `file` dictionary key type:
    #  explicitly sequences rather than maybe-string/maybe-sequence-of-strings
    @gmx.function_wrapper(output={
        'erroroutput': str,
        'returncode': int,
        'file': dict
    })
    def merged_ops(erroroutput: str = None,
                   returncode: int = None,
                   file: dict = None,
                   output: OutputCollectionDescription = None):
        # Every input must have been supplied by the time this body runs.
        assert erroroutput is not None
        assert returncode is not None
        assert file is not None
        assert output is not None
        # Republish the inputs unchanged on the output collection.
        output.file = file
        output.returncode = returncode
        output.erroroutput = erroroutput

    ##
    # 2. Prepare data flow.

    # Fall back to fresh empty mappings when no file maps were given.
    input_files = {} if input_files is None else input_files
    output_files = {} if output_files is None else output_files
    if isinstance(arguments, (str, bytes)):
        # A lone string is a single argument, not a sequence of characters.
        arguments = [arguments]

    # Command line layout: executable, positional arguments, input file flags,
    # then output file flags.
    command_parts = [
        [executable],
        arguments,
        filemap_to_flag_list(input_files),
        filemap_to_flag_list(output_files),
    ]
    command = gmx.concatenate_lists(command_parts)
    # Caller-supplied kwargs come last, so they take precedence on a key clash.
    cli_args = {'command': command, 'shell': gmx.make_constant(False), **kwargs}

    ##
    # 3. Merge operations
    #
    # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
    # yield references to the same unique resource (but currently do not).
    # Creating this handle separately ought to be unnecessary, but the
    # fingerprinting and Context resource management required for that are not
    # in place yet.
    # TODO: ``label`` kwarg
    # TODO: input fingerprinting
    cli_result = cli(**cli_args)

    # The returned object has an OutputCollection giving access to the outputs
    # of cli() and to output_files (as "file").
    return merged_ops(erroroutput=cli_result.output.erroroutput,
                      returncode=cli_result.output.returncode,
                      file=output_files,
                      **kwargs)