def test_step_type():
    """Check the attributes and type predicates of the workflow step classes.

    Covers three aspects: (i) attribute values and ``is_*_step`` predicates
    for a code step and a container step, (ii) the empty-list defaults for
    ``inputs`` and ``outputs``, and (iii) the ValueError that is raised when
    a generic step is created with an unknown step type.
    """
    # -- Code step -----------------------------------------------------------
    code_step = CodeStep(
        identifier='test',
        func=my_add,
        arg='z',
        inputs=['a', 'b'],
        outputs=['x', 'y']
    )
    assert code_step.identifier == 'test'
    assert code_step.arg == 'z'
    assert code_step.inputs == ['a', 'b']
    assert code_step.outputs == ['x', 'y']
    assert code_step.is_code_step()
    assert not code_step.is_container_step()
    # -- Container step ------------------------------------------------------
    container_step = ContainerStep(
        identifier='test',
        image='test',
        inputs=['a', 'b'],
        outputs=['x', 'y']
    )
    assert container_step.identifier == 'test'
    assert container_step.image == 'test'
    assert container_step.inputs == ['a', 'b']
    assert container_step.outputs == ['x', 'y']
    assert container_step.is_container_step()
    assert not container_step.is_code_step()
    # -- Defaults ------------------------------------------------------------
    # Steps that are created without inputs and outputs expose empty lists.
    minimal_steps = (
        CodeStep(identifier='test', func=my_add),
        ContainerStep(identifier='test', image='test')
    )
    for minimal in minimal_steps:
        assert minimal.inputs == []
        assert minimal.outputs == []
    # -- Invalid step type ---------------------------------------------------
    with pytest.raises(ValueError):
        WorkflowStep(identifier='test', step_type=-1)
Example #2
0
    def exec(self, step: CodeStep, context: Dict,
             store: FileSystemStorage) -> ExecResult:
        """Execute a workflow step of type :class:`flowserv.model.workflow.step.CodeStep`
        in a given context.

        Captures output to STDOUT and STDERR and includes them in the returned
        execution result. While the step is running, the working directory of
        the process is set to the base directory of the given storage volume.

        Any exception that is raised by the executed step is caught and
        recorded in the result (stack trace appended to ``stderr``, exception
        object in ``exception`` and a ``returncode`` of 1). Errors that occur
        while switching the working directory are not caught; they propagate
        to the caller, but only after the original output streams have been
        restored.

        Note that the code worker expects a file system storage volume.

        Parameters
        ----------
        step: flowserv.model.workflow.step.CodeStep
            Code step in a serial workflow.
        context: dict
            Context for the executed code.
        store: flowserv.volume.fs.FileSystemStorage
            Storage volume that contains the workflow run files.

        Returns
        -------
        flowserv.controller.serial.workflow.result.ExecResult
        """
        result = ExecResult(step=step)
        # Remember the current output streams so they can always be restored.
        out = sys.stdout
        err = sys.stderr
        try:
            sys.stdout = OutputStream(stream=result.stdout)
            sys.stderr = OutputStream(stream=result.stderr)
            # Change working directory temporarily. This happens inside the
            # outer try-block so that a failure here (e.g., a missing base
            # directory) cannot leave the process-wide streams redirected.
            cwd = os.getcwd()
            os.chdir(store.basedir)
            try:
                step.exec(context=context)
            except Exception as ex:
                logging.error(ex, exc_info=True)
                strace = '\n'.join(util.stacktrace(ex))
                logging.debug(strace)
                result.stderr.append(strace)
                result.exception = ex
                result.returncode = 1
            finally:
                # Reset working directory.
                os.chdir(cwd)
        finally:
            # Make sure to reverse redirection of output streams
            sys.stdout = out
            sys.stderr = err
        return result
Example #3
0
def Step(
    identifier: str, action: Dict, inputs: Optional[List[str]] = None,
    outputs: Optional[List[str]] = None
) -> WorkflowStep:
    """Factory that creates a workflow step from its dictionary serialization.

    The keys that are present in the ``action`` dictionary determine the type
    of the returned step. The checks are performed in the following order:

    - ``environment`` and ``commands``: container step,
    - ``func``: code step,
    - ``notebook``: notebook step.

    Raises a ValueError if none of these combinations is present in the given
    dictionary.

    Parameters
    ----------
    identifier: string
        Unique step name (identifier).
    action: Dict
        Dictionary serialization for the workflow step.
    inputs: list of string, default=None
        List of files that are required by the workflow step as inputs.
    outputs: list of string, default=None
        List of files that are generated by the workflow step as outputs.

    Returns
    -------
    flowserv.model.workflow.step.WorkflowStep

    Raises
    ------
    ValueError
    """
    # Arguments that are passed unchanged to every step type.
    shared = {'identifier': identifier, 'inputs': inputs, 'outputs': outputs}
    # A container step requires both, the `environment` and the `commands`
    # element.
    is_container = 'environment' in action and 'commands' in action
    if is_container:
        return ContainerStep(
            image=action.get('environment'),
            commands=action.get('commands', []),
            **shared
        )
    if 'func' in action:
        # The function is referenced by name and imported here.
        return CodeStep(
            func=util.import_obj(action['func']),
            arg=action.get('arg'),
            varnames=parse_varnames(action=action),
            **shared
        )
    if 'notebook' in action:
        return NotebookStep(
            notebook=action['notebook'],
            output=action.get('output'),
            params=action.get('params'),
            requirements=action.get('requirements'),
            varnames=parse_varnames(action=action),
            **shared
        )
    # None of the known serialization formats matched.
    raise ValueError(f"invalid action specification '{action}'")
def test_error_exec(tmpdir):
    """Run a code step that fails and check the reported execution result.

    The step is executed with ``a=-1``. The assertions expect a non-zero
    return code, the output that was written to STDOUT before the failure,
    a non-empty STDERR, and the exception object in the result.
    """
    failing_step = CodeStep(identifier='test', func=write_and_add, arg='a')
    worker = CodeWorker()
    result = worker.exec(
        step=failing_step,
        context={'a': -1},
        store=FileSystemStorage(tmpdir)
    )
    assert result.returncode == 1
    assert result.stdout == ['-1 written', '\n']
    assert result.stderr != []
    assert result.exception is not None
def test_successful_exec(tmpdir):
    """Test successfully running a code step.

    Executes the step with ``a=1`` and expects a zero return code, the
    captured STDOUT, an empty STDERR, and no exception. Afterwards the file
    ``out.txt`` in the storage base directory is read and its last line is
    expected to contain the value '1'.
    """
    step = CodeStep(identifier='test', func=write_and_add, arg='a')
    r = CodeWorker().exec(step=step,
                          context={'a': 1},
                          store=FileSystemStorage(tmpdir))
    assert r.returncode == 0
    assert r.stdout == ['1 written', '\n']
    assert r.stderr == []
    assert r.exception is None
    # Read the written output file and keep the (stripped) last line. The
    # value is initialized so that an empty file results in an assertion
    # failure instead of a NameError for an unbound loop variable.
    last_line = None
    with open(os.path.join(tmpdir, 'out.txt'), 'r') as f:
        for row in f:
            last_line = row.strip()
    assert last_line == '1'
Example #6
0
    def add_code_step(self,
                      identifier: str,
                      func: Callable,
                      arg: Optional[str] = None,
                      varnames: Optional[Dict] = None,
                      inputs: Optional[List[str]] = None,
                      outputs: Optional[List[str]] = None) -> SerialWorkflow:
        """Create a code step from the given arguments and add it at the end
        of the list of workflow steps.

        Returns a reference to the workflow itself to allow chaining of
        calls.

        Parameters
        ----------
        identifier: str
            Unique workflow step identifier.
        func: callable
            Python function that is executed by the workflow step.
        arg: string, default=None
            Name of the variable under which the function result is stored in
            the workflow arguments. If None, the function result is discarded.
        varnames: dict, default=None
            Mapping of function argument names to names of workflow arguments.
            By default, the argument names in the function signature are
            assumed to match the names in the workflow argument dictionary.
            The mapping allows to bind function arguments whose names do not
            occur in that dictionary to workflow arguments that do.
        inputs: list of string, default=None
            List of files that are required by the workflow step as inputs.
        outputs: list of string, default=None
            List of files that are generated by the workflow step as outputs.

        Returns
        -------
        flowserv.controller.serial.workflow.base.SerialWorkflow
        """
        self.steps.append(
            CodeStep(
                identifier=identifier,
                func=func,
                arg=arg,
                varnames=varnames,
                inputs=inputs,
                outputs=outputs
            )
        )
        return self
def test_run_with_two_steps(tmpdir):
    """Run a workflow of two code steps that read the same input file but are
    executed by workers that are bound to different storage volumes.

    The input file initially exists in the default volume only. After a
    successful run it is expected to be present in the second volume as well.
    In the error case the input file is removed from the first volume and the
    workflow is expected to stop after the first step.
    """
    # -- Setup ----------------------------------------------------------------
    # Two separate storage volumes. Only the directory for the first volume
    # is created explicitly.
    source_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(source_dir)
    target_dir = os.path.join(tmpdir, 'v2')
    stores = [
        FStore(basedir=source_dir, identifier=DEFAULT_STORE),
        FStore(basedir=target_dir, identifier='v2')
    ]
    volumes = VolumeManager(stores=stores, files={'data.json': [DEFAULT_STORE]})
    # The input file data.json is written to the first volume.
    source_file = os.path.join(source_dir, 'data.json')
    with open(source_file, 'w') as f:
        json.dump({"value": 5}, f)
    # Each step is managed by its own worker, one worker per volume.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Both steps run the same function. They differ in the name of the result
    # variable and in the workflow argument that is bound to parameter `x`.
    steps = [
        CodeStep(
            identifier=step_id,
            func=multi_by_x,
            arg=step_id,
            varnames={'x': factor_name},
            inputs=['data.json']
        )
        for step_id, factor_name in (('s1', 'x1'), ('s2', 'x2'))
    ]
    # Initial arguments in the workflow context.
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ----------------------------------------------------
    success = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(success.steps) == 2
    assert success.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    assert os.path.isfile(os.path.join(target_dir, 'data.json'))
    # -- Error case -----------------------------------------------------------
    # Without the input file in the first volume only one step is recorded
    # and the context contains no step results.
    os.unlink(source_file)
    failure = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(failure.steps) == 1
    assert failure.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}
from flowserv.controller.worker.code import CodeWorker
from flowserv.controller.worker.docker import DockerWorker
from flowserv.controller.worker.manager import WorkerPool, Code, Docker, Notebook, Subprocess
from flowserv.controller.worker.notebook import NotebookEngine
from flowserv.controller.worker.subprocess import SubprocessWorker
from flowserv.model.workflow.step import CodeStep, ContainerStep, NotebookStep

import flowserv.error as err


@pytest.mark.parametrize(
    'step,cls',
    [
        (ContainerStep(identifier='test', image='test'), SubprocessWorker),
        # NOTE(review): this case repeats the previous one - confirm whether
        # a different worker class was intended here.
        (ContainerStep(identifier='test', image='test'), SubprocessWorker),
        (CodeStep(identifier='test', func=lambda x: x), CodeWorker),
        (
            NotebookStep(identifier='test', notebook='helloworld.ipynb'),
            NotebookEngine
        )
    ]
)
def test_get_default_worker(step, cls):
    """Check the type of the default worker that an empty worker pool returns
    for workflow steps of different types.

    The pool has no workers and no step-to-worker mapping, i.e., none of the
    steps has a manager explicitly assigned to it.
    """
    pool = WorkerPool(workers=[])
    worker = pool.get_default_worker(step)
    assert isinstance(worker, cls)


def test_get_worker_error():
    """Test error when accessing worker with unknown identifier."""
    step = ContainerStep(identifier='test', image='test')
    factory = WorkerPool(workers=[], managers={'test': 'test'})
    with pytest.raises(err.UnknownObjectError):
def test_exec_func_step():
    """Execute a Python function as a code step and check how the result is
    written to the workflow context.

    The same context dictionary is used for all three executions, i.e., each
    step sees the modifications of the previous steps.
    """
    context = {'x': 1, 'y': 2}
    # Store the function result under a new name `z`.
    CodeStep(identifier='test', func=my_add, arg='z').exec(context=context)
    assert context == {'x': 1, 'y': 2, 'z': 3}
    # Rename arguments: function parameter `x` is read from context variable
    # `z` and the result overwrites the context variable `x`.
    renaming_step = CodeStep(
        identifier='test',
        func=my_add,
        varnames={'x': 'z'},
        arg='x'
    )
    renaming_step.exec(context=context)
    assert context == {'x': 5, 'y': 2, 'z': 3}
    # Without a result variable the context is left unchanged.
    CodeStep(identifier='test', func=my_add).exec(context=context)
    assert context == {'x': 5, 'y': 2, 'z': 3}