def test_empty_run_result():
    """Verify accessor behavior of a run result before any step executed."""
    result = RunResult(arguments={'a': 1})
    # No steps recorded yet, so the result collection is empty.
    assert len(result) == 0
    # Context lookups still resolve the initial run arguments.
    assert result.get('a') == 1
    # Without executed steps there is no exception, return code, or output.
    assert result.exception is None
    assert result.returncode is None
    assert result.stdout == []
    assert result.stderr == []
# Example #2
def exec_workflow(
    steps: List[WorkflowStep], workers: WorkerFactory, rundir: str,
    result: RunResult
) -> RunResult:
    """Run the steps of a serial workflow one after another.

    The workflow arguments are part of the execution context that is carried
    by the given :class:`flowserv.controller.serial.workflow.result.RunResult`
    object, which also collects the result of every executed step.

    Steps run in sequence. Execution stops early as soon as a step finishes
    with a non-zero return code. Container steps are executed by workers that
    are created via the given worker factory for steps of class
    :class:`flowserv.model.workflow.step.ContainerStep`; function steps are
    executed directly.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for :class:`flowserv.model.workflow.step.ContainerStep` steps.
    rundir: str, default=None
        Working directory for all executed workflow steps.
    result: flowserv.controller.serial.workflow.result.RunResult
        Collector for results from executed workflow steps. Contains the
        context within which the workflow is executed.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    for step in steps:
        if step.is_function_step():
            step_result = exec_func(
                step=step, context=result.context, rundir=rundir
            )
        else:
            # Container steps are delegated to a worker for the step's image.
            worker = workers.get(step.image)
            step_result = worker.exec(
                step=step, arguments=result.context, rundir=rundir
            )
        result.add(step_result)
        if step_result.returncode != 0:
            # Stop at the first step that did not finish successfully.
            break
    return result
# Example #3
def run_workflow(run_id: str, rundir: str, state: WorkflowState,
                 output_files: List[str], steps: List[ContainerStep],
                 arguments: Dict,
                 workers: WorkerFactory) -> Tuple[str, str, Dict]:
    """Execute a list of workflow steps synchronously.

    This is the worker function for asynchronous workflow executions. Returns a
    tuple containing the run identifier, the folder with the run files, and a
    serialization of the workflow state.

    Parameters
    ----------
    run_id: string
        Unique run identifier
    rundir: string
        Path to the working directory of the workflow run
    state: flowserv.model.workflow.state.WorkflowState
        Current workflow state (to access the timestamps)
    output_files: list(string)
        Relative path of output files that are generated by the workflow run
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for :class:`flowserv.model.workflow.step.ContainerStep` steps.

    Returns
    -------
    (string, string, dict)
    """
    logging.info('start run {}'.format(run_id))
    try:
        run_result = exec_workflow(steps=steps,
                                   workers=workers,
                                   rundir=rundir,
                                   result=RunResult(arguments=arguments))
        if run_result.returncode != 0:
            # Return error state. Include STDERR in result
            messages = run_result.log
            result_state = state.error(messages=messages)
            doc = serialize.serialize_state(result_state)
            return run_id, rundir, doc
        # Include only those output files that were actually generated by
        # the workflow run.
        files = [
            relative_path for relative_path in output_files
            if os.path.exists(os.path.join(rundir, relative_path))
        ]
        # Workflow executed successfully
        result_state = state.success(files=files)
    except Exception as ex:
        # Log the full traceback (exc_info=True) for easier debugging;
        # consistent with the volume-based run_workflow implementation.
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        logging.debug('\n'.join(strace))
        result_state = state.error(messages=strace)
    logging.info('finished run {}: {}'.format(run_id, result_state.type_id))
    return run_id, rundir, serialize.serialize_state(result_state)
# Example #4
def exec_workflow(steps: List[WorkflowStep], workers: WorkerPool,
                  volumes: VolumeManager, result: RunResult) -> RunResult:
    """Run the steps of a serial workflow one after another.

    The workflow arguments are part of the execution context that is carried
    by the given :class:`flowserv.controller.serial.workflow.result.RunResult`
    object, which also collects the result of every executed step.

    Steps run in sequence. Execution stops early as soon as a step finishes
    with a non-zero return code. Workers for steps of class
    :class:`flowserv.model.workflow.step.ContainerStep` are obtained from the
    given worker pool.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    workers: flowserv.controller.worker.manager.WorkerPool, default=None
        Factory for :class:`flowserv.model.workflow.step.ContainerStep` steps.
    volumes: flowserv.volume.manager.VolumeManager
        Manager for storage volumes that are used by the different workers.
    result: flowserv.controller.serial.workflow.result.RunResult
        Collector for results from executed workflow steps. Contains the
        context within which the workflow is executed.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    for step in steps:
        # Worker that is responsible for executing this step.
        worker = workers.get(step)
        # Stage the step's input files on the storage volume that is
        # associated with the worker.
        store = volumes.get(worker.volume)
        volumes.prepare(store=store, inputs=step.inputs, outputs=step.outputs)
        # Run the step and record its result.
        step_result = worker.exec(step=step, context=result.context, store=store)
        result.add(step_result)
        if step_result.returncode != 0:
            # Abort on the first failed step; outputs of a failed step are
            # not registered with the volume manager.
            break
        # Make the step's output files known to the volume manager.
        volumes.update(store=store, files=step.outputs)
    return result
# Example #5
def run_workflow(run_id: str, state: WorkflowState, output_files: List[str],
                 steps: List[ContainerStep], arguments: Dict,
                 volumes: VolumeManager,
                 workers: WorkerPool) -> Tuple[str, str, Dict]:
    """Execute a list of workflow steps synchronously.

    This is the worker function for asynchronous workflow executions. Returns
    a tuple with the run identifier, a serialization of the run file store,
    and a serialization of the resulting workflow state.

    Parameters
    ----------
    run_id: string
        Unique run identifier
    state: flowserv.model.workflow.state.WorkflowState
        Current workflow state (to access the timestamps)
    output_files: list(string)
        Relative path of output files that are generated by the workflow run
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    volumes: flowserv.volume.manager.VolumeManager
        Factory for storage volumes.
    workers: flowserv.controller.worker.manager.WorkerPool
        Factory for :class:`flowserv.model.workflow.step.ContainerStep` steps.

    Returns
    -------
    (string, string, dict)
    """
    logging.info('start run {}'.format(run_id))
    # Store for the run files; its serialization is part of the return value.
    runstore = volumes.get(DEFAULT_STORE)
    try:
        run_result = exec_workflow(
            steps=steps,
            workers=workers,
            volumes=volumes,
            result=RunResult(arguments=arguments)
        )
        if run_result.returncode != 0:
            # A step failed: report an error state that includes the run log.
            result_state = state.error(messages=run_result.log)
            doc = serialize.serialize_state(result_state)
            return run_id, runstore.to_dict(), doc
        # All steps finished successfully.
        result_state = state.success(files=output_files)
    except Exception as ex:
        # Log the full traceback for debugging and convert the exception
        # into an error state for the run.
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        logging.debug('\n'.join(strace))
        result_state = state.error(messages=strace)
    logging.info('finished run {}: {}'.format(run_id, result_state.type_id))
    return run_id, runstore.to_dict(), serialize.serialize_state(result_state)
# Example #6
    def run(self,
            arguments: Dict,
            workers: Optional[WorkerPool] = None,
            volumes: Optional[VolumeManager] = None) -> RunResult:
        """Execute workflow for the given set of input arguments.

        Runs the workflow steps in sequence and terminates early if a step
        returns a non-zero value. Workers for steps of class
        :class:`flowserv.model.workflow.step.ContainerStep` are obtained from
        the given worker pool.

        Results for all executed steps are collected in the returned
        :class:`flowserv.controller.serial.workflow.result.RunResult`.

        Parameters
        ----------
        arguments: dict
            User-provided arguments for the workflow run.
        workers: flowserv.controller.worker.manager.WorkerPool, default=None
            Factory for :class:`flowserv.model.workflow.step.ContainerStep`
            steps. Uses the default worker for all container steps if None.
        volumes: flowserv.volume.manager.VolumeManager
            Manager for storage volumes that are used by the different workers.

        Returns
        -------
        flowserv.controller.serial.workflow.result.RunResult
        """
        # Default to a volume over the current working directory if no
        # volumes are specified.
        if volumes is None:
            volumes = DefaultVolume(basedir=os.getcwd())
        # Default to a plain worker pool if no factory is given.
        if workers is None:
            workers = WorkerPool()
        # Delegate execution to exec_workflow, which returns the run result
        # containing the results of the executed steps.
        return exec_workflow(
            steps=self.steps,
            workers=workers,
            volumes=volumes,
            result=RunResult(arguments=arguments)
        )
# Example #7
    def run(self,
            arguments: Dict,
            workers: Optional[WorkerFactory] = None,
            rundir: Optional[str] = None) -> RunResult:
        """Execute workflow for the given set of input arguments.

        Runs the workflow steps in sequence and terminates early if a step
        returns a non-zero value. Workers for steps of class
        :class:`flowserv.model.workflow.step.ContainerStep` are created via
        the given worker factory.

        Results for all executed steps are collected in the returned
        :class:`flowserv.controller.serial.workflow.result.RunResult`.

        Parameters
        ----------
        arguments: dict
            User-provided arguments for the workflow run.
        workers: flowserv.controller.worker.factory.WorkerFactory, default=None
            Factory for :class:`flowserv.model.workflow.step.ContainerStep`
            steps. Uses the default worker for all container steps if None.
        rundir: str, default=None
            Working directory for all executed workflow steps. Uses the
            current working directory if None.

        Returns
        -------
        flowserv.controller.serial.workflow.result.RunResult
        """
        # Default to the current working directory if no run directory is
        # given.
        if rundir is None:
            rundir = os.getcwd()
        # Default to a plain worker factory if none is given.
        if workers is None:
            workers = WorkerFactory()
        # Delegate execution to exec_workflow, which returns the run result
        # containing the results of the executed steps.
        return exec_workflow(
            steps=self.steps,
            workers=workers,
            rundir=rundir,
            result=RunResult(arguments=arguments)
        )
def test_run_with_two_steps(tmpdir):
    """Test executing a sequence of two code steps that operate on the same
    file in different storage volumes.
    """
    # -- Setup ----------------------------------------------------------------
    # Two separate storage volumes; 'data.json' is initially only in v1.
    vol1_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(vol1_dir)
    vol2_dir = os.path.join(tmpdir, 'v2')
    volumes = VolumeManager(
        stores=[
            FStore(basedir=vol1_dir, identifier=DEFAULT_STORE),
            FStore(basedir=vol2_dir, identifier='v2')
        ],
        files={'data.json': [DEFAULT_STORE]}
    )
    with open(os.path.join(vol1_dir, 'data.json'), 'w') as f:
        json.dump({"value": 5}, f)
    # Each step is handled by its own worker on a different volume.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Both steps multiply the value in 'data.json' by a context variable.
    steps = [
        CodeStep(
            identifier='s1',
            func=multi_by_x,
            arg='s1',
            varnames={'x': 'x1'},
            inputs=['data.json']
        ),
        CodeStep(
            identifier='s2',
            func=multi_by_x,
            arg='s2',
            varnames={'x': 'x2'},
            inputs=['data.json']
        )
    ]
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ----------------------------------------------------
    run_result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 2
    assert run_result.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    # The second step ran on v2, so 'data.json' must have been copied there.
    assert os.path.isfile(os.path.join(vol2_dir, 'data.json'))
    # -- Error case: missing input file terminates after the first step -------
    os.unlink(os.path.join(vol1_dir, 'data.json'))
    run_result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 1
    assert run_result.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}
def test_successful_run_result():
    """Test results of a successful workflow run."""
    result = RunResult(arguments={'a': 1})
    # Execute two successful container steps, updating the context between
    # and after them.
    step1 = ContainerStep(identifier='s1', image='test1')
    result.add(ExecResult(step=step1, returncode=0, stdout=['o1']))
    result.context['a'] = 2
    step2 = ContainerStep(identifier='s2', image='test2')
    result.add(ExecResult(step=step2, returncode=0, stdout=['o2', 'o3']))
    result.context['b'] = 1
    # A successful run has no exception and a zero return code.
    assert result.exception is None
    assert result.returncode == 0
    result.raise_for_status()
    # Output is the concatenation of all step outputs.
    assert result.stdout == ['o1', 'o2', 'o3']
    assert result.stderr == []
    # Context reflects the latest updates.
    assert result.get('a') == 2
    assert result.get('b') == 1
    # Individual step results are accessible in execution order.
    first, second = result.steps[0], result.steps[1]
    assert first.step.image == 'test1'
    assert first.stdout == ['o1']
    assert second.step.image == 'test2'
    assert second.stdout == ['o2', 'o3']
def test_error_run_result():
    """Test results of an erroneous workflow run."""
    result = RunResult(arguments={})
    result.add(ExecResult(
        step=ContainerStep(identifier='s1', image='test'),
        returncode=0
    ))
    assert result.exception is None
    assert result.returncode == 0
    # A failed step with an attached exception: raise_for_status re-raises
    # that exception.
    result.add(ExecResult(
        step=ContainerStep(identifier='s2', image='test'),
        returncode=1,
        stderr=['e1', 'e2'],
        exception=ValueError()
    ))
    with pytest.raises(ValueError):
        result.raise_for_status()
    assert result.exception is not None
    assert result.returncode == 1
    assert result.stdout == []
    assert result.stderr == ['e1', 'e2']
    # A failed step without an exception raises a FlowservError instead.
    result = RunResult(arguments={})
    result.add(ExecResult(
        step=ContainerStep(identifier='s3', image='test'),
        returncode=1,
        stderr=['e1', 'e2']
    ))
    with pytest.raises(err.FlowservError):
        result.raise_for_status()