def test_empty_run_result():
    """Test run result properties for a run without any executed workflow
    steps.
    """
    r = RunResult(arguments={'a': 1})
    assert len(r) == 0
    assert r.get('a') == 1
    assert r.exception is None
    assert r.returncode is None
    assert r.stderr == []
    assert r.stdout == []
def exec_workflow(
    steps: List[WorkflowStep], workers: WorkerFactory, rundir: str,
    result: RunResult
) -> RunResult:
    """Execute steps in a serial workflow.

    The workflow arguments are part of the execution context that is
    contained in the
    :class:`flowserv.controller.serial.workflow.result.RunResult`. The
    result object is used to maintain the results for executed workflow
    steps.

    Executes workflow steps in sequence. Terminates early if the execution
    of a workflow step returns a non-zero value. Uses the given worker
    factory to create workers for steps that are of class
    :class:`flowserv.model.workflow.step.ContainerStep`.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    workers: flowserv.controller.worker.factory.WorkerFactory
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps.
    rundir: str
        Working directory for all executed workflow steps.
    result: flowserv.controller.serial.workflow.result.RunResult
        Collector for results from executed workflow steps. Contains the
        context within which the workflow is executed.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    for step in steps:
        if step.is_function_step():
            r = exec_func(step=step, context=result.context, rundir=rundir)
        else:
            worker = workers.get(step.image)
            r = worker.exec(
                step=step, arguments=result.context, rundir=rundir
            )
        result.add(r)
        # Terminate if the step execution was not successful.
        if r.returncode != 0:
            break
    return result
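# A minimal, hedged usage sketch for the rundir-based exec_workflow above.
# The image name, run directory, and arguments are hypothetical; the import
# paths are taken from the docstring and are otherwise assumptions.
from flowserv.controller.serial.workflow.result import RunResult
from flowserv.controller.worker.factory import WorkerFactory
from flowserv.model.workflow.step import ContainerStep

steps = [ContainerStep(identifier='s1', image='python:3.9')]
result = exec_workflow(
    steps=steps,
    workers=WorkerFactory(),
    rundir='/tmp/example_run',
    result=RunResult(arguments={'greeting': 'Hello'})
)
result.raise_for_status()  # raises if any executed step failed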
def run_workflow(
    run_id: str, rundir: str, state: WorkflowState, output_files: List[str],
    steps: List[ContainerStep], arguments: Dict, workers: WorkerFactory
) -> Tuple[str, str, Dict]:
    """Execute a list of workflow steps synchronously.

    This is the worker function for asynchronous workflow executions. Returns
    a tuple containing the run identifier, the folder with the run files, and
    a serialization of the workflow state.

    Parameters
    ----------
    run_id: string
        Unique run identifier.
    rundir: string
        Path to the working directory of the workflow run.
    state: flowserv.model.workflow.state.WorkflowState
        Current workflow state (to access the timestamps).
    output_files: list(string)
        Relative paths of output files that are generated by the workflow
        run.
    steps: list of flowserv.model.workflow.step.ContainerStep
        Steps in the serial workflow that are executed in the given context.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    workers: flowserv.controller.worker.factory.WorkerFactory
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps.

    Returns
    -------
    (string, string, dict)
    """
    logging.info('start run {}'.format(run_id))
    try:
        run_result = exec_workflow(
            steps=steps,
            workers=workers,
            rundir=rundir,
            result=RunResult(arguments=arguments)
        )
        if run_result.returncode != 0:
            # Return error state. Include STDERR in result messages.
            messages = run_result.log
            result_state = state.error(messages=messages)
            doc = serialize.serialize_state(result_state)
            return run_id, rundir, doc
        # Create the list of output files that were generated.
        files = list()
        for relative_path in output_files:
            if os.path.exists(os.path.join(rundir, relative_path)):
                files.append(relative_path)
        # Workflow executed successfully.
        result_state = state.success(files=files)
    except Exception as ex:
        logging.error(ex)
        strace = util.stacktrace(ex)
        logging.debug('\n'.join(strace))
        result_state = state.error(messages=strace)
    logging.info('finished run {}: {}'.format(run_id, result_state.type_id))
    return run_id, rundir, serialize.serialize_state(result_state)
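# Hedged sketch of invoking the worker function directly (in production it
# would typically be submitted to a process pool). StatePending().start() is
# an assumption about the flowserv.model.workflow.state API; the identifiers
# and paths are hypothetical, and `steps` reuses the list from the sketch
# above.
from flowserv.model.workflow.state import StatePending

run_id, rundir, state_doc = run_workflow(
    run_id='run-0001',
    rundir='/tmp/example_run',
    state=StatePending().start(),
    output_files=['results/output.json'],
    steps=steps,
    arguments={'greeting': 'Hello'},
    workers=WorkerFactory()
)
# state_doc is a dictionary serialization of the final workflow state.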
def exec_workflow(
    steps: List[WorkflowStep], workers: WorkerPool, volumes: VolumeManager,
    result: RunResult
) -> RunResult:
    """Execute steps in a serial workflow.

    The workflow arguments are part of the execution context that is
    contained in the
    :class:`flowserv.controller.serial.workflow.result.RunResult`. The
    result object is used to maintain the results for executed workflow
    steps.

    Executes workflow steps in sequence. Terminates early if the execution
    of a workflow step returns a non-zero value. Uses the given worker
    factory to create workers for steps that are of class
    :class:`flowserv.model.workflow.step.ContainerStep`.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Steps in the serial workflow that are executed in the given context.
    workers: flowserv.controller.worker.manager.WorkerPool
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps.
    volumes: flowserv.volume.manager.VolumeManager
        Manager for storage volumes that are used by the different workers.
    result: flowserv.controller.serial.workflow.result.RunResult
        Collector for results from executed workflow steps. Contains the
        context within which the workflow is executed.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    for step in steps:
        # Get the worker that is responsible for executing the workflow step.
        worker = workers.get(step)
        # Prepare the volume store that is associated with the worker.
        store = volumes.get(worker.volume)
        volumes.prepare(store=store, inputs=step.inputs, outputs=step.outputs)
        # Execute the workflow step and add the result to the overall
        # workflow result. Terminate if the step execution was not
        # successful.
        r = worker.exec(step=step, context=result.context, store=store)
        result.add(r)
        if r.returncode != 0:
            break
        # Update the volume manager with the output files for the workflow
        # step.
        volumes.update(store=store, files=step.outputs)
    return result
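# Hedged sketch for the volume-based exec_workflow, modeled on the test at
# the end of this section. The import paths for WorkerPool, Code, CodeStep,
# FStore, VolumeManager, and DEFAULT_STORE are assumptions inferred from the
# names used in this module.
from flowserv.controller.serial.workflow.result import RunResult
from flowserv.controller.worker.manager import Code, WorkerPool
from flowserv.model.workflow.step import CodeStep
from flowserv.volume.fs import FStore
from flowserv.volume.manager import DEFAULT_STORE, VolumeManager

volumes = VolumeManager(
    stores=[FStore(basedir='/tmp/example_run', identifier=DEFAULT_STORE)],
    files={}
)
workers = WorkerPool(
    workers=[Code(identifier='w1', volume=DEFAULT_STORE)],
    managers={'s1': 'w1'}
)
# A zero-argument function step; its return value is assumed to be bound to
# the name given by `arg` in the run context (as in the test below).
steps = [CodeStep(identifier='s1', func=lambda: 42, arg='s1')]
run_result = exec_workflow(
    steps=steps, workers=workers, volumes=volumes,
    result=RunResult(arguments={})
)
assert run_result.context['s1'] == 42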
def run_workflow(
    run_id: str, state: WorkflowState, output_files: List[str],
    steps: List[ContainerStep], arguments: Dict, volumes: VolumeManager,
    workers: WorkerPool
) -> Tuple[str, Dict, Dict]:
    """Execute a list of workflow steps synchronously.

    This is the worker function for asynchronous workflow executions. Returns
    a tuple containing the run identifier, a serialization of the storage
    volume that contains the run files, and a serialization of the workflow
    state.

    Parameters
    ----------
    run_id: string
        Unique run identifier.
    state: flowserv.model.workflow.state.WorkflowState
        Current workflow state (to access the timestamps).
    output_files: list(string)
        Relative paths of output files that are generated by the workflow
        run.
    steps: list of flowserv.model.workflow.step.ContainerStep
        Steps in the serial workflow that are executed in the given context.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    volumes: flowserv.volume.manager.VolumeManager
        Manager for storage volumes that are used by the different workers.
    workers: flowserv.controller.worker.manager.WorkerPool
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps.

    Returns
    -------
    (string, dict, dict)
    """
    logging.info('start run {}'.format(run_id))
    runstore = volumes.get(DEFAULT_STORE)
    try:
        run_result = exec_workflow(
            steps=steps,
            workers=workers,
            volumes=volumes,
            result=RunResult(arguments=arguments)
        )
        if run_result.returncode != 0:
            # Return error state. Include STDERR in result messages.
            messages = run_result.log
            result_state = state.error(messages=messages)
            doc = serialize.serialize_state(result_state)
            return run_id, runstore.to_dict(), doc
        # Workflow executed successfully.
        result_state = state.success(files=output_files)
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        logging.debug('\n'.join(strace))
        result_state = state.error(messages=strace)
    logging.info('finished run {}: {}'.format(run_id, result_state.type_id))
    return run_id, runstore.to_dict(), serialize.serialize_state(result_state)
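# Hedged sketch of a caller unpacking the volume-based worker function's
# return value. It reuses `steps` and `volumes` from the sketches above;
# StatePending().start() is again an API assumption.
run_id, runstore_doc, state_doc = run_workflow(
    run_id='run-0002',
    state=StatePending().start(),
    output_files=['results/output.json'],
    steps=steps,
    arguments={'greeting': 'Hello'},
    volumes=volumes,
    workers=WorkerPool()
)
# runstore_doc is the serialized default volume (runstore.to_dict());
# state_doc is the serialized workflow state.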
def run(
    self, arguments: Dict, workers: Optional[WorkerPool] = None,
    volumes: Optional[VolumeManager] = None
) -> RunResult:
    """Execute workflow for the given set of input arguments.

    Executes workflow steps in sequence. Terminates early if the execution
    of a workflow step returns a non-zero value. Uses the given worker
    factory to create workers for steps that are of class
    :class:`flowserv.model.workflow.step.ContainerStep`.

    Collects results for all executed steps and returns them in the
    :class:`flowserv.controller.serial.workflow.result.RunResult`.

    Parameters
    ----------
    arguments: dict
        User-provided arguments for the workflow run.
    workers: flowserv.controller.worker.manager.WorkerPool, default=None
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps. Uses the default worker for all container steps if None.
    volumes: flowserv.volume.manager.VolumeManager, default=None
        Manager for storage volumes that are used by the different workers.
        Uses the current working directory as the default storage volume if
        None.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    # Use the current working directory as the default storage volume if no
    # volumes are specified.
    if volumes is None:
        volumes = DefaultVolume(basedir=os.getcwd())
    # Use the default worker for all container steps if no factory is given.
    workers = workers if workers else WorkerPool()
    # Execute the workflow and return the run result that contains the
    # results of the executed steps.
    return exec_workflow(
        steps=self.steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
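# Hedged usage sketch for the volume-based run(). `workflow` stands in for
# an instance of the serial workflow class that defines this method; the
# volume setup mirrors the sketches above and its import paths are
# assumptions.
from flowserv.volume.fs import FStore
from flowserv.volume.manager import DEFAULT_STORE, VolumeManager

volumes = VolumeManager(
    stores=[FStore(basedir='/tmp/example_run', identifier=DEFAULT_STORE)],
    files={}
)
result = workflow.run(arguments={'greeting': 'Hello'}, volumes=volumes)
result.raise_for_status()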
def run(
    self, arguments: Dict, workers: Optional[WorkerFactory] = None,
    rundir: Optional[str] = None
) -> RunResult:
    """Execute workflow for the given set of input arguments.

    Executes workflow steps in sequence. Terminates early if the execution
    of a workflow step returns a non-zero value. Uses the given worker
    factory to create workers for steps that are of class
    :class:`flowserv.model.workflow.step.ContainerStep`.

    Collects results for all executed steps and returns them in the
    :class:`flowserv.controller.serial.workflow.result.RunResult`.

    Parameters
    ----------
    arguments: dict
        User-provided arguments for the workflow run.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for :class:`flowserv.model.workflow.step.ContainerStep`
        steps. Uses the default worker for all container steps if None.
    rundir: str, default=None
        Working directory for all executed workflow steps. Uses the current
        working directory if None.

    Returns
    -------
    flowserv.controller.serial.workflow.result.RunResult
    """
    # Use the current working directory if no run directory is given.
    rundir = rundir if rundir else os.getcwd()
    # Use the default worker for all container steps if no factory is given.
    workers = workers if workers else WorkerFactory()
    # Execute the workflow and return the run result that contains the
    # results of the executed steps.
    return exec_workflow(
        steps=self.steps,
        workers=workers,
        rundir=rundir,
        result=RunResult(arguments=arguments)
    )
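# Hedged sketch for the rundir-based run(): with only the run arguments, the
# defaults (WorkerFactory() and the current working directory) apply.
# `workflow` again stands in for an instance of the defining class.
result = workflow.run(arguments={'greeting': 'Hello'})
result.raise_for_status()
# Or pin the working directory explicitly:
result = workflow.run(
    arguments={'greeting': 'Hello'}, rundir='/tmp/example_run'
)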
def test_run_with_two_steps(tmpdir):
    """Test executing a sequence of two code steps that operate on the same
    file in different storage volumes.
    """
    # -- Setup ----------------------------------------------------------
    # Create two separate storage volumes.
    vol1_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(vol1_dir)
    vol2_dir = os.path.join(tmpdir, 'v2')
    volumes = VolumeManager(
        stores=[
            FStore(basedir=vol1_dir, identifier=DEFAULT_STORE),
            FStore(basedir=vol2_dir, identifier='v2')
        ],
        files={'data.json': [DEFAULT_STORE]}
    )
    # Create data.json file in v1.
    with open(os.path.join(vol1_dir, 'data.json'), 'w') as f:
        json.dump({"value": 5}, f)
    # Use separate workers for each step.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Create workflow steps.
    steps = [
        CodeStep(
            identifier='s1', func=multi_by_x, arg='s1',
            varnames={'x': 'x1'}, inputs=['data.json']
        ),
        CodeStep(
            identifier='s2', func=multi_by_x, arg='s2',
            varnames={'x': 'x2'}, inputs=['data.json']
        )
    ]
    # Initialize the workflow context arguments.
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ------------------------------------------------
    run_result = exec_workflow(
        steps=steps, workers=workers, volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 2
    assert run_result.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    assert os.path.isfile(os.path.join(vol2_dir, 'data.json'))
    # Error case.
    os.unlink(os.path.join(vol1_dir, 'data.json'))
    run_result = exec_workflow(
        steps=steps, workers=workers, volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(run_result.steps) == 1
    assert run_result.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}
def test_successful_run_result():
    """Test results of a successful workflow run."""
    r = RunResult(arguments={'a': 1})
    r.add(ExecResult(
        step=ContainerStep(identifier='s1', image='test1'),
        returncode=0,
        stdout=['o1']
    ))
    r.context['a'] = 2
    r.add(ExecResult(
        step=ContainerStep(identifier='s2', image='test2'),
        returncode=0,
        stdout=['o2', 'o3']
    ))
    r.context['b'] = 1
    assert r.exception is None
    assert r.returncode == 0
    r.raise_for_status()
    assert r.stdout == ['o1', 'o2', 'o3']
    assert r.stderr == []
    assert r.get('a') == 2
    assert r.get('b') == 1
    result = r.steps[0]
    assert result.step.image == 'test1'
    assert result.stdout == ['o1']
    result = r.steps[1]
    assert result.step.image == 'test2'
    assert result.stdout == ['o2', 'o3']
def test_error_run_result():
    """Test results of an erroneous workflow run."""
    r = RunResult(arguments={})
    r.add(ExecResult(
        step=ContainerStep(identifier='s1', image='test'),
        returncode=0
    ))
    assert r.exception is None
    assert r.returncode == 0
    r.add(ExecResult(
        step=ContainerStep(identifier='s2', image='test'),
        returncode=1,
        stderr=['e1', 'e2'],
        exception=ValueError()
    ))
    with pytest.raises(ValueError):
        r.raise_for_status()
    assert r.exception is not None
    assert r.returncode == 1
    assert r.stdout == []
    assert r.stderr == ['e1', 'e2']
    r = RunResult(arguments={})
    r.add(ExecResult(
        step=ContainerStep(identifier='s3', image='test'),
        returncode=1,
        stderr=['e1', 'e2']
    ))
    with pytest.raises(err.FlowservError):
        r.raise_for_status()