def test_step_type():
    """Test methods that distinguish different step types."""
    # -- CodeStep -----------------------------------------------------------
    code_step = CodeStep(
        identifier='test', func=my_add, arg='z', inputs=['a', 'b'],
        outputs=['x', 'y']
    )
    assert code_step.identifier == 'test'
    assert code_step.arg == 'z'
    assert code_step.inputs == ['a', 'b']
    assert code_step.outputs == ['x', 'y']
    assert code_step.is_code_step()
    assert not code_step.is_container_step()
    # -- ContainerStep ------------------------------------------------------
    container_step = ContainerStep(
        identifier='test', image='test', inputs=['a', 'b'], outputs=['x', 'y']
    )
    assert container_step.identifier == 'test'
    assert container_step.image == 'test'
    assert container_step.inputs == ['a', 'b']
    assert container_step.outputs == ['x', 'y']
    assert container_step.is_container_step()
    assert not container_step.is_code_step()
    # -- Steps created without inputs/outputs default to empty lists --------
    for empty_step in (
        CodeStep(identifier='test', func=my_add),
        ContainerStep(identifier='test', image='test')
    ):
        assert empty_step.inputs == []
        assert empty_step.outputs == []
    # -- Invalid step type raises a ValueError ------------------------------
    with pytest.raises(ValueError):
        WorkflowStep(identifier='test', step_type=-1)
def exec(self, step: CodeStep, context: Dict, store: FileSystemStorage) -> ExecResult:
    """Execute a workflow step of type
    :class:`flowserv.model.workflow.step.CodeStep` in a given context.

    Captures output to STDOUT and STDERR and includes them in the returned
    execution result. Note that the code worker expects a file system
    storage volume.

    Parameters
    ----------
    step: flowserv.model.workflow.step.CodeStep
        Code step in a serial workflow.
    context: dict
        Context for the executed code.
    store: flowserv.volume.fs.FileSystemStorage
        Storage volume that contains the workflow run files.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    result = ExecResult(step=step)
    # Save the original output streams and working directory so that they
    # can be restored after the step has been executed.
    out = sys.stdout
    err = sys.stderr
    cwd = os.getcwd()
    try:
        # Redirect STDOUT/STDERR and change the working directory inside
        # the try block. Previously the redirection and os.chdir() happened
        # before the try, so a failing chdir (e.g. a missing basedir) left
        # the process streams permanently redirected; now the finally
        # clause always restores the process state.
        sys.stdout = OutputStream(stream=result.stdout)
        sys.stderr = OutputStream(stream=result.stderr)
        # Change working directory temporarily.
        os.chdir(store.basedir)
        step.exec(context=context)
    except Exception as ex:
        # Record the error in the result instead of raising so that the
        # workflow controller can inspect the failed step.
        logging.error(ex, exc_info=True)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    finally:
        # Make sure to reverse redirection of output streams.
        sys.stdout = out
        sys.stderr = err
        # Reset working directory.
        os.chdir(cwd)
    return result
def Step(
    identifier: str, action: Dict, inputs: Optional[List[str]] = None,
    outputs: Optional[List[str]] = None
) -> WorkflowStep:
    """Create workflow step instance from dictionary serialization.

    The type of the generated workflow step depends on the elements in the
    given dictionary serialization. Raises a ValueError if the given
    dictionary is not a valid serialization for a workflow step.

    Parameters
    ----------
    identifier: string
        Unique step name (identifier).
    action: Dict
        Dictionary serialization for the workflow step.
    inputs: list of string, default=None
        List of files that are required by the workflow step as inputs.
    outputs: list of string, default=None
        List of files that are generated by the workflow step as outputs.

    Returns
    -------
    flowserv.model.workflow.step.WorkflowStep
    """
    # A serialization that contains both `environment` and `commands`
    # represents a container step.
    if 'environment' in action and 'commands' in action:
        return ContainerStep(
            identifier=identifier,
            image=action.get('environment'),
            commands=action.get('commands', []),
            inputs=inputs,
            outputs=outputs
        )
    # A `func` element represents a code step.
    if 'func' in action:
        return CodeStep(
            identifier=identifier,
            func=util.import_obj(action['func']),
            arg=action.get('arg'),
            varnames=parse_varnames(action=action),
            inputs=inputs,
            outputs=outputs
        )
    # A `notebook` element represents a notebook step.
    if 'notebook' in action:
        return NotebookStep(
            identifier=identifier,
            notebook=action['notebook'],
            output=action.get('output'),
            params=action.get('params'),
            requirements=action.get('requirements'),
            varnames=parse_varnames(action=action),
            inputs=inputs,
            outputs=outputs
        )
    # None of the known step types matched.
    raise ValueError(f"invalid action specification '{action}'")
def test_error_exec(tmpdir):
    """Test error when running a code step."""
    code_step = CodeStep(identifier='test', func=write_and_add, arg='a')
    result = CodeWorker().exec(
        step=code_step,
        context={'a': -1},
        store=FileSystemStorage(tmpdir)
    )
    # The step fails but the output that was written before the error is
    # still captured in the result.
    assert result.returncode == 1
    assert result.stdout == ['-1 written', '\n']
    assert result.stderr != []
    assert result.exception is not None
def test_successful_exec(tmpdir):
    """Test successfully running a code step."""
    code_step = CodeStep(identifier='test', func=write_and_add, arg='a')
    result = CodeWorker().exec(
        step=code_step,
        context={'a': 1},
        store=FileSystemStorage(tmpdir)
    )
    # A successful run captures STDOUT and reports no error.
    assert result.returncode == 0
    assert result.stdout == ['1 written', '\n']
    assert result.stderr == []
    assert result.exception is None
    # Read the written output file.
    with open(os.path.join(tmpdir, 'out.txt'), 'r') as f:
        for line in f:
            assert line.strip() == '1'
def add_code_step(self, identifier: str, func: Callable, arg: Optional[str] = None, varnames: Optional[Dict] = None, inputs: Optional[List[str]] = None, outputs: Optional[List[str]] = None) -> SerialWorkflow:
    """Append a code step to the serial workflow.

    Parameters
    ----------
    identifier: str
        Unique workflow step identifier.
    func: callable
        Python function that is executed by the workflow step.
    arg: string, default=None
        Name of the variable under which the function result is stored in
        the workflow arguments. If None, the function result is discarded.
    varnames: dict, default=None
        Mapping of function argument names to names of workflow arguments.
        This mapping is used when generating the arguments for the executed
        function. By default it is assumed that the names of arguments for
        the given function correspond to the names in the argument
        dictionary for the workflow. This mapping provides the option to
        map names in the function signature that do not occur in the
        arguments dictionary to argument names that are in the dictionary.
    inputs: list of string, default=None
        List of files that are required by the workflow step as inputs.
    outputs: list of string, default=None
        List of files that are generated by the workflow step as outputs.

    Returns
    -------
    flowserv.controller.serial.workflow.base.SerialWorkflow
    """
    # Append the new step and return the workflow to allow method chaining.
    self.steps.append(
        CodeStep(
            identifier=identifier,
            func=func,
            arg=arg,
            varnames=varnames,
            inputs=inputs,
            outputs=outputs
        )
    )
    return self
def test_run_with_two_steps(tmpdir):
    """Test executing a sequence of two code steps that operate on the same
    file in different storage volumes.
    """
    # -- Setup ----------------------------------------------------------------
    # Create two separate storage volumes.
    store1_dir = os.path.join(tmpdir, 'v1')
    os.makedirs(store1_dir)
    store2_dir = os.path.join(tmpdir, 'v2')
    volumes = VolumeManager(
        stores=[
            FStore(basedir=store1_dir, identifier=DEFAULT_STORE),
            FStore(basedir=store2_dir, identifier='v2')
        ],
        files={'data.json': [DEFAULT_STORE]}
    )
    # Create data.json file in v1.
    with open(os.path.join(store1_dir, 'data.json'), 'w') as f:
        json.dump({"value": 5}, f)
    # Use separate workers for each step.
    workers = WorkerPool(
        workers=[
            Code(identifier='w1', volume=DEFAULT_STORE),
            Code(identifier='w2', volume='v2')
        ],
        managers={'s1': 'w1', 's2': 'w2'}
    )
    # Create workflow steps.
    steps = [
        CodeStep(
            identifier='s1', func=multi_by_x, arg='s1', varnames={'x': 'x1'},
            inputs=['data.json']
        ),
        CodeStep(
            identifier='s2', func=multi_by_x, arg='s2', varnames={'x': 'x2'},
            inputs=['data.json']
        )
    ]
    # Initialize the workflow context arguments.
    arguments = {'filename': 'data.json', 'x1': 2, 'x2': 3}
    # -- Test workflow run ----------------------------------------------------
    result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(result.steps) == 2
    assert result.context == {
        'filename': 'data.json',
        'x1': 2,
        'x2': 3,
        's1': 10,
        's2': 15
    }
    # The input file was copied to the second volume for the second step.
    assert os.path.isfile(os.path.join(store2_dir, 'data.json'))
    # Error case: removing the input file causes the first step to fail so
    # only one step result is recorded and the context is unchanged.
    os.unlink(os.path.join(store1_dir, 'data.json'))
    result = exec_workflow(
        steps=steps,
        workers=workers,
        volumes=volumes,
        result=RunResult(arguments=arguments)
    )
    assert len(result.steps) == 1
    assert result.context == {'filename': 'data.json', 'x1': 2, 'x2': 3}
from flowserv.controller.worker.code import CodeWorker from flowserv.controller.worker.docker import DockerWorker from flowserv.controller.worker.manager import WorkerPool, Code, Docker, Notebook, Subprocess from flowserv.controller.worker.notebook import NotebookEngine from flowserv.controller.worker.subprocess import SubprocessWorker from flowserv.model.workflow.step import CodeStep, ContainerStep, NotebookStep import flowserv.error as err @pytest.mark.parametrize( 'step,cls', [(ContainerStep(identifier='test', image='test'), SubprocessWorker), (ContainerStep(identifier='test', image='test'), SubprocessWorker), (CodeStep(identifier='test', func=lambda x: x), CodeWorker), (NotebookStep(identifier='test', notebook='helloworld.ipynb'), NotebookEngine)]) def test_get_default_worker(step, cls): """Test getting a default worker for a workflow step that has no manager explicitly assigned to it. """ factory = WorkerPool(workers=[]) assert isinstance(factory.get_default_worker(step), cls) def test_get_worker_error(): """Test error when accessing worker with unknown identifier.""" step = ContainerStep(identifier='test', image='test') factory = WorkerPool(workers=[], managers={'test': 'test'}) with pytest.raises(err.UnknownObjectError):
def test_exec_func_step():
    """Test executing a Python function as a step in a serial workflow."""
    context = {'x': 1, 'y': 2}
    # Store the function result in the context under the name 'z'.
    CodeStep(identifier='test', func=my_add, arg='z').exec(context=context)
    assert context == {'x': 1, 'y': 2, 'z': 3}
    # Test renaming arguments: the function reads its 'x' argument from the
    # context variable 'z' and stores the result under 'x'.
    CodeStep(
        identifier='test', func=my_add, varnames={'x': 'z'}, arg='x'
    ).exec(context=context)
    assert context == {'x': 5, 'y': 2, 'z': 3}
    # Execute function but ignore output; the context is unchanged.
    CodeStep(identifier='test', func=my_add).exec(context=context)
    assert context == {'x': 5, 'y': 2, 'z': 3}