def test_get_worker_instance(doc, cls):
    """Ensure that the factory creates a worker of the expected class from
    a given specification document.
    """
    workers = WorkerFactory(config=doc)
    first = workers.get('test')
    assert isinstance(first, cls)
    # Request the worker a second time to cover the case where the factory
    # returns the object that it cached during the first request.
    assert workers.get('test') == first
def test_load_config_from_json_file():
    """Ensure that a worker factory can be configured from a Json file."""
    # Variant 1: let the factory read the configuration file itself.
    from_loader = WorkerFactory.load_json(JSON_FILE)
    assert from_loader.get('test').variables['a'] == 0
    # Variant 2: read the file first and hand the resulting document to the
    # constructor. Both variants are expected to yield the same worker
    # variables.
    doc = util.read_object(JSON_FILE)
    from_document = WorkerFactory(doc)
    assert from_document.get('test').variables['a'] == 0
def exec_workflow(
    steps: List[WorkflowStep], workers: WorkerFactory, rundir: str,
    result: RunResult
) -> RunResult:
    """Run the steps of a serial workflow one after the other.

    The execution context (i.e., the workflow arguments) is taken from the
    given result object. The same object collects the result of every step
    that is executed.

    Function steps are executed via ``exec_func``. For all other steps a
    worker is requested from the given factory using the step's image
    identifier. Execution stops after the first step that finishes with a
    non-zero return code; the result of that step is still added to the
    collector.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep
        Workflow steps in the order in which they are executed.
    workers: flowserv.controller.worker.factory.WorkerFactory
        Factory that provides the workers for steps that are not function
        steps.
    rundir: str
        Working directory that is passed to every executed step.
    result: flowserv.controller.worker.result.RunResult
        Collector for step results. Also provides the context within which
        the workflow is executed.

    Returns
    -------
    flowserv.controller.worker.result.RunResult
    """
    for step in steps:
        if not step.is_function_step():
            # Delegate execution to the worker that the factory provides for
            # the image of this step.
            outcome = workers.get(step.image).exec(
                step=step,
                arguments=result.context,
                rundir=rundir
            )
        else:
            outcome = exec_func(
                step=step,
                context=result.context,
                rundir=rundir
            )
        result.add(outcome)
        # A non-zero return code signals failure. Skip all remaining steps.
        if outcome.returncode != 0:
            return result
    return result
def run(
    self, arguments: Dict, workers: Optional[WorkerFactory] = None,
    rundir: Optional[str] = None
) -> RunResult:
    """Run the workflow steps for a given set of user arguments.

    Delegates the execution to ``exec_workflow`` which runs the steps of
    this workflow in sequence and stops after the first step that returns a
    non-zero value. The results of all executed steps are returned in a
    :class:`flowserv.controller.serial.workflow.result.RunResult` that is
    created from the given arguments.

    Parameters
    ----------
    arguments: dict
        User-provided arguments for the workflow run.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for workers that execute container steps. A factory that is
        created without any configuration is used if no (or an empty)
        factory is given.
    rundir: str, default=None
        Working directory for all executed workflow steps. The current
        working directory is used if no (or an empty) value is given.

    Returns
    -------
    flowserv.controller.worker.result.RunResult
    """
    # Fall back to the current working directory.
    if not rundir:
        rundir = os.getcwd()
    # Fall back to a worker factory without any specific configuration.
    if not workers:
        workers = WorkerFactory()
    # The run result carries the arguments as the execution context and
    # collects the results of the individual steps.
    collector = RunResult(arguments=arguments)
    return exec_workflow(
        steps=self.steps,
        workers=workers,
        rundir=rundir,
        result=collector
    )
def __init__(
    self, steps: Optional[List[WorkflowStep]] = None,
    parameters: Optional[List[Parameter]] = None,
    workers: Optional[WorkerFactory] = None
):
    """Set up the workflow object.

    Every argument is optional. Missing values are replaced by an empty
    list of steps, a parameter index that is created from the (possibly
    missing) parameter list, and a worker factory that is created without
    any configuration.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep, default=None
        Optional sequence of steps in the serial workflow.
    parameters: list of flowserv.model.parameter.base.Parameter, default=None
        Optional list of workflow template parameters.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for the workers that execute the
        :class:`flowserv.model.workflow.step.ContainerStep` instances in
        the workflow sequence.
    """
    if steps is None:
        steps = list()
    self.steps = steps
    # The parameter list is wrapped into an index; a value of None is passed
    # through to the index constructor as is.
    self.parameters = ParameterIndex(parameters=parameters)
    if workers is None:
        workers = WorkerFactory()
    self.workers = workers
def test_eval_arg_func():
    """Ensure that callables in the argument specification of a worker are
    handled when the factory is created from a dictionary.

    The configuration that is maintained by the factory is expected to
    contain the value 'called' in place of the callable.
    """
    # Worker specification with a callable as one of the argument values.
    test_spec = {
        'image': 'test',
        'worker': 'subprocess',
        'args': {'a': callme, 'b': 1}
    }
    # Worker specification without any arguments.
    dummy_spec = {
        'image': 'dummy',
        'worker': {'className': 'a', 'moduleName': 'b'}
    }
    factory = WorkerFactory(
        config={'test': test_spec, 'dummy': dummy_spec},
        validate=True
    )
    assert factory.config['test']['args'] == {'a': 'called', 'b': 1}
    assert factory.config['dummy'].get('args') is None
def exec_workflow(
    self, run: RunObject, template: WorkflowTemplate, arguments: Dict,
    config: Optional[Dict] = None
) -> Tuple[WorkflowState, str]:
    """Initiate the execution of a given workflow template for a set of
    argument values. Depending on the ``is_async`` flag of the controller
    the serial workflow is either executed asynchronously in a new process
    or synchronously (blocking the controller until the run is done).

    The serial workflow engine executes workflows on the local machine and
    therefore uses the file system to store temporary run files. The path
    to the run folder is returned as the second value in the result tuple.
    The first value in the result tuple is the state of the workflow after
    the process is started. If the workflow is executed asynchronously the
    state will be RUNNING. Otherwise, the run state should be an inactive
    state.

    The set of arguments is not further validated. It is assumed that the
    validation has been performed by the calling code (e.g., the run
    service manager).

    The optional configuration object can be used to override the worker
    configuration that was provided at object instantiation. Expects a
    dictionary with an element `workers` that contains a mapping of
    container identifier to a container worker configuration object.

    If the state of the run handle is not pending, an error is raised. Any
    exception that occurs while the run folder is prepared or while the
    workflow is started is logged and reported as an error state in the
    result instead of being raised.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for the run that is being executed.
    template: flowserv.model.template.base.WorkflowTemplate
        Workflow template containing the parameterized specification and
        the parameter declarations.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    config: dict, default=None
        Optional object to overwrite the worker configuration settings.

    Returns
    -------
    flowserv.model.workflow.state.WorkflowState, string

    Raises
    ------
    RuntimeError
        If the given run is not in pending state.
    """
    # Get the run state. Ensure that the run is in pending state. The state
    # is accessed via a method call; make sure to call it when generating
    # the error message (instead of formatting the bound method object).
    if not run.is_pending():
        raise RuntimeError("invalid run state '{}'".format(run.state()))
    state = run.state()
    rundir = os.path.join(self.runsdir, run.run_id)
    # Get the worker configuration. A given (non-empty) configuration
    # object takes precedence over the configuration of the controller.
    worker_config = self.worker_config if not config else config.get(
        'workers')
    # Get the source directory for static workflow files.
    sourcedir = self.fs.workflow_staticdir(run.workflow.workflow_id)
    # Get the list of workflow steps and the generated output files.
    steps, run_args, outputs = parser.parse_template(template=template,
                                                     arguments=arguments)
    try:
        # Copy template files to the run folder.
        self.fs.copy_folder(key=sourcedir, dst=rundir)
        # Store any given file arguments in the run folder.
        for key, para in template.parameters.items():
            if para.is_file() and key in arguments:
                file = arguments[key]
                file.source().store(os.path.join(rundir, file.target()))
        # Create top-level folder for all expected result files.
        util.create_directories(basedir=rundir, files=outputs)
        # Start a new process to run the workflow. Make sure to catch all
        # exceptions to set the run state properly
        state = state.start()
        if self.is_async:
            # Raise an error if the service manager is not given. Note that
            # the error is caught below and results in an error state.
            if self.service is None:
                raise ValueError('service manager not given')
            # Run steps asynchronously in a separate process
            pool = Pool(processes=1)
            task_callback_function = partial(callback_function,
                                             lock=self.lock,
                                             tasks=self.tasks,
                                             service=self.service)
            # Register the task before it is submitted so that the callback
            # function has access to the task entry for the run.
            with self.lock:
                self.tasks[run.run_id] = (pool, state)
            pool.apply_async(run_workflow,
                             args=(run.run_id, rundir, state, outputs,
                                   steps, run_args,
                                   WorkerFactory(config=worker_config)),
                             callback=task_callback_function)
            return state, rundir
        else:
            # Run steps synchronously and block the controller until done
            _, _, state_dict = run_workflow(
                run_id=run.run_id,
                rundir=rundir,
                state=state,
                output_files=outputs,
                steps=steps,
                arguments=run_args,
                workers=WorkerFactory(config=worker_config))
            return serialize.deserialize_state(state_dict), rundir
    except Exception as ex:
        # Set the workflow run into an ERROR state
        logging.error(ex)
        return state.error(messages=util.stacktrace(ex)), rundir
def test_load_config_from_yaml_file():
    """Ensure that a worker factory can be configured from a Yaml file."""
    factory = WorkerFactory.load_yaml(YAML_FILE)
    variables = factory.get('test').variables
    assert variables['a'] == 0
def test_load_config_from_file():
    """Ensure that a worker factory can be configured from the content of a
    configuration file.
    """
    # Read the file content first and pass the resulting document directly
    # to the object constructor. This is expected to work the same as using
    # the static load method.
    doc = util.read_object(JSON_FILE)
    factory = WorkerFactory(doc)
    variables = factory.get('test').variables
    assert variables['a'] == 0
def test_invalid_config(config):
    """Ensure that invalid worker specifications are only rejected when
    validation is requested.
    """
    # The validate flag is not set: creating the factory must not raise.
    WorkerFactory(config=config)
    # The validate flag is set: the invalid specification is expected to
    # cause a validation error.
    with pytest.raises(ValidationError):
        WorkerFactory(config=config, validate=True)
def test_init_empty(validate):
    """Ensure that a worker factory that is created from an empty dictionary
    has an empty configuration.
    """
    empty_doc = {}
    factory = WorkerFactory(config=empty_doc, validate=validate)
    assert len(factory.config) == 0