def test_parse_workflow_spec_error():
    """Ensure that parsing a serial workflow specification fails with a
    ValueError if the specification contains a workflow step of unknown
    type.
    """
    # Single workflow step whose action type is not a known step type.
    unknown_step = {'name': 'S1', 'action': {'type': 'undefined'}}
    template = WorkflowTemplate(
        workflow_spec={'steps': [unknown_step]},
        parameters=ParameterIndex()
    )
    with pytest.raises(ValueError):
        parser.parse_template(template=template, arguments={})
Beispiel #2
0
    def create_workflow(self, run, template, arguments):
        """Create a handle for a new remote workflow that is instantiated
        from a workflow template and a set of user-provided arguments.

        The template is parsed as a serial workflow to obtain the list of
        output files for the workflow handle. The controller state is reset
        to pending and the poll counter is set to zero.

        Parameters
        ----------
        run: flowserv.model.base.RunObject
            Handle for the run that is being executed.
        template: flowserv.model.template.base.WorkflowTemplate
            Workflow template containing the parameterized specification and
            the parameter declarations.
        arguments: dict
            Dictionary of argument values for parameters in the template.

        Returns
        -------
        flowserv.model.workflow.remote.RemoteWorkflowObject
        """
        # Parse the template as a serial workflow. Only the third element of
        # the result (the output files) is needed for the workflow handle.
        _steps, _run_args, output_files = parser.parse_template(
            template=template,
            arguments=arguments
        )
        # Reset the controller state for the new workflow.
        self.state = StatePending()
        self._pollcount = 0
        return RemoteWorkflowObject(
            workflow_id=run.run_id,
            state=self.state,
            output_files=output_files
        )
def test_parse_code_step():
    """Parse a workflow specification that consists of a single code step
    and verify the function, result argument, and variable mapping of the
    resulting step object.
    """
    # Action for the code step: call a_plus_b with arguments 'a' and 'b'
    # taken from variables 'val1' and 'val2', result argument is 'z'.
    action = {
        'func': 'flowserv.tests.worker.a_plus_b',
        'arg': 'z',
        'variables': [
            {'arg': 'a', 'var': 'val1'},
            {'arg': 'b', 'var': 'val2'}
        ]
    }
    spec = {'steps': [{'name': 'code_step', 'action': action}]}
    template = WorkflowTemplate(
        workflow_spec=spec,
        parameters=ParameterIndex()
    )
    steps, _, _ = parser.parse_template(template=template, arguments={})
    assert len(steps) == 1
    code_step = steps[0]
    assert code_step.func(2, 3) == 5
    assert code_step.arg == 'z'
    assert code_step.varnames == {'a': 'val1', 'b': 'val2'}
def test_parse_top_tagger_template():
    """Parse the Top-Tagger template, in which a workflow step is given as
    a reference to a template parameter, and verify that the first step is
    taken from the actor value that is passed as the argument.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_TOPTAGGER)
    )
    # Specification for the step that replaces the 'tagger' parameter.
    actor_spec = {'environment': 'test', 'commands': ['python analyze']}
    steps, _, _ = parser.parse_template(
        template=template,
        arguments={'tagger': ActorValue(spec=actor_spec)}
    )
    assert len(steps) == 2
    tagger_step = steps[0]
    assert tagger_step.image == 'test'
    assert tagger_step.commands == ['python analyze']
def test_parse_hello_world_notebook_template():
    """Parse the 'Hello world' template variant that includes a notebook
    step and verify the notebook step, the output files, and the run
    arguments that are returned by the parser.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_NOTEBOOK)
    )
    user_args = {'greeting': 'Hey'}
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments=user_args
    )
    assert len(steps) == 2
    # The first of the two workflow steps is the notebook step.
    notebook_step = steps[0]
    assert notebook_step.notebook == 'notebooks/HelloWorld.ipynb'
    assert notebook_step.inputs == [
        'data/names.txt',
        'notebooks/HelloWorld.ipynb'
    ]
    assert notebook_step.outputs == ['results/greetings.txt']
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    expected_args = {
        'inputfile': 'data/names.txt',
        'outputfile': 'results/greetings.txt',
        'greeting': 'Hey'
    }
    assert args == expected_args
def test_parse_hello_world_template():
    """Parse the 'Hello world' template and verify the container step, the
    expected output files, and the run arguments that are returned by the
    parser.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_HELLOWORLD)
    )
    user_args = {'names': 'names.txt', 'sleeptime': 10}
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments=user_args
    )
    assert len(steps) == 1
    container_step = steps[0]
    assert container_step.image == 'python:3.7'
    assert len(container_step.commands) == 2
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    # The expected run arguments contain a 'greeting' value although the
    # user-provided arguments above do not.
    expected_args = {
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': 10,
        'greeting': 'Hello'
    }
    assert args == expected_args
def test_parse_hello_world_template():
    """Parse the 'Hello world' template and verify the single workflow step
    with its command line, the expected output file, and the run arguments
    that are returned by the parser.
    """
    template = WorkflowTemplate.from_dict(
        doc=util.read_object(TEMPLATE_HELLOWORLD)
    )
    user_args = {'names': 'names.txt', 'sleeptime': 10}
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments=user_args
    )
    assert len(steps) == 1
    hello_step = steps[0]
    assert hello_step.image == 'python:2.7'
    assert len(hello_step.commands) == 1
    # The command is expected to still contain the variable references.
    expected_command = '${python} "${helloworld}" --inputfile "${inputfile}" --outputfile "${outputfile}" --sleeptime ${sleeptime}'  # noqa: E501
    assert hello_step.commands[0] == expected_command
    assert output_files == ['results/greetings.txt']
    # Note that the sleeptime is expected as a string in the run arguments.
    expected_args = {
        'helloworld': 'code/helloworld.py',
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': '10'
    }
    assert args == expected_args
Beispiel #8
0
    def exec_workflow(
            self,
            run: RunObject,
            template: WorkflowTemplate,
            arguments: Dict,
            config: Optional[Dict] = None) -> Tuple[WorkflowState, str]:
        """Initiate the execution of a given workflow template for a set of
        argument values. If the engine is asynchronous, this will start a new
        process that executes the serial workflow. Otherwise, the workflow is
        executed synchronously and the call blocks until the run is done.

        The serial workflow engine executes workflows on the local machine and
        therefore uses the file system to store temporary run files. The path
        to the run folder is returned as the second value in the result tuple.
        The first value in the result tuple is the state of the workflow after
        the process is started. If the workflow is executed asynchronously the
        state will be RUNNING. Otherwise, the run state should be an inactive
        state.

        Errors that occur while preparing the run folder or while running the
        workflow are not raised. They are logged and the workflow state that
        is returned is the error state for the run.

        The set of arguments is not further validated. It is assumed that the
        validation has been performed by the calling code (e.g., the run
        service manager).

        The optional configuration object can be used to override the worker
        configuration that was provided at object instantiation. Expects a
        dictionary with an element `workers` that contains a mapping of container
        identifier to a container worker configuration object.

        If the state of the run handle is not pending, an error is raised.

        Parameters
        ----------
        run: flowserv.model.base.RunObject
            Handle for the run that is being executed.
        template: flowserv.model.template.base.WorkflowTemplate
            Workflow template containing the parameterized specification and
            the parameter declarations.
        arguments: dict
            Dictionary of argument values for parameters in the template.
        config: dict, default=None
            Optional object to overwrite the worker configuration settings.

        Returns
        -------
        flowserv.model.workflow.state.WorkflowState, string

        Raises
        ------
        RuntimeError
            If the given run is not in pending state.
        """
        # Get the run state. Ensure that the run is in pending state. The run
        # state is accessed via a method call; the method has to be called
        # here so that the error message shows the state and not the method.
        if not run.is_pending():
            raise RuntimeError("invalid run state '{}'".format(run.state()))
        state = run.state()
        rundir = os.path.join(self.runsdir, run.run_id)
        # Get the worker configuration. A given (non-empty) config object
        # replaces the engine configuration, even if it has no 'workers'
        # element (the worker configuration is None in that case).
        worker_config = self.worker_config if not config else config.get(
            'workers')
        # Get the source directory for static workflow files.
        sourcedir = self.fs.workflow_staticdir(run.workflow.workflow_id)
        # Get the list of workflow steps and the generated output files.
        # Errors that are raised by the parser are not caught here.
        steps, run_args, outputs = parser.parse_template(template=template,
                                                         arguments=arguments)
        try:
            # Copy template files to the run folder.
            self.fs.copy_folder(key=sourcedir, dst=rundir)
            # Store any given file arguments in the run folder.
            for key, para in template.parameters.items():
                if para.is_file() and key in arguments:
                    file_arg = arguments[key]
                    file_arg.source().store(
                        os.path.join(rundir, file_arg.target())
                    )
            # Create top-level folder for all expected result files.
            util.create_directories(basedir=rundir, files=outputs)
            # Start a new process to run the workflow. Make sure to catch all
            # exceptions to set the run state properly
            state = state.start()
            if self.is_async:
                # Raise an error if the service manager is not given. The
                # error is caught below and converted into an error state.
                if self.service is None:
                    raise ValueError('service manager not given')
                # Run steps asynchronously in a separate process
                pool = Pool(processes=1)
                task_callback_function = partial(callback_function,
                                                 lock=self.lock,
                                                 tasks=self.tasks,
                                                 service=self.service)
                # Register the task before it is submitted to the pool.
                with self.lock:
                    self.tasks[run.run_id] = (pool, state)
                pool.apply_async(run_workflow,
                                 args=(run.run_id, rundir, state, outputs,
                                       steps, run_args,
                                       WorkerFactory(config=worker_config)),
                                 callback=task_callback_function)
                return state, rundir
            else:
                # Run steps synchronously and block the controller until done
                _, _, state_dict = run_workflow(
                    run_id=run.run_id,
                    rundir=rundir,
                    state=state,
                    output_files=outputs,
                    steps=steps,
                    arguments=run_args,
                    workers=WorkerFactory(config=worker_config))
                return serialize.deserialize_state(state_dict), rundir
        except Exception as ex:
            # Set the workflow run into an ERROR state
            logging.error(ex)
            return state.error(messages=util.stacktrace(ex)), rundir
Beispiel #9
0
    def exec_workflow(
            self,
            run: RunObject,
            template: WorkflowTemplate,
            arguments: Dict,
            staticfs: StorageVolume,
            config: Optional[Dict] = None
    ) -> Tuple[WorkflowState, StorageVolume]:
        """Initiate the execution of a given workflow template for a set of
        argument values. If the engine is asynchronous, this will start a new
        process that executes the serial workflow. Otherwise, the workflow is
        executed synchronously and the call blocks until the run is done.

        The run files are maintained in a storage volume that is created for
        the run. This storage volume is returned as the second value in the
        result tuple. The first value in the result tuple is the state of the
        workflow after the process is started. If the workflow is executed
        asynchronously the state will be RUNNING. Otherwise, the run state
        should be an inactive state.

        Errors that occur while preparing the run files or while running the
        workflow are not raised. They are logged and the workflow state that
        is returned is the error state for the run.

        The set of arguments is not further validated. It is assumed that the
        validation has been performed by the calling code (e.g., the run
        service manager).

        The optional configuration object can be used to override the engine
        configuration that was provided at object instantiation. The elements
        that are read from the merged configuration are `volumes`, `workers`,
        and `workflow`. The given configuration only applies to this run; the
        engine configuration itself is not modified.

        If the state of the run handle is not pending, an error is raised.

        Parameters
        ----------
        run: flowserv.model.base.RunObject
            Handle for the run that is being executed.
        template: flowserv.model.template.base.WorkflowTemplate
            Workflow template containing the parameterized specification and
            the parameter declarations.
        arguments: dict
            Dictionary of argument values for parameters in the template.
        staticfs: flowserv.volume.base.StorageVolume
            Storage volume that contains the static files from the workflow
            template.
        config: dict, default=None
            Optional object to overwrite the worker configuration settings.

        Returns
        -------
        flowserv.model.workflow.state.WorkflowState, flowserv.volume.base.StorageVolume

        Raises
        ------
        RuntimeError
            If the given run is not in pending state.
        """
        # Get the run state. Raise an error if the run is not in pending state.
        # The run state is accessed via a method call; the method has to be
        # called here so that the error message shows the state and not the
        # method.
        if not run.is_pending():
            raise RuntimeError("invalid run state '{}'".format(run.state()))
        state = run.state()
        # Create configuration dictionary that merges the engine global
        # configuration with the workflow-specific one. Work on a copy of the
        # engine configuration so that run-specific settings do not leak into
        # the configuration for subsequent runs.
        run_config = dict(self.config) if self.config is not None else dict()
        if config:
            run_config.update(config)
        # Get the list of workflow steps, run arguments, and the list of output
        # files that the workflow is expected to generate. Errors that are
        # raised by the parser are not caught here.
        steps, run_args, outputs = parser.parse_template(template=template,
                                                         arguments=arguments)
        # Create and prepare storage volume for run files.
        runstore = self.fs.get_store_for_folder(
            key=util.join(self.runsdir, run.run_id),
            identifier=DEFAULT_STORE
        )
        try:
            # Copy template files to the run folder.
            files = staticfs.copy(src=None, store=runstore)
            # Store any given file arguments and additional input files
            # that are required by actor parameters into the run folder.
            # Collect the keys of all files that are copied to the run store.
            for key, para in template.parameters.items():
                if para.is_file() and key in arguments:
                    for file_key in arguments[key].copy(target=runstore):
                        files.append(file_key)
                elif para.is_actor() and key in arguments:
                    input_files = arguments[key].files
                    for f in input_files if input_files else []:
                        for file_key in f.copy(target=runstore):
                            files.append(file_key)
            # Create factory objects for storage volumes.
            volumes = volume_manager(specs=run_config.get('volumes', []),
                                     runstore=runstore,
                                     runfiles=files)
            # Create factory for workers. Include mapping of workflow steps to
            # the worker that are responsible for their execution.
            workers = WorkerPool(workers=run_config.get('workers', []),
                                 managers={
                                     doc['step']: doc['worker']
                                     for doc in run_config.get('workflow', [])
                                 })
            # Start a new process to run the workflow. Make sure to catch all
            # exceptions to set the run state properly.
            state = state.start()
            if self.is_async:
                # Run steps asynchronously in a separate process
                pool = Pool(processes=1)
                task_callback_function = partial(callback_function,
                                                 lock=self.lock,
                                                 tasks=self.tasks,
                                                 service=self.service)
                # Register the task before it is submitted to the pool.
                with self.lock:
                    self.tasks[run.run_id] = (pool, state)
                pool.apply_async(run_workflow,
                                 args=(run.run_id, state, outputs, steps,
                                       run_args, volumes, workers),
                                 callback=task_callback_function)
                return state, runstore
            else:
                # Run steps synchronously and block the controller until done
                _, _, state_dict = run_workflow(run_id=run.run_id,
                                                state=state,
                                                output_files=outputs,
                                                steps=steps,
                                                arguments=run_args,
                                                volumes=volumes,
                                                workers=workers)
                return serialize.deserialize_state(state_dict), runstore
        except Exception as ex:
            # Set the workflow run into an ERROR state
            logging.error(ex, exc_info=True)
            return state.error(messages=util.stacktrace(ex)), runstore