def test_parse_code_step():
    """Parse a workflow specification with a single code step and verify the
    function, the result argument, and the variable mapping of the parsed step.
    """
    variables = [
        {'arg': 'a', 'var': 'val1'},
        {'arg': 'b', 'var': 'val2'}
    ]
    action = {
        'func': 'flowserv.tests.worker.a_plus_b',
        'arg': 'z',
        'variables': variables
    }
    spec = {'steps': [{'name': 'code_step', 'action': action}]}
    template = WorkflowTemplate(
        workflow_spec=spec,
        parameters=ParameterIndex()
    )
    steps, _args, _outputs = parser.parse_template(
        template=template,
        arguments=dict()
    )
    # The specification contains exactly one step.
    assert len(steps) == 1
    code_step = steps[0]
    assert code_step.func(2, 3) == 5
    assert code_step.arg == 'z'
    assert code_step.varnames == {'a': 'val1', 'b': 'val2'}
def test_parse_workflow_spec_error():
    """Parsing a serial workflow specification that contains a step with an
    unknown action type raises a ValueError.
    """
    unknown_step = {'name': 'S1', 'action': {'type': 'undefined'}}
    template = WorkflowTemplate(
        workflow_spec={'steps': [unknown_step]},
        parameters=ParameterIndex()
    )
    with pytest.raises(ValueError):
        parser.parse_template(template=template, arguments=dict())
Exemple #3
0
    def template(self):
        """Create the workflow template instance for the workflow
        specification that is included in the manifest. The template is
        created with the validate flag set to True.

        Returns
        -------
        flowserv.model.template.base.WorkflowTemplate
        """
        spec = self.workflow_spec
        return WorkflowTemplate.from_dict(doc=spec, validate=True)
Exemple #4
0
def test_validate_arguments():
    """Validate different sets of arguments against the parameters that are
    defined for a workflow template.
    """
    declarations = [
        String(name='A', label='P1', index=0, required=True).to_dict(),
        String(name='B', label='P2', index=1, default='X').to_dict()
    ]
    template = WorkflowTemplate(
        workflow_spec=dict(),
        parameters=ParameterIndex.from_dict(declarations)
    )
    # Argument sets that include a value for parameter 'A' are accepted.
    for valid_args in ({'A': 1, 'B': 0}, {'A': 1}):
        template.validate_arguments(valid_args)
    # Omitting parameter 'A' raises an error.
    with pytest.raises(err.MissingArgumentError):
        template.validate_arguments({'B': 1})
Exemple #5
0
def run_postproc_workflow(postproc_spec: Dict, workflow: WorkflowObject,
                          ranking: List, runs: List, run_manager: RunManager,
                          backend: WorkflowController):
    """Run post-processing workflow for a workflow template.

    Prepares a temporary directory with run result files, creates a new run,
    and hands the post-processing workflow to the backend. If the data
    preparation fails, the created run is set into an error state instead.
    The temporary directory is removed before the function returns, also if
    creating, executing, or updating the run raises an exception.

    Parameters
    ----------
    postproc_spec: dict
        Post-processing specification. The 'workflow' element is used as the
        workflow specification. The optional 'inputs' element may contain the
        list of input 'files' and the target path for the 'runs' folder.
    workflow: WorkflowObject
        Workflow for which the run is created. Provides the engine
        configuration that is passed to the backend.
    ranking: list
        Ranking that is passed on to the data preparation step.
    runs: list
        Passed to the run manager when the run is created.
    run_manager: RunManager
        Manager that is used to create and update the run.
    backend: WorkflowController
        Backend that is used to execute the post-processing workflow.
    """
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all
    # runs in the ranking. The created directory is the only
    # run argument
    strace = None
    # Keep track of the temporary directory so that it can be removed even
    # if a later step fails.
    datadir = None
    try:
        datadir = postutil.prepare_postproc_data(input_files=pp_files,
                                                 ranking=ranking,
                                                 run_manager=run_manager)
        dst = pp_inputs.get('runs', postbase.RUNS_DIR)
        run_args = {
            postbase.PARA_RUNS: InputFile(source=FSFile(datadir), target=dst)
        }
        arg_list = [
            serialize_arg(postbase.PARA_RUNS, serialize_fh(datadir, dst))
        ]
    except Exception as ex:
        logging.error(ex)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    try:
        # Create a new run for the workflow. The identifier for the run group
        # is None.
        run = run_manager.create_run(workflow=workflow,
                                     arguments=arg_list,
                                     runs=runs)
        if strace is not None:
            # If there were data preparation errors set the created run into
            # an error state and return.
            run_manager.update_run(run_id=run.run_id,
                                   state=run.state().error(messages=strace))
            return
        # Execute the post-processing workflow asynchronously if
        # there were no data preparation errors.
        postproc_state, rundir = backend.exec_workflow(
            run=run,
            template=WorkflowTemplate(workflow_spec=workflow_spec,
                                      parameters=postbase.PARAMETERS),
            arguments=run_args,
            config=workflow.engine_config)
        # Update the post-processing workflow run state if it is
        # no longer pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(run_id=run.run_id,
                                   state=postproc_state,
                                   rundir=rundir)
    finally:
        # Remove the temporary input folder (if it was created) regardless
        # of whether the steps above succeeded.
        if datadir is not None:
            shutil.rmtree(datadir)
def test_parse_top_tagger_template():
    """Parse the Top-Tagger template, in which workflow steps are given as
    parameter references, with an actor value for the 'tagger' parameter.
    """
    template_doc = util.read_object(TEMPLATE_TOPTAGGER)
    template = WorkflowTemplate.from_dict(doc=template_doc)
    tagger = ActorValue(
        spec={'environment': 'test', 'commands': ['python analyze']}
    )
    steps, _args, _outputs = parser.parse_template(
        template=template,
        arguments={'tagger': tagger}
    )
    assert len(steps) == 2
    # The first step is expected to reflect the given actor specification.
    first = steps[0]
    assert first.image == 'test'
    assert first.commands == ['python analyze']
Exemple #7
0
def run_workflow(spec):
    """Create a new workflow run for the workflow specification that is read
    from the given file and print the identifier and state of the created
    workflow.
    """
    doc = util.read_object(filename=spec)
    # Use the current directory if the file path has no directory component.
    rundir = os.path.dirname(spec) or '.'
    dummy_id = '0000'
    run = RunHandle(identifier=dummy_id,
                    workflow_id=dummy_id,
                    group_id=dummy_id,
                    state=StatePending(),
                    arguments=dict(),
                    rundir=rundir)
    template = WorkflowTemplate(workflow_spec=doc, sourcedir=rundir)
    client = REANAClient()
    wf = client.create_workflow(run, template, dict())
    click.echo('created workflow {} ({})'.format(wf.identifier, wf.state))
def test_parse_hello_world_notebook_template():
    """Parse the 'Hello world' template that includes a notebook step and
    verify the extracted steps, arguments, and output files.
    """
    template_doc = util.read_object(TEMPLATE_NOTEBOOK)
    template = WorkflowTemplate.from_dict(doc=template_doc)
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments={'greeting': 'Hey'}
    )
    assert len(steps) == 2
    # The first step is the notebook step.
    nb_step = steps[0]
    assert nb_step.notebook == 'notebooks/HelloWorld.ipynb'
    assert nb_step.inputs == ['data/names.txt', 'notebooks/HelloWorld.ipynb']
    assert nb_step.outputs == ['results/greetings.txt']
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    expected_args = {
        'inputfile': 'data/names.txt',
        'outputfile': 'results/greetings.txt',
        'greeting': 'Hey'
    }
    assert args == expected_args
def test_parse_hello_world_template():
    """Parse the 'Hello world' template and verify the extracted workflow
    step, the arguments, and the output files.
    """
    template_doc = util.read_object(TEMPLATE_HELLOWORLD)
    template = WorkflowTemplate.from_dict(doc=template_doc)
    user_args = {'names': 'names.txt', 'sleeptime': 10}
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments=user_args
    )
    assert len(steps) == 1
    hello_step = steps[0]
    assert hello_step.image == 'python:3.7'
    assert len(hello_step.commands) == 2
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    expected_args = {
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': 10,
        'greeting': 'Hello'
    }
    assert args == expected_args
def test_parse_hello_world_template():
    """Parse the 'Hello world' template and verify the single workflow step
    with its command line, the arguments, and the output files.
    """
    template_doc = util.read_object(TEMPLATE_HELLOWORLD)
    template = WorkflowTemplate.from_dict(doc=template_doc)
    user_args = {'names': 'names.txt', 'sleeptime': 10}
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments=user_args
    )
    assert len(steps) == 1
    hello_step = steps[0]
    assert hello_step.image == 'python:2.7'
    assert len(hello_step.commands) == 1
    expected_cmd = '${python} "${helloworld}" --inputfile "${inputfile}" --outputfile "${outputfile}" --sleeptime ${sleeptime}'  # noqa: E501
    assert hello_step.commands[0] == expected_cmd
    assert output_files == ['results/greetings.txt']
    expected_args = {
        'helloworld': 'code/helloworld.py',
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': '10'
    }
    assert args == expected_args
Exemple #11
0
    def modify_template(self, template, parameters):
        """Modify the workflow specification in a given template by adding a
        set of parameters. If a parameter in the added parameter set already
        exists in the template, the given parameter is merged into the
        existing parameter (using the merge method of the existing parameter)
        and the result replaces the existing parameter.

        For each added parameter a variable reference is added to the inputs
        section of the workflow specification: to the list of files for file
        parameters and to the parameters dictionary otherwise. References
        that are already present in the inputs section are not added a second
        time.

        Returns a modified workflow template. The given template is not
        modified.

        Note that this currently will only work for serial REANA workflow
        specifications. Will raise an InvalidTemplateError otherwise.

        Parameters
        ----------
        template: flowserv.model.template.base.WorkflowTemplate
            Workflow template handle.
        parameters: dict(flowserv.model.parameter.base.TemplateParameter)
            Additional template parameters

        Returns
        -------
        flowserv.model.template.base.WorkflowTemplate

        Raises
        ------
        flowserv.core.error.InvalidTemplateError
        """
        workflow_spec = template.workflow_spec
        # Raise an invalid template error if the workflow specification is not
        # a REANA serial workflow.
        workflow_type = workflow_spec.get('workflow', {}).get('type', 'null')
        if workflow_type != 'serial':
            msg = "invalid workflow type '{}'".format(workflow_type)
            raise err.InvalidTemplateError(msg)
        # Get a copy of the files and parameters sections of the inputs
        # declaration
        inputs = workflow_spec.get('inputs', dict())
        in_files = list(inputs.get('files', list()))
        in_params = dict(inputs.get('parameters', dict()))
        # Merge the given parameters into a copy of the template parameters.
        para_merge = dict(template.parameters)
        for para in parameters.values():
            if para.identifier in para_merge:
                para = para_merge[para.identifier].merge(para)
            para_merge[para.identifier] = para
            # Depending on whether the type of the parameter is a file or not
            # we add a parameter reference to the respective input section.
            if para.is_file():
                file_ref = tp.VARIABLE(para.identifier)
                # Avoid listing the same file reference twice, e.g., when an
                # existing file parameter is modified.
                if file_ref not in in_files:
                    in_files.append(file_ref)
            elif para.identifier not in in_params:
                in_params[para.identifier] = tp.VARIABLE(para.identifier)
        spec = dict(workflow_spec)
        spec['inputs'] = {'files': in_files, 'parameters': in_params}
        return WorkflowTemplate(workflow_spec=spec,
                                sourcedir=template.sourcedir,
                                parameters=para_merge,
                                modules=template.modules,
                                postproc_spec=template.postproc_spec,
                                result_schema=template.result_schema)
Exemple #12
0
def run_postproc_workflow(workflow: WorkflowObject, ranking: List[RunResult],
                          keys: List[str], run_manager: RunManager,
                          tmpstore: StorageVolume, staticfs: StorageVolume,
                          backend: WorkflowController):
    """Run post-processing workflow for a workflow template.

    The temporary storage volume is erased before the function returns,
    including the cases where data preparation fails or where creating or
    updating the run raises an exception.

    Parameters
    ----------
    workflow: flowserv.model.base.WorkflowObject
        Handle for the workflow that triggered the post-processing workflow run.
    ranking: list(flowserv.model.ranking.RunResult)
        List of runs in the current result ranking.
    keys: list of string
        Sorted list of run identifier for runs in the ranking.
    run_manager: flowserv.model.run.RunManager
        Manager for workflow runs
    tmpstore: flowserv.volume.base.StorageVolume
        Temporary storage volume where the created post-processing files are
        stored. This volume will be erased before the function returns.
    staticfs: flowserv.volume.base.StorageVolume
        Storage volume that contains the static files from the workflow
        template.
    backend: flowserv.controller.base.WorkflowController
        Backend that is used to execute the post-processing workflow.
    """
    # Get workflow specification and the list of input files from the
    # post-processing statement.
    postproc_spec = workflow.postproc_spec
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all
    # runs in the ranking. The created directory is the only
    # run argument
    strace = None
    try:
        prepare_postproc_data(input_files=pp_files,
                              ranking=ranking,
                              run_manager=run_manager,
                              store=tmpstore)
        dst = pp_inputs.get('runs', RUNS_DIR)
        # NOTE(review): the input directory targets RUNS_DIR while the
        # serialized argument records dst. Confirm whether the target should
        # be dst as well; behavior is left unchanged here.
        run_args = {PARA_RUNS: InputDirectory(store=tmpstore, target=RUNS_DIR)}
        arg_list = [serialize_arg(PARA_RUNS, dst)]
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    try:
        # Create a new run for the workflow. The identifier for the run group
        # is None.
        run = run_manager.create_run(workflow=workflow,
                                     arguments=arg_list,
                                     runs=keys)
        if strace is not None:
            # If there were data preparation errors set the created run into
            # an error state and return.
            run_manager.update_run(run_id=run.run_id,
                                   state=run.state().error(messages=strace))
            return
        # Execute the post-processing workflow asynchronously if
        # there were no data preparation errors.
        try:
            postproc_state, runstore = backend.exec_workflow(
                run=run,
                template=WorkflowTemplate(workflow_spec=workflow_spec,
                                          parameters=PARAMETERS),
                arguments=run_args,
                staticfs=staticfs,
                config=workflow.engine_config)
        except Exception as ex:
            # Make sure to catch exceptions and set the run into an error
            # state.
            postproc_state = run.state().error(messages=util.stacktrace(ex))
            runstore = None
        # Update the post-processing workflow run state if it is
        # no longer pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(run_id=run.run_id,
                                   state=postproc_state,
                                   runstore=runstore)
    finally:
        # Erase the temporary storage volume on every path so that files
        # from a failed preparation or run creation do not accumulate.
        tmpstore.erase()
Exemple #13
0
def test_template_serialization():
    """Create template instances from dictionary serializations and verify
    the errors that are raised for invalid serializations.
    """
    def roundtrip(serialization):
        # Deserialize, serialize, and deserialize again.
        serialized = WorkflowTemplate.from_dict(serialization).to_dict()
        return WorkflowTemplate.from_dict(serialized)

    group_one = {'name': '0', 'title': 'G1', 'index': 0}
    group_two = {'name': '1', 'title': 'G2', 'index': 1}

    # -- Minimal template specification ---------------------------------------
    template = roundtrip({'workflow': dict()})
    assert template.workflow_spec == dict()
    assert template.parameters == ParameterIndex()

    # -- Maximal template specification ---------------------------------------
    result_column = {'name': '0', 'label': 'col0', 'dtype': PARA_STRING}
    maximal = {
        'workflow': {'inputs': [tp.VARIABLE('A'), 'B', 'C']},
        'parameters': [String(name='A', label='P1', index=0).to_dict()],
        'parameterGroups': [dict(group_one), dict(group_two)],
        'postproc': {
            'workflow': dict(),
            'inputs': {'files': ['D', 'E']}
        },
        'results': {
            'file': 'results/analytics.json',
            'schema': [result_column]
        }
    }
    template = roundtrip(maximal)
    assert template.workflow_spec == {'inputs': [tp.VARIABLE('A'), 'B', 'C']}
    assert len(template.parameters) == 1
    assert len(template.parameter_groups) == 2
    assert template.postproc_spec['workflow'] == dict()

    # -- Invalid document -----------------------------------------------------
    # No error is raised for the invalid document if validate is False. With
    # the default setting an error is raised.
    invalid_para = String(name='0', label='P1', index=0).to_dict()
    invalid_para['addOn'] = 1
    invalid_group = dict(group_one)
    invalid_group['sortDesc'] = True
    invalid = {
        'workflow': {'inputs': ['A', 'B', 'C']},
        'parameters': [invalid_para],
        'parameterGroups': [invalid_group, dict(group_two)],
        'postproc': {'inputs': {'files': ['D', 'E']}}
    }
    WorkflowTemplate.from_dict(invalid, validate=False)
    with pytest.raises(err.InvalidParameterError):
        WorkflowTemplate.from_dict(invalid)

    # -- Error for missing workflow specification -----------------------------
    with pytest.raises(err.InvalidTemplateError):
        WorkflowTemplate.from_dict(dict())

    # -- Error for unknown parameter ------------------------------------------
    unknown_ref = {
        'workflow': {'inputs': [tp.VARIABLE('0'), 'B', 'C']},
        'parameters': [String(name='A', label='P1', index=0).to_dict()]
    }
    with pytest.raises(err.UnknownParameterError):
        WorkflowTemplate.from_dict(unknown_ref)