def test_cancel_run_helloworld(async_service):
    """Test cancelling a helloworld run."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('sleeptime', 10),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=args)
    # Poll run after sleeping for one second.
    time.sleep(1)
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    assert run['state'] in st.ACTIVE_STATES
    # -- Cancel the active run ------------------------------------------------
    with async_service(user_id=user_id) as api:
        run = api.runs().cancel_run(
            run_id=run_id,
            reason='done'
        )
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
        assert run['state'] == st.STATE_CANCELED
        assert run['messages'][0] == 'done'
Example #2
def test_run_helloworld_sync(sync_service, specfile, state):
    """Execute the helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with sync_service() as api:
        workflow_id = create_workflow(api,
                                      source=TEMPLATE_DIR,
                                      specfile=specfile)
        user_id = create_user(api)
    with sync_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id, 'data/names.txt')),
            serialize_arg('sleeptime', 3)
        ]
        run_id = start_run(api, group_id, arguments=args)
    # -- Validate the run handle against the expected state -------------------
    with sync_service(user_id=user_id) as api:
        r = api.runs().get_run(run_id)
        serialize.validate_run_handle(r, state=state)
        if state == st.STATE_SUCCESS:
            # The run should have the greetings.txt file as a result.
            files = dict()
            for obj in r['files']:
                files[obj['name']] = obj['id']
            assert len(files) == 1
            fh = api.runs().get_result_file(
                run_id=run_id, file_id=files['results/greetings.txt'])
            value = fh.open().read().decode('utf-8').strip()
            assert 'Hello Alice!' in value
            assert 'Hello Bob!' in value
Example #3
def start_run(ctx, group, configfile):
    """Start new workflow run."""
    group_id = ctx.obj.get_group(ctx.params)
    config = factory.read_config(configfile) if configfile else None
    with service() as api:
        doc = api.groups().get_group(group_id=group_id)
        config = config if config else doc[glbls.ENGINE_CONFIG]
        # Create list of file descriptors for uploaded files that are included
        # in the submission handle
        files = []
        for fh in doc[glbls.GROUP_UPLOADS]:
            files.append((
                fh[flbls.FILE_ID],
                fh[flbls.FILE_NAME],
                fh[flbls.FILE_DATE][:19])
            )
        # Create list of additional user-provided template parameters
        parameters = ParameterIndex.from_dict(doc[glbls.GROUP_PARAMETERS])
        # Read values for all parameters.
        user_input = read(parameters.sorted(), files=files)
        args = [serialize_arg(key, val) for key, val in user_input.items()]
        # Start the run and print returned run state information.
        doc = api.runs().start_run(group_id=group_id, arguments=args, config=config)
        run_id = doc[labels.RUN_ID]
        run_state = doc[labels.RUN_STATE]
        click.echo('started run {} is {}'.format(run_id, run_state))
Example #4
def test_run_helloworld_sync_env(sync_service):
    """Successfully execute the helloworld example that contains a notebook step
    in the Python environment that runs flowServ."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with sync_service() as api:
        workflow_id = create_workflow(api,
                                      source=BENCHMARK_DIR,
                                      specfile=SPEC_FILE)
        user_id = create_user(api)
    with sync_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        args = [serialize_arg('greeting', 'Hey there')]
        run_id = start_run(api, group_id, arguments=args)
    # -- Validate the run handle against the expected state -------------------
    with sync_service(user_id=user_id) as api:
        r = api.runs().get_run(run_id)
        serialize.validate_run_handle(r, state=st.STATE_SUCCESS)
        # The run should have the greetings.txt file as a result.
        files = dict()
        for obj in r['files']:
            files[obj['name']] = obj['id']
        assert len(files) == 2
        fh = api.runs().get_result_file(run_id=run_id,
                                        file_id=files['results/greetings.txt'])
        value = fh.open().read().decode('utf-8').strip()
        assert 'Hey there Alice!' in value
        assert 'Hey there Bob!' in value
Example #5
def test_deserialize_run_argument():
    """Test deserialization of run arguments."""
    key, value = deserialize_arg(serialize_arg('names', 'names.txt'))
    assert key == 'names'
    assert value == 'names.txt'
    with pytest.raises(ValueError):
        deserialize_arg({'id': 'names'})
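The round trip exercised above can be summarized in a short sketch. Only the behavior demonstrated by the test is assumed here; the exact dictionary layout produced by serialize_arg is an implementation detail.
# Round-trip sketch: serialize an argument, then recover the (key, value) pair.
doc = serialize_arg('sleeptime', 10)
key, value = deserialize_arg(doc)
assert (key, value) == ('sleeptime', 10)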
Example #6
def run_erroneous_workflow(service, specfile):
    """Execute the modified helloworld example whose post-processing workflow
    is expected to end in an error state."""
    with service() as api:
        # Create workflow template, user, and the workflow group.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=specfile
        )
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        # Upload the names file.
        names = io_file(data=NAMES, format='txt/plain')
        file_id = upload_file(api, group_id, names)
        # Run the workflow.
        arguments = [
            serialize_arg('names', serialize_fh(file_id)),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=arguments)
    # Poll workflow state every second.
    run = poll_run(service, run_id, user_id)
    assert run['state'] == st.STATE_SUCCESS
    with service() as api:
        wh = api.workflows().get_workflow(workflow_id=workflow_id)
    attempts = 0
    while 'postproc' not in wh:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts += 1
        if attempts > 60:
            break
    assert 'postproc' in wh
    serialize.validate_workflow_handle(wh)
    attempts = 0
    while wh['postproc']['state'] in st.ACTIVE_STATES:
        time.sleep(1)
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts += 1
        if attempts > 60:
            break
    assert wh['postproc']['state'] not in st.ACTIVE_STATES
    serialize.validate_workflow_handle(wh)
    assert wh['postproc']['state'] == st.STATE_ERROR
Example #7
def test_run_helloworld_async(async_service, target):
    """Execute the helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # Start a new run for the workflow template.
    with async_service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with async_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text')
        file_id = upload_file(api, group_id, names)
        args = [
            serialize_arg('names', serialize_fh(file_id, target)),
            serialize_arg('sleeptime', 1),
            serialize_arg('greeting', 'Hi')
        ]
        run_id = start_run(api, group_id, arguments=args)
    # Poll workflow state every second.
    with async_service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    watch_dog = 30
    while run['state'] in st.ACTIVE_STATES and watch_dog:
        time.sleep(1)
        watch_dog -= 1
        with async_service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    assert run['state'] == st.STATE_SUCCESS
    files = dict()
    for f in run['files']:
        files[f['name']] = f['id']
    # Read the result files within a fresh service context.
    with async_service(user_id=user_id) as api:
        fh = api.runs().get_result_file(
            run_id=run_id,
            file_id=files['results/greetings.txt']
        )
        greetings = fh.open().read().decode('utf-8').strip()
        assert 'Hi Alice' in greetings
        assert 'Hi Bob' in greetings
        assert 'Hi Zoe' in greetings
        fh = api.runs().get_result_file(
            run_id=run_id,
            file_id=files['results/analytics.json']
        )
        assert json.load(fh.open()) is not None
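Several examples in this listing call a poll_run helper that is not shown. A plausible shape for it, modeled on the polling loop above (the helper body and its timeout convention are assumptions, not the actual implementation):
def poll_run(service, run_id, user_id, timeout=60):
    """Poll a run once per second until it leaves an active state or the
    timeout (in seconds) expires. Hypothetical sketch of the helper used in
    the surrounding examples.
    """
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    while run['state'] in st.ACTIVE_STATES and timeout > 0:
        time.sleep(1)
        timeout -= 1
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    return run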
Example #8
def run_postproc_workflow(postproc_spec: Dict, workflow: WorkflowObject,
                          ranking: List, runs: List, run_manager: RunManager,
                          backend: WorkflowController):
    """Run post-processing workflow for a workflow template."""
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all
    # runs in the ranking. The created directory is the only
    # run argument
    strace = None
    try:
        datadir = postutil.prepare_postproc_data(input_files=pp_files,
                                                 ranking=ranking,
                                                 run_manager=run_manager)
        dst = pp_inputs.get('runs', postbase.RUNS_DIR)
        run_args = {
            postbase.PARA_RUNS: InputFile(source=FSFile(datadir), target=dst)
        }
        arg_list = [
            serialize_arg(postbase.PARA_RUNS, serialize_fh(datadir, dst))
        ]
    except Exception as ex:
        logging.error(ex)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group is
    # None.
    run = run_manager.create_run(workflow=workflow,
                                 arguments=arg_list,
                                 runs=runs)
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(run_id=run.run_id,
                               state=run.state().error(messages=strace))
    else:
        # Execute the post-processing workflow asynchronously if
        # there were no data preparation errors.
        postproc_state, rundir = backend.exec_workflow(
            run=run,
            template=WorkflowTemplate(workflow_spec=workflow_spec,
                                      parameters=postbase.PARAMETERS),
            arguments=run_args,
            config=workflow.engine_config)
        # Update the post-processing workflow run state if it is
        # no longer pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(run_id=run.run_id,
                                   state=postproc_state,
                                   rundir=rundir)
        # Remove the temporary input folder
        shutil.rmtree(datadir)
Example #9
    def start_run(self,
                  arguments: Dict,
                  config: Optional[Dict] = None,
                  poll_interval: Optional[int] = None) -> Run:
        """Run the associated workflow for the given set of arguments.

        Parameters
        ----------
        arguments: dict
            Dictionary of user-provided arguments.
        config: dict, default=None
            Optional implementation-specific configuration settings that can be
            used to overwrite settings that were initialized at object creation.
        poll_interval: int, default=None
            Optional poll interval that is used to check the state of a run
            until it is no longer in an active state.

        Returns
        -------
        flowserv.client.app.run.Run
        """
        arguments = self._parameters.set_defaults(arguments=arguments)
        with self.service() as api:
            # Upload any argument values that represent input files. These may
            # be given as file paths, IO buffers, or file objects.
            arglist = list()
            for key, val in arguments.items():
                # Convert arguments to the format that is expected by the run
                # manager. We pay special attention to file parameters. Input
                # files may be represented as strings, IO buffers or file
                # objects.
                para = self._parameters.get(key)
                if para is None:
                    raise err.UnknownParameterError(key)
                if para.is_file():
                    # Upload a given file prior to running the application.
                    upload_file = None
                    target = None
                    if isinstance(val, str):
                        upload_file = FSFile(val)
                    elif isinstance(val, StringIO):
                        buf = BytesIO(val.read().encode('utf8'))
                        upload_file = IOBuffer(buf)
                    elif isinstance(val, BytesIO):
                        upload_file = IOBuffer(val)
                    elif isinstance(val, IOHandle):
                        upload_file = val
                    else:
                        msg = 'invalid argument {} for {}'.format(key, val)
                        raise err.InvalidArgumentError(msg)
                    fh = api.uploads().upload_file(group_id=self.group_id,
                                                   file=upload_file,
                                                   name=key)
                    val = serialize_fh(fh[filelbls.FILE_ID], target=target)
                arglist.append(serialize_arg(key, val))
            # Execute the run and return the serialized run handle.
            run = api.runs().start_run(group_id=self.group_id,
                                       arguments=arglist,
                                       config=config)
            rh = Run(doc=run, service=self.service)
            # Wait for the run to finish if it is active and a poll interval is given.
            while poll_interval and rh.is_active():
                time.sleep(poll_interval)
                rh = self.poll_run(run_id=rh.run_id)
            pprun = self.get_postproc_results()
            if pprun is not None:
                while poll_interval and pprun.is_active():
                    time.sleep(poll_interval)
                    pprun = self.get_postproc_results()
            return rh
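A hypothetical call to the method above. The application handle app is not defined in this listing, and the parameter names mirror the earlier helloworld examples:
from io import StringIO

# File parameters may be passed as paths, IO buffers, or file objects; the
# method uploads them before starting the run. With a poll interval the call
# blocks until the run (and any post-processing run) is no longer active.
run = app.start_run(
    arguments={'names': StringIO('Alice\nBob'), 'sleeptime': 1},
    poll_interval=1)
assert not run.is_active()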
Example #10
def run_postproc_workflow(workflow: WorkflowObject, ranking: List[RunResult],
                          keys: List[str], run_manager: RunManager,
                          tmpstore: StorageVolume, staticfs: StorageVolume,
                          backend: WorkflowController):
    """Run post-processing workflow for a workflow template.

    Parameters
    ----------
    workflow: flowserv.model.base.WorkflowObject
        Handle for the workflow that triggered the post-processing workflow run.
    ranking: list(flowserv.model.ranking.RunResult)
        List of runs in the current result ranking.
    keys: list of string
        Sorted list of run identifiers for runs in the ranking.
    run_manager: flowserv.model.run.RunManager
        Manager for workflow runs.
    tmpstore: flowserv.volume.base.StorageVolume
        Temporary storage volume where the created post-processing files are
        stored. This volume will be erased after the workflow is started.
    staticfs: flowserv.volume.base.StorageVolume
        Storage volume that contains the static files from the workflow
        template.
    backend: flowserv.controller.base.WorkflowController
        Backend that is used to execute the post-processing workflow.
    """
    # Get workflow specification and the list of input files from the
    # post-processing statement.
    postproc_spec = workflow.postproc_spec
    workflow_spec = postproc_spec.get('workflow')
    pp_inputs = postproc_spec.get('inputs', {})
    pp_files = pp_inputs.get('files', [])
    # Prepare temporary directory with result files for all
    # runs in the ranking. The created directory is the only
    # run argument
    strace = None
    try:
        prepare_postproc_data(input_files=pp_files,
                              ranking=ranking,
                              run_manager=run_manager,
                              store=tmpstore)
        dst = pp_inputs.get('runs', RUNS_DIR)
        run_args = {PARA_RUNS: InputDirectory(store=tmpstore, target=RUNS_DIR)}
        arg_list = [serialize_arg(PARA_RUNS, dst)]
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        run_args = dict()
        arg_list = []
    # Create a new run for the workflow. The identifier for the run group is
    # None.
    run = run_manager.create_run(workflow=workflow,
                                 arguments=arg_list,
                                 runs=keys)
    if strace is not None:
        # If there were data preparation errors set the created run into an
        # error state and return.
        run_manager.update_run(run_id=run.run_id,
                               state=run.state().error(messages=strace))
    else:
        # Execute the post-processing workflow asynchronously if
        # there were no data preparation errors.
        try:
            postproc_state, runstore = backend.exec_workflow(
                run=run,
                template=WorkflowTemplate(workflow_spec=workflow_spec,
                                          parameters=PARAMETERS),
                arguments=run_args,
                staticfs=staticfs,
                config=workflow.engine_config)
        except Exception as ex:
            # Make sure to catch exceptions and set the run into an error state.
            postproc_state = run.state().error(messages=util.stacktrace(ex))
            runstore = None
        # Update the post-processing workflow run state if it is
        # no longer pending for execution.
        if not postproc_state.is_pending():
            run_manager.update_run(run_id=run.run_id,
                                   state=postproc_state,
                                   runstore=runstore)
        # Erase the temporary storage volume.
        tmpstore.erase()
Example #11
    def start_run(self,
                  group_id: str,
                  arguments: List[Dict],
                  config: Optional[Dict] = None) -> Dict:
        """Start a new workflow run for the given group. The user provided
        arguments are expected to be a list of (name,value)-pairs. The name
        identifies the template parameter. The data type of the value depends
        on the type of the parameter.

        Returns a serialization of the handle for the started run.

        Raises an unauthorized access error if the user does not have the
        necessary access to modify the workflow group.

        Parameters
        ----------
        group_id: string
            Unique workflow group identifier
        arguments: list(dict)
            List of user-provided arguments for template parameters.
        config: dict, default=None
            Optional implementation-specific configuration settings that can be
            used to overwrite settings that were initialized at object creation.

        Returns
        -------
        dict

        Raises
        ------
        flowserv.error.InvalidArgumentError
        flowserv.error.MissingArgumentError
        flowserv.error.UnauthorizedAccessError
        flowserv.error.UnknownFileError
        flowserv.error.UnknownParameterError
        flowserv.error.UnknownWorkflowGroupError
        """
        # Raise an error if the user does not have rights to start new runs for
        # the workflow group or if the workflow group does not exist.
        if not self.auth.is_group_member(group_id=group_id,
                                         user_id=self.user_id):
            raise err.UnauthorizedAccessError()
        # Get handle for the given user group to enable access to uploaded
        # files and the identifier of the associated workflow.
        group = self.group_manager.get_group(group_id)
        # Get the template from the workflow that the workflow group belongs
        # to. Get a modified copy of the template based on the (potentially)
        # modified workflow specification and parameters of the workflow group.
        template = group.workflow.get_template(
            workflow_spec=group.workflow_spec, parameters=group.parameters)
        # Create instances of the template arguments from the given list of
        # values. At this point we only distinguish between scalar values and
        # input files. Also create a serialized copy of the argument list that
        # is stored in the database.
        run_args = dict()
        serialized_args = list()
        for arg in arguments:
            arg_id, arg_val = deserialize_arg(arg)
            # Raise an error if multiple values are given for the same argument
            if arg_id in run_args:
                raise err.DuplicateArgumentError(arg_id)
            para = template.parameters.get(arg_id)
            if para is None:
                raise err.UnknownParameterError(arg_id)
            if is_fh(arg_val):
                file_id, target = deserialize_fh(arg_val)
                # The argument value is expected to be the identifier of a
                # previously uploaded file. This will raise an exception if the
                # file identifier is unknown.
                fileobj = self.group_manager.get_uploaded_file(
                    group_id=group_id, file_id=file_id).fileobj
                run_args[arg_id] = para.cast(value=(fileobj, target))
            else:
                run_args[arg_id] = para.cast(arg_val)
            # Actor values as parameter values cannot be serialized. For now,
            # we only store the serialized workflow step but no information
            # about the additional input files.
            if isinstance(arg_val, ActorValue):
                arg_val = arg_val.spec
            serialized_args.append(serialize_arg(name=arg_id, value=arg_val))
        # Before we start creating directories and copying files, make sure
        # that there are values for all template parameters (either in the
        # arguments dictionary or set as default values).
        template.validate_arguments(run_args)
        # Start the run.
        run = self.run_manager.create_run(group=group,
                                          arguments=serialized_args)
        run_id = run.run_id
        # Use default engine configuration if the configuration argument was
        # not given.
        config = config if config else group.engine_config
        staticdir = dirs.workflow_staticdir(group.workflow.workflow_id)
        state, runstore = self.backend.exec_workflow(
            run=run,
            template=template,
            arguments=run_args,
            staticfs=self.fs.get_store_for_folder(key=staticdir),
            config=config)
        # Update the run state if it is no longer pending for execution. Make
        # sure to call the update run method for the server to ensure that
        # results are inserted and post-processing workflows are started.
        if not state.is_pending():
            self.update_run(run_id=run_id, state=state, runstore=runstore)
            return self.get_run(run_id)
        return self.serialize.run_handle(run, group)
Example #12
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different threads)
    # when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post processing workflow may attempt to use
        # the backend which was initialized prior with a different file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different input
    # file for each group.
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        attempts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
    assert fh.name.startswith('run')
    assert fh.mime_type == 'application/gzip'