Esempio n. 1
0
    def start_run(self,
                  group_id: str,
                  arguments: List[Dict],
                  config: Optional[Dict] = None) -> Dict:
        """Create and execute a new run for the workflow of a given group.

        Each element in the argument list is a serialized (name, value)-pair
        where the name references a parameter of the workflow template. A
        value is either a scalar or a serialized file handle that references a
        file that was previously uploaded for the group.

        The result is the serialization of the handle for the new run.

        Parameters
        ----------
        group_id: string
            Unique workflow group identifier
        arguments: list(dict)
            Serialized user-provided values for the template parameters.
        config: dict, default=None
            Optional implementation-specific configuration settings. If not
            given (or empty), the engine configuration of the group is used.

        Returns
        -------
        dict

        Raises
        ------
        flowserv.error.DuplicateArgumentError
        flowserv.error.InvalidArgumentError
        flowserv.error.MissingArgumentError
        flowserv.error.UnauthorizedAccessError
        flowserv.error.UnknownFileError
        flowserv.error.UnknownParameterError
        flowserv.error.UnknownWorkflowGroupError
        """
        # Only members of the workflow group are allowed to start runs.
        is_member = self.auth.is_group_member(
            group_id=group_id,
            user_id=self.user_id
        )
        if not is_member:
            raise err.UnauthorizedAccessError()
        # The group handle provides access to the uploaded files and to the
        # workflow that the group is associated with.
        group = self.group_manager.get_group(group_id)
        # Derive the template for the run from the workflow, using the
        # workflow specification and parameters that are stored with the
        # group.
        template = group.workflow.get_template(
            workflow_spec=group.workflow_spec,
            parameters=group.parameters
        )
        # Build two views of the arguments: the values that are passed to the
        # workflow engine (run_args) and the serialized list that is handed to
        # the run manager when the run is created (serialized_args).
        run_args = {}
        serialized_args = []
        for doc in arguments:
            name, value = deserialize_arg(doc)
            # Every parameter may receive at most one value.
            if name in run_args:
                raise err.DuplicateArgumentError(name)
            para = template.parameters.get(name)
            if para is None:
                raise err.UnknownParameterError(name)
            if is_fh(value):
                # File arguments reference a previously uploaded file by its
                # identifier. Retrieving the file is expected to raise an
                # error if the identifier is unknown.
                file_id, target = deserialize_fh(value)
                uploaded = self.group_manager.get_uploaded_file(
                    group_id=group_id,
                    file_id=file_id
                )
                run_args[name] = para.cast(value=(uploaded.fileobj, target))
            else:
                run_args[name] = para.cast(value)
            # Actor values are not stored as is. Only their specification is
            # included in the serialized argument list.
            stored = value.spec if isinstance(value, ActorValue) else value
            serialized_args.append(serialize_arg(name=name, value=stored))
        # Validate the collected arguments against the template before the run
        # is created.
        template.validate_arguments(run_args)
        # Create the run and hand it over to the workflow engine.
        run = self.run_manager.create_run(
            group=group,
            arguments=serialized_args
        )
        run_id = run.run_id
        # Fall back to the engine configuration of the group if the caller did
        # not provide any configuration settings.
        engine_config = config or group.engine_config
        staticdir = dirs.workflow_staticdir(group.workflow.workflow_id)
        state, runstore = self.backend.exec_workflow(
            run=run,
            template=template,
            arguments=run_args,
            staticfs=self.fs.get_store_for_folder(key=staticdir),
            config=engine_config
        )
        # A run that is still pending requires no further action.
        if state.is_pending():
            return self.serialize.run_handle(run, group)
        # The run state changed during execution. Go through the update
        # method of this object so that the new state is persisted before the
        # run handle is retrieved again.
        self.update_run(run_id=run_id, state=state, runstore=runstore)
        return self.get_run(run_id)
Esempio n. 2
0
    def start_run(self,
                  arguments: Dict,
                  config: Optional[Dict] = None,
                  poll_interval: Optional[int] = None) -> Run:
        """Execute the associated workflow with the given argument values.

        Values for file parameters are uploaded before the run is started.
        If a poll interval is given, the method blocks until the run (and a
        post-processing run, if one is returned) is no longer active.

        Parameters
        ----------
        arguments: dict
            Dictionary of user-provided arguments.
        config: dict, default=None
            Optional implementation-specific configuration settings that are
            passed on when the run is started.
        poll_interval: int, default=None
            Optional interval (passed to time.sleep) between checks of the
            run state while the run is active.

        Returns
        -------
        flowserv.client.app.run.Run
        """
        arguments = self._parameters.set_defaults(arguments=arguments)
        with self.service() as api:
            # Translate the argument dictionary into the serialized list of
            # arguments that is passed to the runs API.
            arglist = []
            for key, val in arguments.items():
                para = self._parameters.get(key)
                if para is None:
                    raise err.UnknownParameterError(key)
                if not para.is_file():
                    # Values for non-file parameters are serialized as given.
                    arglist.append(serialize_arg(key, val))
                    continue
                # Values for file parameters can be path strings, text or
                # byte buffers, or IO handles. Wrap the value in an object
                # that can be uploaded.
                if isinstance(val, str):
                    upload_file = FSFile(val)
                elif isinstance(val, StringIO):
                    # Text buffers are converted to bytes before upload.
                    encoded = val.read().encode('utf8')
                    upload_file = IOBuffer(BytesIO(encoded))
                elif isinstance(val, BytesIO):
                    upload_file = IOBuffer(val)
                elif isinstance(val, IOHandle):
                    upload_file = val
                else:
                    msg = 'invalid argument {} for {}'.format(key, val)
                    raise err.InvalidArgumentError(msg)
                fh = api.uploads().upload_file(
                    group_id=self.group_id,
                    file=upload_file,
                    name=key
                )
                # The argument value is replaced by a serialized handle for
                # the uploaded file. No target path is specified.
                file_arg = serialize_fh(fh[filelbls.FILE_ID], target=None)
                arglist.append(serialize_arg(key, file_arg))
            # Start the run and wrap the returned document in a run object.
            run = api.runs().start_run(
                group_id=self.group_id,
                arguments=arglist,
                config=config
            )
            rh = Run(doc=run, service=self.service)
            # Poll the run state until the run is inactive, but only if a
            # poll interval was given.
            if poll_interval:
                while rh.is_active():
                    time.sleep(poll_interval)
                    rh = self.poll_run(run_id=rh.run_id)
            # The post-processing results are always requested. Waiting for
            # them requires a poll interval as well.
            pprun = self.get_postproc_results()
            if pprun is not None and poll_interval:
                while pprun.is_active():
                    time.sleep(poll_interval)
                    pprun = self.get_postproc_results()
            return rh
Esempio n. 3
0
    def from_dict(cls, doc, validate=True):
        """Get a workflow template instance from its dictionary
        serialization. The dictionary is expected to follow the structure
        that is generated by the to_dict() method of this class. Only the
        workflow specification (element 'workflow') is mandatory.

        Parameters
        ----------
        doc: dict
            Dictionary serialization of a workflow template
        validate: bool, optional
            Validate the elements of the serialization if this flag is True.
            The flag is also passed on to the from_dict() methods of the
            nested template components.

        Returns
        -------
        flowserv.model.template.base.WorkflowTemplate

        Raises
        ------
        flowserv.error.InvalidTemplateError
        flowserv.error.UnknownParameterError
        """
        # The workflow specification is the only mandatory element.
        if validate and 'workflow' not in doc:
            msg = "missing element '{}'".format('workflow')
            raise err.InvalidTemplateError(msg)
        # -- Workflow specification -------------------------------------------
        workflow_spec = doc['workflow']
        # -- Parameter declarations -------------------------------------------
        parameters = ParameterIndex.from_dict(
            doc.get('parameters', dict()),
            validate=validate
        )
        # Every parameter that is referenced in the workflow specification has
        # to be declared (only checked if the validate flag is True).
        if validate:
            for ref in tp.get_parameter_references(workflow_spec):
                if ref not in parameters:
                    raise err.UnknownParameterError(ref)
        # -- Post-processing task ---------------------------------------------
        has_postproc = 'postproc' in doc
        postproc_spec = doc['postproc'] if has_postproc else None
        if has_postproc and validate:
            util.validate_doc(
                doc=postproc_spec,
                mandatory=['workflow'],
                optional=['inputs', 'outputs']
            )
            # If the 'inputs' element is absent, a placeholder document that
            # contains the mandatory 'files' element is validated instead.
            util.validate_doc(
                doc=postproc_spec.get('inputs', {'files': ''}),
                mandatory=['files'],
                optional=['runs']
            )
        # -- Parameter module information -------------------------------------
        parameter_groups = [
            ParameterGroup.from_dict(grp, validate=validate)
            for grp in doc['parameterGroups']
        ] if 'parameterGroups' in doc else None
        # -- Output file specifications ---------------------------------------
        outputs = [
            WorkflowOutputFile.from_dict(out, validate=validate)
            for out in doc['outputs']
        ] if 'outputs' in doc else None
        # -- Result schema ----------------------------------------------------
        result_schema = ResultSchema.from_dict(
            doc.get('results'),
            validate=validate
        )
        # Assemble the template from the deserialized components.
        return cls(
            workflow_spec=workflow_spec,
            postproc_spec=postproc_spec,
            parameters=parameters,
            result_schema=result_schema,
            parameter_groups=parameter_groups,
            outputs=outputs
        )