def start_run(self, group_id: str, arguments: List[Dict], config: Optional[Dict] = None) -> Dict:
    """Start a new workflow run for the given group.

    The user provided arguments are expected to be a list of (name,value)-pairs.
    The name identifies the template parameter. The data type of the value
    depends on the type of the parameter.

    Returns a serialization of the handle for the started run.

    Raises an unauthorized access error if the user does not have the
    necessary access to modify the workflow group.

    Parameters
    ----------
    group_id: string
        Unique workflow group identifier
    arguments: list(dict)
        List of user provided arguments for template parameters.
    config: dict, default=None
        Optional implementation-specific configuration settings that can be
        used to overwrite settings that were initialized at object creation.

    Returns
    -------
    dict

    Raises
    ------
    flowserv.error.InvalidArgumentError
    flowserv.error.MissingArgumentError
    flowserv.error.UnauthorizedAccessError
    flowserv.error.UnknownFileError
    flowserv.error.UnknownParameterError
    flowserv.error.UnknownWorkflowGroupError
    """
    # Raise an error if the user does not have rights to start new runs for
    # the workflow group or if the workflow group does not exist.
    if not self.auth.is_group_member(group_id=group_id, user_id=self.user_id):
        raise err.UnauthorizedAccessError()
    # Get handle for the given user group to enable access to uploaded
    # files and the identifier of the associated workflow.
    group = self.group_manager.get_group(group_id)
    # Get the template from the workflow that the workflow group belongs
    # to. Get a modified copy of the template based on the (potentially)
    # modified workflow specification and parameters of the workflow group.
    template = group.workflow.get_template(
        workflow_spec=group.workflow_spec,
        parameters=group.parameters
    )
    # Create instances of the template arguments from the given list of
    # values. At this point we only distinguish between scalar values and
    # input files. Also create a mapping from the argument list that is
    # stored in the database.
    run_args = dict()
    serialized_args = list()
    for arg in arguments:
        arg_id, arg_val = deserialize_arg(arg)
        # Raise an error if multiple values are given for the same argument.
        if arg_id in run_args:
            raise err.DuplicateArgumentError(arg_id)
        para = template.parameters.get(arg_id)
        if para is None:
            raise err.UnknownParameterError(arg_id)
        if is_fh(arg_val):
            file_id, target = deserialize_fh(arg_val)
            # The argument value is expected to be the identifier of a
            # previously uploaded file. This will raise an exception if the
            # file identifier is unknown.
            fileobj = self.group_manager.get_uploaded_file(
                group_id=group_id,
                file_id=file_id
            ).fileobj
            run_args[arg_id] = para.cast(value=(fileobj, target))
        else:
            run_args[arg_id] = para.cast(arg_val)
        # Actor values as parameter values cannot be serialized. For now,
        # we only store the serialized workflow step but no information
        # about the additional input files.
        if isinstance(arg_val, ActorValue):
            arg_val = arg_val.spec
        serialized_args.append(serialize_arg(name=arg_id, value=arg_val))
    # Before we start creating directories and copying files make sure that
    # there are values for all template parameters (either in the arguments
    # dictionary or set as default values).
    template.validate_arguments(run_args)
    # Start the run.
    run = self.run_manager.create_run(group=group, arguments=serialized_args)
    run_id = run.run_id
    # Use default engine configuration if the configuration argument was
    # not given.
    config = config if config else group.engine_config
    staticdir = dirs.workflow_staticdir(group.workflow.workflow_id)
    state, runstore = self.backend.exec_workflow(
        run=run,
        template=template,
        arguments=run_args,
        staticfs=self.fs.get_store_for_folder(key=staticdir),
        config=config
    )
    # Update the run state if it is no longer pending for execution. Make
    # sure to call the update run method for the server to ensure that
    # results are inserted and post-processing workflows started.
    if not state.is_pending():
        self.update_run(run_id=run_id, state=state, runstore=runstore)
        return self.get_run(run_id)
    return self.serialize.run_handle(run, group)
def start_run(self, arguments: Dict, config: Optional[Dict] = None, poll_interval: Optional[int] = None) -> Run:
    """Run the associated workflow for the given set of arguments.

    Parameters
    ----------
    arguments: dict
        Dictionary of user-provided arguments.
    config: dict, default=None
        Optional implementation-specific configuration settings that can be
        used to overwrite settings that were initialized at object creation.
    poll_interval: int, default=None
        Optional poll interval that is used to check the state of a run
        until it is no longer in active state.

    Returns
    -------
    flowserv.client.app.run.Run
    """
    arguments = self._parameters.set_defaults(arguments=arguments)
    with self.service() as api:
        # Upload any argument values as files that are either of type
        # StringIO or BytesIO.
        arglist = list()
        for key, val in arguments.items():
            # Convert arguments to the format that is expected by the run
            # manager. We pay special attention to file parameters. Input
            # files may be represented as strings, IO buffers or file
            # objects.
            para = self._parameters.get(key)
            if para is None:
                raise err.UnknownParameterError(key)
            if para.is_file():
                # Upload a given file prior to running the application.
                upload_file = None
                target = None
                if isinstance(val, str):
                    upload_file = FSFile(val)
                elif isinstance(val, StringIO):
                    # Uploads are binary; re-encode the text buffer as UTF-8.
                    buf = BytesIO(val.read().encode('utf8'))
                    upload_file = IOBuffer(buf)
                elif isinstance(val, BytesIO):
                    upload_file = IOBuffer(val)
                elif isinstance(val, IOHandle):
                    upload_file = val
                else:
                    msg = 'invalid argument {} for {}'.format(key, val)
                    raise err.InvalidArgumentError(msg)
                fh = api.uploads().upload_file(
                    group_id=self.group_id,
                    file=upload_file,
                    name=key
                )
                # Replace the raw value with a reference to the uploaded file.
                val = serialize_fh(fh[filelbls.FILE_ID], target=target)
            arglist.append(serialize_arg(key, val))
        # Execute the run and return the serialized run handle.
        run = api.runs().start_run(
            group_id=self.group_id,
            arguments=arglist,
            config=config
        )
    rh = Run(doc=run, service=self.service)
    # Wait for run to finish if active and a poll interval is given.
    while poll_interval and rh.is_active():
        time.sleep(poll_interval)
        rh = self.poll_run(run_id=rh.run_id)
    # Also wait for any post-processing workflow to finish before returning.
    pprun = self.get_postproc_results()
    if pprun is not None:
        while poll_interval and pprun.is_active():
            time.sleep(poll_interval)
            pprun = self.get_postproc_results()
    return rh
def from_dict(cls, doc, validate=True):
    """Create a workflow template instance from a dictionary serialization.

    The dictionary structure is expected to match the output of the
    to_dict() method of this class. The only mandatory element is the
    workflow specification.

    Parameters
    ----------
    doc: dict
        Dictionary serialization of a workflow template
    validate: bool, optional
        Validate template parameter declarations against the parameter
        schema if this flag is True.

    Returns
    -------
    flowserv.model.template.base.WorkflowTemplate

    Raises
    ------
    flowserv.error.InvalidTemplateError
    flowserv.error.UnknownParameterError
    """
    # The workflow specification is the single mandatory element.
    if validate and 'workflow' not in doc:
        msg = "missing element '{}'".format('workflow')
        raise err.InvalidTemplateError(msg)
    spec = doc['workflow']
    # Build the parameter index from the (optional) declarations; this also
    # ensures that default values are set for all parameters.
    params = ParameterIndex.from_dict(
        doc.get('parameters', dict()),
        validate=validate
    )
    # When validating, every parameter referenced by the workflow
    # specification must have a declaration.
    if validate:
        for ref in tp.get_parameter_references(spec):
            if ref not in params:
                raise err.UnknownParameterError(ref)
    # Optional post-processing task specification.
    postproc = doc.get('postproc')
    if postproc is not None and validate:
        util.validate_doc(
            doc=postproc,
            mandatory=['workflow'],
            optional=['inputs', 'outputs']
        )
        util.validate_doc(
            doc=postproc.get('inputs', {'files': ''}),
            mandatory=['files'],
            optional=['runs']
        )
    # Optional parameter group (module) information.
    groups = None
    if 'parameterGroups' in doc:
        groups = [
            ParameterGroup.from_dict(g, validate=validate)
            for g in doc['parameterGroups']
        ]
    # Optional output file specifications.
    outfiles = None
    if 'outputs' in doc:
        outfiles = [
            WorkflowOutputFile.from_dict(f, validate=validate)
            for f in doc['outputs']
        ]
    # Result schema (may be absent; the deserializer handles None).
    schema = ResultSchema.from_dict(doc.get('results'), validate=validate)
    return cls(
        workflow_spec=spec,
        postproc_spec=postproc,
        parameters=params,
        result_schema=schema,
        parameter_groups=groups,
        outputs=outfiles
    )