def _handle_post_job_actions(self, step, job, replacement_dict):
    # Create new PJA associations with the created job, to be run on completion.
    # PJA Parameter Replacement (only applies to immediate actions-- rename specifically, for now)
    # Pass along replacement dict with the execution of the PJA so we don't have to modify the object.
    for pja in step.post_job_actions:
        if pja.action_type in ActionBox.immediate_actions:
            ActionBox.execute(self.trans.app, self.trans.sa_session, pja, job, replacement_dict)
        else:
            job.add_post_job_action(pja)
def _remap_job_on_rerun(self, trans, galaxy_session, rerun_remap_job_id, current_job, out_data):
    """
    Re-connect dependent datasets for a job that is being rerun (because it failed initially).

    If a job fails, the user has the option to try the job again with changed parameters.
    To be able to resume jobs that depend on this job's output datasets, we change the dependent
    jobs' input datasets to be those of the job that is being rerun.
    """
    try:
        old_job = trans.sa_session.query(trans.app.model.Job).get(rerun_remap_job_id)
        assert old_job is not None, '(%s/%s): Old job id is invalid' % (rerun_remap_job_id, current_job.id)
        assert old_job.tool_id == current_job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % (old_job.id, current_job.id, old_job.tool_id, current_job.tool_id)
        if trans.user is not None:
            assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % (old_job.id, current_job.id, old_job.user_id, trans.user.id)
        elif trans.user is None and type(galaxy_session) == trans.model.GalaxySession:
            assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % (old_job.id, current_job.id, old_job.session_id, galaxy_session.id)
        else:
            raise Exception('(%s/%s): Remapping via the API is not (yet) supported' % (old_job.id, current_job.id))
        # Duplicate PJAs before remap.
        for pjaa in old_job.post_job_actions:
            current_job.add_post_job_action(pjaa.post_job_action)
        if old_job.workflow_invocation_step:
            replacement_dict = {}
            for parameter in old_job.workflow_invocation_step.workflow_invocation.input_parameters:
                if parameter.type == WorkflowRequestInputParameter.types.REPLACEMENT_PARAMETERS:
                    replacement_dict[parameter.name] = parameter.value
            for pja in old_job.workflow_invocation_step.workflow_step.post_job_actions:
                # Execute immediate actions here, with workflow context.
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(trans.app, trans.sa_session, pja, current_job, replacement_dict)
        for p in old_job.parameters:
            if p.name.endswith('|__identifier__'):
                current_job.parameters.append(p.copy())
        remapped_hdas = self.__remap_data_inputs(old_job=old_job, current_job=current_job)
        for jtod in old_job.output_datasets:
            for (job_to_remap, jtid) in [(jtid.job, jtid) for jtid in jtod.dataset.dependent_jobs]:
                if (trans.user is not None and job_to_remap.user_id == trans.user.id) or (
                        trans.user is None and job_to_remap.session_id == galaxy_session.id):
                    self.__remap_parameters(job_to_remap, jtid, jtod, out_data)
                    trans.sa_session.add(job_to_remap)
                    trans.sa_session.add(jtid)
            jtod.dataset.visible = False
            trans.sa_session.add(jtod)
        for jtodc in old_job.output_dataset_collection_instances:
            hdca = jtodc.dataset_collection_instance
            hdca.collection.replace_failed_elements(remapped_hdas)
            if hdca.implicit_collection_jobs:
                for job in hdca.implicit_collection_jobs.jobs:
                    if job.job_id == old_job.id:
                        job.job_id = current_job.id
    except Exception:
        log.exception('Cannot remap rerun dependencies.')
def _handle_post_job_actions( self, step, job, replacement_dict ):
    # Create new PJA associations with the created job, to be run on completion.
    # PJA Parameter Replacement (only applies to immediate actions-- rename specifically, for now)
    # Pass along replacement dict with the execution of the PJA so we don't have to modify the object.

    # Combine workflow and runtime post job actions into the effective post
    # job actions for this execution.
    effective_post_job_actions = step.post_job_actions[:]
    for key, value in self.runtime_post_job_actions.iteritems():
        effective_post_job_actions.append( self.__to_pja( key, value, None ) )
    for pja in effective_post_job_actions:
        if pja.action_type in ActionBox.immediate_actions:
            ActionBox.execute( self.trans.app, self.trans.sa_session, pja, job, replacement_dict )
        else:
            job.add_post_job_action( pja )
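
# Illustrative aside (not part of the Galaxy source above): both
# _handle_post_job_actions variants share one dispatch pattern -- actions whose
# type appears in ActionBox.immediate_actions run right away (and receive the
# replacement dict), everything else is attached to the job and deferred until
# the job finishes. The standalone sketch below mimics that pattern with stub
# classes so the control flow can be run in isolation; StubActionBox, StubJob
# and StubPJA are hypothetical names that exist only for this example.

class StubPJA(object):
    def __init__(self, action_type):
        self.action_type = action_type


class StubJob(object):
    def __init__(self):
        self.deferred_actions = []

    def add_post_job_action(self, pja):
        # Deferred actions are stored on the job and executed on completion.
        self.deferred_actions.append(pja)


class StubActionBox(object):
    # Only these action types are safe to run before the job completes.
    immediate_actions = ('RenameDatasetAction',)

    @classmethod
    def execute(cls, pja, job, replacement_dict):
        print('running %s immediately with %r' % (pja.action_type, replacement_dict))


def handle_post_job_actions(post_job_actions, job, replacement_dict):
    # Same split as the Galaxy code above: immediate actions run now, the rest
    # are deferred by attaching them to the job.
    for pja in post_job_actions:
        if pja.action_type in StubActionBox.immediate_actions:
            StubActionBox.execute(pja, job, replacement_dict)
        else:
            job.add_post_job_action(pja)


if __name__ == '__main__':
    job = StubJob()
    actions = [StubPJA('RenameDatasetAction'), StubPJA('EmailAction')]
    handle_post_job_actions(actions, job, {'output_name': 'sample_1'})
    print('deferred: %s' % [p.action_type for p in job.deferred_actions])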
def _execute_workflow(self, sample):
    for key, value in sample.workflow['mappings'].items():
        if 'hda' not in value and 'ldda' in value:
            # If HDA is already here, it's an external input, we're not copying anything.
            ldda = self.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(value['ldda'])
            if ldda.dataset.state in ['new', 'upload', 'queued', 'running', 'empty', 'discarded']:
                log.error("Cannot import dataset '%s' to user history since its state is '%s'. " % (ldda.name, ldda.dataset.state))
            elif ldda.dataset.state in ['ok', 'error']:
                hda = ldda.to_history_dataset_association(target_history=sample.history, add_to_history=True)
                sample.workflow['mappings'][key]['hda'] = hda.id
                self.sa_session.add(sample)
                self.sa_session.flush()
    workflow_dict = sample.workflow
    import copy
    new_wf_dict = copy.deepcopy(workflow_dict)
    for key in workflow_dict['mappings']:
        if not isinstance(key, int):
            new_wf_dict['mappings'][int(key)] = workflow_dict['mappings'][key]
    workflow_dict = new_wf_dict
    fk_trans = FakeTrans(self.app, history=sample.history, user=sample.request.user)
    workflow = self.sa_session.query(self.app.model.Workflow).get(workflow_dict['id'])
    if not workflow:
        log.error("Workflow mapping failure.")
        return
    if len(workflow.steps) == 0:
        log.error("Workflow cannot be run because it does not have any steps")
        return
    if workflow.has_cycles:
        log.error("Workflow cannot be run because it contains cycles")
        return
    if workflow.has_errors:
        log.error("Workflow cannot be run because of validation errors in some steps")
        return
    # Build the state for each step
    errors = {}
    # Build a fake dictionary prior to execution.
    # Prepare each step
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Construct modules
        if step.type == 'tool' or step.type is None:
            # Restore the tool state for the step
            step.module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            # Store state with the step
            step.state = step.module.state
            # Error dict
            if step.tool_errors:
                errors[step.id] = step.tool_errors
        else:
            # Non-tool specific stuff?
            step.module = module_factory.from_workflow_step(fk_trans, step)
            step.state = step.module.get_runtime_state()
        # Connections by input name
        step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Connections by input name
        step.input_connections_by_name = \
            dict((conn.input_name, conn) for conn in step.input_connections)
        # Extract just the arguments for this step by prefix
        step_errors = None
        if step.type == 'tool' or step.type is None:
            module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            module.add_dummy_datasets(connections=step.input_connections)
            # Get the tool
            tool = module.tool
            # Get the state
            step.state = state = module.state
            if step_errors:
                errors[step.id] = state.inputs["__errors__"] = step_errors
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = odict()
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(fk_trans, step.state.inputs, history=sample.history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app, self.sa_session, pja, job, replacement_dict=None)
                else:
                    job.add_post_job_action(pja)
        else:
            job, out_data = step.module.execute(fk_trans, step.state)
            outputs[step.id] = out_data
            if step.id in workflow_dict['mappings']:
                data = self.sa_session.query(self.app.model.HistoryDatasetAssociation).get(workflow_dict['mappings'][str(step.id)]['hda'])
                outputs[step.id]['output'] = data
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    self.sa_session.add(workflow_invocation)
    self.sa_session.flush()
def create(self, trans, payload, **kwd):
    """
    POST /api/workflows

    We're not creating workflows from the api. Just execute for now.

    However, we will import them if installed_repository_file is specified
    """
    # ------------------------------------------------------------------------------- #
    ### RPARK: dictionary containing which workflows to change and edit ###
    param_map = {}
    if payload.has_key("parameters"):
        param_map = payload["parameters"]
    # ------------------------------------------------------------------------------- #

    if "workflow_id" not in payload:
        # create new
        if "installed_repository_file" in payload:
            workflow_controller = trans.webapp.controllers["workflow"]
            result = workflow_controller.import_workflow(trans=trans, cntrller="api", **payload)
            return result
        trans.response.status = 403
        return "Either workflow_id or installed_repository_file must be specified"
    if "installed_repository_file" in payload:
        trans.response.status = 403
        return "installed_repository_file may not be specified with workflow_id"
    stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
        trans.security.decode_id(payload["workflow_id"]))
    if stored_workflow.user != trans.user and not trans.user_is_admin():
        if trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation).filter_by(
                user=trans.user, stored_workflow=stored_workflow).count() == 0:
            trans.response.status = 400
            return "Workflow is not owned by or shared with current user"
    workflow = stored_workflow.latest_workflow
    if payload["history"].startswith("hist_id="):
        # Passing an existing history to use.
        history = trans.sa_session.query(self.app.model.History).get(
            trans.security.decode_id(payload["history"][8:]))
        if history.user != trans.user and not trans.user_is_admin():
            trans.response.status = 400
            return "Invalid History specified."
    else:
        history = self.app.model.History(name=payload["history"], user=trans.user)
        trans.sa_session.add(history)
        trans.sa_session.flush()
    ds_map = payload["ds_map"]
    add_to_history = "no_add_to_history" not in payload
    for k in ds_map:
        try:
            if ds_map[k]["src"] == "ldda":
                ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]["id"]))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]["src"] == "ld":
                ldda = trans.sa_session.query(self.app.model.LibraryDataset).get(
                    trans.security.decode_id(ds_map[k]["id"])).library_dataset_dataset_association
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]["src"] == "hda":
                # Get dataset handle, add to dict and history if necessary
                hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]["id"]))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), hda.dataset)
            else:
                trans.response.status = 400
                return "Unknown dataset source '%s' specified." % ds_map[k]["src"]
            if add_to_history and hda.history != history:
                hda = hda.copy()
                history.add_dataset(hda)
            ds_map[k]["hda"] = hda
        except AssertionError:
            trans.response.status = 400
            return "Invalid Dataset '%s' Specified" % ds_map[k]["id"]
    if not workflow:
        trans.response.status = 400
        return "Workflow not found."
    if len(workflow.steps) == 0:
        trans.response.status = 400
        return "Workflow cannot be run because it does not have any steps"
    if workflow.has_cycles:
        trans.response.status = 400
        return "Workflow cannot be run because it contains cycles"
    if workflow.has_errors:
        trans.response.status = 400
        return "Workflow cannot be run because of validation errors in some steps"
    # Build the state for each step
    rval = {}
    for step in workflow.steps:
        step_errors = None
        if step.type == "tool" or step.type is None:
            step.module = module_factory.from_workflow_step(trans, step)
            # Check for missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            step.state = step.module.state

            ####################################################
            ####################################################
            # RPARK: IF TOOL_NAME IN PARAMETER MAP #
            if step.tool_id in param_map:
                change_param = param_map[step.tool_id]["param"]
                change_value = param_map[step.tool_id]["value"]
                step.state.inputs[change_param] = change_value
            ####################################################
            ####################################################

            if step.tool_errors:
                trans.response.status = 400
                return "Workflow cannot be run because of validation errors in some steps: %s" % step.tool_errors
            if step.upgrade_messages:
                trans.response.status = 400
                return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
        else:
            # This is an input step. Make sure we have an available input.
            if step.type == "data_input" and str(step.id) not in ds_map:
                trans.response.status = 400
                return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
            step.module = module_factory.from_workflow_step(trans, step)
            step.state = step.module.get_runtime_state()
        step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = util.odict.odict()
    rval["history"] = trans.security.encode_id(history.id)
    rval["outputs"] = []
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == "tool" or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(trans, step.state.inputs, history=history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app, trans.sa_session, pja, job, replacement_dict=None)
                else:
                    job.add_post_job_action(pja)
            for v in out_data.itervalues():
                rval["outputs"].append(trans.security.encode_id(v.id))
        else:
            # This is an input step. Use the dataset inputs from ds_map.
            job, out_data = step.module.execute(trans, step.state)
            outputs[step.id] = out_data
            outputs[step.id]["output"] = ds_map[str(step.id)]["hda"]
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    trans.sa_session.add(workflow_invocation)
    trans.sa_session.flush()
    return rval
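
# Illustrative aside: the create() controller above expects a JSON payload with
# a workflow_id, a target history, and a ds_map wiring datasets to input steps.
# The dictionary below is a minimal sketch of such a payload based only on the
# keys that create() reads; the encoded ids and the tool id are placeholders,
# not real values.
example_payload = {
    'workflow_id': 'f2db41e1fa331b3e',        # encoded StoredWorkflow id (placeholder)
    'history': 'hist_id=df7a1f0c02a5b08e',    # reuse an existing history, or pass a plain name to create one
    'ds_map': {
        # keyed by workflow input step id; 'src' is one of 'hda', 'ldda' or 'ld'
        '1': {'src': 'hda', 'id': '33b43b4e7093c91f'},
    },
    # optional per-tool parameter override, as handled in the step-state loop above
    'parameters': {
        'some_tool_id': {'param': 'threshold', 'value': '10'},
    },
}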
class JobWrapper( object ):
    """
    Wraps a 'model.Job' with convenience methods for running processes and
    state management.
    """
    def __init__( self, job, queue ):
        self.job_id = job.id
        self.session_id = job.session_id
        self.user_id = job.user_id
        self.tool = queue.app.toolbox.tools_by_id.get( job.tool_id, None )
        self.queue = queue
        self.app = queue.app
        self.sa_session = self.app.model.context
        self.extra_filenames = []
        self.command_line = None
        # Tool versioning variables
        self.version_string_cmd = None
        self.version_string = ""
        self.galaxy_lib_dir = None
        # With job outputs in the working directory, we need the working
        # directory to be set before prepare is run, or else premature deletion
        # and job recovery fail.
        # Create the working dir if necessary
        try:
            self.app.object_store.create(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            self.working_directory = self.app.object_store.get_filename(job, base_dir='job_work', dir_only=True, extra_dir=str(self.job_id))
            log.debug('(%s) Working directory for job is: %s' % (self.job_id, self.working_directory))
        except ObjectInvalid:
            raise Exception('Unable to create job working directory, job failure')
        self.output_paths = None
        self.output_hdas_and_paths = None
        self.tool_provided_job_metadata = None
        # Wrapper holding the info required to restore and clean up from files used for setting metadata externally
        self.external_output_metadata = metadata.JobExternalOutputMetadataWrapper( job )
        self.params = None
        if job.params:
            self.params = from_json_string( job.params )
        self.__user_system_pwent = None
        self.__galaxy_system_pwent = None

    def get_job_runner( self ):
        return self.tool.get_job_runner( self.params )

    def get_job( self ):
        return self.sa_session.query( model.Job ).get( self.job_id )

    def get_id_tag(self):
        # For compatibility with drmaa, which uses job_id right now, and TaskWrapper
        return str(self.job_id)

    def get_param_dict( self ):
        """
        Restore the dictionary of parameters from the database.
        """
        job = self.get_job()
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        return param_dict

    def get_version_string_path( self ):
        return os.path.abspath(os.path.join(self.app.config.new_file_path, "GALAXY_VERSION_STRING_%s" % self.job_id))

    def prepare( self ):
        """
        Prepare the job to run by creating the working directory and the
        config files.
        """
        self.sa_session.expunge_all()  # this prevents the metadata reverting that has been seen in conjunction with the PBS job runner
        if not os.path.exists( self.working_directory ):
            os.mkdir( self.working_directory )
        # Restore parameters from the database
        job = self.get_job()
        if job.user is None and job.galaxy_session is None:
            raise Exception( 'Job %s has no user and no session.' % job.id )
        incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] )
        incoming = self.tool.params_from_strings( incoming, self.app )
        # Do any validation that could not be done at job creation
        self.tool.handle_unvalidated_param_values( incoming, self.app )
        # Restore input / output data lists
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )

        # Set up output dataset association for export history jobs. Because job
        # uses a Dataset rather than an HDA or LDA, it's necessary to set up a
        # fake dataset association that provides the needed attributes for
        # preparing a job.
        class FakeDatasetAssociation ( object ):
            def __init__( self, dataset=None ):
                self.dataset = dataset
                self.file_name = dataset.file_name
                self.metadata = dict()
                self.children = []
        special = self.sa_session.query( model.JobExportHistoryArchive ).filter_by( job=job ).first()
        if not special:
            special = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if special:
            out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset )

        # These can be passed on the command line if wanted as $userId $userEmail
        if job.history and job.history.user:  # check for anonymous user!
            userId = '%d' % job.history.user.id
            userEmail = str(job.history.user.email)
        else:
            userId = 'Anonymous'
            userEmail = 'Anonymous'
        incoming['__user_id__'] = incoming['userId'] = userId
        incoming['__user_email__'] = incoming['userEmail'] = userEmail
        # Build params, done before hook so hook can use
        param_dict = self.tool.build_param_dict( incoming, inp_data, out_data, self.get_output_fnames(), self.working_directory )
        # Certain tools require tasks to be completed prior to job execution
        # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
        self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict )
        # Run the before queue ("exec_before_job") hook
        self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data,
                             out_data=out_data, tool=self.tool, param_dict=incoming)
        self.sa_session.flush()
        # Build any required config files
        config_filenames = self.tool.build_config_files( param_dict, self.working_directory )
        # FIXME: Build the param file (might return None, DEPRECATED)
        param_filename = self.tool.build_param_file( param_dict, self.working_directory )
        # Build the job's command line
        self.command_line = self.tool.build_command_line( param_dict )
        # FIXME: for now, tools get Galaxy's lib dir in their path
        if self.command_line and self.command_line.startswith( 'python' ):
            self.galaxy_lib_dir = os.path.abspath( "lib" )  # cwd = galaxy root
        # Shell fragment to inject dependencies
        if self.app.config.use_tool_dependencies:
            self.dependency_shell_commands = self.tool.build_dependency_shell_commands()
        else:
            self.dependency_shell_commands = None
        # We need command_line persisted to the db in order for Galaxy to re-queue the job
        # if the server was stopped and restarted before the job finished
        job.command_line = self.command_line
        self.sa_session.add( job )
        self.sa_session.flush()
        # Return list of all extra files
        extra_filenames = config_filenames
        if param_filename is not None:
            extra_filenames.append( param_filename )
        self.param_dict = param_dict
        self.extra_filenames = extra_filenames
        self.version_string_cmd = self.tool.version_string_cmd
        return extra_filenames

    def fail( self, message, exception=False ):
        """
        Indicate job failure by setting state and message on all output
        datasets.
        """
        job = self.get_job()
        self.sa_session.refresh( job )
        # if the job was deleted, don't fail it
        if not job.state == job.states.DELETED:
            # Check if the failure is due to an exception
            if exception:
                # Save the traceback immediately in case we generate another
                # below
                job.traceback = traceback.format_exc()
                # Get the exception and let the tool attempt to generate
                # a better message
                etype, evalue, tb = sys.exc_info()
                m = self.tool.handle_job_failure_exception( evalue )
                if m:
                    message = m
            if self.app.config.outputs_to_working_directory:
                for dataset_path in self.get_output_fnames():
                    try:
                        shutil.move( dataset_path.false_path, dataset_path.real_path )
                        log.debug( "fail(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                    except ( IOError, OSError ), e:
                        log.error( "fail(): Missing output file in working directory: %s" % e )
            for dataset_assoc in job.output_datasets + job.output_library_datasets:
                dataset = dataset_assoc.dataset
                self.sa_session.refresh( dataset )
                dataset.state = dataset.states.ERROR
                dataset.blurb = 'tool error'
                dataset.info = message
                dataset.set_size()
                dataset.dataset.set_total_size()
                if dataset.ext == 'auto':
                    dataset.extension = 'data'
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                self.sa_session.add( dataset )
                self.sa_session.flush()
            job.state = job.states.ERROR
            job.command_line = self.command_line
            job.info = message
            self.sa_session.add( job )
            self.sa_session.flush()
        # Perform email action even on failure.
        for pja in [pjaa.post_job_action for pjaa in job.post_job_actions if pjaa.post_job_action.action_type == "EmailAction"]:
            ActionBox.execute(self.app, self.sa_session, pja, job)
        # If the job was deleted, call tool specific fail actions (used for e.g. external metadata) and clean up
        if self.tool:
            self.tool.job_failed( self, message, exception )
        if self.app.config.cleanup_job == 'always' or (self.app.config.cleanup_job == 'onsuccess' and job.state == job.states.DELETED):
            self.cleanup()
    def finish( self, stdout, stderr ):
        """
        Called to indicate that the associated command has been run. Updates
        the output datasets based on stderr and stdout from the command, and
        the contents of the output files.
        """
        # default post job setup
        self.sa_session.expunge_all()
        job = self.get_job()
        try:
            self.reclaim_ownership()
        except:
            self.fail( job.info )
            log.exception( '(%s) Failed to change ownership of %s, failing' % ( job.id, self.working_directory ) )
        # if the job was deleted, don't finish it
        if job.state == job.states.DELETED or job.state == job.states.ERROR:
            # ERROR at this point means the job was deleted by an administrator.
            return self.fail( job.info )
        if stderr:
            job.state = job.states.ERROR
        else:
            job.state = job.states.OK
        if self.version_string_cmd:
            version_filename = self.get_version_string_path()
            if os.path.exists(version_filename):
                self.version_string = open(version_filename).read()
                os.unlink(version_filename)
        if self.app.config.outputs_to_working_directory and not self.__link_file_check():
            for dataset_path in self.get_output_fnames():
                try:
                    shutil.move( dataset_path.false_path, dataset_path.real_path )
                    log.debug( "finish(): Moved %s to %s" % ( dataset_path.false_path, dataset_path.real_path ) )
                except ( IOError, OSError ):
                    # this can happen if Galaxy is restarted during the job's
                    # finish method - the false_path file has already moved,
                    # and when the job is recovered, it won't be found.
                    if os.path.exists( dataset_path.real_path ) and os.stat( dataset_path.real_path ).st_size > 0:
                        log.warning( "finish(): %s not found, but %s is not empty, so it will be used instead" % ( dataset_path.false_path, dataset_path.real_path ) )
                    else:
                        return self.fail( "Job %s's output dataset(s) could not be read" % job.id )
        job_context = ExpressionContext( dict( stdout=stdout, stderr=stderr ) )
        job_tool = self.app.toolbox.tools_by_id.get( job.tool_id, None )

        def in_directory( file, directory ):
            # Make both absolute.
            directory = os.path.abspath( directory )
            file = os.path.abspath( file )
            # Return true, if the common prefix of both is equal to directory
            # e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
            return os.path.commonprefix( [ file, directory ] ) == directory

        for dataset_assoc in job.output_datasets + job.output_library_datasets:
            context = self.get_dataset_finish_context( job_context, dataset_assoc.dataset.dataset )
            # should this also be checking library associations? - can a library item be added from a history before the job has ended? - lets not allow this to occur
            for dataset in dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations:
                # need to update all associated output hdas, i.e. history was shared with job running
                #
                # If HDA is to be copied from the working directory, do it now so that other attributes are correctly set.
                #
                if isinstance( dataset, model.HistoryDatasetAssociation ):
                    joda = self.sa_session.query( model.JobToOutputDatasetAssociation ).filter_by( job=job, dataset=dataset ).first()
                    if joda and job_tool:
                        hda_tool_output = job_tool.outputs.get( joda.name, None )
                        if hda_tool_output and hda_tool_output.from_work_dir:
                            # Copy from working dir to HDA.
                            source_file = os.path.join( os.path.abspath( self.working_directory ), hda_tool_output.from_work_dir )
                            if in_directory( source_file, self.working_directory ):
                                try:
                                    shutil.move( source_file, dataset.file_name )
                                    log.debug( "finish(): Moved %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                                except ( IOError, OSError ):
                                    log.debug( "finish(): Could not move %s to %s as directed by from_work_dir" % ( source_file, dataset.file_name ) )
                            else:
                                # Security violation.
                                log.exception( "from_work_dir specified a location not in the working directory: %s, %s" % ( source_file, self.working_directory ) )
                dataset.blurb = 'done'
                dataset.peek = 'no peek'
                dataset.info = ( dataset.info or '' ) + context['stdout'] + context['stderr']
                dataset.tool_version = self.version_string
                dataset.set_size()
                # Update (non-library) job output datasets through the object store
                if dataset not in job.output_library_datasets:
                    self.app.object_store.update_from_file(dataset.dataset, create=True)
                if context['stderr']:
                    dataset.blurb = "error"
                elif dataset.has_data():
                    # If the tool was expected to set the extension, attempt to retrieve it
                    if dataset.ext == 'auto':
                        dataset.extension = context.get( 'ext', 'data' )
                        dataset.init_meta( copy_from=dataset )
                    # if a dataset was copied, it won't appear in our dictionary:
                    # either use the metadata from originating output dataset, or call set_meta on the copies
                    # it would be quicker to just copy the metadata from the originating output dataset,
                    # but somewhat trickier (need to recurse up the copied_from tree), for now we'll call set_meta()
                    if not self.app.config.set_metadata_externally or \
                            ( not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session )
                              and self.app.config.retry_metadata_internally ):
                        dataset.set_meta( overwrite=False )
                    elif not self.external_output_metadata.external_metadata_set_successfully( dataset, self.sa_session ) and not context['stderr']:
                        dataset._state = model.Dataset.states.FAILED_METADATA
                    else:
                        # load metadata from file
                        # we need to no longer allow metadata to be edited while the job is still running,
                        # since if it is edited, the metadata changed on the running output will no longer match
                        # the metadata that was stored to disk for use via the external process,
                        # and the changes made by the user will be lost, without warning or notice
                        dataset.metadata.from_JSON_dict( self.external_output_metadata.get_output_filenames_by_dataset( dataset, self.sa_session ).filename_out )
                    try:
                        assert context.get( 'line_count', None ) is not None
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( line_count=context['line_count'], is_multi_byte=True )
                        else:
                            dataset.set_peek( line_count=context['line_count'] )
                    except:
                        if ( not dataset.datatype.composite_type and dataset.dataset.is_multi_byte() ) or self.tool.is_multi_byte:
                            dataset.set_peek( is_multi_byte=True )
                        else:
                            dataset.set_peek()
                    try:
                        # set the name if provided by the tool
                        dataset.name = context['name']
                    except:
                        pass
                else:
                    dataset.blurb = "empty"
                    if dataset.ext == 'auto':
                        dataset.extension = 'txt'
                self.sa_session.add( dataset )
            if context['stderr']:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.ERROR
            else:
                dataset_assoc.dataset.dataset.state = model.Dataset.states.OK
            # If any of the rest of the finish method below raises an
            # exception, the fail method will run and set the datasets to
            # ERROR. The user will never see that the datasets are in error if
            # they were flushed as OK here, since upon doing so, the history
            # panel stops checking for updates. So allow the
            # self.sa_session.flush() at the bottom of this method set
            # the state instead.
        for pja in job.post_job_actions:
            ActionBox.execute(self.app, self.sa_session, pja.post_job_action, job)
        # Flush all the dataset and job changes above. Dataset state changes
        # will now be seen by the user.
        self.sa_session.flush()
        # Save stdout and stderr
        if len( stdout ) > 32768:
            log.error( "stdout for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stdout = stdout[:32768]
        if len( stderr ) > 32768:
            log.error( "stderr for job %d is greater than 32K, only first part will be logged to database" % job.id )
        job.stderr = stderr[:32768]
        # custom post process setup
        inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
        out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
        inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
        out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
        param_dict = dict( [ ( p.name, p.value ) for p in job.parameters ] )  # why not re-use self.param_dict here? ##dunno...probably should, this causes tools.parameters.basic.UnvalidatedValue to be used in following methods instead of validated and transformed values during i.e. running workflows
        param_dict = self.tool.params_from_strings( param_dict, self.app )
        # Check for and move associated_files
        self.tool.collect_associated_files(out_data, self.working_directory)
        gitd = self.sa_session.query( model.GenomeIndexToolData ).filter_by( job=job ).first()
        if gitd:
            self.tool.collect_associated_files({'': gitd}, self.working_directory)
        # Create generated output children and primary datasets and add to param_dict
        collected_datasets = {
            'children': self.tool.collect_child_datasets(out_data, self.working_directory),
            'primary': self.tool.collect_primary_datasets(out_data, self.working_directory)
        }
        param_dict.update({'__collected_datasets__': collected_datasets})
        # Certain tools require tasks to be completed after job execution
        # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
        self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict, job=job )
        # Call 'exec_after_process' hook
        self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,
                             out_data=out_data, param_dict=param_dict,
                             tool=self.tool, stdout=stdout, stderr=stderr )
        job.command_line = self.command_line
        bytes = 0
        # Once datasets are collected, set the total dataset size (includes extra files)
        for dataset_assoc in job.output_datasets:
            dataset_assoc.dataset.dataset.set_total_size()
            bytes += dataset_assoc.dataset.dataset.get_total_size()
        if job.user:
            job.user.total_disk_usage += bytes
        # fix permissions
        for path in [ dp.real_path for dp in self.get_output_fnames() ]:
            util.umask_fix_perms( path, self.app.config.umask, 0666, self.app.config.gid )
        self.sa_session.flush()
        log.debug( 'job %d ended' % self.job_id )
        if self.app.config.cleanup_job == 'always' or ( not stderr and self.app.config.cleanup_job == 'onsuccess' ):
            self.cleanup()
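
# Illustrative aside: the in_directory() helper inside finish() relies on
# os.path.commonprefix(), which compares strings character by character, so a
# sibling directory that merely shares a name prefix (e.g. /work/job1 vs
# /work/job10) would pass the check. The sketch below is an assumption about
# how one could tighten that containment check; it is not part of the Galaxy
# code above.
import os


def in_directory_strict(file_path, directory):
    # Resolve symlinks and relative segments, then require that the file path
    # equals the directory or starts with the directory plus a separator.
    directory = os.path.realpath(directory)
    file_path = os.path.realpath(file_path)
    return file_path == directory or file_path.startswith(directory + os.sep)


if __name__ == '__main__':
    # Character-based prefix check: '/work/job10/out.txt' wrongly looks like it
    # is inside '/work/job1'.
    print(os.path.commonprefix(['/work/job10/out.txt', '/work/job1']) == '/work/job1')  # True
    print(in_directory_strict('/work/job10/out.txt', '/work/job1'))                     # False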
def create(self, trans, payload, **kwd):
    """
    POST /api/workflows

    We're not creating workflows from the api. Just execute for now.

    However, we will import them if installed_repository_file is specified
    """
    # Pull parameters out of payload.
    workflow_id = payload['workflow_id']
    param_map = payload.get('parameters', {})
    ds_map = payload['ds_map']
    add_to_history = 'no_add_to_history' not in payload
    history_param = payload['history']

    # Get/create workflow.
    if not workflow_id:
        # create new
        if 'installed_repository_file' in payload:
            workflow_controller = trans.webapp.controllers[ 'workflow' ]
            result = workflow_controller.import_workflow( trans=trans, cntrller='api', **payload )
            return result
        trans.response.status = 403
        return "Either workflow_id or installed_repository_file must be specified"
    if 'installed_repository_file' in payload:
        trans.response.status = 403
        return "installed_repository_file may not be specified with workflow_id"

    # Get workflow + accessibility check.
    stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
        trans.security.decode_id(workflow_id))
    if stored_workflow.user != trans.user and not trans.user_is_admin():
        if trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation).filter_by(
                user=trans.user, stored_workflow=stored_workflow).count() == 0:
            trans.response.status = 400
            return "Workflow is not owned by or shared with current user"
    workflow = stored_workflow.latest_workflow

    # Get target history.
    if history_param.startswith('hist_id='):
        # Passing an existing history to use.
        history = trans.sa_session.query(self.app.model.History).get(
            trans.security.decode_id(history_param[8:]))
        if history.user != trans.user and not trans.user_is_admin():
            trans.response.status = 400
            return "Invalid History specified."
    else:
        # Send workflow outputs to new history.
        history = self.app.model.History(name=history_param, user=trans.user)
        trans.sa_session.add(history)
        trans.sa_session.flush()

    # Set workflow inputs.
    for k in ds_map:
        try:
            if ds_map[k]['src'] == 'ldda':
                ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), ldda.dataset )
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'ld':
                ldda = trans.sa_session.query(self.app.model.LibraryDataset).get(
                    trans.security.decode_id(ds_map[k]['id'])).library_dataset_dataset_association
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), ldda.dataset )
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'hda':
                # Get dataset handle, add to dict and history if necessary
                hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), hda.dataset )
            else:
                trans.response.status = 400
                return "Unknown dataset source '%s' specified." % ds_map[k]['src']
            if add_to_history and hda.history != history:
                hda = hda.copy()
                history.add_dataset(hda)
            ds_map[k]['hda'] = hda
        except AssertionError:
            trans.response.status = 400
            return "Invalid Dataset '%s' Specified" % ds_map[k]['id']

    # Sanity checks.
    if not workflow:
        trans.response.status = 400
        return "Workflow not found."
    if len( workflow.steps ) == 0:
        trans.response.status = 400
        return "Workflow cannot be run because it does not have any steps"
    if workflow.has_cycles:
        trans.response.status = 400
        return "Workflow cannot be run because it contains cycles"
    if workflow.has_errors:
        trans.response.status = 400
        return "Workflow cannot be run because of validation errors in some steps"

    # Build the state for each step
    rval = {}
    for step in workflow.steps:
        step_errors = None
        if step.type == 'tool' or step.type is None:
            step.module = module_factory.from_workflow_step( trans, step )
            # Check for missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets( connections=step.input_connections )
            step.state = step.module.state
            _update_step_parameters(step, param_map)
            if step.tool_errors:
                trans.response.status = 400
                return "Workflow cannot be run because of validation errors in some steps: %s" % step.tool_errors
            if step.upgrade_messages:
                trans.response.status = 400
                return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
        else:
            # This is an input step. Make sure we have an available input.
            if step.type == 'data_input' and str(step.id) not in ds_map:
                trans.response.status = 400
                return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
            step.module = module_factory.from_workflow_step( trans, step )
            step.state = step.module.get_runtime_state()
        step.input_connections_by_name = dict( ( conn.input_name, conn ) for conn in step.input_connections )

    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = util.odict.odict()
    rval['history'] = trans.security.encode_id(history.id)
    rval['outputs'] = []
    for step in workflow.steps:
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool( step.tool_id )

            def callback( input, value, prefixed_name, prefixed_label ):
                if isinstance( input, DataToolParameter ):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[ prefixed_name ]
                        return outputs[ conn.output_step.id ][ conn.output_name ]
            visit_input_values( tool.inputs, step.state.inputs, callback )
            job, out_data = tool.execute( trans, step.state.inputs, history=history )
            outputs[ step.id ] = out_data

            # Do post-job actions.
            replacement_params = payload.get('replacement_params', {})
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(trans.app, trans.sa_session, pja, job, replacement_dict=replacement_params)
                else:
                    job.add_post_job_action(pja)

            for v in out_data.itervalues():
                rval['outputs'].append(trans.security.encode_id(v.id))
        else:
            # This is an input step. Use the dataset inputs from ds_map.
            job, out_data = step.module.execute( trans, step.state )
            outputs[step.id] = out_data
            outputs[step.id]['output'] = ds_map[str(step.id)]['hda']
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    trans.sa_session.add( workflow_invocation )
    trans.sa_session.flush()
    return rval
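
# Illustrative aside: the refactored create() above delegates to
# _update_step_parameters(step, param_map), which is not included in this
# excerpt. The sketch below is an assumption about its behaviour, reconstructed
# from the older inline logic shown earlier (param_map keyed by tool id, each
# entry carrying a 'param' name and a 'value'); the real helper may differ.
def _update_step_parameters(step, param_map):
    """Override a single tool parameter on a step's state, if requested."""
    if step.tool_id in param_map:
        change_param = param_map[step.tool_id]['param']
        change_value = param_map[step.tool_id]['value']
        step.state.inputs[change_param] = change_value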