def get_data_inputs(self):
    """Collect descriptions of the tool's visible data-type inputs.

    Returns a list of dicts (name, label, multiple, extensions, input_type,
    and collection_types for collection inputs); empty when no tool is set.
    """
    inputs_found = []
    if not self.tool:
        return inputs_found

    def gather(input, prefixed_name, prefixed_label, **kwargs):
        # Inputs explicitly flagged hidden are excluded from the listing.
        if getattr(input, 'hidden', False):
            return
        if isinstance(input, DataToolParameter):
            inputs_found.append({
                'name': prefixed_name,
                'label': prefixed_label,
                'multiple': input.multiple,
                'extensions': input.extensions,
                'input_type': "dataset",
            })
        elif isinstance(input, DataCollectionToolParameter):
            inputs_found.append({
                'name': prefixed_name,
                'label': prefixed_label,
                'multiple': input.multiple,
                'input_type': "dataset_collection",
                'collection_types': input.collection_types,
                'extensions': input.extensions,
            })

    visit_input_values(self.tool.inputs, self.state.inputs, gather)
    return inputs_found
def get_data_inputs(self):
    """Return description dicts for the tool's data and collection inputs."""
    found = []

    def visitor(input, value, prefixed_name, prefixed_label):
        # Note: two independent checks, mirroring the visit contract — an
        # input matching both types would be recorded twice.
        if isinstance(input, DataToolParameter):
            found.append({
                'name': prefixed_name,
                'label': prefixed_label,
                'multiple': input.multiple,
                'extensions': input.extensions,
                'input_type': "dataset",
            })
        if isinstance(input, DataCollectionToolParameter):
            found.append({
                'name': prefixed_name,
                'label': prefixed_label,
                'multiple': input.multiple,
                'input_type': "dataset_collection",
                'collection_type': input.collection_type,
                'extensions': input.extensions,
            })

    visit_input_values(self.tool.inputs, self.state.inputs, visitor)
    return found
def add_dummy_datasets(self, connections=None):
    """Replace (connected) data inputs with placeholder DummyDataset values.

    The placeholders are not persisted, so this must be re-applied every
    time the state is rebuilt.
    """
    by_name = {conn.input_name: conn for conn in connections} if connections else {}

    def callback(input, value, prefixed_name, prefixed_label):
        if isinstance(input, DataToolParameter):
            if connections is None or prefixed_name in by_name:
                if input.multiple:
                    # One placeholder per connection; none when unconnected.
                    return [DummyDataset() for _ in connections] if connections else []
                return DummyDataset()
            return None
        if isinstance(input, DataCollectionToolParameter):
            if connections is None or prefixed_name in by_name:
                return DummyDataset()
        return None

    visit_input_values(self.tool.inputs, self.state.inputs, callback)
def get_data_inputs(self):
    """List the data-type inputs declared by the module's tool."""
    data_inputs = []

    def _visit(input, value, prefixed_name, prefixed_label):
        if isinstance(input, DataToolParameter):
            entry = dict(
                name=prefixed_name,
                label=prefixed_label,
                multiple=input.multiple,
                extensions=input.extensions,
                input_type="dataset",
            )
            data_inputs.append(entry)
        if isinstance(input, DataCollectionToolParameter):
            entry = dict(
                name=prefixed_name,
                label=prefixed_label,
                multiple=input.multiple,
                input_type="dataset_collection",
                collection_type=input.collection_type,
                extensions=input.extensions,
            )
            data_inputs.append(entry)

    visit_input_values(self.tool.inputs, self.state.inputs, _visit)
    return data_inputs
def _find_collections_to_match(self, tool, progress, step):
    """Scan the step's tool inputs for data that must be mapped over.

    Returns a ``matching.CollectionsToMatch`` describing every non-multiple
    input whose incoming value requires collection mapping.
    """
    to_match = matching.CollectionsToMatch()

    def examine(input, value, prefixed_name, prefixed_label):
        if isinstance(input, DataToolParameter) and not input.multiple:
            replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            # A collection wired into a single-dataset input forces a map-over.
            if isinstance(replacement, model.HistoryDatasetCollectionAssociation):
                to_match.add(prefixed_name, replacement)
        if isinstance(input, DataCollectionToolParameter) and not input.multiple:
            replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            if input._history_query(self.trans).can_map_over(replacement):
                to_match.add(prefixed_name, replacement,
                             subcollection_type=input.collection_type)

    visit_input_values(tool.inputs, step.state.inputs, examine)
    return to_match
def add_dummy_datasets( self, connections=None, steps=None ):
    """Populate data inputs with RuntimeValue placeholders or, when building
    a workflow from history, with values resolved from connected steps.

    :param connections: step input connections; when given, only connected
        inputs are replaced.
    :param steps: all workflow steps; used to locate the upstream step that
        feeds a connected input.
    :raises ToolMissingException: when the module's tool could not be loaded.
    """
    if self.tool:
        if connections:
            # Store connections by input name
            input_connections_by_name = dict( ( conn.input_name, conn ) for conn in connections )
        else:
            input_connections_by_name = {}

        # Any input needs to have value RuntimeValue or obtain the value from connected steps
        def callback( input, prefixed_name, context, **kwargs ):
            if isinstance( input, DataToolParameter ) or isinstance( input, DataCollectionToolParameter ):
                if connections is not None and steps is not None and self.trans.workflow_building_mode is workflow_building_modes.USE_HISTORY:
                    if prefixed_name in input_connections_by_name:
                        connection = input_connections_by_name[ prefixed_name ]
                        # Resolve the upstream step that supplies this input.
                        output_step = next( output_step for output_step in steps if connection.output_step_id == output_step.id )
                        if output_step.type.startswith( 'data' ):
                            # Upstream is a data / data-collection input step:
                            # take its runtime value directly.
                            output_inputs = output_step.module.get_runtime_inputs( connections=connections )
                            output_value = output_inputs[ 'input' ].get_initial_value( self.trans, context )
                            if isinstance( input, DataToolParameter ) and isinstance( output_value, self.trans.app.model.HistoryDatasetCollectionAssociation ):
                                # A collection feeding a plain data input is
                                # reduced to a representative HDA.
                                return output_value.to_hda_representative()
                            return output_value
                        return RuntimeValue()
                    else:
                        # Unconnected input: fall back to the tool's default.
                        return input.get_initial_value( self.trans, context )
                elif connections is None or prefixed_name in input_connections_by_name:
                    return RuntimeValue()
        visit_input_values( self.tool.inputs, self.state.inputs, callback )
    else:
        raise ToolMissingException( "Tool %s missing. Cannot add dummy datasets." % self.tool_id )
def get_data_inputs( self ):
    """Describe the tool's non-hidden data inputs for the workflow editor."""
    collected = []
    if self.tool:
        def on_input( input, prefixed_name, prefixed_label, **kwargs ):
            # Skip inputs explicitly marked hidden.
            if hasattr( input, 'hidden' ) and input.hidden:
                return
            if isinstance( input, DataToolParameter ):
                entry = dict( name=prefixed_name, label=prefixed_label )
                entry.update( multiple=input.multiple, extensions=input.extensions, input_type="dataset" )
                collected.append( entry )
            elif isinstance( input, DataCollectionToolParameter ):
                entry = dict( name=prefixed_name, label=prefixed_label )
                entry.update( multiple=input.multiple, input_type="dataset_collection", collection_types=input.collection_types, extensions=input.extensions )
                collected.append( entry )
        visit_input_values( self.tool.inputs, self.state.inputs, on_input )
    return collected
def compute_runtime_state( self, trans, step_updates=None ):
    """
    Determine the runtime state (potentially different from self.state which
    describes configuration state). This (again unlike self.state) is currently
    always a `DefaultToolState` object.

    If `step_updates` is `None`, this is likely for rendering the run form for
    instance and no runtime properties are available and state must be solely
    determined by the default runtime state described by the step.

    If `step_updates` are available they describe the runtime properties
    supplied by the workflow runner.

    Returns a tuple of the computed state and a dict mapping prefixed input
    names to validation errors.
    """
    state = self.get_runtime_state()
    step_errors = {}
    if step_updates:

        def update_value( input, context, prefixed_name, **kwargs ):
            # Only replace values the runner actually supplied; everything
            # else keeps its default via the NO_REPLACEMENT sentinel.
            if prefixed_name in step_updates:
                value, error = check_param( trans, input, step_updates.get( prefixed_name ), context )
                if error is not None:
                    step_errors[ prefixed_name ] = error
                return value
            return NO_REPLACEMENT

        visit_input_values( self.get_runtime_inputs(), state.inputs, update_value, no_replacement_value=NO_REPLACEMENT )
    return state, step_errors
def compute_runtime_state(self, trans, step_updates=None):
    """
    Determine the runtime state (potentially different from self.state which
    describes configuration state). This (again unlike self.state) is currently
    always a `DefaultToolState` object.

    If `step_updates` is `None`, this is likely for rendering the run form for
    instance and no runtime properties are available and state must be solely
    determined by the default runtime state described by the step.

    If `step_updates` are available they describe the runtime properties
    supplied by the workflow runner.
    """
    state = self.get_runtime_state()
    step_errors = {}
    if step_updates:

        def apply_update(input, context, prefixed_name, **kwargs):
            # Guard clause: untouched inputs keep their defaults.
            if prefixed_name not in step_updates:
                return NO_REPLACEMENT
            value, error = check_param(trans, input, step_updates.get(prefixed_name), context)
            if error is not None:
                step_errors[prefixed_name] = error
            return value

        visit_input_values(self.get_runtime_inputs(), state.inputs, apply_update,
                           no_replacement_value=NO_REPLACEMENT)
    return state, step_errors
def add_dummy_datasets(self, connections=None, steps=None):
    """Populate data inputs with RuntimeValue placeholders or, when building
    a workflow from history, with values resolved from connected steps.

    :param connections: step input connections; when given, only connected
        inputs are replaced.
    :param steps: all workflow steps; used to locate the upstream step that
        feeds a connected input.
    :raises ToolMissingException: when the module's tool could not be loaded.
    """
    if self.tool:
        if connections:
            # Store connections by input name
            input_connections_by_name = dict((conn.input_name, conn) for conn in connections)
        else:
            input_connections_by_name = {}

        # Any input needs to have value RuntimeValue or obtain the value from connected steps
        def callback(input, prefixed_name, context, **kwargs):
            if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                if connections is not None and steps is not None and self.trans.workflow_building_mode is workflow_building_modes.USE_HISTORY:
                    if prefixed_name in input_connections_by_name:
                        connection = input_connections_by_name[prefixed_name]
                        # Resolve the upstream step that supplies this input.
                        output_step = next(output_step for output_step in steps if connection.output_step_id == output_step.id)
                        if output_step.type.startswith('data'):
                            # Upstream is a data / data-collection input step:
                            # take its runtime value directly.
                            output_inputs = output_step.module.get_runtime_inputs(connections=connections)
                            output_value = output_inputs['input'].get_initial_value(self.trans, context)
                            if isinstance(input, DataToolParameter) and isinstance(output_value, self.trans.app.model.HistoryDatasetCollectionAssociation):
                                # A collection feeding a plain data input is
                                # reduced to a representative HDA.
                                return output_value.to_hda_representative()
                            return output_value
                        return RuntimeValue()
                    else:
                        # Unconnected input: fall back to the tool's default.
                        return input.get_initial_value(self.trans, context)
                elif connections is None or prefixed_name in input_connections_by_name:
                    return RuntimeValue()

        visit_input_values(self.tool.inputs, self.state.inputs, callback)
    else:
        raise ToolMissingException("Tool %s missing. Cannot add dummy datasets." % self.tool_id)
def set_compute_environment(self, compute_environment, get_special=None):
    """
    Setup the compute environment and established the outline of the param_dict
    for evaluating command and config cheetah templates.
    """
    self.compute_environment = compute_environment

    job = self.job
    # Rehydrate the job's stored parameter strings into tool state.
    incoming = dict([(p.name, p.value) for p in job.parameters])
    incoming = self.tool.params_from_strings(incoming, self.app)

    # Full parameter validation
    request_context = WorkRequestContext(app=self.app, user=self._user, history=self._history)

    def validate_inputs(input, value, context, **kwargs):
        value = input.from_json(value, request_context, context)
        input.validate(value, request_context)
    visit_input_values(self.tool.inputs, incoming, validate_inputs)

    # Restore input / output data lists
    inp_data, out_data, out_collections = job.io_dicts()

    if get_special:
        # Set up output dataset association for export history jobs. Because job
        # uses a Dataset rather than an HDA or LDA, it's necessary to set up a
        # fake dataset association that provides the needed attributes for
        # preparing a job.
        class FakeDatasetAssociation (object):
            # Marker attribute — presumably checked by downstream job-prep
            # code; confirm against callers.
            fake_dataset_association = True

            def __init__(self, dataset=None):
                self.dataset = dataset
                self.file_name = dataset.file_name
                self.metadata = dict()
        special = get_special()
        if special:
            out_data["output_file"] = FakeDatasetAssociation(dataset=special.dataset)

    # These can be passed on the command line if wanted as $__user_*__
    incoming.update(model.User.user_template_environment(job.history and job.history.user))

    # Build params, done before hook so hook can use
    param_dict = self.build_param_dict(
        incoming,
        inp_data,
        out_data,
        output_collections=out_collections,
    )

    # Certain tools require tasks to be completed prior to job execution
    # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
    self.tool.exec_before_job(self.app, inp_data, out_data, param_dict)
    # Run the before queue ("exec_before_job") hook
    self.tool.call_hook('exec_before_job', self.app, inp_data=inp_data, out_data=out_data, tool=self.tool, param_dict=incoming)

    self.param_dict = param_dict
def set_compute_environment(self, compute_environment, get_special=None):
    """
    Setup the compute environment and established the outline of the param_dict
    for evaluating command and config cheetah templates.
    """
    self.compute_environment = compute_environment

    job = self.job
    # Rehydrate the job's stored parameter strings into tool state.
    incoming = {p.name: p.value for p in job.parameters}
    incoming = self.tool.params_from_strings(incoming, self.app)

    # Full parameter validation
    request_context = WorkRequestContext(app=self.app, user=self._user, history=self._history)
    self.request_context = request_context

    def validate_inputs(input, value, context, **kwargs):
        value = input.from_json(value, request_context, context)
        input.validate(value, request_context)
    visit_input_values(self.tool.inputs, incoming, validate_inputs)

    # Restore input / output data lists
    inp_data, out_data, out_collections = job.io_dicts()

    if get_special:
        special = get_special()
        if special:
            # NOTE(review): 'fda' presumably a fake dataset association
            # prepared by the caller — confirm against get_special providers.
            out_data["output_file"] = special.fda

    # These can be passed on the command line if wanted as $__user_*__
    incoming.update(
        model.User.user_template_environment(job.history and job.history.user))

    # Build params, done before hook so hook can use
    param_dict = self.build_param_dict(
        incoming,
        inp_data,
        out_data,
        output_collections=out_collections,
    )

    # Certain tools require tasks to be completed prior to job execution
    # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
    self.tool.exec_before_job(self.app, inp_data, out_data, param_dict)
    # Run the before queue ("exec_before_job") hook
    self.tool.call_hook('exec_before_job', self.app, inp_data=inp_data, out_data=out_data, tool=self.tool, param_dict=incoming)

    self.param_dict = param_dict
def get_data_inputs( self ):
    """Return name/label/extensions dicts for each data input of the tool."""
    results = []

    def record( input, value, prefixed_name, prefixed_label ):
        if isinstance( input, DataToolParameter ):
            results.append( { 'name': prefixed_name, 'label': prefixed_label, 'extensions': input.extensions } )

    visit_input_values( self.tool.inputs, self.state.inputs, record )
    return results
def execute(self, trans, progress, invocation, step):
    """Build the parameter combinations for this tool step, mapping over any
    matched input collections and wiring inputs to upstream replacements.

    NOTE(review): the error template reads "input mapping of '%s' in '%s'"
    but is filled with (tool.name, k.message) — confirm the intended
    placeholder order.
    """
    tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]

    collections_to_match = self._find_collections_to_match(
        tool, progress, step)
    # Have implicit collections...
    if collections_to_match.has_collections():
        collection_info = self.trans.app.dataset_collections_service.match_collections(
            collections_to_match)
    else:
        collection_info = None

    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [None]
    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy(execution_state.inputs)

        # Connect up
        def callback(input, value, prefixed_name, prefixed_label):
            replacement = None
            if isinstance(input, DataToolParameter) or isinstance(
                    input, DataCollectionToolParameter):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance(input, DataToolParameter):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[
                            prefixed_name].dataset_instance
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[prefixed_name]
                else:
                    replacement = progress.replacement_for_tool_input(
                        step, input, prefixed_name)
            return replacement
        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values(tool.inputs, execution_state.inputs, callback)
        except KeyError as k:  # "except ... as" works on Python 2.6+ and 3 (PEP 3110)
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            message = message_template % (tool.name, k.message)
            raise exceptions.MessageException(message)
        param_combinations.append(execution_state.inputs)
def add_dummy_datasets( self, connections=None):
    """Fill every (connected) data input with a transient DummyDataset.

    The placeholders are never persisted, so this runs on every rebuild.
    """
    connected_names = set()
    if connections:
        connected_names = set( conn.input_name for conn in connections )

    def replace( input, value, prefixed_name, prefixed_label ):
        if isinstance( input, DataToolParameter ):
            if connections is None or prefixed_name in connected_names:
                return DummyDataset()

    visit_input_values( self.tool.inputs, self.state.inputs, replace )
def execute(self, trans, progress, invocation, step):
    """Build the parameter combinations for this tool step, mapping over any
    matched input collections and wiring inputs to upstream replacements.

    NOTE(review): the error template reads "input mapping of '%s' in '%s'"
    but is filled with (tool.name, k.message) — confirm the intended
    placeholder order.
    """
    tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]

    collections_to_match = self._find_collections_to_match(tool, progress, step)
    # Have implicit collections...
    if collections_to_match.has_collections():
        collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
    else:
        collection_info = None

    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [None]
    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy(execution_state.inputs)

        # Connect up
        def callback(input, value, prefixed_name, prefixed_label):
            replacement = None
            if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance(input, DataToolParameter):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[prefixed_name].dataset_instance
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[prefixed_name]
                else:
                    replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            return replacement
        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values(tool.inputs, execution_state.inputs, callback)
        except KeyError as k:  # "except ... as" works on Python 2.6+ and 3 (PEP 3110)
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            message = message_template % (tool.name, k.message)
            raise exceptions.MessageException(message)
        param_combinations.append(execution_state.inputs)
def _execute_tool_step( self, step ):
    """Wire the step's data inputs to previously produced outputs, replacing
    DummyDataset placeholders before execution.

    :raises exceptions.MessageException: when an input name cannot be mapped
        (commonly caused by conditional outputs unknown until runtime).
    """
    trans = self.trans
    outputs = self.outputs

    tool = trans.app.toolbox.get_tool( step.tool_id )

    # Connect up
    def callback( input, value, prefixed_name, prefixed_label ):
        replacement = None
        if isinstance( input, DataToolParameter ):
            replacement = self._replacement_for_input( input, prefixed_name, step )
        return replacement
    try:
        # Replace DummyDatasets with historydatasetassociations
        visit_input_values( tool.inputs, step.state.inputs, callback )
    except KeyError as k:  # "except ... as" works on Python 2.6+ and 3 (PEP 3110)
        raise exceptions.MessageException( "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow." % (tool.name, k.message))
def _find_collections_to_match( self, tool, progress, step ):
    """Collect inputs whose incoming data requires mapping over a collection."""
    matches = matching.CollectionsToMatch()

    def inspect( input, value, prefixed_name, prefixed_label ):
        if isinstance( input, DataToolParameter ) and not input.multiple:
            data = progress.replacement_for_tool_input( step, input, prefixed_name )
            # A collection wired into a single-dataset input forces a map-over.
            if isinstance( data, model.HistoryDatasetCollectionAssociation ):
                matches.add( prefixed_name, data )
        if isinstance( input, DataCollectionToolParameter ) and not input.multiple:
            data = progress.replacement_for_tool_input( step, input, prefixed_name )
            history_query = input._history_query( self.trans )
            if history_query.can_map_over( data ):
                matches.add( prefixed_name, data, subcollection_type=input.collection_type )

    visit_input_values( tool.inputs, step.state.inputs, inspect )
    return matches
def set_compute_environment(self, compute_environment: ComputeEnvironment, get_special: Optional[Callable] = None):
    """
    Setup the compute environment and established the outline of the param_dict
    for evaluating command and config cheetah templates.
    """
    self.compute_environment = compute_environment

    job = self.job
    # Rehydrate the job's stored parameter strings into tool state.
    incoming = {p.name: p.value for p in job.parameters}
    incoming = self.tool.params_from_strings(incoming, self.app)

    # Full parameter validation
    request_context = WorkRequestContext(app=self.app, user=self._user, history=self._history)
    self.file_sources_dict = compute_environment.get_file_sources_dict()

    def validate_inputs(input, value, context, **kwargs):
        value = input.from_json(value, request_context, context)
        input.validate(value, request_context)
    visit_input_values(self.tool.inputs, incoming, validate_inputs)

    # Restore input / output data lists
    inp_data, out_data, out_collections = job.io_dicts()

    if get_special:
        special = get_special()
        if special:
            # NOTE(review): here the special value itself is used as the
            # output association (other variants wrap it) — confirm callers.
            out_data["output_file"] = special

    # These can be passed on the command line if wanted as $__user_*__
    incoming.update(model.User.user_template_environment(self._user))

    # Build params, done before hook so hook can use
    self.param_dict = self.build_param_dict(
        incoming,
        inp_data,
        out_data,
        output_collections=out_collections,
    )
    self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)
def create(self, trans, payload, **kwd):
    """
    POST /api/workflows
    We're not creating workflows from the api. Just execute for now.
    However, we will import them if installed_repository_file is specified
    """
    # Optional per-tool parameter overrides supplied by the caller
    # (RPARK extension: which workflow tool params to change).
    param_map = {}
    if 'parameters' in payload:  # dict.has_key was removed in Python 3
        param_map = payload['parameters']

    if 'workflow_id' not in payload:
        # create new
        if 'installed_repository_file' in payload:
            workflow_controller = trans.webapp.controllers['workflow']
            result = workflow_controller.import_workflow(trans=trans, cntrller='api', **payload)
            return result
        trans.response.status = 403
        return "Either workflow_id or installed_repository_file must be specified"
    if 'installed_repository_file' in payload:
        trans.response.status = 403
        return "installed_repository_file may not be specified with workflow_id"
    stored_workflow = trans.sa_session.query(
        self.app.model.StoredWorkflow).get(
            trans.security.decode_id(payload['workflow_id']))
    # Accessible if owned, admin, or explicitly shared with the user.
    if stored_workflow.user != trans.user and not trans.user_is_admin():
        if trans.sa_session.query(
                trans.app.model.StoredWorkflowUserShareAssociation
        ).filter_by(user=trans.user, stored_workflow=stored_workflow).count() == 0:
            trans.response.status = 400
            return ("Workflow is not owned by or shared with current user")
    workflow = stored_workflow.latest_workflow
    if payload['history'].startswith('hist_id='):
        # Passing an existing history to use.
        history = trans.sa_session.query(self.app.model.History).get(
            trans.security.decode_id(payload['history'][8:]))
        if history.user != trans.user and not trans.user_is_admin():
            trans.response.status = 400
            return "Invalid History specified."
    else:
        history = self.app.model.History(name=payload['history'], user=trans.user)
        trans.sa_session.add(history)
        trans.sa_session.flush()
    ds_map = payload['ds_map']
    add_to_history = 'no_add_to_history' not in payload
    # Resolve each dataset reference (library or history) into an HDA.
    for k in ds_map:
        try:
            if ds_map[k]['src'] == 'ldda':
                ldda = trans.sa_session.query(
                    self.app.model.LibraryDatasetDatasetAssociation).get(
                        trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin(
                ) or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(
                    history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'ld':
                ldda = trans.sa_session.query(
                    self.app.model.LibraryDataset).get(
                        trans.security.decode_id(
                            ds_map[k]['id'])).library_dataset_dataset_association
                assert trans.user_is_admin(
                ) or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(
                    history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'hda':
                # Get dataset handle, add to dict and history if necessary
                hda = trans.sa_session.query(
                    self.app.model.HistoryDatasetAssociation).get(
                        trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin(
                ) or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), hda.dataset)
            else:
                trans.response.status = 400
                return "Unknown dataset source '%s' specified." % ds_map[k]['src']
            if add_to_history and hda.history != history:
                hda = hda.copy()
                history.add_dataset(hda)
            ds_map[k]['hda'] = hda
        except AssertionError:
            trans.response.status = 400
            return "Invalid Dataset '%s' Specified" % ds_map[k]['id']
    if not workflow:
        trans.response.status = 400
        return "Workflow not found."
    if len(workflow.steps) == 0:
        trans.response.status = 400
        return "Workflow cannot be run because it does not have any steps"
    if workflow.has_cycles:
        trans.response.status = 400
        return "Workflow cannot be run because it contains cycles"
    if workflow.has_errors:
        trans.response.status = 400
        return "Workflow cannot be run because of validation errors in some steps"
    # Build the state for each step
    rval = {}
    for step in workflow.steps:
        step_errors = None
        if step.type == 'tool' or step.type is None:
            step.module = module_factory.from_workflow_step(trans, step)
            # Check for missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            step.state = step.module.state
            # RPARK: apply caller-supplied parameter override for this tool.
            if step.tool_id in param_map:
                change_param = param_map[step.tool_id]['param']
                change_value = param_map[step.tool_id]['value']
                step.state.inputs[change_param] = change_value
            if step.tool_errors:
                trans.response.status = 400
                # NOTE(review): step_errors is always None here — the tool
                # errors are in step.tool_errors; confirm intended message.
                return "Workflow cannot be run because of validation errors in some steps: %s" % step_errors
            if step.upgrade_messages:
                trans.response.status = 400
                return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
        else:
            # This is an input step. Make sure we have an available input.
            if step.type == 'data_input' and str(step.id) not in ds_map:
                trans.response.status = 400
                return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
            step.module = module_factory.from_workflow_step(trans, step)
            step.state = step.module.get_runtime_state()
        step.input_connections_by_name = dict(
            (conn.input_name, conn) for conn in step.input_connections)
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = util.odict.odict()
    rval['history'] = trans.security.encode_id(history.id)
    rval['outputs'] = []
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(trans, step.state.inputs, history=history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app, trans.sa_session, pja, job, replacement_dict=None)
                else:
                    job.add_post_job_action(pja)
            # .values() iterates identically to Py2's itervalues() and works on Py3.
            for v in out_data.values():
                rval['outputs'].append(trans.security.encode_id(v.id))
        else:
            # This is an input step. Use the dataset inputs from ds_map.
            job, out_data = step.module.execute(trans, step.state)
            outputs[step.id] = out_data
            outputs[step.id]['output'] = ds_map[str(step.id)]['hda']
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    trans.sa_session.add(workflow_invocation)
    trans.sa_session.flush()
    return rval
def _execute_workflow(self, sample):
    """Run the workflow associated with a sample, importing mapped library
    datasets into the sample's history first.
    """
    # Import any mapped library datasets (LDDAs) into the sample history.
    for key, value in sample.workflow['mappings'].items():
        if 'hda' not in value and 'ldda' in value:
            # If HDA is already here, it's an external input, we're not copying anything.
            ldda = self.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(value['ldda'])
            if ldda.dataset.state in ['new', 'upload', 'queued', 'running', 'empty', 'discarded']:
                log.error("Cannot import dataset '%s' to user history since its state is '%s'. " % (ldda.name, ldda.dataset.state))
            elif ldda.dataset.state in ['ok', 'error']:
                hda = ldda.to_history_dataset_association(target_history=sample.history, add_to_history=True)
                sample.workflow['mappings'][key]['hda'] = hda.id
                self.sa_session.add(sample)
                self.sa_session.flush()
    workflow_dict = sample.workflow
    import copy
    # Mapping keys may arrive as strings (e.g. from JSON); add int-keyed
    # aliases. NOTE(review): the original string keys are kept alongside the
    # int keys, and later lookups use both forms — confirm this is intended.
    new_wf_dict = copy.deepcopy(workflow_dict)
    for key in workflow_dict['mappings']:
        if not isinstance(key, int):
            new_wf_dict['mappings'][int(key)] = workflow_dict['mappings'][key]
    workflow_dict = new_wf_dict
    fk_trans = FakeTrans(self.app, history=sample.history, user=sample.request.user)
    workflow = self.sa_session.query(self.app.model.Workflow).get(workflow_dict['id'])
    if not workflow:
        log.error("Workflow mapping failure.")
        return
    if len(workflow.steps) == 0:
        log.error("Workflow cannot be run because it does not have any steps")
        return
    if workflow.has_cycles:
        log.error("Workflow cannot be run because it contains cycles")
        return
    if workflow.has_errors:
        log.error("Workflow cannot be run because of validation errors in some steps")
        return
    # Build the state for each step
    errors = {}
    # Build a fake dictionary prior to execution.
    # Prepare each step
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Construct modules
        if step.type == 'tool' or step.type is None:
            # Restore the tool state for the step
            step.module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            # Store state with the step
            step.state = step.module.state
            # Error dict
            if step.tool_errors:
                errors[step.id] = step.tool_errors
        else:
            # Non-tool specific stuff?
            step.module = module_factory.from_workflow_step(fk_trans, step)
            step.state = step.module.get_runtime_state()
        # Connections by input name
        step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
    # NOTE(review): this second pass repeats most of the preparation above
    # (modules rebuilt, dummy datasets re-added) — looks like historical
    # duplication; confirm before consolidating.
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Connections by input name
        step.input_connections_by_name = \
            dict((conn.input_name, conn) for conn in step.input_connections)
        # Extract just the arguments for this step by prefix
        step_errors = None
        if step.type == 'tool' or step.type is None:
            module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            module.add_dummy_datasets(connections=step.input_connections)
            # Get the tool
            tool = module.tool
            # Get the state
            step.state = state = module.state
        if step_errors:
            errors[step.id] = state.inputs["__errors__"] = step_errors
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = odict()
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            def callback(input, prefixed_name, **kwargs):
                # Feed connected inputs from upstream step outputs.
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(fk_trans, step.state.inputs, history=sample.history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app, self.sa_session, pja, job, replacement_dict=None)
                else:
                    job.add_post_job_action(pja)
        else:
            job, out_data = step.module.execute(fk_trans, step.state)
            outputs[step.id] = out_data
            # NOTE(review): membership is tested with the int key but the
            # lookup uses str(step.id) — relies on both key forms existing
            # after the aliasing above; confirm.
            if step.id in workflow_dict['mappings']:
                data = self.sa_session.query(self.app.model.HistoryDatasetAssociation).get(workflow_dict['mappings'][str(step.id)]['hda'])
                outputs[step.id]['output'] = data
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    self.sa_session.add(workflow_invocation)
    self.sa_session.flush()
def _execute_workflow(self, sample):
    """
    Run the workflow attached to *sample* against the sample's history.

    Phases:
      1. Copy any library datasets (LDDAs) referenced in the workflow mappings
         into the sample history as HDAs (skipping datasets in non-terminal states).
      2. Normalize mapping keys and load/validate the workflow from the database.
      3. Build per-step module state (two passes, mirroring the workflow editor).
      4. Execute each step in order, wiring step outputs into downstream inputs,
         and record a WorkflowInvocation/WorkflowInvocationStep for each step.

    Errors are logged (not raised); the method returns None early when the
    workflow is missing, empty, cyclic, or has validation errors.
    """
    for key, value in sample.workflow['mappings'].items():
        if 'hda' not in value and 'ldda' in value:
            # If HDA is already here, it's an external input, we're not copying anything.
            ldda = self.sa_session.query(
                self.app.model.LibraryDatasetDatasetAssociation).get(
                    value['ldda'])
            if ldda.dataset.state in [
                    'new', 'upload', 'queued', 'running', 'empty', 'discarded'
            ]:
                # Dataset not in a terminal state yet; importing it now would
                # hand the workflow an unusable input.
                log.error(
                    "Cannot import dataset '%s' to user history since its state is '%s'.  "
                    % (ldda.name, ldda.dataset.state))
            elif ldda.dataset.state in ['ok', 'error']:
                hda = ldda.to_history_dataset_association(
                    target_history=sample.history, add_to_history=True)
                sample.workflow['mappings'][key]['hda'] = hda.id
                self.sa_session.add(sample)
                self.sa_session.flush()
    workflow_dict = sample.workflow
    import copy
    new_wf_dict = copy.deepcopy(workflow_dict)
    for key in workflow_dict['mappings']:
        if not isinstance(key, int):
            # JSON round-trips turn int keys into strings; add int aliases.
            # NOTE(review): the original string keys survive the deepcopy, so
            # 'mappings' ends up holding BOTH str and int keys — the execution
            # loop below relies on this (membership test uses int, lookup uses
            # str(step.id)). Confirm before "cleaning up" either access.
            new_wf_dict['mappings'][int(
                key)] = workflow_dict['mappings'][key]
    workflow_dict = new_wf_dict
    fk_trans = FakeTrans(self.app,
                         history=sample.history,
                         user=sample.request.user)
    workflow = self.sa_session.query(self.app.model.Workflow).get(
        workflow_dict['id'])
    if not workflow:
        log.error("Workflow mapping failure.")
        return
    if len(workflow.steps) == 0:
        log.error(
            "Workflow cannot be run because it does not have any steps")
        return
    if workflow.has_cycles:
        log.error("Workflow cannot be run because it contains cycles")
        return
    if workflow.has_errors:
        log.error(
            "Workflow cannot be run because of validation errors in some steps"
        )
        return
    # Build the state for each step
    errors = {}
    # Build a fake dictionary prior to execution.
    # Prepare each step
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Construct modules
        if step.type == 'tool' or step.type is None:
            # Restore the tool state for the step
            step.module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(
                connections=step.input_connections)
            # Store state with the step
            step.state = step.module.state
            # Error dict
            if step.tool_errors:
                errors[step.id] = step.tool_errors
        else:
            # Non-tool specific stuff?
            step.module = module_factory.from_workflow_step(fk_trans, step)
            step.state = step.module.get_runtime_state()
        # Connections by input name
        step.input_connections_by_name = dict(
            (conn.input_name, conn) for conn in step.input_connections)
    # Second pass: re-derive module state per step (mirrors the editor's
    # preparation; note step_errors is never set, so the errors assignment
    # below is currently dead — NOTE(review): looks vestigial, confirm).
    for step in workflow.steps:
        step.upgrade_messages = {}
        # Connections by input name
        step.input_connections_by_name = \
            dict((conn.input_name, conn) for conn in step.input_connections)
        # Extract just the arguments for this step by prefix
        step_errors = None
        if step.type == 'tool' or step.type is None:
            module = module_factory.from_workflow_step(fk_trans, step)
            # Fix any missing parameters
            step.upgrade_messages = module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            module.add_dummy_datasets(connections=step.input_connections)
            # Get the tool
            tool = module.tool
            # Get the state
            step.state = state = module.state
            if step_errors:
                errors[step.id] = state.inputs["__errors__"] = step_errors
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = odict()
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            # Replace connected data inputs with the matching output of the
            # upstream step (already executed — steps run in order).
            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(fk_trans,
                                         step.state.inputs,
                                         history=sample.history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app,
                                      self.sa_session,
                                      pja,
                                      job,
                                      replacement_dict=None)
                else:
                    # Deferred actions run when the job completes.
                    job.add_post_job_action(pja)
        else:
            # Input (non-tool) step: its "output" is the mapped dataset.
            job, out_data = step.module.execute(fk_trans, step.state)
            outputs[step.id] = out_data
            if step.id in workflow_dict['mappings']:
                # See NOTE above: membership test uses the int key, the
                # lookup uses the original string key — both exist.
                data = self.sa_session.query(
                    self.app.model.HistoryDatasetAssociation).get(
                        workflow_dict['mappings'][str(step.id)]['hda'])
                outputs[step.id]['output'] = data
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    self.sa_session.add(workflow_invocation)
    self.sa_session.flush()
def set_compute_environment( self, compute_environment, get_special=None ):
    """
    Setup the compute environment and established the outline of the param_dict
    for evaluating command and config cheetah templates.

    Steps, in order (ordering matters — validation must see the decoded
    incoming params, and exec_before_job hooks must see the built param_dict):
      1. Decode and validate the job's stored parameters.
      2. Rebuild the input/output dataset and collection dictionaries.
      3. Optionally wrap a "special" dataset (export-history jobs) so it can
         stand in for a real dataset association.
      4. Build param_dict and fire the exec_before_job hook(s).

    :param compute_environment: provides output/input paths and the working
        directory for the job.
    :param get_special: optional callable returning an object with a
        ``.dataset`` attribute, used only for export-history style jobs.
    """
    self.compute_environment = compute_environment
    self.unstructured_path_rewriter = compute_environment.unstructured_path_rewriter()
    job = self.job
    # Parameters are persisted as strings; decode them back to tool values.
    incoming = dict( [ ( p.name, p.value ) for p in job.parameters ] )
    incoming = self.tool.params_from_strings( incoming, self.app )
    # Full parameter validation
    request_context = WorkRequestContext( app=self.app, user=job.history and job.history.user, history=job.history )

    def validate_inputs( input, value, context, **kwargs ):
        value = input.from_json( value, request_context, context )
        input.validate( value, request_context )
    visit_input_values( self.tool.inputs, incoming, validate_inputs )
    # Restore input / output data lists
    inp_data = dict( [ ( da.name, da.dataset ) for da in job.input_datasets ] )
    out_data = dict( [ ( da.name, da.dataset ) for da in job.output_datasets ] )
    inp_data.update( [ ( da.name, da.dataset ) for da in job.input_library_datasets ] )
    out_data.update( [ ( da.name, da.dataset ) for da in job.output_library_datasets ] )
    out_collections = dict( [ ( obj.name, obj.dataset_collection_instance ) for obj in job.output_dataset_collection_instances ] )
    out_collections.update( [ ( obj.name, obj.dataset_collection ) for obj in job.output_dataset_collections ] )
    if get_special:
        # Set up output dataset association for export history jobs. Because job
        # uses a Dataset rather than an HDA or LDA, it's necessary to set up a
        # fake dataset association that provides the needed attributes for
        # preparing a job.
        class FakeDatasetAssociation ( object ):
            def __init__( self, dataset=None ):
                self.dataset = dataset
                self.file_name = dataset.file_name
                self.metadata = dict()
                self.children = []
        special = get_special()
        if special:
            out_data[ "output_file" ] = FakeDatasetAssociation( dataset=special.dataset )
    # These can be passed on the command line if wanted as $__user_*__
    incoming.update( model.User.user_template_environment( job.history and job.history.user ) )
    # Build params, done before hook so hook can use
    param_dict = self.build_param_dict(
        incoming,
        inp_data,
        out_data,
        output_collections=out_collections,
        output_paths=compute_environment.output_paths(),
        job_working_directory=compute_environment.working_directory(),
        input_paths=compute_environment.input_paths()
    )
    # Certain tools require tasks to be completed prior to job execution
    # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
    self.tool.exec_before_job( self.app, inp_data, out_data, param_dict )
    # Run the before queue ("exec_before_job") hook
    # NOTE(review): the hook receives `incoming` as param_dict while
    # exec_before_job above receives the built `param_dict` — confirm this
    # asymmetry is intentional before changing either call.
    self.tool.call_hook( 'exec_before_job', self.app, inp_data=inp_data,
                         out_data=out_data, tool=self.tool, param_dict=incoming)
    self.param_dict = param_dict
class ToolRunner(BaseUIController):
    """
    Web controller that renders tool forms and dispatches tool execution,
    including re-running a previously executed job with its original
    parameters remapped into the current history.
    """

    # Hack to get biomart to work, ideally, we could pass tool_id to biomart and receive it back
    @web.expose
    def biomart(self, trans, tool_id='biomart', **kwd):
        """Catches the tool id and redirects as needed"""
        return self.index(trans, tool_id=tool_id, **kwd)

    # test to get hapmap to work, ideally, we could pass tool_id to hapmap biomart and receive it back
    @web.expose
    def hapmapmart(self, trans, tool_id='hapmapmart', **kwd):
        """Catches the tool id and redirects as needed"""
        return self.index(trans, tool_id=tool_id, **kwd)

    @web.expose
    def default(self, trans, tool_id=None, **kwd):
        """Catches the tool id and redirects as needed"""
        return self.index(trans, tool_id=tool_id, **kwd)

    def __get_tool_components(self, tool_id, tool_version=None, get_loaded_tools_by_lineage=False, set_selected=False):
        # Thin pass-through to the toolbox; returns
        # (tool_version_select_field, tools, tool).
        return self.get_toolbox().get_tool_components(
            tool_id, tool_version, get_loaded_tools_by_lineage, set_selected)

    @web.expose
    def index(self, trans, tool_id=None, from_noframe=None, **kwd):
        """
        Render the tool form for *tool_id*, handling login requirements,
        datasource parameter translation, and stale-tool_state recovery.
        """
        # No tool id passed, redirect to main page
        if tool_id is None:
            return trans.response.send_redirect(
                url_for(controller="root", action="welcome"))
        # When the tool form is initially loaded, the received kwd will not include a 'refresh'
        # entry (which only is included when another option is selected in the tool_version_select_field),
        # so the default selected option should be the most recent version of the tool. The following
        # check will make sure this occurs.
        refreshed_on_change = kwd.get('refresh', False)
        tool_version_select_field, tools, tool = self.__get_tool_components(
            tool_id,
            tool_version=None,
            get_loaded_tools_by_lineage=False,
            set_selected=refreshed_on_change)
        # No tool matching the tool id, display an error (shouldn't happen)
        if not tool or not tool.allow_user_access(trans.user):
            log.error("index called with tool id '%s' but no such tool exists",
                      tool_id)
            trans.log_event("Tool id '%s' does not exist" % tool_id)
            trans.response.status = 404
            return trans.show_error_message("Tool '%s' does not exist." %
                                            (escape(tool_id)))
        if tool.require_login and not trans.user:
            message = "You must be logged in to use this tool."
            status = "info"
            redirect = url_for(controller='tool_runner',
                               action='index',
                               tool_id=tool_id,
                               **kwd)
            return trans.response.send_redirect(
                url_for(controller='user',
                        action='login',
                        cntrller='user',
                        message=message,
                        status=status,
                        redirect=redirect))

        def _validated_params_for(kwd):
            # Build a Params object without sanitizing (sanitization happens
            # later, when substituting into the command line via wrappers).
            params = galaxy.util.Params( kwd, sanitize=False )
            # do param translation here, used by datasource tools
            if tool.input_translator:
                tool.input_translator.translate(params)
            return params

        params = _validated_params_for(kwd)
        # We may be visiting Galaxy for the first time ( e.g., sending data from UCSC ),
        # so make sure to create a new history if we've never had one before.
        history = tool.get_default_history_by_trans(trans, create=True)
        try:
            template, vars = tool.handle_input(trans, params.__dict__)
        except KeyError:
            # This error indicates (or at least can indicate) there was a
            # problem with the stored tool_state - it is incompatible with
            # this variant of the tool - possibly because the tool changed
            # or because the tool version changed.
            # Recovery: drop the stale state and rebuild params from scratch.
            del kwd["tool_state"]
            params = _validated_params_for(kwd)
            template, vars = tool.handle_input(trans, params.__dict__)
        if len(params) > 0:
            trans.log_event("Tool params: %s" % (str(params)),
                            tool_id=tool_id)
        add_frame = AddFrameData()
        add_frame.debug = trans.debug
        if from_noframe is not None:
            add_frame.wiki_url = trans.app.config.wiki_url
            add_frame.from_noframe = True
        return trans.fill_template(template,
                                   history=history,
                                   toolbox=self.get_toolbox(),
                                   tool_version_select_field=tool_version_select_field,
                                   tool=tool,
                                   util=galaxy.util,
                                   add_frame=add_frame,
                                   form_input_auto_focus=True,
                                   **vars)

    @web.expose
    def rerun(self, trans, id=None, from_noframe=None, job_id=None, **kwd):
        """
        Given a HistoryDatasetAssociation id, find the job and that created
        the dataset, extract the parameters, and display the appropriate tool
        form with parameters already filled in.

        Either *job_id* (encoded) or *id* (HDA id, encoded or raw int) must be
        supplied. Access checks are enforced for non-admin users on every
        dataset involved.
        """
        if job_id:
            try:
                job_id = trans.security.decode_id(job_id)
                job = trans.sa_session.query(trans.app.model.Job).get(job_id)
            except:
                error("Invalid value for 'job_id' parameter")
            if not trans.user_is_admin():
                for data_assoc in job.output_datasets:
                    # only allow rerunning if user is allowed access to the dataset.
                    if not trans.app.security_agent.can_access_dataset(
                            trans.get_current_user_roles(),
                            data_assoc.dataset.dataset):
                        error("You are not allowed to rerun this job")
            param_error_text = "Failed to get parameters for job id %d " % job_id
        else:
            if not id:
                error("'id' parameter is required")
            try:
                id = int(id)
            except:
                # it's not an un-encoded id, try to parse as encoded
                try:
                    id = trans.security.decode_id(id)
                except:
                    error("Invalid value for 'id' parameter")
            # Get the dataset object
            data = trans.sa_session.query(
                trans.app.model.HistoryDatasetAssociation).get(id)
            # only allow rerunning if user is allowed access to the dataset.
            if not (trans.user_is_admin()
                    or trans.app.security_agent.can_access_dataset(
                        trans.get_current_user_roles(), data.dataset)):
                error("You are not allowed to access this dataset")
            # Get the associated job, if any.
            job = data.creating_job
            if not job:
                raise Exception(
                    "Failed to get job information for dataset hid %d" %
                    data.hid)
            param_error_text = "Failed to get parameters for dataset id %d " % data.id
        # Get the tool object
        tool_id = job.tool_id
        tool_version = job.tool_version
        try:
            # Compose a human-readable message describing any mismatch between
            # the tool id/version the job ran with and what is loaded now.
            tool_version_select_field, tools, tool = self.__get_tool_components(
                tool_id,
                tool_version=tool_version,
                get_loaded_tools_by_lineage=False,
                set_selected=True)
            if (tool.id == job.tool_id or tool.old_id == job.tool_id) and tool.version == job.tool_version:
                tool_id_version_message = ''
            elif tool.id == job.tool_id:
                if job.tool_version is None:
                    # For some reason jobs don't always keep track of the tool version.
                    tool_id_version_message = ''
                else:
                    tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available.  ' % job.tool_version
                    if len(tools) > 1:
                        tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
                    else:
                        tool_id_version_message += 'You can rerun the job with this tool version, which is a derivation of the original tool.'
            else:
                if len(tools) > 1:
                    tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available.  ' % job.tool_version
                    tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
                else:
                    tool_id_version_message = 'This job was initially run with tool id "%s", version "%s", which is not ' % (
                        job.tool_id, job.tool_version)
                    tool_id_version_message += 'currently available.  You can rerun the job with this tool, which is a derivation of the original tool.'
            assert tool is not None, 'Requested tool has not been loaded.'
        except:
            # This is expected so not an exception.
            tool_id_version_message = ''
            error(
                "This dataset was created by an obsolete tool (%s). Can't re-run."
                % tool_id)
        if not tool.allow_user_access(trans.user):
            error("The requested tool is unknown.")
        # Can't rerun upload, external data sources, et cetera. Workflow compatible will proxy this for now
        if not tool.is_workflow_compatible:
            error("The '%s' tool does not currently support rerunning." %
                  tool.name)
        # Get the job's parameters
        try:
            params_objects = job.get_param_values(trans.app,
                                                  ignore_errors=True)
        except:
            raise Exception(param_error_text)
        upgrade_messages = tool.check_and_update_param_values(
            params_objects, trans, update_values=False)
        # Need to remap dataset parameters. Job parameters point to original
        # dataset used; parameter should be the analogous dataset in the
        # current history.
        history = trans.get_history()
        hda_source_dict = {}  # Mapping from HDA in history to source HDAs.
        for hda in history.datasets:
            source_hda = hda.copied_from_history_dataset_association
            while source_hda:  # should this check library datasets as well?
                # FIXME: could be multiple copies of a hda in a single history, this does a better job of matching on cloned histories,
                # but is still less than perfect when eg individual datasets are copied between histories
                if source_hda not in hda_source_dict or source_hda.hid == hda.hid:
                    hda_source_dict[source_hda] = hda
                source_hda = source_hda.copied_from_history_dataset_association
        # Ditto for dataset collections.
        hdca_source_dict = {}
        for hdca in history.dataset_collections:
            source_hdca = hdca.copied_from_history_dataset_collection_association
            while source_hdca:
                if source_hdca not in hdca_source_dict or source_hdca.hid == hdca.hid:
                    hdca_source_dict[source_hdca] = hdca
                source_hdca = source_hdca.copied_from_history_dataset_collection_association

        # Unpack unvalidated values to strings, they'll be validated when the
        # form is submitted (this happens when re-running a job that was
        # initially run by a workflow)
        # This needs to be done recursively through grouping parameters
        def rerun_callback(input, value, prefixed_name, prefixed_label):
            if isinstance(value, UnvalidatedValue):
                try:
                    return input.to_html_value(value.value, trans.app)
                except Exception, e:
                    # Need to determine when (if ever) the to_html_value call could fail.
                    log.debug(
                        "Failed to use input.to_html_value to determine value of unvalidated parameter, defaulting to string: %s"
                        % (e))
                    return str(value)
            if isinstance(input, DataToolParameter):
                if isinstance(value, list):
                    # Multi-dataset parameter: keep values already in this
                    # history, remap copies back to their local counterpart.
                    values = []
                    for val in value:
                        if is_hashable(val):
                            if val in history.datasets:
                                values.append(val)
                            elif val in hda_source_dict:
                                values.append(hda_source_dict[val])
                    return values
                if is_hashable( value ) and value not in history.datasets and value in hda_source_dict:
                    return hda_source_dict[value]
            elif isinstance(input, DataCollectionToolParameter):
                if is_hashable( value ) and value not in history.dataset_collections and value in hdca_source_dict:
                    return hdca_source_dict[value]
        visit_input_values(tool.inputs, params_objects, rerun_callback)
        # Create a fake tool_state for the tool, with the parameters values
        state = tool.new_state(trans)
        state.inputs = params_objects
        # If the job failed and has dependencies, allow dependency remap
        if job.state == job.states.ERROR:
            try:
                if [ hda.dependent_jobs for hda in [jtod.dataset for jtod in job.output_datasets] if hda.dependent_jobs ]:
                    state.rerun_remap_job_id = trans.app.security.encode_id(
                        job.id)
            except:
                # Job has no outputs?
                pass
        # create an incoming object from the original job's dataset-modified param objects
        incoming = {}
        params_to_incoming(incoming, tool.inputs, params_objects, trans.app)
        incoming["tool_state"] = galaxy.util.object_to_string(
            state.encode(tool, trans.app))
        template, vars = tool.handle_input(
            trans, incoming,
            old_errors=upgrade_messages )  # update new state with old parameters
        # Is the "add frame" stuff necessary here?
        add_frame = AddFrameData()
        add_frame.debug = trans.debug
        if from_noframe is not None:
            add_frame.wiki_url = trans.app.config.wiki_url
            add_frame.from_noframe = True
        return trans.fill_template(template,
                                   history=history,
                                   toolbox=self.get_toolbox(),
                                   tool_version_select_field=tool_version_select_field,
                                   tool=tool,
                                   job=job,
                                   util=galaxy.util,
                                   add_frame=add_frame,
                                   tool_id_version_message=tool_id_version_message,
                                   **vars)
def _workflow_to_dict_editor(self, trans, stored):
    """
    Serialize *stored*'s latest workflow into the dictionary shape the
    workflow editor consumes: per-step state, forms, input/output metadata,
    connections, post-job actions, and any tool-upgrade messages.

    :param stored: a StoredWorkflow; its ``latest_workflow`` is serialized.
    :raises exceptions.MessageException: if a step's type cannot be resolved
        to a module.
    :returns: dict with 'name', 'steps' (keyed by step order_index) and
        'upgrade_messages'.
    """
    workflow = stored.latest_workflow
    # Pack workflow data into a dictionary and return
    data = {}
    data['name'] = workflow.name
    data['steps'] = {}
    data['upgrade_messages'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans,
                                                   step,
                                                   exact_tools=False)
        if not module:
            raise exceptions.MessageException(
                'Unrecognized step type: %s' % step.type)
        # Load label from state of data input modules, necessary for backward compatibility
        self.__set_default_label(step, module, step.tool_inputs)
        # Fix any missing parameters
        upgrade_message = module.check_and_update_state()
        if upgrade_message:
            data['upgrade_messages'][step.order_index] = upgrade_message
        if (hasattr(module, "version_changes")) and (module.version_changes):
            # Merge tool version-change notices into any existing upgrade
            # message for this step rather than overwriting it.
            if step.order_index in data['upgrade_messages']:
                data['upgrade_messages'][step.order_index][
                    module.tool.name] = "\n".join(module.version_changes)
            else:
                data['upgrade_messages'][step.order_index] = {
                    module.tool.name: "\n".join(module.version_changes)
                }
        # Get user annotation.
        annotation_str = self.get_item_annotation_str(
            trans.sa_session, trans.user, step) or ''
        config_form = module.get_config_form()
        # Pack attributes into plain dictionary
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'label': module.label,
            'content_id': module.get_content_id(),
            'name': module.get_name(),
            'tool_state': module.get_state(),
            'tooltip': module.get_tooltip(static_path=url_for('/static')),
            'errors': module.get_errors(),
            'data_inputs': module.get_data_inputs(),
            'data_outputs': module.get_data_outputs(),
            'config_form': config_form,
            'annotation': annotation_str,
            'post_job_actions': {},
            'uuid': str(step.uuid) if step.uuid else None,
            'workflow_outputs': []
        }
        # Connections
        input_connections = step.input_connections
        input_connections_type = {}
        multiple_input = {
        }  # Boolean value indicating if this can be multiple
        if (step.type is None or step.type == 'tool') and module.tool:
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(
                        input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
                    multiple_input[prefixed_name] = input.multiple
                    if isinstance(input, DataToolParameter):
                        input_connections_type[input.name] = "dataset"
                    if isinstance(input, DataCollectionToolParameter):
                        input_connections_type[
                            input.name] = "dataset_collection"
            visit_input_values(module.tool.inputs, module.state.inputs,
                               callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [
                conn for conn in input_connections
                if conn.input_name in data_input_names
            ]
        # post_job_actions
        pja_dict = {}
        for pja in step.post_job_actions:
            # Keyed by action_type + output_name so multiple actions of the
            # same type on different outputs coexist.
            pja_dict[pja.action_type + pja.output_name] = dict(
                action_type=pja.action_type,
                output_name=pja.output_name,
                action_arguments=pja.action_arguments)
        step_dict['post_job_actions'] = pja_dict
        # workflow outputs
        outputs = []
        for output in step.unique_workflow_outputs:
            output_label = output.label
            output_name = output.output_name
            output_uuid = str(output.uuid) if output.uuid else None
            outputs.append({
                "output_name": output_name,
                "uuid": output_uuid,
                "label": output_label
            })
        step_dict['workflow_outputs'] = outputs
        # Encode input connections as dictionary
        input_conn_dict = {}
        for conn in input_connections:
            input_type = "dataset"
            if conn.input_name in input_connections_type:
                input_type = input_connections_type[conn.input_name]
            conn_dict = dict(id=conn.output_step.order_index,
                             output_name=conn.output_name,
                             input_type=input_type)
            if conn.input_name in multiple_input:
                # Multi-input parameters are encoded as a list of connections.
                if conn.input_name in input_conn_dict:
                    input_conn_dict[conn.input_name].append(conn_dict)
                else:
                    input_conn_dict[conn.input_name] = [conn_dict]
            else:
                input_conn_dict[conn.input_name] = conn_dict
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _workflow_to_dict_editor(self, trans, stored):
    """
    Serialize *stored*'s latest workflow for the (legacy) workflow editor.

    Unlike the newer variant, an unresolvable step does not raise: it is
    emitted as a placeholder step of type 'invalid' with explanatory HTML so
    the workflow can still be opened and repaired in the editor.
    """
    workflow = stored.latest_workflow
    # Pack workflow data into a dictionary and return
    data = {}
    data['name'] = workflow.name
    data['steps'] = {}
    data['upgrade_messages'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        if not module:
            # Tool could not be resolved: emit a placeholder "invalid" step
            # instead of failing, so the editor can still render the workflow.
            step_annotation = self.get_item_annotation_obj(
                trans.sa_session, trans.user, step)
            annotation_str = ""
            if step_annotation:
                annotation_str = step_annotation.annotation
            invalid_tool_form_html = """<div class="toolForm tool-node-error">
            <div class="toolFormTitle form-row-error">Unrecognized Tool: %s</div>
            <div class="toolFormBody"><div class="form-row">
            The tool id '%s' for this tool is unrecognized.<br/><br/>
            To save this workflow, you will need to delete this step or enable the tool.
            </div></div></div>""" % (
                step.tool_id, step.tool_id)
            step_dict = {
                'id': step.order_index,
                'type': 'invalid',
                'content_id': step.content_id,
                'name': 'Unrecognized Tool: %s' % step.tool_id,
                'tool_state': None,
                'tooltip': None,
                'tool_errors': ["Unrecognized Tool Id: %s" % step.tool_id],
                'data_inputs': [],
                'data_outputs': [],
                'form_html': invalid_tool_form_html,
                'annotation': annotation_str,
                'input_connections': {},
                'post_job_actions': {},
                'uuid': str(step.uuid),
                'label': step.label or None,
                'workflow_outputs': []
            }
            # Position
            step_dict['position'] = step.position
            # Add to return value
            data['steps'][step.order_index] = step_dict
            continue
        # Fix any missing parameters
        upgrade_message = module.check_and_update_state()
        if upgrade_message:
            data['upgrade_messages'][step.order_index] = upgrade_message
        if (hasattr(module, "version_changes")) and (module.version_changes):
            # Merge version-change notices into any existing upgrade message.
            if step.order_index in data['upgrade_messages']:
                data['upgrade_messages'][step.order_index][
                    module.tool.name] = "\n".join(module.version_changes)
            else:
                data['upgrade_messages'][step.order_index] = {
                    module.tool.name: "\n".join(module.version_changes)
                }
        # Get user annotation.
        step_annotation = self.get_item_annotation_obj(
            trans.sa_session, trans.user, step)
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation
        form_html = None
        if trans.history:
            # If in a web session, attach form html. No reason to do
            # so for API requests.
            form_html = module.get_config_form()
        # Pack attributes into plain dictionary
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': module.get_content_id(),
            'name': module.get_name(),
            'tool_state': module.get_state(),
            'tooltip': module.get_tooltip(static_path=url_for('/static')),
            'tool_errors': module.get_errors(),
            'data_inputs': module.get_data_inputs(),
            'data_outputs': module.get_data_outputs(),
            'form_html': form_html,
            'annotation': annotation_str,
            'post_job_actions': {},
            'uuid': str(step.uuid) if step.uuid else None,
            'label': step.label or None,
            'workflow_outputs': []
        }
        # Connections
        input_connections = step.input_connections
        input_connections_type = {}
        multiple_input = {
        }  # Boolean value indicating if this can be multiple
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(
                        input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
                    multiple_input[prefixed_name] = input.multiple
                    if isinstance(input, DataToolParameter):
                        input_connections_type[input.name] = "dataset"
                    if isinstance(input, DataCollectionToolParameter):
                        input_connections_type[
                            input.name] = "dataset_collection"
            visit_input_values(module.tool.inputs, module.state.inputs,
                               callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [
                conn for conn in input_connections
                if conn.input_name in data_input_names
            ]
        # post_job_actions
        pja_dict = {}
        for pja in step.post_job_actions:
            # Keyed by action_type + output_name so actions of the same type
            # on different outputs coexist.
            pja_dict[pja.action_type + pja.output_name] = dict(
                action_type=pja.action_type,
                output_name=pja.output_name,
                action_arguments=pja.action_arguments)
        step_dict['post_job_actions'] = pja_dict
        # workflow outputs
        outputs = []
        for output in step.unique_workflow_outputs:
            output_label = output.label
            output_name = output.output_name
            output_uuid = str(output.uuid) if output.uuid else None
            outputs.append({
                "output_name": output_name,
                "uuid": output_uuid,
                "label": output_label
            })
        step_dict['workflow_outputs'] = outputs
        # Encode input connections as dictionary
        input_conn_dict = {}
        for conn in input_connections:
            input_type = "dataset"
            if conn.input_name in input_connections_type:
                input_type = input_connections_type[conn.input_name]
            conn_dict = dict(id=conn.output_step.order_index,
                             output_name=conn.output_name,
                             input_type=input_type)
            if conn.input_name in multiple_input:
                # Multi-input parameters are encoded as a list of connections.
                if conn.input_name in input_conn_dict:
                    input_conn_dict[conn.input_name].append(conn_dict)
                else:
                    input_conn_dict[conn.input_name] = [conn_dict]
            else:
                input_conn_dict[conn.input_name] = conn_dict
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _workflow_to_dict(self, trans, stored):
    """
    RPARK: copied from galaxy.web.controllers.workflows.py
    Converts a workflow to a dict of attributes suitable for exporting.
    """
    workflow = stored.latest_workflow
    ### ----------------------------------- ###
    ## RPARK EDIT ##
    workflow_annotation = self.get_item_annotation_obj(
        trans.sa_session, trans.user, stored)
    annotation_str = ""
    if workflow_annotation:
        annotation_str = workflow_annotation.annotation
    ### ----------------------------------- ###
    # Pack workflow data into a dictionary and return
    data = {}
    data[
        'a_galaxy_workflow'] = 'true'  # Placeholder for identifying galaxy workflow
    data['format-version'] = "0.1"
    data['name'] = workflow.name
    ### ----------------------------------- ###
    ## RPARK EDIT ##
    data['annotation'] = annotation_str
    ### ----------------------------------- ###
    data['steps'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        ### ----------------------------------- ###
        ## RPARK EDIT ##
        # Get user annotation.
        step_annotation = self.get_item_annotation_obj(
            trans.sa_session, trans.user, step)
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation
        ### ----------------------------------- ###
        # Step info
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'tool_id': module.get_tool_id(),
            'tool_version': step.tool_version,
            'name': module.get_name(),
            'tool_state': module.get_state(secure=False),
            'tool_errors': module.get_errors(),
            ## 'data_inputs': module.get_data_inputs(),
            ## 'data_outputs': module.get_data_outputs(),
            ### ----------------------------------- ###
            ## RPARK EDIT ##
            'annotation': annotation_str
            ### ----------------------------------- ###
        }
        # Add post-job actions to step dict.
        if module.type == 'tool':
            pja_dict = {}
            for pja in step.post_job_actions:
                # Keyed by action_type + output_name so actions of the same
                # type on different outputs coexist.
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type,
                    output_name=pja.output_name,
                    action_arguments=pja.action_arguments)
            step_dict['post_job_actions'] = pja_dict
        # Data inputs
        step_dict['inputs'] = []
        if module.type == "data_input":
            # Get input dataset name; default to 'Input Dataset'
            name = module.state.get('name', 'Input Dataset')
            step_dict['inputs'].append({
                "name": name,
                "description": annotation_str
            })
        else:
            # Step is a tool and may have runtime inputs.
            # NOTE(review): exact type comparisons (type(val) == RuntimeValue /
            # dict) deliberately exclude subclasses — keep as-is.
            for name, val in module.state.inputs.items():
                input_type = type(val)
                if input_type == RuntimeValue:
                    step_dict['inputs'].append({
                        "name": name,
                        "description": "runtime parameter for tool %s" % module.get_name()
                    })
                elif input_type == dict:
                    # Input type is described by a dict, e.g. indexed parameters.
                    for partname, partval in val.items():
                        if type(partval) == RuntimeValue:
                            step_dict['inputs'].append({
                                "name": name,
                                "description": "runtime parameter for tool %s" % module.get_name()
                            })
        # User outputs
        step_dict['user_outputs'] = []
        """
        module_outputs = module.get_data_outputs()
        step_outputs = trans.sa_session.query( WorkflowOutput ).filter( step=step )
        for output in step_outputs:
            name = output.output_name
            annotation = ""
            for module_output in module_outputs:
                if module_output.get( 'name', None ) == name:
                    output_type = module_output.get( 'extension', '' )
                    break
            data['outputs'][name] = { 'name' : name, 'annotation' : annotation, 'type' : output_type }
        """
        # All step outputs
        step_dict['outputs'] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict['outputs'].append({
                    'name': output['name'],
                    'type': output['extensions'][0]
                })
        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    data_input_names[prefixed_name] = True
            visit_input_values(module.tool.inputs, module.state.inputs,
                               callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [
                conn for conn in input_connections
                if conn.input_name in data_input_names
            ]
        # Encode input connections as dictionary
        input_conn_dict = {}
        for conn in input_connections:
            input_conn_dict[ conn.input_name ] = \
                dict( id=conn.output_step.order_index, output_name=conn.output_name )
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _workflow_to_dict_editor(self, trans, stored):
    """
    Return a dictionary representation of ``stored.latest_workflow``
    suitable for the workflow editor: step dicts keyed by
    ``step.order_index`` under ``data['steps']``, plus per-step
    ``upgrade_messages`` collected while re-validating each module.

    :param trans: request/transaction context (provides sa_session, user)
    :param stored: StoredWorkflow whose ``latest_workflow`` is serialized
    :returns: dict with keys ``name``, ``steps``, ``upgrade_messages``
    :raises exceptions.MessageException: if a step's type is unrecognized
    """
    workflow = stored.latest_workflow
    # Pack workflow data into a dictionary and return
    data = {}
    data['name'] = workflow.name
    data['steps'] = {}
    data['upgrade_messages'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step, exact_tools=False)
        if not module:
            raise exceptions.MessageException('Unrecognized step type: %s' % step.type)
        # Load label from state of data input modules, necessary for backward compatibility
        self.__set_default_label(step, module, step.tool_inputs)
        # Fix any missing parameters
        upgrade_message = module.check_and_update_state()
        if upgrade_message:
            data['upgrade_messages'][step.order_index] = upgrade_message
        # Tool-version changes are merged into (or create) the same
        # upgrade_messages entry, keyed by tool name.
        if (hasattr(module, "version_changes")) and (module.version_changes):
            if step.order_index in data['upgrade_messages']:
                data['upgrade_messages'][step.order_index][module.tool.name] = "\n".join(module.version_changes)
            else:
                data['upgrade_messages'][step.order_index] = {module.tool.name: "\n".join(module.version_changes)}
        # Get user annotation.
        annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, step) or ''
        config_form = module.get_config_form()
        # Pack attributes into plain dictionary
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'label': module.label,
            'content_id': module.get_content_id(),
            'name': module.get_name(),
            'tool_state': module.get_state(),
            'tooltip': module.get_tooltip(static_path=url_for('/static')),
            'errors': module.get_errors(),
            'data_inputs': module.get_data_inputs(),
            'data_outputs': module.get_data_outputs(),
            'config_form': config_form,
            'annotation': annotation_str,
            'post_job_actions': {},
            'uuid': str(step.uuid) if step.uuid else None,
            'workflow_outputs': []
        }
        # Connections
        input_connections = step.input_connections
        input_connections_type = {}
        multiple_input = {}  # Boolean value indicating if this can be mutliple
        if (step.type is None or step.type == 'tool') and module.tool:
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            # Closure mutates data_input_names / multiple_input /
            # input_connections_type defined above; invoked synchronously
            # by visit_input_values below.
            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
                    multiple_input[prefixed_name] = input.multiple
                    # NOTE(review): keyed by input.name (unprefixed) while the
                    # dicts above use prefixed_name — presumably intentional for
                    # connection lookup; confirm for inputs nested in sections.
                    if isinstance(input, DataToolParameter):
                        input_connections_type[input.name] = "dataset"
                    if isinstance(input, DataCollectionToolParameter):
                        input_connections_type[input.name] = "dataset_collection"
            visit_input_values(module.tool.inputs, module.state.inputs, callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [conn for conn in input_connections if conn.input_name in data_input_names]
        # post_job_actions
        pja_dict = {}
        for pja in step.post_job_actions:
            pja_dict[pja.action_type + pja.output_name] = dict(
                action_type=pja.action_type,
                output_name=pja.output_name,
                action_arguments=pja.action_arguments
            )
        step_dict['post_job_actions'] = pja_dict
        # workflow outputs
        outputs = []
        for output in step.unique_workflow_outputs:
            output_label = output.label
            output_name = output.output_name
            output_uuid = str(output.uuid) if output.uuid else None
            outputs.append({"output_name": output_name, "uuid": output_uuid, "label": output_label})
        step_dict['workflow_outputs'] = outputs
        # Encode input connections as dictionary; multi-input parameters get
        # a list of connection dicts, single-input parameters a bare dict.
        input_conn_dict = {}
        for conn in input_connections:
            input_type = "dataset"
            if conn.input_name in input_connections_type:
                input_type = input_connections_type[conn.input_name]
            conn_dict = dict(id=conn.output_step.order_index, output_name=conn.output_name, input_type=input_type)
            if conn.input_name in multiple_input:
                if conn.input_name in input_conn_dict:
                    input_conn_dict[conn.input_name].append(conn_dict)
                else:
                    input_conn_dict[conn.input_name] = [conn_dict]
            else:
                input_conn_dict[conn.input_name] = conn_dict
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _workflow_to_dict_export(self, trans, stored=None, workflow=None):
    """
    Export the workflow contents to a dictionary ready for JSON-ification and export.

    Either ``stored`` (a StoredWorkflow; its ``latest_workflow`` is used)
    or ``workflow`` must be supplied.  Annotation and tags are only
    attached when ``stored`` is given.  Subworkflow steps are exported by
    recursing into this same method.
    :raises exceptions.MessageException: if a step's type is unrecognized
    """
    if workflow is None:
        assert stored is not None
        workflow = stored.latest_workflow
    annotation_str = ""
    tag_str = ""
    if stored is not None:
        annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, stored) or ''
        tag_str = stored.make_tag_string_list()
    # Pack workflow data into a dictionary and return
    data = {}
    data['a_galaxy_workflow'] = 'true'  # Placeholder for identifying galaxy workflow
    data['format-version'] = "0.1"
    data['name'] = workflow.name
    data['annotation'] = annotation_str
    data['tags'] = tag_str
    if workflow.uuid is not None:
        data['uuid'] = str(workflow.uuid)
    data['steps'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        if not module:
            raise exceptions.MessageException('Unrecognized step type: %s' % step.type)
        # Get user annotation.
        annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, step) or ''
        content_id = module.get_content_id()
        # Export differences for backward compatibility
        if module.type == 'tool':
            tool_state = module.get_state(nested=False)
        else:
            tool_state = module.state.inputs
        # Step info
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': content_id,
            # For workflows exported to older Galaxies,
            # eliminate after a few years...
            'tool_id': content_id,
            'tool_version': step.tool_version,
            'name': module.get_name(),
            'tool_state': json.dumps(tool_state),
            'errors': module.get_errors(),
            'uuid': str(step.uuid),
            'label': step.label or None,
            'annotation': annotation_str
        }
        # Add tool shed repository information and post-job actions to step dict.
        if module.type == 'tool':
            if module.tool and module.tool.tool_shed:
                step_dict["tool_shed_repository"] = {
                    'name': module.tool.repository_name,
                    'owner': module.tool.repository_owner,
                    'changeset_revision': module.tool.changeset_revision,
                    'tool_shed': module.tool.tool_shed
                }
            pja_dict = {}
            for pja in step.post_job_actions:
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type,
                    output_name=pja.output_name,
                    action_arguments=pja.action_arguments)
            step_dict['post_job_actions'] = pja_dict
        if module.type == 'subworkflow':
            # Tool-only keys are dropped; the nested workflow dict replaces them.
            del step_dict['content_id']
            del step_dict['errors']
            del step_dict['tool_version']
            del step_dict['tool_state']
            subworkflow = step.subworkflow
            subworkflow_as_dict = self._workflow_to_dict_export(
                trans,
                stored=None,
                workflow=subworkflow
            )
            step_dict['subworkflow'] = subworkflow_as_dict
        # Data inputs, legacy section not used anywhere within core
        input_dicts = []
        step_state = module.state.inputs or {}
        if "name" in step_state and module.type != 'tool':
            name = step_state.get("name")
            input_dicts.append({"name": name, "description": annotation_str})
        for name, val in step_state.items():
            input_type = type(val)
            if input_type == RuntimeValue:
                input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % module.get_name()})
            elif input_type == dict:
                # Input type is described by a dict, e.g. indexed parameters.
                for partval in val.values():
                    if type(partval) == RuntimeValue:
                        input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % module.get_name()})
        step_dict['inputs'] = input_dicts
        # User outputs
        workflow_outputs_dicts = []
        for workflow_output in step.unique_workflow_outputs:
            workflow_output_dict = dict(
                output_name=workflow_output.output_name,
                label=workflow_output.label,
                uuid=str(workflow_output.uuid) if workflow_output.uuid is not None else None,
            )
            workflow_outputs_dicts.append(workflow_output_dict)
        step_dict['workflow_outputs'] = workflow_outputs_dicts
        # All step outputs
        step_dict['outputs'] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict['outputs'].append({'name': output['name'], 'type': output['extensions'][0]})
        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            # Mutates data_input_names above; run by visit_input_values below.
            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
            # FIXME: this updates modules silently right now; messages from updates should be provided.
            module.check_and_update_state()
            if module.tool:
                # If the tool is installed we attempt to verify input values
                # and connections, otherwise the last known state will be dumped without modifications.
                visit_input_values(module.tool.inputs, module.state.inputs, callback)
                # FIXME: this removes connection without displaying a message currently!
                input_connections = [conn for conn in input_connections if (conn.input_name in data_input_names or conn.non_data_connection)]
        # Encode input connections as dictionary
        input_conn_dict = {}
        unique_input_names = set([conn.input_name for conn in input_connections])
        for input_name in unique_input_names:
            input_conn_dicts = []
            for conn in input_connections:
                if conn.input_name != input_name:
                    continue
                input_conn = dict(
                    id=conn.output_step.order_index,
                    output_name=conn.output_name
                )
                if conn.input_subworkflow_step is not None:
                    subworkflow_step_id = conn.input_subworkflow_step.order_index
                    input_conn["input_subworkflow_step_id"] = subworkflow_step_id
                input_conn_dicts.append(input_conn)
            input_conn_dict[input_name] = input_conn_dicts
        # Preserve backward compatability. Previously Galaxy
        # assumed input connections would be dictionaries not
        # lists of dictionaries, so replace any singleton list
        # with just the dictionary so that workflows exported from
        # newer Galaxy instances can be used with older Galaxy
        # instances if they do no include multiple input
        # tools. This should be removed at some point. Mirrored
        # hack in _workflow_from_dict should never be removed so
        # existing workflow exports continue to function.
        for input_name, input_conn in dict(input_conn_dict).items():
            if len(input_conn) == 1:
                input_conn_dict[input_name] = input_conn[0]
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def rerun( self, trans, id=None, from_noframe=None, **kwd ):
    """
    Given a HistoryDatasetAssociation id, find the job and that created
    the dataset, extract the parameters, and display the appropriate tool
    form with parameters already filled in.

    ``id`` may be a plain integer or an encoded id; access to the dataset
    is checked before anything else.  Bare ``except:`` blocks below are
    deliberate control flow (decode fallback, obsolete-tool detection) —
    ``error()`` is expected to abort the request.
    """
    if not id:
        error( "'id' parameter is required" );
    try:
        id = int( id )
    except:
        # it's not an un-encoded id, try to parse as encoded
        try:
            id = trans.security.decode_id( id )
        except:
            error( "Invalid value for 'id' parameter" )
    # Get the dataset object
    data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( id )
    # only allow rerunning if user is allowed access to the dataset.
    if not ( trans.user_is_admin() or trans.app.security_agent.can_access_dataset( trans.get_current_user_roles(), data.dataset ) ):
        error( "You are not allowed to access this dataset" )
    # Get the associated job, if any.
    job = data.creating_job
    if not job:
        raise Exception("Failed to get job information for dataset hid %d" % data.hid)
    # Get the tool object
    tool_id = job.tool_id
    tool_version = job.tool_version
    try:
        # Resolve the tool (possibly a different lineage version than the
        # one that originally ran) and build a user-facing message about
        # any version mismatch.
        tool_version_select_field, tools, tool = self.__get_tool_components( tool_id,
                                                                             tool_version=tool_version,
                                                                             get_loaded_tools_by_lineage=False,
                                                                             set_selected=True )
        if ( tool.id == job.tool_id or tool.old_id == job.tool_id ) and tool.version == job.tool_version:
            tool_id_version_message = ''
        elif tool.id == job.tool_id:
            if job.tool_version == None:
                # For some reason jobs don't always keep track of the tool version.
                tool_id_version_message = ''
            else:
                tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available. ' % job.tool_version
                if len( tools ) > 1:
                    tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
                else:
                    tool_id_version_message += 'You can rerun the job with this tool version, which is a derivation of the original tool.'
        else:
            if len( tools ) > 1:
                tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available. ' % job.tool_version
                tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
            else:
                tool_id_version_message = 'This job was initially run with tool id "%s", version "%s", which is not ' % ( job.tool_id, job.tool_version )
                tool_id_version_message += 'currently available. You can rerun the job with this tool, which is a derivation of the original tool.'
        assert tool is not None, 'Requested tool has not been loaded.'
    except:
        # This is expected so not an exception.
        tool_id_version_message = ''
        error( "This dataset was created by an obsolete tool (%s). Can't re-run." % tool_id )
    # Can't rerun upload, external data sources, et cetera. Workflow compatible will proxy this for now
    if not tool.is_workflow_compatible:
        error( "The '%s' tool does not currently support rerunning." % tool.name )
    # Get the job's parameters
    try:
        params_objects = job.get_param_values( trans.app, ignore_errors = True )
    except:
        raise Exception( "Failed to get parameters for dataset id %d " % data.id )
    upgrade_messages = tool.check_and_update_param_values( params_objects, trans, update_values=False )
    # Need to remap dataset parameters. Job parameters point to original
    # dataset used; parameter should be the analygous dataset in the
    # current history.
    history = trans.get_history()
    hda_source_dict = {}  # Mapping from HDA in history to source HDAs.
    for hda in history.datasets:
        source_hda = hda.copied_from_history_dataset_association
        while source_hda:  # should this check library datasets as well?
            # FIXME: could be multiple copies of a hda in a single history, this does a better job of matching on cloned histories,
            # but is still less than perfect when eg individual datasets are copied between histories
            if source_hda not in hda_source_dict or source_hda.hid == hda.hid:
                hda_source_dict[ source_hda ] = hda
            source_hda = source_hda.copied_from_history_dataset_association
    # Unpack unvalidated values to strings, they'll be validated when the
    # form is submitted (this happens when re-running a job that was
    # initially run by a workflow)
    # This needs to be done recursively through grouping parameters
    def rerun_callback( input, value, prefixed_name, prefixed_label ):
        if isinstance( value, UnvalidatedValue ):
            return str( value )
        if isinstance( input, DataToolParameter ):
            if isinstance(value,list):
                values = []
                for val in value:
                    if is_hashable( val ):
                        if val in history.datasets:
                            values.append( val )
                        elif val in hda_source_dict:
                            values.append( hda_source_dict[ val ])
                return values
            if is_hashable( value ) and value not in history.datasets and value in hda_source_dict:
                return hda_source_dict[ value ]
    visit_input_values( tool.inputs, params_objects, rerun_callback )
    # Create a fake tool_state for the tool, with the parameters values
    state = tool.new_state( trans )
    state.inputs = params_objects
    # If the job failed and has dependencies, allow dependency remap
    if job.state == job.states.ERROR:
        try:
            if [ hda.dependent_jobs for hda in [ jtod.dataset for jtod in job.output_datasets ] if hda.dependent_jobs ]:
                state.rerun_remap_job_id = trans.app.security.encode_id(job.id)
        except:
            # Job has no outputs?
            pass
    # create an incoming object from the original job's dataset-modified param objects
    incoming = {}
    params_to_incoming( incoming, tool.inputs, params_objects, trans.app )
    incoming[ "tool_state" ] = galaxy.util.object_to_string( state.encode( tool, trans.app ) )
    template, vars = tool.handle_input( trans, incoming, old_errors=upgrade_messages )  # update new state with old parameters
    # Is the "add frame" stuff neccesary here?
    add_frame = AddFrameData()
    add_frame.debug = trans.debug
    if from_noframe is not None:
        add_frame.wiki_url = trans.app.config.wiki_url
        add_frame.from_noframe = True
    return trans.fill_template( template,
                                history=history,
                                toolbox=self.get_toolbox(),
                                tool_version_select_field=tool_version_select_field,
                                tool=tool,
                                util=galaxy.util,
                                add_frame=add_frame,
                                tool_id_version_message=tool_id_version_message,
                                **vars )
def create(self, trans, payload, **kwd):
    """
    POST /api/workflows

    We're not creating workflows from the api. Just execute for now.

    However, we will import them if installed_repository_file is specified

    Expected payload keys: ``workflow_id``, ``ds_map``, ``history``;
    optional: ``parameters``, ``no_add_to_history``, ``replacement_params``,
    ``installed_repository_file``.

    Returns either an error string (with ``trans.response.status`` set) or
    a dict with ``history`` (encoded id) and ``outputs`` (encoded dataset ids).
    """
    # Pull parameters out of payload.
    workflow_id = payload['workflow_id']
    param_map = payload.get('parameters', {})
    ds_map = payload['ds_map']
    add_to_history = 'no_add_to_history' not in payload
    history_param = payload['history']
    # Get/create workflow.
    if not workflow_id:
        # create new
        if 'installed_repository_file' in payload:
            workflow_controller = trans.webapp.controllers['workflow']
            result = workflow_controller.import_workflow(trans=trans, cntrller='api', **payload)
            return result
        trans.response.status = 403
        return "Either workflow_id or installed_repository_file must be specified"
    if 'installed_repository_file' in payload:
        trans.response.status = 403
        return "installed_repository_file may not be specified with workflow_id"
    # Get workflow + accessibility check.
    stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
        trans.security.decode_id(workflow_id))
    if stored_workflow.user != trans.user and not trans.user_is_admin():
        if trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation).filter_by(user=trans.user, stored_workflow=stored_workflow).count() == 0:
            trans.response.status = 400
            return("Workflow is not owned by or shared with current user")
    workflow = stored_workflow.latest_workflow
    # Get target history.
    if history_param.startswith('hist_id='):
        # Passing an existing history to use.
        history = trans.sa_session.query(self.app.model.History).get(
            trans.security.decode_id(history_param[8:]))
        if history.user != trans.user and not trans.user_is_admin():
            trans.response.status = 400
            return "Invalid History specified."
    else:
        # Send workflow outputs to new history.
        history = self.app.model.History(name=history_param, user=trans.user)
        trans.sa_session.add(history)
        trans.sa_session.flush()
    # Set workflow inputs: resolve each ds_map entry to an HDA, checking
    # access; entries may come from a library ('ldda'/'ld') or a history ('hda').
    for k in ds_map:
        try:
            if ds_map[k]['src'] == 'ldda':
                ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'ld':
                ldda = trans.sa_session.query(self.app.model.LibraryDataset).get(
                    trans.security.decode_id(ds_map[k]['id'])).library_dataset_dataset_association
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(trans.get_current_user_roles(), ldda.dataset)
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]['src'] == 'hda':
                # Get dataset handle, add to dict and history if necessary
                hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]['id']))
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(trans.get_current_user_roles(), hda.dataset)
            else:
                trans.response.status = 400
                return "Unknown dataset source '%s' specified." % ds_map[k]['src']
            if add_to_history and hda.history != history:
                hda = hda.copy()
                history.add_dataset(hda)
            ds_map[k]['hda'] = hda
        except AssertionError:
            trans.response.status = 400
            return "Invalid Dataset '%s' Specified" % ds_map[k]['id']
    # Sanity checks.
    if not workflow:
        trans.response.status = 400
        return "Workflow not found."
    if len(workflow.steps) == 0:
        trans.response.status = 400
        return "Workflow cannot be run because it does not have any steps"
    if workflow.has_cycles:
        trans.response.status = 400
        return "Workflow cannot be run because it contains cycles"
    if workflow.has_errors:
        trans.response.status = 400
        return "Workflow cannot be run because of validation errors in some steps"
    # Build the state for each step
    rval = {}
    for step in workflow.steps:
        if step.type == 'tool' or step.type is None:
            step.module = module_factory.from_workflow_step(trans, step)
            # Check for missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            step.state = step.module.state
            _update_step_parameters(step, param_map)
            if step.tool_errors:
                trans.response.status = 400
                # BUG FIX: previously this formatted a local `step_errors`
                # variable that was always None; report the actual errors.
                return "Workflow cannot be run because of validation errors in some steps: %s" % step.tool_errors
            if step.upgrade_messages:
                trans.response.status = 400
                return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
        else:
            # This is an input step. Make sure we have an available input.
            if step.type == 'data_input' and str(step.id) not in ds_map:
                trans.response.status = 400
                return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
            step.module = module_factory.from_workflow_step(trans, step)
            step.state = step.module.get_runtime_state()
        step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = util.odict.odict()
    rval['history'] = trans.security.encode_id(history.id)
    rval['outputs'] = []
    for step in workflow.steps:
        job = None
        if step.type == 'tool' or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            # Replace connected data parameters with the upstream step's output.
            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]
            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(trans, step.state.inputs, history=history)
            outputs[step.id] = out_data
            # Do post-job actions.
            replacement_params = payload.get('replacement_params', {})
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(trans.app, trans.sa_session, pja, job, replacement_dict=replacement_params)
                else:
                    job.add_post_job_action(pja)
            for v in out_data.itervalues():
                rval['outputs'].append(trans.security.encode_id(v.id))
        else:
            # This is an input step. Use the dataset inputs from ds_map.
            job, out_data = step.module.execute(trans, step.state)
            outputs[step.id] = out_data
            outputs[step.id]['output'] = ds_map[str(step.id)]['hda']
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    trans.sa_session.add(workflow_invocation)
    trans.sa_session.flush()
    return rval
def _workflow_to_dict_editor(self, trans, stored):
    """
    Return an editor-oriented dict for ``stored.latest_workflow``:
    ``data['steps']`` keyed by ``step.order_index`` plus
    ``upgrade_messages``.  Steps whose module cannot be loaded are emitted
    as type 'invalid' placeholders with inline error HTML instead of
    aborting.
    """
    workflow = stored.latest_workflow
    # Pack workflow data into a dictionary and return
    data = {}
    data['name'] = workflow.name
    data['steps'] = {}
    data['upgrade_messages'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step( trans, step )
        if not module:
            # Unrecognized tool/step: emit a placeholder step so the editor
            # can still display (and let the user delete) it.
            step_annotation = self.get_item_annotation_obj( trans.sa_session, trans.user, step )
            annotation_str = ""
            if step_annotation:
                annotation_str = step_annotation.annotation
            invalid_tool_form_html = """<div class="toolForm tool-node-error">
            <div class="toolFormTitle form-row-error">Unrecognized Tool: %s</div>
            <div class="toolFormBody"><div class="form-row">
            The tool id '%s' for this tool is unrecognized.<br/><br/>
            To save this workflow, you will need to delete this step or enable the tool.
            </div></div></div>""" % (step.tool_id, step.tool_id)
            step_dict = {
                'id': step.order_index,
                'type': 'invalid',
                'content_id': step.content_id,
                'name': 'Unrecognized Tool: %s' % step.tool_id,
                'tool_state': None,
                'tooltip': None,
                'tool_errors': ["Unrecognized Tool Id: %s" % step.tool_id],
                'data_inputs': [],
                'data_outputs': [],
                'form_html': invalid_tool_form_html,
                'annotation': annotation_str,
                'input_connections': {},
                'post_job_actions': {},
                'uuid': str(step.uuid),
                'label': step.label or None,
                'workflow_outputs': []
            }
            # Position
            step_dict['position'] = step.position
            # Add to return value
            data['steps'][step.order_index] = step_dict
            continue
        # Fix any missing parameters
        upgrade_message = module.check_and_update_state()
        if upgrade_message:
            data['upgrade_messages'][step.order_index] = upgrade_message
        # Tool-version changes merge into the same upgrade_messages entry.
        if (hasattr(module, "version_changes")) and (module.version_changes):
            if step.order_index in data['upgrade_messages']:
                data['upgrade_messages'][step.order_index][module.tool.name] = "\n".join(module.version_changes)
            else:
                data['upgrade_messages'][step.order_index] = {module.tool.name: "\n".join(module.version_changes)}
        # Get user annotation.
        step_annotation = self.get_item_annotation_obj( trans.sa_session, trans.user, step )
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation
        form_html = None
        if trans.history:
            # If in a web session, attach form html. No reason to do
            # so for API requests.
            form_html = module.get_config_form()
        # Pack attributes into plain dictionary
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': module.get_content_id(),
            'name': module.get_name(),
            'tool_state': module.get_state(),
            'tooltip': module.get_tooltip( static_path=url_for( '/static' ) ),
            'tool_errors': module.get_errors(),
            'data_inputs': module.get_data_inputs(),
            'data_outputs': module.get_data_outputs(),
            'form_html': form_html,
            'annotation': annotation_str,
            'post_job_actions': {},
            'uuid': str(step.uuid) if step.uuid else None,
            'label': step.label or None,
            'workflow_outputs': []
        }
        # Connections
        input_connections = step.input_connections
        input_connections_type = {}
        multiple_input = {}  # Boolean value indicating if this can be mutliple
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            # Closure mutating the three dicts above; run synchronously by
            # visit_input_values below.
            def callback( input, value, prefixed_name, prefixed_label ):
                if isinstance( input, DataToolParameter ) or isinstance( input, DataCollectionToolParameter ):
                    data_input_names[ prefixed_name ] = True
                    multiple_input[ prefixed_name ] = input.multiple
                    if isinstance( input, DataToolParameter ):
                        input_connections_type[ input.name ] = "dataset"
                    if isinstance( input, DataCollectionToolParameter ):
                        input_connections_type[ input.name ] = "dataset_collection"
            visit_input_values( module.tool.inputs, module.state.inputs, callback )
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [ conn for conn in input_connections if conn.input_name in data_input_names ]
        # post_job_actions
        pja_dict = {}
        for pja in step.post_job_actions:
            pja_dict[pja.action_type + pja.output_name] = dict(
                action_type=pja.action_type,
                output_name=pja.output_name,
                action_arguments=pja.action_arguments
            )
        step_dict['post_job_actions'] = pja_dict
        # workflow outputs
        outputs = []
        for output in step.unique_workflow_outputs:
            output_label = output.label
            output_name = output.output_name
            output_uuid = str(output.uuid) if output.uuid else None
            outputs.append({"output_name": output_name, "uuid": output_uuid, "label": output_label})
        step_dict['workflow_outputs'] = outputs
        # Encode input connections as dictionary; multi-input params get a
        # list of connection dicts, others a single dict.
        input_conn_dict = {}
        for conn in input_connections:
            input_type = "dataset"
            if conn.input_name in input_connections_type:
                input_type = input_connections_type[ conn.input_name ]
            conn_dict = dict( id=conn.output_step.order_index, output_name=conn.output_name, input_type=input_type )
            if conn.input_name in multiple_input:
                if conn.input_name in input_conn_dict:
                    input_conn_dict[ conn.input_name ].append( conn_dict )
                else:
                    input_conn_dict[ conn.input_name ] = [ conn_dict ]
            else:
                input_conn_dict[ conn.input_name ] = conn_dict
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def execute( self, trans, progress, invocation, step ):
    """
    Execute this tool step of a workflow invocation.

    Builds one parameter combination per implicit-collection slice (or a
    single combination when no collections are mapped over), replaces
    connected inputs with upstream step outputs, runs the tool, records
    the step outputs on ``progress`` and applies post-job actions.

    :returns: list of successfully created jobs
    :raises exceptions.MessageException: on an input-mapping KeyError
    :raises DelayedWorkflowEvaluation: if the tool's inputs are not ready
    :raises Exception: if some of the jobs failed to be created
    """
    tool = trans.app.toolbox.get_tool( step.tool_id, tool_version=step.tool_version )
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[ RUNTIME_STEP_META_STATE_KEY ]
    collections_to_match = self._find_collections_to_match( tool, progress, step )
    # Have implicit collections...
    if collections_to_match.has_collections():
        collection_info = self.trans.app.dataset_collections_service.match_collections( collections_to_match )
    else:
        collection_info = None
    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [ None ]
    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy( execution_state.inputs )

        # Connect up
        def callback( input, prefixed_name, **kwargs ):
            replacement = NO_REPLACEMENT
            if isinstance( input, DataToolParameter ) or isinstance( input, DataCollectionToolParameter ):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance( input, DataToolParameter ):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[ prefixed_name ].dataset_instance
                        if hasattr(iteration_elements[ prefixed_name ], u'element_identifier') and iteration_elements[ prefixed_name ].element_identifier:
                            replacement.element_identifier = iteration_elements[ prefixed_name ].element_identifier
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[ prefixed_name ]
                else:
                    replacement = progress.replacement_for_tool_input( step, input, prefixed_name )
            else:
                replacement = progress.replacement_for_tool_input( step, input, prefixed_name )
            return replacement
        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values( tool.inputs, execution_state.inputs, callback, no_replacement_value=NO_REPLACEMENT )
        except KeyError as k:
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            # BUG FIX: the arguments were previously passed as
            # (tool.name, k.message), yielding "input mapping of '<tool>'
            # in '<missing key>'" — the template expects key first, tool second.
            message = message_template % (k.message, tool.name)
            raise exceptions.MessageException( message )
        param_combinations.append( execution_state.inputs )
    try:
        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=invocation.history,
            collection_info=collection_info,
            workflow_invocation_uuid=invocation.uuid.hex
        )
    except ToolInputsNotReadyException:
        delayed_why = "tool [%s] inputs are not ready, this special tool requires inputs to be ready" % tool.id
        raise DelayedWorkflowEvaluation(why=delayed_why)
    if collection_info:
        step_outputs = dict( execution_tracker.implicit_collections )
    else:
        step_outputs = dict( execution_tracker.output_datasets )
    step_outputs.update( execution_tracker.output_collections )
    progress.set_step_outputs( step, step_outputs )
    jobs = execution_tracker.successful_jobs
    for job in jobs:
        self._handle_post_job_actions( step, job, invocation.replacement_dict )
    if execution_tracker.execution_errors:
        failed_count = len(execution_tracker.execution_errors)
        success_count = len(execution_tracker.successful_jobs)
        all_count = failed_count + success_count
        message = "Failed to create %d out of %s job(s) for workflow step." % (failed_count, all_count)
        raise Exception(message)
    return jobs
def rerun(self, trans, id=None, from_noframe=None, **kwd):
    """
    Given a HistoryDatasetAssociation id, find the job and that created
    the dataset, extract the parameters, and display the appropriate tool
    form with parameters already filled in.

    ``id`` may be a plain integer or an encoded id; ``from_noframe``
    triggers the "add frame" wrapper in the rendered template.
    NOTE(review): ``error()`` appears to abort the request (control does
    not continue past it) — confirm against the controller base class.
    """
    if not id:
        error("'id' parameter is required")
    try:
        id = int(id)
    except:
        # it's not an un-encoded id, try to parse as encoded
        try:
            id = trans.security.decode_id(id)
        except:
            error("Invalid value for 'id' parameter")
    # Get the dataset object
    data = trans.sa_session.query(trans.app.model.HistoryDatasetAssociation).get(id)
    # only allow rerunning if user is allowed access to the dataset.
    if not (trans.user_is_admin() or trans.app.security_agent.can_access_dataset(trans.get_current_user_roles(), data.dataset)):
        error("You are not allowed to access this dataset")
    # Get the associated job, if any.
    job = data.creating_job
    if not job:
        raise Exception("Failed to get job information for dataset hid %d" % data.hid)
    # Get the tool object
    tool_id = job.tool_id
    tool_version = job.tool_version
    try:
        # Resolve the tool (possibly a different lineage version than the
        # one that originally ran) and build a user-facing message about
        # any version mismatch.
        tool_version_select_field, tools, tool = self.__get_tool_components(tool_id, tool_version=tool_version, get_loaded_tools_by_lineage=False, set_selected=True)
        if (tool.id == job.tool_id or tool.old_id == job.tool_id) and tool.version == job.tool_version:
            tool_id_version_message = ''
        elif tool.id == job.tool_id:
            if job.tool_version == None:
                # For some reason jobs don't always keep track of the tool version.
                tool_id_version_message = ''
            else:
                tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available. ' % job.tool_version
                if len(tools) > 1:
                    tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
                else:
                    tool_id_version_message += 'You can rerun the job with this tool version, which is a derivation of the original tool.'
        else:
            if len(tools) > 1:
                tool_id_version_message = 'This job was initially run with tool version "%s", which is not currently available. ' % job.tool_version
                tool_id_version_message += 'You can rerun the job with the selected tool or choose another derivation of the tool.'
            else:
                tool_id_version_message = 'This job was initially run with tool id "%s", version "%s", which is not ' % (job.tool_id, job.tool_version)
                tool_id_version_message += 'currently available. You can rerun the job with this tool, which is a derivation of the original tool.'
        assert tool is not None, 'Requested tool has not been loaded.'
    except:
        # This is expected so not an exception.
        tool_id_version_message = ''
        error("This dataset was created by an obsolete tool (%s). Can't re-run." % tool_id)
    # Can't rerun upload, external data sources, et cetera. Workflow compatible will proxy this for now
    if not tool.is_workflow_compatible:
        error("The '%s' tool does not currently support rerunning." % tool.name)
    # Get the job's parameters
    try:
        params_objects = job.get_param_values(trans.app, ignore_errors=True)
    except:
        raise Exception("Failed to get parameters for dataset id %d " % data.id)
    upgrade_messages = tool.check_and_update_param_values(params_objects, trans, update_values=False)
    # Need to remap dataset parameters. Job parameters point to original
    # dataset used; parameter should be the analygous dataset in the
    # current history.
    history = trans.get_history()
    hda_source_dict = {}  # Mapping from HDA in history to source HDAs.
    for hda in history.datasets:
        # Walk the copy chain back to every ancestor HDA so original job
        # inputs can be mapped onto their copies in this history.
        source_hda = hda.copied_from_history_dataset_association
        while source_hda:  # should this check library datasets as well?
            # FIXME: could be multiple copies of a hda in a single history, this does a better job of matching on cloned histories,
            # but is still less than perfect when eg individual datasets are copied between histories
            if source_hda not in hda_source_dict or source_hda.hid == hda.hid:
                hda_source_dict[source_hda] = hda
            source_hda = source_hda.copied_from_history_dataset_association
    # Unpack unvalidated values to strings, they'll be validated when the
    # form is submitted (this happens when re-running a job that was
    # initially run by a workflow)
    # This needs to be done recursively through grouping parameters
    def rerun_callback(input, value, prefixed_name, prefixed_label):
        if isinstance(value, UnvalidatedValue):
            return str(value)
        if isinstance(input, DataToolParameter):
            if isinstance(value, list):
                # Multi-dataset parameter: keep datasets already in this
                # history, otherwise substitute the local copy if one exists.
                values = []
                for val in value:
                    if is_hashable(val):
                        if val in history.datasets:
                            values.append(val)
                        elif val in hda_source_dict:
                            values.append(hda_source_dict[val])
                return values
            if is_hashable( value ) and value not in history.datasets and value in hda_source_dict:
                return hda_source_dict[value]
    visit_input_values(tool.inputs, params_objects, rerun_callback)
    # Create a fake tool_state for the tool, with the parameters values
    state = tool.new_state(trans)
    state.inputs = params_objects
    # If the job failed and has dependencies, allow dependency remap
    if job.state == job.states.ERROR:
        try:
            if [ hda.dependent_jobs for hda in [jtod.dataset for jtod in job.output_datasets] if hda.dependent_jobs ]:
                state.rerun_remap_job_id = trans.app.security.encode_id(job.id)
        except:
            # Job has no outputs?
            pass
    # create an incoming object from the original job's dataset-modified param objects
    incoming = {}
    params_to_incoming(incoming, tool.inputs, params_objects, trans.app)
    incoming["tool_state"] = galaxy.util.object_to_string(state.encode(tool, trans.app))
    template, vars = tool.handle_input(trans, incoming, old_errors=upgrade_messages)  # update new state with old parameters
    # Is the "add frame" stuff neccesary here?
    add_frame = AddFrameData()
    add_frame.debug = trans.debug
    if from_noframe is not None:
        add_frame.wiki_url = trans.app.config.wiki_url
        add_frame.from_noframe = True
    return trans.fill_template(template,
                               history=history,
                               toolbox=self.get_toolbox(),
                               tool_version_select_field=tool_version_select_field,
                               tool=tool,
                               util=galaxy.util,
                               add_frame=add_frame,
                               tool_id_version_message=tool_id_version_message,
                               **vars)
def create(self, trans, payload, **kwd):
    """
    POST /api/workflows

    We're not creating workflows from the api. Just execute for now.

    However, we will import them if installed_repository_file is specified

    Expected payload keys (from the visible reads below): ``workflow_id``
    or ``installed_repository_file``, ``history`` (either ``hist_id=<enc>``
    or a new history name), ``ds_map`` mapping step ids to
    ``{"src": ldda|ld|hda, "id": <encoded id>}``, optional
    ``no_add_to_history`` and ``parameters``. Returns a dict with
    ``history`` and ``outputs`` (encoded ids) on success, or an error
    string with a non-200 response status.
    """
    # ------------------------------------------------------------------------------- #
    ### RPARK: dictionary containing which workflows to change and edit ###
    param_map = {}
    if payload.has_key("parameters"):
        param_map = payload["parameters"]
    # ------------------------------------------------------------------------------- #
    if "workflow_id" not in payload:
        # create new
        if "installed_repository_file" in payload:
            workflow_controller = trans.webapp.controllers["workflow"]
            result = workflow_controller.import_workflow(trans=trans, cntrller="api", **payload)
            return result
        trans.response.status = 403
        return "Either workflow_id or installed_repository_file must be specified"
    if "installed_repository_file" in payload:
        trans.response.status = 403
        return "installed_repository_file may not be specified with workflow_id"
    stored_workflow = trans.sa_session.query(self.app.model.StoredWorkflow).get(
        trans.security.decode_id(payload["workflow_id"])
    )
    # Accessible when owned, shared with the user, or the user is an admin.
    if stored_workflow.user != trans.user and not trans.user_is_admin():
        if (
            trans.sa_session.query(trans.app.model.StoredWorkflowUserShareAssociation)
            .filter_by(user=trans.user, stored_workflow=stored_workflow)
            .count()
            == 0
        ):
            trans.response.status = 400
            return "Workflow is not owned by or shared with current user"
    workflow = stored_workflow.latest_workflow
    if payload["history"].startswith("hist_id="):
        # Passing an existing history to use.
        history = trans.sa_session.query(self.app.model.History).get(
            trans.security.decode_id(payload["history"][8:])
        )
        if history.user != trans.user and not trans.user_is_admin():
            trans.response.status = 400
            return "Invalid History specified."
    else:
        # Otherwise treat the value as a name for a brand-new history.
        history = self.app.model.History(name=payload["history"], user=trans.user)
        trans.sa_session.add(history)
        trans.sa_session.flush()
    ds_map = payload["ds_map"]
    add_to_history = "no_add_to_history" not in payload
    # Resolve each input dataset reference to an HDA, enforcing access.
    for k in ds_map:
        try:
            if ds_map[k]["src"] == "ldda":
                ldda = trans.sa_session.query(self.app.model.LibraryDatasetDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]["id"])
                )
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset
                )
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]["src"] == "ld":
                ldda = (
                    trans.sa_session.query(self.app.model.LibraryDataset)
                    .get(trans.security.decode_id(ds_map[k]["id"]))
                    .library_dataset_dataset_association
                )
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), ldda.dataset
                )
                hda = ldda.to_history_dataset_association(history, add_to_history=add_to_history)
            elif ds_map[k]["src"] == "hda":
                # Get dataset handle, add to dict and history if necessary
                hda = trans.sa_session.query(self.app.model.HistoryDatasetAssociation).get(
                    trans.security.decode_id(ds_map[k]["id"])
                )
                assert trans.user_is_admin() or trans.app.security_agent.can_access_dataset(
                    trans.get_current_user_roles(), hda.dataset
                )
            else:
                trans.response.status = 400
                return "Unknown dataset source '%s' specified." % ds_map[k]["src"]
            if add_to_history and hda.history != history:
                hda = hda.copy()
                history.add_dataset(hda)
            ds_map[k]["hda"] = hda
        except AssertionError:
            # Failed access check above.
            trans.response.status = 400
            return "Invalid Dataset '%s' Specified" % ds_map[k]["id"]
    if not workflow:
        trans.response.status = 400
        return "Workflow not found."
    if len(workflow.steps) == 0:
        trans.response.status = 400
        return "Workflow cannot be run because it does not have any steps"
    if workflow.has_cycles:
        trans.response.status = 400
        return "Workflow cannot be run because it contains cycles"
    if workflow.has_errors:
        trans.response.status = 400
        return "Workflow cannot be run because of validation errors in some steps"
    # Build the state for each step
    rval = {}
    for step in workflow.steps:
        step_errors = None
        if step.type == "tool" or step.type is None:
            step.module = module_factory.from_workflow_step(trans, step)
            # Check for missing parameters
            step.upgrade_messages = step.module.check_and_update_state()
            # Any connected input needs to have value DummyDataset (these
            # are not persisted so we need to do it every time)
            step.module.add_dummy_datasets(connections=step.input_connections)
            step.state = step.module.state
            ####################################################
            ####################################################
            # RPARK: IF TOOL_NAME IN PARAMETER MAP #
            if step.tool_id in param_map:
                change_param = param_map[step.tool_id]["param"]
                change_value = param_map[step.tool_id]["value"]
                step.state.inputs[change_param] = change_value
            ####################################################
            ####################################################
            if step.tool_errors:
                trans.response.status = 400
                # NOTE(review): step_errors is always None at this point —
                # this message likely meant to interpolate step.tool_errors;
                # confirm before fixing.
                return "Workflow cannot be run because of validation errors in some steps: %s" % step_errors
            if step.upgrade_messages:
                trans.response.status = 400
                return "Workflow cannot be run because of step upgrade messages: %s" % step.upgrade_messages
        else:
            # This is an input step. Make sure we have an available input.
            if step.type == "data_input" and str(step.id) not in ds_map:
                trans.response.status = 400
                return "Workflow cannot be run because an expected input step '%s' has no input dataset." % step.id
            step.module = module_factory.from_workflow_step(trans, step)
            step.state = step.module.get_runtime_state()
        step.input_connections_by_name = dict((conn.input_name, conn) for conn in step.input_connections)
    # Run each step, connecting outputs to inputs
    workflow_invocation = self.app.model.WorkflowInvocation()
    workflow_invocation.workflow = workflow
    outputs = util.odict.odict()
    rval["history"] = trans.security.encode_id(history.id)
    rval["outputs"] = []
    for i, step in enumerate(workflow.steps):
        job = None
        if step.type == "tool" or step.type is None:
            tool = self.app.toolbox.get_tool(step.tool_id)

            # Wire connected data inputs to upstream step outputs.
            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    if prefixed_name in step.input_connections_by_name:
                        conn = step.input_connections_by_name[prefixed_name]
                        return outputs[conn.output_step.id][conn.output_name]

            visit_input_values(tool.inputs, step.state.inputs, callback)
            job, out_data = tool.execute(trans, step.state.inputs, history=history)
            outputs[step.id] = out_data
            for pja in step.post_job_actions:
                if pja.action_type in ActionBox.immediate_actions:
                    ActionBox.execute(self.app, trans.sa_session, pja, job, replacement_dict=None)
                else:
                    job.add_post_job_action(pja)
            for v in out_data.itervalues():
                rval["outputs"].append(trans.security.encode_id(v.id))
        else:
            # This is an input step. Use the dataset inputs from ds_map.
            job, out_data = step.module.execute(trans, step.state)
            outputs[step.id] = out_data
            outputs[step.id]["output"] = ds_map[str(step.id)]["hda"]
        workflow_invocation_step = self.app.model.WorkflowInvocationStep()
        workflow_invocation_step.workflow_invocation = workflow_invocation
        workflow_invocation_step.workflow_step = step
        workflow_invocation_step.job = job
    trans.sa_session.add(workflow_invocation)
    trans.sa_session.flush()
    return rval
def _workflow_to_dict_export(self, trans, stored=None, workflow=None):
    """ Export the workflow contents to a dictionary ready for JSON-ification and export.

    Either ``stored`` (a StoredWorkflow; its latest_workflow is used) or
    ``workflow`` must be provided. Recurses into subworkflow steps.
    Returns None when any step's module cannot be constructed.
    """
    if workflow is None:
        assert stored is not None
        workflow = stored.latest_workflow
    annotation_str = ""
    if stored is not None:
        workflow_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, stored)
        if workflow_annotation:
            annotation_str = workflow_annotation.annotation
    # Pack workflow data into a dictionary and return
    data = {}
    data['a_galaxy_workflow'] = 'true'  # Placeholder for identifying galaxy workflow
    data['format-version'] = "0.1"
    data['name'] = workflow.name
    data['annotation'] = annotation_str
    if workflow.uuid is not None:
        data['uuid'] = str(workflow.uuid)
    data['steps'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        if not module:
            return None
        # Get user annotation.
        step_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, step)
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation
        content_id = module.get_content_id()
        # Step info
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': content_id,
            # For worklfows exported to older Galaxies,
            # eliminate after a few years...
            'tool_id': content_id,
            'tool_version': step.tool_version,
            'name': module.get_name(),
            'tool_state': module.get_state(),
            'tool_errors': module.get_errors(),
            'uuid': str(step.uuid),
            'label': step.label or None,
            # 'data_inputs': module.get_data_inputs(),
            # 'data_outputs': module.get_data_outputs(),
            'annotation': annotation_str
        }
        # Add tool shed repository information and post-job actions to step dict.
        if module.type == 'tool':
            if module.tool.tool_shed_repository:
                tsr = module.tool.tool_shed_repository
                step_dict["tool_shed_repository"] = {
                    'name': tsr.name,
                    'owner': tsr.owner,
                    'changeset_revision': tsr.changeset_revision,
                    'tool_shed': tsr.tool_shed
                }
            pja_dict = {}
            for pja in step.post_job_actions:
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type,
                    output_name=pja.output_name,
                    action_arguments=pja.action_arguments)
            step_dict['post_job_actions'] = pja_dict
        if module.type == 'subworkflow':
            # Subworkflow steps carry the nested workflow dict instead of
            # tool-related keys.
            del step_dict['content_id']
            del step_dict['tool_version']
            del step_dict['tool_state']
            del step_dict['tool_errors']
            subworkflow = step.subworkflow
            subworkflow_as_dict = self._workflow_to_dict_export(trans, stored=None, workflow=subworkflow)
            step_dict['subworkflow'] = subworkflow_as_dict
        # Data inputs
        step_dict['inputs'] = module.get_runtime_input_dicts(annotation_str)
        # User outputs
        workflow_outputs_dicts = []
        for workflow_output in step.unique_workflow_outputs:
            workflow_output_dict = dict(
                output_name=workflow_output.output_name,
                label=workflow_output.label,
                uuid=str(workflow_output.uuid) if workflow_output.uuid is not None else None,
            )
            workflow_outputs_dicts.append(workflow_output_dict)
        step_dict['workflow_outputs'] = workflow_outputs_dicts
        # All step outputs
        step_dict['outputs'] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict['outputs'].append({'name': output['name'], 'type': output['extensions'][0]})
        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
            # FIXME: this updates modules silently right now; messages from updates should be provided.
            module.check_and_update_state()
            visit_input_values(module.tool.inputs, module.state.inputs, callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [conn for conn in input_connections if (conn.input_name in data_input_names or conn.non_data_connection)]
        # Encode input connections as dictionary
        input_conn_dict = {}
        unique_input_names = set([conn.input_name for conn in input_connections])
        for input_name in unique_input_names:
            input_conn_dicts = []
            for conn in input_connections:
                if conn.input_name != input_name:
                    continue
                input_conn = dict(id=conn.output_step.order_index, output_name=conn.output_name)
                if conn.input_subworkflow_step is not None:
                    subworkflow_step_id = conn.input_subworkflow_step.order_index
                    input_conn["input_subworkflow_step_id"] = subworkflow_step_id
                input_conn_dicts.append(input_conn)
            input_conn_dict[input_name] = input_conn_dicts
        # Preserve backward compatability. Previously Galaxy
        # assumed input connections would be dictionaries not
        # lists of dictionaries, so replace any singleton list
        # with just the dictionary so that workflows exported from
        # newer Galaxy instances can be used with older Galaxy
        # instances if they do no include multiple input
        # tools. This should be removed at some point. Mirrored
        # hack in _workflow_from_dict should never be removed so
        # existing workflow exports continue to function.
        for input_name, input_conn in dict(input_conn_dict).iteritems():
            if len(input_conn) == 1:
                input_conn_dict[input_name] = input_conn[0]
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _workflow_to_dict(self, trans, stored):
    """
    RPARK: copied from galaxy.web.controllers.workflows.py
    Converts a workflow to a dict of attributes suitable for exporting.

    Serializes ``stored.latest_workflow`` step by step: state, annotations,
    post-job actions, runtime inputs, outputs, and input connections
    (single dict per input name, unlike the newer export variant).
    """
    workflow = stored.latest_workflow
    ### ----------------------------------- ###
    ## RPARK EDIT ##
    workflow_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, stored)
    annotation_str = ""
    if workflow_annotation:
        annotation_str = workflow_annotation.annotation
    ### ----------------------------------- ###
    # Pack workflow data into a dictionary and return
    data = {}
    data["a_galaxy_workflow"] = "true"  # Placeholder for identifying galaxy workflow
    data["format-version"] = "0.1"
    data["name"] = workflow.name
    ### ----------------------------------- ###
    ## RPARK EDIT ##
    data["annotation"] = annotation_str
    ### ----------------------------------- ###
    data["steps"] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        ### ----------------------------------- ###
        ## RPARK EDIT ##
        # TODO: This is duplicated from
        # lib/galaxy/webapps/controllres/workflow.py -- refactor and
        # eliminate copied code.
        # Get user annotation.
        step_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, step)
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation
        ### ----------------------------------- ###
        # Step info
        step_dict = {
            "id": step.order_index,
            "type": module.type,
            "tool_id": module.get_tool_id(),
            "tool_version": step.tool_version,
            "name": module.get_name(),
            "tool_state": module.get_state(secure=False),
            "tool_errors": module.get_errors(),
            ## 'data_inputs': module.get_data_inputs(),
            ## 'data_outputs': module.get_data_outputs(),
            ### ----------------------------------- ###
            ## RPARK EDIT ##
            "annotation": annotation_str
            ### ----------------------------------- ###
        }
        # Add post-job actions to step dict.
        if module.type == "tool":
            pja_dict = {}
            for pja in step.post_job_actions:
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type, output_name=pja.output_name, action_arguments=pja.action_arguments
                )
            step_dict["post_job_actions"] = pja_dict
        # Data inputs
        step_dict["inputs"] = []
        if module.type == "data_input":
            # Get input dataset name; default to 'Input Dataset'
            name = module.state.get("name", "Input Dataset")
            step_dict["inputs"].append({"name": name, "description": annotation_str})
        else:
            # Step is a tool and may have runtime inputs.
            for name, val in module.state.inputs.items():
                input_type = type(val)
                if input_type == RuntimeValue:
                    step_dict["inputs"].append(
                        {"name": name, "description": "runtime parameter for tool %s" % module.get_name()}
                    )
                elif input_type == dict:
                    # Input type is described by a dict, e.g. indexed parameters.
                    for partname, partval in val.items():
                        if type(partval) == RuntimeValue:
                            step_dict["inputs"].append(
                                {"name": name, "description": "runtime parameter for tool %s" % module.get_name()}
                            )
        # User outputs
        step_dict["user_outputs"] = []
        # The following string literal is dead (commented-out) code kept
        # from the original; it is a no-op expression statement.
        """
        module_outputs = module.get_data_outputs()
        step_outputs = trans.sa_session.query( WorkflowOutput ).filter( step=step )
        for output in step_outputs:
            name = output.output_name
            annotation = ""
            for module_output in module_outputs:
                if module_output.get( 'name', None ) == name:
                    output_type = module_output.get( 'extension', '' )
                    break
            data['outputs'][name] = { 'name' : name, 'annotation' : annotation, 'type' : output_type }
        """
        # All step outputs
        step_dict["outputs"] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict["outputs"].append({"name": output["name"], "type": output["extensions"][0]})
        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == "tool":
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, DataToolParameter):
                    data_input_names[prefixed_name] = True

            visit_input_values(module.tool.inputs, module.state.inputs, callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [conn for conn in input_connections if conn.input_name in data_input_names]
        # Encode input connections as dictionary
        input_conn_dict = {}
        for conn in input_connections:
            input_conn_dict[conn.input_name] = dict(id=conn.output_step.order_index, output_name=conn.output_name)
        step_dict["input_connections"] = input_conn_dict
        # Position
        step_dict["position"] = step.position
        # Add to return value
        data["steps"][step.order_index] = step_dict
    return data
def _workflow_to_dict_export(self, trans, stored=None, workflow=None):
    """ Export the workflow contents to a dictionary ready for JSON-ification and export.

    Either ``stored`` (StoredWorkflow; latest_workflow is used) or
    ``workflow`` must be provided. Raises MessageException for
    unrecognized step types and recurses into subworkflow steps.
    """
    if workflow is None:
        assert stored is not None
        workflow = stored.latest_workflow
    annotation_str = ""
    tag_str = ""
    if stored is not None:
        annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, stored) or ''
        tag_str = stored.make_tag_string_list()
    # Pack workflow data into a dictionary and return
    data = {}
    data['a_galaxy_workflow'] = 'true'  # Placeholder for identifying galaxy workflow
    data['format-version'] = "0.1"
    data['name'] = workflow.name
    data['annotation'] = annotation_str
    data['tags'] = tag_str
    if workflow.uuid is not None:
        data['uuid'] = str(workflow.uuid)
    data['steps'] = {}
    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        if not module:
            raise exceptions.MessageException('Unrecognized step type: %s' % step.type)
        # Get user annotation.
        annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, step) or ''
        content_id = module.get_content_id()
        # Export differences for backward compatibility
        if module.type == 'tool':
            tool_state = module.get_state(nested=False)
        else:
            tool_state = module.state.inputs
        # Step info
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': content_id,
            # For worklfows exported to older Galaxies,
            # eliminate after a few years...
            'tool_id': content_id,
            'tool_version': step.tool_version,
            'name': module.get_name(),
            'tool_state': json.dumps(tool_state),
            'errors': module.get_errors(),
            'uuid': str(step.uuid),
            'label': step.label or None,
            'annotation': annotation_str
        }
        # Add tool shed repository information and post-job actions to step dict.
        if module.type == 'tool':
            if module.tool and module.tool.tool_shed:
                step_dict["tool_shed_repository"] = {
                    'name': module.tool.repository_name,
                    'owner': module.tool.repository_owner,
                    'changeset_revision': module.tool.changeset_revision,
                    'tool_shed': module.tool.tool_shed
                }
            pja_dict = {}
            for pja in step.post_job_actions:
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type,
                    output_name=pja.output_name,
                    action_arguments=pja.action_arguments)
            step_dict['post_job_actions'] = pja_dict
        if module.type == 'subworkflow':
            # Subworkflow steps carry the nested workflow dict instead of
            # tool-related keys.
            del step_dict['content_id']
            del step_dict['errors']
            del step_dict['tool_version']
            del step_dict['tool_state']
            subworkflow = step.subworkflow
            subworkflow_as_dict = self._workflow_to_dict_export(trans, stored=None, workflow=subworkflow)
            step_dict['subworkflow'] = subworkflow_as_dict
        # Data inputs, legacy section not used anywhere within core
        input_dicts = []
        step_state = module.state.inputs or {}
        if "name" in step_state and module.type != 'tool':
            name = step_state.get("name")
            input_dicts.append({"name": name, "description": annotation_str})
        for name, val in step_state.items():
            input_type = type(val)
            if input_type == RuntimeValue:
                input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % module.get_name()})
            elif input_type == dict:
                # Input type is described by a dict, e.g. indexed parameters.
                for partval in val.values():
                    if type(partval) == RuntimeValue:
                        input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % module.get_name()})
        step_dict['inputs'] = input_dicts
        # User outputs
        workflow_outputs_dicts = []
        for workflow_output in step.unique_workflow_outputs:
            workflow_output_dict = dict(
                output_name=workflow_output.output_name,
                label=workflow_output.label,
                uuid=str(workflow_output.uuid) if workflow_output.uuid is not None else None,
            )
            workflow_outputs_dicts.append(workflow_output_dict)
        step_dict['workflow_outputs'] = workflow_outputs_dicts
        # All step outputs
        step_dict['outputs'] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict['outputs'].append({'name': output['name'], 'type': output['extensions'][0]})
        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, prefixed_name, **kwargs):
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    data_input_names[prefixed_name] = True
            # FIXME: this updates modules silently right now; messages from updates should be provided.
            module.check_and_update_state()
            if module.tool:
                # If the tool is installed we attempt to verify input values
                # and connections, otherwise the last known state will be dumped without modifications.
                visit_input_values(module.tool.inputs, module.state.inputs, callback)
                # FIXME: this removes connection without displaying a message currently!
                input_connections = [conn for conn in input_connections if (conn.input_name in data_input_names or conn.non_data_connection)]
        # Encode input connections as dictionary
        input_conn_dict = {}
        unique_input_names = set([conn.input_name for conn in input_connections])
        for input_name in unique_input_names:
            input_conn_dicts = []
            for conn in input_connections:
                if conn.input_name != input_name:
                    continue
                input_conn = dict(id=conn.output_step.order_index, output_name=conn.output_name)
                if conn.input_subworkflow_step is not None:
                    subworkflow_step_id = conn.input_subworkflow_step.order_index
                    input_conn["input_subworkflow_step_id"] = subworkflow_step_id
                input_conn_dicts.append(input_conn)
            input_conn_dict[input_name] = input_conn_dicts
        # Preserve backward compatability. Previously Galaxy
        # assumed input connections would be dictionaries not
        # lists of dictionaries, so replace any singleton list
        # with just the dictionary so that workflows exported from
        # newer Galaxy instances can be used with older Galaxy
        # instances if they do no include multiple input
        # tools. This should be removed at some point. Mirrored
        # hack in _workflow_from_dict should never be removed so
        # existing workflow exports continue to function.
        for input_name, input_conn in dict(input_conn_dict).items():
            if len(input_conn) == 1:
                input_conn_dict[input_name] = input_conn[0]
        step_dict['input_connections'] = input_conn_dict
        # Position
        step_dict['position'] = step.position
        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def _apply_extract_untyped_parameter(self, action: ExtractUntypedParameter, execution: RefactorActionExecution):
    """Extract a legacy untyped ``${name}`` workflow parameter into a formal input step.

    Scans every tool step for tool-state values equal to ``${action.name}``
    (and rename PJAs mentioning it), creates a typed input step (type derived
    from the tool inputs that referenced the parameter), and connects that new
    input to each referencing tool input.

    Raises RequestParameterInvalidException when the parameter is not found,
    when a referencing tool input's type cannot be represented as a workflow
    parameter, or when referencing inputs disagree on numeric vs. non-numeric.
    """
    untyped_parameter_name = action.name
    new_label = action.label or untyped_parameter_name
    target_value = "${%s}" % untyped_parameter_name

    target_tool_inputs = []
    rename_pjas = []

    for step_def, step in self._iterate_over_step_pairs(execution):
        module = step.module
        if module.type != "tool":
            continue
        # TODO: require a clean tool state for all tools to do this.
        tool = module.tool
        tool_inputs = module.state
        replace_tool_state = False

        def callback(input, prefixed_name, context, value=None, **kwargs):
            nonlocal replace_tool_state
            # data parameters cannot have untyped parameter values
            if input.type in ['data', 'data_collection']:
                return NO_REPLACEMENT

            if not contains_workflow_parameter(value):
                return NO_REPLACEMENT

            if value == target_value:
                # Record which tool input referenced the parameter and swap
                # the literal for a connected (runtime) value.
                target_tool_inputs.append((step.order_index, input, prefixed_name))
                replace_tool_state = True
                return runtime_to_json(ConnectedValue())
            else:
                return NO_REPLACEMENT

        visit_input_values(tool.inputs, tool_inputs.inputs, callback, no_replacement_value=NO_REPLACEMENT)
        if replace_tool_state:
            step_def["tool_state"] = step.module.get_tool_state()

    for post_job_action in self._iterate_over_rename_pjas():
        newname = post_job_action.get("action_arguments", {}).get("newname")
        if target_value in newname:
            rename_pjas.append(post_job_action)

    if len(target_tool_inputs) == 0 and len(rename_pjas) == 0:
        raise RequestParameterInvalidException(f"Failed to find {target_value} in the tool state or any workflow steps.")

    # Map tool input types onto workflow parameter types; select and
    # genomebuild parameters become plain text inputs.
    as_parameter_type = {
        "text": "text",
        "integer": "integer",
        "float": "float",
        "select": "text",
        "genomebuild": "text",
    }
    target_parameter_types = set()
    for _, tool_input, _ in target_tool_inputs:
        tool_input_type = tool_input.type
        if tool_input_type not in as_parameter_type:
            # BUGFIX: this message was missing the f-prefix, so the literal
            # text "{tool_input_type}" was shown to the user.
            raise RequestParameterInvalidException(
                f"Extracting inputs for parameters on tool inputs of type {tool_input_type} is unsupported"
            )
        target_parameter_type = as_parameter_type[tool_input_type]
        target_parameter_types.add(target_parameter_type)

    if len(target_parameter_types) > 1:
        raise RequestParameterInvalidException("Extracting inputs for parameters on conflicting tool input types (e.g. numeric and non-numeric) input types is unsupported")

    if len(target_parameter_types) == 1:
        (target_parameter_type,) = target_parameter_types
    else:
        # only used in PJA, hence only used a string
        target_parameter_type = "text"

    for rename_pja in rename_pjas:
        # if name != label, got to rewrite this rename with new label.
        if untyped_parameter_name != new_label:
            action_arguments = rename_pja.get("action_arguments")
            old_newname = action_arguments["newname"]
            new_newname = old_newname.replace(target_value, "${%s}" % new_label)
            action_arguments["newname"] = new_newname

    optional = False
    input_action = AddInputAction(
        action_type="add_input",
        optional=optional,
        type=target_parameter_type,
        label=new_label,
        position=action.position,
    )
    new_input_order_index = self._add_input_get_order_index(input_action, execution)

    for order_index, _tool_input, prefixed_name in target_tool_inputs:
        connect_input = InputReferenceByOrderIndex(order_index=order_index, input_name=prefixed_name)
        connect_action = ConnectAction(
            action_type="connect",
            input=connect_input,
            output=OutputReferenceByOrderIndex(order_index=new_input_order_index),
        )
        self._apply_connect(connect_action, execution)
def _workflow_to_dict_export(self, trans, stored=None, workflow=None):
    """
    Export the workflow contents to a dictionary ready for JSON-ification and export.

    :param trans: request/transaction context (provides sa_session and user).
    :param stored: the StoredWorkflow owning ``workflow``; used for the
        workflow-level annotation. May be None when exporting a subworkflow.
    :param workflow: the workflow revision to export; defaults to
        ``stored.latest_workflow`` when None (``stored`` required then).
    :returns: the export dictionary, or None if any step's module cannot
        be loaded.
    """
    if workflow is None:
        assert stored is not None
        workflow = stored.latest_workflow

    annotation_str = ""
    if stored is not None:
        workflow_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, stored)
        if workflow_annotation:
            annotation_str = workflow_annotation.annotation

    # Pack workflow data into a dictionary and return
    data = {}
    data['a_galaxy_workflow'] = 'true'  # Placeholder for identifying galaxy workflow
    data['format-version'] = "0.1"
    data['name'] = workflow.name
    data['annotation'] = annotation_str
    if workflow.uuid is not None:
        data['uuid'] = str(workflow.uuid)
    data['steps'] = {}

    # For each step, rebuild the form and encode the state
    for step in workflow.steps:
        # Load from database representation
        module = module_factory.from_workflow_step(trans, step)
        if not module:
            return None

        # Get user annotation.
        step_annotation = self.get_item_annotation_obj(trans.sa_session, trans.user, step)
        annotation_str = ""
        if step_annotation:
            annotation_str = step_annotation.annotation

        content_id = module.get_content_id()
        # Step info
        step_dict = {
            'id': step.order_index,
            'type': module.type,
            'content_id': content_id,
            # 'tool_id' duplicated for workflows exported to older Galaxies,
            # eliminate after a few years...
            'tool_id': content_id,
            'tool_version': step.tool_version,
            'name': module.get_name(),
            'tool_state': module.get_state(secure=False),
            'tool_errors': module.get_errors(),
            'uuid': str(step.uuid),
            'label': step.label or None,
            'annotation': annotation_str
        }

        # Add post-job actions to step dict.
        if module.type == 'tool':
            pja_dict = {}
            for pja in step.post_job_actions:
                pja_dict[pja.action_type + pja.output_name] = dict(
                    action_type=pja.action_type,
                    output_name=pja.output_name,
                    action_arguments=pja.action_arguments)
            step_dict['post_job_actions'] = pja_dict

        if module.type == 'subworkflow':
            # Subworkflow steps carry a nested export instead of tool state.
            del step_dict['content_id']
            del step_dict['tool_version']
            del step_dict['tool_state']
            del step_dict['tool_errors']
            subworkflow = step.subworkflow
            subworkflow_as_dict = self._workflow_to_dict_export(trans, stored=None, workflow=subworkflow)
            step_dict['subworkflow'] = subworkflow_as_dict

        # Data inputs
        step_dict['inputs'] = module.get_runtime_input_dicts(annotation_str)

        # User outputs
        workflow_outputs_dicts = []
        for workflow_output in step.unique_workflow_outputs:
            workflow_output_dict = dict(
                output_name=workflow_output.output_name,
                label=workflow_output.label,
                uuid=str(workflow_output.uuid) if workflow_output.uuid is not None else None,
            )
            workflow_outputs_dicts.append(workflow_output_dict)
        step_dict['workflow_outputs'] = workflow_outputs_dicts

        # All step outputs
        step_dict['outputs'] = []
        if type(module) is ToolModule:
            for output in module.get_data_outputs():
                step_dict['outputs'].append({'name': output['name'], 'type': output['extensions'][0]})

        # Connections
        input_connections = step.input_connections
        if step.type is None or step.type == 'tool':
            # Determine full (prefixed) names of valid input datasets
            data_input_names = {}

            def callback(input, value, prefixed_name, prefixed_label):
                if isinstance(input, (DataToolParameter, DataCollectionToolParameter)):
                    data_input_names[prefixed_name] = True

            # FIXME: this updates modules silently right now; messages from updates should be provided.
            module.check_and_update_state()
            visit_input_values(module.tool.inputs, module.state.inputs, callback)
            # Filter
            # FIXME: this removes connection without displaying a message currently!
            input_connections = [conn for conn in input_connections if (conn.input_name in data_input_names or conn.non_data_connection)]

        # Encode input connections as dictionary
        input_conn_dict = {}
        unique_input_names = {conn.input_name for conn in input_connections}
        for input_name in unique_input_names:
            input_conn_dicts = []
            for conn in input_connections:
                if conn.input_name != input_name:
                    continue
                input_conn = dict(
                    id=conn.output_step.order_index,
                    output_name=conn.output_name
                )
                if conn.input_subworkflow_step is not None:
                    subworkflow_step_id = conn.input_subworkflow_step.order_index
                    input_conn["input_subworkflow_step_id"] = subworkflow_step_id
                input_conn_dicts.append(input_conn)
            input_conn_dict[input_name] = input_conn_dicts

        # Preserve backward compatibility. Previously Galaxy
        # assumed input connections would be dictionaries not
        # lists of dictionaries, so replace any singleton list
        # with just the dictionary so that workflows exported from
        # newer Galaxy instances can be used with older Galaxy
        # instances if they do no include multiple input
        # tools. This should be removed at some point. Mirrored
        # hack in _workflow_from_dict should never be removed so
        # existing workflow exports continue to function.
        # BUGFIX: was dict.iteritems(), which does not exist on Python 3.
        for input_name, input_conn in dict(input_conn_dict).items():
            if len(input_conn) == 1:
                input_conn_dict[input_name] = input_conn[0]
        step_dict['input_connections'] = input_conn_dict

        # Position
        step_dict['position'] = step.position

        # Add to return value
        data['steps'][step.order_index] = step_dict
    return data
def execute(self, trans, progress, invocation, step):
    """Execute a tool step of a workflow invocation.

    Builds one tool-state parameter set per implicit-collection slice (or a
    single set when no collections need matching), replaces placeholder
    connection values with the upstream step outputs recorded in
    ``progress``, runs the tool, records the step outputs, and applies
    post-job actions to each successful job.

    :param trans: request/transaction context supplying the toolbox.
    :param progress: workflow invocation progress tracker; source of
        replacement values for connected inputs.
    :param invocation: the WorkflowInvocation (history, uuid, replacements).
    :param step: the WorkflowStep being executed.
    :returns: list of successfully created jobs.
    :raises DelayedWorkflowEvaluation: if the tool's inputs are not ready yet.
    :raises exceptions.MessageException: on input-mapping failures.
    :raises Exception: if any of the created jobs failed.
    """
    tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]

    collections_to_match = self._find_collections_to_match(tool, progress, step)
    # Have implicit collections...
    if collections_to_match.has_collections():
        # NOTE(review): uses self.trans here while the rest of the method
        # uses the trans argument — confirm these are the same context.
        collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
    else:
        collection_info = None

    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [None]

    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy(execution_state.inputs)

        expected_replacement_keys = set(step.input_connections_by_name.keys())
        found_replacement_keys = set()

        # Connect up
        def callback(input, prefixed_name, **kwargs):
            replacement = NO_REPLACEMENT
            if isinstance(input, (DataToolParameter, DataCollectionToolParameter)):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance(input, DataToolParameter):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[prefixed_name].dataset_instance
                        if hasattr(iteration_elements[prefixed_name], 'element_identifier') and iteration_elements[prefixed_name].element_identifier:
                            replacement.element_identifier = iteration_elements[prefixed_name].element_identifier
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[prefixed_name]
                else:
                    replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            else:
                replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            if replacement is not NO_REPLACEMENT:
                found_replacement_keys.add(prefixed_name)
            return replacement

        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values(tool.inputs, execution_state.inputs, callback, no_replacement_value=NO_REPLACEMENT)
        except KeyError as k:
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            # BUGFIX: KeyError has no .message attribute on Python 3; use str().
            message = message_template % (tool.name, str(k))
            raise exceptions.MessageException(message)

        unmatched_input_connections = expected_replacement_keys - found_replacement_keys
        if unmatched_input_connections:
            # log.warn is deprecated; use warning with lazy %-args.
            log.warning("Failed to use input connections for inputs [%s]", unmatched_input_connections)

        param_combinations.append(execution_state.inputs)

    try:
        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=invocation.history,
            collection_info=collection_info,
            workflow_invocation_uuid=invocation.uuid.hex
        )
    except ToolInputsNotReadyException:
        delayed_why = "tool [%s] inputs are not ready, this special tool requires inputs to be ready" % tool.id
        raise DelayedWorkflowEvaluation(why=delayed_why)

    if collection_info:
        step_outputs = dict(execution_tracker.implicit_collections)
    else:
        step_outputs = dict(execution_tracker.output_datasets)
    step_outputs.update(execution_tracker.output_collections)
    progress.set_step_outputs(step, step_outputs)

    jobs = execution_tracker.successful_jobs
    for job in jobs:
        self._handle_post_job_actions(step, job, invocation.replacement_dict)

    if execution_tracker.execution_errors:
        failed_count = len(execution_tracker.execution_errors)
        success_count = len(execution_tracker.successful_jobs)
        all_count = failed_count + success_count
        message = "Failed to create %d out of %s job(s) for workflow step." % (failed_count, all_count)
        raise Exception(message)
    return jobs