def __connect_workflow_steps(self, steps, steps_by_external_id):
    """Second pass of workflow construction: wire up connections between steps.

    For every step, walks its temporary ``temp_input_connections`` mapping
    (input name -> connection dict, or list of connection dicts) and creates
    a ``model.WorkflowStepConnection`` per entry, resolving the externally
    specified step ids used during creation or update through
    ``steps_by_external_id``. The ``temp_input_connections`` attribute is
    deleted from each step once its connections have been processed.

    :param steps: iterable of step objects carrying ``temp_input_connections``.
    :param steps_by_external_id: mapping of external step id -> step object.
    :raises exceptions.MessageException: if a connection lacks the
        ``output_name`` or ``id`` field.
    :raises KeyError: if a connection references an id that is not present
        in ``steps_by_external_id``.
    """
    for step in steps:
        for input_name, conn_list in step.temp_input_connections.items():
            if not conn_list:
                continue
            if not isinstance(conn_list, list):
                # Older style singleton connection
                conn_list = [conn_list]
            for conn_dict in conn_list:
                if 'output_name' not in conn_dict or 'id' not in conn_dict:
                    template = "Invalid connection [%s] - must be dict with output_name and id fields."
                    # One-element tuple: keeps the formatting correct even if
                    # the offending payload is itself a tuple.
                    message = template % (conn_dict,)
                    raise exceptions.MessageException(message)

                # Resolve the referenced step *before* building the
                # connection so a bad id fails with a useful message and
                # without leaving a partially populated connection behind.
                external_id = conn_dict['id']
                if external_id not in steps_by_external_id:
                    raise KeyError(
                        "Failed to find external id %s in %s"
                        % (external_id, list(steps_by_external_id.keys()))
                    )
                output_step = steps_by_external_id[external_id]

                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                conn.output_name = conn_dict['output_name']
                conn.output_step = output_step

                input_subworkflow_step_index = conn_dict.get('input_subworkflow_step_id', None)
                if input_subworkflow_step_index is not None:
                    conn.input_subworkflow_step = step.subworkflow.step_by_index(
                        input_subworkflow_step_index)

        # The temporary attribute only exists to carry data between passes.
        del step.temp_input_connections
def yaml_to_model(has_dict, id_offset=100):
    """Build a ``model.Workflow`` from a YAML string or an already parsed dict.

    Each entry under ``steps`` becomes a ``model.WorkflowStep`` whose
    attributes are set from the entry's keys. ``order_index`` defaults to the
    step's position and ``id`` defaults to a running counter starting at
    ``id_offset``. Steps of type ``subworkflow`` are converted recursively.
    ``inputs`` entries are turned into ``model.WorkflowStepInput`` objects
    with their ``connections``; ``workflow_outputs`` entries are converted
    with ``_dict_to_workflow_output``.

    The caller's dictionaries are not modified: each step definition is
    shallow-copied before defaults are injected.

    :param has_dict: YAML text or a dict with an optional ``steps`` list.
    :param id_offset: first id handed to steps that do not declare one.
    :returns: the populated ``model.Workflow``.
    :raises AssertionError: if a step has no ``type``.
    :raises NotImplementedError: if a step uses ``input_connections``.
    """
    if isinstance(has_dict, str):
        has_dict = yaml.safe_load(has_dict)

    workflow = model.Workflow()
    workflow.steps = []
    # ``or []`` also covers an explicit ``steps: null`` in the YAML.
    for i, step_def in enumerate(has_dict.get("steps") or []):
        # Shallow copy so injected defaults and the converted subworkflow
        # never leak back into the caller's data.
        step = dict(step_def)
        workflow_step = model.WorkflowStep()
        if "order_index" not in step:
            step["order_index"] = i
        if "id" not in step:
            # Fixed offset ids just to test against the assumption
            # order_index != id
            step["id"] = id_offset
            id_offset += 1

        step_type = step.get("type", None)
        if step_type is None:
            # Explicit raise (rather than ``assert``) so the check still
            # runs under ``python -O``.
            raise AssertionError("workflow step %d does not declare a type" % i)

        if step_type == "subworkflow":
            subworkflow_dict = step.pop("subworkflow")
            subworkflow = yaml_to_model(subworkflow_dict, id_offset=id_offset)
            step["subworkflow"] = subworkflow
            id_offset += len(subworkflow.steps)

        for key, value in step.items():
            if key == "input_connections":
                raise NotImplementedError()
            if key == "inputs":
                inputs = []
                for input_name, input_def in value.items():
                    step_input = model.WorkflowStepInput(workflow_step)
                    step_input.name = input_name
                    connections = []
                    for conn_dict in input_def.get("connections", []):
                        conn = model.WorkflowStepConnection()
                        for conn_key, conn_value in conn_dict.items():
                            if conn_key == "@output_step":
                                # Value is an index into the steps built so far.
                                conn_value = workflow.steps[conn_value]
                                conn_key = "output_step"
                            if conn_key == "@input_subworkflow_step":
                                conn_value = step["subworkflow"].step_by_index(conn_value)
                                conn_key = "input_subworkflow_step"
                            setattr(conn, conn_key, conn_value)
                        connections.append(conn)
                    step_input.connections = connections
                    inputs.append(step_input)
                value = inputs
            if key == "workflow_outputs":
                value = [_dict_to_workflow_output(workflow_step, output_def) for output_def in value]
            setattr(workflow_step, key, value)
        workflow.steps.append(workflow_step)

    return workflow
def test_connect_tool_output(self):
    """A connection to a step output resolves to the dataset recorded for it.

    Records an HDA as output ``out1`` of the invocation step at index 2 and
    checks that a connection naming that step and output is replaced by the
    very same HDA object.
    """
    self._setup_workflow(TEST_WORKFLOW_YAML)

    dataset = model.HistoryDatasetAssociation()
    workflow_progress = self._new_workflow_progress()
    recorded_outputs = {"out1": dataset}
    workflow_progress.set_step_outputs(self._invocation_step(2), recorded_outputs)

    tool_connection = model.WorkflowStepConnection()
    tool_connection.output_name = "out1"
    tool_connection.output_step = self._step(2)

    replacement = workflow_progress.replacement_for_connection(tool_connection)
    assert replacement is dataset
def test_connect_data_input(self):
    """A connection to an input step resolves to the dataset supplied for it.

    Registers an HDA under step id 100 in ``inputs_by_step_id``, lets the
    progress object set the outputs for the invocation step at index 0, and
    checks that a connection to that step's ``output`` is replaced by the
    same HDA object.
    """
    self._setup_workflow(TEST_WORKFLOW_YAML)

    dataset = model.HistoryDatasetAssociation()
    # NOTE(review): 100 is presumably the id assigned to the first step of
    # the test workflow - confirm against the workflow fixture.
    self.inputs_by_step_id = {100: dataset}

    workflow_progress = self._new_workflow_progress()
    workflow_progress.set_outputs_for_input(self._invocation_step(0))

    input_connection = model.WorkflowStepConnection()
    input_connection.output_name = "output"
    input_connection.output_step = self._step(0)

    replacement = workflow_progress.replacement_for_connection(input_connection)
    assert replacement is dataset
def extract_steps(trans, history=None, job_ids=None, dataset_ids=None, dataset_collection_ids=None, dataset_names=None, dataset_collection_names=None):
    """Build the list of workflow steps described by the selected history items.

    Creates one ``data_input`` step per entry in ``dataset_ids``, one
    ``data_collection_input`` step per entry in ``dataset_collection_ids``
    and one ``tool`` step per entry in ``job_ids``, connecting tool inputs to
    the outputs of previously created steps via their hids.

    :param trans: transaction passed to ``WorkflowSummary`` and ``step_inputs``.
    :param history: history passed to ``WorkflowSummary``.
    :param job_ids: a job id or list/tuple of job ids (int-convertible).
    :param dataset_ids: an hid or list/tuple of hids of input datasets.
    :param dataset_collection_ids: an hid or list/tuple of hids of input
        dataset collections.
    :param dataset_names: optional names for the dataset input steps,
        positionally matched with ``dataset_ids``.
    :param dataset_collection_names: optional names for the collection input
        steps, positionally matched with ``dataset_collection_ids``.
    :returns: list of ``model.WorkflowStep`` in creation order.
    :raises exceptions.RequestParameterInvalidException: if a collection hid
        is not listed in the summary's ``collection_types``.
    :raises AssertionError: if a job id is not found in the summary's
        ``job_id2representative_job``.
    :raises Exception: if no implicit output collection matches a job output.
    """
    def _as_int_list(value):
        # Accept None, a scalar, or a list/tuple and return a list of ints.
        if value is None:
            return []
        if not isinstance(value, (list, tuple)):
            value = [value]
        return [int(item) for item in value]

    job_ids = _as_int_list(job_ids)
    dataset_ids = _as_int_list(dataset_ids)
    dataset_collection_ids = _as_int_list(dataset_collection_ids)

    # Find each job, for security we (implicitly) check that they are
    # associated with a job in the current history.
    summary = WorkflowSummary(trans, history)
    jobs = summary.jobs
    steps = []
    # hid -> (step producing it, output name on that step)
    hid_to_output_pair = {}

    # Input dataset steps
    for i, hid in enumerate(dataset_ids):
        step = model.WorkflowStep()
        step.type = 'data_input'
        name = dataset_names[i] if dataset_names else "Input Dataset"
        step.tool_inputs = dict(name=name)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)

    # Input dataset collection steps
    for i, hid in enumerate(dataset_collection_ids):
        step = model.WorkflowStep()
        step.type = 'data_collection_input'
        if hid not in summary.collection_types:
            raise exceptions.RequestParameterInvalidException(
                "hid %s does not appear to be a collection" % hid)
        collection_type = summary.collection_types[hid]
        name = dataset_collection_names[i] if dataset_collection_names else "Input Dataset Collection"
        step.tool_inputs = dict(name=name, collection_type=collection_type)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)

    # Tool steps
    for job_id in job_ids:
        if job_id not in summary.job_id2representative_job:
            log.warning("job_id %s not found in job_id2representative_job %s",
                        job_id, summary.job_id2representative_job)
            raise AssertionError(
                "Attempt to create workflow with job not connected to current history"
            )
        job = summary.job_id2representative_job[job_id]
        tool_inputs, associations = step_inputs(trans, job)
        step = model.WorkflowStep()
        step.type = 'tool'
        step.tool_id = job.tool_id
        step.tool_version = job.tool_version
        step.tool_inputs = tool_inputs
        # NOTE: We shouldn't need to do two passes here since only
        #       an earlier job can be used as an input to a later
        #       job.
        for other_hid, input_name in associations:
            if job in summary.implicit_map_jobs:
                # For mapped-over jobs, connect to the input collection
                # rather than to the individual element's hid.
                an_implicit_output_collection = jobs[job][0][1]
                input_collection = an_implicit_output_collection.find_implicit_input_collection(
                    input_name)
                if input_collection:
                    other_hid = input_collection.hid
                else:
                    log.info("Cannot find implicit input collection for %s", input_name)
            if other_hid in hid_to_output_pair:
                other_step, other_name = hid_to_output_pair[other_hid]
                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                # Should always be connected to an earlier step
                conn.output_step = other_step
                conn.output_name = other_name
        steps.append(step)

        # Store created dataset hids
        for assoc in (job.output_datasets + job.output_dataset_collection_instances):
            assoc_name = assoc.name
            if ToolOutputCollectionPart.is_named_collection_part_name(assoc_name):
                continue
            if job in summary.implicit_map_jobs:
                hid = None
                # No early exit: the last matching pair wins, as before.
                for query_assoc_name, dataset_collection in jobs[job]:
                    if query_assoc_name == assoc_name:
                        hid = dataset_collection.hid
                if hid is None:
                    template = "Failed to find matching implicit job - job id is %s, implicit pairs are %s, assoc_name is %s."
                    message = template % (job.id, jobs[job], assoc_name)
                    log.warning(message)
                    raise Exception("Failed to extract job.")
            else:
                if hasattr(assoc, "dataset"):
                    hid = assoc.dataset.hid
                else:
                    hid = assoc.dataset_collection_instance.hid
            hid_to_output_pair[hid] = (step, assoc.name)

    return steps
def connection(**kwds):
    """Create a ``model.WorkflowStepConnection`` populated from keyword arguments.

    Every keyword argument is set as an attribute of the same name on the
    new connection.

    :param kwds: attribute name -> value pairs to set on the connection.
    :returns: the new ``model.WorkflowStepConnection``.
    """
    conn = model.WorkflowStepConnection()
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` is Python 2 only.
    for key, value in kwds.items():
        setattr(conn, key, value)
    return conn
def _workflow_from_dict(self, trans, data, source=None):
    """Create a workflow from a dict, store it in the database and return it.

    RPARK: copied from galaxy.webapps.galaxy.controllers.workflows.py

    :param trans: transaction; supplies the user and the SQLAlchemy session,
        and is switched into workflow building mode.
    :param data: workflow description with a ``name`` and a ``steps``
        mapping. Each step dict must provide ``id``, ``position`` and
        ``input_connections`` and may provide ``post_job_actions``.
    :param source: optional origin label appended to the workflow name.
    :returns: tuple ``(stored, missing_tool_tups)`` where ``stored`` is the
        persisted ``model.StoredWorkflow`` and ``missing_tool_tups`` is a
        list of ``(tool_id, tool_name, tool_version)`` tuples for tools that
        are not available in the local Galaxy instance.
    :raises KeyError: if a required key is missing or a connection refers to
        an unknown step id.
    """
    # Put parameters in workflow mode
    trans.workflow_building_mode = True
    # Create new workflow from incoming dict
    workflow = model.Workflow()
    # If there's a source, put it in the workflow name.
    if source:
        name = "%s (imported from %s)" % (data['name'], source)
    else:
        name = data['name']
    workflow.name = name
    # Assume no errors until we find a step that has some
    workflow.has_errors = False
    # Create each step
    steps = []
    # The editor will provide ids for each step that we don't need to save,
    # but do need to use to make connections
    steps_by_external_id = {}
    # Keep track of tools required by the workflow that are not available in
    # the local Galaxy instance.  Each tuple in the list of missing_tool_tups
    # will be ( tool_id, tool_name, tool_version ).
    missing_tool_tups = []

    # First pass to build step objects and populate basic values.
    # ``values()`` works on both Python 2 and 3 (``iteritems()`` does not)
    # and the mapping keys were never used.
    for step_dict in data['steps'].values():
        # Create the model class for the step
        step = model.WorkflowStep()
        steps.append(step)
        steps_by_external_id[step_dict['id']] = step
        # FIXME: Position should be handled inside module
        step.position = step_dict['position']
        module = module_factory.from_dict(trans, step_dict, secure=False)
        if module.type == 'tool' and module.tool is None:
            # A required tool is not available in the local Galaxy instance.
            missing_tool_tup = (step_dict['tool_id'], step_dict['name'], step_dict['tool_version'])
            if missing_tool_tup not in missing_tool_tups:
                missing_tool_tups.append(missing_tool_tup)
        module.save_to_step(step)
        if step.tool_errors:
            workflow.has_errors = True
        # Stick this in the step temporarily
        step.temp_input_connections = step_dict['input_connections']
        # NOTE: step annotations are intentionally not saved here (RPARK
        # removed user annotation handling because of the API).
        # Unpack and add post-job actions.
        post_job_actions = step_dict.get('post_job_actions', {})
        for pja_dict in post_job_actions.values():
            model.PostJobAction(pja_dict['action_type'],
                                step,
                                pja_dict['output_name'],
                                pja_dict['action_arguments'])

    # Second pass to deal with connections between steps
    for step in steps:
        # Input connections
        for input_name, conn_dict in step.temp_input_connections.items():
            if conn_dict:
                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                conn.output_name = conn_dict['output_name']
                conn.output_step = steps_by_external_id[conn_dict['id']]
        del step.temp_input_connections

    # Order the steps if possible
    attach_ordered_steps(workflow, steps)

    # Connect up
    stored = model.StoredWorkflow()
    stored.name = workflow.name
    workflow.stored_workflow = stored
    stored.latest_workflow = workflow
    stored.user = trans.user

    # Persist
    trans.sa_session.add(stored)
    trans.sa_session.flush()
    return stored, missing_tool_tups
def extract_steps(trans, history=None, job_ids=None, dataset_ids=None, dataset_collection_ids=None):
    """Build the list of workflow steps described by the selected history items.

    Creates one ``data_input`` step per entry in ``dataset_ids``, one
    ``data_collection_input`` step per entry in ``dataset_collection_ids``
    and one ``tool`` step per entry in ``job_ids``, connecting tool inputs to
    the outputs of previously created steps via their hids.

    :param trans: transaction passed to ``WorkflowSummary`` and ``step_inputs``.
    :param history: history passed to ``WorkflowSummary``.
    :param job_ids: a job id or list/tuple of job ids (int-convertible).
    :param dataset_ids: an hid or list/tuple of hids of input datasets.
    :param dataset_collection_ids: an hid or list/tuple of hids of input
        dataset collections.
    :returns: list of ``model.WorkflowStep`` in creation order.
    :raises exceptions.RequestParameterInvalidException: if a collection hid
        is not listed in the summary's ``collection_types``.
    :raises AssertionError: if a job id does not belong to a job in the
        summary.
    :raises Exception: if no implicit output collection matches a job output.
    """
    def _as_int_list(value):
        # Accept None, a scalar, or a list/tuple and return a list of ints.
        if value is None:
            return []
        if not isinstance(value, (list, tuple)):
            value = [value]
        return [int(item) for item in value]

    job_ids = _as_int_list(job_ids)
    dataset_ids = _as_int_list(dataset_ids)
    dataset_collection_ids = _as_int_list(dataset_collection_ids)

    # Find each job, for security we (implicitly) check that they are
    # associated with a job in the current history.
    summary = WorkflowSummary(trans, history)
    jobs = summary.jobs
    jobs_by_id = {job.id: job for job in jobs}
    steps = []
    # hid -> (step producing it, output name on that step)
    hid_to_output_pair = {}

    # Input dataset steps
    for hid in dataset_ids:
        step = model.WorkflowStep()
        step.type = 'data_input'
        step.tool_inputs = dict(name="Input Dataset")
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)

    # Input dataset collection steps
    for hid in dataset_collection_ids:
        step = model.WorkflowStep()
        step.type = 'data_collection_input'
        if hid not in summary.collection_types:
            raise exceptions.RequestParameterInvalidException(
                "hid %s does not appear to be a collection" % hid)
        collection_type = summary.collection_types[hid]
        step.tool_inputs = dict(name="Input Dataset Collection", collection_type=collection_type)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)

    # Tool steps
    for job_id in job_ids:
        if job_id not in jobs_by_id:
            # ``Logger.warn`` is a deprecated alias; use ``warning``.
            log.warning("job_id %s not found in jobs_by_id %s", job_id, jobs_by_id)
            raise AssertionError(
                "Attempt to create workflow with job not connected to current history"
            )
        job = jobs_by_id[job_id]
        tool_inputs, associations = step_inputs(trans, job)
        step = model.WorkflowStep()
        step.type = 'tool'
        step.tool_id = job.tool_id
        step.tool_inputs = tool_inputs
        # NOTE: We shouldn't need to do two passes here since only
        #       an earlier job can be used as an input to a later
        #       job.
        for other_hid, input_name in associations:
            if job in summary.implicit_map_jobs:
                # For mapped-over jobs, connect to the input collection
                # rather than to the individual element's hid.
                an_implicit_output_collection = jobs[job][0][1]
                input_collection = an_implicit_output_collection.find_implicit_input_collection(
                    input_name)
                if input_collection:
                    other_hid = input_collection.hid
            if other_hid in hid_to_output_pair:
                other_step, other_name = hid_to_output_pair[other_hid]
                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                # Should always be connected to an earlier step
                conn.output_step = other_step
                conn.output_name = other_name
        steps.append(step)

        # Store created dataset hids
        for assoc in job.output_datasets:
            if job in summary.implicit_map_jobs:
                hid = None
                # No early exit: the last matching pair wins, as before.
                for query_assoc_name, dataset_collection in jobs[job]:
                    if query_assoc_name == assoc.name:
                        hid = dataset_collection.hid
                if hid is None:
                    log.warning("Failed to find matching implicit job.")
                    raise Exception("Failed to extract job.")
            else:
                hid = assoc.dataset.hid
            hid_to_output_pair[hid] = (step, assoc.name)

    return steps