def createWorkflowStep(workflow, position, id, type, language="KNIME", extension=None):
    """Add step number ``position`` to ``workflow`` and return the workflow.

    The step runs ``<id>.cwl`` and always takes an ``inputModule`` input fed
    from the workflow-level input ``inputModule<position>``. Unless ``type``
    contains ``"external"``, the step also takes ``potentialCases``: from the
    workflow-level input of the same name for the first step, otherwise from
    the ``output`` of the preceding step. When ``extension`` is truthy, a
    workflow output ``cases`` is registered that globs ``*.<extension>``
    from this step's output.
    """
    # One binding instance is shared by every workflow-level input below.
    shared_binding = cwlgen.CommandLineBinding()
    step_name = str(position)
    module_input_id = "inputModule" + step_name

    # --- the step itself -------------------------------------------------
    step = cwlgen.WorkflowStep(step_name, id + ".cwl")
    step.inputs.append(cwlgen.WorkflowStepInput("inputModule", module_input_id))

    consumes_cases = "external" not in type
    is_first_step = position == 1
    if consumes_cases:
        if is_first_step:
            cases_input = cwlgen.WorkflowStepInput("potentialCases",
                                                   "potentialCases")
        else:
            cases_input = cwlgen.WorkflowStepInput(
                "potentialCases", source=str(position - 1) + "/output")
        step.inputs.append(cases_input)

    step.out.append(cwlgen.WorkflowStepOutput("output"))
    # Rebinds the attribute to a new list instead of mutating it in place.
    workflow.steps = workflow.steps + [step]

    # --- workflow-level inputs -------------------------------------------
    if is_first_step and consumes_cases:
        workflow.inputs.append(
            cwlgen.InputParameter(
                "potentialCases",
                param_type='File',
                input_binding=shared_binding,
                doc="Input of potential cases for processing"))

    workflow.inputs.append(
        cwlgen.InputParameter(
            module_input_id,
            param_type='File',
            input_binding=shared_binding,
            doc=language[0].upper() + language[1:] + " implementation unit"))

    # --- workflow-level output (only when an extension is given) ----------
    if extension:
        workflow.outputs.append(
            cwlgen.WorkflowOutputParameter(
                param_id='cases',
                param_type="File",
                output_source=step_name + "/output",
                output_binding=cwlgen.CommandOutputBinding(
                    glob="*." + extension)))

    return workflow
cwl_tool.inputs.append(no_of_cores)

# Workflow-level output, sourced from the single 'west_tool' step below.
output = cwlgen.workflow.WorkflowOutputParameter(
    'output_file',
    param_type='File[]',
    output_source='west_tool/west_output_file',
    label='Output File generated with west code',
)
cwl_tool.outputs.append(output)

# The step runs west.cwl and scatters pairwise (dotproduct) over
# 'script_type' and 'input_file'.
workflow_west = cwlgen.workflow.WorkflowStep(
    'west_tool',
    'west.cwl',
    doc='runs west.cwl',
    scatter=['script_type', 'input_file'],
    scatter_method='dotproduct',
)

# (step input id, workflow-level source) pairs, in the order they are added.
west_step_wiring = (
    ('script_file', 'script_file'),
    ('input_file', 'input_file_array'),
    ('URLs', 'URLs'),
    ('script_type', 'script_type_array'),
    ('no_of_cores', 'no_of_cores'),
)
workflow_west.inputs.extend(
    [cwlgen.WorkflowStepInput(step_input_id, source_id)
     for step_input_id, source_id in west_step_wiring])

workflow_west.out.append(cwlgen.WorkflowStepOutput('west_output_file'))

# Register the step and write the workflow description to disk.
cwl_tool.steps.append(workflow_west)
cwl_tool.export("west_workflow.cwl")
def _convert_composite(run, tmpdir, basedir, filename=None):
    """Converts a workflow made up of several steps.

    Each subprocess of ``run`` is converted with
    ``CWLConverter._convert_step`` and becomes one ``WorkflowStep`` named
    ``step_<i>``. Step inputs are wired either to a new or reused top-level
    workflow input (``input_<n>``) or to the output of an earlier step that
    produced the same path. Arguments become top-level inputs
    ``argument_<n>`` with their value as default.

    :param run: the composite run to convert.
    :param tmpdir: directory the resulting CWL file is written to.
    :param basedir: base directory used to make default input paths absolute.
    :param filename: name of the exported file; defaults to
        ``parent_<uuid4>.cwl``.
    :returns: tuple ``(workflow_object, path)`` with the cwlgen workflow and
        the path it was exported to.
    """
    inputs = {}
    arguments = {}
    outputs = {}
    # NOTE(review): collected but not read anywhere in this function.
    consumed_outputs = set()
    steps = []

    input_index = 1
    argument_index = 1

    subprocesses, _ = _recurse_subprocesses(run, 1)

    # preprocess to add dummy outputs in case of output directories
    previous_output_dirs = defaultdict(list)
    for _, subprocess in subprocesses:
        for step_input in subprocess.inputs:
            entity = step_input.consumes
            key = (entity.commit.hexsha, entity.path)
            if key not in previous_output_dirs:
                continue

            for previous_process in previous_output_dirs[key]:
                previous_process.outputs.append(
                    CommandOutput(produces=entity, create_folder=False))

        for output in subprocess.outputs:
            entity = output.produces
            if not isinstance(entity, Collection):
                continue

            for e in entity.entities:
                # only members created in the same commit as the collection
                if e.commit.hexsha != entity.commit.hexsha:
                    continue

                key = (e.commit.hexsha, e.path)
                previous_output_dirs[key].append(subprocess)

    # Convert workflow steps
    for i, subprocess in subprocesses:
        _, path = CWLConverter._convert_step(subprocess, tmpdir, basedir)
        step = WorkflowStep('step_{}'.format(i), path)

        for step_input in subprocess.inputs:
            input_path = step_input.consumes.path

            # Inputs mapped to a stream use the fixed id 'input_stdin'.
            # The id computed here is now actually used for the step input;
            # previously it was computed and then ignored.
            sanitized_id = step_input.sanitized_id
            if step_input.mapped_to:
                sanitized_id = 'input_stdin'

            if input_path in inputs:
                # already used as top-level input elsewhere, reuse
                source = inputs[input_path]
            elif input_path in outputs:
                # output of a previous step, refer to it
                output_id, producer_step_id = outputs[input_path]
                consumed_outputs.add(output_id)
                source = '{}/{}'.format(producer_step_id, output_id)
            else:
                # input isn't output and doesn't exist yet, add new
                inputs[input_path] = 'input_{}'.format(input_index)
                input_index += 1
                source = inputs[input_path]

            step.inputs.append(
                cwlgen.WorkflowStepInput(sanitized_id, source=source))

        for argument in subprocess.arguments:
            argument_id = 'argument_{}'.format(argument_index)
            arguments[argument_id] = argument.value
            step.inputs.append(
                cwlgen.WorkflowStepInput(argument.sanitized_id,
                                         source=argument_id))
            argument_index += 1

        for output in subprocess.outputs:
            sanitized_id = output.sanitized_id
            if output.mapped_to:
                sanitized_id = 'output_{}'.format(
                    output.mapped_to.stream_type)

            outputs[output.produces.path] = (sanitized_id, step.id)
            step.out.append(cwlgen.WorkflowStepOutput(sanitized_id))

        steps.append(step)

    workflow_object = cwlgen.Workflow(str(uuid4()), cwl_version='v1.0')
    workflow_object.hints = []
    workflow_object.requirements = []

    # check types of paths and add as top level inputs/outputs
    for path, id_ in inputs.items():
        type_ = 'Directory' if os.path.isdir(path) else 'File'
        workflow_object.inputs.append(
            cwlgen.InputParameter(
                id_,
                param_type=type_,
                default={
                    'path': os.path.abspath(os.path.join(basedir, path)),
                    'class': type_
                }))

    for id_, value in arguments.items():
        value, type_ = _get_argument_type(value)
        workflow_object.inputs.append(
            cwlgen.InputParameter(id_, param_type=type_, default=value))

    for index, (path, (id_, step_id)) in enumerate(outputs.items(), 1):
        type_ = 'Directory' if os.path.isdir(path) else 'File'
        workflow_object.outputs.append(
            cwlgen.WorkflowOutputParameter(
                'output_{}'.format(index),
                output_source='{}/{}'.format(step_id, id_),
                param_type=type_))

    workflow_object.steps.extend(steps)

    if not filename:
        filename = 'parent_{}.cwl'.format(uuid4())

    path = os.path.join(tmpdir, filename)
    workflow_object.export(path)

    return workflow_object, path
# Workflow-level input: the array that step 'echo' scatters over.
input = cwlgen.InputParameter('message_array', param_type='string[]')
cwl_workflow.inputs.append(input)

# Workflow-level output.
# NOTE(review): the source names a step 'cat_2', but the steps declared
# below are 'echo' and 'cat' -- confirm against the rest of the script.
output = cwlgen.WorkflowOutputParameter(
    'output',
    output_source='cat_2/output',
    param_type='File',
)
cwl_workflow.outputs.append(output)

# Declare both steps first, then wire them.
step1 = cwlgen.WorkflowStep('echo', run='1st-tool.cwl', scatter='message')
step2 = cwlgen.WorkflowStep('cat', run='./cat.cwl')

# Step 1 (echo): one run per element of 'message_array'.
step1_input = cwlgen.WorkflowStepInput('message', source='message_array')
step1.inputs.append(step1_input)
step1_output = cwlgen.WorkflowStepOutput('echo_out')
step1.out.append(step1_output)

# Step 2 (cat): consumes what step 1 produced.
step2_input = cwlgen.WorkflowStepInput('files', source='echo/echo_out')
step2.inputs.append(step2_input)
step2_output = cwlgen.WorkflowStepOutput('output')
def _step_id(tool_path):
    """Return the workflow step id for a tool file: its name without extension."""
    return os.path.splitext(tool_path)[0]


def _with_id(identifier, spec):
    """Return a copy of ``spec`` that also carries ``identifier`` under ``'id'``."""
    entry = {'id': identifier}
    entry.update(spec)
    return entry


def main():
    """Interactively chain command line tools into one CWL workflow.

    Command line: ``<workflow file> <tool.cwl> [<tool.cwl> ...]``. The tools
    become workflow steps in the given order. All inputs of the first tool
    become workflow inputs, all outputs of the last tool become workflow
    outputs. For every later tool, each ``File``/``Directory`` input is
    connected to a source: automatically when the previous step has exactly
    one output and the tool exactly one such input, otherwise by asking the
    user. The workflow is exported to ``<workflow file>``.

    Every step input is registered under the id of the consuming tool's own
    input, and every source refers to the producing step by its step id
    (tool file name without extension).
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: {} <workflow file> <tool.cwl> [<tool.cwl> ...]".format(
            sys.argv[0]))

    workflowName = sys.argv[1]
    listOfNames = sys.argv[2:]

    # Get information about workflow from the user
    questions = [
        inquirer.Text(
            'label',
            message=
            "Please provide some information about the workflow you are creating"
        )
    ]
    answers = inquirer.prompt(questions)

    # Initialize the tool we want to build
    cwl_tool = cwlgen.Workflow(workflow_id=os.path.splitext(workflowName)[0],
                               label=answers['label'],
                               cwl_version='v1.1')

    # Parse the tools given on the command line into id -> spec mappings.
    CLT_Inputs = {}
    CLT_Outputs = {}
    for tool_name in listOfNames:
        with open(os.path.abspath(tool_name), 'r') as cwl_file:
            cwl_dict = yaml.safe_load(cwl_file)

        # A document that is not a mapping (e.g. an empty file) is treated
        # like any other format mismatch instead of being skipped silently.
        if (not isinstance(cwl_dict, dict)
                or not isinstance(cwl_dict.get('inputs'), dict)
                or not isinstance(cwl_dict.get('outputs'), dict)):
            print(
                "Your CWL files are not all of the same format. Please use ToolJig to make sure they all"
                " have the same format.")
            sys.exit()

        CLT_Inputs[tool_name] = cwl_dict['inputs']
        CLT_Outputs[tool_name] = cwl_dict['outputs']

    # First step: every input of the first tool is also a workflow input.
    first_tool = listOfNames[0]
    step = cwlgen.workflow.WorkflowStep(step_id=_step_id(first_tool),
                                        run=first_tool)
    workflowInputs = []
    for item, spec in CLT_Inputs[first_tool].items():
        cwl_tool.inputs.append(
            cwlgen.workflow.InputParameter(param_id=item,
                                           label=spec.get('label'),
                                           doc=spec.get('doc'),
                                           param_type=spec.get('type')))
        workflowInputs.append({
            'ID': item,
            'Label': spec.get('label'),
            'Type': spec.get('type')
        })
        step.inputs.append(cwlgen.WorkflowStepInput(input_id=item, source=item))

    for output_id in CLT_Outputs[first_tool]:
        step.out.append(cwlgen.WorkflowStepOutput(output_id=output_id))
    cwl_tool.steps.append(step)

    # LARGE LOOP: build each following step and decide where its inputs come from
    for i, (current_tool, next_tool) in enumerate(
            zip(listOfNames, listOfNames[1:])):
        previous_step_id = _step_id(current_tool)

        # All outputs of step "i"
        importantOutputs = [
            _with_id(output_id, spec)
            for output_id, spec in CLT_Outputs[current_tool].items()
        ]

        # Inputs of step "i+1" that are of type Directory or File
        importantInputs = [
            _with_id(input_id, spec)
            for input_id, spec in CLT_Inputs[next_tool].items()
            if spec['type'] == 'File' or spec['type'] == 'Directory'
        ]

        step = cwlgen.workflow.WorkflowStep(step_id=_step_id(next_tool),
                                            run=next_tool)

        if len(importantInputs) == 1 and len(importantOutputs) == 1:
            # Unambiguous: the single output feeds the single input.
            step.inputs.append(
                cwlgen.WorkflowStepInput(
                    input_id=importantInputs[0].get('id'),
                    source=previous_step_id + "/" +
                    importantOutputs[0].get('id')))
        else:
            for input_number, m in enumerate(importantInputs, 1):
                externalInputToName = 'It is an external input that has yet to be referenced'
                previousOutput = 'It is the output of the workflow, but not the most recently previous step'

                # Provide options ------------------------------------------
                print("Input ", input_number, "/", len(importantInputs),
                      "of Command Line File ", i + 1, "/", len(listOfNames))
                print(
                    "Your inputs and outputs don't match. Please specify where this input should be retrieved from:",
                    m)
                print("")

                options = ['It is the output of the previous step:']
                options.extend(importantOutputs)
                first_index = len(importantOutputs)
                if cwl_tool.inputs:
                    options.append(
                        'It is an external input that already exists:')
                    options.extend(workflowInputs)
                    # Indices of the heading rows, which cannot be selected.
                    captions = [
                        0, first_index + 1,
                        first_index + len(cwl_tool.inputs) + 2
                    ]
                else:
                    # Only the first heading and "Other"
                    captions = [0, first_index + 1]
                options.append('Other')
                options.append(externalInputToName)
                options.append(previousOutput)

                selection = options[cutie.select(options,
                                                 caption_indices=captions)]

                # Logic for selection --------------------------------------
                if selection == externalInputToName:
                    questions = [
                        inquirer.Text(
                            'newID',
                            message="What is the ID of the new input?"),
                        inquirer.Text(
                            'newLabel',
                            message="What is the label of the new input?")
                    ]
                    answers = inquirer.prompt(questions)

                    # add it as a master input
                    cwl_tool.inputs.append(
                        cwlgen.workflow.InputParameter(
                            param_id=answers.get('newID'),
                            label=answers.get('newLabel'),
                            param_type=m.get('type')))
                    workflowInputs.append({
                        'ID': answers.get('newID'),
                        'Label': answers.get('newLabel'),
                        'Type': m.get('type')
                    })
                    source = answers.get('newID')
                elif selection == previousOutput:
                    print(
                        "\nPlease select which previous output corresponds to your input:"
                    )
                    listOfAllOutputs = []
                    for earlier_tool in listOfNames[:i + 1]:
                        for output_id, spec in CLT_Outputs[
                                earlier_tool].items():
                            toAdd = {'ID': output_id, 'From step': earlier_tool}
                            toAdd.update(spec)
                            listOfAllOutputs.append(toAdd)
                    selection = listOfAllOutputs[cutie.select(
                        listOfAllOutputs)]
                    source = (_step_id(selection['From step']) + "/" +
                              selection['ID'])
                elif selection in workflowInputs:
                    print(selection)
                    source = selection.get('ID')
                else:
                    # One of the previous step's outputs was picked.
                    source = previous_step_id + "/" + selection.get('id')

                step.inputs.append(
                    cwlgen.WorkflowStepInput(input_id=m.get('id'),
                                             source=source))

        for output_id in CLT_Outputs[next_tool]:
            step.out.append(cwlgen.WorkflowStepOutput(output_id=output_id))
        cwl_tool.steps.append(step)

    # The outputs of the last tool are the outputs of the whole workflow.
    last_tool = listOfNames[-1]
    for x in (_with_id(output_id, spec)
              for output_id, spec in CLT_Outputs[last_tool].items()):
        cwl_tool.outputs.append(
            cwlgen.workflow.WorkflowOutputParameter(
                param_id=x.get('id'),
                doc=x.get('doc'),
                param_type=x.get('type'),
                output_source=_step_id(last_tool) + "/" + x.get('id')))

    cwl_tool.export(workflowName)
def translate_step(
        step: StepNode,
        is_nested_tool=False,
        resource_overrides: Dict[str, str] = None,
        use_run_ref=True,
):
    """Translate a workflow ``StepNode`` into a ``cwlgen.WorkflowStep``.

    :param step: the step node to translate.
    :param is_nested_tool: when referencing the tool by file, use
        ``<tool>.cwl`` instead of ``tools/<tool>.cwl``.
    :param resource_overrides: mapping of extra step input id -> source; each
        entry is added as an additional step input. ``None`` (the default)
        means no overrides.
    :param use_run_ref: if true, ``run`` is a file reference; otherwise the
        tool (or sub-workflow) is translated and embedded directly.
    :returns: the populated ``cwlgen.WorkflowStep``.
    :raises Exception: if a required input (not optional and without a
        default) has no source connection.
    """
    # The default used to be the typing object ``Dict[str, str]`` itself,
    # which is not a usable mapping.
    if resource_overrides is None:
        resource_overrides = {}

    tool = step.tool

    if use_run_ref:
        run_ref = ("{tool}.cwl" if is_nested_tool else
                   "tools/{tool}.cwl").format(tool=tool.id())
    else:
        from janis_core.workflow.workflow import Workflow

        has_resources_overrides = len(resource_overrides) > 0
        if isinstance(tool, Workflow):
            run_ref = CwlTranslator.translate_workflow_to_all_in_one(
                tool, with_resource_overrides=has_resources_overrides)
        else:
            run_ref = CwlTranslator.translate_tool_internal(
                tool, True, with_resource_overrides=has_resources_overrides)

    cwlstep = cwlgen.WorkflowStep(
        step_id=step.id(),
        run=run_ref,
        # label=step.step.label,
        doc=step.doc,
        scatter=None,  # Filled by StepNode
        scatter_method=None,  # Filled by StepNode
    )

    cwlstep.out = [
        cwlgen.WorkflowStepOutput(output_id=o.tag)
        for o in step.tool.outputs()
    ]

    ins = step.inputs()

    for k in ins:
        inp = ins[k]
        if k not in step.sources:
            # Unconnected inputs are fine when optional or defaulted. The
            # default is compared against None so that falsy defaults such
            # as 0, False or "" still count as defaults.
            if inp.input_type.optional or inp.default is not None:
                continue
            raise Exception(
                f"Error when building connections for cwlstep '{step.id()}', "
                f"could not find required connection: '{k}'")

        edge = step.sources[k]
        ss = edge.slashed_source()
        link_merge = None

        if (ss is not None and not isinstance(ss, list)
                and isinstance(inp.input_type, Array)):
            start = edge.source().start
            outssval = start.outputs()
            source_type = (first_value(outssval) if len(outssval) == 1 else
                           outssval[edge.source().stag]).output_type
            source_is_scattered = isinstance(start, StepNode) and start.scatter
            # A single non-array, non-scattered source feeding an array
            # input is wrapped in a list and merged as nested.
            if not isinstance(source_type, Array) and not source_is_scattered:
                ss = [ss]
                link_merge = "merge_nested"

        d = cwlgen.WorkflowStepInput(
            input_id=inp.tag,
            source=ss,
            # this will need to change when edges have multiple source_map
            link_merge=link_merge,
            value_from=None,
        )

        cwlstep.inputs.append(d)

    for r in resource_overrides:
        cwlstep.inputs.append(
            cwlgen.WorkflowStepInput(input_id=r, source=resource_overrides[r]))

    if step.scatter:
        if len(step.scatter.fields) > 1:
            Logger.info(
                "Discovered more than one scatterable field on cwlstep '{step_id}', "
                "deciding scatterMethod to be '{method}'".format(
                    step_id=step.id(), method=step.scatter.method))
            cwlstep.scatterMethod = step.scatter.method.cwl()
        cwlstep.scatter = step.scatter.fields

    return cwlstep
def create_workflow(drops, cwl_filename, buffer):
    """
    Create a CWL workflow from a given Physical Graph Template

    A CWL workflow consists of multiple files. A single file describing the
    workflow, and multiple files each describing one step in the workflow. All
    the files are combined into one zip file, so that a single file can be
    downloaded by the user.

    NOTE: CWL only supports workflow steps that are bash shell applications
          Non-BashShellApp nodes are unable to be implemented in CWL

    :param drops: sequence of node dictionaries; the keys read here are
        'dt', 'inputs' and 'outputs'.
    :param cwl_filename: name of the workflow file inside the zip archive.
    :param buffer: file name or file-like object the zip archive is written to.
    :raises Exception: if a node's 'dt' is not in SUPPORTED_CATEGORIES.
    :raises KeyError: if a step input is not listed as an output of any node.
    """
    # search the drops for non-BashShellApp drops,
    # if found, the graph cannot be translated into CWL
    for index, node in enumerate(drops):
        dataType = node.get('dt', '')
        if dataType not in SUPPORTED_CATEGORIES:
            raise Exception('Node {0} has an unsupported category: {1}'.format(
                index, dataType))

    # create list for command line tool description files
    step_files = []

    # create the workflow
    cwl_workflow = cwlgen.Workflow('', label='', doc='', cwl_version='v1.0')

    # Map every produced id to the step output that provides it. Steps expose
    # one 'output_file_<k>' per output, so every output is recorded, not only
    # the first. A node's first output keeps precedence over later-position
    # outputs, so ids that were already mapped keep the same source.
    files = {}
    for index, node in enumerate(drops):
        for position, output_id in enumerate(node.get('outputs', [])):
            source = "step" + str(index) + "/output_file_" + str(position)
            if position == 0:
                files[output_id] = source
            else:
                files.setdefault(output_id, source)

    # add steps to the workflow
    for index, node in enumerate(drops):
        if node.get('dt', '') != 'BashShellApp':
            continue

        inputs = node.get('inputs', [])
        outputs = node.get('outputs', [])

        # create command line tool description
        filename = "step" + str(index) + ".cwl"
        contents = create_command_line_tool(node)

        # add contents of command line tool description to list of step files
        step_files.append({"filename": filename, "contents": contents})

        # create step
        step = cwlgen.WorkflowStep("step" + str(index), run=filename)

        # add inputs to step (separate counter: must not clobber 'index')
        for position, input_id in enumerate(inputs):
            step.inputs.append(
                cwlgen.WorkflowStepInput('input_file_' + str(position),
                                         source=files[input_id]))

        # add outputs to step
        for position in range(len(outputs)):
            step.out.append(
                cwlgen.WorkflowStepOutput('output_file_' + str(position)))

        # add step to workflow
        cwl_workflow.steps.append(step)

    # put workflow and command line tool description files all together in a
    # zip; the context manager closes the archive even if a write fails
    with ZipFile(buffer, 'w') as zipObj:
        for step_file in step_files:
            zipObj.writestr(step_file["filename"],
                            six.b(step_file["contents"]))
        zipObj.writestr(cwl_filename, six.b(cwl_workflow.export_string()))