Ejemplo n.º 1
0
def createWorkflowStep(workflow,
                       position,
                       id,
                       type,
                       language="KNIME",
                       extension=None):
    """Add one numbered step, plus the workflow inputs/outputs it needs.

    Args:
        workflow: cwlgen workflow object that is extended and returned.
        position: Step number. It is used as the step id, to name the
            ``inputModule<position>`` workflow input and, for steps after
            the first, to reference ``<position - 1>/output``.
        id: Base name of the step's tool description; the step runs
            ``<id>.cwl``.
        type: Step type. When it contains ``"external"`` the step is not
            wired to ``potentialCases``.
        language: Name used in the doc text of the ``inputModule`` workflow
            input; its first letter is upper-cased.
        extension: If truthy, a workflow output ``cases`` is added that
            takes this step's ``output`` and globs ``*.<extension>``.

    Returns:
        The same ``workflow`` object that was passed in.
    """
    # NOTE: one binding instance is shared by every InputParameter below,
    # exactly as before.
    file_binding = cwlgen.CommandLineBinding()
    is_external = "external" in type
    step_name = str(position)
    module_input_id = "inputModule" + step_name

    # Individual step input

    workflow_step = cwlgen.WorkflowStep(step_name, id + ".cwl")
    workflow_step.inputs.append(
        cwlgen.WorkflowStepInput("inputModule", module_input_id))

    if not is_external:
        if position == 1:
            # The first step reads the workflow-level input directly.
            workflow_step.inputs.append(
                cwlgen.WorkflowStepInput("potentialCases", "potentialCases"))
        else:
            # Later steps consume the output of the step just before them.
            workflow_step.inputs.append(
                cwlgen.WorkflowStepInput("potentialCases",
                                         source=str(position - 1) + "/output"))

    # Individual step output

    workflow_step.out.append(cwlgen.WorkflowStepOutput("output"))
    # Rebinds ``steps`` to a new list instead of appending in place.
    workflow.steps = workflow.steps + [workflow_step]

    # Overall workflow input

    if position == 1 and not is_external:
        workflow.inputs.append(
            cwlgen.InputParameter(
                "potentialCases",
                param_type='File',
                input_binding=file_binding,
                doc="Input of potential cases for processing"))

    # Slicing (rather than indexing) keeps an empty ``language`` from raising
    # IndexError; for any non-empty string the result is unchanged.
    language_label = language[:1].upper() + language[1:]
    workflow.inputs.append(
        cwlgen.InputParameter(
            module_input_id,
            param_type='File',
            input_binding=file_binding,
            doc=language_label + " implementation unit"))

    # Overall workflow output

    if extension:
        workflow.outputs.append(
            cwlgen.WorkflowOutputParameter(
                param_id='cases',
                param_type="File",
                output_source=step_name + "/output",
                output_binding=cwlgen.CommandOutputBinding(
                    glob="*." + extension)))

    return workflow
Ejemplo n.º 2
0
    cwl_tool.inputs.append(no_of_cores)

    
    # Path to fetch outputs from
    output = cwlgen.workflow.WorkflowOutputParameter('output_file',
                                           param_type='File[]',
                                           output_source='west_tool/west_output_file',
                                           label='Output File generated with west code')
    cwl_tool.outputs.append(output)

    # Toggle between pw and wstat for pw.in and wstat.in
    workflow_west = cwlgen.workflow.WorkflowStep('west_tool',
                                               'west.cwl',
                                               doc='runs west.cwl',
                                               scatter=['script_type','input_file'],
                                               scatter_method='dotproduct')
    # Add workflow step inputs
    workflow_west_script_file = cwlgen.WorkflowStepInput('script_file','script_file')
    workflow_west_input_file = cwlgen.WorkflowStepInput('input_file','input_file_array')
    workflow_west_URLs = cwlgen.WorkflowStepInput('URLs','URLs')
    workflow_west_script_type = cwlgen.WorkflowStepInput('script_type','script_type_array')
    workflow_west_cores =  cwlgen.WorkflowStepInput('no_of_cores','no_of_cores')
    workflow_west.inputs.extend((workflow_west_script_file,workflow_west_input_file,workflow_west_URLs,workflow_west_script_type,workflow_west_cores))
    
    workflow_west_output = cwlgen.WorkflowStepOutput('west_output_file')
    workflow_west.out.append(workflow_west_output)
    #Add first step
    cwl_tool.steps.append(workflow_west)
    #Export cwl 
    cwl_tool.export("west_workflow.cwl")
Ejemplo n.º 3
0
    def _convert_composite(run, tmpdir, basedir, filename=None):
        """Converts a workflow made up of several steps.

        Every subprocess of ``run`` is converted with
        ``CWLConverter._convert_step`` and wrapped in a ``WorkflowStep``.
        Paths consumed by a step are wired either to an existing top-level
        input, to the output of an earlier step, or to a freshly numbered
        top-level input. Arguments become top-level inputs with defaults.
        Every produced path is exported as a workflow output.

        :param run: composite run whose subprocesses are converted.
        :param tmpdir: directory the step files and workflow file go to.
        :param basedir: base directory used to build the absolute default
            path of top-level inputs.
        :param filename: name of the workflow file inside ``tmpdir``; a
            ``parent_<uuid>.cwl`` name is generated when it is not given.
        :returns: tuple of the cwlgen workflow object and the path it was
            exported to.
        """
        inputs = {}  # consumed path -> id of the top-level workflow input
        arguments = {}  # top-level argument id -> argument value
        outputs = {}  # produced path -> (step output id, producing step id)
        steps = []

        input_index = 1
        argument_index = 1

        subprocesses, _ = _recurse_subprocesses(run, 1)

        # preprocess to add dummy outputs in case of output directories
        previous_output_dirs = defaultdict(list)
        for _, subprocess in subprocesses:
            for input_ in subprocess.inputs:
                entity = input_.consumes
                key = (entity.commit.hexsha, entity.path)
                if key not in previous_output_dirs:
                    continue

                # The consumed entity belongs to a collection produced by an
                # earlier process: register it as an explicit output there.
                for previous_process in previous_output_dirs[key]:
                    previous_process.outputs.append(
                        CommandOutput(produces=entity, create_folder=False))

            for output in subprocess.outputs:
                entity = output.produces
                if not isinstance(entity, Collection):
                    continue

                for e in entity.entities:
                    # Only members from the same commit as the collection
                    # are tracked.
                    if e.commit.hexsha != entity.commit.hexsha:
                        continue

                    key = (e.commit.hexsha, e.path)
                    previous_output_dirs[key].append(subprocess)

        # Convert workflow steps
        for i, subprocess in subprocesses:
            _, path = CWLConverter._convert_step(subprocess, tmpdir, basedir)
            step = WorkflowStep('step_{}'.format(i), path)

            for input_ in subprocess.inputs:
                input_path = input_.consumes.path

                # Inputs mapped to a stream are wired under 'input_stdin',
                # mirroring how mapped outputs are renamed below. This id was
                # previously computed but never used.
                # NOTE(review): presumably this is the id the generated step
                # file declares for stdin -- confirm against _convert_step.
                sanitized_id = input_.sanitized_id
                if input_.mapped_to:
                    sanitized_id = 'input_stdin'

                if input_path in inputs:
                    # already used as top-level input elsewhere, reuse
                    source = inputs[input_path]
                elif input_path in outputs:
                    # output of a previous step, refer to it
                    output_id, producer_id = outputs[input_path]
                    source = '{}/{}'.format(producer_id, output_id)
                else:
                    # input isn't output and doesn't exist yet, add new
                    source = 'input_{}'.format(input_index)
                    inputs[input_path] = source
                    input_index += 1

                step.inputs.append(
                    cwlgen.WorkflowStepInput(sanitized_id, source=source))

            for argument in subprocess.arguments:
                argument_id = 'argument_{}'.format(argument_index)
                arguments[argument_id] = argument.value
                step.inputs.append(
                    cwlgen.WorkflowStepInput(argument.sanitized_id,
                                             source=argument_id))
                argument_index += 1

            for output in subprocess.outputs:
                sanitized_id = output.sanitized_id

                if output.mapped_to:
                    sanitized_id = 'output_{}'.format(
                        output.mapped_to.stream_type)
                outputs[output.produces.path] = (sanitized_id, step.id)
                step.out.append(cwlgen.WorkflowStepOutput(sanitized_id))

            steps.append(step)

        workflow_object = cwlgen.Workflow(str(uuid4()), cwl_version='v1.0')
        workflow_object.hints = []
        workflow_object.requirements = []

        # check types of paths and add as top level inputs/outputs
        for path, id_ in inputs.items():
            type_ = 'Directory' if os.path.isdir(path) else 'File'
            workflow_object.inputs.append(
                cwlgen.InputParameter(id_,
                                      param_type=type_,
                                      default={
                                          'path':
                                          os.path.abspath(
                                              os.path.join(basedir, path)),
                                          'class':
                                          type_
                                      }))

        for id_, value in arguments.items():
            value, type_ = _get_argument_type(value)
            workflow_object.inputs.append(
                cwlgen.InputParameter(id_, param_type=type_, default=value))

        for index, (path, (id_, step_id)) in enumerate(outputs.items(), 1):
            type_ = 'Directory' if os.path.isdir(path) else 'File'
            workflow_object.outputs.append(
                cwlgen.WorkflowOutputParameter('output_{}'.format(index),
                                               output_source='{}/{}'.format(
                                                   step_id, id_),
                                               param_type=type_))
        workflow_object.steps.extend(steps)
        if not filename:
            filename = 'parent_{}.cwl'.format(uuid4())
        path = os.path.join(tmpdir, filename)
        workflow_object.export(path)

        return workflow_object, path
Ejemplo n.º 4
0
# Workflow-level input: an array of strings that step 1 scatters over.
# `cwl_workflow` is created outside the visible part of this script.
# NOTE(review): the names `input` and `output` below shadow Python builtins.
input = cwlgen.InputParameter('message_array', param_type='string[]')
cwl_workflow.inputs.append(input)

# Workflow-level output, taken from the `output` port of a step named `cat_2`.
# NOTE(review): the only steps defined in the visible code are `echo` and
# `cat`; confirm that a `cat_2` step exists elsewhere, or that this source
# should read `cat/output`.
output = cwlgen.WorkflowOutputParameter('output',
                                        output_source='cat_2/output',
                                        param_type='File')
cwl_workflow.outputs.append(output)

# Step 1 (echo): runs 1st-tool.cwl, scattered over its `message` input.
step1 = cwlgen.WorkflowStep('echo', run='1st-tool.cwl', scatter='message')

# Step 1 input: `message` is fed from the workflow input `message_array`.
step1_input = cwlgen.WorkflowStepInput('message', source='message_array')
step1.inputs.append(step1_input)

# Step 1 output: exposes `echo_out` so that later steps can reference it.
step1_output = cwlgen.WorkflowStepOutput('echo_out')
step1.out.append(step1_output)

# Step 2 (cat): runs ./cat.cwl.
step2 = cwlgen.WorkflowStep('cat', run='./cat.cwl')

# Step 2 input: `files` is fed from step 1's `echo_out`.
step2_input = cwlgen.WorkflowStepInput('files', source='echo/echo_out')
step2.inputs.append(step2_input)

# Step 2 output: exposes `output`. It is not attached to `step2` within the
# visible lines -- presumably that happens further down the script.
step2_output = cwlgen.WorkflowStepOutput('output')
Ejemplo n.º 5
0
def _step_id_for(tool_path):
    """Return the workflow step id of a tool file: its name minus the extension."""
    return os.path.splitext(tool_path)[0]


def _load_tool_interfaces(tool_paths):
    """Read every CWL tool file and return its inputs and outputs.

    Returns two dicts keyed by the tool path as given on the command line:
    the ``inputs`` mapping and the ``outputs`` mapping of each file. The
    program exits with status 1 when a file is not a mapping or does not
    declare ``inputs`` and ``outputs`` as mappings.
    """
    tool_inputs = {}
    tool_outputs = {}
    for tool_path in tool_paths:
        with open(os.path.abspath(tool_path), 'r') as cwl_file:
            cwl_dict = yaml.safe_load(cwl_file)
        # An empty or non-mapping document used to be skipped silently and
        # only failed later with a KeyError; report it here instead.
        if (not isinstance(cwl_dict, dict)
                or not isinstance(cwl_dict.get('inputs'), dict)
                or not isinstance(cwl_dict.get('outputs'), dict)):
            print(
                "Your CWL files are not all of the same format. Please use ToolJig to make sure they all"
                " have the same format.")
            sys.exit(1)
        tool_inputs[tool_path] = cwl_dict['inputs']
        tool_outputs[tool_path] = cwl_dict['outputs']
    return tool_inputs, tool_outputs


def _choose_input_source(cwl_tool, workflow_inputs, wanted_input,
                         prev_outputs, prev_step_id, earlier_tools,
                         tool_outputs):
    """Ask the user where a step input comes from and return its CWL source.

    Args:
        cwl_tool: Workflow being built; a new workflow input is appended
            when the user asks for one.
        workflow_inputs: Display records ({'ID', 'Label', 'Type'}) of the
            existing workflow inputs; extended together with ``cwl_tool``.
        wanted_input: Record of the step input being wired (its ``type`` is
            used for a newly created workflow input).
        prev_outputs: Records (with an ``id`` key) of the outputs of the
            step directly before the one being wired.
        prev_step_id: Step id of that previous step.
        earlier_tools: Tool paths of all steps up to and including the
            previous one.
        tool_outputs: Mapping of tool path to its outputs mapping.

    Returns:
        The source string: a workflow input id, or ``<step id>/<output id>``.
    """
    externalInputToName = 'It is an external input that has yet to be referenced'
    previousOutput = 'It is the output of the workflow, but not the most recently previous step'

    # Build the menu. ``captions`` lists the indices of the rows that are
    # headings rather than selectable entries.
    options = ['It is the output of the previous step:']
    options.extend(prev_outputs)
    next_caption = len(prev_outputs) + 1
    if cwl_tool.inputs:
        options.append('It is an external input that already exists:')
        options.extend(workflow_inputs)
        captions = [0, next_caption, next_caption + len(cwl_tool.inputs) + 1]
    else:
        captions = [0, next_caption]  # first line and "Other"
    options.append('Other')
    options.append(externalInputToName)
    options.append(previousOutput)
    selection = options[cutie.select(options, caption_indices=captions)]

    if selection == externalInputToName:
        questions = [
            inquirer.Text('newID',
                          message="What is the ID of the new input?"),
            inquirer.Text('newLabel',
                          message="What is the label of the new input?")
        ]
        answers = inquirer.prompt(questions)
        # Register it as a workflow-level input ...
        cwl_tool.inputs.append(
            cwlgen.workflow.InputParameter(
                param_id=answers.get('newID'),
                label=answers.get('newLabel'),
                param_type=wanted_input.get('type')))
        workflow_inputs.append({
            'ID': answers.get('newID'),
            'Label': answers.get('newLabel'),
            'Type': wanted_input.get('type')
        })
        # ... and feed the step from it.
        return answers.get('newID')

    if selection == previousOutput:
        print(
            "\nPlease select which previous output corresponds to your input:"
        )
        listOfAllOutputs = []
        origins = []  # (tool path, output id) for each displayed row
        for tool_path in earlier_tools:
            for output_id, output_spec in tool_outputs.get(tool_path).items():
                toAdd = {'ID': output_id, 'From step': tool_path}
                toAdd.update(output_spec)
                listOfAllOutputs.append(toAdd)
                origins.append((tool_path, output_id))
        origin_tool, origin_output = origins[cutie.select(listOfAllOutputs)]
        # The source must name the step id, not the tool's file name.
        return _step_id_for(origin_tool) + "/" + origin_output

    if selection in workflow_inputs:
        print(selection)
        return selection.get('ID')

    # Otherwise one of the previous step's outputs was picked.
    return prev_step_id + "/" + selection.get('id')


def main():
    """Interactively chain CWL command-line tools into a workflow file.

    Command line: ``<workflow file> <tool.cwl> [<tool.cwl> ...]``. The tools
    become steps in the given order; each step id is the tool's file name
    without extension. All inputs of the first tool become workflow inputs
    and all outputs of the last tool become workflow outputs. For every
    later step its ``File``/``Directory`` inputs are wired to the previous
    step's output when there is exactly one of each; otherwise the user is
    asked where each input comes from. The workflow is written to the
    workflow file name.
    """
    if len(sys.argv) < 3:
        print("Usage: {} WORKFLOW_FILE TOOL.cwl [TOOL.cwl ...]".format(
            sys.argv[0]))
        sys.exit(1)
    workflowName = sys.argv[1]
    listOfNames = sys.argv[2:]

    # Get information about workflow from the user
    questions = [
        inquirer.Text(
            'label',
            message=
            "Please provide some information about the workflow you are creating"
        )
    ]
    answers = inquirer.prompt(questions)

    # Initialize the workflow we want to build
    cwl_tool = cwlgen.Workflow(workflow_id=os.path.splitext(workflowName)[0],
                               label=answers['label'],
                               cwl_version='v1.1')

    # Parse the tools given on the command line to get their inputs/outputs
    CLT_Inputs, CLT_Outputs = _load_tool_interfaces(listOfNames)

    # First step: every one of its inputs becomes a workflow input
    first_tool = listOfNames[0]
    step = cwlgen.workflow.WorkflowStep(step_id=_step_id_for(first_tool),
                                        run=first_tool)
    workflowInputs = []
    for item, spec in CLT_Inputs[first_tool].items():
        cwl_tool.inputs.append(
            cwlgen.workflow.InputParameter(param_id=item,
                                           label=spec.get('label'),
                                           doc=spec.get('doc'),
                                           param_type=spec.get('type')))
        workflowInputs.append({
            'ID': item,
            'Label': spec.get('label'),
            'Type': spec.get('type')
        })
        step.inputs.append(cwlgen.WorkflowStepInput(input_id=item,
                                                    source=item))
    for output_id in CLT_Outputs[first_tool]:
        step.out.append(cwlgen.WorkflowStepOutput(output_id=output_id))
    cwl_tool.steps.append(step)

    # Main loop: connect each tool's outputs to the inputs of the next one
    last_index = len(listOfNames) - 1
    for i, current_tool in enumerate(listOfNames):
        current_step_id = _step_id_for(current_tool)

        # Outputs of step "i" (all of them; no type filter is applied)
        importantOutputs = []
        for output_id, spec in CLT_Outputs[current_tool].items():
            idToAdd = {'id': output_id}
            idToAdd.update(spec)
            importantOutputs.append(idToAdd)

        if i == last_index:
            # The last tool's outputs are the workflow outputs. No further
            # step is created, so nothing is appended to the steps here
            # (the previous step used to be appended a second time).
            for produced in importantOutputs:
                cwl_tool.outputs.append(
                    cwlgen.workflow.WorkflowOutputParameter(
                        param_id=produced.get('id'),
                        doc=produced.get('doc'),
                        param_type=produced.get('type'),
                        output_source=current_step_id + "/" +
                        produced.get('id')))
            break

        next_tool = listOfNames[i + 1]
        step = cwlgen.workflow.WorkflowStep(step_id=_step_id_for(next_tool),
                                            run=next_tool)

        # Inputs of step "i+1" that are of type Directory or File
        importantInputs = []
        for input_id, spec in CLT_Inputs[next_tool].items():
            if spec.get('type') == 'File' or spec.get('type') == 'Directory':
                idToAdd = {'id': input_id}
                idToAdd.update(spec)
                importantInputs.append(idToAdd)

        if len(importantInputs) == 1 and len(importantOutputs) == 1:
            # Unambiguous: the single output feeds the single input. The
            # step input carries the id of the consuming tool's input.
            step.inputs.append(
                cwlgen.WorkflowStepInput(
                    input_id=importantInputs[0].get('id'),
                    source=current_step_id + "/" +
                    importantOutputs[0].get('id')))
        else:
            for position, m in enumerate(importantInputs, 1):
                print("Input ", position, "/", len(importantInputs),
                      "of Command Line File ", i + 1, "/", len(listOfNames))
                print(
                    "Your inputs and outputs don't match. Please specify where this input should be retrieved from:",
                    m)
                print("")
                source = _choose_input_source(cwl_tool, workflowInputs, m,
                                              importantOutputs,
                                              current_step_id,
                                              listOfNames[:i + 1],
                                              CLT_Outputs)
                step.inputs.append(
                    cwlgen.WorkflowStepInput(input_id=m.get('id'),
                                             source=source))

        for output_id in CLT_Outputs[next_tool]:
            step.out.append(cwlgen.WorkflowStepOutput(output_id=output_id))
        cwl_tool.steps.append(step)

    cwl_tool.export(workflowName)
Ejemplo n.º 6
0
def translate_step(
    step: StepNode,
    is_nested_tool=False,
    resource_overrides: Dict[str, str] = None,
    use_run_ref=True,
):
    """Build the cwlgen workflow step for a step node.

    Args:
        step: Step node to translate; its tool, inputs, sources, doc and
            scatter settings are read.
        is_nested_tool: When ``use_run_ref`` is set, selects the reference
            ``<tool id>.cwl`` instead of ``tools/<tool id>.cwl``.
        resource_overrides: Mapping of extra step input id to its source;
            one step input is appended per entry. ``None`` (the default)
            means no overrides.
        use_run_ref: When true the step's ``run`` is a file reference;
            otherwise the tool (or nested workflow) is translated and used
            as ``run`` directly.

    Returns:
        The populated ``cwlgen.WorkflowStep``.

    Raises:
        Exception: If an input has no source and is neither optional nor
            given a (truthy) default.
    """
    # The default used to be the typing object ``Dict[str, str]`` itself
    # (``=`` instead of ``:``), which is not a mapping, so omitting the
    # argument failed as soon as it was measured or iterated.
    if resource_overrides is None:
        resource_overrides = {}

    tool = step.tool
    if use_run_ref:
        run_ref = ("{tool}.cwl" if is_nested_tool else
                   "tools/{tool}.cwl").format(tool=tool.id())
    else:
        from janis_core.workflow.workflow import Workflow

        has_resources_overrides = len(resource_overrides) > 0
        if isinstance(tool, Workflow):
            run_ref = CwlTranslator.translate_workflow_to_all_in_one(
                tool, with_resource_overrides=has_resources_overrides)
        else:
            run_ref = CwlTranslator.translate_tool_internal(
                tool, True, with_resource_overrides=has_resources_overrides)

    cwlstep = cwlgen.WorkflowStep(
        step_id=step.id(),
        run=run_ref,
        # label=step.step.label,
        doc=step.doc,
        scatter=None,  # set at the end of this function from step.scatter
        scatter_method=None,  # set at the end of this function
    )

    cwlstep.out = [
        cwlgen.WorkflowStepOutput(output_id=o.tag)
        for o in step.tool.outputs()
    ]

    ins = step.inputs()

    for k in ins:
        inp = ins[k]
        if k not in step.sources:
            # Unconnected inputs are fine only when they can be omitted.
            if inp.input_type.optional or inp.default:
                continue
            else:
                raise Exception(
                    f"Error when building connections for cwlstep '{step.id()}', "
                    f"could not find required connection: '{k}'")

        edge = step.sources[k]
        ss = edge.slashed_source()
        link_merge = None

        # A single, non-array, non-scattered source feeding an array input
        # is wrapped in a list and merged as nested.
        if (ss is not None and not isinstance(ss, list)
                and isinstance(inp.input_type, Array)):
            start = edge.source().start
            outssval = start.outputs()
            source_type = (first_value(outssval) if len(outssval) == 1 else
                           outssval[edge.source().stag]).output_type
            is_scattered = isinstance(start, StepNode) and start.scatter
            if not isinstance(source_type, Array) and not is_scattered:
                ss = [ss]
                link_merge = "merge_nested"

        d = cwlgen.WorkflowStepInput(
            input_id=inp.tag,
            source=ss,
            link_merge=
            link_merge,  # this will need to change when edges have multiple source_map
            value_from=None,
        )

        cwlstep.inputs.append(d)

    for r in resource_overrides:
        cwlstep.inputs.append(
            cwlgen.WorkflowStepInput(input_id=r, source=resource_overrides[r]))

    if step.scatter:
        if len(step.scatter.fields) > 1:
            Logger.info(
                "Discovered more than one scatterable field on cwlstep '{step_id}', "
                "deciding scatterMethod to be '{method}'".format(
                    step_id=step.id(), method=step.scatter.method))
            cwlstep.scatterMethod = step.scatter.method.cwl()
        cwlstep.scatter = step.scatter.fields

    return cwlstep
Ejemplo n.º 7
0
def create_workflow(drops, cwl_filename, buffer):
    """
    Create a CWL workflow from a given Physical Graph Template

    A CWL workflow consists of multiple files. A single file describing the
    workflow, and multiple files each describing one step in the workflow. All
    the files are combined into one zip file, so that a single file can be
    downloaded by the user.

    NOTE: CWL only supports workflow steps that are bash shell applications
          Non-BashShellApp nodes are unable to be implemented in CWL

    :param drops: sequence of node dicts; the keys read here are ``dt``
        (category), ``inputs`` and ``outputs``.
    :param cwl_filename: name of the workflow file inside the zip archive.
    :param buffer: file name or file-like object the zip archive is
        written to.
    :raises Exception: if a node's category is not in SUPPORTED_CATEGORIES.
    :raises KeyError: if a step input is not listed as an output of any node.
    """

    # search the drops for unsupported categories,
    # if found, the graph cannot be translated into CWL
    for index, node in enumerate(drops):
        dataType = node.get('dt', '')
        if dataType not in SUPPORTED_CATEGORIES:
            raise Exception('Node {0} has an unsupported category: {1}'.format(
                index, dataType))

    # create list for command line tool description files
    step_files = []

    # create the workflow
    cwl_workflow = cwlgen.Workflow('', label='', doc='', cwl_version='v1.0')

    # Map every produced id to the step output that provides it. All outputs
    # of a node are recorded (previously only the first one was), matching
    # the ``output_file_<k>`` ports declared on each step below.
    # NOTE(review): assumes the generated tool descriptions declare the same
    # output ids -- confirm against create_command_line_tool.
    files = {}
    for node_index, node in enumerate(drops):
        for output_index, output_id in enumerate(node.get('outputs', [])):
            files[output_id] = "step{0}/output_file_{1}".format(
                node_index, output_index)

    # add steps to the workflow
    for node_index, node in enumerate(drops):
        if node.get('dt', '') != 'BashShellApp':
            continue

        inputs = node.get('inputs', [])
        outputs = node.get('outputs', [])
        step_name = "step" + str(node_index)

        # create command line tool description
        filename = step_name + ".cwl"
        contents = create_command_line_tool(node)

        # add contents of command line tool description to list of step files
        step_files.append({"filename": filename, "contents": contents})

        # create step
        step = cwlgen.WorkflowStep(step_name, run=filename)

        # add input to step; separate loop variables keep the node index
        # from being overwritten
        for input_index, input_id in enumerate(inputs):
            step.inputs.append(
                cwlgen.WorkflowStepInput('input_file_' + str(input_index),
                                         source=files[input_id]))

        # add output to step
        for output_index in range(len(outputs)):
            step.out.append(
                cwlgen.WorkflowStepOutput('output_file_' + str(output_index)))

        # add step to workflow
        cwl_workflow.steps.append(step)

    # put workflow and command line tool description files all together in a
    # zip; the context manager closes the archive even if a write fails
    with ZipFile(buffer, 'w') as zipObj:
        for step_file in step_files:
            zipObj.writestr(step_file["filename"],
                            six.b(step_file["contents"]))
        zipObj.writestr(cwl_filename, six.b(cwl_workflow.export_string()))