# Example #1
# 0
 def __init__(self,
              name,
              namespace=None,
              os='linux',
              arch='x86_64',
              installed=True,
              version=None,
              container=None):
     """Wrap a DAX Executable under a unique logical name.

     Args:
         name: base executable name; a class-level counter is appended
             to make the logical name unique.
         namespace: optional executable namespace.
         os: target operating system (default 'linux').
         arch: target architecture (default 'x86_64').
         installed: whether the executable is pre-installed on the site.
         version: optional executable version.
         container: optional container; only forwarded to the dax API
             when truthy.
     """
     # Unique logical name: append the monotonically increasing
     # class-level counter, then advance it.
     self.logical_name = name + "_ID" + str(Executable.id)
     Executable.id += 1
     self.namespace = namespace
     self.version = version

     # Build the keyword arguments once and add `container` only when
     # one was supplied, mirroring the two-call original exactly.
     dax_kwargs = {
         "namespace": self.namespace,
         "version": version,
         "os": os,
         "arch": arch,
         "installed": installed,
     }
     if container:
         dax_kwargs["container"] = container
     self._dax_executable = dax.Executable(self.logical_name, **dax_kwargs)

     self.in_workflow = False
     # Physical file names registered for this executable, keyed by site.
     self.pfns = {}
# Example #2
# 0
def _load_tool(step):
    """Resolve a workflow step's `run` field into a CommandLineTool object.

    `step.run` may be a path to a CWL file (str) or an already-parsed
    tool object; load from disk only in the former case.
    """
    return cwl.load_document(step.run) if isinstance(step.run, str) else step.run


def main():
    """Translate a CWL workflow plus an input spec into a Pegasus DAX.

    Loads the CWL workflow named on the command line, collects workflow
    inputs/outputs into replica and transformation catalogs, builds one
    DAX job per workflow step, and writes rc.txt, tc.txt and the DAX XML
    to disk.
    """
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    # Maps workflow-level string-input id -> its value from the spec file.
    workflow_input_strings = {}
    # Maps file id -> concrete file name (glob) used in the DAX.
    workflow_files = {}

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

        for wf_input in workflow.inputs:
            input_type = wf_input.type

            if input_type == "File":
                workflow_files[get_basename(wf_input.id)] = get_basename(wf_input.id)
                # TODO: account for non-local sites
                rc.add_item(get_basename(wf_input.id),
                            input_file_specs[get_basename(wf_input.id)]["path"],
                            "local")
            elif input_type == "string":
                workflow_input_strings[get_basename(wf_input.id)] = \
                    input_file_specs[get_basename(wf_input.id)]
            elif isinstance(input_type, cwl.InputArraySchema):
                if input_type.items == "File":
                    # TODO: account for workflow inputs of type File[]
                    pass
                elif input_type.items == "string":
                    workflow_input_strings[get_basename(wf_input.id)] = \
                        input_file_specs[get_basename(wf_input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = _load_tool(step)

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)

            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))

            # TODO: throw error when glob contains javascript expression
            #       or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = _load_tool(step)

        # Strip the directory component when baseCommand is an absolute path.
        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) if \
            os.path.isabs(cwl_command_line_tool.baseCommand) else cwl_command_line_tool.baseCommand

        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        # Map each step-input id -> its upstream source id(s).
        step_inputs = {}
        for step_input in step.in_:
            input_id = get_basename(step_input.id)
            if isinstance(step_input.source, str):
                step_inputs[input_id] = get_basename(step_input.source)
            elif isinstance(step_input.source, list):
                step_inputs[input_id] = [
                    get_basename(file) for file in step_input.source
                ]

        # add input uses to job
        for tool_input in cwl_command_line_tool.inputs:
            if tool_input.type == "File":
                file_id = step_inputs[get_name(step.id, tool_input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))

                dax_job.uses(file, link=dax.Link.INPUT)

            # TODO: better type checking for string[] and File[] ?
            elif isinstance(tool_input.type, cwl.CommandInputArraySchema):
                if tool_input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, tool_input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))

                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))

            dax_job.uses(file,
                         link=dax.Link.OUTPUT,
                         transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        # Copy the tool's argument list instead of aliasing it: appending
        # below must not mutate the CommandLineTool object, which may be a
        # shared, pre-parsed object when step.run is not a file path.
        dax_job_args = list(cwl_command_line_tool.arguments) if \
            cwl_command_line_tool.arguments is not None else []

        # Sort inputs by inputBinding.position; inputs with no inputBinding
        # (or no position) sort as position 0.
        # BUGFIX: the original key dereferenced input.inputBinding.position
        # without first checking inputBinding itself for None, raising
        # AttributeError for inputs that lack an inputBinding — a case the
        # loop below explicitly guards against.
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda tool_input: tool_input.inputBinding.position
            if tool_input.inputBinding is not None
            and tool_input.inputBinding.position is not None
            else 0)

        for tool_input in cwl_command_line_tool_inputs:
            # process args
            if tool_input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if tool_input.inputBinding.prefix is not None:
                    dax_job_args.append(tool_input.inputBinding.prefix)

                if tool_input.type == "File":
                    dax_job_args.append(
                        dax.File(workflow_files[step_inputs[get_name(
                            step.id, tool_input.id)]]))

                if tool_input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(
                            step.id, tool_input.id)]])

                # handle array type inputs
                if isinstance(tool_input.type, cwl.CommandInputArraySchema):
                    if tool_input.type.items == "File":
                        for file in step_inputs[get_name(step.id, tool_input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif tool_input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(
                            step.id, tool_input.id)]

                        separator = " " if tool_input.inputBinding.itemSeparator is None \
                            else tool_input.inputBinding.itemSeparator

                        dax_job_args.append(
                            # TODO: currently only accounting for input strings that
                            #       are inputs to the entire workflow
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(
            dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)