def __init__(self, name, namespace=None, os='linux', arch='x86_64', installed=True, version=None, container=None):
    """Wrap a dax.Executable under a unique logical name.

    A class-wide counter (``Executable.id``) is appended to *name* so that
    every instance gets a distinct logical name, then the underlying
    ``dax.Executable`` is created with the supplied platform attributes.

    Args:
        name: base executable name; the counter suffix is appended to it.
        namespace: optional DAX namespace for the executable.
        os: target operating system (default ``'linux'``).
        arch: target architecture (default ``'x86_64'``).
        installed: whether the executable is pre-installed on the site.
        version: optional executable version string.
        container: optional container; only forwarded to dax.Executable
            when truthy.
    """
    # Build the unique logical name, then advance the shared counter.
    suffix = "_ID%s" % str(Executable.id)
    self.logical_name = name + suffix
    Executable.id += 1

    self.namespace = namespace
    self.version = version

    # Collect the common constructor arguments once; include `container`
    # only when one was actually given (mirrors the original branching).
    exe_kwargs = dict(namespace=self.namespace, version=version, os=os,
                      arch=arch, installed=installed)
    if container:
        exe_kwargs["container"] = container
    self._dax_executable = dax.Executable(self.logical_name, **exe_kwargs)

    # Bookkeeping: not yet attached to a workflow, no physical file names.
    self.in_workflow = False
    self.pfns = {}
def main():
    """Convert a CWL workflow into a Pegasus DAX.

    Loads the CWL workflow and its input-file spec (YAML), builds replica
    and transformation catalogs, translates each workflow step into a DAX
    job with its input/output file uses and command-line arguments, and
    writes out ``rc.txt``, ``tc.txt``, and the DAX XML.
    """
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = {}
    workflow_files = {}

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

    # Map each workflow-level input to either a file (registered in the
    # replica catalog) or a string value pulled from the input spec.
    for wf_input in workflow.inputs:
        input_type = wf_input.type
        if input_type == "File":
            workflow_files[get_basename(wf_input.id)] = get_basename(wf_input.id)
            # TODO: account for non-local sites
            rc.add_item(get_basename(wf_input.id),
                        input_file_specs[get_basename(wf_input.id)]["path"],
                        "local")
        elif input_type == "string":
            workflow_input_strings[get_basename(wf_input.id)] = \
                input_file_specs[get_basename(wf_input.id)]
        elif isinstance(input_type, cwl.InputArraySchema):
            if input_type.items == "File":
                # TODO: account for workflow inputs of type File[]
                pass
            elif input_type.items == "string":
                workflow_input_strings[get_basename(wf_input.id)] = \
                    input_file_specs[get_basename(wf_input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        # step.run may be a path to a CommandLineTool doc or an inline tool
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
            else step.run

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)
            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))
            # TODO: throw error when glob contains javascript expression
            #       or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
            else step.run

        # Strip any absolute path from the base command for the logical name.
        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) if \
            os.path.isabs(cwl_command_line_tool.baseCommand) else \
            cwl_command_line_tool.baseCommand

        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        # Resolve each step input id to its source name(s).
        step_inputs = {}
        for step_input in step.in_:
            input_id = get_basename(step_input.id)
            if isinstance(step_input.source, str):
                step_inputs[input_id] = get_basename(step_input.source)
            elif isinstance(step_input.source, list):
                step_inputs[input_id] = [
                    get_basename(src) for src in step_input.source
                ]

        # add input uses to job
        for tool_input in cwl_command_line_tool.inputs:
            if tool_input.type == "File":
                file_id = step_inputs[get_name(step.id, tool_input.id)]
                dax_file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))
                dax_job.uses(dax_file, link=dax.Link.INPUT)
            # TODO: better type checking for string[] and File[] ?
            elif isinstance(tool_input.type, cwl.CommandInputArraySchema):
                if tool_input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, tool_input.id)]
                    for file_id in file_ids:
                        dax_file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))
                        dax_job.uses(dax_file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            dax_file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))
            dax_job.uses(dax_file, link=dax.Link.OUTPUT, transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments if \
            cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job argument list
        # BUGFIX: the original sort key dereferenced inputBinding.position
        # without first checking that inputBinding itself is not None, which
        # raised AttributeError for tool inputs lacking an inputBinding
        # (the loop below already guards against exactly that case).
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda ti: ti.inputBinding.position
            if ti.inputBinding is not None and ti.inputBinding.position is not None
            else 0)

        for tool_input in cwl_command_line_tool_inputs:
            # process args
            if tool_input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if tool_input.inputBinding.prefix is not None:
                    dax_job_args.append(tool_input.inputBinding.prefix)

                if tool_input.type == "File":
                    dax_job_args.append(
                        dax.File(workflow_files[step_inputs[get_name(
                            step.id, tool_input.id)]]))

                if tool_input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(
                            step.id, tool_input.id)]])

                # handle array type inputs
                if isinstance(tool_input.type, cwl.CommandInputArraySchema):
                    if tool_input.type.items == "File":
                        for file_name in step_inputs[get_name(step.id,
                                                              tool_input.id)]:
                            dax_job_args.append(
                                dax.File(workflow_files[file_name]))
                    elif tool_input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(
                            step.id, tool_input.id)]
                        separator = " " if tool_input.inputBinding.itemSeparator is None \
                            else tool_input.inputBinding.itemSeparator
                        # TODO: currently only accounting for input strings that
                        #       are inputs to the entire workflow
                        dax_job_args.append(
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(
            dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)