def test_traverse_workflow_singularity(self):
    loaded = parser.load_document(str(TEST_CWL.resolve()))
    with TemporaryDirectory() as tmpdir:
        for req in set(traverse(loaded)):
            image_puller = SingularityImagePuller(req, tmpdir)
            image_puller.save_docker_image()
def test_traverse_workflow(self):
    loaded = parser.load_document(str(TEST_CWL.resolve()))
    with TemporaryDirectory() as tmpdir:
        for req in set(traverse(loaded)):
            image_puller = DockerImagePuller(req, tmpdir)
            image_puller.save_docker_image()
            _ = image_puller.generate_udocker_loading_command()
def main():
    args = parse_args()
    os.makedirs(args.dir, exist_ok=True)
    top = cwl.load_document(args.input)
    for req in set(traverse(top)):
        if args.singularity:
            image_puller = SingularityImagePuller(req, args.dir)
        else:
            image_puller = DockerImagePuller(req, args.dir)
        image_puller.save_docker_image()
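# A minimal sketch of the parse_args() this entry point assumes. The option
# names (--dir, --singularity, and the positional input) are inferred from
# the attribute accesses above, not confirmed against the real CLI.
def parse_args():
    import argparse
    arg_parser = argparse.ArgumentParser(
        description="Pull the container images referenced by a CWL document.")
    arg_parser.add_argument("input", help="CWL document to scan for container requirements")
    arg_parser.add_argument("--dir", default="images", help="directory to store images in")
    arg_parser.add_argument("--singularity", action="store_true",
                            help="pull with Singularity instead of Docker")
    return arg_parser.parse_args()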
def workflow_parse(request, repo_id, cwl_path):
    """
    Parses a workflow and, optionally, a job file, then gathers all fields
    from the user.
    """
    repo = Repository.objects.get(pk=repo_id)
    full_cwl_path = repo.get_content(cwl_path)
    job = {}
    if 'job' in request.GET:
        job_path = repo.get_content(request.GET['job'])
        try:
            job = parse_job(Path(job_path), repo.path())
        except Exception as e:
            logger.error(f"can't parse {job_path}: {e}")
            raise
    parsed_workflow = load_document(str(full_cwl_path))
    if request.method == 'POST':
        form = CwlForm(parsed_workflow.inputs, data=request.POST,
                       prefix=repo.path(), default_values=job)
        if form.is_valid():
            relative_cwl = full_cwl_path.relative_to(repo.path())
            workflow = Workflow(repository=repo, cwl_path=relative_cwl)
            workflow.save()
            with open(workflow.full_job_path(), mode='wt') as job_file:
                json.dump(form.back_to_cwl_job(repo.path()), job_file)
            run_workflow.delay(pk=workflow.id)
            return redirect('scheduler:workflow_list')
    else:
        form = CwlForm(parsed_workflow.inputs, prefix=repo.path(),
                       default_values=job)
    context = {
        'workflow': parsed_workflow,
        'form': form,
        'repo': repo,
        'cwl_path': cwl_path,
    }
    return render(request, 'scheduler/workflow_parse.html', context)
def validate_environment(self):
    """Validate environments in REANA CWL workflow."""
    try:
        import cwl_utils.parser_v1_0 as cwl_parser
        from cwl_utils.docker_extract import traverse
    except ImportError as e:
        display_message(
            "Cannot validate environment. Please install reana-client on "
            "Python 3+ to enable environment validation for CWL workflows.",
            msg_type="error",
            indented=True,
        )
        raise e
    top = cwl_parser.load_document(self.workflow_file)
    for image in traverse(top):
        self._validate_environment_image(image)
def main():
    top = cwl.load_document(sys.argv[1])
    traverse(top)
def get_process_from_step(step: cwl.WorkflowStep):
    """Return the process a step runs, loading it first if `run` is a file path."""
    if isinstance(step.run, str):
        return cwl.load_document(step.run)
    return step.run
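# Hypothetical usage sketch: resolve every step of a workflow to its process
# object, loading externally referenced tools on demand ("workflow.cwl" is a
# placeholder path).
workflow = cwl.load_document("workflow.cwl")
step_processes = {step.id: get_process_from_step(step) for step in workflow.steps}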
def main():
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

    for input in workflow.inputs:
        input_type = input.type
        if input_type == "File":
            workflow_files[get_basename(input.id)] = get_basename(input.id)
            # TODO: account for non-local sites
            rc.add_item(get_basename(input.id),
                        input_file_specs[get_basename(input.id)]["path"],
                        "local")
        elif input_type == "string":
            workflow_input_strings[get_basename(input.id)] = \
                input_file_specs[get_basename(input.id)]
        elif isinstance(input_type, cwl.InputArraySchema):
            if input_type.items == "File":
                # TODO: account for workflow inputs of type File[]
                pass
            elif input_type.items == "string":
                workflow_input_strings[get_basename(input.id)] = \
                    input_file_specs[get_basename(input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)
            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))
            # TODO: throw error when glob contains a javascript expression
            #       or pattern, as we cannot support anything dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run

        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) \
            if os.path.isabs(cwl_command_line_tool.baseCommand) \
            else cwl_command_line_tool.baseCommand
        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        step_inputs = dict()
        for input in step.in_:
            input_id = get_basename(input.id)
            if isinstance(input.source, str):
                step_inputs[input_id] = get_basename(input.source)
            elif isinstance(input.source, list):
                step_inputs[input_id] = [get_basename(file) for file in input.source]

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                file_id = step_inputs[get_name(step.id, input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(file_id, dax_job.name))
                dax_job.uses(file, link=dax.Link.INPUT)
            # TODO: better type checking for string[] and File[]?
            elif isinstance(input.type, cwl.CommandInputArraySchema):
                if input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))
                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))
            dax_job.uses(file, link=dax.Link.OUTPUT, transfer=True, register=True)

        # add arguments to job
        # TODO: place argument building in a function
        dax_job_args = cwl_command_line_tool.arguments \
            if cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up the job argument list
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda input: input.inputBinding.position
            if input.inputBinding.position is not None else 0)

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(dax.File(
                        workflow_files[step_inputs[get_name(step.id, input.id)]]))

                if input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(step.id, input.id)]])

                # handle array type inputs
                if isinstance(input.type, cwl.CommandInputArraySchema):
                    if input.type.items == "File":
                        for file in step_inputs[get_name(step.id, input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(step.id, input.id)]
                        separator = " " if input.inputBinding.itemSeparator is None \
                            else input.inputBinding.itemSeparator
                        # TODO: currently only accounting for input strings that
                        #       are inputs to the entire workflow
                        dax_job_args.append(
                            separator.join(workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)
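# Neither get_basename() nor get_name() is defined in these snippets. Judging
# from how their results are used (get_name(step.id, output.id) here produces
# the same workflow_files keys that the converter variant further below builds
# as get_basename(step.id) + "/" + get_basename(output.id), and the cwlab
# snippet strips ids with sub(".*#", "")), they plausibly look like this
# sketch; treat both as assumptions.
def get_basename(id_string: str) -> str:
    # drop everything up to the CWL id's "#" fragment
    return id_string.split("#")[-1]

def get_name(step_id: str, parameter_id: str) -> str:
    return get_basename(step_id) + "/" + get_basename(parameter_id)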
from cwl_utils import parser_v1_0
from re import sub
from cwlab.wf_input.read_xls import clean_string

configs = {}

cwl_document = parser_v1_0.load_document("test_files/workflows/wf_fastqc.cwl")

# if the document packs multiple processes, pick the one whose id is "main"
if isinstance(cwl_document, list):
    cwl_documents = cwl_document
    for cwl_document_ in cwl_documents:
        if clean_string(sub(".*#", "", cwl_document_.id)) == "main":
            cwl_document = cwl_document_
            break

inp_records = cwl_document.inputs

for inp_rec in inp_records:
    name = clean_string(sub(".*#", "", inp_rec.id))
    is_array = False
    null_allowed = False
    null_items_allowed = False
    default_value = [""]

    # test if optional:
    if isinstance(inp_rec.type, list):
        if len(inp_rec.type) == 2 and "null" in inp_rec.type:
            null_allowed = True
            inp_rec.type.remove("null")
            inp_rec.type = inp_rec.type[0]
        else:
            raise AssertionError(
                "E: unknown type for parameter " + name +
                ": lists of types are only supported when one of two "
                "elements is \"null\""
            )
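# Worked example of the optional-type normalization above (a standalone
# re-implementation for illustration, not cwlab's own helper): a CWL type of
# ["null", "string"] collapses to "string" with null_allowed=True.
def normalize_optional(cwl_type):
    if isinstance(cwl_type, list):
        if len(cwl_type) == 2 and "null" in cwl_type:
            remaining = [t for t in cwl_type if t != "null"]
            return remaining[0], True
        raise AssertionError('only two-element lists containing "null" are supported')
    return cwl_type, False

assert normalize_optional(["null", "string"]) == ("string", True)
assert normalize_optional("File") == ("File", False)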
def main():
    setup_logger(None)
    args = parse_args()

    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    logger.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog()

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

    for id, fields in input_file_specs.items():
        if isinstance(fields, dict):
            if fields["class"] == "File":
                workflow_files[id] = id
                rc.add_item(id, fields["path"], "local")
        elif isinstance(fields, str):
            workflow_input_strings[id] = fields

    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run
        for clt_output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            workflow_files[get_basename(step.id) + "/" + get_basename(clt_output.id)] = \
                clt_output.outputBinding.glob

    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) \
            if isinstance(step.run, str) else step.run
        dax_executable = Executable(cwl_command_line_tool.baseCommand)

        # add executable to transformation catalog
        tc.add_item(cwl_command_line_tool.baseCommand, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = Job(dax_executable)

        # get the inputs of this step
        step_inputs = {get_basename(input.id): get_basename(input.source)
                       for input in step.in_}

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                dax_job.uses(
                    File(workflow_files[step_inputs[get_basename(step.id) + "/"
                                                    + get_basename(input.id)]]),
                    link=Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            output_file = File(workflow_files[get_basename(output)])
            dax_job.uses(output_file, link=Link.OUTPUT, transfer=True, register=True)

        # add arguments to job
        # TODO: place argument building in a function
        dax_job_args = cwl_command_line_tool.arguments \
            if cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up the job argument list
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda input: input.inputBinding.position
            if input.inputBinding.position is not None else 0)

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(
                        File(workflow_files[step_inputs[get_basename(step.id) + "/"
                                                        + get_basename(input.id)]]))
                # TODO: take into account string inputs that are outputs of
                #       other steps and not just workflow inputs
                elif input.type in ("string", "string[]"):
                    input_string_id = step_inputs[get_basename(step.id) + "/"
                                                  + get_basename(input.id)]
                    if input.type == "string[]":
                        separator = " " if input.inputBinding.itemSeparator is None \
                            else input.inputBinding.itemSeparator
                        arg_string = separator.join(workflow_input_strings[input_string_id])
                    else:
                        arg_string = workflow_input_strings[input_string_id]
                    dax_job_args.append(arg_string)

        dax_job.addArguments(*dax_job_args)

        # add executable to DAG
        if not adag.hasExecutable(dax_executable):
            adag.addExecutable(dax_executable)

        # add job to DAG
        adag.addJob(dax_job)

    # TODO: fix this, can't have a forward slash in an lfn, so replacing
    #       with "." for now to get this working
    for filename, file in adag.files.items():
        if "/" in filename:
            file.name = file.name.replace("/", ".")

    for jobid, job in adag.jobs.items():
        for used in job.used:
            if "/" in used.name:
                used.name = used.name.replace("/", ".")
        for arg in job.arguments:
            if isinstance(arg, File):
                if "/" in arg.name:
                    arg.name = arg.name.replace("/", ".")

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        adag.writeXML(f)
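# A guess at the shape of the input-spec YAML these converters read: top-level
# keys are workflow input ids, File entries are mappings with "class" and
# "path" (the only fields the code above accesses), and plain strings pass
# through as string inputs. Field names beyond those two are unverified.
EXAMPLE_INPUT_SPEC = """
reads:
  class: File
  path: /data/reads.fastq
sample_name: sample01
"""

if __name__ == "__main__":
    from yaml import load, Loader
    specs = load(EXAMPLE_INPUT_SPEC, Loader=Loader)
    assert specs["reads"]["class"] == "File"
    assert specs["sample_name"] == "sample01"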
def main():
    args = parse_args()
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    workflow = cwl.load_document(workflow_file_path)
    adag = ADAG("from-cwl-workflow", auto=True)

    for step in workflow.steps:
        # EXECUTABLE
        '''
        - step.run points to a cwl file with a CommandLineTool class.
        - CommandLineTool.baseCommand must either be an absolute path to some
          executable OR the executable name, in which case it must be added
          to the run environment's PATH
        - in this case, I am assuming absolute paths and will just add it as
          a PFN to the Executable
        '''
        # this step's run cwl document
        parameter_reference = cwl.load_document(step.run)
        executable = Executable(parameter_reference.id)
        executable.addPFN(
            PFN(parameter_reference.baseCommand, "what to do about 'site'???"))
        if not adag.hasExecutable(executable):
            adag.addExecutable(executable)

        # INPUT FILES
        input_files = set()
        for input_file in step.in_:
            if isinstance(input_file.source, list):
                input_files |= set(
                    map(lambda filename: File(filename), input_file.source))
            # should be a string in this case, just being careful for now
            elif isinstance(input_file.source, str):
                input_files.add(File(input_file.source))
            else:
                raise Exception(
                    "didn't get a string from a step's input file field")

        for input_file in input_files:
            if not adag.hasFile(input_file.name):
                adag.addFile(input_file)

        # OUTPUT FILES
        output_files = set()
        for output_file in step.out:
            # seems like this is always a list of filenames
            if isinstance(output_file, str):
                output_files.add(File(output_file))
            else:
                raise Exception(
                    "didn't get a string from a step's output file field")

        for output_file in output_files:
            if not adag.hasFile(output_file.name):
                adag.addFile(output_file)

        # JOB
        job = Job(executable)
        for input_file in input_files:
            job.uses(input_file, link=Link.INPUT)
        for output_file in output_files:
            job.uses(output_file, link=Link.OUTPUT)
        adag.addJob(job)

        '''
        What about the notion of arguments? In CWL, a CommandLineTool has an
        arguments field but, according to the docs, this is meant for command
        line bindings which are not directly associated with input parameters.
        '''

    # ADD PFNs for initial input files
    '''
    Initial input files are specified in a separate YAML file according to
    the CWL docs. For each file in that YAML file, we need to get the PFNs
    for those files if they exist and add them to the File objects in the
    Pegasus adag object.
    '''
    input_file_spec_path = args.input_file_spec_path
    with open(input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)
        for filename, properties in input_file_specs.items():
            id = "#" + filename
            if properties["class"] == "File":
                for file_ in input_files:
                    if file_.name.endswith(id):
                        file_.addPFN(
                            PFN(properties["path"], "what to do about this site??"))
                        break

    with open("2cwl-to-dax-conversion-workflow.xml", "w") as f:
        adag.writeXML(f)
def test_traverse_workflow(self):
    loaded = parser.load_document(str(TEST_CWL.resolve()))
    traverse_workflow(loaded)