Example #1
    def test_traverse_workflow_singularity(self):
        loaded = parser.load_document(str(TEST_CWL.resolve()))

        with TemporaryDirectory() as tmpdir:
            for req in set(traverse(loaded)):
                image_puller = SingularityImagePuller(req, tmpdir)
                image_puller.save_docker_image()
Example #2
    def test_traverse_workflow(self):
        loaded = parser.load_document(str(TEST_CWL.resolve()))

        with TemporaryDirectory() as tmpdir:
            for req in set(traverse(loaded)):
                image_puller = DockerImagePuller(req, tmpdir)
                image_puller.save_docker_image()
                _ = image_puller.generate_udocker_loading_command()
Example #3
def main():
    args = parse_args()
    os.makedirs(args.dir, exist_ok=True)

    top = cwl.load_document(args.input)

    for req in set(traverse(top)):
        if args.singularity:
            image_puller = SingularityImagePuller(req, args.dir)
        else:
            image_puller = DockerImagePuller(req, args.dir)
        image_puller.save_docker_image()
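Example #3 calls a parse_args helper that is not shown. A minimal sketch, inferred only from the attributes the example actually reads (args.input, args.dir, args.singularity); the option names and help strings are assumptions, not the original CLI:

import argparse

def parse_args():
    # Hypothetical reconstruction: only the three attributes used in
    # Example #3 (input, dir, singularity) are defined here.
    parser = argparse.ArgumentParser(
        description="Pull the container images referenced by a CWL document.")
    parser.add_argument("input", help="Path to the CWL workflow or tool to scan.")
    parser.add_argument("--dir", default=".", help="Directory to save images into.")
    parser.add_argument("--singularity", action="store_true",
                        help="Use SingularityImagePuller instead of DockerImagePuller.")
    return parser.parse_args()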
Example #4
def workflow_parse(request, repo_id, cwl_path):
    """
    Parses a workflow and optionally a job file. Then gathers all fields from the user.
    """
    repo = Repository.objects.get(pk=repo_id)
    full_cwl_path = repo.get_content(cwl_path)

    job = {}
    if 'job' in request.GET:
        job_path = repo.get_content(request.GET['job'])
        try:
            job = parse_job(Path(job_path), repo.path())
        except Exception as e:
            logger.error(f"can't parse {job_path}: {e}")
            raise

    parsed_workflow = load_document(str(full_cwl_path))

    if request.method == 'POST':
        form = CwlForm(parsed_workflow.inputs,
                       data=request.POST,
                       prefix=repo.path(),
                       default_values=job)
        if form.is_valid():
            relative_cwl = full_cwl_path.relative_to(repo.path())
            workflow = Workflow(repository=repo, cwl_path=relative_cwl)
            workflow.save()

            with open(workflow.full_job_path(), mode='wt') as job_file:
                json.dump(form.back_to_cwl_job(repo.path()), job_file)

            run_workflow.delay(pk=workflow.id)

            return redirect('scheduler:workflow_list')

    else:
        form = CwlForm(parsed_workflow.inputs,
                       prefix=repo.path(),
                       default_values=job)

    context = {
        'workflow': parsed_workflow,
        'form': form,
        'repo': repo,
        'cwl_path': cwl_path
    }
    return render(request, 'scheduler/workflow_parse.html', context)
Example #5
    def validate_environment(self):
        """Validate environments in REANA CWL workflow."""

        try:
            import cwl_utils.parser_v1_0 as cwl_parser
            from cwl_utils.docker_extract import traverse
        except ImportError as e:
            display_message(
                "Cannot validate environment. Please install reana-client on Python 3+ to enable environment validation for CWL workflows.",
                msg_type="error",
                indented=True,
            )
            raise e

        top = cwl_parser.load_document(self.workflow_file)

        for image in traverse(top):
            self._validate_environment_image(image)
Example #6
def main():
    top = cwl.load_document(sys.argv[1])
    traverse(top)
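Example #6 assumes cwl and traverse are already in scope. A self-contained sketch of the same idea, borrowing the imports shown in Example #5 (cwl_utils.parser_v1_0 and cwl_utils.docker_extract.traverse); printing each requirement is added here purely for illustration:

import sys

import cwl_utils.parser_v1_0 as cwl
from cwl_utils.docker_extract import traverse

def main():
    # Load the CWL document named on the command line and walk it,
    # printing every container requirement that traverse() yields.
    top = cwl.load_document(sys.argv[1])
    for req in traverse(top):
        print(req)

if __name__ == "__main__":
    main()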
Example #7
def get_process_from_step(step: cwl.WorkflowStep):
    if isinstance(step.run, str):
        return cwl.load_document(step.run)
    return step.run
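For orientation, a hedged sketch of how get_process_from_step might be used to resolve every step of a loaded workflow; the cwl alias and parser version are assumptions carried over from the other examples, and relative step.run paths are resolved against the current working directory unless handled separately:

import cwl_utils.parser_v1_0 as cwl

def resolve_steps(workflow_path):
    # Load the workflow, then return the process (CommandLineTool,
    # ExpressionTool, or sub-workflow) behind each of its steps.
    workflow = cwl.load_document(workflow_path)
    return {step.id: get_process_from_step(step) for step in workflow.steps}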
Example #8
def main():
    args = parse_args()
    setup_logger(args.debug)

    # TODO: handle exceptions for bad file paths
    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    log.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = dax.ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog(workflow_file_dir)

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()

    log.info("Collecting inputs in {}".format(args.input_file_spec_path))
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)

        for input in workflow.inputs:
            input_type = input.type

            if input_type == "File":
                workflow_files[get_basename(input.id)] = get_basename(input.id)
                # TODO: account for non-local sites
                rc.add_item(get_basename(input.id),
                            input_file_specs[get_basename(input.id)]["path"],
                            "local")
            elif input_type == "string":
                workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]
            elif isinstance(input_type, cwl.InputArraySchema):
                if input_type.items == "File":
                    # TODO: account for workflow inputs of type File[]
                    pass
                elif input_type.items == "string":
                    workflow_input_strings[get_basename(input.id)] = \
                                        input_file_specs[get_basename(input.id)]

    log.info("Collecting output files")
    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        for output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            output_name = get_name(step.id, output.id)

            log.debug("Adding (key: {0}, value: {1}) to workflow_files".format(
                output_name, output.outputBinding.glob))

            # TODO: throw error when glob contains javascript expression
            #       or pattern as we cannot support anything that is dynamic
            workflow_files[output_name] = output.outputBinding.glob

    log.info("Building workflow steps into dax jobs")
    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) \
                                                                    else step.run

        executable_name = os.path.basename(cwl_command_line_tool.baseCommand) if \
            os.path.isabs(cwl_command_line_tool.baseCommand) else cwl_command_line_tool.baseCommand

        dax_executable = dax.Executable(executable_name)

        # add executable to transformation catalog
        tc.add_item(executable_name, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = dax.Job(dax_executable)

        step_inputs = dict()
        for input in step.in_:
            input_id = get_basename(input.id)
            if isinstance(input.source, str):
                step_inputs[input_id] = get_basename(input.source)
            elif isinstance(input.source, list):
                step_inputs[input_id] = [
                    get_basename(file) for file in input.source
                ]

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                file_id = step_inputs[get_name(step.id, input.id)]
                file = dax.File(workflow_files[file_id])
                log.debug("Adding link ({0} -> {1})".format(
                    file_id, dax_job.name))

                dax_job.uses(file, link=dax.Link.INPUT)

            # TODO: better type checking for string[] and File[] ?
            elif isinstance(input.type, cwl.CommandInputArraySchema):
                if input.type.items == "File":
                    file_ids = step_inputs[get_name(step.id, input.id)]
                    for file_id in file_ids:
                        file = dax.File(workflow_files[file_id])
                        log.debug("Adding link ({0} -> {1})".format(
                            file_id, dax_job.name))

                        dax_job.uses(file, link=dax.Link.INPUT)

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            file_id = get_basename(output)
            file = dax.File(workflow_files[file_id])
            log.debug("Adding link ({0} -> {1})".format(dax_job.name, file_id))

            dax_job.uses(file,
                         link=dax.Link.OUTPUT,
                         transfer=True,
                         register=True)

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments if \
            cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job argument list
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda inp: inp.inputBinding.position
            if inp.inputBinding is not None and inp.inputBinding.position is not None
            else 0)

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(
                        dax.File(workflow_files[step_inputs[get_name(
                            step.id, input.id)]]))

                if input.type == "string":
                    dax_job_args.append(
                        workflow_input_strings[step_inputs[get_name(
                            step.id, input.id)]])

                # handle array type inputs
                if isinstance(input.type, cwl.CommandInputArraySchema):
                    if input.type.items == "File":
                        for file in step_inputs[get_name(step.id, input.id)]:
                            dax_job_args.append(dax.File(workflow_files[file]))
                    elif input.type.items == "string":
                        input_string_arr_id = step_inputs[get_name(
                            step.id, input.id)]

                        separator = " " if input.inputBinding.itemSeparator is None \
                                        else input.inputBinding.itemSeparator

                        dax_job_args.append(
                            # TODO: currently only accounting for input strings that
                            #       are inputs to the entire workflow
                            separator.join(
                                workflow_input_strings[input_string_arr_id]))

        log.debug("Adding job: {0}, with args: {1}".format(
            dax_job.name, dax_job_args))
        dax_job.addArguments(*dax_job_args)

        # add job to DAG
        adag.addJob(dax_job)

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        log.info("Writing DAX to {}".format(args.output_file_path))
        adag.writeXML(f)
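Examples #8 and #10 also rely on two helpers, get_basename and get_name, that are not reproduced in this listing. A hedged reconstruction, inferred only from how they are called here (CWL ids have the form file:///path/doc.cwl#step/field) and from the literal string concatenation used in Example #10:

def get_basename(id_string):
    # Hypothetical helper: keep only the fragment after '#', so that
    # "file:///wf.cwl#step_1/input_1" becomes "step_1/input_1".
    return id_string.split("#")[-1]

def get_name(step_id, field_id):
    # Hypothetical helper: qualify a tool-level field id with its step,
    # e.g. get_name(".../wf.cwl#step_1", ".../tool.cwl#infile") -> "step_1/infile".
    return get_basename(step_id) + "/" + get_basename(field_id)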
Example #9
from cwl_utils import parser_v1_0
from re import sub
from cwlab.wf_input.read_xls import clean_string

configs = {}
cwl_document = parser_v1_0.load_document("test_files/workflows/wf_fastqc.cwl")
if isinstance(cwl_document, list):
    cwl_documents = cwl_document
    for cwl_document_ in cwl_documents:
        if clean_string(sub(".*#", "", cwl_document_.id)) == "main":
            cwl_document = cwl_document_
            break
inp_records = cwl_document.inputs

for inp_rec in inp_records:
    inp_rec
    name = clean_string(sub(".*#", "", inp_rec.id))
    is_array = False
    null_allowed = False
    null_items_allowed = False
    default_value = [""]
    # test if optional:
    if isinstance(inp_rec.type, list):
        if len(inp_rec.type) == 2 and "null" in inp_rec.type:
            null_allowed = True
            inp_rec.type.remove("null")
            inp_rec.type = inp_rec.type[0]
        else:
            raise AssertionError(
                "E: unknown type for parameter " + name +
                ": lists of type are only supported when one of two elements is \"null\""
            )
Example #10
def main():
    setup_logger(None)
    args = parse_args()

    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    logger.info("Loading {}".format(workflow_file_path))
    workflow = cwl.load_document(workflow_file_path)

    adag = ADAG("dag-generated-from-cwl", auto=True)
    rc = ReplicaCatalog()
    tc = TransformationCatalog()

    # process initial input file(s)
    # TODO: need to account for the different fields for a file class
    # TODO: log warning for the fields that we are skipping
    workflow_input_strings = dict()
    workflow_files = dict()
    with open(args.input_file_spec_path, "r") as yaml_file:
        input_file_specs = load(yaml_file, Loader=Loader)
        for id, fields in input_file_specs.items():
            if isinstance(fields, dict):
                if fields["class"] == "File":
                    workflow_files[id] = id
                    rc.add_item(id, fields["path"], "local")
            elif isinstance(fields, str):
                workflow_input_strings[id] = fields

    for step in workflow.steps:
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) else step.run

        for clt_output in cwl_command_line_tool.outputs:
            # TODO: account for outputs that are not files
            workflow_files[get_basename(step.id) + "/" + get_basename(clt_output.id)] = clt_output.outputBinding.glob


    for step in workflow.steps:
        # convert cwl:CommandLineTool -> pegasus:Executable
        cwl_command_line_tool = cwl.load_document(step.run) if isinstance(step.run, str) else step.run
        dax_executable = Executable(cwl_command_line_tool.baseCommand)

        # add executable to transformation catalog
        tc.add_item(cwl_command_line_tool.baseCommand, cwl_command_line_tool.baseCommand)

        # create job with executable
        dax_job = Job(dax_executable)

        # get the inputs of this step
        step_inputs = {get_basename(input.id) : get_basename(input.source) for input in step.in_}
        print(step_inputs)

        # add input uses to job
        for input in cwl_command_line_tool.inputs:
            if input.type == "File":
                dax_job.uses(
                    File(workflow_files[step_inputs[get_basename(step.id) + "/" + get_basename(input.id)]]),
                    link=Link.INPUT
                )

        # add output uses to job
        # TODO: ensure that these are of type File or File[]
        for output in step.out:
            output_file = File(workflow_files[get_basename(output)])
            dax_job.uses(
                    output_file,
                    link=Link.OUTPUT,
                    transfer=True,
                    register=True
                )

        # add arguments to job
        # TODO: place argument building up in a function
        dax_job_args = cwl_command_line_tool.arguments if cwl_command_line_tool.arguments is not None else []

        # process cwl inputBindings if they exist and build up job argument list
        cwl_command_line_tool_inputs = sorted(
            cwl_command_line_tool.inputs,
            key=lambda inp: inp.inputBinding.position
            if inp.inputBinding is not None and inp.inputBinding.position is not None
            else 0)

        for input in cwl_command_line_tool_inputs:
            # process args
            if input.inputBinding is not None:
                # TODO: account for inputBinding separation
                if input.inputBinding.prefix is not None:
                    dax_job_args.append(input.inputBinding.prefix)

                if input.type == "File":
                    dax_job_args.append(File(workflow_files[step_inputs[get_basename(step.id) + "/" + get_basename(input.id)]]))

                # TODO: take into account string inputs that are outputs of other steps
                #       and not just workflow inputs

                input_string_id = step_inputs[get_basename(step.id) + "/" + get_basename(input.id)]

                arg_string = ""
                if input.type == "string[]":
                    separator = " " if input.inputBinding.itemSeparator is None \
                                        else input.inputBinding.itemSeparator

                    arg_string += separator.join(
                        workflow_input_strings[input_string_id]
                    )
                elif input.type == "string":
                    arg_string += workflow_input_strings[input_string_id]

                dax_job_args.append(arg_string)

        dax_job.addArguments(*dax_job_args)

        # add executable to DAG
        if not adag.hasExecutable(dax_executable):
            adag.addExecutable(dax_executable)

        # add job to DAG
        adag.addJob(dax_job)

    # TODO: fix this, can't have forward slash in lfn, so replacing
    # with "." for now to get this working
    for filename, file in adag.files.items():
        if "/" in filename:
            file.name = file.name.replace("/", ".")

    # TODO: fix this, can't have forward slash in lfn, so replacing
    # with "." for now to get this working
    for jobid, job in adag.jobs.items():
        for used in job.used:
            if "/" in used.name:
                used.name = used.name.replace("/", ".")

        for arg in job.arguments:
            if isinstance(arg, File):
                if "/" in arg.name:
                    arg.name = arg.name.replace("/", ".")

    rc.write_catalog("rc.txt")
    tc.write_catalog("tc.txt")

    with open(args.output_file_path, "w") as f:
        adag.writeXML(f)
Example #11
def main():
    args = parse_args()

    workflow_file_path = args.cwl_workflow_file_path
    workflow_file_dir = os.path.dirname(workflow_file_path)

    workflow = cwl.load_document(workflow_file_path)

    adag = ADAG("from-cwl-workflow", auto=True)
    for step in workflow.steps:
        # EXECUTABLE
        '''
        - step.run points to a cwl file with a CommandLineTool class.
        - CommandLineTool.baseCommand must either be an absolute path
            to some executable OR the executable name, in which case it
            must be added to the run environment's PATH
        - in this case, I am assuming absolute paths and will just add it
            as a PFN to the Executable 
        '''
        # this step's run cwl document
        parameter_reference = cwl.load_document(step.run)
        executable = Executable(parameter_reference.id)
        executable.addPFN(
            PFN(parameter_reference.baseCommand, "what to do about 'site'???"))

        if not adag.hasExecutable(executable):
            adag.addExecutable(executable)

        # INPUT FILES
        input_files = set()

        for input_file in step.in_:
            if isinstance(input_file.source, list):
                input_files |= set(
                    map(lambda filename: File(filename), input_file.source))
            # should be a string in this case, just being careful for now
            elif isinstance(input_file.source, str):
                input_files.add(File(input_file.source))
            else:
                raise Exception(
                    "didn't get a string from a step's input file field")

        for input_file in input_files:
            if not adag.hasFile(input_file.name):
                adag.addFile(input_file)

        # OUTPUT FILES
        output_files = set()

        for output_file in step.out:
            # seems like this is always a list of filenames
            if isinstance(output_file, str):
                output_files.add(File(output_file))
            else:
                raise Exception(
                    "didn't get a string from a step's output file field")

        for output_file in output_files:
            if not adag.hasFile(output_file.name):
                adag.addFile(output_file)

        # JOB
        job = Job(executable)
        for input_file in input_files:
            job.uses(input_file, link=Link.INPUT)

        for output_file in output_files:
            job.uses(output_file, link=Link.OUTPUT)

        adag.addJob(job)
        '''
        What about the notion of arguments? In CWL, a CommandLineTool has an arguments field but,
        according to the docs, this is meant for command line bindings which are not directly 
        associated with input parameters
        '''

        # ADD PFNs for initial input files
        '''
        Initial input files are specified in a separate YAML file according to the CWL docs. 
        For each file in the separate YAML file, we need to get the PFNs for those files if
        they exist and add them to the File objects in the Pegasus adag object. 
        '''
        input_file_spec_path = args.input_file_spec_path
        with open(input_file_spec_path, "r") as yaml_file:
            input_file_specs = load(yaml_file, Loader=Loader)

        for filename, properties in input_file_specs.items():
            id = "#" + filename
            if properties["class"] == "File":
                for file_ in input_files:
                    if file_.name.endswith(id):
                        file_.addPFN(
                            PFN(properties["path"],
                                "what to do about this site??"))
                        break

        with open("2cwl-to-dax-conversion-workflow.xml", "w") as f:
            adag.writeXML(f)
Example #12
    def test_traverse_workflow(self):
        loaded = parser.load_document(str(TEST_CWL.resolve()))
        traverse_workflow(loaded)