Code Example #1
File: parser.py Project: MADscientist314/galaxy-lib
def to_cwl_tool_object(tool_path=None,
                       tool_object=None,
                       persisted_tool=None,
                       strict_cwl_validation=True):
    schema_loader = _schema_loader(strict_cwl_validation)
    if tool_path is not None:
        cwl_tool = schema_loader.tool(path=tool_path)
    elif tool_object is not None:
        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        import os  # needed below for os.getcwd()
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        uri = file_uri(os.getcwd()) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        tool_object, _ = schema_loader.raw_document_loader.resolve_all(
            tool_object, uri)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object, uri=uri)
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = ToolProxy.from_persistent_representation(persisted_tool)

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return cwl_tool_object_to_proxy(cwl_tool, tool_path=tool_path)
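All of these examples rely on the same pattern: ruamel's round-trip loader returns nodes that carry line/column information, and add_lc_filename walks the tree and records a source name on each node, so later validation errors can point at an exact file, line, and column. A minimal sketch of the pattern in isolation (the document text and the "example.cwl" name are made up for illustration):

from ruamel import yaml as ryaml
from schema_salad.sourceline import SourceLine, add_lc_filename

# Round-trip loading returns CommentedMap/CommentedSeq nodes whose .lc
# attribute records where each key and value was parsed.
doc = ryaml.round_trip_load("class: CommandLineTool\nbaseCommand: echo\n")
add_lc_filename(doc, "example.cwl")  # stamp a source name on every node

print(doc.lc.filename)  # -> example.cwl
# SourceLine combines the name and the .lc info into a "file:line:column"
# style message:
print(SourceLine(doc, "baseCommand").makeError("something is wrong here"))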
Code Example #2
File: parser.py Project: longr/ansible-cwl
def _to_cwl_tool_object(tool_path=None, tool_object=None, cwl_tool_object=None,
                        raw_process_reference=None, strict_cwl_validation=True,
                        tool_directory=None, uuid=None):
    if uuid is None:
        uuid = str(uuid4())
    schema_loader = _schema_loader(strict_cwl_validation)
    if raw_process_reference is None and tool_path is not None:
        assert cwl_tool_object is None
        assert tool_object is None

        raw_process_reference = schema_loader.raw_process_reference(tool_path)
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    elif tool_object is not None:
        assert raw_process_reference is None
        assert cwl_tool_object is None

        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        import os  # needed below for os.getcwd()
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        path = tool_directory
        if path is None:
            path = os.getcwd()
        uri = file_uri(path) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        # tool_object, _ = schema_loader.raw_document_loader.resolve_all(tool_object, uri, checklinks=False)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object,
            uri=uri
        )
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = cwl_tool_object

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return _cwl_tool_object_to_proxy(cwl_tool, uuid, raw_process_reference=raw_process_reference, tool_path=tool_path)
Code Example #3
def _document_load_by_url(loader, url, loadingOptions):
    # type: (_Loader, str, LoadingOptions) -> Any
    if url in loadingOptions.idx:
        return _document_load(loader, loadingOptions.idx[url], url, loadingOptions)

    text = loadingOptions.fetcher.fetch_text(url)
    if isinstance(text, bytes):
        textIO = StringIO(text.decode("utf-8"))
    else:
        textIO = StringIO(text)
    textIO.name = str(url)
    result = yaml.main.round_trip_load(textIO, preserve_quotes=True)
    add_lc_filename(result, url)

    loadingOptions.idx[url] = result

    loadingOptions = LoadingOptions(copyfrom=loadingOptions, fileuri=url)

    return _document_load(loader, result, url, loadingOptions)
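Two details worth noting above: the StringIO is given a .name so that ruamel's own parse errors mention the URL, and loadingOptions.idx acts as a per-run cache so each URL is fetched and parsed only once. A small sketch of the first point (the malformed YAML is deliberate, and the URL is made up):

from io import StringIO
from ruamel import yaml

textIO = StringIO("a: b:\n  - broken")  # deliberately malformed YAML
textIO.name = "http://example.com/tool.cwl"
try:
    yaml.main.round_trip_load(textIO, preserve_quotes=True)
except yaml.YAMLError as err:
    print(err)  # the error location cites the URL set via .name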
Code Example #4
def validate_js_expressions_strict(tool: dict):
    """
    Run cwltool function to validate JS in an app
    :param tool: CWL dict
    """
    data = ruamel.yaml.load(ruamel.yaml.dump(tool),
                            ruamel.yaml.RoundTripLoader)
    schema = get_schema(tool['cwlVersion'])[1].names[tool['class']]
    add_lc_filename(data, data.get('label', 'input_document'))
    jshint_options = {
        "includewarnings": [
            "W117",  # <VARIABLE> not defined
            "W104",  # <FEATURE> is available in ES6
            "W119"   # using ES6 features
        ],
        "strict": "implied",
        "esversion": 5
    }
    validate_js_expressions(data, schema, jshint_options)
Code Example #5
def run(
    sourceIO: IO[str], output_dir: str, output_format: str, mainfile: str, pretty: bool
) -> None:
    """Loop over the provided packed CWL document and split it up."""
    source = yaml.main.round_trip_load(sourceIO, preserve_quotes=True)
    add_lc_filename(source, sourceIO.name)

    if "$graph" not in source:
        print("No $graph, so not for us.")
        return

    version = source.pop("cwlVersion")

    def my_represent_none(
        self: Any, data: Any
    ) -> Any:  # pylint: disable=unused-argument
        """Force clean representation of 'null'."""
        return self.represent_scalar("tag:yaml.org,2002:null", "null")

    yaml.representer.RoundTripRepresenter.add_representer(type(None), my_represent_none)

    for entry in source["$graph"]:
        entry_id = entry.pop("id")[1:]
        entry["cwlVersion"] = version
        imports = rewrite(entry, entry_id)
        if imports:
            for import_name in imports:
                rewrite_types(entry, f"#{import_name}", False)
        if entry_id == "main":
            if mainfile is None:
                entry_id = f"unpacked_{os.path.basename(sourceIO.name)}"
            else:
                entry_id = mainfile

        output_file = os.path.join(output_dir, entry_id)
        if output_format == "json":
            json_dump(entry, output_file)
        elif output_format == "yaml":
            yaml_dump(entry, output_file, pretty)
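The my_represent_none representer above is worth calling out on its own: by default the round-trip dumper emits None as an empty scalar, which leaves dangling "key:" lines in the split-out files. A small demonstration, assuming the same pre-0.17 ruamel.yaml API used throughout these examples:

import ruamel.yaml

def my_represent_none(self, data):
    """Force clean representation of 'null'."""
    return self.represent_scalar("tag:yaml.org,2002:null", "null")

print(ruamel.yaml.main.round_trip_dump({"out": None}))  # "out:" (empty scalar)

ruamel.yaml.representer.RoundTripRepresenter.add_representer(
    type(None), my_represent_none)
print(ruamel.yaml.main.round_trip_dump({"out": None}))  # "out: null"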
Code Example #6
def load_schema_generate_form():
    # At startup, we need to load the metadata schema from the uploader module, so we can make a form for it
    if os.path.isfile("bh20sequploader/bh20seq-schema.yml"):
        METADATA_SCHEMA = yaml.round_trip_load(
            open("bh20sequploader/bh20seq-schema.yml", "r").read())
        METADATA_OPTION_DEFINITIONS = yaml.safe_load(
            open("bh20sequploader/bh20seq-options.yml", "r").read())
    else:
        METADATA_SCHEMA = yaml.round_trip_load(
            pkg_resources.resource_stream("bh20sequploader",
                                          "bh20seq-schema.yml"))
        METADATA_OPTION_DEFINITIONS = yaml.safe_load(
            pkg_resources.resource_stream("bh20sequploader",
                                          "bh20seq-options.yml"))

    METADATA_SCHEMA["name"] = "bh20seq-schema.yml"
    add_lc_filename(METADATA_SCHEMA, "bh20seq-schema.yml")
    metaschema_names, _metaschema_doc, metaschema_loader = \
        schema_salad.schema.get_metaschema()
    schema_doc, schema_metadata = metaschema_loader.resolve_ref(
        METADATA_SCHEMA, "")

    return generate_form(schema_doc, METADATA_OPTION_DEFINITIONS)
Code Example #7
def main(args):

    (document_loader, avsc_names, schema_metadata,
     metaschema_loader) = schema_salad.schema.load_schema(args.schema)

    with open(args.metadata) as f:
        metadata_contents = ruamel.yaml.round_trip_load(f)

    for metadata_content in metadata_contents:
        metadata_content["id"] = "http://example.org/id"
        add_lc_filename(metadata_content, metadata_content["id"])
        doc, metadata = schema_salad.schema.load_and_validate(
            document_loader, avsc_names, metadata_content, True)

    with open(args.shex) as f:
        shex = f.read()

    g = schema_salad.jsonld_context.makerdf("workflow", doc,
                                            document_loader.ctx)
    validation_result, reason = evaluate(g, shex, doc["id"], "sample_name")

    if not validation_result:
        print(reason)
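This snippet omits its imports; a plausible set, assuming evaluate() comes from pyshex as in similar bh20-seq-resource scripts:

import ruamel.yaml
import schema_salad.schema
import schema_salad.jsonld_context
from schema_salad.sourceline import add_lc_filename
from pyshex.evaluate import evaluate  # assumed source of evaluate()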
Code Example #8
File: parser.py Project: ImmPortDB/immport-galaxy
def to_cwl_tool_object(tool_path=None, tool_object=None, persisted_tool=None, strict_cwl_validation=True):
    schema_loader = _schema_loader(strict_cwl_validation)
    if tool_path is not None:
        cwl_tool = schema_loader.tool(
            path=tool_path
        )
    elif tool_object is not None:
        # Allow loading tools from YAML...
        from ruamel import yaml as ryaml
        import json
        import os  # needed below for os.getcwd()
        as_str = json.dumps(tool_object)
        tool_object = ryaml.round_trip_load(as_str)
        from schema_salad import sourceline
        from schema_salad.ref_resolver import file_uri
        uri = file_uri(os.getcwd()) + "/"
        sourceline.add_lc_filename(tool_object, uri)
        tool_object, _ = schema_loader.raw_document_loader.resolve_all(tool_object, uri)
        raw_process_reference = schema_loader.raw_process_reference_for_object(
            tool_object,
            uri=uri
        )
        cwl_tool = schema_loader.tool(
            raw_process_reference=raw_process_reference,
        )
    else:
        cwl_tool = ToolProxy.from_persistent_representation(persisted_tool)

    if isinstance(cwl_tool, int):
        raise Exception("Failed to load tool.")

    raw_tool = cwl_tool.tool
    # Apply Galaxy hacks to CWL tool representation to bridge semantic differences
    # between Galaxy and cwltool.
    _hack_cwl_requirements(cwl_tool)
    check_requirements(raw_tool)
    return cwl_tool_object_to_proxy(cwl_tool, tool_path=tool_path)
Code Example #9
File: main.py Project: proccaserra/bh20-seq-resource
def validate_upload(api, collection, validated_project, fastq_project,
                    fastq_workflow_uuid):
    col = arvados.collection.Collection(collection["uuid"])

    # validate the collection here.  Check metadata, etc.
    valid = True

    if "metadata.yaml" not in col:
        logging.warn("Upload '%s' missing metadata.yaml", collection["name"])
        valid = False
    else:
        try:
            metadata_content = ruamel.yaml.round_trip_load(
                col.open("metadata.yaml"))
            metadata_content["id"] = (
                "http://arvados.org/keep:%s/metadata.yaml"
                % collection["portable_data_hash"])
            sample_id = metadata_content["sample"]["sample_id"]
            add_lc_filename(metadata_content, metadata_content["id"])
            valid = qc_metadata(metadata_content) and valid
        except Exception as e:
            logging.warn(e)
            valid = False
        if not valid:
            logging.warn("Failed metadata qc")

    if valid:
        try:
            tgt = None
            paired = {
                "reads_1.fastq": "reads.fastq",
                "reads_1.fastq.gz": "reads.fastq.gz"
            }
            for n in ("sequence.fasta", "reads.fastq", "reads.fastq.gz",
                      "reads_1.fastq", "reads_1.fastq.gz"):
                if n not in col:
                    continue
                with col.open(n, 'rb') as qf:
                    tgt = qc_fasta(qf)[0]
                    if tgt != n and tgt != paired.get(n):
                        logging.info(
                            "Expected %s but magic says it should be %s", n,
                            tgt)
                        valid = False
                    elif tgt in ("reads.fastq", "reads.fastq.gz",
                                 "reads_1.fastq", "reads_1.fastq.gz"):
                        start_fastq_to_fasta(api, collection, fastq_project,
                                             fastq_workflow_uuid, n, sample_id)
                        return False
            if tgt is None:
                valid = False
                logging.warn(
                    "Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq",
                    collection["name"])
        except ValueError as v:
            valid = False

    dup = api.collections().list(
        filters=[["owner_uuid", "=", validated_project],
                 ["portable_data_hash", "=",
                  col.portable_data_hash()]]).execute()
    if dup["items"]:
        # This exact collection has been uploaded before.
        valid = False
        logging.warn("Upload '%s' is duplicate" % collection["name"])

    if valid:
        logging.info("Added '%s' to validated sequences" % collection["name"])
        # Move it to the "validated" project to be included in the next analysis
        api.collections().update(
            uuid=collection["uuid"],
            body={
                "owner_uuid":
                validated_project,
                "name":
                "%s (%s)" % (collection["name"], time.asctime(time.gmtime()))
            }).execute()
    else:
        # It is invalid, delete it.
        logging.warn("Suggest deleting '%s'" % collection["name"])
        #api.collections().delete(uuid=collection["uuid"]).execute()

    return valid
Code Example #10
(document_loader,
 avsc_names,
 schema_metadata,
 metaschema_loader) = schema_salad.schema.load_schema(metadataSchema)


for item in validated:
    pdh = item["portable_data_hash"]
    uuid = item["uuid"]
    try:
        subject = "http://covid19.genenetwork.org/resource/%s" % uuid
        with arvados.collection.CollectionReader(pdh, api_client=api, keep_client=keepclient) as col:
            with col.open("metadata.yaml", "rt") as md:
                metadata_content = ruamel.yaml.round_trip_load(md)
                metadata_content["id"] = subject
                add_lc_filename(metadata_content, metadata_content["id"])
                doc, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, metadata_content, False, False)
                g = schema_salad.jsonld_context.makerdf(subject, doc, document_loader.ctx)

            with col.open("sequence.fasta", "rt") as fa:
                label = fa.readline().strip()
                merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/original_fasta_label> \"%s\" .\n" % (subject, label[1:].replace('"', '\\"')))
                merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/collection_pdh> \"%s\" .\n" % (subject, pdh))
                merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/collection_version> \"%s\" .\n" % (subject, item["version"]))
                skip = (subject in blacklist or label[1:] in blacklist)
                if skip:
                    merged_metadata.write("<%s> <http://biohackathon.org/bh20-seq-schema/excluded_from_graph> \"true\"^^<http://www.w3.org/2001/XMLSchema#boolean> .\n" % subject)
                if not skip:
                    relabeled_fasta.write(">"+subject+"\n")
                data = fa.read(8096)
                while data:
                    if not skip:
                        relabeled_fasta.write(data)
                    data = fa.read(8096)
    except Exception as e:
        # Assumed handler: the original excerpt ends before the except
        # clause that must close the try above.
        print("Error processing %s: %s" % (uuid, e))
Code Example #11
def run(args: argparse.Namespace) -> int:
    """Primary processing loop."""
    imports: Set[str] = set()
    for document in args.inputs:
        _logger.info("Processing %s.", document)
        with open(document) as doc_handle:
            result = yaml.main.round_trip_load(doc_handle,
                                               preserve_quotes=True)
        add_lc_filename(result, document)
        version = result.get("cwlVersion", None)
        if version in ("draft-3", "cwl:draft-3", "v1.0", "v1.1"):
            result = cwlupgrader.upgrade_document(result, False, False,
                                                  args.dir, imports)
        else:
            _logger.error(
                "Sorry, %s in %s is not a supported CWL version by this tool.",
                version,
                document,
            )
            return -1
        uri = Path(document).resolve().as_uri()
        if not args.no_expression_refactoring:
            refactored, _ = cwl_v1_2_expression_refactor.traverse(
                load_document_by_yaml(result, uri),
                not args.etools,
                False,
                args.skip_some1,
                args.skip_some2,
            )
            if not isinstance(refactored, MutableSequence):
                result = save(
                    refactored,
                    base_url=refactored.loadingOptions.fileuri
                    if refactored.loadingOptions.fileuri else "",
                )
            #   ^^ Setting the base_url and keeping the default value
            #      for relative_uris=True means that the IDs in the generated
            #      JSON/YAML are kept clean of the path to the input document
            else:
                result = [
                    save(result_item,
                         base_url=result_item.loadingOptions.fileuri)
                    for result_item in refactored
                ]
        if "$graph" in result:
            packed = result
        else:
            with tempfile.TemporaryDirectory() as tmpdirname:
                path = Path(tmpdirname) / Path(document).name
                with open(path, "w") as handle:
                    yaml.main.round_trip_dump(result, handle)
                # TODO replace the cwltool based packing with a parser_v1_2 based packer
                runtimeContext = RuntimeContext()
                loadingContext = LoadingContext()
                use_standard_schema("v1.2")
                # loadingContext.construct_tool_object = workflow.default_make_tool
                # loadingContext.resolver = tool_resolver
                loadingContext.do_update = False
                uri, tool_file_uri = resolve_tool_uri(
                    str(path),
                    resolver=loadingContext.resolver,
                    fetcher_constructor=loadingContext.fetcher_constructor,
                )
                loadingContext, workflowobj, uri = fetch_document(
                    uri, loadingContext)
                loadingContext, uri = resolve_and_validate_document(
                    loadingContext,
                    workflowobj,
                    uri,
                    preprocess_only=True,
                    skip_schemas=True,
                )
                packed = print_pack(loadingContext, uri)
        output = Path(args.dir) / Path(document).name
        with open(output, "w", encoding="utf-8") as output_filehandle:
            output_filehandle.write(packed)
    return 0
Code Example #12
File: main.py Project: urbanslug/bh20-seq-resource
    def validate_upload(self, collection, revalidate):
        if not revalidate and collection["properties"].get("status") in (
                "validated", "rejected"):
            return False

        with arvados.collection.CollectionReader(
                collection["uuid"],
                api_client=self.api,
                keep_client=self.keepclient) as col:
            # validate the collection here.  Check metadata, etc.
            logging.info("Validating upload '%s' (%s)" %
                         (collection["name"], collection["uuid"]))

            errors = []
            sample_id = None  # guard: referenced below even when metadata.yaml is absent

            if collection["owner_uuid"] != self.validated_project:
                dup = self.api.collections().list(filters=[[
                    "owner_uuid", "=", self.validated_project
                ], ["portable_data_hash", "=",
                    col.portable_data_hash()]]).execute()
                if dup["items"]:
                    # This exact collection has been uploaded before.
                    errors.append("Duplicate of %s" %
                                  ([d["uuid"] for d in dup["items"]]))

            if not errors:
                if "metadata.yaml" not in col:
                    errors.append("%s missing metadata.yaml" %
                                  collection["name"])
                else:
                    try:
                        with col.open("metadata.yaml") as md:
                            metadata_content = ruamel.yaml.round_trip_load(md)
                        metadata_content["id"] = (
                            "http://covid19.genenetwork.org/resource/%s"
                            % collection["uuid"])
                        sample_id = metadata_content["sample"]["sample_id"]
                        add_lc_filename(metadata_content,
                                        metadata_content["id"])
                        valid = qc_metadata(metadata_content)
                        if not valid:
                            errors.append("Failed metadata qc")
                    except Exception as e:
                        errors.append(str(e))

            existing = self.api.collections().list(filters=[[
                "owner_uuid", "=", self.validated_project
            ], ["properties.sequence_label", "=", sample_id]]).execute()

            if not errors:
                try:
                    tgt = None
                    paired = {
                        "reads_1.fastq": "reads.fastq",
                        "reads_1.fastq.gz": "reads.fastq.gz"
                    }
                    for n in ("sequence.fasta", "reads.fastq",
                              "reads.fastq.gz", "reads_1.fastq",
                              "reads_1.fastq.gz"):
                        if n not in col:
                            continue
                        with col.open(n, 'rb') as qf:
                            tgt, seqlabel, seq_type = qc_fasta(qf)
                            if tgt != n and tgt != paired.get(n):
                                errors.append(
                                    "Expected %s but magic says it should be %s"
                                    % (n, tgt))
                            elif tgt in ("reads.fastq", "reads.fastq.gz",
                                         "reads_1.fastq", "reads_1.fastq.gz"):
                                self.start_fastq_to_fasta(
                                    collection, n, sample_id)
                                return False

                            # If it is a FASTA
                            if sample_id != seqlabel:
                                errors.append(
                                    "Expected sample_id == seqlabel, but %s != %s"
                                    % (sample_id, seqlabel))
                    if tgt is None and len(existing["items"]) == 0:
                        errors.append(
                            "Upload '%s' does not contain sequence.fasta, reads.fastq or reads_1.fastq"
                            % collection["name"])
                except Exception as v:
                    errors.append(str(v))

            if errors:
                # It is invalid
                logging.warn("'%s' (%s) has validation errors: %s" %
                             (collection["name"], collection["uuid"],
                              "\n".join(errors)))
                collection["properties"]["status"] = "rejected"
                collection["properties"]["errors"] = errors
                self.api.collections().update(uuid=collection["uuid"],
                                              body={
                                                  "properties":
                                                  collection["properties"]
                                              }).execute()
                return False

            update_from = None
            if existing["items"]:
                # "collection" is the newly uploaded one we're looking at
                update_from = collection
                collection = existing["items"][0]
                collection["properties"] = update_from["properties"]

            if "errors" in collection["properties"]:
                del collection["properties"]["errors"]
            collection["properties"]["status"] = "validated"
            collection["properties"]["sequence_label"] = sample_id

            if update_from:
                with arvados.collection.Collection(
                        collection["uuid"],
                        api_client=self.api,
                        keep_client=self.keepclient) as update_existing_col:
                    update_existing_col.copy("metadata.yaml",
                                             "metadata.yaml",
                                             source_collection=col,
                                             overwrite=True)
                    update_existing_col.save(
                        properties=collection["properties"])
                self.api.collections().delete(
                    uuid=update_from["uuid"]).execute()
                logging.info("Updated '%s' in validated sequences" %
                             collection["name"])
            else:
                # Move it to the "validated" project to be included in the next analysis
                self.api.collections().update(
                    uuid=collection["uuid"],
                    body={
                        "owner_uuid":
                        self.validated_project,
                        "name":
                        "%s (%s)" %
                        (collection["name"], time.asctime(time.gmtime())),
                        "properties":
                        collection["properties"]
                    }).execute()
                logging.info("Added '%s' to validated sequences" %
                             collection["name"])

            return True