Example #1
0
def test_go_rules_13():
    """Exercise GO Rule 13 over four GAF rows and check the reported result type.

    Cases covered:
    * IBA evidence on a term the inference data rejects -> ERROR
    * EXP evidence on the same term                     -> WARNING
    * the EXP case with a NOT qualifier                 -> PASS
    * IEA evidence on an AspGD row                      -> ERROR
    """
    # The inference file and parser config are identical for every case:
    # load/build them once instead of once per assertion.
    gaferences = gaference.load_gaferencer_inferences_from_file("tests/resources/test.inferences.json")
    config = assocparser.AssocParserConfig(annotation_inferences=gaferences)

    def result_for(row):
        # Parse a single GAF row and run GoRule13 against the shared config.
        assoc = gafparser.to_association(row).associations[0]
        return qc.GoRule13().test(assoc, config).result_type

    a = ["PomBase", "SPBC11B10.09", "cdc2", "", "GO:0007275", "PMID:21873635", "IBA", "PANTHER:PTN000623979|TAIR:locus:2099478", "P", "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043", "protein", "taxon:284812", "20170228", "GO_Central", "", ""]
    assert result_for(a) == qc.ResultType.ERROR

    a = ["PomBase", "SPBC11B10.09", "cdc2", "", "GO:0007275", "PMID:21873635", "EXP", "PANTHER:PTN000623979|TAIR:locus:2099478", "P", "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043", "protein", "taxon:284812", "20170228", "GO_Central", "", ""]
    assert result_for(a) == qc.ResultType.WARNING

    a = ["PomBase", "SPBC11B10.09", "cdc2", "NOT", "GO:0007275", "PMID:21873635", "EXP", "PANTHER:PTN000623979|TAIR:locus:2099478", "P", "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043", "protein", "taxon:284812", "20170228", "GO_Central", "", ""]
    assert result_for(a) == qc.ResultType.PASS

    a = ["AspGD", "ASPL0000059928", "AN0127", "", "GO:0032258", "AspGD_REF:ASPL0000000005", "IEA", "SGD:S000001917", "P", "", "AN0127|ANID_00127|ANIA_00127", "gene_product", "taxon:227321", "20200201", "AspGD", "", ""]
    assert result_for(a) == qc.ResultType.ERROR
Example #2
0
def rule(metadata_dir, out, ontology, gaferencer_file):
    """Validate the embedded examples of every GO Rule found in *metadata_dir*.

    Loads the ontology and GO Rule / GO_REF metadata, runs each rule's
    examples through the annotation QC machinery, optionally writes a JSON
    validation report to *out*, and raises a ClickException if any example
    failed to validate.
    """
    absolute_metadata = os.path.abspath(metadata_dir)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
    gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))

    click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    # Gaferencer inferences are optional; rules that need them only run
    # when a file is supplied.
    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_ids,
        annotation_inferences=gaferences,
        rule_set=assocparser.RuleSet.ALL
    )
    all_examples_valid = True
    all_results = []
    for rule_id, rule_meta in gorule_metadata.items():
        examples = rules.RuleExample.example_from_json(rule_meta)
        if len(examples) == 0:
            # skip if there are no examples
            continue

        click.echo("==============================================================================")
        click.echo("Validating {} examples for {}".format(len(examples), rule_id.upper().replace("-", ":")))
        results = rules.validate_all_examples(examples, config=config)
        successes = sum(1 for r in results if r.success)
        click.echo("\t* {}/{} success".format(successes, len(results)))
        for r in results:
            if not r.success:
                click.echo("\tRule example failed: {}".format(r.reason))
                click.echo("\tInput: >> `{}`".format(r.example.input))
                all_examples_valid = False

        all_results += results

    if out:
        absolute_out = os.path.abspath(out)
        os.makedirs(os.path.dirname(absolute_out), exist_ok=True)
        try:
            with open(absolute_out, "w") as outfile:
                json.dump(rules.validation_report(all_results), outfile, indent=4)
        except Exception as e:
            # BUG FIX: the original format string had one placeholder for two
            # arguments, silently dropping the underlying exception text.
            raise click.ClickException("Could not write report to {}: {}".format(out, e))

    if not all_examples_valid:
        raise click.ClickException("At least one rule example was not validated.")
Example #3
0
def produce(group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file):
    """Download a group's source GAFs and produce the requested products.

    For each downloaded dataset: validate the GAF (produce_gaf), derive a GPI,
    mix in paint/noctua annotations, and emit the selected products
    (gaf/gpi always; gpad/ttl when their flags are set) under *target*.
    """
    products = {
        "gaf": True,
        "gpi": True,
        "gpad": gpad,
        "ttl": ttl
    }
    click.echo("Making products {}.".format(", ".join([key for key in products if products[key]])))
    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))
    absolute_metadata = os.path.abspath(metadata_dir)

    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    downloaded_gaf_sources = download_source_gafs(group_metadata, absolute_target, exclusions=exclude, base_download_url=base_download_url, replace_existing_files=not skip_existing_files)

    # extract the titles for the go rules, this is a dictionary comprehension
    rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))

    click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
    click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))

    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
    # Either mixin source may be absent; keep only the ones that loaded.
    # (idiom fix: compare to None with `is not`, per PEP 8)
    mixin_metadata_list = [m for m in (paint_metadata, noctua_metadata) if m is not None]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    # Gaferencer inferences are optional.
    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]
        # Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being treated as a top level group, like for paint_other.
        valid_gaf = produce_gaf(dataset, source_gaf, ontology_graph,
            paint=(group=="paint"),
            group=group,
            rule_metadata=rule_metadata,
            goref_metadata=goref_metadata,
            db_entities=db_entities,
            group_idspace=group_ids,
            suppress_rule_reporting_tags=suppress_rule_reporting_tag,
            annotation_inferences=gaferences
            )[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        end_gaf = mixin_a_dataset(valid_gaf, mixin_metadata_list, group_metadata["id"], dataset, absolute_target, ontology_graph, gpipath=gpi, base_download_url=base_download_url, replace_existing_files=not skip_existing_files)
        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)
Example #4
0
def test_go_rules_13():
    """GO Rule 13 should flag an IBA annotation to GO:0007275 as a WARNING."""
    gaf_row = [
        "PomBase", "SPBC11B10.09", "cdc2", "", "GO:0007275", "PMID:21873635",
        "IBA", "PANTHER:PTN000623979|TAIR:locus:2099478", "P",
        "Cyclin-dependent kinase 1", "UniProtKB:P04551|PTN000624043",
        "protein", "taxon:284812", "20170228", "GO_Central", "", ""
    ]
    annotation = gafparser.to_association(gaf_row).associations[0]
    inferences = gaference.load_gaferencer_inferences_from_file(
        "tests/resources/test.inferences.json")
    config = assocparser.AssocParserConfig(annotation_inferences=inferences)
    outcome = qc.GoRule13().test(annotation, config)
    assert outcome.result_type == qc.ResultType.WARNING
Example #5
0
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude,
            base_download_url, suppress_rule_reporting_tag,
            skip_existing_files, gaferencer_file, only_dataset,
            gaf_output_version, rule_set):
    """Download a group's source GAFs and produce the requested products.

    For each downloaded dataset: validate the GAF (produce_gaf), derive and
    merge GPIs (including any type=gpi datasets declared in the group
    metadata), mix in paint/noctua annotations, and emit the selected
    products (gaf/gpi always; gpad/ttl when their flags are set) under
    *target*.
    """
    logger.info("Logging is verbose")
    products = {"gaf": True, "gpi": True, "gpad": gpad, "ttl": ttl}
    click.echo("Making products {}.".format(", ".join(
        [key for key in products if products[key]])))
    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))
    absolute_metadata = os.path.abspath(metadata_dir)

    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    downloaded_gaf_sources = download_source_gafs(
        group_metadata,
        absolute_target,
        exclusions=exclude,
        base_download_url=base_download_url,
        replace_existing_files=not skip_existing_files,
        only_dataset=only_dataset)

    # extract the titles for the go rules, this is a dictionary comprehension
    rule_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "gorefs"))

    click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
    click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))

    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata,
                                                     "noctua")
    # Either mixin source may be absent; keep only the ones that loaded.
    # (idiom fix: compare to None with `is not`, per PEP 8)
    mixin_metadata_list = [
        m for m in (paint_metadata, noctua_metadata) if m is not None
    ]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)
    extensions_constraints = metadata.extensions_constraints_file(
        absolute_metadata)

    # Gaferencer inferences are optional.
    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(
            gaferencer_file)

    # Default comes through as single-element tuple
    if rule_set == (assocparser.RuleSet.ALL, ):
        rule_set = assocparser.RuleSet.ALL

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]
        # Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being treated as a top level group,
        # like for paint_other.
        valid_gaf = produce_gaf(
            dataset,
            source_gaf,
            ontology_graph,
            paint=(group == "paint"),
            group=group,
            rule_metadata=rule_metadata,
            goref_metadata=goref_metadata,
            db_entities=db_entities,
            group_idspace=group_ids,
            suppress_rule_reporting_tags=suppress_rule_reporting_tag,
            annotation_inferences=gaferences,
            group_metadata=group_metadata,
            extensions_constraints=extensions_constraints,
            rule_contexts=["import"]
            if dataset_metadata.get("import", False) else [],
            gaf_output_version=gaf_output_version,
            rule_set=rule_set)[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        gpi_list = [gpi]
        # Try to find other GPIs in metadata and merge
        for ds in group_metadata["datasets"]:
            # Where type=GPI for the same dataset (e.g. "zfin", "goa_cow")
            if ds["type"] == "gpi" and ds["dataset"] == dataset and ds.get(
                    "source"):
                matching_gpi_path = download_a_dataset_source(
                    group,
                    ds,
                    absolute_target,
                    ds["source"],
                    replace_existing_files=not skip_existing_files)
                if ds.get("compression", None) == "gzip":
                    matching_gpi_path = unzip_simple(matching_gpi_path)
                gpi_list.append(matching_gpi_path)

        end_gaf = mixin_a_dataset(
            valid_gaf,
            mixin_metadata_list,
            group_metadata["id"],
            dataset,
            absolute_target,
            ontology_graph,
            gpipaths=gpi_list,
            base_download_url=base_download_url,
            rule_metadata=rule_metadata,
            replace_existing_files=not skip_existing_files,
            gaf_output_version=gaf_output_version)
        make_products(dataset, absolute_target, end_gaf, products,
                      ontology_graph)