Esempio n. 1
0
def produce(group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file):
    """Build the annotation products for every downloaded source GAF of one group."""
    # NOTE(review): this looks like a click command (decorators are not visible
    # here); if so, the docstring above is shown as its --help text, which is
    # why the per-parameter notes are kept in comments instead.
    #
    # group                       -- group name; selects the group metadata file
    #                                and is forwarded to produce_gaf.
    # metadata_dir                -- metadata directory; its "rules" and "gorefs"
    #                                subdirectories are read below.
    # gpad, ttl                   -- truthy to enable the "gpad" / "ttl" products
    #                                ("gaf" and "gpi" are always enabled).
    # target                      -- output directory; "<target>/groups" is created.
    # ontology                    -- handle passed to OntologyFactory().create().
    # exclude                     -- forwarded to download_source_gafs as `exclusions`.
    # base_download_url           -- forwarded to the download/mixin helpers.
    # suppress_rule_reporting_tag -- forwarded to produce_gaf.
    # skip_existing_files         -- when truthy, existing files are not replaced.
    # gaferencer_file             -- optional path; when given, inferences are
    #                                loaded from it and passed to produce_gaf.

    products = {
        "gaf": True,
        "gpi": True,
        "gpad": gpad,
        "ttl": ttl,
    }
    enabled_products = [name for name, enabled in products.items() if enabled]
    click.echo("Making products {}.".format(", ".join(enabled_products)))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))
    absolute_metadata = os.path.abspath(metadata_dir)

    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    # Both the download step and the mixin step take the inverse of the
    # "skip" flag, so compute it once.
    replace_existing_files = not skip_existing_files

    downloaded_gaf_sources = download_source_gafs(
        group_metadata,
        absolute_target,
        exclusions=exclude,
        base_download_url=base_download_url,
        replace_existing_files=replace_existing_files)

    # Look up the GO rule and GO_REF metadata stored under the metadata
    # directory's "rules" and "gorefs" subdirectories.
    rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))

    click.echo("Found {} GO Rules".format(len(rule_metadata)))
    click.echo("Found {} GO_REFs".format(len(goref_metadata)))

    # Only mix in the paint/noctua datasets whose metadata was actually found.
    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
    mixin_metadata_list = [
        m for m in (paint_metadata, noctua_metadata) if m is not None
    ]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    # Set paint to True when the group is "paint".
    # This will prevent filtering of IBA (GO_RULE:26) when paint is being
    # treated as a top level group, like for paint_other.
    is_paint_group = (group == "paint")

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]

        # produce_gaf returns a sequence; only its first element is used here.
        valid_gaf = produce_gaf(
            dataset,
            source_gaf,
            ontology_graph,
            paint=is_paint_group,
            group=group,
            rule_metadata=rule_metadata,
            goref_metadata=goref_metadata,
            db_entities=db_entities,
            group_idspace=group_ids,
            suppress_rule_reporting_tags=suppress_rule_reporting_tag,
            annotation_inferences=gaferences,
        )[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        end_gaf = mixin_a_dataset(
            valid_gaf,
            mixin_metadata_list,
            group_metadata["id"],
            dataset,
            absolute_target,
            ontology_graph,
            gpipath=gpi,
            base_download_url=base_download_url,
            replace_existing_files=replace_existing_files)
        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)
Esempio n. 2
0
def paint(group, dataset, metadata, target, ontology):
    """Produce the PAINT GAF for one dataset, using that dataset's GPI file."""
    # NOTE(review): this looks like a click command (decorators are not visible
    # here); if so, the docstring above is shown as its --help text, which is
    # why the per-parameter notes are kept in comments instead.
    #
    # group    -- accepted but not used by this function.
    # dataset  -- dataset name; used to locate the GPI and to name the output.
    # metadata -- path to the metadata directory.
    # target   -- output directory; "<target>/groups" is created.
    # ontology -- handle passed to OntologyFactory().create().

    # The `metadata` parameter (a path) shadows the module-level `metadata`
    # name inside this function, so calling `.dataset_metadata_file` on it
    # raised AttributeError. Fetch the module-level object explicitly; the
    # parameter name is kept because callers may pass it by keyword.
    # NOTE(review): assumes the module-level `metadata` is the object that
    # exposes dataset_metadata_file -- confirm against the file's imports.
    metadata_module = globals()["metadata"]

    absolute_metadata = os.path.abspath(metadata)
    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)

    paint_metadata = metadata_module.dataset_metadata_file(absolute_metadata, "paint")
    paint_src_gaf = check_and_download_mixin_source(paint_metadata, dataset, absolute_target)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    gpi_path = os.path.join(absolute_target, "groups", dataset, "{}.gpi".format(dataset))
    click.echo("Using GPI at {}".format(gpi_path))

    # The return value is not used; the call is kept as in the original.
    produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path)
Esempio n. 3
0
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude,
            base_download_url, suppress_rule_reporting_tag,
            skip_existing_files, gaferencer_file, only_dataset,
            gaf_output_version, rule_set):
    """Build the annotation products for every downloaded source GAF of one group."""
    # NOTE(review): this looks like a click command (decorators are not visible
    # here); if so, the docstring above is shown as its --help text, which is
    # why the per-parameter notes are kept in comments instead.
    #
    # ctx                         -- accepted but not used by this function.
    # group                       -- group name; selects the group metadata file
    #                                and is forwarded to produce_gaf.
    # metadata_dir                -- metadata directory; its "rules" and "gorefs"
    #                                subdirectories are read below.
    # gpad, ttl                   -- truthy to enable the "gpad" / "ttl" products
    #                                ("gaf" and "gpi" are always enabled).
    # target                      -- output directory; "<target>/groups" is created.
    # ontology                    -- handle passed to OntologyFactory().create().
    # exclude                     -- forwarded to download_source_gafs as `exclusions`.
    # base_download_url           -- forwarded to the download/mixin helpers.
    # suppress_rule_reporting_tag -- forwarded to produce_gaf.
    # skip_existing_files         -- when truthy, existing files are not replaced.
    # gaferencer_file             -- optional path; when given, inferences are
    #                                loaded from it and passed to produce_gaf.
    # only_dataset                -- forwarded to download_source_gafs.
    # gaf_output_version          -- forwarded to produce_gaf and mixin_a_dataset.
    # rule_set                    -- forwarded to produce_gaf after the default
    #                                tuple form is normalised (see below).

    logger.info("Logging is verbose")
    products = {"gaf": True, "gpi": True, "gpad": gpad, "ttl": ttl}
    enabled_products = [name for name, enabled in products.items() if enabled]
    click.echo("Making products {}.".format(", ".join(enabled_products)))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))
    absolute_metadata = os.path.abspath(metadata_dir)

    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    # Every download/mixin helper takes the inverse of the "skip" flag, so
    # compute it once.
    replace_existing_files = not skip_existing_files

    downloaded_gaf_sources = download_source_gafs(
        group_metadata,
        absolute_target,
        exclusions=exclude,
        base_download_url=base_download_url,
        replace_existing_files=replace_existing_files,
        only_dataset=only_dataset)

    # Look up the GO rule and GO_REF metadata stored under the metadata
    # directory's "rules" and "gorefs" subdirectories.
    rule_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "gorefs"))

    click.echo("Found {} GO Rules".format(len(rule_metadata)))
    click.echo("Found {} GO_REFs".format(len(goref_metadata)))

    # Only mix in the paint/noctua datasets whose metadata was actually found.
    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata,
                                                     "noctua")
    mixin_metadata_list = [
        m for m in (paint_metadata, noctua_metadata) if m is not None
    ]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)
    extensions_constraints = metadata.extensions_constraints_file(
        absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(
            gaferencer_file)

    # Default comes through as single-element tuple
    if rule_set == (assocparser.RuleSet.ALL, ):
        rule_set = assocparser.RuleSet.ALL

    # Set paint to True when the group is "paint".
    # This will prevent filtering of IBA (GO_RULE:26) when paint is being
    # treated as a top level group, like for paint_other.
    is_paint_group = (group == "paint")

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]

        # Datasets flagged "import" in their metadata get the "import" rule
        # context; all others get none.
        rule_contexts = ["import"] if dataset_metadata.get("import", False) else []

        # produce_gaf returns a sequence; only its first element is used here.
        valid_gaf = produce_gaf(
            dataset,
            source_gaf,
            ontology_graph,
            paint=is_paint_group,
            group=group,
            rule_metadata=rule_metadata,
            goref_metadata=goref_metadata,
            db_entities=db_entities,
            group_idspace=group_ids,
            suppress_rule_reporting_tags=suppress_rule_reporting_tag,
            annotation_inferences=gaferences,
            group_metadata=group_metadata,
            extensions_constraints=extensions_constraints,
            rule_contexts=rule_contexts,
            gaf_output_version=gaf_output_version,
            rule_set=rule_set)[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        gpi_list = [gpi]
        # Try to find other GPIs in metadata and merge
        for ds in group_metadata["datasets"]:
            # Where type=GPI for the same dataset (e.g. "zfin", "goa_cow")
            is_matching_gpi = (ds["type"] == "gpi"
                               and ds["dataset"] == dataset
                               and ds.get("source"))
            if not is_matching_gpi:
                continue

            matching_gpi_path = download_a_dataset_source(
                group,
                ds,
                absolute_target,
                ds["source"],
                replace_existing_files=replace_existing_files)
            if ds.get("compression") == "gzip":
                matching_gpi_path = unzip_simple(matching_gpi_path)
            gpi_list.append(matching_gpi_path)

        end_gaf = mixin_a_dataset(
            valid_gaf,
            mixin_metadata_list,
            group_metadata["id"],
            dataset,
            absolute_target,
            ontology_graph,
            gpipaths=gpi_list,
            base_download_url=base_download_url,
            rule_metadata=rule_metadata,
            replace_existing_files=replace_existing_files,
            gaf_output_version=gaf_output_version)
        make_products(dataset, absolute_target, end_gaf, products,
                      ontology_graph)