def rule(metadata_dir, out, ontology, gaferencer_file):
    """Validate the inline examples attached to each GO Rule.

    Loads the ontology and the gorefs/rules metadata from ``metadata_dir``,
    runs every rule's embedded examples through the association parser
    configuration, echoes a per-rule pass/fail summary, and optionally writes
    a JSON validation report to ``out``.

    :param metadata_dir: path to the go-site metadata directory (contains
        ``rules/`` and ``gorefs/`` yamldown files)
    :param out: optional path for a JSON validation report; parent
        directories are created as needed
    :param ontology: ontology handle/path passed to OntologyFactory
    :param gaferencer_file: optional path to precomputed gaferencer
        inferences used by the parser config
    :raises click.ClickException: if the report cannot be written, or if at
        least one rule example fails validation
    """
    absolute_metadata = os.path.abspath(metadata_dir)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
    gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_ids,
        annotation_inferences=gaferences,
        rule_set=assocparser.RuleSet.ALL
    )

    all_examples_valid = True
    all_results = []
    for rule_id, rule_meta in gorule_metadata.items():
        examples = rules.RuleExample.example_from_json(rule_meta)
        if len(examples) == 0:
            # skip if there are no examples
            continue

        click.echo("==============================================================================")
        # rule ids are stored like "gorule-0000001"; display as "GORULE:0000001"
        click.echo("Validating {} examples for {}".format(len(examples), rule_id.upper().replace("-", ":")))
        results = rules.validate_all_examples(examples, config=config)
        successes = sum(1 for r in results if r.success)
        click.echo("\t* {}/{} success".format(successes, len(results)))
        for r in results:
            if not r.success:
                click.echo("\tRule example failed: {}".format(r.reason))
                click.echo("\tInput: >> `{}`".format(r.example.input))
                all_examples_valid = False

        all_results += results

    if out:
        absolute_out = os.path.abspath(out)
        os.makedirs(os.path.dirname(absolute_out), exist_ok=True)
        try:
            with open(absolute_out, "w") as outfile:
                json.dump(rules.validation_report(all_results), outfile, indent=4)
        except Exception as e:
            # Bug fix: the original format string had a single "{}" but two
            # arguments, so the underlying exception text was silently dropped.
            raise click.ClickException("Could not write report to {}: {}".format(out, e))

    if not all_examples_valid:
        raise click.ClickException("At least one rule example was not validated.")
def produce(group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file):
    """Download a group's source GAFs and produce the derived products.

    For each dataset in the group this validates the source GAF, produces a
    GPI, mixes in paint/noctua annotations, and emits the requested product
    files (gaf and gpi always; gpad/ttl when their flags are set) under
    ``target/groups``.

    :param group: resource group id (e.g. "zfin", "paint")
    :param metadata_dir: path to the go-site metadata directory
    :param gpad: whether to also produce GPAD output
    :param ttl: whether to also produce TTL output
    :param target: output directory root; ``groups/`` is created beneath it
    :param ontology: ontology handle/path passed to OntologyFactory
    :param exclude: dataset names to skip when downloading
    :param base_download_url: optional base URL override for source downloads
    :param suppress_rule_reporting_tag: tags whose rule reports are suppressed
    :param skip_existing_files: if True, do not re-download existing files
    :param gaferencer_file: optional path to precomputed gaferencer inferences
    """
    products = {
        "gaf": True,
        "gpi": True,
        "gpad": gpad,
        "ttl": ttl
    }
    click.echo("Making products {}.".format(", ".join([key for key in products if products[key]])))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))

    absolute_metadata = os.path.abspath(metadata_dir)
    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    downloaded_gaf_sources = download_source_gafs(group_metadata, absolute_target, exclusions=exclude, base_download_url=base_download_url, replace_existing_files=not skip_existing_files)

    # extract the titles for the go rules, this is a dictionary comprehension
    rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
    click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
    click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))

    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
    # Idiom fix: compare to None with `is not None` (was `m != None`), and
    # prefer a comprehension over filter(lambda ...).
    mixin_metadata_list = [m for m in (paint_metadata, noctua_metadata) if m is not None]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]
        # Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being
        # treated as a top level group, like for paint_other.
        valid_gaf = produce_gaf(dataset, source_gaf, ontology_graph,
                                paint=(group == "paint"),
                                group=group,
                                rule_metadata=rule_metadata,
                                goref_metadata=goref_metadata,
                                db_entities=db_entities,
                                group_idspace=group_ids,
                                suppress_rule_reporting_tags=suppress_rule_reporting_tag,
                                annotation_inferences=gaferences
                                )[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        end_gaf = mixin_a_dataset(valid_gaf, mixin_metadata_list, group_metadata["id"], dataset, absolute_target, ontology_graph, gpipath=gpi, base_download_url=base_download_url, replace_existing_files=not skip_existing_files)

        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version, rule_set):
    """Download a group's source GAFs and produce the derived products.

    Extended variant of the produce pipeline: supports restricting to a
    single dataset, selecting the output GAF version, choosing a rule set,
    applying extension constraints, and merging externally-provided GPI
    files declared in the group's metadata.

    :param ctx: click context (unused directly here; present for the CLI)
    :param group: resource group id (e.g. "zfin", "paint")
    :param metadata_dir: path to the go-site metadata directory
    :param gpad: whether to also produce GPAD output
    :param ttl: whether to also produce TTL output
    :param target: output directory root; ``groups/`` is created beneath it
    :param ontology: ontology handle/path passed to OntologyFactory
    :param exclude: dataset names to skip when downloading
    :param base_download_url: optional base URL override for source downloads
    :param suppress_rule_reporting_tag: tags whose rule reports are suppressed
    :param skip_existing_files: if True, do not re-download existing files
    :param gaferencer_file: optional path to precomputed gaferencer inferences
    :param only_dataset: restrict processing to this single dataset if given
    :param gaf_output_version: GAF spec version for the produced files
    :param rule_set: rule set selection; a single-element ALL tuple is
        normalized to RuleSet.ALL (click multi-option default quirk)
    """
    logger.info("Logging is verbose")

    products = {"gaf": True, "gpi": True, "gpad": gpad, "ttl": ttl}
    click.echo("Making products {}.".format(", ".join(
        [key for key in products if products[key]])))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))

    absolute_metadata = os.path.abspath(metadata_dir)
    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    downloaded_gaf_sources = download_source_gafs(
        group_metadata,
        absolute_target,
        exclusions=exclude,
        base_download_url=base_download_url,
        replace_existing_files=not skip_existing_files,
        only_dataset=only_dataset)

    # extract the titles for the go rules, this is a dictionary comprehension
    rule_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(
        os.path.join(absolute_metadata, "gorefs"))
    click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
    click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))

    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
    # Idiom fix: compare to None with `is not None` (was `m != None`), and
    # prefer a comprehension over filter(lambda ...).
    mixin_metadata_list = [m for m in (paint_metadata, noctua_metadata) if m is not None]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)
    extensions_constraints = metadata.extensions_constraints_file(
        absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(
            gaferencer_file)

    # Default comes through as single-element tuple
    if rule_set == (assocparser.RuleSet.ALL, ):
        rule_set = assocparser.RuleSet.ALL

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]
        # Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being
        # treated as a top level group, like for paint_other.
        valid_gaf = produce_gaf(
            dataset,
            source_gaf,
            ontology_graph,
            paint=(group == "paint"),
            group=group,
            rule_metadata=rule_metadata,
            goref_metadata=goref_metadata,
            db_entities=db_entities,
            group_idspace=group_ids,
            suppress_rule_reporting_tags=suppress_rule_reporting_tag,
            annotation_inferences=gaferences,
            group_metadata=group_metadata,
            extensions_constraints=extensions_constraints,
            rule_contexts=["import"] if dataset_metadata.get("import", False) else [],
            gaf_output_version=gaf_output_version,
            rule_set=rule_set)[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        gpi_list = [gpi]
        # Try to find other GPIs in metadata and merge
        for ds in group_metadata["datasets"]:
            # Where type=GPI for the same dataset (e.g. "zfin", "goa_cow")
            if ds["type"] == "gpi" and ds["dataset"] == dataset and ds.get("source"):
                matching_gpi_path = download_a_dataset_source(
                    group, ds, absolute_target, ds["source"],
                    replace_existing_files=not skip_existing_files)
                if ds.get("compression", None) == "gzip":
                    matching_gpi_path = unzip_simple(matching_gpi_path)
                gpi_list.append(matching_gpi_path)

        end_gaf = mixin_a_dataset(
            valid_gaf, mixin_metadata_list,
            group_metadata["id"], dataset, absolute_target,
            ontology_graph, gpipaths=gpi_list,
            base_download_url=base_download_url,
            rule_metadata=rule_metadata,
            replace_existing_files=not skip_existing_files,
            gaf_output_version=gaf_output_version)

        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)