def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
    """Convert a GPAD file into GO-CAM models and write them to *target*.

    NOTE: Validation on GPAD not included here since it's currently baked into
    produce() above. Multiple ontology files are accepted and merged into one
    graph (this will make a much smaller ontology with only what we need,
    i.e. GO, RO, GOREL).
    """
    # Merge any additional ontology files into the first one.
    merged_ontology = OntologyFactory().create(ontology[0], ignore_cache=True)
    for extra in ontology[1:]:
        merged_ontology.merge([OntologyFactory().create(extra, ignore_cache=True)])

    config = assocparser.AssocParserConfig(ontology=merged_ontology,
                                           gpi_authority_path=gpi_path)
    assocs_by_gene = AssocExtractor(gpad_path, parser_config=config).group_assocs()

    out_dir = os.path.abspath(target)
    stem, _ext = os.path.splitext(os.path.basename(gpad_path))
    nquads_path = os.path.join(out_dir, "{}.nq".format(stem))
    report_path = os.path.join(out_dir, "{}.gocamgen.report".format(stem))

    builder = GoCamBuilder(parser_config=config)
    for gene, annots in assocs_by_gene.items():
        if ttl:
            # One TTL file per gene, written directly to the output directory.
            builder.make_model_and_write_out(gene, annotations=annots,
                                             output_directory=out_dir)
        else:
            builder.make_model_and_add_to_store(gene, annotations=annots)
    if not ttl:
        # Models were accumulated in the store; dump them as a single N-Quads file.
        builder.write_out_store_to_nquads(filepath=nquads_path)
    builder.write_report(report_filepath=report_path)
def test_gaf_2_1_simple_terms():
    """GAF 2.1 lines with an empty qualifier get a default RO qualifier from
    upgrade_empty_qualifier(); the qualifier chosen depends on the GO term
    on the line.

    Improvement over the original: the three near-identical stanzas are
    collapsed into one loop, and the ontology is loaded once instead of
    three times.
    """
    # (GO term on the line, expected RO identity after the upgrade)
    cases = [
        ("GO:0006259", "0002264"),
        ("GO:0042393", "0002327"),
        ("GO:0005773", "0001025"),
    ]
    ontology = OntologyFactory().create("tests/resources/goslim_generic.json")
    for go_term, expected_ro in cases:
        line = ["SGD", "S000000819", "AFG3", "", go_term,
                "PMID:8681382|SGD_REF:S000055187", "IMP", "", "P",
                "Mitochondrial inner membrane m-AAA protease component",
                "YER017C|AAA family ATPase AFG3|YTA10", "gene",
                "taxon:559292", "20170428", "SGD"]
        p = GafParser(config=assocparser.AssocParserConfig(ontology=ontology))
        p.make_internal_cell_component_closure()
        parsed = gafparser.to_association(line)
        assoc = p.upgrade_empty_qualifier(parsed.associations[0])
        assert assoc.qualifiers[0] == association.Curie(namespace="RO",
                                                        identity=expected_ro)
def setup_ontologies(go_ontology=None, ro_ontology=None):
    """Initialize the module-level GO and RO ontology globals.

    Pre-built ontology objects may be supplied (e.g. by tests); when omitted,
    each ontology is loaded via OntologyFactory.
    """
    global GO_ONTOLOGY
    GO_ONTOLOGY = go_ontology if go_ontology is not None \
        else OntologyFactory().create("go")

    global RO_ONTOLOGY
    RO_ONTOLOGY = ro_ontology if ro_ontology is not None \
        else OntologyFactory().create("http://purl.obolibrary.org/obo/ro.owl")
def __init__(self):
    """Load the RO, GOREL and GO ontologies, build the extensions mapper,
    and set up an in-memory RDF store.
    """
    def _load(handle):
        # A fresh factory per load, matching the original behavior.
        return OntologyFactory().create(handle)

    self.ro_ontology = _load("http://purl.obolibrary.org/obo/ro.owl")
    self.gorel_ontology = _load(
        "http://release.geneontology.org/2019-03-18/ontology/extensions/gorel.obo")
    # Can't get logical_definitions w/ ont.create("go"), need to load ontology via PURL
    self.go_ontology = _load("http://purl.obolibrary.org/obo/go.owl")
    self.ext_mapper = ExtensionsMapper(go_ontology=self.go_ontology,
                                       ro_ontology=self.ro_ontology)
    self.store = plugin.get('IOMemory', Store)()
def test_semsearch():
    """Exercise the semantic-search engine end to end on a small gene set:
    build an association set from a GAF, compute all MICAs, then score every
    pair of subjects with cosine and best-match Resnik similarity.

    Improvement over the original: removed the unused local ``f = POMBASE``.
    """
    afa = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)
    parser = GafParser()
    assocs = parser.parse(POMBASE, skipheader=True)
    # Restrict to the test genes to keep the pairwise loop small.
    assocs = [a for a in assocs if a['subject']['label'] in GENES]
    aset = afa.create_from_assocs(assocs, ontology=ont)
    # Shrink the ontology to just the terms used by the association set.
    ont = aset.subontology()
    aset.ontology = ont
    logging.info('Genes={} Terms={}'.format(len(aset.subjects), len(ont.nodes())))
    print('STATS={}'.format(aset.as_dataframe().describe()))
    sse = SemSearchEngine(assocmodel=aset)
    logging.info('Calculating all MICAs')
    sse.calculate_all_micas()
    #h5path = 'tests/resources/mica_ic.h5'
    #logging.info('Saving to {}'.format(h5path))
    #sse.mica_ic_df.to_hdf(h5path, key='mica_ic', mode='w')
    #logging.info('Saved to {}'.format(h5path))
    logging.info('Doing pairwise')
    for i in aset.subjects:
        for j in aset.subjects:
            sim = sse.pw_score_cosine(i, j)
            if i == j:
                # Self-similarity should be (numerically) 1.0.
                assert (sim > 0.9999)
            tups = sse.pw_score_resnik_bestmatches(i, j)
            print('{} x {} = {} // {}'.format(i, j, sim, tups))
def test_obsolete_term_repair_withfrom():
    """With/from GO ids that the ontology can repair are rewritten; the other
    conjunctive sets pass through with their identifiers mapped/preserved.
    """
    gpad_fields = [
        "ZFIN",
        "ZFIN:ZDB-GENE-980526-362",
        "acts_upstream_of_or_within",
        "GO:0007155",
        "PMID:15494018",
        "ECO:0000305",
        "GO:0005913|GO:1,GO:4|ZFIN:ZDB-MRPHLNO-010101-1,MGI:1232453",
        "",
        "20041026",
        "ZFIN",
        "",
        "contributor=GOC:zfin_curators|model-state=production|noctua-model-id=gomodel:ZFIN_ZDB-GENE-980526-362"
    ]
    ontology = OntologyFactory().create(ALT_ID_ONT)
    gpad_parser = GpadParser(config=assocparser.AssocParserConfig(
        ontology=ontology, rule_set=assocparser.RuleSet.ALL))
    result = gpad_parser.parse_line("\t".join(gpad_fields))
    assoc = result.associations[0]

    # GO:0005913 should be repaired to its replacement term, GO:00005912
    expected = [
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')]),
        # repaired test GO elements
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='2'),
                                 Curie(namespace='GO', identity='3')]),
        # non GO elements stay the same, could be obsolete or not
        ConjunctiveSet(elements=[Curie(namespace='ZFIN', identity='ZDB-MRPHLNO-010101-1'),
                                 Curie(namespace='MGI', identity='1232453')]),
    ]
    assert expected == assoc.evidence.with_support_from
def test_local_json_parse():
    """
    Load ontology from JSON

    Improvement over the original: the manual node-counting loop is replaced
    with the idiomatic ``sum(1 for _ in ...)``.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/pato.json')
    ploidy = ont.node(PLOIDY)
    print("PLOIDY: {}".format(ploidy))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    search_results = ont.search('shape')
    print("SEARCH (exact): {}".format(search_results))
    assert [SHAPE] == search_results

    # implicit regexp
    search_results = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # explicit regexp
    search_results = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # The ontology should be non-trivial in size.
    num_nodes = sum(1 for _ in ont.nodes())
    assert num_nodes > 100
def load_associations(self, taxon) -> None:
    """Load gene associations for *taxon* into ``self.associations``.

    For ``self.ont == 'go'`` the annotations are parsed from the group's GAF
    URL and filtered to the BP/MF branches; otherwise the association factory
    is queried by taxon directly.

    Improvement over the original: ``'header' not in x.keys()`` replaced with
    the idiomatic membership test on the dict itself.
    """
    taxon_map = {
        'human': 'NCBITaxon:9606',
        'mouse': 'NCBITaxon:10090',
    }
    ofactory = OntologyFactory()
    self.ontology = ofactory.create(self.ont)
    p = GafParser()
    url = ''
    if self.ont == 'go':
        # CX: GO:0008150 is biological_process, GO:0003674 is molecular_function.
        # CX: These are 2 out of 3 top-level terms in GO ontology.
        # CX: The excluded term is cellular_component (where gene carries out a molecular function)
        go_roots = set(self.ontology.descendants('GO:0008150') +
                       self.ontology.descendants('GO:0003674'))
        sub_ont = self.ontology.subontology(go_roots)
        if taxon == 'mouse':
            url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
        if taxon == 'human':
            url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
        assocs = p.parse(url)
        self.assocs = assocs
        # Drop header entries, then keep only annotations under the BP/MF roots.
        assocs = [x for x in assocs if 'header' not in x]
        assocs = [x for x in assocs if x['object']['id'] in go_roots]
        self.associations = self.afactory.create_from_assocs(assocs, ontology=sub_ont)
    else:
        self.associations = \
            self.afactory.create(
                ontology=self.ontology,
                subject_category='gene',
                object_category='phenotype',
                taxon=taxon_map[taxon]
            )
def get(self, ontology, node):
    """
    Extract a subgraph from an ontology

    Improvement over the original: removed the unused local
    ``factory = OntologyFactory()`` (the ontology comes from get_ontology).
    """
    args = parser.parse_args()
    qnodes = [node]
    if args.cnode is not None:
        qnodes += args.cnode
    ont = get_ontology(ontology)
    #subont = ont.subontology([id], relations=args.relations)
    relations = args.relation
    print("Traversing: {} using {}".format(qnodes,relations))
    nodes = ont.traverse_nodes(qnodes, up=True, down=False, relations=relations)
    subont = ont.subontology(nodes, relations=relations)
    ojr = OboJsonGraphRenderer(include_meta=args.include_meta)
    json_obj = ojr.to_json(subont)
    # TODO: remove this next release of ontobio
    if not args.include_meta:
        # Strip per-node metadata so the payload honors include_meta=False.
        for g in json_obj['graphs']:
            for n in g['nodes']:
                n['meta']={}
    return json_obj
def get(self, ontology, node):
    """
    Extract a subgraph from an ontology
    """
    args = parser.parse_args()
    seed_ids = [node]
    if args.cnode is not None:
        seed_ids += args.cnode
    ont = OntologyFactory().create(ontology)
    graph = ont.get_filtered_graph(relations=args.relation)
    direction = 'du'  # walk both up (ancestors) and down (descendants)
    reachable = set()
    for seed in seed_ids:
        reachable.add(seed)
        # NOTE: we use direct networkx methods as we have already extracted
        # the subgraph we want
        if "u" in direction:
            reachable.update(nx.ancestors(graph, seed))
        if "d" in direction:
            reachable.update(nx.descendants(graph, seed))
    renderer = OboJsonGraphRenderer()
    return renderer.to_json(graph.subgraph(reachable))
def test_no_flag_valid_id():
    """A valid GO class id should produce no parser report messages."""
    gaf_parser = GafParser()
    gaf_parser.config.ontology = OntologyFactory().create(ONT)
    fake_line = assocparser.SplitLine("fake", [""] * 17, taxon="foo")
    gaf_parser._validate_ontology_class_id("GO:0000785", fake_line)
    assert len(gaf_parser.report.messages) == 0
def run_phenolog(ont, aset, args):
    """
    Like run_enrichment_test, but uses classes from a 2nd ontology/assocset to
    build the gene set.

    Fix: the log format string contained a stray backslash ("{}/\\{}") which
    printed a literal backslash between the two counts.
    """
    ofactory = OntologyFactory()
    ont2 = ofactory.create(args.resource2)
    afactory = AssociationSetFactory()
    aset2 = afactory.create(ontology=ont2, file=args.file2)

    # only test for genes (or other subjects of statements) in common
    common = set(aset.subjects).intersection(aset2.subjects)
    num_common = len(common)
    logging.info("Genes in common between two KBs: {}/{} = {}".format(
        len(aset.subjects), len(aset2.subjects), num_common))
    if num_common < 2:
        logging.error("TOO FEW")
        return None
    for n in aset.ontology.nodes():
        nl = ont.label(n, id_if_null=True)
        genes = aset.query([n])
        num_genes = len(genes)
        if num_genes > 2:
            logging.info("BASE: {} {} num={}".format(n, nl, num_genes))
            enr = aset2.enrichment_test(subjects=genes,
                                        background=aset2.subjects,
                                        labels=True)
            for r in enr:
                print("{:8.3g} {} {:20s} <-> {} {:20s}".format(
                    r['p'], n, nl, r['c'], str(r['n'])))
def test_remote_sparql():
    """
    reconstitution test
    """
    # default method is sparql
    ont = OntologyFactory().create('pato')
    graph = ont.get_graph()
    ploidy_info = graph.node[PLOIDY]
    print(str(ploidy_info))
    all_nodes = graph.nodes()
    print(len(all_nodes))
    assert len(all_nodes) > 100
    children = graph.successors(PLOIDY)
    print("SUCC:" + str(children))
    parents = graph.predecessors(PLOIDY)
    print("PRED:" + str(parents))
    assert parents == ['PATO:0001396']
    ancs = ancestors(graph, PLOIDY)
    print("ANCS:" + str(ancs))
    assert 'PATO:0000001' in ancs
    print(graph)

    query = ['.*shape.*']
    renderer = GraphRenderer.create('tree')
    # Resolve names locally, then remotely; both must find the shape term.
    local_hits = ont.resolve_names(query, is_regex=True, is_remote=False)
    print("SHAPE Q:" + str(local_hits))
    show_nodes(renderer, ont, local_hits)
    assert Y_SHAPED in local_hits
    remote_hits = ont.resolve_names(query, is_regex=True, is_remote=True)
    print("SHAPE Q:" + str(remote_hits))
    show_nodes(renderer, ont, remote_hits)
    assert Y_SHAPED in remote_hits
def test_learn():
    """Smoke-test OntologyLearner over a source and a target association set,
    writing the fit report to target/pheno_index.md.
    """
    source_ont = OntologyFactory().create(ONT)
    target_ont = OntologyFactory().create(TONT)
    factory = AssociationSetFactory()
    source_assocs = factory.create_from_file(file=GAF, ontology=source_ont)
    target_assocs = factory.create_from_file(file=TGAF, ontology=target_ont)
    learner = ol.OntologyLearner(assocs=source_assocs,
                                 target_assocs=target_assocs,
                                 score_threshold=0.6)
    print('L={}'.format(learner))
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))
    with open('target/pheno_index.md', 'w') as file:
        learner.fit_all(reportfile=file)
    print('L.targets={}'.format(learner.targets))
def get_scigraph_nodes(id_list) -> Iterator[Dict]:
    """
    Queries scigraph neighbors to get a list of nodes back

    We use the scigraph neighbors function because ids can be sent in batch
    which is faster than iteratively querying solr search
    or the scigraph graph/id function

    :return: json decoded result from scigraph_ontology._neighbors_graph
    :raises ValueError: If id is not in scigraph

    Improvement over the original: ``len(list(id_list))`` copied the whole
    sequence just to measure it; ``id_list`` must already be a sequence
    (it is sliced below), so ``len(id_list)`` suffices.
    """
    scigraph = OntologyFactory().create('scigraph:data')

    # Batch requests in chunks of 400 ids.
    chunks = [id_list[i:i + 400] for i in range(0, len(id_list), 400)]
    for chunk in chunks:
        params = {'id': chunk, 'depth': 0}
        try:
            result_graph = scigraph._neighbors_graph(**params)
            for node in result_graph['nodes']:
                yield node
        except JSONDecodeError as exception:
            # Assume json decode is due to an incorrect class ID
            # Should we handle this?
            raise ValueError(exception.doc)
def retrieve_associations(self, ont, group):
    """Download and load gene associations for *group* into
    ``self.associations``.

    For ``ont == 'go'`` the GAF for the requested group is fetched from the
    geneontology.org URL and filtered to the BP/MF branches; otherwise
    associations are created by taxon query.

    Fix: the parser was fed a hard-coded local file ('goa_human.gaf.gz')
    instead of the URL selected above, so group='mouse' silently loaded human
    annotations. The commented-out ``p.parse(url)`` was clearly the intent.
    """
    taxon_map = {
        'human': 'NCBITaxon:9606',
        'mouse': 'NCBITaxon:10090',
    }
    ofactory = OntologyFactory()
    self.ontology = ofactory.create(ont)
    p = GafParser()
    url = ''
    if ont == 'go':
        go_roots = set(
            self.ontology.descendants('GO:0008150') +
            self.ontology.descendants('GO:0003674'))
        sub_ont = self.ontology.subontology(go_roots)
        if group == 'mouse':
            url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
        if group == 'human':
            url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
        assocs = p.parse(url)
        self.assocs = assocs
        # Drop header entries, then keep only annotations under the BP/MF roots.
        assocs = [x for x in assocs if 'header' not in x]
        assocs = [x for x in assocs if x['object']['id'] in go_roots]
        self.associations = self.afactory.create_from_assocs(
            assocs, ontology=sub_ont)
    else:
        self.associations = self.afactory.create(
            ontology=self.ontology,
            subject_category='gene',
            object_category='phenotype',
            taxon=taxon_map[group])
def test_factory():
    """Build an association set from the PomBase GAF and check that two
    specific evidence records are present.
    """
    ont = OntologyFactory().create(ONT)
    aset = AssociationSetFactory().create_from_file(POMBASE, ontology=ont, skim=False)

    hits = 0
    for subj in aset.subjects:
        print('{} {}'.format(subj, aset.label(subj)))
        for cls in aset.annotations(subj):
            print(' {} {}'.format(cls, ont.label(cls)))
            for a in aset.associations(subj, cls):
                e = a['evidence']
                print(' {} {} {}'.format(e['type'], e['with_support_from'], e['has_supporting_reference']))
                if subj == 'PomBase:SPBC2D10.10c' and cls == 'GO:0005730':
                    if e['type'] == 'ISO':
                        if e['with_support_from'] == ['SGD:S000002172'] and \
                                e['has_supporting_reference'] == ['GO_REF:0000024']:
                            hits += 1
                            logging.info('** FOUND: {}'.format(a))
                    if e['type'] == 'IDA':
                        if e['has_supporting_reference'] == ['PMID:16823372']:
                            hits += 1
                            logging.info('** FOUND: {}'.format(a))
    assert len(aset.associations_by_subj) > 0
    assert hits == 2
def test_aspect_fill_for_obsolete_terms():
    # Test null aspect on an obsolete term
    # GO:4 is obsolete and has no aspect (hasOBONamespace) in obsolete.json ontology
    # GO:3 is it's replacement term
    # Note that GPAD lines contain no aspect data
    gpad_fields = [
        "MGI",
        "MGI:105128",
        "involved_in",
        "GO:4",
        "PMID:25901318",
        "ECO:0000314",
        "",
        "",
        "20190517",
        "MGI",
        "",
        "contributor=http://orcid.org/0000-0002-9796-7693|model-state=production|noctua-model-id=gomodel:5c4605cc00004132"
    ]
    ontology = OntologyFactory().create(ALT_ID_ONT)
    gpad_parser = GpadParser(config=assocparser.AssocParserConfig(
        ontology=ontology, rule_set=assocparser.RuleSet.ALL))
    assoc = gpad_parser.parse_line("\t".join(gpad_fields)).associations[0]

    # GO:4 should be repaired to its replacement term, GO:3
    assert assoc.object.id == Curie("GO", "3")
    # Aspect should not be empty
    assert assoc.aspect == 'P'
def load_slim(self, module: Module, slim_url: str, slim_cache_path: str):
    """Download (or read from cache) a slim ontology and store the set of its
    CLASS node ids on the attribute corresponding to *module*.

    Does nothing unless both *slim_url* and *slim_cache_path* are provided.
    """
    if not (slim_url and slim_cache_path):
        return
    # Relations used when cutting the subontology depend on the module;
    # unknown modules fall back to None, as in the original chain.
    relations = {
        Module.GO: self.go_relations,
        Module.DO_EXPERIMENTAL: self.do_relations,
        Module.EXPRESSION: None,
    }.get(module)
    cached_path = self._get_cached_file(file_source_url=slim_url,
                                        cache_path=slim_cache_path)
    slim_onto = OntologyFactory().create(cached_path).subontology(relations=relations)
    slim_set = {node for node in slim_onto.nodes()
                if "type" in slim_onto.node(node)
                and slim_onto.node(node)["type"] == "CLASS"}
    if module == Module.GO:
        logger.info("Setting GO Slim")
        self.go_slim = slim_set
    elif module == Module.DO_EXPERIMENTAL:
        logger.info("Setting DO Slim")
        self.do_slim = slim_set
    elif module == Module.EXPRESSION:
        logger.info("Setting Expression Slim")
        self.exp_slim = slim_set
def produce(group, metadata, gpad, ttl, target, ontology, exclude):
    """Produce GAF/GPI (and optionally GPAD/TTL) products for every dataset in
    *group*, merging in PAINT annotations where a PAINT source exists.
    """
    products = {"gaf": True, "gpi": True, "gpad": gpad, "ttl": ttl}
    click.echo("Making products {}.".format(", ".join([key for key in products if products[key]])))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))

    absolute_metadata = os.path.abspath(metadata)
    group_metadata = metadata_file(absolute_metadata, group)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    source_gaf_zips = download_source_gafs(group_metadata, absolute_target, exclusions=exclude)
    # Map each downloaded zip to the path of its unzipped "-src.gaf" file.
    source_gafs = {}
    for dataset, zip_path in source_gaf_zips.items():
        source_gafs[zip_path] = os.path.join(os.path.split(zip_path)[0],
                                             "{}-src.gaf".format(dataset))
    for source_zip, source_gaf in source_gafs.items():
        unzip(source_zip, source_gaf)

    paint_metadata = metadata_file(absolute_metadata, "paint")
    for dataset, gafzip in source_gaf_zips.items():
        source_gaf = source_gafs[gafzip]
        # TODO (Fix as part of https://github.com/geneontology/go-site/issues/642) Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being treated as a top level group, like for paint_other.
        valid_gaf = produce_gaf(dataset, source_gaf, ontology_graph,
                                paint=(group == "paint"))[0]
        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)
        paint_src_gaf = check_and_download_paint_source(
            paint_metadata, group_metadata["id"], dataset, absolute_target)
        end_gaf = valid_gaf
        if paint_src_gaf is not None:
            paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf,
                                    ontology_graph, gpipath=gpi, paint=True)[0]
            end_gaf = merge_mod_and_paint(valid_gaf, paint_gaf)
        else:
            # No PAINT source: just rename the gzipped valid GAF to its final name.
            gafgz = "{}.gz".format(valid_gaf)
            os.rename(gafgz, os.path.join(os.path.split(gafgz)[0],
                                          "{}.gaf.gz".format(dataset)))
        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)
def test_one_line():
    """Smoke test: a single GAF line parses without raising."""
    ontology = OntologyFactory().create("tests/resources/goslim_generic.json")
    gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology))
    parsed = gaf_parser.parse_line("PomBase SPBC16D10.09 pcn1 GO:0009536 PMID:8663159 IDA C PCNA pcn protein taxon:4896 20150326 PomBase")
def test_obsolete_replair_of_withfrom():
    """With/from values pointing at obsolete GO terms are repaired when a
    replacement exists; a line with no replacement is dropped and reported
    under gorule-0000020. (Name typo "replair" kept: it is the public test name.)
    """
    gaf_parser = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    assocs = gaf_parser.parse(open(ZFIN_GAF, "r"), skipheader=True)
    assert assocs[0].evidence.with_support_from == [
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')])
    ]

    # Reset parser report
    gaf_parser = GafParser(config=assocparser.AssocParserConfig(
        ontology=OntologyFactory().create(OBSOLETE_ONT)))
    gaf_parser.version = "2.2"
    obsolete_no_replacement_line = "FB\tFBgn0003334\tScm\tlocated_in\tGO:0005634\tFB:FBrf0179383|PMID:15280237\tIC\tGO:0016458\tC\tSex comb on midleg\tCG9495|SCM|Sex Comb on Midleg|Sex Comb on the Midleg|Sex combs on midleg|Sex combs on midlegs|Su(z)302|l(3)85Ef|scm|sex comb on midleg\tprotein\ttaxon:7227\t20050203\tUniProt\t\t"
    assoc_result = gaf_parser.parse_line(obsolete_no_replacement_line)
    assert assoc_result.associations == []
    report_json = gaf_parser.report.to_report_json()
    assert report_json["messages"]["gorule-0000020"][0]["obj"] == "GO:0016458"
def test_gaf_2_1_upconvert_in_parse():
    """A GAF 2.1 line with a blank qualifier gets its relation upgraded during
    parse when the ontology is available.
    """
    gaf = io.StringIO("!gaf-version: 2.1\nSGD\tS000000819\tAFG3\t\tGO:0005840\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")
    ontology = OntologyFactory().create("tests/resources/goslim_generic.json")
    gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=ontology))
    # We're 2.1, qualifier blank, cell component term from above, ontology defined: should upgrade
    assocs = gaf_parser.parse(gaf, skipheader=True)
    assert assocs[0].relation == association.Curie(namespace="BFO", identity="0000050")
def get(self):
    """
    Extract a subgraph from an ontology
    """
    ont = OntologyFactory().create()
    z = get_db()
    return {'z': z, 'test': len(ont.nodes())}
def rule(metadata_dir, out, ontology, gaferencer_file):
    """Validate the embedded examples of every GO rule against the ontology
    and metadata, optionally writing a JSON validation report to *out*.

    Raises click.ClickException if the report cannot be written or if any
    rule example fails validation.

    Fix: the "Could not write report" message used
    ``"...to {}: ".format(out, e)`` — one placeholder, two arguments — so the
    underlying exception was silently dropped from the message.
    """
    absolute_metadata = os.path.abspath(metadata_dir)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
    gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))
    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    config = assocparser.AssocParserConfig(
        ontology=ontology_graph,
        goref_metadata=goref_metadata,
        entity_idspaces=db_entities,
        group_idspace=group_ids,
        annotation_inferences=gaferences,
        rule_set=assocparser.RuleSet.ALL
    )

    all_examples_valid = True
    all_results = []
    for rule_id, rule_meta in gorule_metadata.items():
        examples = rules.RuleExample.example_from_json(rule_meta)
        if len(examples) == 0:
            # skip if there are no examples
            continue
        click.echo("==============================================================================")
        click.echo("Validating {} examples for {}".format(len(examples), rule_id.upper().replace("-", ":")))
        results = rules.validate_all_examples(examples, config=config)
        successes = sum(1 for r in results if r.success)
        click.echo("\t* {}/{} success".format(successes, len(results)))
        for r in results:
            if not r.success:
                click.echo("\tRule example failed: {}".format(r.reason))
                click.echo("\tInput: >> `{}`".format(r.example.input))
                all_examples_valid = False
        all_results += results

    if out:
        absolute_out = os.path.abspath(out)
        os.makedirs(os.path.dirname(absolute_out), exist_ok=True)
        try:
            with open(absolute_out, "w") as outfile:
                json.dump(rules.validation_report(all_results), outfile, indent=4)
        except Exception as e:
            raise click.ClickException("Could not write report to {}: {}".format(out, e))

    if not all_examples_valid:
        raise click.ClickException("At least one rule example was not validated.")
def test_write():
    """
    write obo from json
    """
    print("Creating ont")
    ont = OntologyFactory().create('tests/resources/nucleus.json')
    renderer = GraphRenderer.create('obo')
    renderer.write(ont)
def produce(group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file):
    """Top-level product pipeline for *group*: download source GAFs, validate
    them against the GO rules and metadata, mix in companion datasets
    (paint/noctua), and emit the requested products (GAF/GPI always;
    GPAD/TTL when requested).
    """
    products = {
        "gaf": True,
        "gpi": True,
        "gpad": gpad,
        "ttl": ttl
    }
    click.echo("Making products {}.".format(", ".join([key for key in products if products[key]])))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))

    absolute_metadata = os.path.abspath(metadata_dir)
    group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)

    downloaded_gaf_sources = download_source_gafs(
        group_metadata, absolute_target,
        exclusions=exclude,
        base_download_url=base_download_url,
        replace_existing_files=not skip_existing_files)

    # extract the titles for the go rules, this is a dictionary comprehension
    rule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
    goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
    click.echo("Found {} GO Rules".format(len(rule_metadata.keys())))
    click.echo("Found {} GO_REFs".format(len(goref_metadata.keys())))

    paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")
    noctua_metadata = metadata.dataset_metadata_file(absolute_metadata, "noctua")
    mixin_metadata_list = [m for m in (paint_metadata, noctua_metadata) if m is not None]

    db_entities = metadata.database_entities(absolute_metadata)
    group_ids = metadata.groups(absolute_metadata)

    gaferences = None
    if gaferencer_file:
        gaferences = gaference.load_gaferencer_inferences_from_file(gaferencer_file)

    for dataset_metadata, source_gaf in downloaded_gaf_sources:
        dataset = dataset_metadata["dataset"]
        # Set paint to True when the group is "paint".
        # This will prevent filtering of IBA (GO_RULE:26) when paint is being treated as a top level group, like for paint_other.
        valid_gaf = produce_gaf(dataset, source_gaf, ontology_graph,
                                paint=(group == "paint"),
                                group=group,
                                rule_metadata=rule_metadata,
                                goref_metadata=goref_metadata,
                                db_entities=db_entities,
                                group_idspace=group_ids,
                                suppress_rule_reporting_tags=suppress_rule_reporting_tag,
                                annotation_inferences=gaferences)[0]
        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)
        end_gaf = mixin_a_dataset(valid_gaf, mixin_metadata_list,
                                  group_metadata["id"], dataset, absolute_target,
                                  ontology_graph,
                                  gpipath=gpi,
                                  base_download_url=base_download_url,
                                  replace_existing_files=not skip_existing_files)
        make_products(dataset, absolute_target, end_gaf, products, ontology_graph)
def test_create_from_file_no_fmt():
    """
    Test loading from gaf while setting fmt to None
    """
    ontology = OntologyFactory().create('go')
    aset = AssociationSetFactory().create(ontology=ontology, fmt=None, file=POMBASE)
    print("SUBJS: {}".format(aset.subjects))
    assert len(aset.subjects) > 100
def test_alt_id_repair():
    """A GAF line using GO:0043623 against the alt-id test ontology should
    parse to an association whose object id is GO:0043623.
    """
    gaf_parser = GafParser()
    gaf_parser.config.ecomap = EcoMap()
    gaf_parser.config.ontology = OntologyFactory().create(ALT_ID_ONT)
    gaf = io.StringIO("SGD\tS000000819\tAFG3\t\tGO:0043623\tPMID:8681382|SGD_REF:S000055187\tIMP\t\tP\tMitochondrial inner membrane m-AAA protease component\tYER017C|AAA family ATPase AFG3|YTA10\tgene\ttaxon:559292\t20170428\tSGD")
    assocs = gaf_parser.parse(gaf, skipheader=True)
    assert len(assocs) > 0
    assert assocs[0]["object"]["id"] == "GO:0043623"
def test_invalid_goid_in_gpad():
    """Parsing a GPAD against a mismatched ontology should accumulate many
    report messages.
    """
    # Note: this ontology is a subset of GO extracted using the GAF, not GPAD
    gpad_parser = GpadParser()
    gpad_parser.config.ontology = OntologyFactory().create(ONT)
    results = gpad_parser.parse(open(POMBASE_GPAD, "r"), skipheader=True)
    # we expect errors since ONT is not tuned for the GPAD file
    # for m in gpad_parser.report.messages:
    #     print("MESSAGE: {}".format(m))
    assert len(gpad_parser.report.messages) > 500
    print(gpad_parser.report.to_markdown())