Ejemplo n.º 1
0
def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
    """Build GO-CAM models from a GPAD file and write them, plus a report, under ``target``.

    ``ontology`` is a sequence of ontology sources: the first one is loaded
    and every further one is merged into it, which gives a much smaller
    ontology holding only what is needed (i.e. GO, RO, GOREL).

    When ``ttl`` is truthy each gene's model is written out on its own into
    the target directory; otherwise the models are collected in the
    builder's store and written once to ``<gpad basename>.nq``.  A
    ``<gpad basename>.gocamgen.report`` file is written in both cases.
    """
    # NOTE: GPAD validation is not performed here; it is currently baked into produce().
    merged_ontology = OntologyFactory().create(ontology[0], ignore_cache=True)
    for extra_source in ontology[1:]:
        extra_graph = OntologyFactory().create(extra_source, ignore_cache=True)
        merged_ontology.merge([extra_graph])

    config = assocparser.AssocParserConfig(ontology=merged_ontology,
                                           gpi_authority_path=gpi_path)
    grouped = AssocExtractor(gpad_path, parser_config=config).group_assocs()

    # Output file names are derived from the GPAD file name without its extension.
    out_dir = os.path.abspath(target)
    stem = os.path.splitext(os.path.basename(gpad_path))[0]
    nquads_path = os.path.join(out_dir, "{}.nq".format(stem))
    report_path = os.path.join(out_dir, "{}.gocamgen.report".format(stem))

    builder = GoCamBuilder(parser_config=config)
    if ttl:
        for gene, associations in grouped.items():
            builder.make_model_and_write_out(gene,
                                             annotations=associations,
                                             output_directory=out_dir)
    else:
        for gene, associations in grouped.items():
            builder.make_model_and_add_to_store(gene, annotations=associations)
        builder.write_out_store_to_nquads(filepath=nquads_path)

    builder.write_report(report_filepath=report_path)
Ejemplo n.º 2
0
def test_gaf_2_1_simple_terms():
    """An empty qualifier on a GAF 2.1 line is upgraded to the relation expected for its GO term."""
    # (GO term placed in the line, identity of the RO curie expected as first qualifier)
    cases = [
        ("GO:0006259", "0002264"),
        ("GO:0042393", "0002327"),
        ("GO:0005773", "0001025"),
    ]
    for go_term, expected_identity in cases:
        columns = ["SGD", "S000000819", "AFG3", "", go_term,
                   "PMID:8681382|SGD_REF:S000055187", "IMP", "", "P",
                   "Mitochondrial inner membrane m-AAA protease component",
                   "YER017C|AAA family ATPase AFG3|YTA10", "gene",
                   "taxon:559292", "20170428", "SGD"]

        # Each case gets its own freshly loaded ontology and parser.
        slim = OntologyFactory().create("tests/resources/goslim_generic.json")
        gaf_parser = GafParser(config=assocparser.AssocParserConfig(ontology=slim))
        gaf_parser.make_internal_cell_component_closure()

        converted = gafparser.to_association(columns)
        upgraded = gaf_parser.upgrade_empty_qualifier(converted.associations[0])
        assert upgraded.qualifiers[0] == association.Curie(namespace="RO", identity=expected_identity)
Ejemplo n.º 3
0
def run_phenolog(ont, aset, args):
    """
    Like run_enrichment_test, but uses classes from a 2nd ontology/assocset to build the gene set.

    A second ontology is created from ``args.resource2`` and a second
    association set from ``args.file2``.  For every node of
    ``aset.ontology`` whose query on ``aset`` yields more than two subjects,
    those subjects are run through ``aset2.enrichment_test`` and each
    result row is printed.

    Returns None in all cases; when the two association sets share fewer
    than two subjects an error is logged and no tests are run.
    """
    ofactory = OntologyFactory()
    ont2 = ofactory.create(args.resource2)

    afactory = AssociationSetFactory()
    aset2 = afactory.create(ontology=ont2, file=args.file2)

    # only test for genes (or other subjects of statements) in common
    common = set(aset.subjects).intersection(aset2.subjects)
    num_common = len(common)
    # The backslash is escaped explicitly: a bare "\{" is an invalid escape
    # sequence that newer Python versions warn about. The logged text is
    # unchanged (slash + backslash between the two counts).
    logging.info("Genes in common between two KBs: {}/\\{} = {}".format(
        len(aset.subjects), len(aset2.subjects), num_common))
    if num_common < 2:
        logging.error("TOO FEW")
        return None
    for n in aset.ontology.nodes():
        # Labels are looked up in the caller-supplied ontology, falling back to the id.
        nl = ont.label(n, id_if_null=True)
        genes = aset.query([n])
        num_genes = len(genes)
        if num_genes > 2:
            logging.info("BASE: {} {} num={}".format(n, nl, num_genes))
            enr = aset2.enrichment_test(subjects=genes,
                                        background=aset2.subjects,
                                        labels=True)
            for r in enr:
                print("{:8.3g} {} {:20s} <-> {} {:20s}".format(
                    r['p'], n, nl, r['c'], str(r['n'])))
 def load_associations(self, taxon) -> None:
     """Load ``self.ont`` and populate ``self.associations`` for ``taxon``.

     For the 'go' ontology a GAF file is parsed (the raw parse result is
     kept on ``self.assocs``) and the associations are restricted to a
     sub-ontology; for any other ontology gene-to-phenotype associations
     are requested from the association factory for the taxon's NCBITaxon id.
     """
     ncbi_taxon_ids = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     self.ontology = OntologyFactory().create(self.ont)
     gaf_parser = GafParser()

     if self.ont != 'go':
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=ncbi_taxon_ids[taxon],
         )
         return

     # GO:0008150 is biological_process and GO:0003674 is molecular_function,
     # two of the three top-level GO terms; cellular_component is left out.
     kept_terms = set(self.ontology.descendants('GO:0008150')
                      + self.ontology.descendants('GO:0003674'))
     sub_ont = self.ontology.subontology(kept_terms)

     # Any taxon other than 'human' or 'mouse' leaves the URL empty.
     if taxon == 'human':
         gaf_url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
     elif taxon == 'mouse':
         gaf_url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
     else:
         gaf_url = ''

     parsed = gaf_parser.parse(gaf_url)
     self.assocs = parsed
     # Drop header records, then keep only associations whose object is one of the kept terms.
     selected = [
         record for record in parsed
         if 'header' not in record.keys()
         and record['object']['id'] in kept_terms
     ]
     self.associations = self.afactory.create_from_assocs(selected, ontology=sub_ont)
Ejemplo n.º 5
0
    def get(self, ontology, node):
        """
        Extract a subgraph from an ontology

        The subgraph is induced by ``node``, any extra ``cnode`` request
        arguments, and all of their ancestors and descendants in the graph
        filtered by the requested relations.  Returns the renderer's JSON
        object for that subgraph.
        """
        args = parser.parse_args()

        seed_ids = [node]
        if args.cnode is not None:
            seed_ids.extend(args.cnode)

        ont = OntologyFactory().create(ontology)
        graph = ont.get_filtered_graph(relations=args.relation)

        # 'u' = include ancestors, 'd' = include descendants; both are enabled here.
        direction = 'du'
        selected = set()
        for node_id in seed_ids:
            selected.add(node_id)
            # NOTE: networkx is used directly because the graph has already
            # been filtered down to the wanted relations.
            if "u" in direction:
                selected.update(nx.ancestors(graph, node_id))
            if "d" in direction:
                selected.update(nx.descendants(graph, node_id))

        induced = graph.subgraph(selected)
        renderer = OboJsonGraphRenderer()
        return renderer.to_json(induced)
Ejemplo n.º 6
0
def test_factory():
    """Load the PomBase associations from file and check two specific evidence records are present."""
    assoc_factory = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)
    aset = assoc_factory.create_from_file(POMBASE, ontology=ont, skim=False)

    # The (subject, term) pair whose evidence is inspected.
    wanted_pair = ('PomBase:SPBC2D10.10c', 'GO:0005730')
    matches = 0
    for subj in aset.subjects:
        print('{} {}'.format(subj, aset.label(subj)))
        for term in aset.annotations(subj):
            print('  {} {}'.format(term, ont.label(term)))
            for assoc in aset.associations(subj, term):
                evidence = assoc['evidence']
                ev_type = evidence['type']
                withs = evidence['with_support_from']
                refs = evidence['has_supporting_reference']
                print('    {} {} {}'.format(ev_type, withs, refs))

                if (subj, term) != wanted_pair:
                    continue
                iso_hit = (ev_type == 'ISO'
                           and withs == ['SGD:S000002172']
                           and refs == ['GO_REF:0000024'])
                ida_hit = ev_type == 'IDA' and refs == ['PMID:16823372']
                if iso_hit or ida_hit:
                    matches += 1
                    logging.info('** FOUND: {}'.format(assoc))

    assert len(aset.associations_by_subj) > 0
    assert matches == 2
Ejemplo n.º 7
0
 def retrieve_associations(self, ont, group):
     """Load ontology ``ont`` and populate ``self.associations`` for ``group``.

     For 'go' a GAF file is parsed (the raw parse result is kept on
     ``self.assocs``) and the associations are restricted to a
     sub-ontology; for any other ontology gene-to-phenotype associations
     are requested from the association factory for the group's NCBITaxon id.
     """
     ncbi_taxon_ids = {
         'human': 'NCBITaxon:9606',
         'mouse': 'NCBITaxon:10090',
     }
     self.ontology = OntologyFactory().create(ont)
     gaf_parser = GafParser()

     if ont != 'go':
         self.associations = self.afactory.create(
             ontology=self.ontology,
             subject_category='gene',
             object_category='phenotype',
             taxon=ncbi_taxon_ids[group])
         return

     kept_terms = set(self.ontology.descendants('GO:0008150')
                      + self.ontology.descendants('GO:0003674'))
     sub_ont = self.ontology.subontology(kept_terms)

     if group == 'human':
         remote_url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
     elif group == 'mouse':
         remote_url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
     else:
         remote_url = ''
     # NOTE(review): remote_url is computed but never used. The parser always
     # reads the local file 'goa_human.gaf.gz', whatever ``group`` is. This
     # looks like a debugging leftover; confirm whether remote_url should be
     # parsed instead.
     parsed = gaf_parser.parse('goa_human.gaf.gz')
     self.assocs = parsed
     # Drop header records, then keep only associations whose object is one of the kept terms.
     selected = [
         record for record in parsed
         if 'header' not in record.keys()
         and record['object']['id'] in kept_terms
     ]
     self.associations = self.afactory.create_from_assocs(selected,
                                                          ontology=sub_ont)
Ejemplo n.º 8
0
def test_semsearch():
    """Run the semantic search engine pairwise over the associations of the genes in GENES."""
    assoc_factory = AssociationSetFactory()
    full_ont = OntologyFactory().create(ONT)
    gaf_parser = GafParser()
    parsed = gaf_parser.parse(POMBASE, skipheader=True)
    of_interest = [a for a in parsed if a['subject']['label'] in GENES]
    aset = assoc_factory.create_from_assocs(of_interest, ontology=full_ont)

    # Swap the full ontology for the sub-ontology derived from the association set.
    sub_ont = aset.subontology()
    aset.ontology = sub_ont
    logging.info('Genes={} Terms={}'.format(len(aset.subjects),
                                            len(sub_ont.nodes())))

    print('STATS={}'.format(aset.as_dataframe().describe()))

    engine = SemSearchEngine(assocmodel=aset)

    logging.info('Calculating all MICAs')
    engine.calculate_all_micas()

    logging.info('Doing pairwise')
    for left in aset.subjects:
        for right in aset.subjects:
            cosine = engine.pw_score_cosine(left, right)
            # A subject compared with itself must score (almost exactly) 1.
            if left == right:
                assert (cosine > 0.9999)
            best_matches = engine.pw_score_resnik_bestmatches(left, right)
            print('{} x {} = {} // {}'.format(left, right, cosine, best_matches))
Ejemplo n.º 9
0
def test_local_json_parse():
    """
    Load ontology from JSON

    Checks a node label, the three search styles (exact, implicit regexp,
    explicit regexp) and that the ontology has more than 100 nodes.
    """
    loader = OntologyFactory()
    print("Creating ont")
    ont = loader.create('tests/resources/pato.json')

    print("PLOIDY: {}".format(ont.node(PLOIDY)))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    exact_hits = ont.search('shape')
    print("SEARCH (exact): {}".format(exact_hits))
    assert [SHAPE] == exact_hits

    # implicit regexp
    implicit_hits = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(implicit_hits))
    assert SHAPE in implicit_hits
    assert len(implicit_hits) > 10

    # explicit regexp
    explicit_hits = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(explicit_hits))
    assert SHAPE in explicit_hits
    assert len(explicit_hits) > 10

    # Count by iterating, without relying on nodes() supporting len().
    node_count = sum(1 for _ in ont.nodes())
    assert node_count > 100
Ejemplo n.º 10
0
def test_remote_sparql():
    """
    reconstitution test

    Loads 'pato' through the factory's default method, inspects the graph
    around PLOIDY and resolves a regex name query both locally and remotely.
    """
    # default method is sparql
    ont = OntologyFactory().create('pato')
    graph = ont.get_graph()

    # NOTE(review): `graph.node[...]` and comparing `predecessors()` to a list
    # presumably rely on an older networkx API; confirm against the pinned version.
    print(str(graph.node[PLOIDY]))

    all_nodes = graph.nodes()
    print(len(all_nodes))
    assert len(all_nodes) > 100

    print("SUCC:" + str(graph.successors(PLOIDY)))

    parents = graph.predecessors(PLOIDY)
    print("PRED:" + str(parents))
    assert parents == ['PATO:0001396']

    ancs = ancestors(graph, PLOIDY)
    print("ANCS:" + str(ancs))
    assert 'PATO:0000001' in ancs
    print(graph)

    patterns = ['.*shape.*']
    tree_renderer = GraphRenderer.create('tree')

    # Same query resolved locally first, then remotely.
    for remote in (False, True):
        shapes = ont.resolve_names(patterns, is_regex=True, is_remote=remote)
        print("SHAPE Q:" + str(shapes))
        show_nodes(tree_renderer, ont, shapes)
        assert Y_SHAPED in shapes
Ejemplo n.º 11
0
 def load_slim(self, module: Module, slim_url: str, slim_cache_path: str):
     """Load a slim ontology and store its CLASS node ids on the attribute matching ``module``.

     Does nothing unless both ``slim_url`` and ``slim_cache_path`` are
     truthy.  The slim is fetched through ``self._get_cached_file`` and
     reduced to a sub-ontology over the module's relations (None for
     modules other than GO and DO_EXPERIMENTAL).
     """
     if not (slim_url and slim_cache_path):
         return

     if module == Module.GO:
         relations = self.go_relations
     elif module == Module.DO_EXPERIMENTAL:
         relations = self.do_relations
     else:
         relations = None

     cached_file = self._get_cached_file(file_source_url=slim_url,
                                         cache_path=slim_cache_path)
     slim_onto = OntologyFactory().create(cached_file).subontology(relations=relations)

     # Keep only nodes that carry a "type" entry equal to "CLASS".
     slim_set = {
         node for node in slim_onto.nodes()
         if "type" in slim_onto.node(node)
         and slim_onto.node(node)["type"] == "CLASS"
     }

     if module == Module.GO:
         message, attribute = "Setting GO Slim", "go_slim"
     elif module == Module.DO_EXPERIMENTAL:
         message, attribute = "Setting DO Slim", "do_slim"
     elif module == Module.EXPRESSION:
         message, attribute = "Setting Expression Slim", "exp_slim"
     else:
         # Any other module: the slim is computed but not stored anywhere.
         return
     logger.info(message)
     setattr(self, attribute, slim_set)
Ejemplo n.º 12
0
def get_scigraph_nodes(id_list) -> Iterator[Dict]:
    """
    Queries scigraph neighbors to get a list of nodes back

    We use the scigraph neighbors function because ids can be sent in batch
    which is faster than iteratively querying solr search
    or the scigraph graph/id function

    :param id_list: iterable of ids; it is materialised once, so sets and
                    generators are accepted as well as lists
    :return: yields each entry of the 'nodes' list of the json decoded
             result from scigraph_ontology._neighbors_graph
    :raises ValueError: if the response cannot be decoded as JSON, which is
                        assumed to mean an id is not in scigraph
    """
    scigraph = OntologyFactory().create('scigraph:data')

    # Copy once up front: avoids the throwaway list previously built just to
    # take a length, and makes slicing safe for non-sequence iterables.
    ids = list(id_list)
    batch_size = 400  # ids sent per request
    for start in range(0, len(ids), batch_size):
        params = {'id': ids[start:start + batch_size], 'depth': 0}

        try:
            result_graph = scigraph._neighbors_graph(**params)
        except JSONDecodeError as exception:
            # Assume json decode is due to an incorrect class ID
            # Should we handle this?
            raise ValueError(exception.doc) from exception
        yield from result_graph['nodes']
Ejemplo n.º 13
0
 def get(self):
     """
     Return the result of get_db() under 'z' and the node count of the
     default ontology under 'test'.
     """
     ont = OntologyFactory().create()
     return {'z': get_db(), 'test': len(ont.nodes())}
Ejemplo n.º 14
0
def test_write():
    """
    write obo from json

    Loads the nucleus JSON test resource and writes it with the 'obo' renderer.
    """
    loader = OntologyFactory()
    print("Creating ont")
    nucleus = loader.create('tests/resources/nucleus.json')
    GraphRenderer.create('obo').write(nucleus)
Ejemplo n.º 15
0
 def __init__(self, context):
     """Initialise the "go" source and load the GO ontology into ``self.ont``.

     The 'go' handle is tried first; if creating it fails, the 'obo:go'
     handle is used instead.
     """
     super(GO, self).__init__("go", context)
     ofactory = OntologyFactory()
     try:
         # sometimes the ontology world is down :(
         self.ont = ofactory.create('go')
     except Exception:
         # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
         # SystemExit are no longer swallowed by the fallback.
         self.ont = ofactory.create('obo:go')
     # This seems to be required to make the ontology actually load:
     _ = self.ont.get_level(0)
Ejemplo n.º 16
0
 def make_apo_map():
     """Load the "apo" ontology and return a dict mapping each node's label to its node id."""
     apo_ont = OntologyFactory().create("apo")
     # dict schema { 'term': 'apo_id' }; if two nodes share a label the later node wins
     return {apo_ont.label(node): node for node in apo_ont.nodes()}
Ejemplo n.º 17
0
 def make_apo_map():
     """Return a label -> node id lookup built from the "apo" ontology."""
     loader = OntologyFactory()
     apo_ont = loader.create("apo")
     # dict schema { 'term': 'apo_id' }; a repeated label keeps the last node seen
     return {apo_ont.label(apo_id): apo_id for apo_id in apo_ont.nodes()}
Ejemplo n.º 18
0
def setup_ontologies(go_ontology=None, ro_ontology=None):
    """Set the module-level GO_ONTOLOGY and RO_ONTOLOGY.

    An argument that is not None is used as given; for a None argument the
    ontology is created through OntologyFactory ("go" for GO, the RO PURL
    for RO).
    """
    global GO_ONTOLOGY, RO_ONTOLOGY

    GO_ONTOLOGY = (OntologyFactory().create("go")
                   if go_ontology is None else go_ontology)
    RO_ONTOLOGY = (OntologyFactory().create("http://purl.obolibrary.org/obo/ro.owl")
                   if ro_ontology is None else ro_ontology)
Ejemplo n.º 19
0
 def __init__(self):
     """Load the RO, GOREL and GO ontologies, build the extensions mapper and create the store."""
     ro_url = "http://purl.obolibrary.org/obo/ro.owl"
     gorel_url = "http://release.geneontology.org/2019-03-18/ontology/extensions/gorel.obo"
     # Can't get logical_definitions w/ ont.create("go"), need to load ontology via PURL
     go_url = "http://purl.obolibrary.org/obo/go.owl"

     self.ro_ontology = OntologyFactory().create(ro_url)
     self.gorel_ontology = OntologyFactory().create(gorel_url)
     self.go_ontology = OntologyFactory().create(go_url)

     self.ext_mapper = ExtensionsMapper(go_ontology=self.go_ontology,
                                        ro_ontology=self.ro_ontology)

     # Look up the 'IOMemory' Store plugin class and instantiate it.
     store_class = plugin.get('IOMemory', Store)
     self.store = store_class()
Ejemplo n.º 20
0
def test_gaf():
    """
    Test loading from gaf

    Loads the PomBase GAF against the 'go' ontology and checks that G1 is
    among the subjects returned by a query for INTRACELLULAR.
    """
    ofactory = OntologyFactory()
    afactory = AssociationSetFactory()
    ont = ofactory.create('go')
    # The handle was previously opened inline and never closed. It is kept
    # open for the whole test, so this works whether or not the association
    # set reads the file lazily.
    with open(POMBASE, "r") as gaf_file:
        aset = afactory.create_from_gaf(gaf_file, ontology=ont)
        print(str(aset))
        genes = aset.query([INTRACELLULAR])
        for g in genes:
            print("G={} '{}'".format(g, aset.label(g)))
        assert G1 in genes
Ejemplo n.º 21
0
def get_ontology(id):
    """Return the ontology object for ``id``, creating and caching it on first use.

    ``id`` is translated to a handle through the 'ontologies' entries of
    ``cfg`` (the last entry with a matching id wins); if no entry matches,
    the id itself is the handle.  Objects are cached in ``omap`` by handle.
    """
    handle = id
    for entry in cfg['ontologies']:
        if entry['id'] != id:
            continue
        logging.info("getting handle for id: {} from cfg".format(id))
        handle = entry['handle']

    if handle in omap:
        logging.info("Using cached for {}".format(handle))
    else:
        logging.info("Creating a new ontology object for {}".format(handle))
        omap[handle] = OntologyFactory().create(handle)
    return omap[handle]
Ejemplo n.º 22
0
def test_remote_disease():
    """
    factory test

    Creates disease-to-phenotype associations for HUMAN over the 'doid'
    ontology and prints the associations returned for PD.
    """
    ont_factory = OntologyFactory()
    assoc_factory = AssociationSetFactory()
    doid = ont_factory.create('doid')
    request = dict(ontology=doid,
                   subject_category='disease',
                   object_category='phenotype',
                   taxon=HUMAN)
    aset = assoc_factory.create(**request)

    hits = aset.query_associations([PD])
    print("Gene Assocs to PD: {} {}".format(hits, len(hits)))
Ejemplo n.º 23
0
def test_alt_id():
    """
    test alt_ids and replaced by

    Prints, for every obsolete node, either its replacement or a note that
    it has none.
    """
    loader = OntologyFactory()
    print("Creating ont")
    ont = loader.create('tests/resources/alt_id_test.json')

    for term in ont.nodes():
        if not ont.is_obsolete(term):
            continue
        if ont.replaced_by(term):
            print('{} --> {}'.format(term, ont.replaced_by(term)))
        else:
            print('OBS: {} no replacement'.format(term))
Ejemplo n.º 24
0
def test_learn():
    """Split the GAF associations on CC over the subClassOf sub-ontology and fit the learner, reporting to target/index.md."""
    assoc_factory = AssociationSetFactory()
    ont = OntologyFactory().create(ONT)

    aset = assoc_factory.create_from_file(file=GAF, ontology=ont)
    learner = ol.OntologyLearner(assocs=aset)
    print('L={}'.format(learner))

    is_a_only = ont.subontology(relations=['subClassOf'])
    learner.split_assocs(CC, ontology=is_a_only)
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))

    with open('target/index.md', 'w') as report:
        learner.fit_all(reportfile=report)
    print('L.targets={}'.format(learner.targets))
Ejemplo n.º 25
0
def test_subontology():
    """
    Load extracting subontology

    Checks that GO:1990578 still has ancestors in the subClassOf-only
    sub-ontology.
    """
    loader = OntologyFactory()
    print("Creating ont")
    ont = loader.create('tests/resources/go-truncated-pombase.json')
    print("ONT NODES: {}".format(ont.nodes()))

    is_a_only = ont.subontology(relations=['subClassOf'])
    term = 'GO:1990578'
    print("NODES: {}".format(is_a_only.nodes()))

    ancs = is_a_only.ancestors(term)
    print(str(ancs))
    assert len(ancs) > 0
Ejemplo n.º 26
0
    def get(self, ontology, node):
        """
        Extract a subgraph from an ontology

        Starting from ``node`` plus any ``cnode`` request arguments, the
        ontology is traversed upwards only over the requested relations and
        the sub-ontology of the visited nodes is rendered to a JSON object.
        """
        args = parser.parse_args()

        qnodes = [node]
        if args.cnode is not None:
            qnodes += args.cnode

        # The ontology comes from get_ontology(); the OntologyFactory that
        # used to be instantiated here was never used and has been removed.
        ont = get_ontology(ontology)
        relations = args.relation
        print("Traversing: {} using {}".format(qnodes, relations))
        nodes = ont.traverse_nodes(qnodes,
                                   up=True,
                                   down=False,
                                   relations=relations)

        subont = ont.subontology(nodes, relations=relations)

        ojr = OboJsonGraphRenderer(include_meta=args.include_meta)
        json_obj = ojr.to_json(subont)
        # TODO: remove this next release of ontobio
        # Blank every node's 'meta' when the caller did not ask for metadata.
        if not args.include_meta:
            for g in json_obj['graphs']:
                for n in g['nodes']:
                    n['meta'] = {}
        return json_obj
Ejemplo n.º 27
0
def test_obsolete_term_repair_withfrom():
    """GO ids in the GPAD with/from column are repaired; ids from other namespaces are left alone."""
    with_from = "GO:0005913|GO:1,GO:4|ZFIN:ZDB-MRPHLNO-010101-1,MGI:1232453"
    columns = [
        "ZFIN",
        "ZFIN:ZDB-GENE-980526-362",
        "acts_upstream_of_or_within",
        "GO:0007155",
        "PMID:15494018",
        "ECO:0000305",
        with_from,
        "",
        "20041026",
        "ZFIN",
        "",
        "contributor=GOC:zfin_curators|model-state=production|noctua-model-id=gomodel:ZFIN_ZDB-GENE-980526-362",
    ]
    ont = OntologyFactory().create(ALT_ID_ONT)
    config = assocparser.AssocParserConfig(ontology=ont, rule_set=assocparser.RuleSet.ALL)
    gpad_parser = GpadParser(config=config)
    result = gpad_parser.parse_line("\t".join(columns))
    assoc = result.associations[0]

    expected = [
        # GO:0005913 should be repaired to its replacement term, GO:0005912
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='0005912')]),
        # the test terms GO:1 and GO:4 are expected back as GO:2 and GO:3
        ConjunctiveSet(elements=[Curie(namespace='GO', identity='2'),
                                 Curie(namespace='GO', identity='3')]),
        # non GO elements stay the same, could be obsolete or not
        ConjunctiveSet(elements=[Curie(namespace='ZFIN', identity='ZDB-MRPHLNO-010101-1'),
                                 Curie(namespace='MGI', identity='1232453')]),
    ]
    assert expected == assoc.evidence.with_support_from
Ejemplo n.º 28
0
def test_aspect_fill_for_obsolete_terms():
    """An obsolete term without an aspect is repaired to its replacement and still gets an aspect.

    GO:4 is obsolete and has no aspect (hasOBONamespace) in the test
    ontology; GO:3 is its replacement term.  GPAD lines themselves carry no
    aspect data.
    """
    columns = [
        "MGI",
        "MGI:105128",
        "involved_in",
        "GO:4",
        "PMID:25901318",
        "ECO:0000314",
        "",
        "",
        "20190517",
        "MGI",
        "",
        "contributor=http://orcid.org/0000-0002-9796-7693|model-state=production|noctua-model-id=gomodel:5c4605cc00004132",
    ]
    ont = OntologyFactory().create(ALT_ID_ONT)
    config = assocparser.AssocParserConfig(ontology=ont, rule_set=assocparser.RuleSet.ALL)
    gpad_parser = GpadParser(config=config)
    repaired = gpad_parser.parse_line("\t".join(columns)).associations[0]

    # GO:4 should be repaired to its replacement term, GO:3
    assert repaired.object.id == Curie("GO", "3")
    # Aspect should not be empty
    assert repaired.aspect == 'P'
Ejemplo n.º 29
0
def test_no_flag_valid_id():
    """Validating the class id GO:0000785 must not add any message to the parser report."""
    gaf_parser = GafParser()
    gaf_parser.config.ontology = OntologyFactory().create(ONT)

    # Placeholder line: 17 empty columns, only there to satisfy the validator's signature.
    placeholder_line = assocparser.SplitLine("fake", [""] * 17, taxon="foo")
    gaf_parser._validate_ontology_class_id("GO:0000785", placeholder_line)

    assert len(gaf_parser.report.messages) == 0
Ejemplo n.º 30
0
def test_learn():
    """Fit the learner on the GAF associations against the TGAF target associations, reporting to target/pheno_index.md."""
    source_ont = OntologyFactory().create(ONT)
    target_ont = OntologyFactory().create(TONT)

    assoc_factory = AssociationSetFactory()
    source_assocs = assoc_factory.create_from_file(file=GAF, ontology=source_ont)
    target_assocs = assoc_factory.create_from_file(file=TGAF, ontology=target_ont)

    learner = ol.OntologyLearner(assocs=source_assocs,
                                 target_assocs=target_assocs,
                                 score_threshold=0.6)
    print('L={}'.format(learner))
    print('L.assocs={}'.format(learner.assocs))
    print('L.tassocs={}'.format(learner.target_assocs))

    with open('target/pheno_index.md', 'w') as report:
        learner.fit_all(reportfile=report)
    print('L.targets={}'.format(learner.targets))
Ejemplo n.º 31
0
def produce(group, metadata, gpad, ttl, target, ontology, exclude):
    """Download a group's source GAFs and build the requested products for each dataset.

    "gaf" and "gpi" are always flagged as wanted; "gpad" and "ttl" follow
    the arguments of the same name.  Output goes below the absolute form of
    ``target``.  For each dataset a validated GAF and a GPI are produced;
    when a paint source GAF is available it is validated too and merged
    with the dataset's GAF, otherwise the gzipped GAF is renamed to
    ``<dataset>.gaf.gz``.
    """
    products = {"gaf": True, "gpi": True, "gpad": gpad, "ttl": ttl}
    enabled = [name for name, wanted in products.items() if wanted]
    click.echo("Making products {}.".format(", ".join(enabled)))

    absolute_target = os.path.abspath(target)
    os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
    click.echo("Products will go in {}".format(absolute_target))

    absolute_metadata = os.path.abspath(metadata)
    group_metadata = metadata_file(absolute_metadata, group)

    click.echo("Loading ontology: {}...".format(ontology))
    ontology_graph = OntologyFactory().create(ontology)

    source_gaf_zips = download_source_gafs(group_metadata,
                                           absolute_target,
                                           exclusions=exclude)

    # zip path -> path of the unpacked "<dataset>-src.gaf" placed next to the zip
    source_gafs = {}
    for dataset, zip_path in source_gaf_zips.items():
        source_gafs[zip_path] = os.path.join(os.path.dirname(zip_path),
                                             "{}-src.gaf".format(dataset))
    for zip_path, unpacked_path in source_gafs.items():
        unzip(zip_path, unpacked_path)

    paint_metadata = metadata_file(absolute_metadata, "paint")

    # TODO (see https://github.com/geneontology/go-site/issues/642): paint is
    # switched on when the group itself is "paint", so that IBA filtering
    # (GO_RULE:26) is not applied when paint is handled as a top level group,
    # like for paint_other.
    group_is_paint = group == "paint"

    for dataset, gafzip in source_gaf_zips.items():
        source_gaf = source_gafs[gafzip]
        valid_gaf = produce_gaf(dataset,
                                source_gaf,
                                ontology_graph,
                                paint=group_is_paint)[0]

        gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph)

        paint_src_gaf = check_and_download_paint_source(
            paint_metadata, group_metadata["id"], dataset, absolute_target)

        if paint_src_gaf is None:
            # No paint data: the gzipped valid GAF becomes the final <dataset>.gaf.gz.
            end_gaf = valid_gaf
            gafgz = "{}.gz".format(valid_gaf)
            final_name = os.path.join(os.path.dirname(gafgz),
                                      "{}.gaf.gz".format(dataset))
            os.rename(gafgz, final_name)
        else:
            paint_gaf = produce_gaf("paint_{}".format(dataset),
                                    paint_src_gaf,
                                    ontology_graph,
                                    gpipath=gpi,
                                    paint=True)[0]
            end_gaf = merge_mod_and_paint(valid_gaf, paint_gaf)

        make_products(dataset, absolute_target, end_gaf, products,
                      ontology_graph)
from ontobio.io.ontol_renderers import GraphRenderer
import requests
import ontobio.sparql.wikidata as wd
from ontobio.assoc_factory import AssociationSetFactory
from dipper.graph.RDFGraph import RDFGraph
from scigraph.api.SciGraph import SciGraph
import re
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Make ontology from wikidata
# The ontology is created from the 'wdq:Q185034' handle and then searched
# with the pattern 'Sickle%'; the hits are the query ids used below.

onto_factory = OntologyFactory()
wd_ontology = onto_factory.create('wdq:Q185034')  # Sickle cell anemia
qids = wd_ontology.search('Sickle%')  # presumably '%' acts as a wildcard; confirm against Ontology.search

# Traverse up and down from query node in our sub-ontology
nodes = wd_ontology.traverse_nodes(qids, up=True, down=True)

# Render with the 'obo' renderer into ./output/wd-ontology.obo
# (assumes the ./output directory already exists; TODO confirm).
renderer = GraphRenderer.create('obo')
renderer.outfile = './output/wd-ontology.obo'
# Writing the whole ontology is left disabled because it failed with the
# error quoted below:
# renderer.write(wd_ontology)
# >> AttributeError: 'EagerWikidataOntology' object has no attribute 'all_logical_definitions'

# Write only the traversed nodes instead, passing the search hits as query_ids.
renderer.write_subgraph(wd_ontology, nodes, query_ids=qids)

# Get GO terms
outfile = open('./output/go-terms.tsv', 'w')