Example #1
# NOTE: this snippet assumes the Query namedtuple from Example #2; the tree
# helpers are assumed to live in pyontutils.hierarchies alongside creatTree.
import tempfile
from pyontutils.scigraph import Graph
from pyontutils.hierarchies import creatTree, flatten, dematerialize, levels

def main():
    sgg = Graph(cache=True)
    sgg_local = Graph(cache=True)  # presumably pointed at a local SciGraph instance in the original setup

    fma3_r = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#regional_part_of', 'INCOMING', 9)
    fma3_c = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#constitutional_part_of', 'INCOMING', 9)
    #fma3_tree, fma3_extra = creatTree(*fma3_r, graph=sgg_local)

    fma_r = Query('FMA:50801', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    fma_c = Query('FMA:50801', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'INCOMING', 20)
    fma_rch_r = Query('FMA:61819', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    #fma_tree, fma_extra = creatTree(*fma_r, graph=sgg_local)
    #fma_tree, fma_extra = creatTree(*fma_rch_r, graph=sgg_local)

    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    # NOTE: the next line rebinds fma_hip, so only the constitutional_part_of query survives
    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_hip, graph=sgg_local)

    fma_mfg = Query('FMA:273103', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_mfg, graph=sgg_local)

    fma_tel = Query('FMA:62000', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    if False:  # disabled exploratory block kept for reference
        fma_gsc_tree, fma_gsc_extra = creatTree(*fma_tel, graph=sgg_local)

        childs = list(fma_gsc_extra[2])  # get the curies for the left/right so we can get parents for all
        g = Graph(cache=True)
        parent_nodes = []
        for curie in childs:
            json = g.getNeighbors(curie, relationshipType='subClassOf')
            if json:
                for node in json['nodes']:
                    if node['id'] != curie:
                        parent_nodes.append(node)  # should have dupes
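
        # hypothetical next step (not in the original): collapse the duplicate
        # parents by id, e.g. parent_nodes = list({n['id']: n for n in parent_nodes}.values())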


        breakpoint()
        return

    uberon = Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40)
    uberon_tree, uberon_extra = creatTree(*uberon, graph=sgg)
    queries = uberon,  # a single-element tuple; add more Query objects to batch

    uberon_flat = sorted(set(flatten(uberon_extra[0])))
    # tempfile.tempdir is None until gettempdir() is called, so call it directly
    with open(f'{tempfile.gettempdir()}/uberon_partonomy_terms', 'wt') as f:
        f.writelines('\n'.join(uberon_flat))

    for query in queries:
        tree, extra = creatTree(*query, graph=sgg)
        dematerialize(list(tree.keys())[0], tree)
        print(tree)
        #print(extra[0])
        with open(f'{tempfile.gettempdir()}/{query.root}', 'wt') as f:
            f.writelines(tree.print_tree())

        level_sizes = [len(levels(tree, i)) for i in range(11)]
        print('level sizes', level_sizes)
        # extra[-4] maps each node to its parents; extra[2] holds the curies
        parent_counts = sorted(set(len(v) for v in extra[-4].values()))
        print('unique parent counts', parent_counts)
        print('num terms', len(extra[2]))

    return
Example #2
import os
from collections import defaultdict, namedtuple
import rdflib
from rdflib import URIRef, RDFS, RDF, OWL
from rdflib.namespace import SKOS
import requests
from pyontutils.scigraph import Vocabulary, Graph
from pyontutils.utils import TODAY, async_getter, TermColors as tc
from pyontutils.scig import scigPrint
from pyontutils.hierarchies import creatTree, flatten
from pyontutils.core import devconfig, OntMeta, makePrefixes, makeGraph
from pyontutils.core import NIFRID, oboInOwl
from IPython import embed

sgg = Graph(cache=True)
sgv = Vocabulary(cache=True)

Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])
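# usage sketch (mirrors Example #1): a Query unpacks positionally into creatTree,
# e.g. tree, extra = creatTree(*Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40), graph=sgg)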

CON = oboInOwl.consider
DBX = oboInOwl.hasDbXref  # FIXME also behaves as objectProperty :/
AID = oboInOwl.hasAlternativeId
IRBC = NIFRID.isReplacedByClass
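# these shortcuts resolve to oboInOwl annotation properties, e.g. (assumed shape):
#   DBX  # -> http://www.geneontology.org/formats/oboInOwl#hasDbXref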

PREFIXES = makePrefixes(
    'UBERON',
    'ro',
    'owl',
    'skos',
)
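
# makePrefixes is assumed to return a curie-prefix -> namespace-IRI dict that
# makeGraph accepts; a minimal sketch of the expected use:
#   PREFIXES['UBERON']  # -> the UBERON namespace IRI
#   mg = makeGraph('some-output-name', prefixes=PREFIXES)  # name is hypothetical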
Example #3
# NOTE: beyond Example #2's imports, this snippet needs (assumed locations):
import csv  # used for the output files below
from pyontutils.config import auth  # assumed home of the auth config object
# dbx is assumed to be the oboInOwl:hasDbXref key as it appears in SciGraph meta
dbx = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'

def main():
    resources = auth.get_path('resources')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist cannot continue')

    with open((auth.get_path('git-local-base') /
               'entity_mapping/mappings/uberon-nervous').as_posix(),
              'rt') as f:
        brain_only = set(l.strip() for l in f)  # one UBERON curie per line

    sgv = Vocabulary(cache=True)
    sgg = Graph(cache=True)

    g = rdflib.Graph()
    g.parse((auth.get_path('ontology-local-repo') /
             'ttl/generated/parcellation/cocomacslim.ttl').as_posix(),
            format='turtle')
    sos = [so for so in g.subject_objects(rdflib.RDFS.label)]

    map_ = []
    smap_ = []
    fma_lookup = {}
    for s, o in sos:
        cc_id = g.qname(s)
        cc_label = o.toPython()
        existing_id = None
        existing_label = None
        existing_fma = ''
        s_existing_id = None
        s_existing_label = None
        s_existing_fma = ''

        cands = sgv.findByTerm(o)
        if not cands:
            cands = []
            scands = sgv.searchByTerm(o)
            if not scands:
                scands = []
        else:
            scands = []

        for cand in cands:
            existing_fma = ''
            if 'curie' in cand:
                existing_id = cand['curie']
            elif 'cocomac' in cand['iri']:
                continue
            else:
                raise ValueError(f'What is this thing? {cand["iri"]}')

            existing_label = cand['labels'][0]
            if existing_id.startswith('UBERON'):
                if existing_id not in brain_only:
                    existing_id = None
                    existing_label = None
                    existing_fma = ''
                else:
                    if existing_id in fma_lookup:
                        existing_fma = fma_lookup[existing_id]
                    else:
                        meta = sgg.getNode(existing_id)['nodes'][0]['meta']
                        if dbx in meta:
                            xrefs = meta[dbx]
                            for ref in xrefs:
                                if ref.startswith('FMA:'):
                                    existing_fma += ref
                        fma_lookup[existing_id] = existing_fma
                    break  # stop at the first brain-only UBERON hit
            #elif cand['curie'].startswith('NIFGA'):
            #elif cand['curie'].startswith('MBA'):

        if existing_id:
            map_.append(
                (cc_label, cc_id, existing_label, existing_id, existing_fma))

        for scand in scands:

            if 'cocomac' in scand['iri']:
                continue
            elif not scand.get('curie'):
                continue  # some results omit the key entirely instead of setting it to None

            s_existing_fma = ''
            if scand['curie'].startswith('UBERON'):
                if scand['curie'] in brain_only:
                    s_existing_id = scand['curie']
                    s_existing_label = scand['labels'][0]
                    if not s_existing_id:  # defensive; cannot trigger after the curie guard above
                        print(scand)
                        continue
                    node_json = sgg.getNode(s_existing_id)
                    #print(node_json, s_existing_id, s_existing_label)
                    if s_existing_id in fma_lookup:
                        s_existing_fma = fma_lookup[s_existing_id]
                    else:
                        meta = node_json['nodes'][0]['meta']
                        if dbx in meta:
                            xrefs = meta[dbx]
                            for ref in xrefs:
                                if ref.startswith('FMA:'):
                                    s_existing_fma += ref
                        fma_lookup[s_existing_id] = s_existing_fma
                    smap_.append((cc_label, cc_id, s_existing_label,
                                  s_existing_id, s_existing_fma))
                #break  # FOW :/

    # print the search matches sorted by the numeric part of the cocomac id
    for a in sorted(smap_, key=lambda a: int(a[1].split(':')[1])):
        print(a)
    with open('/tmp/coco_uber_match.csv', 'wt') as f:
        writer = csv.writer(f)
        writer.writerows(map_)
    with open('/tmp/coco_uber_search.csv', 'wt') as f:
        writer = csv.writer(f)
        writer.writerows(smap_)

    # cocomac -> integrated connectivity terminology mapping

    def lnc(string):
        return string.lower().replace(',', ' ')  # matches the conv in NIF_conn
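        # e.g. lnc('Amygdala, lateral nucleus') == 'amygdala  lateral nucleus'
        # (the comma becomes a space, leaving a double space; input is hypothetical)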

    # re-parse the same cocomacslim.ttl that was loaded into g above
    ccslim = rdflib.Graph().parse(
        (auth.get_path('ontology-local-repo') /
         'ttl/generated/parcellation/cocomacslim.ttl').as_posix(),
        format='turtle')
    coco_all = list(ccslim.objects(None, rdflib.RDFS.label))

    intcon = resources / 'NIF_conn_allcols_minimal_clean_filtered2.csv'
    with open(intcon.as_posix(), 'rt') as f:
        ber_rows = [r for r in csv.reader(f)]

    # transpose the CoCoMac rows and keep the label column
    ber_set = set(list(zip(*(r for r in ber_rows if r[0] == 'CoCoMac')))[1])
    # column 0 of map_ holds the cocomac labels
    coco_match_lower_no_comma = set(lnc(t) for t in list(zip(*map_))[0])
    if smap_:
        coco_search_lower_no_comma = set(
            [lnc(t) for t in [c for c in zip(*smap_)][0]])
    else:
        coco_search_lower_no_comma = set()
    coco_all_lower_no_comma = set(lnc(t) for t in coco_all)
    matched = ber_set.intersection(coco_match_lower_no_comma)    # found via findByTerm
    searched = ber_set.intersection(coco_search_lower_no_comma)  # found via searchByTerm
    alled = ber_set.intersection(coco_all_lower_no_comma)        # present in cocomacslim at all
    unmapped = alled.difference(matched.union(searched))         # in cocomac but never mapped
    missing = ber_set.difference(alled)                          # in NIF_conn but not in cocomac

    nmatch = len(matched)
    nsearch = len(searched)
    nall = len(alled)
    nunmapped = len(unmapped)
    nmissing = len(missing)

    print('# matched =', nmatch)
    print('# searched =', nsearch)
    print('# alled =', nall)
    print('# unmapped =', nunmapped)
    print('# missing =', nmissing)

    print('missing')
    for m in sorted(missing):
        print(m)

    print('unmapped')
    for m in sorted(unmapped):
        print(m)
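
A possible refactor (not in the original): the FMA xref lookup appears twice in
Example #3, so a small memoized helper could replace both inline copies. This
sketch assumes the dbx key defined above and the SciGraph node shape the
original code already relies on.

_fma_cache = {}

def get_fma_xrefs(sgg, curie):
    # concatenate any FMA: xrefs found in the node's meta, memoized per curie
    if curie not in _fma_cache:
        meta = sgg.getNode(curie)['nodes'][0]['meta']
        _fma_cache[curie] = ''.join(
            ref for ref in meta.get(dbx, []) if ref.startswith('FMA:'))
    return _fma_cache[curie]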