Example #1
0
def test_makeGraph():
    """Exercise utils.makeGraph rendering and record detection.

    Verifies that the rendered graph comes back as raw bytes, that a
    high current temperature is flagged as a new record, and that a
    low one is not.
    """
    img_data, title, new_record = utils.makeGraph("Berlin", "DE")
    # isinstance is the idiomatic type check; type(x) == bytes rejects
    # subclasses and reads poorly.
    assert isinstance(img_data, bytes)
    img_data, title, new_record = utils.makeGraph("Berlin",
                                                  "DE",
                                                  date=dt.datetime.now(),
                                                  current_temp=20)
    # plain truthiness assertions instead of `== True` / `== False`
    assert new_record
    img_data, title, new_record = utils.makeGraph("Berlin",
                                                  "DE",
                                                  date=dt.datetime.now(),
                                                  current_temp=-10)
    assert not new_record
def make_neurolex_graph():
    """Build a scigraph-style JSON view of the neurolex partonomy and
    return the extra tree edges that touch replaced-by terms.

    Parses /tmp/neurolex_basic.ttl, computes the INCOMING ilx:partOf
    trees rooted at NIFGA:birnlex_796 and NIFGA:nlx_412, remaps the
    terms in those trees through u_replaced_by, and keeps only the
    additional edges whose subject or object was remapped.
    """
    # neurolex test stuff
    nlxpref = {'ilx':'http://uri.interlex.org/base/'}
    nlxpref.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', nlxpref)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    ILXPO = 'ilx:partOf'
    nj = neurolex.make_scigraph_json(ILXPO)
    # h and j_ carry (tree, ..., extras-with-edges); index 0 is the tree,
    # index -1 holds the dict with the 'edges' list
    g_, h = creatTree(*Query('NIFGA:birnlex_796', ILXPO, 'INCOMING', 10), json=nj)
    i_, j_ = creatTree(*Query('NIFGA:nlx_412', ILXPO, 'INCOMING', 10), json=nj)

    brht = sorted(set(flatten(h[0],[])))
    wmht = sorted(set(flatten(j_[0],[])))
    # rewrite the prefix of every replaced-by key to NIFGA before lookup
    ufixedrb = {'NIFGA:' + k.split(':')[1]:v for k, v in u_replaced_by.items()}
    b_nlx_replaced_by = new_replaced_by(brht, ufixedrb)
    w_nlx_replaced_by = new_replaced_by(wmht, ufixedrb)
    # NOTE: assigned to directly below, so the defaultdict(list) default
    # factory is never actually used
    additional_edges = defaultdict(list)  # TODO this could be fun for the future but is a nightmare atm
    for edge in h[-1]['edges'] + j_[-1]['edges']:
        additional_edges[edge['sub']] = edge
        additional_edges[edge['obj']] = edge

    #filter out bad edges becase we are lazy
    additional_edges = {k:v for k, v in additional_edges.items()
                        if k in b_nlx_replaced_by or k in w_nlx_replaced_by}

    print('neurolex tree')  # computed above
    print(g_)
    print(i_)

    return additional_edges
 def fixHasAltId(g):
     """Normalize alternative-id predicates in *g* to their oboInOwl
     equivalents (NIFCHEM:hasAlternativeId and BIRNANN:ChEBIid)."""
     wrapped = makeGraph('',
                         graph=g,
                         prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'BIRNANN'))
     renames = (
         ('NIFCHEM:hasAlternativeId', 'oboInOwl:hasAlternativeId'),
         ('BIRNANN:ChEBIid', 'oboInOwl:id'),
     )
     for old_curie, new_curie in renames:
         wrapped.replace_uriref(old_curie, new_curie)
Example #4
0
    def __new__(cls, validate=False):
        """Generate, write, and optionally validate this scheme's ontology.

        Checks the class-level ont/concept/atlas attributes, builds the
        graph from the class's data hooks (datagetter, datamunge,
        dataproc), and writes it to WRITELOC.

        Returns:
            (ontid, cls.atlas)
        """
        error = 'Expected %s got %s'
        # exact type checks: these attributes must be precisely the
        # expected record types, not subclasses or plain tuples
        if type(cls.ont) != OntMeta:
            raise TypeError(error % (OntMeta, type(cls.ont)))
        elif type(cls.concept) != PScheme:
            raise TypeError(error % (PScheme, type(cls.concept)))
        elif type(cls.atlas) != PSArtifact:
            raise TypeError(error % (PSArtifact, type(cls.atlas)))

        ontid = cls.ont.path + cls.ont.filename + '.ttl'
        # copy so the update below can't mutate the class attribute
        PREFIXES = {k: v for k, v in cls.PREFIXES.items()}
        PREFIXES.update(genericPScheme.PREFIXES)
        #if '' in cls.PREFIXES:  # NOT ALLOWED!
        #if PREFIXES[''] is None:
        #PREFIXES[''] = ontid + '/'
        graph = makeGraph(cls.ont.filename, PREFIXES, writeloc=WRITELOC)
        graph.add_ont(ontid, *cls.ont[2:])
        make_scheme(graph, cls.concept, cls.atlas.curie)
        data = cls.datagetter()
        cls.datamunge(data)
        cls.dataproc(graph, data)
        add_ops(graph)
        graph.write()
        if validate or getattr(cls, 'VALIDATE', False):
            cls.validate(graph)
        return ontid, cls.atlas
Example #5
0
 def __new__(cls, validate=False):
     """Generate and write this parcellation scheme's ontology file.

     Validates the class-level ont/concept/atlas attributes, builds the
     graph via the class's data hooks, writes it to /tmp/parc/ without
     conversion, and optionally runs cls.validate.

     Returns:
         (ontid, cls.atlas)
     """
     msg = 'Expected %s got %s'
     # exact-type checks: each attribute must be precisely the expected
     # record type (raise on the first mismatch, as before)
     for value, expected in ((cls.ont, OntMeta),
                             (cls.concept, PScheme),
                             (cls.atlas, PSArtifact)):
         if type(value) != expected:
             raise TypeError(msg % (expected, type(value)))

     ontid = cls.ont.path + cls.ont.filename + '.ttl'
     prefixes = dict(cls.PREFIXES)  # copy; don't mutate the class attribute
     prefixes.update(genericPScheme.PREFIXES)
     if '' in cls.PREFIXES:
         if prefixes[''] is None:
             # default the empty prefix to this ontology's namespace
             prefixes[''] = ontid + '/'
     graph = makeGraph(cls.ont.filename, prefixes, writeloc='/tmp/parc/')
     graph.add_ont(ontid, *cls.ont[2:])
     make_scheme(graph, cls.concept, cls.atlas.curie)
     data = cls.datagetter()
     cls.datamunge(data)
     cls.dataproc(graph, data)
     add_ops(graph)
     graph.write(convert=False)
     if validate or getattr(cls, 'VALIDATE', False):
         cls.validate(graph)
     return ontid, cls.atlas
 def switch_dead(g):
     """Swap dead ids in *g* for their replacements, keeping each old id
     as an oboInOwl:hasAlternateId string literal on the new term."""
     wrapped = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
     for old, replacement in deads.items():
         wrapped.replace_uriref(old, replacement)
         wrapped.add_node(replacement, 'oboInOwl:hasAlternateId',
                          rdflib.Literal(old, datatype=rdflib.XSD.string))
         # drop a self-referential replacedBy triple if one was present
         g.remove((replacement, replacedBy, replacement))
    def fixAltIdIsURIRef(g):
        """Convert URIRef objects of oboInOwl:hasAlternativeId / oboInOwl:id
        triples into curie string literals (these predicates are meant to
        carry annotation strings, not object links)."""
        hai = ug.expand('oboInOwl:hasAlternativeId')
        i = ug.expand('oboInOwl:id')
        # called for its side effect of binding the CHEBI prefix on g
        makeGraph('', graph=g, prefixes=makePrefixes(
            'CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            # rewrite a URIRef object as its qname, stored as a string literal
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    # an auto-generated nsN prefix means the namespace was unknown
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        # NOTE(review): g is mutated while subject_objects iterates over it;
        # this appears to rely on rdflib's generator behavior -- confirm.
        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
        for s, o in g.subject_objects(i):
            inner(s, i, o)
 def fixIons(g):
     """Replace neutral-atom CHEBI ids in *g* with their ion counterparts.

     There is a series of atom/ion confusions; the fix is to map each
     neutral-atom id to the charged form, since that is what the
     biologists usually mean.
     """
     ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
     # atom -> ion mappings; the two entries below were previously no-op
     # tuple expressions (`None, 'CHEBI:...'`) and are now plain comments
     # CHEBI:29108 calcium is ok (no replacement needed)
     ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
     ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
     ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
     ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
     # CHEBI:29105 zinc is ok (no replacement needed)
Example #9
0
def ncbigene_make():
    """Generate the ncbigeneslim ttl graph from a subset of NCBI Gene ids.

    Reads gene ids from gene-subset-ids.txt, fetches their summaries
    from the NCBI eutils esummary endpoint in chunks of 100, converts
    each record to triples via ncbi(), and writes the resulting graph.
    """
    IDS_FILE = 'gene-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:  # this came from neuroNER
        ids = [l.split(':')[1].strip() for l in f.readlines()]

    #url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?retmode=json&retmax=5000&db=gene&id='
    #for id_ in ids:
        #data = requests.get(url + id_).json()['result'][id_]
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    data = {
        'db':'gene',
        'retmode':'json',
        'retmax':5000,
        'id':None,
    }
    chunks = []
    for i, idset in enumerate(chunk_list(ids, 100)):
        print(i, len(idset))
        # BUG FIX: a trailing comma previously made this a 1-tuple, which
        # only worked because requests encodes sequence values as repeated
        # form fields; send the comma-joined string directly.
        data['id'] = ','.join(idset)
        resp = requests.post(url, data=data).json()
        chunks.append(resp)

    # merge the chunked responses into a single 'result' mapping
    base = chunks[0]['result']
    uids = base['uids']
    for more in chunks[1:]:
        data = more['result']
        uids.extend(data['uids'])
        base.update(data)
    #base['uids'] = uids  # i mean... its just the keys
    base.pop('uids')

    prefixes = {
        'ilx':'http://uri.interlex.org/base/',
        'OBOANN':'http://ontology.neuinfo.org/NIF/Backend/OBO_annotation_properties.owl#',  # FIXME needs to die a swift death
        'NCBIGene':'http://www.ncbi.nlm.nih.gov/gene/',
        'NCBITaxon':'http://purl.obolibrary.org/obo/NCBITaxon_',
    }
    ng = makeGraph('ncbigeneslim', prefixes)

    # one set of triples per gene record
    for k, v in base.items():
        #if k != 'uids':
        ncbi(v, ng)

    # ontology header triples
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/ncbigeneslim.ttl'
    ng.add_node(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    ng.add_node(ontid, rdflib.RDFS.label, 'NIF NCBI Gene subset')
    ng.add_node(ontid, rdflib.RDFS.comment, 'This subset is automatically generated from the NCBI Gene database on a subset of terms listed in %s.' % IDS_FILE)
    ng.add_node(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    ng.write()
Example #10
0
def parcellation_schemes(ontids_atlases):
    """Write the top-level parcellation ontology.

    Imports every individual parcellation scheme in *ontids_atlases*
    (sorted for deterministic output) and anchors the shared atlas and
    parcellation root classes.
    """
    ont = OntMeta(
        GENERATED, 'parcellation',
        'NIF collected parcellation schemes ontology', 'NIF Parcellations',
        'Brain parcellation schemes as represented by root concepts.', TODAY)
    ontid = ont.path + ont.filename + '.ttl'
    prefixes = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    graph = makeGraph(ont.filename, prefixes, writeloc=WRITELOC)
    graph.add_ont(ontid, *ont[2:])

    # one owl:imports plus the atlas triples per scheme
    for import_id, atlas in sorted(ontids_atlases):
        graph.add_trip(ontid, owl.imports, import_id)
        add_triples(graph, atlas, make_atlas)

    graph.add_class(ATLAS_SUPER, label=atname)
    graph.add_class(PARC_SUPER, label=psname)
    graph.write()
Example #11
0
def parcellation_schemes(ontids_atlases):
    """Build and write (without conversion) the collected parcellation
    schemes ontology, importing each scheme in *ontids_atlases*."""
    ont = OntMeta('http://ontology.neuinfo.org/NIF/ttl/generated/',
                  'parcellation',
                  'NIF collected parcellation schemes ontology',
                  'NIF Parcellations',
                  'Brain parcellation schemes as represented by root concepts.',
                  TODAY)
    ontid = ont.path + ont.filename + '.ttl'
    graph = makeGraph(ont.filename,
                      makePrefixes('ilx', 'owl', 'skos', 'OBOANN'),
                      writeloc='/tmp/parc/')
    graph.add_ont(ontid, *ont[2:])

    # one owl:imports plus the atlas triples per scheme, in sorted order
    for import_id, atlas in sorted(ontids_atlases):
        graph.add_node(ontid, rdflib.OWL.imports, import_id)
        add_triples(graph, atlas, make_atlas)

    graph.add_class(PARC_SUPER[0], label=PARC_SUPER[1])
    graph.write(convert=False)
Example #12
0
def make_neurolex_graph():
    """Load the neurolex dump, walk two ilx:partOf trees, and return the
    extra tree edges whose endpoints were remapped by u_replaced_by."""
    prefixes = {'ilx': 'http://uri.interlex.org/base/'}
    prefixes.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', prefixes)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    partof = 'ilx:partOf'
    scigraph_json = neurolex.make_scigraph_json(partof)
    brain_tree, brain_extra = creatTree(
        *Query('NIFGA:birnlex_796', partof, 'INCOMING', 10),
        json=scigraph_json)
    wm_tree, wm_extra = creatTree(
        *Query('NIFGA:nlx_412', partof, 'INCOMING', 10),
        json=scigraph_json)

    brain_terms = sorted(set(flatten(brain_extra[0], [])))
    wm_terms = sorted(set(flatten(wm_extra[0], [])))
    # rewrite each replaced-by key's prefix to NIFGA before lookup
    fixed_rb = {'NIFGA:' + key.split(':')[1]: val
                for key, val in u_replaced_by.items()}
    brain_rb = new_replaced_by(brain_terms, fixed_rb)
    wm_rb = new_replaced_by(wm_terms, fixed_rb)

    # TODO this could be fun for the future but is a nightmare atm
    extra_edges = defaultdict(list)
    for edge in brain_extra[-1]['edges'] + wm_extra[-1]['edges']:
        extra_edges[edge['sub']] = edge
        extra_edges[edge['obj']] = edge

    # filter out bad edges because we are lazy
    extra_edges = {key: edge
                   for key, edge in extra_edges.items()
                   if key in brain_rb or key in wm_rb}

    print('neurolex tree')  # computed above
    print(brain_tree)
    print(wm_tree)

    return extra_edges
Example #13
0
        'uberon_id': uid,
        'uberon_label': uberon_labs[uid],
        'aba_id': aid,
        'aba_label': abalabs[aid],
        'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
        'uberon_syns': '\n'.join(insert_uberon)
    }
    return to_format.format(**kwargs)


# Render one review record per uberon term that has an aba xref and write
# them out for manual synonym review.
text = '\n\n'.join(
    [make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])

with open('aba_uberon_syn_review.txt', 'wt') as f:
    f.write(text)

print('total uberon terms checked:', len(uberon_labs))
print('total aba terms:           ', len(abalabs))
print('total uberon with aba xref:', len([a for a in u_a_map.values() if a]))

# Build a small bridge graph linking each uberon term and its first aba
# reference in both directions.
ubridge = makeGraph('uberon-parcellation-mappings',
                    prefixes=makePrefixes('ilx', 'UBERON', 'MBA'))
for u, arefs in u_a_map.items():
    if arefs:
        # TODO check for bad assumptions here
        ubridge.add_node(u, 'ilx:delineatedBy', arefs[0])
        ubridge.add_node(arefs[0], 'ilx:delineates', u)

ubridge.write()
embed()
Example #14
0
def swanson():
    """Not really a parcellation scheme: build the Swanson partonomies.

    Parses resources/swanson_aligned.txt into (depth, name, citation,
    next-syn) rows, looks up candidate UBERON curies for every name via
    scigraph, then uses a rowParse subclass to reconstruct per-appendix
    parent/child hierarchies and writes swanson_hierarchies.ttl.

    Returns:
        (ontid, None) -- the ontology id plus a placeholder where other
        parcellation builders return an atlas artifact.
    """
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('ilx', 'owl', 'OBOANN', 'UBERON')
    PREFIXES.update({
        '':ontid + '/',  # looking for better options
        'SWAN':'http://swanson.org/node/',
        'SWAA':'http://swanson.org/appendix/',
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/parc/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from....',
                      TODAY)
            
    # source text: one hierarchy entry per line, depth encoded as runs of dots
    with open('resources/swanson_aligned.txt', 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed
    
    # parse each line into (depth, area_name, citation, next_syn_marker);
    # depth is the count of 5-dot indent units, '( ... )' holds the citation
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    # 'A (cit) or B (cit)' lines emit an extra NEXT SYN row
                    n1, l = l.split(') or')
                    area_name, citationP =  n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP =  l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None
            
            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # look up candidate UBERON curies for every parsed name via scigraph
    results = async_getter(sgv.findByTerm, [(d[1],) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    # rowParse dispatches each zoop row to the methods named after the header
    # columns, then calls _row_post per row and _end once at the finish
    class SP(rowParse):
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a NEXT SYN marker from the previous row makes this row a synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix before starting anew
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:  # NOTE(review): bare except hides real errors; drops into a debug shell
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # deduplicate: rows sharing 'name citation' collapse onto the
            # lowest row index; later duplicates are replaced everywhere
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    nbase = 'http://swanson.org/node/%s' 
    json_ = {'nodes':[],'edges':[]}
    # emit one class per hierarchy node, tagged with its defining citation
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, 'ilx:swansonBrainRegionConcept', label=anns['label'])
        new_graph.add_node(nid, 'OBOANN:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_node(nid, rdflib.OWL.equivalentClass, anns['uberon'])  # issues arrise here...

    # per-appendix partonomy: appendix-specific hasPart/partOf object
    # properties plus the parent/child hierarchy edges
    for appendix, data in sp.appendicies.items():
        aid = 'http://swanson.org/appendix/%s' % appendix
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_node(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    new_graph.write(convert=False)
    if False:  # debugging: render trees for a hand-picked set of roots
        Query = namedtuple('Query', ['root','relationshipType','direction','depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None
def make_neurons(syn_mappings, pedges, ilx_start_, defined_graph):
    """Assemble the NIF-Neuron graph and lift phenotype literals to ids.

    Imports selected NIF-Cell classes and the HBP cell import, then for
    every phenotype-edge predicate in *pedges* tries to replace literal
    objects with real ids (via syn_mappings, location URIs, or scigraph
    term lookup). Each newly resolved phenotype mints a defined class in
    *defined_graph*, numbering from *ilx_start_*.

    Returns the last ilx number used. Side effects: writes both graphs
    and extends *syn_mappings* with the written graph's synonyms/labels.
    """
    ilx_start = ilx_start_
    # manual aliases for terms scigraph resolves badly
    cheating = {
        'vasoactive intestinal peptide': 'VIP',
        'star':
        None,  # is a morphological phen that is missing but hits scigraph
    }
    ng = makeGraph('NIF-Neuron', prefixes=PREFIXES)

    #""" It seemed like a good idea at the time...
    nif_cell = '~/git/NIF-Ontology/ttl/NIF-Cell.ttl'  # need to be on neurons branch
    cg = rdflib.Graph()
    cg.parse(os.path.expanduser(nif_cell), format='turtle')
    # cell classes that must be copied over from NIF-Cell explicitly
    missing = (
        'NIFCELL:nifext_55',
        'NIFCELL:nifext_56',
        'NIFCELL:nifext_57',
        'NIFCELL:nifext_59',
        'NIFCELL:nifext_81',
        'NIFCELL:nlx_cell_091205',
        NIFCELL_NEURON,
        'NIFCELL:sao2128417084',
        'NIFCELL:sao862606388',  # secondary, not explicitly in the hbp import
    )
    for m in missing:
        m = ng.expand(m)
        for s, p, o in cg.triples((m, None, None)):
            ng.add_trip(s, p, o)

    #cg.remove((None, rdflib.OWL.imports, None))  # DONOTWANT NIF-Cell imports
    #for t in cg.triples((None, None, None)):
    #ng.add_trip(*t)  # only way to clean prefixes :/
    #cg = None
    #"""

    # copy the HBP cell import wholesale, minus its ontology header triples
    hbp_cell = '~/git/NIF-Ontology/ttl/generated/NIF-Neuron-HBP-cell-import.ttl'  # need to be on neurons branch
    _temp = rdflib.Graph()  # use a temp to strip nasty namespaces
    _temp.parse(os.path.expanduser(hbp_cell), format='turtle')
    for s, p, o in _temp.triples((None, None, None)):
        if s != rdflib.URIRef(
                'http://ontology.neuinfo.org/NIF/ttl/generated/NIF-Neuron-HBP-cell-import.ttl'
        ):
            ng.g.add((s, p, o))

    base = 'http://ontology.neuinfo.org/NIF/ttl/'

    # ontology header and imports for the output graph
    ontid = base + ng.name + '.ttl'
    ng.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    ng.add_trip(ontid, rdflib.OWL.imports, base + 'NIF-Neuron-Phenotype.ttl')
    ng.add_trip(ontid, rdflib.OWL.imports, base + 'NIF-Neuron-Defined.ttl')
    ng.add_trip(ontid, rdflib.OWL.imports, base + 'hbp-special.ttl')
    #ng.add_trip(ontid, rdflib.OWL.imports, base + 'NIF-Cell.ttl')  # NO!
    #ng.add_trip(ontid, rdflib.OWL.imports, base + 'external/uberon.owl')
    #ng.add_trip(ontid, rdflib.OWL.imports, base + 'external/pr.owl')
    ng.replace_uriref('ilx:hasMolecularPhenotype',
                      'ilx:hasExpressionPhenotype')

    #defined_graph = makeGraph('NIF-Neuron-Defined', prefixes=PREFIXES, graph=_g)
    defined_graph.add_trip(base + defined_graph.name + '.ttl', rdflib.RDF.type,
                           rdflib.OWL.Ontology)
    defined_graph.add_trip(base + defined_graph.name + '.ttl',
                           rdflib.OWL.imports,
                           base + 'NIF-Neuron-Phenotype.ttl')

    # main pass: try to resolve each literal phenotype object to a real id
    done = True  #False
    done_ = set()
    for pedge in pedges:
        for s, p, o_lit in ng.g.triples((None, pedge, None)):
            o = o_lit.toPython()
            success = False
            true_o = None
            true_id = None
            if o in syn_mappings:
                # direct hit in the provided synonym map
                id_ = syn_mappings[o]

                ng.add_hierarchy(id_, p, s)
                ng.g.remove((s, p, o_lit))
                #print('SUCCESS, substituting', o, 'for', id_)
                success = True
                true_o = o_lit
                true_id = id_

            elif 'Location' in p.toPython() or 'LocatedIn' in p.toPython(
            ):  # lift location to restrictions
                if o.startswith('http://'):
                    ng.add_hierarchy(o_lit, p, s)
                    ng.g.remove((s, p, o_lit))

                    data = sgv.findById(o)
                    label = data['labels'][0]
                    ng.add_trip(o, rdflib.RDF.type, rdflib.OWL.Class)
                    ng.add_trip(o, rdflib.RDFS.label, label)

                    success = True
                    true_o = label
                    true_id = o_lit

            else:
                # fall back to a scigraph term search, preferring PR: then
                # NIFMOL: curies among the results
                if o in cheating:
                    o = cheating[o]

                data = sgv.findByTerm(o)
                if data:
                    print('SCIGRAPH',
                          [(d['curie'], d['labels']) for d in data])
                    for d in data:
                        if 'PR:' in d['curie']:
                            sgt = ng.expand(d['curie'])
                            ng.add_hierarchy(sgt, p, s)
                            ng.g.remove((s, p, o_lit))

                            label = d['labels'][0]
                            ng.add_trip(sgt, rdflib.RDF.type, rdflib.OWL.Class)
                            ng.add_trip(sgt, rdflib.RDFS.label, label)

                            success = True
                            true_o = label
                            true_id = sgt
                            break

                    if not success:
                        for d in data:
                            if 'NIFMOL:' in d['curie']:
                                sgt = ng.expand(d['curie'])
                                ng.add_hierarchy(sgt, p, s)
                                ng.g.remove((s, p, o_lit))

                                label = d['labels'][0]
                                ng.add_trip(sgt, rdflib.RDF.type,
                                            rdflib.OWL.Class)
                                ng.add_trip(sgt, rdflib.RDFS.label, label)

                                success = True
                                true_o = label
                                true_id = sgt
                                break

            # mint a defined class for each newly resolved phenotype (once per o)
            if o not in done_ and success:
                done_.add(o)
                t = tuple(
                    defined_graph.g.triples(
                        (None, rdflib.OWL.someValuesFrom, true_id)))
                if t:
                    print('ALREADY IN', t)
                else:
                    ilx_start += 1
                    id_ = ng.expand(ilx_base.format(ilx_start))
                    defined_graph.add_trip(id_, rdflib.RDF.type,
                                           rdflib.OWL.Class)
                    # defined class = Neuron AND (p some true_id)
                    restriction = infixowl.Restriction(p,
                                                       graph=defined_graph.g,
                                                       someValuesFrom=true_id)
                    intersection = infixowl.BooleanClass(
                        members=(defined_graph.expand(NIFCELL_NEURON),
                                 restriction),
                        graph=defined_graph.g)
                    this = infixowl.Class(id_, graph=defined_graph.g)
                    this.equivalentClass = [intersection]
                    this.subClassOf = [
                        defined_graph.expand(defined_class_parent)
                    ]
                    this.label = rdflib.Literal(true_o + ' neuron')
                    print('make_neurons ilx_start', ilx_start,
                          list(this.label)[0])
                    if not done:
                        embed()
                        done = True

    defined_graph.add_class(defined_class_parent,
                            NIFCELL_NEURON,
                            label='defined class neuron')
    defined_graph.add_trip(defined_class_parent,
                           rdflib.namespace.SKOS.definition,
                           'Parent class For all defined class neurons')

    defined_graph.write()
    ng.write()

    # side effect: extend syn_mappings with everything now in the graph
    for sub, syn in [
            _ for _ in ng.g.subject_objects(ng.expand('NIFRID:synonym'))
    ] + [_ for _ in ng.g.subject_objects(rdflib.RDFS.label)]:
        syn = syn.toPython()
        if syn in syn_mappings:
            print('ERROR duplicate synonym!', syn, sub)
        syn_mappings[syn] = sub

    return ilx_start
Example #16
0
def chebi_make():
    """Generate the NIF ChEBI slim and ChEBI deprecated ontologies.

    Pipeline:
      1. Read the curated subset of CHEBI curies from IDS_FILE.
      2. Download and gunzip the full ChEBI nightly OWL dump.
      3. Extract the owl:Class elements for the wanted ids, classes that
         replaced a now-alternative wanted id, and (via ``rec``) the
         transitive superclass closure so the slim stays connected.
      4. Re-parse the subset with rdflib and copy live triples into
         'chebislim'; ids resolved only via hasAlternativeId go to
         'chebi-dead' with replacedBy pointers.

    Writes both ontologies to disk, then drops into an embed() shell.
    Raises BaseException for a wanted id that is neither live, replaced,
    nor listed in ``depwor``.
    """
    PREFIXES = makePrefixes('definition', 'hasRole', 'BFO', 'CHEBI', 'owl',
                            'skos', 'oboInOwl')
    dPREFIXES = makePrefixes('CHEBI', 'replacedBy', 'owl', 'skos')
    # utility graph, only used for curie expansion / qname lookups
    ug = makeGraph('utilgraph', prefixes=PREFIXES)

    IDS_FILE = 'resources/chebi-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))  # curies, e.g. CHEBI:1234
        ids = set((ug.expand(_.strip()).toPython() for _ in ids_raw))  # full iris

    #gzed = requests.get('http://localhost:8000/chebi.owl')
    #raw = BytesIO(gzed.content)
    gzed = requests.get(
        'http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/nightly/chebi.owl.gz'
    )
    raw = BytesIO(gzip.decompress(gzed.content))
    t = etree.parse(raw)
    r = t.getroot()
    cs = r.getchildren()
    # owl:Class elements whose first attribute value (rdf:about) is a wanted iri
    classes = [
        _ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class'
        and _.values()[0] in ids
    ]
    ontology = t.xpath("/*[local-name()='RDF']/*[local-name()='Ontology']")
    ops = t.xpath(
        "/*[local-name()='RDF']/*[local-name()='ObjectProperty']")  # TODO
    wanted = [etree.ElementTree(_) for _ in classes]
    rpl_check = t.xpath(
        "/*[local-name()='RDF']/*[local-name()='Class']/*[local-name()='hasAlternativeId']"
    )
    rpl_dict = {
        _.text: _.getparent()
        for _ in rpl_check if _.text in ids_raw
    }  # we also need to have any new classes that have replaced old ids
    also_classes = list(rpl_dict.values())

    def rec(start_set, done):
        # Recursively collect the superclass closure of start_set:
        # direct subClassOf parents plus someValuesFrom fillers on
        # subClassOf restrictions.  ``done`` prevents revisiting elements.
        ids_ = set()
        for c in start_set:
            ids_.update([
                _.items()[0][1] for _ in etree.ElementTree(c).xpath(
                    "/*[local-name()='Class']/*[local-name()='subClassOf']")
                if _.items()
            ])
            ids_.update([
                _.items()[0][1] for _ in etree.ElementTree(c).xpath(
                    "/*[local-name()='Class']/*[local-name()='subClassOf']/*[local-name()='Restriction']/*[local-name()='someValuesFrom']"
                ) if _.items()
            ])
        supers = [
            _ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class'
            and _.values()[0] in ids_ and _ not in done
        ]
        if supers:
            msup, mids = rec(supers, done + supers)
            supers += msup
            ids_.update(mids)
        return supers, ids_

    a = ontology + ops + classes + also_classes
    more, mids = rec(a, a)
    all_ = set(a + more)
    r.clear()  # wipe all the stuff we don't need
    for c in all_:
        r.append(c)
    data = etree.tostring(r)

    g = rdflib.Graph()
    g.parse(
        data=data
    )  # now _this_ is stupidly slow (like 20 minutes of slow) might make more sense to do the xml directly?

    # versionIRI of the nightly dump, embedded in the generated provenance text
    src_version = list(
        g.query(
            'SELECT DISTINCT ?match WHERE { ?temp rdf:type owl:Ontology . ?temp owl:versionIRI ?match . }'
        ))[0][0]

    new_graph = createOntology(
        'chebislim',
        'NIF ChEBI slim',
        PREFIXES,
        'chebislim',
        'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.'
        % (src_version, IDS_FILE),
        remote_base='http://ontology.neuinfo.org/NIF/')

    chebi_dead = createOntology(
        'chebi-dead',
        'NIF ChEBI deprecated',
        dPREFIXES,
        'chebidead',
        'This file is generated by pyontutils/slimgen to make deprecated classes resolvablefrom the full ChEBI nightly at versionIRI %s based on the list of terms in %s.'
        % (src_version, IDS_FILE),
        remote_base='http://ontology.neuinfo.org/NIF/')

    # ids that are known-dead with no replacement; tolerated below
    depwor = {
        'CHEBI:33243': 'natural product',  # FIXME remove these?
        'CHEBI:36809': 'tricyclic antidepressant',
    }

    for id_ in sorted(
            set(ids_raw) | set((ug.g.namespace_manager.qname(_)
                                for _ in mids))):
        eid = ug.expand(id_)
        trips = list(g.triples((eid, None, None)))
        if not trips:
            #looks for the id_ as a literal
            alts = list(
                g.triples((
                    None,
                    rdflib.term.URIRef(
                        'http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'
                    ),
                    rdflib.Literal(
                        id_,
                        datatype=rdflib.term.URIRef(
                            'http://www.w3.org/2001/XMLSchema#string')))))
            if alts:
                replaced_by, _, __ = alts[0]
                if replaced_by.toPython(
                ) not in ids:  #  we need to add any replacment classes to the bridge
                    print('REPLACED BY NEW CLASS', id_)
                    for t in g.triples((replaced_by, None, None)):
                        new_graph.add_recursive(t, g)
                chebi_dead.add_class(id_)
                chebi_dead.add_node(id_, 'replacedBy:', replaced_by)
                chebi_dead.add_node(id_, rdflib.OWL.deprecated, True)
            else:
                if id_ not in depwor:
                    raise BaseException('wtf error', id_)
        else:
            for trip in trips:
                new_graph.add_recursive(trip, g)

    # https://github.com/ebi-chebi/ChEBI/issues/3294
    # strip bogus literal '0' related-synonym triples from the slim
    madness = new_graph.expand('oboInOwl:hasRelatedSynonym'), rdflib.Literal(
        '0', datatype=rdflib.namespace.XSD.string)
    for a in new_graph.g.subjects(*madness):
        new_graph.g.remove((a, ) + madness)

    new_graph.write()
    chebi_dead.write()
    embed()
Exemple #17
0
    kwargs = {
        'uberon_id':uid,
        'uberon_label':uberon_labs[uid],
        'aba_id':aid,
        'aba_label':abalabs[aid],
        'aba_syns':'\n'.join(sorted(abasyns[aid] + abaacro[aid])),
        'uberon_syns':'\n'.join(insert_uberon)
    }
    return to_format.format(**kwargs)

# Assemble the review document: one formatted record per uberon term that
# has at least one ABA xref, records separated by blank lines.
records = []
for uid, aid in sorted(u_a_map.items()):
    if aid:
        records.append(make_record(uid, aid[0]))
text = '\n\n'.join(records)

with open('aba_uberon_syn_review.txt', 'wt') as f:
    f.write(text)

# Console summary of mapping coverage.
print('total uberon terms checked:', len(uberon_labs))
print('total aba terms:           ', len(abalabs))
print('total uberon with aba xref:', sum(1 for a in u_a_map.values() if a))

# Emit the uberon <-> ABA parcellation bridge graph.
ubridge = makeGraph('uberon-parcellation-mappings',
                    prefixes=makePrefixes('ilx', 'UBERON', 'MBA'))
for uberon_id, aba_refs in u_a_map.items():
    if not aba_refs:
        continue
    # TODO check for bad assumptions here
    ubridge.add_node(uberon_id, 'ilx:delineatedBy', aba_refs[0])
    ubridge.add_node(aba_refs[0], 'ilx:delineates', uberon_id)

ubridge.write()
embed()

def _rest_make_phenotypes():
    """Build phenotype terms by merging neuroNER OBO sources with NIF-Quality.

    Parses the three neuroNER obo files and NIF-Quality.ttl into one rdflib
    graph, finds exact and approximate label matches between sources, mints
    new ilx ids for matched/desired terms, and records xrefs/replacedBy
    links in the 'uwotm8' and 'xrefs' graphs.

    Returns (syn_mappings, pedges, ilx_start).
    NOTE(review): ``pedges`` is not defined in this function body — it is
    presumably a module-level global; confirm before reuse.  Also relies on
    globals ``sgg``, ``ilx_base``, ``PREFIXES``, ``g``, ``add_phenotypes``,
    ``ephys_phenotype``, ``morpho_phenotype``.
    """
    #phenotype sources
    neuroner = '~/git/neuroNER/resources/bluima/neuroner/hbp_morphology_ontology.obo'
    neuroner1 = '~/git/neuroNER/resources/bluima/neuroner/hbp_electrophysiology_ontology.obo'
    neuroner2 = '~/git/neuroNER/resources/bluima/neuroner/hbp_electrophysiology-triggers_ontology.obo'
    nif_qual = '~/git/NIF-Ontology/ttl/NIF-Quality.ttl'

    mo = OboFile(os.path.expanduser(neuroner))
    mo1 = OboFile(os.path.expanduser(neuroner1))
    mo2 = OboFile(os.path.expanduser(neuroner2))
    mo_ttl = mo.__ttl__() + mo1.__ttl__() + mo2.__ttl__()

    # prepend the prefix declarations the OBO-to-ttl conversion leaves out
    mo_ttl = """\
    @prefix : <http://FIXME.org/> .
    @prefix nsu: <http://www.FIXME.org/nsupper#> .
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix owl: <http://www.w3.org/2002/07/owl#> .
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    """ + mo_ttl

    #sio = io.StringIO()
    #sio.write(mo_ttl)

    ng = rdflib.Graph()
    ng.parse(data=mo_ttl, format='turtle')
    ng.parse(os.path.expanduser(nif_qual), format='turtle')
    #ng.namespace_manager.bind('default1', None, override=False, replace=True)
    ng.remove((None, rdflib.OWL.imports, None))

    # manually reviewed false-positive matches to skip below
    bad_match = {
        'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#nlx_qual_20090505',
        'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao1693353776',
        'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao1288413465',
        'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao4459136323',
        'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#nlx_qual_20090507',
    }

    exact = []    # (subject, other, label, lowered-label) exact label matches
    similar = []  # approximate (word-overlap) matches across prefixes
    quals = []    # terms whose label mentions 'quality'
    s2 = {}       # subject -> {label, o, xrefs, syns} merge records

    for subject, label in sorted(ng.subject_objects(rdflib.RDFS.label)):

        syns = set([
            a for a in ng.objects(
                subject, rdflib.URIRef('http://www.FIXME.org/nsupper#synonym'))
        ])
        syns.update(
            set([
                a for a in ng.objects(
                    subject,
                    rdflib.URIRef(
                        'http://ontology.neuinfo.org/NIF/Backend/OBO_annotation_properties.owl#synonym'
                    ))
            ]))
        #if syns:
        #print(syns)
        #print(subject)
        #print(label.lower())
        if 'quality' in label.lower():
            quals.append((subject, label))
        subpre = ng.namespace_manager.compute_qname(subject)[1]
        llower = rdflib.Literal(label.lower(), lang='en')
        for s in ng.subjects(rdflib.RDFS.label, llower):
            if s != subject:
                exact.append((subject, s, label, llower))
        for s, p, o in sorted(ng.triples((None, rdflib.RDFS.label, None))):
            spre = ng.namespace_manager.compute_qname(s)[1]
            # word-level overlap match across different prefixes only
            if subject != s and label.lower() in o.lower().split(
                    ' ') and spre != subpre:
                if s.toPython() in bad_match or subject.toPython(
                ) in bad_match:
                    continue
                #print()
                #print(spre, subpre)
                similar.append((subject, s, label, o))
                # normalize so the FIXME.org term becomes the match target
                if subpre.toPython() == 'http://FIXME.org/':
                    print('YAY')
                    print(label, ',', o)
                    print(subject, s)
                    subject, s = s, subject
                    label, o = o, label

                if subject in s2:
                    #print('YES IT EXISTS')
                    #print(syns, label, [subject, s])
                    s2[subject]['syns'].update(syns)
                    s2[subject]['syns'].add(label)
                    s2[subject]['xrefs'] += [subject, s]
                else:
                    s2[subject] = {
                        'label': label.toPython(),
                        'o': o.toPython(),
                        'xrefs': [subject, s],
                        'syns': syns
                    }  # FIXME overwrites

    pprint(quals)
    """ print stuff
    print('matches')
    pprint(exact)
    pprint(similar)

    #print('EXACT', exact)

    print()
    for k, v in s2.items():
        print(k)
        for k, v2 in sorted(v.items()):
            print('    ', k, ':', v2)
    #"""

    desired_nif_terms = set()  #{
    #'NIFQUAL:sao1959705051',  # dendrite
    #'NIFQUAL:sao2088691397',  # axon
    #'NIFQUAL:sao1057800815',  # morphological
    #'NIFQUAL:sao-1126011106',  # soma
    #'NIFQUAL:',
    #'NIFQUAL:',
    #}
    starts = [
        #"NIFQUAL:sao2088691397",
        #"NIFQUAL:sao1278200674",
        #"NIFQUAL:sao2088691397",
        #"NIFQUAL:sao-1126011106",  # FIXME WTF IS THIS NONSENSE  (scigraph bug?)
        quote(
            "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao1959705051"
        ).replace('/', '%2F'),
        quote(
            "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao2088691397"
        ).replace('/', '%2F'),
        quote(
            "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao1278200674"
        ).replace('/', '%2F'),
        quote(
            "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao2088691397"
        ).replace('/', '%2F'),
        quote(
            "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#sao-1126011106"
        ).replace('/', '%2F'),
    ]

    # expand the desired set with all subclasses of the seed terms via scigraph
    for id_ in starts:
        want = sgg.getNeighbors(id_,
                                relationshipType='subClassOf',
                                direction='INCOMING',
                                depth=5)
        #print(id_, want)
        desired_nif_terms.update([n['id'] for n in want['nodes']])

    print(desired_nif_terms)

    ilx_start = 50114
    print(ilx_base.format(ilx_start))
    new_terms = {}
    dg = makeGraph('uwotm8', prefixes=PREFIXES)
    xr = makeGraph('xrefs', prefixes=PREFIXES)
    for s, o in sorted(ng.subject_objects(rdflib.RDFS.label))[::-1]:
        spre = ng.namespace_manager.compute_qname(s)[1]
        #if spre.toPython() == g.namespaces['NIFQUAL']:
        #print('skipping', s)
        #continue  # TODO
        if s in new_terms:
            print(s, 'already in as xref probably')
            continue
        #elif spre.toPython() != 'http://uri.interlex.org/base/ilx_' or spre.toPython() != 'http://FIXME.org/' and s.toPython() not in desired_nif_terms:
        #elif spre.toPython() != 'http://FIXME.org/' and s.toPython() not in desired_nif_terms:
        #print('DO NOT WANT', s, spre)
        #continue

        syns = set([s for s in ng.objects(s, dg.namespaces['nsu']['synonym'])])
        #data['syns'] += syns

        data = {}
        id_ = ilx_base.format(ilx_start)
        ilx_start += 1
        if s in s2:
            # matched pair: both xrefs point at the newly minted ilx id
            d = s2[s]
            syns.update(d['syns'])
            new_terms[d['xrefs'][0]] = {'replaced_by': id_}
            xr.add_trip(d['xrefs'][0], 'oboInOwl:replacedBy', id_)
            #dg.add_trip(d['xrefs'][0], 'oboInOwl:replacedBy', id_)
            new_terms[d['xrefs'][1]] = {'replaced_by': id_}
            xr.add_trip(d['xrefs'][1], 'oboInOwl:replacedBy', id_)
            #dg.add_trip(d['xrefs'][1], 'oboInOwl:replacedBy', id_)

            data['labels'] = [d['label'], d['o']]
            #dg.add_trip(id_, rdflib.RDFS.label, d['label'])
            dg.add_trip(id_, rdflib.RDFS.label, d['o'])
            data['xrefs'] = d['xrefs']
            for x in d[
                    'xrefs']:  # FIXME... expecting order of evaluation errors here...
                dg.add_trip(id_, 'oboInOwl:hasDbXref', x)  # xr
                xr.add_trip(id_, 'oboInOwl:hasDbXref', x)  # x

        elif spre.toPython(
        ) != 'http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Quality.owl#' or ng.namespace_manager.qname(
                s).replace(
                    'default1',
                    'NIFQUAL') in desired_nif_terms:  # skip non-xref quals
            #print(ng.namespace_manager.qname(s).replace('default1','NIFQUAL'))
            new_terms[s] = {'replaced_by': id_}
            xr.add_trip(s, 'oboInOwl:replacedBy', id_)
            data['labels'] = [o.toPython()]
            dg.add_trip(id_, rdflib.RDFS.label, o.toPython())
            data['xrefs'] = [s]
            dg.add_trip(id_, 'oboInOwl:hasDbXref', s)  # xr
            xr.add_trip(id_, 'oboInOwl:hasDbXref', s)  # xr
        else:
            ilx_start -= 1  # id not used after all; reclaim it
            continue

        new_terms[id_] = data
        dg.add_trip(id_, rdflib.RDF.type, rdflib.OWL.Class)
        xr.add_trip(id_, rdflib.RDF.type, rdflib.OWL.Class)
        for syn in syns:
            # short synonyms are treated as abbreviations
            if syn.toPython() not in data['labels']:
                if len(syn) > 3:
                    dg.add_trip(id_, 'NIFRID:synonym', syn)
                elif syn:
                    dg.add_trip(id_, 'NIFRID:abbrev', syn)

        # classify under ephys or morphology parent based on source iri
        if 'EPHYS' in s or any(['EPHYS' in x for x in data['xrefs']]):
            dg.add_trip(id_, rdflib.RDFS.subClassOf, ephys_phenotype)
        elif 'MORPHOLOGY' in s or any(
            ['MORPHOLOGY' in x for x in data['xrefs']]):
            dg.add_trip(id_, rdflib.RDFS.subClassOf, morpho_phenotype)

    #dg.write(convert=False)
    xr.write(convert=False)

    #skip this for now, we can use DG to do lookups later
    #for t in dg.g.triples((None, None, None)):
    #g.add_trip(*t)  # only way to clean prefixes :/
    add_phenotypes(g)
    g.write(convert=False)

    g2 = makeGraph('pheno-comp', PREFIXES)
    for t in ng.triples((None, None, None)):
        g2.add_trip(*t)  # only way to clean prefixes :/

    g2.write(convert=False)

    # synonym/label -> subject lookup table; duplicates are reported only
    syn_mappings = {}
    for sub, syn in [
            _ for _ in g.g.subject_objects(g.expand('NIFRID:synonym'))
    ] + [_ for _ in g.g.subject_objects(rdflib.RDFS.label)]:
        syn = syn.toPython()
        if syn in syn_mappings:
            print('ERROR duplicate synonym!', syn, sub)
        syn_mappings[syn] = sub

    #embed()
    return syn_mappings, pedges, ilx_start
Exemple #19
0
# Build the knowledge-space definitions ontology (ksdesc-defs) by scraping
# Definition/Description sections out of the markdown files in ~/git/ksdesc.
PREFIXES = makePrefixes(
    "SCR", "MBA", "NIFMOL", "NIFNEURON", "NIFCELL", "NIFGA", "UBERON", "PR", "NIFNEURMOR", "skos", "owl"
)

ont = OntMeta(
    "http://ontology.neuinfo.org/NIF/ttl/generated/",
    "ksdesc-defs",
    "Knolwedge Space Defs",
    "KSDEFS",
    "Definitions from knowledge space descriptions. Generated by pyontutils/ksdesc_bridge.py",
    TODAY,
)

ontid = ont.path + ont.filename + ".ttl"
g = makeGraph(ont.filename, prefixes=PREFIXES)
g.add_ont(ontid, *ont[2:])

top_level = glob(os.path.expanduser("~/git/ksdesc/") + "*")

for putative_dir in top_level:
    if os.path.isdir(putative_dir):
        for putative_md in glob(putative_dir + "/*.md"):
            # curie: directory name is the prefix, md basename is the local id
            ident = os.path.split(putative_dir)[-1] + ":" + os.path.splitext(os.path.split(putative_md)[-1])[0]
            print(ident)
            with open(putative_md, "rt") as f:
                def_ = f.read()

            # keep only the text after the Description/Definition heading
            for test in ("Description", "Definition"):
                if test in def_:
                    def_ = def_.split(test, 1)[-1].strip().strip("=").strip()
            # NOTE(review): the loop appears truncated here — def_ is computed
            # but never added to g; confirm against the full source file.
Exemple #20
0
def chebi_imp():
    """Audit CHEBI triple counts across NIF ontology files and rebuild a bridge.

    Loads chebislim, chebi-dead, NIF-Chemical, and NIF-Molecule cumulatively
    into one graph, counts triples per subset id after each load, and flags
    ids whose counts change.  Triples present in the cumulative graph but
    missing from chebislim alone are re-added to a 'chebi-bridge' graph so
    they can later be removed from NIF-Molecule / NIF-Chemical.

    Side effects: writes chebi-bridge to disk and drops into embed().
    NOTE(review): paths are hard-coded to /home/tom — works only there.
    """
    PREFIXES = makePrefixes('definition',
                            'hasRole',
                            'CHEBI',
                            'owl',
                            'skos',
                            'oboInOwl')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    with open('chebi-subset-ids.txt', 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))
        ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw)))

    def check_chebis(g):
        # Return the per-id triple count, in ``ids`` order, for graph g.
        a = []
        for id_ in ids:
            l = sorted(g.triples((id_, None, None)))
            ll = len(l)
            a.append(ll)
        return a

    g = rdflib.Graph()
    cg = rdflib.Graph()
    chemg = rdflib.Graph()
    molg = rdflib.Graph()
    # g accumulates all four sources; cg/chemg/molg keep individual copies
    g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    cg.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    a1 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl', format='turtle')
    a2 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    chemg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    a3 = check_chebis(g)
    g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    molg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    a4 = check_chebis(g)
    matches = [_ for _ in zip(a1, a2, a3, a4)]
    changed = [len(set(_)) != 1 for _ in matches] 
    # ids whose count changed and that exist in chebislim (m[0] nonzero)
    review = [(id_, m) for id_, changed, m in zip(ids, changed, matches) if changed and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [set([(s, str(o.toPython())) for s, p, o in cg.triples((u, None, None))]) for u, _ in review]
    wat_a = [set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))]) for u, _ in review]
    wat_c_ = [set(cg.triples((u, None, None))) for u, _ in review]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [set(g.triples((u, None, None))) for u, _ in review]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    diff = [a - c for a, c in zip(wat_a, wat_c)]   # (subject, object-string) pairs missing from chebislim
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = makeGraph('chebi-bridge', makePrefixes('CHEBI',
                                                'owl',
                                                'skos',
                                                'dc',
                                                'hasRole',
                                                'NIFCHEM',
                                                'NIFMOL',
                                                'OBOANN',
                                                'BIRNANN'))
    out = []
    for set_ in diff:
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                py = t[-1].toPython()
                if py == string and not py.startswith('ub'):  # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions...
                    cb.add_recursive(t, g)
        cb.add_class(sub)  # only need to go at the end because sub is the same for each set

    cb.write()  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)
    embed()
Exemple #21
0
def main():
    """Export the SciCrunch resource registry from MySQL to an ontology.

    Connects to the nif_eelg database, pulls resources and their columns
    (restricted to rid < 16000 to avoid test entries), appends a handful of
    hand-curated supercategory rows, converts everything to records via
    ``make_records``, and writes the 'scicrunch-registry' graph.

    Relies on module-level ``mysql_conn_helper``, ``field_mapping``,
    ``make_records``, ``make_node``, ``PREFIXES`` and ``ONTOLOGY_DEF``.
    """
    DB_URI = 'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db}'
    #config = mysql_conn_helper('mysql5-stage.crbs.ucsd.edu', 'nif_eelg', 'nif_eelg_secure')
    config = mysql_conn_helper('nif-mysql.crbs.ucsd.edu', 'nif_eelg', 'nif_eelg_secure')
    engine = create_engine(DB_URI.format(**config))
    config = None  # all weakrefs should be gone by now?
    del(config)  # i wonder whether this actually cleans it up when using **config
    insp = inspect(engine)
    #names = [c['name'] for c in insp.get_columns('registry')]
    #resource_columns = [c['name'] for c in insp.get_columns('resource_columns')]
    #resource_data = [c['name'] for c in insp.get_columns('resource_data')]
    #resource_fields = [c['name'] for c in insp.get_columns('resource_fields')]
    #resources = [c['name'] for c in insp.get_columns('resources')]
    #conn.execute('SELECT * from registry;')
    if 1:
    #with engine.connect() as conn:
        conn = engine
        tables = ('resource_columns', 'resource_data', 'resource_fields', 'resources')
        # sample of each table: (column names, first 20 rows)
        data = {t:([c['name'] for c in insp.get_columns(t)], conn.execute('SELECT * from %s limit 20;' % t).fetchall()) for t in tables}
        all_fields = [n[0] for n in conn.execute('SELECT distinct(name) FROM resource_fields;').fetchall()]

        #query = conn.execute('SELECT r.rid, r.original_id, r.type, rc.name, rc.value from resources as r JOIN'
                            #' resource_columns as rc ON r.id=rc.rid'
                            #' WHERE rc.name IN %s limit 1000;' % str(tuple([n for n in field_mapping if n != 'MULTI'])))  # XXX DANGER THIS QUERY IS O(x^n) :x
                            #' ORDER BY r.rid limit 2000;'

        #query = conn.execute('SELECT r.rid, r.original_id, r.type, rc.name, rc.value from resource_columns as rc JOIN'
                             #' resources as r ON rc.rid=r.id'
                             #' WHERE rc.name IN %s;' % str(tuple([n for n in field_mapping if n != 'MULTI'])))  # XXX DANGER why does > 2000 limit break stuff?

        #join = query.fetchall()

        #embed()
        #return
        #print('running join')
        print('running 1')
        r_query = conn.execute('SELECT id, rid, original_id, type, status FROM resources WHERE id < 16000;')  # avoid the various test entries :(
        print('fetching 1 ')
        r = r_query.fetchall()
        print('running 2')
        rc_query = conn.execute('SELECT rid, name, value, version FROM resource_columns as rc WHERE rc.rid < 16000 AND rc.name IN %s;' % str(tuple([n for n in field_mapping if n != 'MULTI'])))
        print('fetching 2')
        rc = rc_query.fetchall()

        #embed()
        #return

    # hand-curated rows (negative ids so they cannot collide with real ones)
    r.append( (-100, 'NIF:nlx_63400', 'nlx_63400', 'Resource', 'Curated') )
    r.append( (-101, 'NIF:nlx_152342', 'nlx_152342', 'Organization', 'Curated') )
    r.append( (-102, 'NIF:nlx_152328', 'nlx_152328', 'Organization', 'Curated') )
    r.append( (-103, 'NIF:NEMO_0569000', 'NEMO_0569000', 'Institution', 'Curated') )
    r.append( (-104, 'NIF:birnlex_2431', 'birnlex_2431', 'Institution', 'Curated') )
    r.append( (-105, 'NIF:SIO_000688', 'SIO_000688', 'Institution', 'Curated') )
    r.append( (-106, 'NIF:birnlex_2085', 'birnlex_2085', 'Institution', 'Curated') )
    rc.append( (-100, 'Resource Name', 'Resource', 1) )
    rc.append( (-101, 'Resource Name', 'Commercial Organization', 1) )
    rc.append( (-102, 'Resource Name', 'Organization', 1) )
    rc.append( (-103, 'Resource Name', 'University', 1) )
    rc.append( (-104, 'Resource Name', 'Government granting agency', 1) )
    rc.append( (-105, 'Resource Name', 'Institute', 1) )
    rc.append( (-106, 'Resource Name', 'Institution', 1) )
    rc.append( (-101, 'Supercategory', 'NIF:nlx_152328', 1) )  # TODO extract this more intelligently from remap supers please

    output = make_records(r, rc, field_mapping)
    print('Fetching and data prep done.')
    g = makeGraph('scicrunch-registry', PREFIXES)

    # ontology metadata
    ontid = ONTOLOGY_DEF['iri']
    g.add_node(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    g.add_node(ontid, rdflib.RDFS.label, ONTOLOGY_DEF['label'])
    g.add_node(ontid, rdflib.RDFS.comment, ONTOLOGY_DEF['comment'])
    g.add_node(ontid, rdflib.OWL.versionInfo, ONTOLOGY_DEF['version'])

    for id_, rec in output.items():
        for field, value in rec:
            #print(field, value)
            if not value:  # don't add empty edges  # FIXME issue with False literal
                print('caught an empty value on field', id_, field)
                continue
            if field != 'id' and str(value) in id_:
            #if field == 'alt_id' and id_[1:] == value:
                print('caught a mainid appearing as altid', field, value)
                continue
            g.add_node(*make_node(id_, field, value))

    g.write()
Exemple #22
0
#!/usr/bin/env python3.5

import rdflib
from utils import makePrefixes, makeGraph

# Generate NIFGA-Equivs: for every owl:equivalentClass triple in uberon's
# bridge-to-nifstd file, re-root the subject under the NIFGA namespace
# (keeping the local id from the NIFSTD iri) and write the mapping ontology.
PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

g = rdflib.Graph()
g.parse('http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl', format='xml')
name = 'NIFGA-Equivs'
ng = makeGraph(name, PREFIXES)
# Plain loop instead of a list comprehension: g.add() is called purely for
# its side effect, so building a throwaway list of Nones was misleading.
for s, p, o in g.triples((None, rdflib.OWL.equivalentClass, None)):
    ng.g.add((rdflib.URIRef(PREFIXES['NIFGA'] + o.rsplit('/', 1)[-1]), p, o))
ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl', 'NIFGA to NIFSTD mappings')
ng.write()
def do_deprecation(replaced_by, g, additional_edges, conflated):
    """Deprecate NIFGA terms in favor of their UBERON replacements.

    Parameters
    ----------
    replaced_by : dict mapping nifga curie -> uberon curie, tuple of
        curies, 'NOREP' (no replacement), or None (skip).
    g : rdflib graph of NIF-GrossAnatomy, mutated in place via ``graph``.
    additional_edges : dict of extra scigraph-style edges keyed by nifga id.
    conflated : collection of uberon ids with conflated labels (used only
        in the currently disabled annotation-porting branch).

    Returns (graph, bridge, uedges) where bridge is the uberon-bridge
    makeGraph and uedges maps term -> pred -> set of hierarchy children.
    Relies on module-level ``sgv``, ``sgg``, ``PREFIXES``, ``NIFPREFIXES``,
    ``TODAY``, ``anns_to_port`` and ``preflabs``.
    """
    bmeta = OntMeta('http://ontology.neuinfo.org/NIF/ttl/bridge/',
                  'uberon-bridge',
                  'NIFSTD Uberon Bridge',
                  'UBERON Bridge',
                  ('This is the bridge file that holds local NIFSTD additions to uberon. '
                   'This is also staging for any changes that we want to push upstream.'),
                  TODAY)
    ontid = bmeta.path + bmeta.filename + '.ttl'
    bridge = makeGraph('uberon-bridge', PREFIXES)
    bridge.add_ont(ontid, *bmeta[2:])

    graph = makeGraph('NIF-GrossAnatomy', NIFPREFIXES, graph=g)
    #graph.g.namespace_manager._NamespaceManager__cache = {}
    #g.namespace_manager.bind('UBERON','http://purl.obolibrary.org/obo/UBERON_')  # this has to go in again because we reset g FIXME
    udone = set('NOREP')
    uedges = defaultdict(lambda:defaultdict(set))

    def inner(nifga, uberon):
        # Deprecate one nifga term against one uberon term: add the
        # replacedBy/xref annotations and port hierarchy edges onto the
        # uberon side of the bridge.
        # check neuronames id TODO

        udepr = sgv.findById(uberon)['deprecated'] if uberon != 'NOREP' else False
        if udepr:
            # add xref to the now deprecated uberon term
            graph.add_node(nifga, 'oboInOwl:hasDbXref', uberon)
            #print('Replacement is deprecated, not replacing:', uberon)
            graph.add_node(nifga, RDFS.comment, 'xref %s is deprecated, so not using replacedBy:' % uberon)
        else:
            # add replaced by -> uberon
            graph.add_node(nifga, 'replacedBy:', uberon)

        # add deprecated true (ok to do twice...)
        graph.add_node(nifga, OWL.deprecated, True)

        # review nifga relations, specifically has_proper_part, proper_part_of
        # put those relations on the uberon term in the 
        # if there is no uberon term raise an error so we can look into it

        #if uberon not in uedges:
            #uedges[uberon] = defaultdict(set)
        resp = sgg.getNeighbors(nifga)
        edges = resp['edges']
        if nifga in additional_edges:
            edges.append(additional_edges[nifga])
        include = False  # set this to True when running anns
        for edge in edges:  # FIXME TODO hierarchy extraction and porting
            #print(edge)
            if udepr:  # skip everything if uberon is deprecated
                include = False
                hier = False
                break
            sub = edge['sub']
            obj = edge['obj']
            pred = edge['pred']
            hier = False
            # only the part-of family of predicates is ported; the rest
            # are recognized and skipped
            if pred == 'subClassOf':
                pred = RDFS.subClassOf
                continue
            elif pred == 'equivalentClass':
                pred = OWL.equivalentClass
                continue
            elif pred == 'isDefinedBy':
                pred = RDFS.isDefinedBy
                continue
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#has_proper_part':
                hier = True
                include = True
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#proper_part_of':
                hier = True
                include = True
            elif pred == 'ilx:partOf':
                hier = True
                include = True

            if sub == nifga:
                # nifga is the edge subject: remap the object side
                try:
                    obj = replaced_by[obj]
                    if obj == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', obj)
                if type(obj) == tuple: continue  # TODO
                if hier:
                    if uberon not in uedges[obj][pred]:
                        uedges[obj][pred].add(uberon)
                        bridge.add_hierarchy(obj, pred, uberon)
                else:
                    #bridge.add_node(uberon, pred, obj)
                    pass
            elif obj == nifga:
                # nifga is the edge object: remap the subject side
                try:
                    sub = replaced_by[sub]
                    if sub == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', sub)
                if type(sub) == tuple: continue  # TODO
                if hier:
                    if sub not in uedges[uberon][pred]:
                        uedges[uberon][pred].add(sub)
                        bridge.add_hierarchy(uberon, pred, sub)
                else:
                    #bridge.add_node(sub, pred, uberon)
                    pass

        if False and uberon not in udone and include:  # skip porting annotations and labels for now
            #udone.add(uberon)
            try:
                label = sgv.findById(uberon)['labels'][0]
            except IndexError:
                WAT = sgv.findById(uberon)
                embed()
            bridge.add_class(uberon, label=label)

            # annotations to port
            for p in anns_to_port:
                os_ = list(graph.g.objects(graph.expand(nifga), p))
                for o in os_:
                    if label.lower() != o.lower():  # we can simply capitalize labels
                        print(label.lower())
                        print(o.lower())
                        print()
                        bridge.add_node(uberon, p, o)

                if p == SKOS.prefLabel and not os_:
                    if uberon not in conflated or (uberon in conflated and nifga in preflabs):
                        l = list(graph.g.objects(graph.expand(nifga), RDFS.label))[0]
                        bridge.add_node(uberon, SKOS.prefLabel, l)  # port label to prefLabel if no prefLabel

    for nifga, uberon in replaced_by.items():
        if type(uberon) == tuple:
            # one nifga term replaced by several uberon terms
            print(uberon)
            for ub in uberon:
                print(ub)
                inner(nifga, ub)
        elif uberon == 'NOREP':
            graph.add_node(nifga, OWL.deprecated, True)  # TODO check for missing edges?
        elif uberon is None:
            continue  # BUT TODAY IS NOT THAT DAY!
        else:
            inner(nifga, uberon)

    return graph, bridge, uedges
Exemple #24
0
def do_deprecation(replaced_by, g, additional_edges, conflated):
    """Deprecate NIFGA terms and port their hierarchy onto uberon terms.

    Every ``nifga -> uberon`` entry in ``replaced_by`` gets
    ``owl:deprecated true`` plus either a ``replacedBy:`` triple or, when
    the uberon replacement is itself deprecated, an ``oboInOwl:hasDbXref``
    instead.  Hierarchical edges (has_proper_part / proper_part_of /
    ilx:partOf) attached to the nifga term are re-expressed between the
    uberon replacements in a new 'uberon-bridge' graph.

    Args:
        replaced_by: dict mapping a nifga id to a uberon id, a tuple of
            uberon ids, the string 'NOREP' (deprecate with no
            replacement) or None (skip entirely).
        g: the NIF-GrossAnatomy rdflib graph, modified in place.
        additional_edges: extra scigraph-style edge dicts keyed by nifga
            id, appended to the neighbors returned by sgg.
        conflated: uberon ids whose nifga sources were conflated; read
            together with ``preflabs`` in the (currently disabled)
            annotation-porting branch.

    Returns:
        (graph, bridge, uedges): the wrapped NIFGA graph, the new bridge
        graph, and the dict recording which hierarchy edges were ported.
    """
    bmeta = OntMeta(
        'http://ontology.neuinfo.org/NIF/ttl/bridge/', 'uberon-bridge',
        'NIFSTD Uberon Bridge', 'UBERON Bridge',
        ('This is the bridge file that holds local NIFSTD additions to uberon. '
         'This is also staging for any changes that we want to push upstream.'
         ), TODAY)
    ontid = bmeta.path + bmeta.filename + '.ttl'
    bridge = makeGraph('uberon-bridge', PREFIXES)
    bridge.add_ont(ontid, *bmeta[2:])

    graph = makeGraph('NIF-GrossAnatomy', NIFPREFIXES, graph=g)
    #graph.g.namespace_manager._NamespaceManager__cache = {}
    #g.namespace_manager.bind('UBERON','http://purl.obolibrary.org/obo/UBERON_')  # this has to go in again because we reset g FIXME
    # FIX: was ``set('NOREP')``, which iterates the string and yields
    # {'N', 'O', 'R', 'E', 'P'}; a one-element set was clearly intended.
    # (Only read inside the disabled ``if False`` branch below, so this
    # does not change current behavior.)
    udone = {'NOREP'}
    uedges = defaultdict(lambda: defaultdict(set))

    def inner(nifga, uberon):
        """Deprecate a single nifga term in favor of a single uberon term."""
        # check neuronames id TODO

        udepr = sgv.findById(
            uberon)['deprecated'] if uberon != 'NOREP' else False
        if udepr:
            # add xref to the now deprecated uberon term
            graph.add_trip(nifga, 'oboInOwl:hasDbXref', uberon)
            #print('Replacement is deprecated, not replacing:', uberon)
            graph.add_trip(
                nifga, RDFS.comment,
                'xref %s is deprecated, so not using replacedBy:' % uberon)
        else:
            # add replaced by -> uberon
            graph.add_trip(nifga, 'replacedBy:', uberon)

        # add deprecated true (ok to do twice...)
        graph.add_trip(nifga, OWL.deprecated, True)

        # review nifga relations, specifically has_proper_part, proper_part_of
        # put those relations on the uberon term in the
        # if there is no uberon term raise an error so we can look into it

        #if uberon not in uedges:
        #uedges[uberon] = defaultdict(set)
        resp = sgg.getNeighbors(nifga)
        edges = resp['edges']
        if nifga in additional_edges:
            edges.append(additional_edges[nifga])
        include = False  # set this to True when running anns
        for edge in edges:  # FIXME TODO hierarchy extraction and porting
            #print(edge)
            if udepr:  # skip everything if uberon is deprecated
                include = False
                hier = False
                break
            sub = edge['sub']
            obj = edge['obj']
            pred = edge['pred']
            hier = False
            # NOTE: the assignments immediately before these continues are
            # dead code; they document which predicates are deliberately
            # NOT ported to the bridge graph.
            if pred == 'subClassOf':
                pred = RDFS.subClassOf
                continue
            elif pred == 'equivalentClass':
                pred = OWL.equivalentClass
                continue
            elif pred == 'isDefinedBy':
                pred = RDFS.isDefinedBy
                continue
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#has_proper_part':
                hier = True
                include = True
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#proper_part_of':
                hier = True
                include = True
            elif pred == 'ilx:partOf':
                hier = True
                include = True

            if sub == nifga:
                # nifga is the subject: translate the object through
                # replaced_by and attach the hierarchy edge to the bridge.
                try:
                    obj = replaced_by[obj]
                    if obj == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', obj)
                if type(obj) == tuple: continue  # TODO
                if hier:
                    if uberon not in uedges[obj][pred]:
                        uedges[obj][pred].add(uberon)
                        bridge.add_hierarchy(obj, pred, uberon)
                else:
                    #bridge.add_trip(uberon, pred, obj)
                    pass
            elif obj == nifga:
                # nifga is the object: translate the subject instead.
                try:
                    sub = replaced_by[sub]
                    if sub == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', sub)
                if type(sub) == tuple: continue  # TODO
                if hier:
                    if sub not in uedges[uberon][pred]:
                        uedges[uberon][pred].add(sub)
                        bridge.add_hierarchy(uberon, pred, sub)
                else:
                    #bridge.add_trip(sub, pred, uberon)
                    pass

        if False and uberon not in udone and include:  # skip porting annotations and labels for now
            #udone.add(uberon)
            try:
                label = sgv.findById(uberon)['labels'][0]
            except IndexError:
                WAT = sgv.findById(uberon)
                embed()
            bridge.add_class(uberon, label=label)

            # annotations to port
            for p in anns_to_port:
                os_ = list(graph.g.objects(graph.expand(nifga), p))
                for o in os_:
                    if label.lower() != o.lower():  # we can simply capitalize labels
                        print(label.lower())
                        print(o.lower())
                        print()
                        bridge.add_trip(uberon, p, o)

                if p == SKOS.prefLabel and not os_:
                    # simplified from ``uberon not in conflated or
                    # (uberon in conflated and nifga in preflabs)`` --
                    # logically equivalent.
                    if uberon not in conflated or nifga in preflabs:
                        l = list(
                            graph.g.objects(graph.expand(nifga),
                                            RDFS.label))[0]
                        bridge.add_trip(
                            uberon, SKOS.prefLabel,
                            l)  # port label to prefLabel if no prefLabel

    for nifga, uberon in replaced_by.items():
        if type(uberon) == tuple:
            print(uberon)
            for ub in uberon:
                print(ub)
                inner(nifga, ub)
        elif uberon == 'NOREP':
            graph.add_trip(nifga, OWL.deprecated,
                           True)  # TODO check for missing edges?
        elif uberon is None:
            continue  # BUT TODAY IS NOT THAT DAY!
        else:
            inner(nifga, uberon)

    return graph, bridge, uedges
Exemple #25
0
from utils import makeGraph, sendTweet, CITIES
import os

# Regenerate the graph for every configured city (presumably the weather
# graph makeGraph produces elsewhere in this project -- TODO confirm).
for city in CITIES:
    makeGraph(city)

# Only tweet when explicitly not in debug mode.  Using .get() instead of
# indexing avoids a KeyError crash when the DEBUG environment variable is
# not set at all; an unset DEBUG now simply skips the tweet.
if os.environ.get("DEBUG") == "False":
    sendTweet("Berlin")
Exemple #26
0
 def __init__(self):
     """Wrap the shared EXISTING_GRAPH and ingest its current contents."""
     # Reuse the namespace bindings already registered on EXISTING_GRAPH
     # as the prefix map for the merged graph wrapper.
     prefix_map = {prefix: str(uri) for prefix, uri in EXISTING_GRAPH.namespaces()}
     self.g = makeGraph('merged', prefixes=prefix_map, graph=EXISTING_GRAPH)
     self.bag_existing()
def make_table1(syn_mappings, ilx_start, phenotypes):
    """Build the 'hbp-special' graph from resources/26451489 table 1.csv.

    Loads the table, adds ontology header triples, seeds ``syn_mappings``
    with curies for a handful of marker proteins, runs ``table1`` over the
    rows, back-fills labels for referenced-but-unlabeled terms via
    scigraph, adds the NeuroTypeClass scaffolding and one typeclass per
    phenotype disjoint-union, then writes the graph.

    Args:
        syn_mappings: dict of synonym -> expanded curie; mutated in place.
        ilx_start: starting interlex id counter for new defined classes.
        phenotypes: phenotype iris fed to ``add_types``.

    Returns:
        The ``table1`` result object ``t``.
    """
    # TODO when to explicitly subClassOf? I think we want this when the higher level phenotype bag is shared
    # it may turn out that things like the disjointness exist at a higher level while correlated properties
    # should be instantiated together as sub classes, for example if cck and
    # FIXME disagreement about nest basket cells
    # TODO hasPhenotypes needs to be function to get phenotypeOf to work via reasoner??? this seems wrong.
    #  this also works if phenotypeOf is inverseFunctional
    #  hasPhenotype shall be asymmetric, irreflexive, and intransitive
    # XXX in answer to Maryann's question about why we need the morphological phenotypes by themselves:
    #  if we don't have them we can't agregate across orthogonal phenotypes since owl correctly keeps the classes distinct
    # TODO disjointness axioms work really well on defined classes and propagate excellently
    # TODO add 'Petilla' or something like that to the phenotype definitions
    #  we want this because 'Petilla' denotes the exact ANALYSIS used to determine the phenotype
    #  there are some additional 'protocol' related restrictions on what you can apply analysis to
    #  but we don't have to model those explicitly which would be a nightmare and break the
    #  orthogonality of the cell type decomposition
    # TODO to make this explicit we need to include that phenotypes require 2 things
    #  1) a type of data (data type?) 2) a way to classify that data (analysis protocol)
    #
    # need a full type restriction... property chain?

    graph = makeGraph('hbp-special', prefixes=PREFIXES)  # XXX fix all prefixes

    with open(refile(__file__, 'resources/26451489 table 1.csv'), 'rt') as f:
        rows = [list(r) for r in zip(*csv.reader(f))]  # transpose: columns -> rows

    base = 'http://ontology.neuinfo.org/NIF/ttl/'
    ontid = base + graph.name + '.ttl'
    graph.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    graph.add_trip(ontid, rdflib.OWL.imports,
                   base + 'NIF-Neuron-Phenotype.ttl')
    graph.add_trip(ontid, rdflib.OWL.imports, base + 'NIF-Neuron-Defined.ttl')

    def lsn(word):
        # look up a term's first scigraph curie and record it as a synonym
        syn_mappings[word] = graph.expand(
            sgv.findByTerm(word)[0]['curie'])  # cheating

    lsn('Parvalbumin')
    lsn('neuropeptide Y')
    lsn('VIP peptides')
    lsn('somatostatin')
    syn_mappings['calbindin'] = graph.expand('PR:000004967')  # cheating
    syn_mappings['calretinin'] = graph.expand('PR:000004968')  # cheating
    t = table1(graph, rows, syn_mappings, ilx_start)
    ilx_start = t.ilx_start

    # adding fake mouse data
    #with open(refile(__file__, 'resources/26451489 table 1.csv'), 'rt') as f:  # FIXME annoying
    #rows = [list(r) for r in zip(*csv.reader(f))]
    #t2 = table1(graph, rows, syn_mappings, ilx_start, species='NCBITaxon:10090')  # FIXME double SOM+ phenos etc
    #ilx_start = t2.ilx_start

    def do_graph(d):
        # add a type + label triple pair for one scigraph record
        sgt = graph.expand(d['curie'])
        label = d['labels'][0]
        graph.add_trip(sgt, rdflib.RDF.type, rdflib.OWL.Class)
        graph.add_trip(sgt, rdflib.RDFS.label, label)

    done = set()
    # Back-fill labels for any object URI in the graph that does not yet
    # carry an rdfs:label, pulling metadata from scigraph.
    for s, p, o in graph.g.triples(
        (None, None, None)):  #(rdflib.RDFS.subClassOf,rdflib.OWL.Thing)):
        if o not in done and isinstance(o, rdflib.term.URIRef):
            done.add(o)
            # FIX: Graph.objects takes (subject, predicate) as two separate
            # arguments; the original passed a single tuple, which rdflib
            # treated as a bogus subject so existing labels never matched.
            if not [_ for _ in graph.g.objects(o, rdflib.RDFS.label)]:
                d = sgv.findById(o)
                if d:
                    # only port terms from these namespaces
                    if any(prefix in d['curie'] for prefix in
                           ('PR:', 'NIFMOL:', 'UBERON:',
                            'NCBITaxon:', 'NIFCELL:')):
                        do_graph(d)

    # FIXME this is a dupe with defined_class
    #graph.add_trip(defined_class_parent, rdflib.RDF.type, rdflib.OWL.Class)
    #graph.add_trip(defined_class_parent, rdflib.RDFS.label, 'defined class neuron')
    #graph.add_trip(defined_class_parent, rdflib.namespace.SKOS.description, 'Parent class For all defined class neurons')
    #graph.add_trip(defined_class_parent, rdflib.RDFS.subClassOf, NIFCELL_NEURON)
    #graph.add_trip(morpho_defined, rdflib.RDFS.subClassOf, defined_class_parent)
    #graph.add_trip(morpho_defined, rdflib.RDFS.label, 'Morphologically classified neuron')  # FIXME -- need asserted in here...
    #graph.add_trip(ephys_defined, rdflib.RDFS.subClassOf, defined_class_parent)
    #graph.add_trip(ephys_defined, rdflib.RDFS.label, 'Electrophysiologically classified neuron')

    graph.add_class(expression_defined, NIFCELL_NEURON, autogen=True)
    graph.add_class('ilx:NeuroTypeClass',
                    NIFCELL_NEURON,
                    label='Neuron TypeClass')

    graph.g.commit()

    phenotype_dju_dict = add_types(graph, phenotypes)
    for pheno, disjoints in phenotype_dju_dict.items():
        # derive a human-readable name from the CamelCase curie suffix
        name = ' '.join(re.findall(
            r'[A-Z][a-z]*',
            pheno.split(':')[1])[:-1])  #-1: drops Phenotype
        ilx_start += 1  # = make_defined(graph, ilx_start, name + ' neuron type', pheno, 'ilx:hasPhenotype')
        id_ = graph.expand(ilx_base.format(ilx_start))
        typeclass = infixowl.Class(id_, graph=graph.g)
        typeclass.label = rdflib.Literal(name + ' neuron type')

        restriction = infixowl.Restriction(graph.expand('ilx:hasPhenotype'),
                                           graph=graph.g,
                                           someValuesFrom=pheno)
        #typeclass.subClassOf = [restriction, graph.expand('ilx:NeuroTypeClass')]

        ntc = graph.expand('ilx:NeuroTypeClass')
        intersection = infixowl.BooleanClass(members=(ntc, restriction),
                                             graph=graph.g)
        typeclass.equivalentClass = [intersection]

        # FIXME not clear that we should be doing typeclasses this way.... :/
        # requires more thought, on the plus side you do get better reasoning...
        disjointunion = disjointUnionOf(graph=graph.g, members=list(disjoints))
        graph.add_trip(id_, rdflib.OWL.disjointUnionOf, disjointunion)

    graph.write()
    return t
Exemple #28
0
def swanson():
    """ not really a parcellation scheme

    Parse resources/swanson_aligned.txt (the aligned text of the Swanson
    2014 partonomies) into an ontology of brain-region terms plus one
    partOf/hasPart hierarchy per appendix, write the graph to WRITELOC,
    and return ``(ontid, None)``.
    """
    source = 'resources/swanson_aligned.txt'
    ONT_PATH = GENERATED
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    PREFIXES.update({
        #'':ontid + '/',  # looking for better options
        'SWAN': interlex_namespace('swanson/nt/term'),
        'SWAA': interlex_namespace('swanson/nt/appendix'),
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc=WRITELOC)
    new_graph.add_ont(
        ontid, 'Swanson brain partomies', 'Swanson 2014 Partonomies',
        'This file is automatically generated from ' + source + '.' + NOTICE,
        TODAY)

    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in _.split(' (')])
        for _ in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    data = []
    # Each non-comment line encodes: depth (number of 5-dot runs of
    # leading indentation), an area name, an optional '(citation)', and
    # an optional ') or' suffix marking the entry as a synonym of the
    # NEXT row.
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.' * 5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None

            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # Look up every area name in scigraph concurrently, keeping only
    # UBERON curies (first hit or None per row).
    results = async_getter(sgv.findByTerm, [(d[1], ) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else []
              for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        # rowParse dispatches one method call per column of every row of
        # ``zoop`` (method name == column header), then calls _row_post
        # after each row and _end when done.  The per-row state below
        # accumulates the node and appendix dictionaries consumed after
        # construction.
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # first column handler; also latches whether the previous row
            # flagged this one as its synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            # remember this row's index so the NEXT row can record it as
            # a synonym source
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    # close out the previous appendix before starting a
                    # new one
                    if self._appendix:
                        self.appendicies[self._appendix]['children'] = dict(
                            self.children)
                        self.appendicies[self._appendix]['parents'] = dict(
                            self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':
                        apname.capitalize(),
                        'type':
                        self.citation.capitalize() if self.citation else None
                    }
                    return
                else:
                    # depth-0 non-appendix rows carry the taxon in
                    # square brackets, if any
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix][
                            'taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[
                            self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                # this row only annotates the previous row's node
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(
                        self._rowind)
                else:
                    self.name += str(self._appendix) + self.nodes[
                        self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # Deduplicate nodes whose name+citation occurred more than
            # once: keep the lowest row index and rewire the appendix
            # children/parents maps to point at it.
            replace = {}
            for asdf in [
                    sorted(n) for k, n in self.names.items() if len(n) > 1
            ]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [
        _ for _ in sorted([
            '{: <50}'.format(n['label']) +
            n['uberon'] if n['uberon'] else n['label']
            for n in sp.nodes.values()
        ])
    ]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    # Emit one class per parsed node under the shared swanson concept.
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes': [], 'edges': []}
    parent = ILXREPLACE('swansonBrainRegionConcept')
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
        #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])  # issues arrise here...

    # Emit one appendix class plus a per-appendix hasPart/partOf property
    # pair, then instantiate the hierarchy edges under them.
    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(
            aid, 'ilx:hasTaxonRank',
            data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items(
        ):  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(
                    cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({
                    'sub': 'SWA:' + str(child),
                    'pred': apo,
                    'obj': 'SWA:' + str(parent)
                })

    new_graph.write()
    # disabled debugging: render each appendix hierarchy as a tree
    if False:
        Query = namedtuple('Query',
                           ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1),
                                    'INCOMING', 10),
                             json=json_)
            print(a)
    return ontid, None
def make_phenotypes():
    """Generate the NIF phenotype ontologies from the resource csv files.

    Builds three graphs: NIF-Phenotype-Core (the phenotype predicates
    plus the ilx:Phenotype parent class), NIF-Phenotypes (the phenotype
    taxonomy from resources/neuron_phenotype.csv) and NIF-Neuron-Defined
    (defined neuron classes derived from the phenotypes).  The first two
    are written to disk here; the defined graph is returned for the
    caller to finish.

    Returns:
        (syn_mappings, pedges, ilx_start, inc, defined_graph) where
        syn_mappings maps synonym/label strings to subjects, pedges is
        the set of phenotype edge predicates, ilx_start is the advanced
        id counter, and inc is the subClassOf transitive closure under
        ilx:NeuronPhenotype.
    """
    ilx_start = 50114
    graph = makeGraph('NIF-Phenotype-Core', prefixes=PREFIXES)
    graph2 = makeGraph('NIF-Phenotypes', prefixes=PREFIXES)

    eont = OntMeta(
        'http://ontology.neuinfo.org/NIF/ttl/', 'NIF-Neuron-Defined',
        'NIF Neuron Defined Classes', 'NIFNEUDEF',
        'This file contains defined classes derived from neuron phenotypes.',
        TODAY)
    defined_graph = makeGraph(eont.filename, prefixes=PREFIXES)
    ontid = eont.path + eont.filename + '.ttl'
    defined_graph.add_ont(ontid, *eont[2:])

    # do edges first since we will need them for the phenotypes later
    # TODO real ilx_ids and use prefixes to manage human readability
    with open(refile(__file__, 'resources/neuron_phenotype_edges.csv'),
              'rt') as f:
        rows = [r for r in csv.reader(f)]

    # maps csv rule keywords to owl property characteristic types
    lookup = {
        'asymmetric': 'owl:AsymmetricProperty',
        'irreflexive': 'owl:IrreflexiveProperty',
        'functional': 'owl:FunctionalProperty',
    }
    pedges = set()

    def irn(inp):
        # NOTE(review): appears unused within this function -- confirm
        # before removing
        return ILXREPLACE(__name__ + inp)

    # csv columns (by position): 0 = property name, 3 = definition,
    # 6 = subPropertyOf, 7 = inverseOf, 8 = comma-separated characteristics
    for row in rows[1:]:
        if row[0].startswith('#') or not row[0]:
            if row[0] == '#references':
                break
            print(row)
            continue
        id_ = PREFIXES['ilx'] + row[0]
        pedges.add(graph.expand('ilx:' + row[0]))
        graph.add_trip(id_, rdflib.RDFS.label, row[0])  # FIXME
        graph.add_trip(id_, rdflib.RDF.type, rdflib.OWL.ObjectProperty)
        if row[3]:
            graph.add_trip(id_, rdflib.namespace.SKOS.definition, row[3])
        if row[6]:
            graph.add_trip(id_, rdflib.RDFS.subPropertyOf, 'ilx:' + row[6])
        if row[7]:
            graph.add_trip(id_, rdflib.OWL.inverseOf, 'ilx:' + row[7])
        if row[8]:
            for t in row[8].split(','):
                t = t.strip()
                graph.add_trip(id_, rdflib.RDF.type, lookup[t])

    with open(refile(__file__, 'resources/neuron_phenotype.csv'), 'rt') as f:
        rows = [
            r for r in csv.reader(f) if any(r) and not r[0].startswith('#')
        ]

    class PP(rowParse):  # FIXME use add_new in _row_post?
        # rowParse dispatches one method per csv column; _row_post then
        # creates a matching defined neuron class for each phenotype.
        SCD = 'subClassesDisjoint'
        DJW = 'disjointWith'

        def __init__(self):
            self.ilx_start = ilx_start
            self.parent_child_map = defaultdict(set)
            self.child_parent_map = defaultdict(set)
            self.scd = set()
            super().__init__(rows)

        def ilx_id(self, value):
            # derive a default label by splitting the CamelCase suffix
            self.id_ = graph2.expand(value)
            self.Class = infixowl.Class(self.id_, graph=graph2.g)
            label = ' '.join(re.findall(r'[A-Z][a-z]*',
                                        self.id_.split(':')[1]))
            self._label = label

        def subClassOf(self, value):
            if value:
                self.parent = graph2.expand(value)
                self.parent_child_map[self.parent].add(self.id_)
                self.child_parent_map[self.id_].add(self.parent)
                self.Class.subClassOf = [self.parent]

        def label(self, value):
            # explicit label wins over the derived one
            if value:
                self._label = value
                self.Class.label = value
            else:
                self.Class.label = rdflib.Literal(self._label)

        def synonyms(self, value):
            if value:
                for v in value.split(','):
                    graph2.add_trip(self.id_, 'NIFRID:synonym', v)

        def rules(self, value):
            # either mark all children disjoint, or record explicit
            # 'disjointWith a b c' targets
            if value == PP.SCD:
                self.scd.add(self.id_)
            elif value.startswith(PP.DJW):
                [
                    graph2.add_trip(self.id_, rdflib.OWL.disjointWith, _)
                    for _ in value.split(' ')[1:]
                ]

        def use_edge(self, value):
            if value:
                graph2.add_trip(self.id_, 'ilx:useObjectProperty',
                                graph.expand('ilx:' + value))

        def _row_post(self):
            # defined class
            # top-level phenotypes map to the equivalent-class anchors
            # used when building the defined neuron class below
            lookup = {
                graph.expand('ilx:AxonPhenotype'):
                rdflib.URIRef('http://axon.org'),
                graph.expand('ilx:AxonMorphologicalPhenotype'):
                None,
                graph.expand('ilx:DendritePhenotype'):
                rdflib.URIRef('http://dendrite.org'),
                graph.expand('ilx:DendriteMorphologicalPhenotype'):
                None,
                graph.expand('ilx:SomaPhenotype'):
                rdflib.URIRef('http://soma.org'),
                graph.expand('ilx:SomaMorphologicalPhenotype'):
                None,
                graph.expand('ilx:NeuronPhenotype'):
                graph.expand(NIFCELL_NEURON),
                graph.expand('ilx:CellPhenotype'):
                None,
                graph.expand('ilx:Phenotype'):
                graph.expand('ilx:Phenotype'),
            }
            if self.id_ in lookup:
                return
            #elif 'Petilla' in self.id_:
            #return
            #else:
            #print(self.id_)

            # hidden label for consturctions
            graph2.add_trip(self.id_, rdflib.namespace.SKOS.hiddenLabel,
                            self._label.rsplit(' Phenotype')[0])

            self.ilx_start += 1
            id_ = defined_graph.expand(ilx_base.format(self.ilx_start))
            defined = infixowl.Class(id_, graph=defined_graph.g)
            #defined.label = rdflib.Literal(self._label.rstrip(' Phenotype') + ' neuron')  # the extra space in rstrip removes 'et ' as well WTF!
            defined.label = rdflib.Literal(
                self._label.rstrip('Phenotype') + 'neuron')
            #print(self._label)
            print('_row_post ilx_start', self.ilx_start,
                  list(defined.label)[0])

            def getPhenotypeEdge(phenotype):
                print(phenotype)
                edge = 'ilx:hasPhenotype'  # TODO in neuronManager...
                return edge

            edge = getPhenotypeEdge(self.id_)
            restriction = infixowl.Restriction(graph.expand(edge),
                                               graph=defined_graph.g,
                                               someValuesFrom=self.id_)

            # walk up the parent chain until we reach a top-level
            # phenotype present in ``lookup``
            parent = [p for p in self.child_parent_map[self.id_] if p]
            if parent:
                parent = parent[0]
                while 1:
                    if parent == defined_graph.expand('ilx:NeuronPhenotype'):
                        #defined.subClassOf = [graph.expand(defined_class_parent)]  # XXX this does not produce what we want
                        break
                    #else:
                    #print(parent, graph.expand('ilx:NeuronPhenotype'))

                    #print('xxxxxxxxxxxxxxxx', parent)
                    new_parent = [
                        p for p in self.child_parent_map[parent] if p
                    ]
                    if new_parent:
                        parent = new_parent[0]
                    else:
                        break
                phenotype_equiv = lookup[parent]
            else:
                return

            intersection = infixowl.BooleanClass(members=(phenotype_equiv,
                                                          restriction),
                                                 graph=defined_graph.g)
            ##intersection = infixowl.BooleanClass(members=(restriction,), graph=self.graph.g)

            defined.equivalentClass = [intersection]

        def _end(self):
            # apply the subClassesDisjoint rule collected in ``rules``
            for parent in self.scd:
                make_mutually_disjoint(graph2,
                                       list(self.parent_child_map[parent]))

    pp = PP()
    ilx_start = pp.ilx_start

    to_add = {}

    def lsn(word):
        # pick the best-ranked scigraph curie for a term, preferring
        # PR > NIFMOL > UBERON > NCBITaxon > NIFCELL namespaces
        rank = defaultdict(lambda: 0)
        rank['PR'] = -100
        rank['NIFMOL'] = -50
        rank['UBERON'] = -10
        rank['NCBITaxon'] = -9
        rank['NIFCELL'] = -8
        sort_rank = lambda r: rank[r['curie'].split(':')[0]]
        to_add[word] = graph2.expand(
            sorted(sgv.findByTerm(word),
                   key=sort_rank)[0]['curie'])  # cheating

    # FIXME naming
    lsn('Parvalbumin')
    lsn('neuropeptide Y')
    lsn('VIP peptides')
    lsn('somatostatin')
    lsn('calbindin')
    lsn('calretinin')

    #for name, iri in to_add.items():  # XXX do not need, is already covered elsewhere
    #print('make_phenotypes ilx_start', ilx_start, name)
    #ilx_start = make_defined(defined_graph, ilx_start, name, iri, 'ilx:hasExpressionPhenotype', parent=expression_defined)

    #syn_mappings['calbindin'] = graph.expand('PR:000004967')  # cheating
    #syn_mappings['calretinin'] = graph.expand('PR:000004968')  # cheating
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/' + graph.name + '.ttl'
    graph.add_ont(
        ontid,
        'NIF Phenotype core',
        comment=
        'This is the core set of predicates used to model phenotypes and the parent class for phenotypes.'
    )
    graph.add_class('ilx:Phenotype', label='Phenotype')
    graph.add_trip(
        'ilx:Phenotype', 'skos:definition',
        'A Phenotype is a binary property of a biological entity. Phenotypes are derived from measurements made on the subject of interest. While Phenotype is not currently placed within the BFO hierarchy, if we were to place it, it would fall under BFO:0000016 -> disposition, since these phenotypes are contingent on the experimental conditions under which measurements were made and are NOT qualities. For consideration: in theory this would mean that disjointness does not make sense, even for things that would seem to be obviously disjoint such as Accomodating and Non-Accomodating. However, this information can still be captured on a subject by subject basis by asserting that for this particular entity, coocurrance of phenotypes is not possilbe. This still leaves the question of whether the class of biological entities that correspond to the bag of phenotypes is implicitly bounded/limited only to the extrinsic and unspecified experimental contidions, some of which are not and cannot be included in a bag of phenotypes. The way to deal with this when we want to include 2 \'same time\' disjoint phenotypes, is to use a logical phenotype to wrap them with an auxillary variable that we think accounts for the difference.'
    )
    #graph.add_trip(ontid, rdflib.RDFS.comment, 'The NIF Neuron ontology holds materialized neurons that are collections of phenotypes.')
    #graph.add_trip(ontid, rdflib.OWL.versionInfo, ONTOLOGY_DEF['version'])
    #graph.g.commit()
    #get_defined_classes(graph)  # oops...
    graph.write()  # moved below to incorporate uwotm8

    ontid2 = 'http://ontology.neuinfo.org/NIF/ttl/' + graph2.name + '.ttl'
    graph2.add_ont(
        ontid2,
        'NIF Phenotypes',
        comment=
        'A taxonomy of phenotypes used to model biological types as collections of measurements.'
    )
    graph2.add_trip(ontid2, 'owl:imports', ontid)
    graph2.write()

    # build synonym/label -> subject mapping from the core graph
    syn_mappings = {}
    for sub, syn in [
            _ for _ in graph.g.subject_objects(graph.expand('NIFRID:synonym'))
    ] + [_ for _ in graph.g.subject_objects(rdflib.RDFS.label)]:
        syn = syn.toPython()
        if syn in syn_mappings:
            print('ERROR duplicate synonym!', syn, sub)
        syn_mappings[syn] = sub

    phenotypes = [
        s for s, p, o in graph.g.triples((None, None, None))
        if ' Phenotype' in o
    ]
    inc = get_transitive_closure(
        graph, rdflib.RDFS.subClassOf,
        graph.expand('ilx:NeuronPhenotype'))  # FIXME not very configurable...

    return syn_mappings, pedges, ilx_start, inc, defined_graph
def chebi_imp():
    """Construct the NIF ChEBI bridge ontology.

    Loads the chebislim and chebi-dead subsets plus NIF-Chemical and
    NIF-Molecule, normalizes atom/ion confusions and replaced (dead)
    identifiers, then diffs the NIF graphs against upstream ChEBI to
    collect annotations that exist only on the NIF side.  Survivors of a
    hand-curated rejection list are written to a new 'chebi-bridge'
    ontology, ChEBI classes are removed from NIF-Chemical/NIF-Molecule,
    and the function drops into embed() for interactive review.

    Side effects: reads resources/chebi-subset-ids.txt and several ttl
    files from a hard-coded home directory; writes chebi-bridge,
    NIF-Chemical and NIF-Molecule.
    """
    # NOTE(review): 'oboInOwl' is passed twice; harmless but redundant.
    PREFIXES = makePrefixes('definition', 'replacedBy', 'hasRole', 'oboInOwl',
                            'CHEBI', 'owl', 'skos', 'oboInOwl')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    # the CHEBI identifiers NIF tracks, as expanded URIRefs (sorted for
    # stable ordering of the per-id counts below)
    with open('resources/chebi-subset-ids.txt', 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))
        ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw)))

    def check_chebis(g):
        # snapshot the triple count for every tracked id so that changes
        # introduced by each merged source graph can be detected later
        a = []
        for id_ in ids:
            l = sorted(g.triples((id_, None, None)))
            ll = len(l)
            a.append(ll)
        return a

    def fixIons(g):
        # there are a series of atom/ion confusions that shall be dealt with, solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
        ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
        # atom           ion
        None, 'CHEBI:29108'  # calcium is ok
        ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
        ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
        ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
        ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
        None, 'CHEBI:29105'  # zinc is ok

    # g accumulates all four sources; cg/cd/chemg/molg keep each source
    # separate so they can be diffed individually later
    g = rdflib.Graph()
    cg = rdflib.Graph()
    cd = rdflib.Graph()
    chemg = rdflib.Graph()
    molg = rdflib.Graph()

    #g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl', format='turtle')
    cg.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebislim.ttl',
             format='turtle')
    list(g.add(t) for t in cg)
    a1 = check_chebis(g)  # counts after chebislim alone

    #g.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl', format='turtle')
    cd.parse('/home/tom/git/NIF-Ontology/ttl/generated/chebi-dead.ttl',
             format='turtle')
    list(g.add(t) for t in cd)
    a2 = check_chebis(g)  # counts after + chebi-dead

    #g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl', format='turtle')
    chemg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Chemical.ttl',
                format='turtle')
    chemgg = makeGraph('NIF-Chemical', graph=chemg)
    fixIons(chemg)
    list(g.add(t) for t in chemg)
    a3 = check_chebis(g)  # counts after + NIF-Chemical

    #g.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl', format='turtle')
    molg.parse('/home/tom/git/NIF-Ontology/ttl/NIF-Molecule.ttl',
               format='turtle')
    molgg = makeGraph('NIF-Molecule', graph=molg)
    fixIons(molg)
    list(g.add(t) for t in molg)
    a4 = check_chebis(g)  # counts after + NIF-Molecule

    replacedBy = ug.expand('replacedBy:')
    # map dead (replaced) ids to their replacements, from chebi-dead
    deads = {s: o for s, o in cd.subject_objects(replacedBy)}

    def switch_dead(g):
        # rewrite every dead id to its replacement and record the old id
        # on the replacement as oboInOwl:hasAlternateId
        ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
        for f, r in deads.items():
            ng.replace_uriref(f, r)
            ng.add_node(r, 'oboInOwl:hasAlternateId',
                        rdflib.Literal(f, datatype=rdflib.XSD.string))
            g.remove(
                (r, replacedBy, r))  # in case the replaced by was already in

    switch_dead(g)
    switch_dead(cg)
    switch_dead(chemg)
    switch_dead(molg)

    def fixHasAltId(g):
        # normalize legacy NIFCHEM/BIRNANN annotation properties onto the
        # standard oboInOwl terms
        ng = makeGraph('',
                       graph=g,
                       prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'BIRNANN'))
        ng.replace_uriref('NIFCHEM:hasAlternativeId',
                          'oboInOwl:hasAlternativeId')
        ng.replace_uriref('BIRNANN:ChEBIid', 'oboInOwl:id')

    list(map(fixHasAltId, (g, cg, chemg)))

    def fixAltIdIsURIRef(g):
        # alternative-id values should be string literals (curies), not
        # URIRefs; shorten any URIRef objects to qnames
        hai = ug.expand('oboInOwl:hasAlternativeId')
        i = ug.expand('oboInOwl:id')
        makeGraph('', graph=g, prefixes=makePrefixes(
            'CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                # 'nsN:' prefixes mean rdflib invented a namespace, i.e.
                # the curie is probably wrong
                if 'ns' in qn:
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
        for s, o in g.subject_objects(i):
            inner(s, i, o)

    list(map(fixAltIdIsURIRef, (g, cg, chemg)))

    # ids whose triple count changed between merges need human review;
    # m[0] requires the id to exist in chebislim in the first place
    matches = [_ for _ in zip(a1, a2, a3, a4)]
    changed = [len(set(_)) != 1 for _ in matches]
    # NOTE(review): the comprehension variable 'changed' shadows the list
    # above; this works because zip() captures the list before the name is
    # rebound, but a distinct name would be clearer.
    review = [(id_, m) for id_, changed, m in zip(ids, changed, matches)
              if changed and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [
        set([(s, str(o.toPython()))
             for s, p, o in cg.triples((u, None, None))]) for u, _ in review
    ]
    wat_a = [
        set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))])
        for u, _ in review
    ]
    wat_c_ = [
        set(cg.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [
        set(g.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    # triples present in the combined graph but absent from chebislim,
    # i.e. NIF-only additions (diff compares (subject, object-string)
    # pairs so datatype differences don't mask matches)
    diff = [a - c for a, c in zip(wat_a, wat_c)]
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = createOntology(
        'chebi-bridge',
        'NIF ChEBI bridge',
        makePrefixes('CHEBI', 'BFO1SNAP', 'owl', 'skos', 'dc', 'hasRole',
                     'NIFCHEM', 'oboInOwl', 'NIFMOL', 'OBOANN', 'BIRNANN'),
        'chebibridge',
        ('This bridge file contains additional annotations'
         ' on top of CHEBI identifiers that were originally'
         ' included in NIF-Chemical or NIF-Molecule that have'
         ' not since been added to CHEBI upstream'),
        path='ttl/bridge/',
        #imports=('https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebislim.ttl',
        #'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebi-dead.ttl'))
        imports=(
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebislim.ttl',
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebi-dead.ttl'))

    # copy each NIF-only triple (and its dependencies) into the bridge
    # NOTE(review): 'out' is never populated or used below.
    out = []
    for set_ in diff:
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                # please note that this process will do things like remove hasStreenName ectasy from CHEBI:1391 since chebislim has it listed as a synonym
                py = t[-1].toPython()
                if py == string and not py.startswith(
                        'ub'
                ):  # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions...
                    cb.add_recursive(t, g)
        cb.add_class(
            sub
        )  # only need to go at the end because sub is the same for each set

    def hasImplicitSuperclass(s, o):
        # True if o is reachable from s via subClassOf in chebislim;
        # returns None (falsy) otherwise
        # NOTE(review): no cycle guard -- assumes the cg hierarchy is acyclic.
        for super_ in cg.objects(s, rdflib.RDFS.subClassOf):
            if super_ == o:
                return True
            elif hasImplicitSuperclass(super_, o):
                return True

    # curation decisions after review (see outtc for full list)
    curatedOut = []

    def curateOut(*t):
        # record the rejected triple (expanded) and delete it from the bridge
        curatedOut.append(
            tuple(
                ug.expand(_) if type(_) is not rdflib.Literal else _
                for _ in t))
        cb.del_trip(*t)

    curateOut(
        'CHEBI:6887', 'rdfs:subClassOf', 'CHEBI:23367'
    )  # defer to the chebi choice of chemical substance over molecular entity since it is classified as a racemate which doesn't quite match the mol ent def
    curateOut(
        'CHEBI:26519', 'rdfs:subClassOf', 'CHEBI:24870'
    )  # some ions may also be free radicals, but all free radicals are not ions!
    #natural product removal since natural product should probably be a role if anything...
    curateOut('CHEBI:18059', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:24921', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:37332', 'rdfs:subClassOf', 'CHEBI:33243')

    curateOut('CHEBI:50906', 'rdfs:label',
              rdflib.Literal('Chemical role', datatype=rdflib.XSD.string)
              )  # chebi already has a chemical role...
    curateOut(
        'CHEBI:22586', 'rdfs:subClassOf', 'CHEBI:24432'
    )  # antioxidant is already modelled as a chemical role instead of a biological role, the distinction is that the biological roles affect biological processes/property, not chemical processes/property
    curateOut('CHEBI:22720', 'rdfs:subClassOf',
              'CHEBI:27171')  # not all children are bicyclic
    curateOut(
        'CHEBI:23447', 'rdfs:subClassOf', 'CHEBI:17188'
    )  # this one seems obviously flase... all cyclic nucleotides are not nucleoside 5'-monophosphate...
    curateOut(
        'CHEBI:24922', 'rdfs:subClassOf', 'CHEBI:27171'
    )  # not all children are bicyclic, some may be poly, therefore removing
    curateOut(
        'CHEBI:48706', 'rdfs:subClassOf', 'CHEBI:33232'
    )  # removing since antagonist is more incidental and pharmacological role is more appropriate (as chebi has it)
    curateOut('CHEBI:51064', 'rdfs:subClassOf',
              'CHEBI:35338')  # removing since chebi models this with has part
    curateOut(
        'CHEBI:8247', 'rdfs:subClassOf', 'CHEBI:22720'
    )  # the structure is 'fused to' a benzo, but it is not a benzo, chebi has the correct
    #curateOut('CHEBI:9463', 'rdfs:subClassOf', 'CHEBI:50786')  # not sure what to make of this wikipedia says one thing, but chebi says another, very strange... not an anabolic agent?!??! wat no idea

    # review hold over subClassOf statements
    # intc: redundant (already implied by chebislim); outtc: kept for review
    intc = []
    outtc = []
    for s, o in cb.g.subject_objects(rdflib.RDFS.subClassOf):
        if str(
                o
        ) == 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#_birnlex_retired_class':
            # we need to remove any of the cases where deprecation was misused
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
        elif hasImplicitSuperclass(s, o):
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
            intc.append((s, rdflib.RDFS.subClassOf, o))
        else:
            outtc.append((s, rdflib.RDFS.subClassOf, o))

    def qname(trips):
        # render triples as curie tuples for human-readable review output
        return tuple(
            tuple(cb.g.namespace_manager.qname(_) for _ in t) for t in trips)

    # print label context for every held-over subClassOf so a human can
    # review it; sgv is a SciGraph vocabulary client defined at module level
    for a, p, b in sorted(qname(outtc)):
        if 'NIFMOL' in b:
            continue  # not considering cases where NIFMOL/NIFCHEM ids are used, that can come later
        s = sgv.findById(a)
        o = sgv.findById(b)
        if s is None or o is None:
            print(a, '=>', s)
            print(b, '=>', o)
        else:
            print(s['labels'], s['curie'])
            print('subClassOf')
            print(o['labels'], o['curie'])
            print((a, p, b))
        print('---------------------')

    cb.write(
    )  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)

    # validation
    diff2 = set(cb.g) - set(cg)
    diff3 = set(cb.g) - diff2  # should just be all the owl:Class entries
    diff4 = set(cb.g) - set(chemg) | set(cb.g) - set(molg)  # not informative
    diff5 = set(cb.g) - diff4  # not informative
    both = set(chemg) & set(
        molg)  # there is no overlap beyond the owl:Class declarations

    def getChebis(set_):
        # keep only triples whose subject is a CHEBI identifier
        return set(t for t in set_ if 'CHEBI_' in t[0])

    def nodt(graph):
        # drop predicates and literal datatypes so comparisons ignore
        # datatype-only differences
        return set((s, str(o) if type(o) is rdflib.Literal else o)
                   for s, p, o in graph)

    # cmc/mmc: CHEBI triples that exist ONLY in NIF-Chemical/NIF-Molecule,
    # i.e. were not carried into the bridge or curated out -- should be empty
    cmc = getChebis((((
        (nodt(chemg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    cmc = sorted(t for s, o in cmc for t in chemg.triples((s, None, o)))
    mmc = getChebis((((
        (nodt(molg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    mmc = sorted(t for s, o in mmc for t in molg.triples((s, None, o)))

    # remove chebi classes from nifchem and nifmol
    def remstuff(sources, targets):
        for source in sources:
            for id_ in source.subjects(rdflib.RDF.type, rdflib.OWL.Class):
                for target in targets:
                    target.del_class(id_)

    remstuff((cg, cd), (chemgg, molgg))

    chemgg.write()
    molgg.write()

    # drop into an interactive shell for manual inspection of the results
    embed()
Exemple #31
0
def clean_hbp_cell():
    """Convert the HBP cell ontology into a NIF import graph.

    Parses the old hbp_cell_ontology, rewrites its edges and identifiers
    into NIF conventions (direct skips, phenotype restrictions on NEURON,
    equivalence substitutions), assigns interlex ids, and returns the new
    graph plus a map from HBP_CELL curies to their replacements.

    Relies on module-level names: PREFIXES, NEURON, expand, v (remote
    vocabulary lookup client), infixowl, ilx_get_start/ilx_conv/ilx_add_ids.
    """
    #old graph
    g = rdflib.Graph()
    g.parse(os.path.expanduser('~/git/methodsOntology/ttl/hbp_cell_ontology.ttl'), format='turtle')
    # strip ontology-header triples so only content remains
    g.remove((None, rdflib.OWL.imports, None))
    g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology))

    #new graph
    NAME = 'NIF-Neuron-HBP-cell-import'
    mg = makeGraph(NAME, prefixes=PREFIXES)
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl'
    mg.add_node(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    mg.add_node(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import')
    mg.add_node(ontid, rdflib.RDFS.comment, 'this file was automatically using pyontutils/hbp_cells.py')
    mg.add_node(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    newgraph = mg.g

    # HBP id suffixes whose classes collapse directly onto an existing NIF class
    skip = {
        '0000000':'NIFCELL:sao1813327414',  # cell
        #'0000001':NEURON,  # neuron  (equiv)
        #'0000002':'NIFCELL:sao313023570',  # glia  (equiv)
        #'0000021':'NIFNEURNT:nlx_neuron_nt_090804',  # glut  (equiv, but phen)
        #'0000022':'NIFNEURNT:nlx_neuron_nt_090803',  # gaba  (equiv, but phen)

        '0000003':NEURON,
        '0000004':NEURON,
        '0000005':NEURON,
        '0000006':NEURON,
        '0000007':NEURON,
        '0000008':NEURON,
        '0000009':NEURON,
        '0000010':NEURON,
        '0000019':NEURON,
        '0000020':NEURON,
        '0000033':NEURON,
        '0000034':NEURON,
        '0000070':NEURON,
        '0000071':NEURON,
    }
    # HBP id suffixes that become someValuesFrom restrictions on NEURON
    to_phenotype = {
        '0000021':('ilx:hasExpressionPhenotype', 'NIFMOL:sao1744435799'),  # glut, all classes that might be here are equived out
        # NOTE(review): 'Experssion' looks like a typo for 'Expression';
        # left as-is because fixing it changes the expanded URI -- confirm
        # against the interlex property before changing.
        '0000022':('ilx:hasExperssionPhenotype', 'NIFMOL:sao229636300'),  # gaba
    }
    # prefixes whose equivalents we try to resolve remotely via v.findById
    lookup = {'NIFCELL', 'NIFNEURNT'}
    # classes with no superclass in the source; patched below
    missing_supers = {
        'HBP_CELL:0000136',
        'HBP_CELL:0000137',
        'HBP_CELL:0000140',
    }

    replace = set()
    phen = set()
    equiv = {}
    for triple in sorted(g.triples((None, None, None))):
        id_suffix = newgraph.namespace_manager.compute_qname(triple[0].toPython())[2]
        # NOTE(review): on failure obj_suffix silently keeps its value from
        # a previous iteration -- confirm that is intended before relying
        # on it in the skip/phen branches below.
        try:
            obj_suffix = newgraph.namespace_manager.compute_qname(triple[2].toPython())[2]
        except:  # it wasn't a url
            pass
        # equiv insert for help
        if triple[1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype:
            qnt = newgraph.namespace_manager.compute_qname(triple[2].toPython())
            #print(qnt)
            if qnt[0] in lookup:
                try:
                    lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0]
                    print('REMOTE', qnt[0] + ':' + qnt[2], lab)
                    #mg.add_node(triple[2], rdflib.RDFS.label, lab)
                    #mg.add_node(triple[0], PREFIXES['OBOANN'] + 'synonym', lab)  # so we can see it
                except TypeError:
                    # findById returned None -> unknown id; nlx ids can be
                    # rescued by rehoming them under NIFSTD
                    if qnt[2].startswith('nlx'):
                        triple = (triple[0], triple[1], expand('NIFSTD:' + qnt[2]))
                    #print('bad identifier')

        #check for equiv
        if triple[0] not in equiv:
            eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)]
            if eq and id_suffix not in skip and id_suffix not in to_phenotype:
                if len(eq) > 1:
                    print(eq)
                equiv[triple[0]] = eq[0]
                continue
        elif triple[0] in equiv:
            continue

        # edge replace
        if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym':
            edge =  rdflib.URIRef('http://ontology.neuinfo.org/NIF/Backend/OBO_annotation_properties.owl#abbrev')
        elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition':
            edge = rdflib.namespace.SKOS.definition
        else:
            edge = triple[1]

        # skip or to phenotype or equiv
        if id_suffix in skip:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            replace.add(triple[0])
            #print('MEEP MEEP')
        elif id_suffix in to_phenotype:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            phen.add(triple[0])
        elif triple[1] == rdflib.RDFS.label:  # fix labels
            if not triple[2].startswith('Hippocampus'):
                new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en')
                newgraph.add((triple[0], edge, new_label))
            else:
                newgraph.add((triple[0], edge, triple[2]))
        elif triple[2] in replace:
            mg.add_node(triple[0], edge, skip[obj_suffix])
        elif triple[2] in phen:
            # rewrite the edge to a phenotype restriction on NEURON
            edge_, rst_on = to_phenotype[obj_suffix]
            edge_ = expand(edge_)
            rst_on = expand(rst_on)

            this = triple[0]
            this = infixowl.Class(this, graph=newgraph)
            this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf]

            restriction = infixowl.Restriction(edge_, graph=newgraph, someValuesFrom=rst_on)
            this.subClassOf = [restriction] + [c for c in this.subClassOf]
        elif triple[2] in equiv:
            newgraph.add((triple[0], edge, equiv[triple[2]]))
        else:
            newgraph.add((triple[0], edge, triple[2]))

    # final cleanup for forward references (since we iterate through sorted)

    tt = rdflib.URIRef(expand('HBP_CELL:0000033'))
    tf = rdflib.URIRef(expand('HBP_CELL:0000034'))
    newgraph.remove((None, None, tt))
    newgraph.remove((None, None, tf))

    # add missing subClasses
    for nosub in missing_supers:
        mg.add_node(nosub, rdflib.RDFS.subClassOf, NEURON)

    # cleanup for subClassOf
    for subject in sorted(newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))):
        sco = [a for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))]
        #print('U WOT M8')
        if len(sco) > 1:
            #print('#############\n', sco)
            # NOTE(review): 'and' binds tighter than 'or', so the
            # 'o != expand(NEURON)' guard applies only to the 'NIF-Cell'
            # branch -- confirm that is the intended precedence.
            for s, p, o in sco:
                if 'hbp_cell_ontology' in o or 'NIF-Cell' in o and o != expand(NEURON): #or 'sao2128417084' in o:  # neocortex pyramidal cell
                    #print(sco)
                    newgraph.remove((subject, rdflib.RDFS.subClassOf, expand(NEURON)))
                    break

    # do ilx
    ilx_start = ilx_get_start()
    #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv)  # FIXME NOPE, also need to modify the graph :/
    ilx_labels, ilx_replace = ilx_conv(graph=newgraph, prefix='HBP_CELL', ilx_start=ilx_start)
    ilx_add_ids(ilx_labels)
    with open('hbp_cell_ilx_ids.json', 'wt') as f:
        json.dump(ilx_replace, f)

    # build the final curie -> replacement map
    replace_map = ilx_replace
    for hbp, rep in skip.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep
    for hbp, (e, rep) in to_phenotype.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        # NOTE(review): 'edge' here is the stale loop variable left over
        # from the big triple loop above; 'e' (the phenotype edge unpacked
        # on this line) was almost certainly intended -- confirm and fix.
        replace_map[ori] = edge, rep
    for hbp_iri, rep_iri in equiv.items():
        hbp = newgraph.compute_qname(hbp_iri)[2]
        rep = newgraph.qname(rep_iri)
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    return mg, replace_map
Exemple #32
0
#!/usr/bin/env python3.5

import rdflib
from utils import makePrefixes, makeGraph

PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

# Pull the uberon -> nifstd bridge and parse it as RDF/XML.
g = rdflib.Graph()
g.parse(
    'http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl',
    format='xml')
name = 'NIFGA-Equivs'
ng = makeGraph(name, PREFIXES)
# For every equivalentClass mapping, re-home the subject under the NIFGA
# namespace using the object's final path segment as the fragment.
# A plain loop replaces the original side-effect list comprehension:
# comprehensions are for building values, not for their side effects.
for s, p, o in g.triples((None, rdflib.OWL.equivalentClass, None)):
    ng.g.add((rdflib.URIRef(PREFIXES['NIFGA'] + o.rsplit('/', 1)[-1]), p, o))
ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl',
           'NIFGA to NIFSTD mappings')
ng.write()
def clean_hbp_cell():
    """Convert the HBP cell ontology into a NIF import graph.

    Parses the old hbp_cell_ontology, rewrites its edges and identifiers
    into NIF conventions (direct skips, phenotype restrictions on NEURON,
    equivalence substitutions), assigns interlex ids, and returns the new
    graph plus a map from HBP_CELL curies to their replacements.

    Relies on module-level names: PREFIXES, NEURON, expand, v (remote
    vocabulary lookup client), infixowl, ilx_get_start/ilx_conv/ilx_add_ids.
    """
    #old graph
    g = rdflib.Graph()
    g.parse(
        os.path.expanduser('~/git/methodsOntology/ttl/hbp_cell_ontology.ttl'),
        format='turtle')
    # strip ontology-header triples so only content remains
    g.remove((None, rdflib.OWL.imports, None))
    g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology))

    #new graph
    NAME = 'NIF-Neuron-HBP-cell-import'
    mg = makeGraph(NAME, prefixes=PREFIXES)
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl'
    mg.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    mg.add_trip(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import')
    mg.add_trip(ontid, rdflib.RDFS.comment,
                'this file was automatically using pyontutils/hbp_cells.py')
    mg.add_trip(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    newgraph = mg.g

    # HBP id suffixes whose classes collapse directly onto an existing NIF class
    skip = {
        '0000000': 'SAO:1813327414',  # cell
        #'0000001':NEURON,  # neuron  (equiv)
        #'0000002':'SAO:313023570',  # glia  (equiv)
        #'0000021':'NLXNEURNT:090804',  # glut  (equiv, but phen)
        #'0000022':'NLXNEURNT:090803',  # gaba  (equiv, but phen)
        '0000003': NEURON,
        '0000004': NEURON,
        '0000005': NEURON,
        '0000006': NEURON,
        '0000007': NEURON,
        '0000008': NEURON,
        '0000009': NEURON,
        '0000010': NEURON,
        '0000019': NEURON,
        '0000020': NEURON,
        '0000033': NEURON,
        '0000034': NEURON,
        '0000070': NEURON,
        '0000071': NEURON,
    }
    # HBP id suffixes that become someValuesFrom restrictions on NEURON
    to_phenotype = {
        '0000021': ('ilx:hasExpressionPhenotype', 'SAO:1744435799'
                    ),  # glut, all classes that might be here are equived out
        # NOTE(review): 'Experssion' looks like a typo for 'Expression';
        # left as-is because fixing it changes the expanded URI -- confirm
        # against the interlex property before changing.
        '0000022': ('ilx:hasExperssionPhenotype', 'SAO:229636300'),  # gaba
    }
    # prefixes whose equivalents we try to resolve remotely via v.findById
    lookup = {'NIFCELL', 'NIFNEURNT'}
    # classes with no superclass in the source; patched below
    missing_supers = {
        'HBP_CELL:0000136',
        'HBP_CELL:0000137',
        'HBP_CELL:0000140',
    }

    replace = set()
    phen = set()
    equiv = {}
    for triple in sorted(g.triples((None, None, None))):
        id_suffix = newgraph.namespace_manager.compute_qname(
            triple[0].toPython())[2]
        # NOTE(review): on failure obj_suffix silently keeps its value from
        # a previous iteration -- confirm that is intended before relying
        # on it in the skip/phen branches below.
        try:
            obj_suffix = newgraph.namespace_manager.compute_qname(
                triple[2].toPython())[2]
        except Exception:  # it wasn't a url (narrowed from a bare except)
            pass
        # equiv insert for help
        if triple[
                1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype:
            qnt = newgraph.namespace_manager.compute_qname(
                triple[2].toPython())
            #print(qnt)
            if qnt[0] in lookup:
                try:
                    lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0]
                    print('REMOTE', qnt[0] + ':' + qnt[2], lab)
                    #mg.add_trip(triple[2], rdflib.RDFS.label, lab)
                    #mg.add_trip(triple[0], PREFIXES['NIFRID'] + 'synonym', lab)  # so we can see it
                except TypeError:
                    # findById returned None -> unknown id; nlx ids can be
                    # rescued by rehoming them under NIFSTD
                    if qnt[2].startswith('nlx'):
                        triple = (triple[0], triple[1],
                                  expand('NIFSTD:' + qnt[2]))
                    #print('bad identifier')

        #check for equiv
        if triple[0] not in equiv:
            eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)]
            if eq and id_suffix not in skip and id_suffix not in to_phenotype:
                if len(eq) > 1:
                    print(eq)
                equiv[triple[0]] = eq[0]
                continue
        elif triple[0] in equiv:
            continue

        # edge replace
        if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym':
            edge = mg.expand('NIFRID:abbrev')
        elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition':
            edge = rdflib.namespace.SKOS.definition
        else:
            edge = triple[1]

        # skip or to phenotype or equiv
        if id_suffix in skip:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            replace.add(triple[0])
            #print('MEEP MEEP')
        elif id_suffix in to_phenotype:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            phen.add(triple[0])
        elif triple[1] == rdflib.RDFS.label:  # fix labels
            if not triple[2].startswith('Hippocampus'):
                new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en')
                newgraph.add((triple[0], edge, new_label))
            else:
                newgraph.add((triple[0], edge, triple[2]))
        elif triple[2] in replace:
            mg.add_trip(triple[0], edge, skip[obj_suffix])
        elif triple[2] in phen:
            # rewrite the edge to a phenotype restriction on NEURON
            edge_, rst_on = to_phenotype[obj_suffix]
            edge_ = expand(edge_)
            rst_on = expand(rst_on)

            this = triple[0]
            this = infixowl.Class(this, graph=newgraph)
            this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf]

            restriction = infixowl.Restriction(edge_,
                                               graph=newgraph,
                                               someValuesFrom=rst_on)
            this.subClassOf = [restriction] + [c for c in this.subClassOf]
        elif triple[2] in equiv:
            newgraph.add((triple[0], edge, equiv[triple[2]]))
        else:
            newgraph.add((triple[0], edge, triple[2]))

    # final cleanup for forward references (since we iterate through sorted)

    tt = rdflib.URIRef(expand('HBP_CELL:0000033'))
    tf = rdflib.URIRef(expand('HBP_CELL:0000034'))
    newgraph.remove((None, None, tt))
    newgraph.remove((None, None, tf))

    # add missing subClasses
    for nosub in missing_supers:
        mg.add_trip(nosub, rdflib.RDFS.subClassOf, NEURON)

    # cleanup for subClassOf
    for subject in sorted(
            newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))):
        sco = [
            a
            for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))
        ]
        #print('U WOT M8')
        if len(sco) > 1:
            #print('#############\n', sco)
            # NOTE(review): 'and' binds tighter than 'or', so the
            # 'o != expand(NEURON)' guard applies only to the 'NIF-Cell'
            # branch -- confirm that is the intended precedence.
            for s, p, o in sco:
                if 'hbp_cell_ontology' in o or 'NIF-Cell' in o and o != expand(
                        NEURON
                ):  #or 'sao2128417084' in o:  # neocortex pyramidal cell
                    #print(sco)
                    newgraph.remove(
                        (subject, rdflib.RDFS.subClassOf, expand(NEURON)))
                    break

    # do ilx
    ilx_start = ilx_get_start()
    #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv)  # FIXME NOPE, also need to modify the graph :/
    ilx_labels, ilx_replace = ilx_conv(graph=newgraph,
                                       prefix='HBP_CELL',
                                       ilx_start=ilx_start)
    ilx_add_ids(ilx_labels)
    with open('hbp_cell_ilx_ids.json', 'wt') as f:
        json.dump(ilx_replace, f)

    # build the final curie -> replacement map
    replace_map = ilx_replace
    for hbp, rep in skip.items():
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep
    for hbp, (e, rep) in to_phenotype.items():
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        # FIX: use 'e', the phenotype edge unpacked from this mapping; the
        # previous code used 'edge', a stale leftover from the triple loop
        # above, which recorded an arbitrary predicate here.
        replace_map[ori] = e, rep
    for hbp_iri, rep_iri in equiv.items():
        hbp = newgraph.compute_qname(hbp_iri)[2]
        rep = newgraph.qname(rep_iri)
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    return mg, replace_map
Exemple #34
0
from glob import glob
from rdflib.namespace import SKOS
from parcellation import OntMeta
from utils import TODAY, makeGraph, makePrefixes

PREFIXES = makePrefixes('SCR', 'MBA', 'NIFMOL', 'NIFNEURON', 'NIFCELL',
                        'NIFGA', 'UBERON', 'PR', 'NIFNEURMOR', 'skos', 'owl')

ont = OntMeta(
    'http://ontology.neuinfo.org/NIF/ttl/generated/', 'ksdesc-defs',
    'Knolwedge Space Defs', 'KSDEFS',
    'Definitions from knowledge space descriptions. Generated by pyontutils/ksdesc_bridge.py',
    TODAY)

ontid = ont.path + ont.filename + '.ttl'
g = makeGraph(ont.filename, prefixes=PREFIXES)
g.add_ont(ontid, *ont[2:])

top_level = glob(os.path.expanduser('~/git/ksdesc/') + '*')

for putative_dir in top_level:
    if os.path.isdir(putative_dir):
        for putative_md in glob(putative_dir + '/*.md'):
            ident = os.path.split(putative_dir)[-1] + ':' + os.path.splitext(
                os.path.split(putative_md)[-1])[0]
            print(ident)
            with open(putative_md, 'rt') as f:
                def_ = f.read()

            for test in ('Description', 'Definition'):
                if test in def_:
Exemple #35
0
def chebi_make():
    """Build the NIF ChEBI slim ('chebislim') and deprecated-id
    ('chebi-dead') bridge ontologies from the full ChEBI nightly dump.

    Pipeline visible in this function:
      1. read the wanted curies from chebi-subset-ids.txt,
      2. download and decompress the nightly chebi.owl.gz in memory,
      3. prune the OWL/XML down to the wanted classes, the classes that
         replaced any deprecated wanted ids, and (recursively) all of
         their superclasses,
      4. parse the pruned XML with rdflib and copy the triples into two
         makeGraph graphs, routing deprecated ids into chebi-dead,
      5. serialize both graphs and drop into an interactive shell.

    NOTE(review): relies on names bound elsewhere in this file
    (makePrefixes, makeGraph, OntMeta, TODAY, requests, gzip, BytesIO,
    etree -- presumably lxml given getparent()/xpath -- rdflib, embed);
    confirm against the module imports.  Performs network and file I/O.
    """
    PREFIXES = makePrefixes('definition',
                            'hasRole',
                            'CHEBI',
                            'owl',
                            'skos',
                            'oboInOwl')
    # Smaller prefix set for the deprecated-class graph.
    dPREFIXES = makePrefixes('CHEBI','replacedBy','owl','skos')
    # Utility graph used only for curie expansion / qname computation.
    ug = makeGraph('utilgraph', prefixes=PREFIXES)

    IDS_FILE = 'chebi-subset-ids.txt'
    with open(IDS_FILE, 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))  # curies, e.g. 'CHEBI:33243'
        ids = set((ug.expand(_.strip()).toPython() for _ in ids_raw))  # full IRIs as str

    #gzed = requests.get('http://localhost:8000/chebi.owl')
    #raw = BytesIO(gzed.content)
    # Fetch the full nightly build and decompress it entirely in memory.
    gzed = requests.get('http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/nightly/chebi.owl.gz')
    raw = BytesIO(gzip.decompress(gzed.content))
    t = etree.parse(raw)
    r = t.getroot()
    cs = r.getchildren()
    # owl:Class elements whose first attribute value (rdf:about) is a wanted IRI.
    classes = [_ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class' and _.values()[0] in ids]
    ontology = t.xpath("/*[local-name()='RDF']/*[local-name()='Ontology']")
    ops = t.xpath("/*[local-name()='RDF']/*[local-name()='ObjectProperty']")  # TODO
    # NOTE(review): `wanted` is not referenced again below -- confirm it is
    # intentionally unused before removing.
    wanted = [etree.ElementTree(_) for _ in classes]
    # hasAlternativeId elements let us find the live class that replaced a
    # deprecated wanted id.
    rpl_check = t.xpath("/*[local-name()='RDF']/*[local-name()='Class']/*[local-name()='hasAlternativeId']")
    rpl_dict = {_.text:_.getparent() for _ in rpl_check if _.text in ids_raw } # we also need to have any new classes that have replaced old ids
    also_classes = list(rpl_dict.values())
    def rec(start_set, done):
        """Recursively collect superclass elements of start_set.

        Gathers IRIs referenced via rdfs:subClassOf (both direct and through
        owl:Restriction/someValuesFrom), resolves them against the top-level
        children `cs`, and returns (new elements, set of referenced IRIs).
        `done` tracks already-collected elements to avoid cycles/duplicates.
        """
        ids_ = set()
        for c in start_set:
            ids_.update([_.items()[0][1] for _ in etree.ElementTree(c).xpath("/*[local-name()='Class']/*[local-name()='subClassOf']") if _.items()])
            ids_.update([_.items()[0][1] for _ in etree.ElementTree(c).xpath("/*[local-name()='Class']/*[local-name()='subClassOf']/*[local-name()='Restriction']/*[local-name()='someValuesFrom']") if _.items()])
        supers = [_ for _ in cs if _.tag == '{http://www.w3.org/2002/07/owl#}Class' and _.values()[0] in ids_ and _ not in done]
        if supers:
            msup, mids = rec(supers, done + supers)
            supers += msup
            ids_.update(mids)
        return supers, ids_
    a = ontology + ops + classes + also_classes
    more, mids = rec(a, a)
    all_ = set(a + more)
    r.clear()  # wipe all the stuff we don't need
    # Rebuild the root with only the elements we want to keep, then
    # round-trip through rdflib to get triples.
    for c in all_:
        r.append(c)
    data = etree.tostring(r)

    g = rdflib.Graph()
    g.parse(data=data)  # now _this_ is stupidly slow (like 20 minutes of slow) might make more sense to do the xml directly?

    # versionIRI of the source ontology, recorded in the generated comments.
    src_version = list(g.query('SELECT DISTINCT ?match WHERE { ?temp rdf:type owl:Ontology . ?temp owl:versionIRI ?match . }'))[0][0]

    ont = OntMeta('http://ontology.neuinfo.org/NIF/ttl/generated/',
                  'chebislim',
                  'NIF ChEBI slim',
                  'chebislim',
                  'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),
                  TODAY)
    # NOTE(review): 'resolvablefrom' below is missing a space, but this is a
    # runtime string emitted into the generated ontology, so it is left as-is.
    dont = OntMeta('http://ontology.neuinfo.org/NIF/ttl/generated/',
                  'chebi-dead',
                  'NIF ChEBI deprecated',
                  'chebidead',
                  'This file is generated by pyontutils/slimgen to make deprecated classes resolvablefrom the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),
                  TODAY)

    # Output graph for the slim, and a second graph for deprecated classes.
    new_graph = makeGraph(ont.filename, PREFIXES)
    ontid = ont.path + ont.filename + '.ttl'
    new_graph.add_ont(ontid, *ont[2:])
    chebi_dead = makeGraph(dont.filename, dPREFIXES)
    dontid = dont.path + dont.filename + '.ttl'
    chebi_dead.add_ont(dontid, *dont[2:])

    # Known ids that produce no triples and have no replacement ("deprecated
    # without replacement"); tolerated rather than treated as an error.
    depwor = {'CHEBI:33243':'natural product',  # FIXME remove these?
              'CHEBI:36809':'tricyclic antidepressant',
             }

    # Iterate the requested curies plus curies for every superclass IRI we
    # pulled in transitively (mids), in sorted order for determinism.
    for id_ in sorted(set(ids_raw) | set((ug.g.namespace_manager.qname(_) for _ in mids))):
        eid = ug.expand(id_)
        trips = list(g.triples((eid, None, None)))
        if not trips:
            #looks for the id_ as a literal
            alts = list(g.triples((None,
                                             rdflib.term.URIRef('http://www.geneontology.org/formats/oboInOwl#hasAlternativeId'),
                                             rdflib.Literal(id_, datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')))))
            if alts:
                replaced_by, _, __ = alts[0]
                if replaced_by.toPython() not in ids:  #  we need to add any replacment classes to the bridge
                    print('REPLACED BY NEW CLASS', id_)
                    for t in g.triples((replaced_by, None, None)):
                        new_graph.add_recursive(t, g)
                # Record the deprecated id and its replacement in chebi-dead.
                chebi_dead.add_class(id_)
                chebi_dead.add_node(id_, 'replacedBy:', replaced_by)
                chebi_dead.add_node(id_, rdflib.OWL.deprecated, True)
            else:
                # No triples and no replacement: only acceptable for the
                # known deprecated-without-replacement ids above.
                # NOTE(review): raising BaseException (not Exception) is a
                # smell -- it escapes `except Exception` handlers.
                if id_ not in depwor:
                    raise BaseException('wtf error', id_)
        else:
            # Live class: copy its triples (recursively, via helper) into the slim.
            for trip in trips:
                new_graph.add_recursive(trip, g)

    new_graph.write()
    chebi_dead.write()
    embed()  # drop into an interactive shell for inspection