Example #1
from pyontutils.core import makeGraph, makePrefixes


def normalize_prefixes(graph, curies):
    mg = makeGraph('nifall',
                   makePrefixes('owl', 'skos', 'oboInOwl'),
                   graph=graph)
    mg.del_namespace('')

    old_namespaces = list(graph.namespaces())
    ng_ = makeGraph('', prefixes=makePrefixes('oboInOwl', 'skos'))
    [ng_.g.add(t) for t in mg.g]
    [ng_.add_namespace(n, p) for n, p in curies.items() if n != '']
    #[mg.add_namespace(n, p) for n, p in old_namespaces if n.startswith('ns') or n.startswith('default')]
    #[mg.del_namespace(n) for n in list(mg.namespaces)]
    #graph.namespace_manager.reset()
    #[mg.add_namespace(n, p) for n, p in wat.items() if n != '']
    return mg, ng_
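
A minimal invocation sketch (the input file name and curies mapping are hypothetical):

import rdflib

graph = rdflib.Graph().parse('example.ttl', format='turtle')  # hypothetical input file
curies = {'skos': 'http://www.w3.org/2004/02/skos/core#'}  # hypothetical curie -> iri map
mg, ng_ = normalize_prefixes(graph, curies)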
Example #2
File: coco.py Project: MCSZ/pyontutils
    def __new__(cls, validate=False):
        error = 'Expected %s got %s'
        if type(cls.ont) != OntMeta:
            raise TypeError(error % (OntMeta, type(cls.ont)))
        elif type(cls.concept) != PScheme:
            raise TypeError(error % (PScheme, type(cls.concept)))
        elif type(cls.atlas) != PSArtifact:
            raise TypeError(error % (PSArtifact, type(cls.atlas)))

        ontid = cls.ont.path + cls.ont.filename + '.ttl'
        PREFIXES = {k: v for k, v in cls.PREFIXES.items()}
        PREFIXES.update(genericPScheme.PREFIXES)
        #if '' in cls.PREFIXES:  # NOT ALLOWED!
        #if PREFIXES[''] is None:
        #PREFIXES[''] = ontid + '/'
        graph = makeGraph(cls.ont.filename, PREFIXES, writeloc='/tmp/')
        graph.add_ont(ontid, *cls.ont[2:])
        make_scheme(graph, cls.concept, cls.atlas.curie)
        data = cls.datagetter()
        cls.datamunge(data)
        cls.dataproc(graph, data)
        add_ops(graph)
        graph.write()
        if validate or getattr(cls, 'VALIDATE', False):
            cls.validate(graph)
        return ontid, cls.atlas
Example #3
import rdflib
from pyontutils.core import makeGraph, makePrefixes


def ilx_json_to_tripples(j):  # this will be much easier if everything can be exported as a relationship or an annotation
    g = makeGraph('do not write me',
                  prefixes=makePrefixes('ILX', 'ilx', 'owl', 'skos', 'NIFRID'))

    def pref(inp):
        return makePrefixes('ilx')['ilx'] + inp

    id_ = pref(j['ilx'])
    type_ = {
        'term': 'owl:Class',
        'relationship': 'owl:ObjectProperty',
        'annotation': 'owl:AnnotationProperty'
    }[j['type']]
    out = []  # TODO need to expand these
    out.append((id_, rdflib.RDF.type, type_))
    out.append((id_, rdflib.RDFS.label, j['label']))
    out.append((id_, 'skos:definition', j['definition']))
    for syndict in j['synonyms']:
        out.append((id_, 'NIFRID:synonym', syndict['literal']))
    for superdict in j['superclasses']:  # should we be returning the preferred id here not the ilx? or maybe that is a different json output?
        out.append((id_, rdflib.RDFS.subClassOf, pref(superdict['ilx'])))
    for eid in j['existing_ids']:
        out.append((id_, 'ilx:someOtherId', eid['iri']))  # predicate TODO
    [g.add_trip(*o) for o in out]
    return g.g.serialize(format='nifttl')  # other formats can be chosen
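
A sketch of the record shape the function expects, inferred from the keys it reads (all values hypothetical):

record = {
    'ilx': 'ilx_0101431',  # hypothetical interlex fragment
    'type': 'term',  # one of 'term', 'relationship', 'annotation'
    'label': 'example label',
    'definition': 'example definition',
    'synonyms': [{'literal': 'example synonym'}],
    'superclasses': [{'ilx': 'ilx_0101432'}],  # hypothetical parent record
    'existing_ids': [{'iri': 'http://example.org/0101431'}],
}
ttl = ilx_json_to_tripples(record)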
Example #4
 def __init__(self, input):
     prefixes = {**{'JAX': 'http://jaxmice.jax.org/strain/',
                 'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
                 'AIBS': 'http://api.brain-map.org/api/v2/data/TransgenicLine/'},
                 **makePrefixes('definition', 'ilxtr', 'owl')}
     self.g = makeGraph('transgenic-lines', prefixes=prefixes)
     self.neuron_data = input
Example #5
def load(file, olr=None, mkdir=False):
    filepath = os.path.expanduser(file)
    _, ext = os.path.splitext(filepath)
    filetype = ext.strip('.')
    if filetype == 'ttl':
        infmt = 'turtle'
    else:
        infmt = None
    print(filepath)
    graph = rdflib.Graph()
    try:
        graph.parse(filepath, format=infmt)
    except rdflib.plugins.parsers.notation3.BadSyntax as e:
        print('PARSING FAILED', filepath)
        raise e
    og = makeGraph('', graph=graph)

    # FIXME this should really just be a function :/
    curie, *prefs = kludge(filepath)

    name = os.path.splitext(os.path.basename(filepath))[0]
    if 'slim' in name:
        name = name.replace('slim', '')
    try:
        version = list(graph.subject_objects(owl.versionIRI))[0][1]
    except IndexError:
        version = list(graph.subjects(rdf.type, owl.Ontology))[0]

    ng = createOntology(f'{name}-dead',
                        f'NIF {curie} deprecated',
                        makePrefixes('replacedBy', 'NIFRID', curie, *prefs),
                        f'{name}dead',
                        f'Classes from {curie} with owl:deprecated true that we want rdfs:subClassOf NIFRID:birnlexRetiredClass, or classes hiding in a oboInOwl:hasAlternativeId annotation. This file was generated by pyontutils/necromancy from {version}.',
                        local_base=olr)
    extract(og, ng, curie, mkdir)
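
A hypothetical invocation (paths depend on the local checkout; kludge, createOntology and extract must be in scope from the same module):

load('~/git/NIF-Ontology/ttl/NIF-Chemical.ttl', olr='~/git/NIF-Ontology', mkdir=True)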
Example #6
import os
import rdflib
from pyontutils.core import makeGraph


def loadGraphFromFile(filename, prefixes=None):
    graph = rdflib.Graph()
    graph.parse(filename, format='turtle')
    fn = os.path.splitext(filename)[0]
    print(fn)
    mg = makeGraph(fn, prefixes=prefixes, graph=graph, writeloc='')
    return mg
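
For example (file name hypothetical):

from pyontutils.core import makePrefixes

mg = loadGraphFromFile('example.ttl', prefixes=makePrefixes('owl', 'skos'))
print(len(mg.g))  # triple count of the parsed graph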
Example #7
 def switch_dead(g):
     ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
     for f, r in deads.items():
         ng.replace_uriref(f, r)
         ng.add_trip(r, 'oboInOwl:hasAlternateId',
                     rdflib.Literal(f, datatype=rdflib.XSD.string))
         g.remove((r, replacedBy, r))  # in case the replacedBy triple was already present
Example #8
    def fixAltIdIsURIRef(g):
        hai = ug.expand('oboInOwl:hasAlternativeId')
        # i = ug.expand('oboInOwl:id')  # :id does not exist
        makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
Example #9
import rdflib
from pyontutils.core import makeGraph, log


def graphFromGithub(link, verbose=False):
    # mmmm no validation
    # also caching probably
    if verbose:
        log.info(link)
    return makeGraph('',
                     graph=rdflib.Graph().parse(f'{link}?raw=true',
                                                format='turtle'))
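
A hypothetical call; any GitHub blob link to a turtle file should behave the same way, since the function appends ?raw=true to fetch the raw content:

mg = graphFromGithub('https://github.com/SciCrunch/NIF-Ontology/blob/master/ttl/stimulation.ttl',
                     verbose=True)
print(len(mg.g))  # number of triples parsed from the fetched file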
Example #10
import rdflib
from pyontutils.core import makeGraph


def do_file(filename, swap, *args):  # swap: see switchURIs in Example #17
    print('START', filename)
    ng = rdflib.Graph()
    ng.parse(filename, format='turtle')
    reps = switchURIs(ng, swap, *args)
    wg = makeGraph('', graph=ng)
    wg.filename = filename
    wg.write()
    print('END', filename)
    return reps
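
Hypothetical usage; swap follows the contract sketched after Example #17 and the extra args are forwarded to it unchanged (all argument names hypothetical):

reps = do_file('NIF-Chemical.ttl', my_swap, uri_reps, fragment_prefixes)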
Example #11
def main():
    PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

    g = rdflib.Graph()
    g.parse('http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl', format='xml')
    name = 'NIFGA-Equivs'
    ng = makeGraph(name, PREFIXES)
    [ng.g.add(t) for t in ((rdflib.URIRef(PREFIXES['NIFGA'] + o.rsplit('/', 1)[-1]), p, o)
                           for s, p, o in g.triples((None, rdflib.OWL.equivalentClass, None)))]
    ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl', 'NIFGA to NIFSTD mappings')
    ng.write()
Example #12
 def fixIons(g):
     # there is a series of atom/ion confusions that shall be dealt with; the solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
     ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
     # atom           ion
     None, 'CHEBI:29108'  # calcium is ok
     ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
     ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
     ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
     ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
     None, 'CHEBI:29105'  # zinc is ok
Example #13
def graph_todo(graph, curie_prefixes, get_values):
    ug = makeGraph('big-graph', graph=graph)
    ug.add_known_namespaces('NIFRID')
    fragment_prefixes, ureps = get_values(ug)
    #all_uris = sorted(set(_ for t in graph for _ in t if type(_) == rdflib.URIRef))  # this snags a bunch of other URIs
    #all_uris = sorted(set(_ for _ in graph.subjects() if type(_) != rdflib.BNode))
    #all_uris = set(spo for t in graph.subject_predicates() for spo in t if isinstance(spo, rdflib.URIRef))
    all_uris = set(spo for t in graph for spo in t if isinstance(spo, rdflib.URIRef))
    prefs = set(_.rsplit('#', 1)[0] + '#' if '#' in _
                else (_.rsplit('_', 1)[0] + '_' if '_' in _
                      else _.rsplit('/', 1)[0] + '/') for _ in all_uris)
    nots = set(_ for _ in prefs if _ not in curie_prefixes)  # TODO
    sos = set(prefs) - set(nots)
    all_uris = [u if u not in ureps
                else ureps[u]
                for u in all_uris]
    #to_rep = set(_.rsplit('#', 1)[-1].split('_', 1)[0] for _ in all_uris if 'ontology.neuinfo.org' in _)
    #to_rep = set(_.rsplit('#', 1)[-1] for _ in all_uris if 'ontology.neuinfo.org' in _)

    ignore = (
        # deprecated and only in as annotations
        'NIFGA:birnAnatomy_011',
        'NIFGA:birnAnatomy_249',
        'NIFORG:birnOrganismTaxon_19',
        'NIFORG:birnOrganismTaxon_20',
        'NIFORG:birnOrganismTaxon_21',
        'NIFORG:birnOrganismTaxon_390',
        'NIFORG:birnOrganismTaxon_391',
        'NIFORG:birnOrganismTaxon_56',
        'NIFORG:birnOrganismTaxon_68',
        'NIFINV:birnlexInvestigation_174',
        'NIFINV:birnlexInvestigation_199',
        'NIFINV:birnlexInvestigation_202',
        'NIFINV:birnlexInvestigation_204',
    )
    ignore = tuple(ug.expand(i) for i in ignore)


    non_normal_identifiers = sorted(u for u in all_uris
                                    if 'ontology.neuinfo.org' in u
                                    and noneMembers(u, *fragment_prefixes)
                                    and not u.endswith('.ttl')
                                    and not u.endswith('.owl')
                                    and u not in ignore)
    print(len(prefs))
    embed()
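
A stub showing the contract of the get_values callback, inferred from the unpacking above: it receives the wrapped graph and returns a (fragment_prefixes, ureps) pair (values hypothetical):

def get_values(ug):
    fragment_prefixes = ('birnlex_', 'nlx_')  # substrings tested via noneMembers above
    ureps = {}  # maps rdflib.URIRef -> replacement URIRef
    return fragment_prefixes, ureps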
Example #14
def main():
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    rows = [
        r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
        if r and r[0][0] != '#'
    ]
    header = [
        'Record_ID', 'parent_category', 'name', 'description', 'required_tags'
    ]

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass

        def _row_post(self):
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    asdf = nat(rows, header)
    graph.write()
    if __name__ == '__main__':
        breakpoint()
Example #15
File: coco.py Project: MCSZ/pyontutils
def parcellation_schemes(ontids_atlases):
    ont = OntMeta(
        GENERATED, 'parcellation',
        'NIF collected parcellation schemes ontology', 'NIF Parcellations',
        'Brain parcellation schemes as represented by root concepts.', TODAY())
    ontid = ont.path + ont.filename + '.ttl'
    PREFIXES = makePrefixes('ilxtr', 'owl', 'skos', 'NIFRID')
    graph = makeGraph(ont.filename, PREFIXES, writeloc='/tmp/')
    graph.add_ont(ontid, *ont[2:])

    for import_id, atlas in sorted(ontids_atlases):
        graph.add_trip(ontid, owl.imports, import_id)
        add_triples(graph, atlas, make_atlas)

    graph.add_class(ATLAS_SUPER, label=atname)

    graph.add_class(PARC_SUPER, label=psname)
    graph.write()
Example #16
def import_tree(graph, ontologies, **kwargs):
    for ontology in ontologies:
        thisfile = Path(ontology).name
        print(thisfile)
        mg = makeGraph('', graph=graph)
        mg.add_known_namespaces('owl', 'obo', 'dc', 'dcterms', 'dctypes',
                                'skos', 'NIFTTL')
        j = mg.make_scigraph_json('owl:imports', direct=True)
        try:
            t, te = creatTree(*Query(f'NIFTTL:{thisfile}', 'owl:imports',
                                     'OUTGOING', 30),
                              json=j,
                              prefixes=mg.namespaces,
                              **kwargs)
            #print(t)
            yield t, te
        except KeyError:
            print(tc.red('WARNING:'), 'could not find', ontology,
                  'in import chain')  # TODO zap onts w/o imports
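
import_tree is a generator, so nothing happens until it is iterated (arguments hypothetical):

for t, te in import_tree(graph, ['ttl/nif.ttl']):
    print(t)  # rendered import tree; te carries extras such as the html view (see Example #22)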
Example #17
def switchURIs(g, swap, *args):
    if len(args) > 1:  # FIXME hack!
        _, fragment_prefixes = args
    reps = []
    prefs = {None}
    addpg = makeGraph('', graph=g)
    for t in g:
        nt, ireps, iprefs = tuple(zip(*swap(t, *args)))
        if t != nt:
            g.remove(t)
            g.add(nt)

        for rep in ireps:
            if rep is not None:
                reps.append(rep)

        for pref in iprefs:
            if pref not in prefs:
                prefs.add(pref)
                addpg.add_known_namespaces(fragment_prefixes[pref])
    return reps
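
The swap callable's contract, inferred from the zip(*swap(t, *args)) unpacking above: for each element of a triple it yields a (new_element, rep, pref) tuple, where rep records a replacement (or None) and pref names a fragment prefix to register (or None). A do-nothing sketch:

def identity_swap(t, *args):
    for element in t:
        yield element, None, None  # keep the element; no replacement, no new namespace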
Example #18
def make_neurolex_graph():
    # neurolex test stuff
    nlxpref = {'ilx': 'http://uri.interlex.org/base/'}
    nlxpref.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', nlxpref)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    ILXPO = 'ilx:partOf'
    nj = neurolex.make_scigraph_json(ILXPO)
    g_, h = creatTree(*Query('NIFGA:birnlex_796', ILXPO, 'INCOMING', 10),
                      json=nj)
    i_, j_ = creatTree(*Query('NIFGA:nlx_412', ILXPO, 'INCOMING', 10), json=nj)

    brht = sorted(set(flatten(h[0], [])))
    wmht = sorted(set(flatten(j_[0], [])))
    ufixedrb = {
        'NIFGA:' + k.split(':')[1]: v
        for k, v in u_replaced_by.items()
    }
    b_nlx_replaced_by = new_replaced_by(brht, ufixedrb)
    w_nlx_replaced_by = new_replaced_by(wmht, ufixedrb)
    additional_edges = defaultdict(
        list)  # TODO this could be fun for the future but is a nightmare atm
    for edge in h[-1]['edges'] + j_[-1]['edges']:
        additional_edges[edge['sub']] = edge
        additional_edges[edge['obj']] = edge

    #filter out bad edges because we are lazy
    additional_edges = {
        k: v
        for k, v in additional_edges.items()
        if k in b_nlx_replaced_by or k in w_nlx_replaced_by
    }

    print('neurolex tree')  # computed above
    print(g_)
    print(i_)

    return additional_edges
Example #19
def main():
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth

    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path(
        'ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')

    ecgraph = rdflib.Graph()
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))

    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    *ft.serialize(ftng.g),
    ftng.write()

    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)
    ecng = makeGraph('thing2',
                     graph=ecgraph,
                     prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()
        return
    r = Restriction(rdfs.subClassOf)  #, scope=owl.allValuesFrom)#NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)
    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)
    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))
    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, ObjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(
        TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(
        TEMP.ec1,
        TEMP.ec2,
        restriction(TEMP.predicate0, TEMP.target1),
        restriction(TEMP.predicate1, TEMP.target2),
    )
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class),
                             (TEMP.hoh, 'FUN'))
    ft = flattenTriples((
        acombinator((TEMP.annotation, 'annotation value')),
        acombinator((TEMP.anotherAnnotation, 'annotation value again')),
        oc_(TEMP.c1, superDuperClass),
        oc_(TEMP.c2, superDuperClass),
        oc_(TEMP.c3, superDuperClass),
        oc_(TEMP.c4, superDuperClass),
        oc_(TEMP.c5, superDuperClass),
        oc_(TEMP.wat, subClassOf(TEMP.watParent)),
        oc_(TEMP.testSubject),
        ec(TEMP.testSubject),
        oc_(TEMP.more,
            oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),
    ), )
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1',
                    graph=egraph,
                    prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()
    if __name__ == '__main__':
        breakpoint()
Example #20
def main():
    olr = auth.get_path('ontology-local-repo')
    resources = auth.get_path('resources')
    if not olr.exists():
        raise FileNotFoundError(f'{olr} does not exist, cannot continue')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist, cannot continue')

    PREFIXES = makePrefixes('definition', 'replacedBy', 'hasRole', 'oboInOwl',
                            'CHEBI', 'owl', 'skos')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    file = resources / 'chebi-subset-ids.txt'
    with open(file.as_posix(), 'rt') as f:
        ids_raw = set((_.strip() for _ in f.readlines()))
        ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw)))

    def check_chebis(g):
        a = []
        for id_ in ids:
            l = sorted(g.triples((id_, None, None)))
            ll = len(l)
            a.append(ll)
        return a

    def fixIons(g):
        # there is a series of atom/ion confusions that shall be dealt with; the solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
        ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
        # atom           ion
        None, 'CHEBI:29108'  # calcium is ok
        ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
        ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
        ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
        ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
        None, 'CHEBI:29105'  # zinc is ok

    g = OntGraph()
    cg = OntGraph()
    cd = OntGraph()
    chemg = OntGraph()
    molg = OntGraph()

    cg.parse(olr / 'ttl/generated/chebislim.ttl', format='turtle')
    list(g.add(t) for t in cg)
    a1 = check_chebis(g)

    cd.parse(olr / 'ttl/generated/chebi-dead.ttl', format='turtle')
    list(g.add(t) for t in cd)
    a2 = check_chebis(g)

    chemg.parse(olr / 'ttl/NIF-Chemical.ttl', format='turtle')
    chemgg = makeGraph('NIF-Chemical', graph=chemg)
    fixIons(chemg)
    list(g.add(t) for t in chemg)
    a3 = check_chebis(g)

    molg.parse(olr / 'ttl/NIF-Molecule.ttl', format='turtle')
    molgg = makeGraph('NIF-Molecule', graph=molg)
    fixIons(molg)
    list(g.add(t) for t in molg)
    a4 = check_chebis(g)

    replacedBy = ug.expand('replacedBy:')
    deads = {s: o for s, o in cd.subject_objects(replacedBy)}

    def switch_dead(g):
        ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
        for f, r in deads.items():
            ng.replace_uriref(f, r)
            ng.add_trip(r, 'oboInOwl:hasAlternateId',
                        rdflib.Literal(f, datatype=rdflib.XSD.string))
            g.remove((r, replacedBy, r))  # in case the replacedBy triple was already present

    switch_dead(g)
    switch_dead(cg)
    switch_dead(chemg)
    switch_dead(molg)

    def fixHasAltId(g):
        ng = makeGraph('',
                       graph=g,
                       prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'NIFRID'))
        ng.replace_uriref('NIFCHEM:hasAlternativeId',
                          'oboInOwl:hasAlternativeId')
        # ng.replace_uriref('NIFRID:ChEBIid', 'oboInOwl:id')  # :id does not exist, do we need an alternative?

    list(map(fixHasAltId, (g, cg, chemg)))

    def fixAltIdIsURIRef(g):
        hai = ug.expand('oboInOwl:hasAlternativeId')
        # i = ug.expand('oboInOwl:id')  # :id does not exist
        makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
        #for s, o in g.subject_objects(i):  # :id does not exist
        #inner(s, i, o)

    list(map(fixAltIdIsURIRef, (g, cg, chemg)))

    matches = [_ for _ in zip(a1, a2, a3, a4)]
    changed = [len(set(_)) != 1 for _ in matches]
    review = [(id_, m) for id_, changed, m in zip(ids, changed, matches)
              if changed and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [
        set([(s, str(o.toPython()))
             for s, p, o in cg.triples((u, None, None))]) for u, _ in review
    ]
    wat_a = [
        set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))])
        for u, _ in review
    ]
    wat_c_ = [
        set(cg.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [
        set(g.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    diff = [a - c for a, c in zip(wat_a, wat_c)]
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = createOntology(
        'chebi-bridge',
        'NIF ChEBI bridge',
        makePrefixes('CHEBI', 'BFO1SNAP', 'owl', 'skos', 'dc', 'hasRole',
                     'NIFCHEM', 'oboInOwl', 'NIFMOL', 'NIFRID'),
        'chebibridge',
        ('This bridge file contains additional annotations'
         ' on top of CHEBI identifiers that were originally'
         ' included in NIF-Chemical or NIF-Molecule that have'
         ' not since been added to CHEBI upstream'),
        path='ttl/bridge/',
        #imports=('https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebislim.ttl',
        #'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebi-dead.ttl'))
        imports=(
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebislim.ttl',
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebi-dead.ttl'))

    out = []
    for set_ in diff:
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                # please note that this process will do things like remove hasStreetName ecstasy from CHEBI:1391 since chebislim has it listed as a synonym
                py = t[-1].toPython()
                if py == string and not py.startswith('ub'):  # ignore restrictions... this is safe because nifmol and nifchem don't have any restrictions...
                    cb.add_recursive(t, g)
        cb.add_class(sub)  # only need to go at the end because sub is the same for each set

    def hasImplicitSuperclass(s, o):
        for super_ in cg.objects(s, rdflib.RDFS.subClassOf):
            if super_ == o:
                return True
            elif hasImplicitSuperclass(super_, o):
                return True

    # curation decisions after review (see outtc for full list)
    curatedOut = []

    def curateOut(*t):
        curatedOut.append(
            tuple(
                ug.expand(_) if type(_) is not rdflib.Literal else _
                for _ in t))
        cb.del_trip(*t)

    curateOut(
        'CHEBI:6887', 'rdfs:subClassOf', 'CHEBI:23367'
    )  # defer to the chebi choice of chemical substance over molecular entity since it is classified as a racemate which doesn't quite match the mol ent def
    curateOut(
        'CHEBI:26519', 'rdfs:subClassOf', 'CHEBI:24870'
    )  # some ions may also be free radicals, but all free radicals are not ions!
    #natural product removal since natural product should probably be a role if anything...
    curateOut('CHEBI:18059', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:24921', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:37332', 'rdfs:subClassOf', 'CHEBI:33243')

    curateOut('CHEBI:50906', 'rdfs:label',
              rdflib.Literal('Chemical role', datatype=rdflib.XSD.string)
              )  # chebi already has a chemical role...
    curateOut(
        'CHEBI:22586', 'rdfs:subClassOf', 'CHEBI:24432'
    )  # antioxidant is already modelled as a chemical role instead of a biological role, the distinction is that the biological roles affect biological processes/property, not chemical processes/property
    curateOut('CHEBI:22720', 'rdfs:subClassOf',
              'CHEBI:27171')  # not all children are bicyclic
    curateOut(
        'CHEBI:23447', 'rdfs:subClassOf', 'CHEBI:17188'
    )  # this one seems obviously false... all cyclic nucleotides are not nucleoside 5'-monophosphate...
    curateOut(
        'CHEBI:24922', 'rdfs:subClassOf', 'CHEBI:27171'
    )  # not all children are bicyclic, some may be poly, therefore removing
    curateOut(
        'CHEBI:48706', 'rdfs:subClassOf', 'CHEBI:33232'
    )  # removing since antagonist is more incidental and pharmacological role is more appropriate (as chebi has it)
    curateOut('CHEBI:51064', 'rdfs:subClassOf',
              'CHEBI:35338')  # removing since chebi models this with has part
    curateOut(
        'CHEBI:8247', 'rdfs:subClassOf', 'CHEBI:22720'
    )  # the structure is 'fused to' a benzo, but it is not a benzo, chebi has the correct
    #curateOut('CHEBI:9463', 'rdfs:subClassOf', 'CHEBI:50786')  # not sure what to make of this wikipedia says one thing, but chebi says another, very strange... not an anabolic agent?!??! wat no idea

    # review hold over subClassOf statements
    intc = []
    outtc = []
    for s, o in cb.g.subject_objects(rdflib.RDFS.subClassOf):
        if (str(o) == 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#_birnlex_retired_class'
                or str(o) == 'http://ontology.neuinfo.org/nif/nifstd/readable/birnlexRetiredClass'):
            # we need to remove any of the cases where deprecation was misused
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
        elif hasImplicitSuperclass(s, o):
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
            intc.append((s, rdflib.RDFS.subClassOf, o))
        else:
            outtc.append((s, rdflib.RDFS.subClassOf, o))

    def qname(trips):
        return tuple(
            tuple(cb.g.namespace_manager.qname(_) for _ in t) for t in trips)

    for a, p, b in sorted(qname(outtc)):
        if 'NIFMOL' in b:
            continue  # not considering cases where NIFMOL/NIFCHEM ids are used, that can come later
        s = sgv.findById(a)
        o = sgv.findById(b)
        if s is None or o is None:
            print(a, '=>', s)
            print(b, '=>', o)
        else:
            print(s['labels'], s['curie'])
            print('subClassOf')
            print(o['labels'], o['curie'])
            print((a, p, b))
        print('---------------------')

    cb.write()  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)

    # validation
    diff2 = set(cb.g) - set(cg)
    diff3 = set(cb.g) - diff2  # should just be all the owl:Class entries
    diff4 = set(cb.g) - set(chemg) | set(cb.g) - set(molg)  # not informative
    diff5 = set(cb.g) - diff4  # not informative
    both = set(chemg) & set(
        molg)  # there is no overlap beyond the owl:Class declarations

    def getChebis(set_):
        return set(t for t in set_ if 'CHEBI_' in t[0])

    def nodt(graph):
        return set((s, str(o) if type(o) is rdflib.Literal else o)
                   for s, p, o in graph)

    cmc = getChebis((((
        (nodt(chemg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    cmc = sorted(t for s, o in cmc for t in chemg.triples((s, None, o)))
    mmc = getChebis((((
        (nodt(molg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    mmc = sorted(t for s, o in mmc for t in molg.triples((s, None, o)))

    # remove chebi classes from nifchem and nifmol
    def remstuff(sources, targets):
        for source in sources:
            for id_ in source.subjects(rdflib.RDF.type, rdflib.OWL.Class):
                for target in targets:
                    target.del_class(id_)

    remstuff((cg, cd), (chemgg, molgg))

    chemgg.write()
    molgg.write()

    if __name__ == '__main__':
        breakpoint()
Example #21
def main():
    for filename in ('mbaslim', 'hbaslim', 'paxinos-rat-labels',
                     'waxholm-rat-labels'):
        filepath = gitf / 'NIF-Ontology/ttl/generated/parcellation' / (
            filename + '.ttl')
        dir_ = filepath.parent.as_posix()
        print(dir_)
        file_commit = subprocess.check_output(
            [
                'git', 'log', '-n', '1', '--pretty=format:%H', '--',
                filepath.name
            ],
            cwd=dir_,
            stderr=subprocess.DEVNULL).decode().rstrip()
        graph = rdflib.Graph().parse(filepath.as_posix(), format='ttl')
        g = makeGraph('', graph=graph)

        annos = defaultdict(set)
        anno_trips = defaultdict(set)
        for triple, predicate_objects in annotation.parse(graph=graph):
            for a_p, a_o in predicate_objects:
                annos[a_p, a_o].add(triple)
                anno_trips[triple].add((a_p, a_o))

        anno_trips = {k: v for k, v in anno_trips.items()}

        for lifted_triple in restriction.parse(graph=graph):
            graph.add(lifted_triple)

        out_header = 'label|abbrev|curie|superPart curie\n'
        out = []
        editions_header = 'edition|label|abbrev|curie\n'
        editions = []
        for s in graph.subjects(rdf.type, owl.Class):
            rdfsLabel = next(graph.objects(s, rdfs.label))
            try:
                prefLabel = next(graph.objects(s, skos.prefLabel))
            except StopIteration:
                print(tc.red('WARNING:'),
                      f'skipping {s} {rdfsLabel} since it has no prefLabel')
                continue
            syns = sorted(graph.objects(s, NIFRID.synonym))  # TODO are there cases where we need to recapitulate what we are doing for abbrevs?
            abbrevs = sorted(graph.objects(
                s, NIFRID.abbrev))  # FIXME paxinos has more than one
            try:
                if annos:
                    if len(abbrevs) > 1:
                        print(tc.blue('INFO:'), g.qname(s),
                              repr(prefLabel.value), 'has multiple abbrevs',
                              [a.value for a in abbrevs])
                    # prefer latest
                    abbrev = abbrevs[0] if abbrevs else ''  # ensure abbrev is always bound
                    current_edition = ''
                    for a in abbrevs:
                        for a_p, edition in anno_trips[s, NIFRID.abbrev, a]:
                            if a_p == ilxtr.literalUsedBy:
                                if current_edition < edition:
                                    current_edition = edition
                                    abbrev = a
                else:
                    abbrev = abbrevs[0]
            except IndexError:
                abbrev = ''
            try:
                superPart = next(graph.objects(s, ilxtr.labelPartOf))
            except StopIteration:
                superPart = ''

            out.append(
                f'{prefLabel}|{abbrev}|{g.qname(s)}|{g.qname(superPart)}')

            if annos:
                #asdf = {'ed':{'label':,'abbrev':,'curie':}}
                asdf = defaultdict(dict)
                triple = s, skos.prefLabel, prefLabel
                eds = anno_trips[triple]
                for a_p, a_o in eds:
                    asdf[a_o]['curie'] = g.qname(s)
                    asdf[a_o]['label'] = prefLabel
                for syn in graph.objects(s, NIFRID.synonym):
                    triple = s, NIFRID.synonym, syn
                    eds = anno_trips[triple]
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'label' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has a label "{asdf[a_o]["label"]}" for "{syn}"'
                            )
                        asdf[a_o]['label'] = syn
                for abbrev in graph.objects(s, NIFRID.abbrev):
                    triple = s, NIFRID.abbrev, abbrev
                    eds = anno_trips[triple]
                    #print('aaaaaaaaaaa', g.qname(s), )
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'abbrev' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has an abbrev "{asdf[a_o]["abbrev"]}" for "{abbrev}"'
                            )
                        asdf[a_o]['abbrev'] = abbrev

                #print(asdf)
                for ed, kwargs in sorted(asdf.items()):
                    if 'abbrev' not in kwargs:
                        print('Skipping', ed, 'for\n', kwargs)
                        continue
                    editions.append('{ed}|{label}|{abbrev}|{curie}'.format(
                        ed=g.qname(ed), **kwargs))

        with open('/tmp/' + filename + f'-{file_commit[:8]}.psv', 'wt') as f:
            f.write(out_header + '\n'.join(sorted(out, key=labelkey)))
        if editions:
            with open('/tmp/' + filename + f'-editions-{file_commit[:8]}.psv',
                      'wt') as f:
                f.write(editions_header +
                        '\n'.join(sorted(editions, key=edkey)))
Example #22
def main():
    args = docopt(__doc__, version='0.1')
    outloc = args['--output-location']
    filename = os.path.splitext(os.path.basename(args['<file>']))[0]

    mgraph = makeGraph(filename, prefixes=uPREFIXES, writeloc=outloc)

    if args['workflow']:
        w = WorkflowMapping(args['<file>'])
        [mgraph.g.add(t) for t in w.triples]
        w.post_graph(mgraph.g)

    elif args['paper']:
        w = PaperIdMapping(args['<file>'])
        [mgraph.g.add(t) for t in w.triples]
        w.post_graph(mgraph.g)

    elif args['methods']:
        parser = etree.XMLParser(remove_blank_text=True)
        e = etree.parse(args['<file>'], parser)
        #graph = e.find(abv['graph'])
        nodes = xpath(e, '//' + abv['node'])
        edges = xpath(e, '//' + abv['edge'])
        node_dict = {}
        for node in nodes:  # slow but who cares
            id_ = node.get('id')
            #label = xpath(node, '//'+abv['NodeLabel'])[0].text
            idstr = '[@id="%s"]//' % id_
            label = xpath(e, '//' + abv['node'] + idstr +
                          abv['NodeLabel'])[0].text
            targets = []
            node_dict['FIXME:' + id_] = label, targets

        edge_dict = {}
        edge_types = set()
        for edge in edges:
            id_ = edge.get('id')
            #print(id_)
            idstr = '[@id="%s"]//' % id_
            source = 'FIXME:' + edge.get('source')
            target = 'FIXME:' + edge.get('target')
            out = xpath(edge, '//' + abv['edge'] + idstr + abv['EdgeLabel'])
            edge_type = out[0].text if out else None
            #print(edge_type)
            edge_dict[id_] = source, target, edge_replace(edge_type)
            edge_types.add(edge_type)

        for et in set(edge_to_ttl.values()):
            if et != 'SKIP':
                mgraph.add_op(et)

        for eid, (source, target, edge_type) in edge_dict.items():
            node_dict[source][1].append((edge_type, target))
            #print(source, edge_type, target)
            if edge_type == 'SKIP':
                mgraph.add_trip(source, 'rdf:type', 'owl:Class')
            elif edge_type is not None:
                mgraph.add_class(source)
                mgraph.add_class(target)
                try:
                    if edge_type == 'rdfs:subClassOf':
                        mgraph.add_trip(source, edge_type, target)
                    else:
                        mgraph.add_hierarchy(target, edge_type, source)
                except ValueError as e:
                    raise ValueError(f'{source} {edge_type} {target}') from e

            label = node_dict[source][0]

            if '(id' in label:
                label, rest = label.split('(id')
                id_, rest = rest.split(')', 1)
                mgraph.add_trip(source, 'FIXME:REPLACEID', id_)
                label = label.strip() + rest.strip()
            if '(syns' in label:
                label, rest = label.split('(syns')
                syns, rest = rest.split(')', 1)
                if ',' in syns:
                    syns = [
                        mgraph.add_trip(source, 'NIFRID:synonym', s.strip())
                        for s in syns.split(',') if s
                    ]  #FIXME
                else:
                    syns = [
                        mgraph.add_trip(source, 'NIFRID:synonym', s)
                        for s in syns.split(' ') if s
                    ]  #FIXME
                label = label.strip() + rest.strip()
            if '(note' in label:
                while '(note' in label:
                    label, rest = label.split('(note', 1)
                    note, rest = rest.split(')', 1)
                    mgraph.add_trip(source, 'rdfs:comment', note)
                    label = label.strip() + rest.strip()
            if '(def' in label:
                label, rest = label.split('(def')
                def_, rest = rest.split(')', 1)
                def_ = def_.replace('\n', ' ')
                mgraph.add_trip(source, 'NIFRID:definition', def_.strip())
                label = label.strip() + rest
            if '#FIXME' in label:
                label, note = label.split('#FIXME')
                label = label.replace('\n', '').strip()
                note = note.replace('\n', ' ').strip()
                mgraph.add_trip(source, 'rdfs:comment', note)
            if args['methods']:
                clabel = label.capitalize()
            else:
                clabel = label
            mgraph.add_trip(source, 'rdfs:label', clabel)

        Query = namedtuple('Query',
                           ['root', 'relationshipType', 'direction', 'depth'])
        json = mgraph.make_scigraph_json('rdfs:subClassOf', direct=True)
        t, te = creatTree(*Query('FIXME:n0', 'rdfs:subClassOf', 'INCOMING',
                                 20),
                          json=json)  # methods
        t, te = creatTree(*Query('FIXME:n236', 'rdfs:subClassOf', 'INCOMING',
                                 20),
                          json=json)  # techniques
        print(t)

        with open(os.path.join(outloc, filename + '.txt'), 'wt') as f:
            f.write(str(t))
        with open(os.path.join(outloc, filename + '.html'), 'wt') as f:
            f.write(te.html)

    out_graph = cull_prefixes(mgraph.g,
                              prefixes={
                                  **dict(workflow=workflow, RRIDCUR=RRIDCUR),
                                  **uPREFIXES
                              })
    out_graph.filename = mgraph.filename
    out_graph.write()
Example #23
def do_deprecation(replaced_by, g, additional_edges, conflated):
    bmeta = OntMeta(
        'http://ontology.neuinfo.org/NIF/ttl/bridge/', 'uberon-bridge',
        'NIFSTD Uberon Bridge', 'UBERON Bridge',
        ('This is the bridge file that holds local NIFSTD additions to uberon. '
         'This is also staging for any changes that we want to push upstream.'
         ), TODAY())
    ontid = bmeta.path + bmeta.filename + '.ttl'
    bridge = makeGraph('uberon-bridge', PREFIXES)
    bridge.add_ont(ontid, *bmeta[2:])

    graph = makeGraph('NIF-GrossAnatomy', NIFPREFIXES, graph=g)
    #graph.g.namespace_manager._NamespaceManager__cache = {}
    #g.namespace_manager.bind('UBERON','http://purl.obolibrary.org/obo/UBERON_')  # this has to go in again because we reset g FIXME
    udone = {'NOREP'}  # note: set('NOREP') would be a set of single characters
    uedges = defaultdict(lambda: defaultdict(set))

    def inner(nifga, uberon):
        # check neuronames id TODO

        udepr = sgv.findById(
            uberon)['deprecated'] if uberon != 'NOREP' else False
        if udepr:
            # add xref to the now deprecated uberon term
            graph.add_trip(nifga, 'oboInOwl:hasDbXref', uberon)
            #print('Replacement is deprecated, not replacing:', uberon)
            graph.add_trip(
                nifga, RDFS.comment,
                'xref %s is deprecated, so not using replacedBy:' % uberon)
        else:
            # add replaced by -> uberon
            graph.add_trip(nifga, 'replacedBy:', uberon)

        # add deprecated true (ok to do twice...)
        graph.add_trip(nifga, OWL.deprecated, True)

        # review nifga relations, specifically has_proper_part, proper_part_of
        # put those relations on the uberon term in the
        # if there is no uberon term raise an error so we can look into it

        #if uberon not in uedges:
        #uedges[uberon] = defaultdict(set)
        resp = sgg.getNeighbors(nifga)
        edges = resp['edges']
        if nifga in additional_edges:
            edges.append(additional_edges[nifga])
        include = False  # set this to True when running anns
        for edge in edges:  # FIXME TODO hierarchy extraction and porting
            #print(edge)
            if udepr:  # skip everything if uberon is deprecated
                include = False
                hier = False
                break
            sub = edge['sub']
            obj = edge['obj']
            pred = edge['pred']
            hier = False
            if pred == 'subClassOf':
                pred = RDFS.subClassOf
                continue
            elif pred == 'equivalentClass':
                pred = OWL.equivalentClass
                continue
            elif pred == 'isDefinedBy':
                pred = RDFS.isDefinedBy
                continue
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#has_proper_part':
                hier = True
                include = True
            elif pred == 'http://www.obofoundry.org/ro/ro.owl#proper_part_of':
                hier = True
                include = True
            elif pred == 'ilx:partOf':
                hier = True
                include = True

            if sub == nifga:
                try:
                    obj = replaced_by[obj]
                    if obj == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', obj)
                if type(obj) == tuple: continue  # TODO
                if hier:
                    if uberon not in uedges[obj][pred]:
                        uedges[obj][pred].add(uberon)
                        bridge.add_hierarchy(obj, pred, uberon)
                else:
                    #bridge.add_trip(uberon, pred, obj)
                    pass
            elif obj == nifga:
                try:
                    sub = replaced_by[sub]
                    if sub == 'NOREP':
                        hier = False
                except KeyError:
                    print('not in replaced_by', sub)
                if type(sub) == tuple: continue  # TODO
                if hier:
                    if sub not in uedges[uberon][pred]:
                        uedges[uberon][pred].add(sub)
                        bridge.add_hierarchy(uberon, pred, sub)
                else:
                    #bridge.add_trip(sub, pred, uberon)
                    pass

        if False and uberon not in udone and include:  # skip porting annotations and labels for now
            #udone.add(uberon)
            try:
                label = sgv.findById(uberon)['labels'][0]
            except IndexError:
                WAT = sgv.findById(uberon)
                embed()
            bridge.add_class(uberon, label=label)

            # annotations to port
            for p in anns_to_port:
                os_ = list(graph.g.objects(graph.expand(nifga), p))
                for o in os_:
                    if label.lower() != o.lower():  # we can simply capitalize labels
                        print(label.lower())
                        print(o.lower())
                        print()
                        bridge.add_trip(uberon, p, o)

                if p == SKOS.prefLabel and not os_:
                    if uberon not in conflated or (uberon in conflated
                                                   and nifga in preflabs):
                        l = list(
                            graph.g.objects(graph.expand(nifga),
                                            RDFS.label))[0]
                        bridge.add_trip(
                            uberon, SKOS.prefLabel,
                            l)  # port label to prefLabel if no prefLabel

    for nifga, uberon in replaced_by.items():
        if type(uberon) == tuple:
            print(uberon)
            for ub in uberon:
                print(ub)
                inner(nifga, ub)
        elif uberon == 'NOREP':
            graph.add_trip(nifga, OWL.deprecated,
                           True)  # TODO check for missing edges?
        elif uberon is None:
            continue  # BUT TODAY IS NOT THAT DAY!
        else:
            inner(nifga, uberon)

    return graph, bridge, uedges
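
The shape of the replaced_by mapping handled by the loop above, inferred from its branches (curies hypothetical): a plain replacement, a tuple for one-to-many, 'NOREP' to deprecate without replacement, or None to skip:

replaced_by = {
    'NIFGA:birnlex_796': 'UBERON:0000955',  # direct replacement
    'NIFGA:birnlex_1509': ('UBERON:0001869', 'UBERON:0002420'),  # one-to-many
    'NIFGA:birnlex_2313': 'NOREP',  # deprecate with no replacement
    'NIFGA:birnlex_2314': None,  # skip for now
}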
Example #24
def swanson():
    """ not really a parcellation scheme
        NOTE: the defining information up here is now deprecated
        it is kept around to keep the code further down happy """

    source = auth.get_path('resources') / 'swanson_aligned.txt'
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = SwansonLabels.prefixes
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from ' + source.as_posix() + '.' + '**FIXME**',
                      'now')

    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    #fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed

    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    n1, l = l.split(') or')
                    area_name, citationP =  n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))

                area_name, citationP =  l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None

            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    results = Async()(deferred(sgv.findByTerm)(d[1]) for d in data)
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'curie' in r and 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
            [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    # TODO annotate the appendicies and the classes with these
    appendix_root_mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?

    class SP(rowParse):
        def __init__(self):
            self.nodes = defaultdict(dict)
            self._appendix = 0
            self.appendicies = {}
            self._last_at_level = {}
            self.names = defaultdict(set)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:
                    breakpoint()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes':[],'edges':[]}
    parent = ilxtr.swansonBrainRegionConcept

    og = OntGraph()
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])  # issues arrise here...
        [og.add(t) for t in map_term(rdflib.URIRef(nid), anns['label'], prefix='UBERON')]

    og.write(auth.get_path('ontology-local-repo') /
             'ttl/generated/swanson-uberon-mapping.ttl')
    #hrm = [(anns['label'], gn(anns['label'])) for node, anns in sp.nodes.items()]
    #ok = [(h, test, term_source(h, test)) for h, test in hrm if test]
    #notok = [h for h, test in hrm if not test]

    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(aid, 'ilxtr:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = 'swanr:hasPart' + str(appendix)
        apo = 'swanr:partOf' + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_restriction(pid, ahp, cid)  # note hierarchy inverts direction
                new_graph.add_restriction(cid, apo, pid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    return new_graph
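
For reference, each add_restriction call above materializes the standard OWL someValuesFrom pattern; a minimal sketch of the equivalent raw rdflib triples (assuming makeGraph wraps a plain rdflib.Graph; the helper name is illustrative, not the makeGraph internals):

import rdflib
from rdflib.namespace import OWL, RDF, RDFS

def add_restriction_sketch(g, subject, predicate, obj):
    # subject rdfs:subClassOf [ a owl:Restriction ;
    #                           owl:onProperty predicate ;
    #                           owl:someValuesFrom obj ]
    r = rdflib.BNode()
    g.add((r, RDF.type, OWL.Restriction))
    g.add((r, OWL.onProperty, predicate))
    g.add((r, OWL.someValuesFrom, obj))
    g.add((subject, RDFS.subClassOf, r))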
Example #25
def clean_hbp_cell():
    #old graph
    g = rdflib.Graph()
    path = Path(devconfig.git_local_base,
                'methodsOntology/ttl/hbp_cell_ontology.ttl')
    if not path.exists():
        raise devconfig.MissingRepoError(f'repo for {path} does not exist')

    g.parse(path.as_posix(), format='turtle')
    g.remove((None, rdflib.OWL.imports, None))
    g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology))

    #new graph
    NAME = 'NIF-Neuron-HBP-cell-import'
    mg = makeGraph(NAME, prefixes=PREFIXES)
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl'
    mg.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    mg.add_trip(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import')
    mg.add_trip(ontid, rdflib.RDFS.comment, 'this file was automatically generated using pyontutils/hbp_cells.py')
    mg.add_trip(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    newgraph = mg.g

    skip = {
        '0000000':'SAO:1813327414',  # cell
        #'0000001':NEURON,  # neuron  (equiv)
        #'0000002':'SAO:313023570',  # glia  (equiv)
        #'0000021':'NLXNEURNT:090804',  # glut  (equiv, but phen)
        #'0000022':'NLXNEURNT:090803',  # gaba  (equiv, but phen)

        '0000003':NEURON,
        '0000004':NEURON,
        '0000005':NEURON,
        '0000006':NEURON,
        '0000007':NEURON,
        '0000008':NEURON,
        '0000009':NEURON,
        '0000010':NEURON,
        '0000019':NEURON,
        '0000020':NEURON,
        '0000033':NEURON,
        '0000034':NEURON,
        '0000070':NEURON,
        '0000071':NEURON,
    }
    to_phenotype = {
        '0000021':('ilx:hasExpressionPhenotype', 'SAO:1744435799'),  # glut, all classes that might be here are equived out
        '0000022':('ilx:hasExpressionPhenotype', 'SAO:229636300'),  # gaba
    }
    lookup = {'NIFCELL', 'NIFNEURNT'}
    missing_supers = {
        'HBP_CELL:0000136',
        'HBP_CELL:0000137',
        'HBP_CELL:0000140',
    }

    replace = set()
    phen = set()
    equiv = {}
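    # single pass over the sorted triples: cache owl:equivalentClass targets,
    # remap the legacy nsupper predicates, and divert subjects listed in skip
    # or to_phenotype so they are replaced or converted to restrictions below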
    for triple in sorted(g.triples((None, None, None))):
        id_suffix = newgraph.namespace_manager.compute_qname(triple[0].toPython())[2]
        try:
            obj_suffix = newgraph.namespace_manager.compute_qname(triple[2].toPython())[2]
        except Exception:  # it wasn't a URI
            pass
        # equiv insert for help
        if triple[1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype:
            qnt = newgraph.namespace_manager.compute_qname(triple[2].toPython())
            #print(qnt)
            if qnt[0] in lookup:
                try:
                    lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0]
                    print('REMOTE', qnt[0] + ':' + qnt[2], lab)
                    #mg.add_trip(triple[2], rdflib.RDFS.label, lab)
                    #mg.add_trip(triple[0], PREFIXES['NIFRID'] + 'synonym', lab)  # so we can see it
                except TypeError:
                    if qnt[2].startswith('nlx'):
                        triple = (triple[0], triple[1], expand('NIFSTD:' + qnt[2]))
                    #print('bad identifier')

        #check for equiv
        if triple[0] not in equiv:
            eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)]
            if eq and id_suffix not in skip and id_suffix not in to_phenotype:
                if len(eq) > 1:
                    print(eq)
                equiv[triple[0]] = eq[0]
                continue
        elif triple[0] in equiv:
            continue

        # edge replace
        if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym':
            edge = mg.expand('NIFRID:abbrev')
        elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition':
            edge = rdflib.namespace.SKOS.definition
        else:
            edge = triple[1]

        # skip or to phenotype or equiv
        if id_suffix in skip:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            replace.add(triple[0])
            #print('MEEP MEEP')
        elif id_suffix in to_phenotype:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            phen.add(triple[0])
        elif triple[1] == rdflib.RDFS.label:  # fix labels
            if not triple[2].startswith('Hippocampus'):
                new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en')
                newgraph.add((triple[0], edge, new_label))
            else:
                newgraph.add((triple[0], edge, triple[2]))
        elif triple[2] in replace:
            mg.add_trip(triple[0], edge, skip[obj_suffix])
        elif triple[2] in phen:
            edge_, rst_on = to_phenotype[obj_suffix]
            edge_ = expand(edge_)
            rst_on = expand(rst_on)

            this = triple[0]
            this = infixowl.Class(this, graph=newgraph)
            this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf]

            restriction = infixowl.Restriction(edge_, graph=newgraph, someValuesFrom=rst_on)
            this.subClassOf = [restriction] + [c for c in this.subClassOf]
        elif triple[2] in equiv:
            newgraph.add((triple[0], edge, equiv[triple[2]]))
        else:
            newgraph.add((triple[0], edge, triple[2]))

    # final cleanup for forward references (since we iterate through sorted)

    tt = rdflib.URIRef(expand('HBP_CELL:0000033'))
    tf = rdflib.URIRef(expand('HBP_CELL:0000034'))
    newgraph.remove((None, None, tt))
    newgraph.remove((None, None, tf))

    # add missing subClasses
    for nosub in missing_supers:
        mg.add_trip(nosub, rdflib.RDFS.subClassOf, NEURON)

    # cleanup for subClassOf
    for subject in sorted(newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))):
        sco = [a for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))]
        #print('U WOT M8')
        if len(sco) > 1:
            #print('#############\n', sco)
            for s, p, o in sco:
                if 'hbp_cell_ontology' in o or ('NIF-Cell' in o and o != expand(NEURON)): #or 'sao2128417084' in o:  # neocortex pyramidal cell
                    #print(sco)
                    newgraph.remove((subject, rdflib.RDFS.subClassOf, expand(NEURON)))
                    break

    # do ilx
    ilx_start = ilx_get_start()
    #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv)  # FIXME NOPE, also need to modify the graph :/
    ilx_labels, ilx_replace = ilx_conv(graph=newgraph, prefix='HBP_CELL', ilx_start=ilx_start)
    ilx_add_ids(ilx_labels)

    replace_map = ilx_replace
    for hbp, rep in skip.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep
    for hbp, (e, rep) in to_phenotype.items():
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = e, rep
    for hbp_iri, rep_iri in equiv.items():
        hbp = newgraph.compute_qname(hbp_iri)[2]
        rep = newgraph.qname(rep_iri)
        ori = 'HBP_CELL:'+hbp
        if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    return mg, replace_map
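
A hedged sketch of how the returned replace_map could be consumed to rewrite identifiers in another graph (apply_replace_map and its expand argument are illustrative, not part of pyontutils):

def apply_replace_map(graph, replace_map, expand):
    # expand is assumed to turn a curie string into an rdflib.URIRef
    for old_curie, new in replace_map.items():
        if isinstance(new, tuple):  # phenotype entries carry (edge, target)
            continue  # these need restriction handling, not 1:1 replacement
        old_iri, new_iri = expand(old_curie), expand(new)
        for s, p, o in list(graph):
            if s == old_iri or o == old_iri:
                graph.remove((s, p, o))
                graph.add((new_iri if s == old_iri else s, p,
                           new_iri if o == old_iri else o))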
Example #26
def g(filename):
    return makeGraph('', graph=rdflib.Graph().parse(filename, format='turtle'))
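
Usage is simply g('path/to/file.ttl'); since the name is empty, the wrapper is mainly useful for qname/expand style lookups, e.g. (path and IRI here are illustrative):

mg = g('ttl/NIF-Neuron.ttl')
print(mg.qname('http://purl.obolibrary.org/obo/UBERON_0000955'))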
Example #27
def main():
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    Config('temporary-graph')
    EXISTING_GRAPH = graphBase.in_graph
    #EXISTING_GRAPH = rdflib.Graph()
    #graphBase.in_graph = EXISTING_GRAPH
    #graphBase.core_graph = EXISTING_GRAPH
    local_prefix = auth.get_path('ontology-local-repo') / 'ttl'
    sources = (f'{local_prefix}/NIF-Neuron-Defined.ttl',
               f'{local_prefix}/NIF-Neuron.ttl',
               f'{local_prefix}/NIF-Neuron-Phenotype.ttl',
               f'{local_prefix}/phenotype-core.ttl',
               f'{local_prefix}/phenotypes.ttl',
               f'{local_prefix}/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])

    #graphBase.core_graph = EXISTING_GRAPH
    #graphBase.out_graph = rdflib.Graph()
    graphBase.__import_name__ = 'neurondm.lang'

    proot = graphBase.core_graph.qname(PHENO_ROOT)
    mroot = graphBase.core_graph.qname(MOD_ROOT)
    graphBase._predicates, _psupers = getPhenotypePredicates(
        EXISTING_GRAPH, proot, mroot)

    g = makeGraph('merged',
                  prefixes={k: str(v)
                            for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    reg_neurons = list(g.g.subjects(rdfs.subClassOf, _NEURON_CLASS))
    tc_neurons = [
        _ for (_, ) in
        g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}' %
                  g.g.qname(_NEURON_CLASS))
    ]
    def_neurons = g.get_equiv_inter(_NEURON_CLASS)

    nodef = sorted(set(tc_neurons) - set(def_neurons))
    og1 = MeasuredNeuron.out_graph = rdflib.Graph()  # there is only 1 out_graph at a time, load and switch

    mns = [MeasuredNeuron(id_=n) for n in nodef]
    mnsp = [n for n in mns if n.pes]
    graphBase.out_graph = rdflib.Graph()  # XXX NEVER DO THIS IT IS EVIL ZALGO WILL EAT YOUR FACE
    graphBase.ng.g = graphBase.out_graph
    # and he did, had to switch to graphBase for exactly this reason >_<
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    dns += [Neuron(*m.pes) for m in mns if m.pes]

    # reset everything for export
    config = Config('phenotype-direct', source_file=relative_path(__file__))
    #Neuron.out_graph = graphBase.out_graph  # each subclass of graphBase has a distinct out graph IF it was set manually
    #Neuron.out_graph = rdflib.Graph()
    #ng = makeGraph('', prefixes={}, graph=Neuron.out_graph)
    #ng.filename = Neuron.ng.filename
    Neuron.mro()[1].existing_pes = {}  # wow, new adventures in evil python patterns mro()[1]
    dns = [Neuron(*d.pes) for d in set(dns)]  # TODO remove the set and use this to test existing bags?
    #from neurons.lang import WRITEPYTHON
    #WRITEPYTHON(sorted(dns))
    #ng.add_ont(TEMP['defined-neurons'], 'Defined Neurons', 'NIFDEFNEU',
    #'VERY EXPERIMENTAL', '0.0.0.1a')
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotype-core.ttl'))
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotypes.ttl'))
    #ng.write()
    ontinfo = (
        (Neuron.ng.ontid, rdf.type, owl.Ontology),
        (Neuron.ng.ontid, rdfs.label, rdflib.Literal('phenotype direct neurons')),
        (Neuron.ng.ontid, rdfs.comment, rdflib.Literal('Neurons derived directly from phenotype definitions')),
    )
    [Neuron.out_graph.add(t) for t in ontinfo]
    Neuron.write()
    Neuron.write_python()
    bads = [
        n for n in Neuron.ng.g.subjects(rdf.type, owl.Class)
        if len(list(Neuron.ng.g.predicate_objects(n))) == 1
    ]
    if __name__ == '__main__':
        breakpoint()

    return config
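
The rdfs:subClassOf+ property path query above does the heavy lifting; a self-contained sketch of the same pattern against a bare rdflib graph (the file name and class IRI are placeholders):

import rdflib

g = rdflib.Graph().parse('phenotypes.ttl', format='turtle')
neuron = rdflib.URIRef('http://example.org/Neuron')
query = ('PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> '
         'SELECT DISTINCT ?match WHERE { ?match rdfs:subClassOf+ <%s> }' % neuron)
subclasses = [match for (match,) in g.query(query)]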
Example #28
def parse_workflow():
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp:str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)

    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag

    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {tagType:sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                     if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    complicated = sorted(
        a_given_tag
        for tt in tag_types
        for a_given_tag in cgraph[:rdf.type:tt]
        if not isinstance(a_given_tag, BNode)
        and not [successor_tag
                 for htt in has_tag_types
                 for successor_tag in chain(
                     (t for t in cgraph[a_given_tag:htt]
                      #if not isinstance(t, BNode)
                      ),
                     # we don't actually need this for terminals
                     # we will need it later
                     #(t for b in cgraph[a_given_tag:htt]
                     # if isinstance(b, BNode)
                     # for listhead in cgraph[b:owl.oneOf]
                     # for t in unlist(listhead, cgraph)),
                 )])

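    # the next two helpers walk rdflib's cons-cell encoding of RDF lists:
    # topList steps backwards one rdf:rest link, and getLists runs the
    # transitive closure of that step from any rdf:first cell containing
    # `node` to recover the head of each list the node appears in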
    def topList(node, g):
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion cases where
        # tagging can occur in either order e.g. PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue
            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain
                yield out

            if not ptt and not out:
                parent_tag = None

        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing  in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {OntId(a):set(t if isinstance(t, BNode) else OntId(t)
                                  for t in cgraph.transitive_subjects(rdf.type, a)
                                  if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    _endpoint_chains = {OntId(endpoint):[[OntId(endpoint)] + [OntId(e) for e in chain]
                                            for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])

    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)
    #def invOneOf(tag, g):

    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        return {OntId(thing):[[OntId(thing)] + [OntId(e) for e in chain]
                              for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
        }

    def print_chains(thing_chains):
        print('\nstart from beginning')

        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))

        print('\nstart from end')

        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s:frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints = 'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint
    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p
        return
        # NOTE: everything below is unreachable; kept as an alternate approach
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        # duh...
        #pprint(chain)
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    valid_tagsets = frozenset((t for s in tag_transitions.values() for t in s))  # NB: rebinds the valid_tagsets function above
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'
    tt = {}
    for endtype, chains  in endpoint_chains.items():
        for *_chain, tag in chains:
            if _chain:
                next_thing = _chain[-1]
            for ets in tts:
                if tag in ets:
                    tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
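
The tag_transitions mapping returned above is in effect a state machine over frozensets of tags; a sketch of how a consumer might replay a tag sequence against it (is_valid_tag_sequence is illustrative, not part of this module):

def is_valid_tag_sequence(tags, tag_transitions):
    # each accumulated tagset must be a transition reachable from the
    # previous state; None is the start state used by valid_tagsets
    state = None
    seen = set()
    for tag in tags:
        seen.add(tag)
        next_state = frozenset(seen)
        if next_state not in tag_transitions.get(state, frozenset()):
            return False
        state = next_state
    return True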
Example #29
def backend_refactor_values():
    uri_reps_lit = {
        # from https://github.com/information-artifact-ontology/IAO/blob/master/docs/BFO%201.1%20to%202.0%20conversion/mapping.txt
        'http://www.ifomis.org/bfo/1.1#Entity': 'BFO:0000001',
        'BFO1SNAP:Continuant': 'BFO:0000002',
        'BFO1SNAP:Disposition': 'BFO:0000016',
        'BFO1SNAP:Function': 'BFO:0000034',
        'BFO1SNAP:GenericallyDependentContinuant': 'BFO:0000031',
        'BFO1SNAP:IndependentContinuant': 'BFO:0000004',
        'BFO1SNAP:MaterialEntity': 'BFO:0000040',
        'BFO1SNAP:Quality': 'BFO:0000019',
        'BFO1SNAP:RealizableEntity': 'BFO:0000017',
        'BFO1SNAP:Role': 'BFO:0000023',
        'BFO1SNAP:Site': 'BFO:0000029',
        'BFO1SNAP:SpecificallyDependentContinuant': 'BFO:0000020',
        'BFO1SPAN:Occurrent': 'BFO:0000003',
        'BFO1SPAN:ProcessualEntity': 'BFO:0000015',
        'BFO1SPAN:Process': 'BFO:0000015',
        'BFO1SNAP:ZeroDimensionalRegion': 'BFO:0000018',
        'BFO1SNAP:OneDimensionalRegion': 'BFO:0000026',
        'BFO1SNAP:TwoDimensionalRegion': 'BFO:0000009',
        'BFO1SNAP:ThreeDimensionalRegion': 'BFO:0000028',
        'http://purl.org/obo/owl/OBO_REL#bearer_of': 'RO:0000053',
        'http://purl.org/obo/owl/OBO_REL#inheres_in': 'RO:0000052',
        'ro:has_part': 'BFO:0000051',
        'ro:part_of': 'BFO:0000050',
        'ro:has_participant': 'RO:0000057',
        'ro:participates_in': 'RO:0000056',
        'http://purl.obolibrary.org/obo/OBI_0000294': 'RO:0000059',
        'http://purl.obolibrary.org/obo/OBI_0000297': 'RO:0000058',
        'http://purl.obolibrary.org/obo/OBI_0000300': 'BFO:0000054',
        'http://purl.obolibrary.org/obo/OBI_0000308': 'BFO:0000055',

        # more bfo
        'BFO1SNAP:SpatialRegion': 'BFO:0000006',
        'BFO1SNAP:FiatObjectPart': 'BFO:0000024',
        'BFO1SNAP:ObjectAggregate': 'BFO:0000027',
        'BFO1SNAP:Object': 'BFO:0000030',
        #'BFO1SNAP:ObjectBoundary'  # no direct replacement, only occurs in unused
        #'BFO1SPAN:ProcessAggregate'  # was not replaced, could simply be a process itself??
        #'BFO1SNAP:DependentContinuant'  # was not replaced

        # other
        #'ro:participates_in'  # above
        #'ro:has_participant'  # above
        #'ro:has_part',  # above
        #'ro:part_of',  # above
        #'ro:precedes'  # unused and only in inferred
        #'ro:preceded_by'  # unused and only in inferred
        #'ro:transformation_of'  # unused and only in inferred
        #'ro:transformed_into'  # unused and only in inferred
        'http://purl.org/obo/owl/obo#inheres_in': 'RO:0000052',
        'http://purl.obolibrary.org/obo/obo#towards': 'RO:0002503',
        'http://purl.org/obo/owl/pato#towards': 'RO:0002503',
        'http://purl.obolibrary.org/obo/pato#inheres_in': 'RO:0000052',
        'BIRNLEX:17': 'RO:0000053',  # is_bearer_of
        'http://purl.obolibrary.org/obo/pato#towards': 'RO:0002503',
        'ro:adjacent_to': 'RO:0002220',
        'ro:derives_from': 'RO:0001000',
        'ro:derives_into': 'RO:0001001',
        'ro:agent_in': 'RO:0002217',
        'ro:has_agent': 'RO:0002218',
        'ro:contained_in': 'RO:0001018',
        'ro:contains': 'RO:0001019',
        'ro:located_in': 'RO:0001025',
        'ro:location_of': 'RO:0001015',
        'ro:has_proper_part': 'NIFRID:has_proper_part',
        'ro:proper_part_of': 'NIFRID:proper_part_of',  # part of where things are not part of themselves, need to review
    }
    ug = makeGraph('',
                   prefixes=makePrefixes('ro', 'RO', 'BIRNLEX', 'NIFRID',
                                         'BFO', 'BFO1SNAP', 'BFO1SPAN'))
    ureps = {
        ug.check_thing(k): ug.check_thing(v)
        for k, v in uri_reps_lit.items()
    }

    return ureps
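
A sketch of applying ureps to migrate predicates in a graph (assuming check_thing returns rdflib URIRefs, as its use here suggests; refactor_predicates is illustrative):

def refactor_predicates(graph, ureps):
    # swap any predicate that has a replacement in ureps
    for s, p, o in list(graph):
        if p in ureps:
            graph.remove((s, p, o))
            graph.add((s, ureps[p], o))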
Example #30
def loadData(cls):  # classmethod pulled out of its class; expects cls.source
    ug = makeGraph('utilgraph', prefixes=uPREFIXES)
    with open(cls.source, 'rt') as f:
        ids_raw = set(_.strip() for _ in f.readlines())
        ids = set(ug.expand(_.strip()).toPython() for _ in ids_raw)
        return ids_raw, ids
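
A hedged usage sketch, assuming a host class that defines source (the real class is not shown in this excerpt):

class IdSource:
    source = 'ids.txt'  # one curie or iri per line; filename is illustrative
    loadData = classmethod(loadData)

ids_raw, ids = IdSource.loadData()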