Esempio n. 1
0
def normalize_prefixes(graph, curies):
    """ Rebuild namespace bindings for *graph*.

    Wraps *graph* in a makeGraph with the standard owl/skos/oboInOwl
    prefixes (dropping the '' default binding), then copies every
    triple into a fresh graph whose bindings come only from makePrefixes
    plus the supplied *curies* mapping.

    Returns a (mg, ng_) pair of makeGraph wrappers: mg wraps the
    original graph, ng_ wraps the rebound copy.
    """
    mg = makeGraph('nifall',
                   makePrefixes('owl', 'skos', 'oboInOwl'),
                   graph=graph)
    mg.del_namespace('')  # never keep the empty default namespace

    old_namespaces = list(graph.namespaces())  # kept for the debugging code commented below
    ng_ = makeGraph('', prefixes=makePrefixes('oboInOwl', 'skos'))
    # plain loops instead of side-effect list comprehensions
    for t in mg.g:
        ng_.g.add(t)
    for n, p in curies.items():
        if n != '':  # the empty prefix is not allowed
            ng_.add_namespace(n, p)
    #[mg.add_namespace(n, p) for n, p in old_namespaces if n.startswith('ns') or n.startswith('default')]
    #[mg.del_namespace(n) for n in list(mg.namespaces)]
    #graph.namespace_manager.reset()
    #[mg.add_namespace(n, p) for n, p in wat.items() if n != '']
    return mg, ng_
Esempio n. 2
0
class ksDefs(Ont):
    """ Definitions sourced from knowledge space descriptions. """
    filename = 'ksdesc-defs'
    # NOTE(review): 'Knolwedge' typo preserved; fixing it would change
    # the emitted ontology metadata
    name = 'Knolwedge Space Defs'
    shortname = 'ksdefs'
    sources = ksDefsSource,
    prefixes = makePrefixes('SCR', 'MBA', 'UBERON', 'PR',
                            #'NIFMOL', 'NIFCELL', 'NIFGA', 'NIFNEURMOR',
                            'NLXMOL', 'SAO', 'NLXCELL', 'NIFEXT', 'BIRNLEX')
    def _triples(self):
        """ Yield (id, skos:definition, Literal) triples harvested from
            per-prefix directories of markdown files under top_level
            (defined elsewhere in this module). """
        skipped_prefixes = set()
        for putative_dir in top_level:
            if os.path.isdir(putative_dir):
                for putative_md in glob(putative_dir + '/*.md'):
                    # the directory name doubles as the curie prefix
                    prefix = os.path.split(putative_dir)[-1]
                    if prefix in self.prefixes:
                        # the file basename (minus .md) is the local identifier
                        ident = prefix + ':' + os.path.splitext(os.path.split(putative_md)[-1])[0]
                        id_ = self._graph.expand(ident)
                        with open(putative_md, 'rt') as f:
                            def_ = f.read()

                        # keep only the text after a Description/Definition
                        # heading, stripping any '=' underline characters
                        for test in ('Description', 'Definition' ):
                            if test in def_:
                                def_ = def_.split(test, 1)[-1].strip().strip('=').strip()
                                break

                        yield id_, skos.definition, Literal(def_)
                    else:
                        skipped_prefixes.add(prefix)
        print(sorted(skipped_prefixes))  # report directories with no known prefix
Esempio n. 3
0
def ilx_json_to_tripples(j):
    # this will be much easier if everything can be exported as a
    # relationship or an annotation
    """ Convert one InterLex json record into triples and serialize
        them as nifttl.

    *j* must provide the keys 'ilx', 'type' (one of 'term',
    'relationship', 'annotation'), 'label', 'definition', 'synonyms',
    'superclasses' and 'existing_ids'.

    Returns the serialized graph (per rdflib's serialize). """
    g = makeGraph('do not write me',
                  prefixes=makePrefixes('ILX', 'ilx', 'owl', 'skos', 'NIFRID'))

    def pref(inp):
        # expand an ilx fragment to a full iri in the ilx namespace
        return makePrefixes('ilx')['ilx'] + inp

    id_ = pref(j['ilx'])
    # map the record type onto the corresponding owl entity type
    type_ = {
        'term': 'owl:Class',
        'relationship': 'owl:ObjectProperty',
        'annotation': 'owl:AnnotationProperty'
    }[j['type']]
    out = []  # TODO need to expand these
    out.append((id_, rdflib.RDF.type, type_))
    out.append((id_, rdflib.RDFS.label, j['label']))
    out.append((id_, 'skos:definition', j['definition']))
    for syndict in j['synonyms']:
        out.append((id_, 'NIFRID:synonym', syndict['literal']))
    # should we be returning the preferred id here not the ilx?
    # or maybe that is a different json output?
    for superdict in j['superclasses']:
        out.append((id_, rdflib.RDFS.subClassOf, pref(superdict['ilx'])))
    for eid in j['existing_ids']:
        out.append((id_, 'ilx:someOtherId', eid['iri']))  # predicate TODO
    # plain loop instead of a side-effect list comprehension
    for o in out:
        g.add_trip(*o)
    return g.g.serialize(format='nifttl')  # other formats can be chosen
Esempio n. 4
0
def load(file, olr=None, mkdir=False):
    """ Parse an ontology file, create the matching '<name>-dead'
        ontology shell, and hand both to extract.

    file   path to the ontology serialization (may contain ~)
    olr    local_base forwarded to createOntology
    mkdir  forwarded to extract
    """
    filepath = os.path.expanduser(file)
    _, ext = os.path.splitext(filepath)
    filetype = ext.strip('.')
    # only ttl needs an explicit format hint; rdflib guesses the rest
    infmt = 'turtle' if filetype == 'ttl' else None
    print(filepath)
    graph = rdflib.Graph()
    try:
        graph.parse(filepath, format=infmt)
    except rdflib.plugins.parsers.notation3.BadSyntax:
        print('PARSING FAILED', filepath)
        raise  # bare raise preserves the original traceback
    og = makeGraph('', graph=graph)

    # FIXME this should really just be a function :/
    curie, *prefs = kludge(filepath)

    name = os.path.splitext(os.path.basename(filepath))[0]
    if 'slim' in name:
        name = name.replace('slim', '')
    try:
        # prefer the owl:versionIRI when one is present
        version = list(graph.subject_objects(owl.versionIRI))[0][1]
    except IndexError:
        # fall back to the ontology iri itself
        version = list(graph.subjects(rdf.type, owl.Ontology))[0]

    ng = createOntology(f'{name}-dead',
                        f'NIF {curie} deprecated',
                        makePrefixes('replacedBy', 'NIFRID', curie, *prefs),
                        f'{name}dead',
                        f'Classes from {curie} with owl:deprecated true that we want rdfs:subClassOf NIFRID:birnlexRetiredClass, or classes hiding in a oboInOwl:hasAlternativeId annotation. This file was generated by pyontutils/necromancy from {version}.',
                        local_base=olr)
    extract(og, ng, curie, mkdir)
Esempio n. 5
0
class CoCoMac(genericPScheme):
    """ Parcellation scheme generated from the CoCoMac
        BrainMaps_BrainSiteAcronyms table. """
    ont = OntMeta(
        'http://ontology.neuinfo.org/NIF/ttl/generated/parcellation/',
        'cocomacslim', 'CoCoMac terminology', 'CoCoMac',
        ('This file is automatically generated from the CoCoMac '
         'database on the terms from BrainMaps_BrainSiteAcronyms.' +
         '**FIXME**'), TODAY())
    concept = PScheme(ilx['cocomac/uris/readable/BrainSiteAcronym'],
                      'CoCoMac terminology parcellation concept',
                      'NCBITaxon:9544', 'ilxtr:various')
    atlas = PSArtifact(
        ilx['cocomac/uris/readable/BrainSiteAcronymTable'],
        'CoCoMac terminology',
        None,  #'no version info',
        None,  #'no date',
        'http://cocomac.g-node.org',
        'scholarly things',
        tuple(),
        tuple())

    PREFIXES = makePrefixes('NIFRID')
    # the cocomac "namespace" is a sql query endpoint keyed by row ID
    PREFIXES[
        'cocomac'] = 'http://cocomac.g-node.org/services/custom_sql_query.php?sql=SELECT%20*%20from%20BrainMaps_BrainSiteAcronyms%20where%20ID='  # looking for better options

    @classmethod
    def datagetter(cls):
        """ Fetch the BrainSiteAcronyms table; returns a list of rows
            whose first row is the field names. """
        url = 'http://cocomac.g-node.org/services/custom_sql_query.php?sql=SELECT * from BrainMaps_BrainSiteAcronyms;&format=json'
        table = requests.get(url).json()
        fields = table['fields']
        data = [fields] + list(table['data'].values())
        return data

    @classmethod
    def dataproc(cls, graph, data):
        """ Add a class plus acronym/label triples for each data row. """
        # rowParse dispatches each column value to the method named
        # after the column, in row order
        class cocomac(rowParse):
            def ID(self, value):
                self.identifier = 'cocomac:' + value  # safe because reset every row (ish)
                graph.add_class(self.identifier, cls.concept.curie)

            def Key(self, value):
                pass

            def Summary(self, value):
                pass

            def Acronym(self, value):
                graph.add_trip(self.identifier, ACRONYM, value)

            def FullName(self, value):
                graph.add_trip(self.identifier, rdfs.label,
                               '(%s) ' % cls.ont.shortname + value)
                graph.add_trip(self.identifier, PARCLAB, value)

            def LegacyID(self, value):
                graph.add_trip(self.identifier, ACRONYM, value)

            def BrainInfoID(self, value):
                pass

        cocomac(data)
Esempio n. 6
0
class genericPScheme:
    """ Base class for parcellation scheme generators.

    Subclasses override ont/concept/atlas/PREFIXES and the
    datagetter/datamunge/dataproc/validate hooks.  NOTE: __new__
    runs the whole pipeline and returns an (ontid, atlas) tuple,
    not an instance — calling the class *is* running the build. """
    ont = OntMeta
    concept = PScheme
    atlas = PSArtifact
    PREFIXES = makePrefixes('ilxtr', 'owl', 'skos', 'BIRNLEX', 'NCBITaxon')

    def __new__(cls, validate=False):
        # guard against subclasses that forgot to instantiate the
        # class-level templates (they default to the bare types)
        error = 'Expected %s got %s'
        if type(cls.ont) != OntMeta:
            raise TypeError(error % (OntMeta, type(cls.ont)))
        elif type(cls.concept) != PScheme:
            raise TypeError(error % (PScheme, type(cls.concept)))
        elif type(cls.atlas) != PSArtifact:
            raise TypeError(error % (PSArtifact, type(cls.atlas)))

        ontid = cls.ont.path + cls.ont.filename + '.ttl'
        # merge subclass prefixes with the base set (base wins on conflict)
        PREFIXES = {k: v for k, v in cls.PREFIXES.items()}
        PREFIXES.update(genericPScheme.PREFIXES)
        #if '' in cls.PREFIXES:  # NOT ALLOWED!
        #if PREFIXES[''] is None:
        #PREFIXES[''] = ontid + '/'
        graph = makeGraph(cls.ont.filename, PREFIXES, writeloc='/tmp/')
        graph.add_ont(ontid, *cls.ont[2:])
        make_scheme(graph, cls.concept, cls.atlas.curie)
        # fetch -> munge in place -> emit triples -> finalize
        data = cls.datagetter()
        cls.datamunge(data)
        cls.dataproc(graph, data)
        add_ops(graph)
        graph.write()
        if validate or getattr(cls, 'VALIDATE', False):
            cls.validate(graph)
        return ontid, cls.atlas

    @classmethod
    def datagetter(cls):
        """ example datagetter function, make any local modifications here """
        with open('myfile', 'rt') as f:
            rows = [r for r in csv.reader(f)]
        dothing = lambda _: [i for i, v in enumerate(_)]
        rows = [dothing(_) for _ in rows]
        raise NotImplementedError('You need to implement this yourlself!')
        return rows

    @classmethod
    def datamunge(cls, data):
        """ in place modifier of data """
        pass

    @classmethod
    def dataproc(cls, graph, data):
        """ example datagetter function, make any local modifications here """
        for thing in data:
            graph.add_trip(*thing)
        raise NotImplementedError('You need to implement this yourlself!')

    @classmethod
    def validate(cls, graph):
        """ Put any post validation here. """
        raise NotImplementedError('You need to implement this yourlself!')
Esempio n. 7
0
 def switch_dead(g):
     """ Rewrite every uri listed in the deads mapping (defined in the
         enclosing scope) from old (f) to replacement (r), recording the
         old uri on the replacement as an oboInOwl:hasAlternateId
         string literal. """
     ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
     for f, r in deads.items():
         ng.replace_uriref(f, r)
         ng.add_trip(r, 'oboInOwl:hasAlternateId',
                     rdflib.Literal(f, datatype=rdflib.XSD.string))
         g.remove(
             (r, replacedBy, r))  # in case the replaced by was already in
Esempio n. 8
0
def createRecordsFromGraph(graph, existing, target_graph=None):
    """ Collect ILXREPLACE placeholder terms from a graph into the
        *existing* record dict (mutated in place).

    graph         a makeGraph wrapper (its .g is the rdflib graph)
    existing      dict keyed by qname; new entries get
                  {'id', 'sc', 'files', 'rec'}; entries already present
                  are merged (non-empty fields win, files accumulate)
    target_graph  optional makeGraph whose ontid is recorded instead
                  of graph's own
    """
    mg = graph
    graph = mg.g
    s = rdflib.URIRef(makePrefixes('NIFRID')['NIFRID'] + 'synonym')
    if target_graph is None:
        target_ontology_iri = mg.ontid
    else:
        target_ontology_iri = target_graph.ontid

    # bail early if this file has no ILXREPLACE placeholders at all
    if 'ILXREPLACE' in mg.namespaces:
        namespace = str(mg.namespaces['ILXREPLACE'])
    else:
        print('Nothing needs to be replaced in', mg.filename)
        return
    # any iri in subject, predicate, or object position that starts
    # with the ILXREPLACE namespace
    query = ("SELECT DISTINCT ?v "
             "WHERE { {?v ?p ?o} UNION {?s ?v ?o} UNION {?s ?p ?v} . "
             "FILTER("
             "strstarts(str(?v), '%s') )}") % namespace
    vals = graph.query(query)
    #existing = {}  # TODO this needs to populate from an existing source ie the ontology, and a tempid -> realid map?
    for val, in vals:
        qn = graph.namespace_manager.qname(val)
        try:
            labs = list(graph.objects(val, rdflib.RDFS.label))
            label = str(labs[0])
        except IndexError:
            label = None  # not defined here but we need to collect info here anyway
        definition = list(graph.objects(val, rdflib.namespace.SKOS.definition))
        if definition: definition = definition[0]
        synonyms = list(graph.objects(val, s))
        # only keep a named (URIRef) superclass, skipping restrictions etc.
        superclass = [_ for _ in graph.objects(val, rdflib.RDFS.subClassOf) if type(_) == rdflib.URIRef]
        superclass = superclass[0] if superclass else None
        try:
            superclass = graph.namespace_manager.qname(superclass)
        except (TypeError, ValueError) as e:
            # TypeError when superclass is None, ValueError when it
            # cannot be compacted to a qname
            print('ERROR: superclass of', qn, 'not a proper uri', superclass)
            superclass = None
        rec = makeIlxRec(label, definition, type='term', comment=qn, synonyms=synonyms, existing_ids=[], superclass=superclass, ontologies=[target_ontology_iri])
        if qn in existing:
            _tmp = set(existing[qn]['files'])
            _tmp.add(mg.filename)  # prevent duplicate files from multiple runs
            existing[qn]['files'] = list(_tmp)
            # merge: keep existing non-empty fields, fill gaps from rec
            newrec = {}
            for k, v in existing[qn]['rec'].items():
                if v:
                    newrec[k] = v
                else:
                    newrec[k] = rec[k]
            existing[qn]['rec'] = newrec

        else:
            existing[qn] = {'id':None,
                            'sc':superclass,  # make sorting easier
                            'files':[mg.filename],
                            #'done':False,  # we probably don't need this
                            'rec':rec}

    superToLabel(existing)
Esempio n. 9
0
 def fixIons(g):
     """ Replace atom curies with their ion counterparts in g. """
     # there are a series of atom/ion confusions that shall be dealt with, solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
     ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
     # atom           ion
     # the bare tuples below are deliberate no-ops documenting atoms
     # whose usage is already correct
     None, 'CHEBI:29108'  # calcium is ok
     ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
     ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
     ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
     ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
     None, 'CHEBI:29105'  # zinc is ok
Esempio n. 10
0
def main():
    """ Extract NIFGA owl:equivalentClass mappings from the uberon
        bridge ontology and write them out as a small NIFGA-Equivs
        ontology. """
    PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

    g = rdflib.Graph()
    g.parse('http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl', format='xml')
    name = 'NIFGA-Equivs'
    ng = makeGraph(name, PREFIXES)
    # rewrite each equivalentClass subject into the NIFGA namespace
    # using the final path segment of the object iri as the fragment
    for s, p, o in g.triples((None, rdflib.OWL.equivalentClass, None)):
        ng.g.add((rdflib.URIRef(PREFIXES['NIFGA'] + o.rsplit('/', 1)[-1]), p, o))
    ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl', 'NIFGA to NIFSTD mappings')
    ng.write()
Esempio n. 11
0
class BermanLabels(LabelsBase):
    """ Berman Cat labels """
    # sort by label/structure not by abbrev
    filename = 'berman-cat-labels'
    name = 'Berman 1968 cat brain stem labels'
    shortname = 'bercat'
    imports = parcCore,
    prefixes = {
        **makePrefixes('NIFRID', 'ilxtr', 'prov'), 'BERCAT': str(BERCAT)
    }
    sources = BermanSrc,
    namespace = BERCAT
    root = LabelRoot(
        iri=nsExact(namespace),
        label='Berman 1968 cat label root',
        shortname=shortname,
        definingArtifacts=(s.artifact.iri for s in sources),
    )

    def _triples(self):
        """ Yield label triples for every (label, paren_thing, abbrev,
            index) record; identifiers are 1-based record positions. """
        for source in self.sources:
            for i, (label, paren_thing, abbrev, index) in enumerate(source):
                local_identifier = str(i + 1)
                iri = self.namespace[
                    local_identifier]  # TODO load from existing
                yield from Label(
                    labelRoot=self.root,
                    label=label,
                    #altLabel=None,
                    #synonyms=extras,
                    abbrevs=(abbrev, ),
                    iri=iri,
                )
                if paren_thing:
                    yield iri, ilx[
                        'berman/uris/readable/hasWeirdParenValue'], rdflib.Literal(
                            paren_thing)

                # NOTE: everything below this continue is deliberately
                # disabled dead code (see FIXME) kept for a future
                # regions file
                continue
                # FIXME different file ...
                region_iri = ilx['berman/uris/cat/regions/' + local_identifier]
                # FIXME incorporate version in tree or no?
                # just have it be consecutive? HRM
                yield region_iri, rdf.type, owl.Class
                yield region_iri, ilxtr.hasParcellationLabel, iri  # FIXME predicate choice ...
                yield region_iri, ilxtr.isDefinedBy, BermanSrc.artifact.iri  # FIXME
                for plate_num in index:
                    yield region_iri, ilxtr.appearsOnPlateNumber, rdflib.Literal(
                        plate_num)  # FIXME generalize ...
Esempio n. 12
0
class WHSSDLabels(LabelsBase):
    """ Waxholm Space Sprague Dawley atlas labels. """
    filename = 'waxholm-rat-labels'
    name = 'Waxholm Sprague Dawley Atlas Labels'
    shortname = 'whssd'
    imports = parcCore,
    prefixes = {
        **makePrefixes('NIFRID', 'ilxtr', 'prov', 'dcterms'), 'WHSSD':
        str(WHSSD)
    }
    sources = WHSSDSrc2, WHSSDilfSrc2, WHSSDSrc1, WHSSDilfSrc1
    namespace = WHSSD
    root = LabelRoot(
        iri=nsExact(namespace),  # ilxtr.whssdroot,
        label='Waxholm Space Sprague Dawley parcellation label root',
        shortname=shortname,
        definingArtifacts=(s.artifact.iri for s in sources),
    )

    def _triples(self):
        """ Yield label triples for each source record; records may be
            (index, label) or (index, label, abbrev, parent). """
        for source in self.sources:
            preds = False  # becomes True if any record carried a parent
            for index, label, *rest in source:
                abbrev, parent = rest if rest else (
                    False, False)  # a tricky one if you miss the parens
                # skip abbrevs that merely repeat the label
                abbrevs = (abbrev, ) if abbrev and abbrev != label else tuple()
                if int(index
                       ) >= 1000:  # FIXME this is the WRONG way to do this
                    # FIXME parentless structures in the ilf files?
                    label += ' (structure)'
                iri = WHSSD[str(index)]
                yield from Label(
                    labelRoot=self.root,
                    label=label,
                    abbrevs=abbrevs,
                    iri=iri,
                )
                if parent:
                    preds = True
                    parent = WHSSD[str(parent)]
                    yield from restriction.serialize(
                        iri, source.predicates[ilxtr.labelPartOf], parent)

            yield from source.isVersionOf
            if preds:
                # declare each source-specific predicate a subproperty
                # of the generic one
                for parent, child in source.predicates.items(
                ):  # FIXME annotationProperty vs objectProperty
                    yield child, rdfs.subPropertyOf, parent
Esempio n. 13
0
    def fixAltIdIsURIRef(g):
        """ Convert oboInOwl:hasAlternativeId objects that were parsed
            as URIRefs into xsd:string literals of their curied form. """
        hai = ug.expand('oboInOwl:hasAlternativeId')  # ug comes from the enclosing scope
        # i = ug.expand('oboInOwl:id')  # :id does not exist
        # instantiated for its side effect of binding the CHEBI prefix
        makeGraph('', graph=g, prefixes=makePrefixes(
            'CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    # an auto-generated nsN prefix means the namespace
                    # was never properly bound
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
Esempio n. 14
0
class DMBALabels(HBALabels):
    """ Allen Developing Mouse Brain Atlas labels; reuses the HBA
        record-processing logic, only the metadata differs. """
    path = 'ttl/generated/parcellation/'
    filename = 'dmbaslim'
    name = 'Allen Developing Mouse Brain Atlas Ontology'
    shortname = 'dmba'
    prefixes = {
        **makePrefixes('NIFRID', 'ilxtr', 'prov'), 'DMBA': str(DMBA),
        'ilxDMBA': str(ilxDMBA)
    }
    sources = DMBASrc,
    namespace = DMBA
    root = LabelRoot(
        iri=AIBS['mouse/devel/labels/'],
        label='Allen Developing Mouse Brain Atlas parcellation label root',
        shortname=shortname,
        definingArtifacts=(s.artifact.iri for s in sources),
    )
Esempio n. 15
0
def ncbigene_make():
    """ Build and write the ncbigeneslim ontology from the fixed gene
        id subset in resources/gene-subset-ids.txt, fetching summaries
        from the NCBI eutils esummary service in batches of 100. """
    IDS_FILE = (Path(__file__).parent /
                'resources/gene-subset-ids.txt').as_posix()
    with open(IDS_FILE, 'rt') as f:  # this came from neuroNER
        ids = [l.split(':')[1].strip() for l in f.readlines()]

    #url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?retmode=json&retmax=5000&db=gene&id='
    #for id_ in ids:
    #data = requests.get(url + id_).json()['result'][id_]
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    data = {
        'db': 'gene',
        'retmode': 'json',
        'retmax': 5000,
        'id': None,
    }
    chunks = []
    # eutils limits request size, so POST the ids 100 at a time
    for i, idset in enumerate(chunk_list(ids, 100)):
        print(i, len(idset))
        # BUG FIX: a stray trailing comma previously made this a 1-tuple
        data['id'] = ','.join(idset)
        resp = requests.post(url, data=data).json()
        chunks.append(resp)

    # fold all chunked responses into the first result dict
    base = chunks[0]['result']
    uids = base['uids']
    for more in chunks[1:]:
        data = more['result']
        uids.extend(data['uids'])
        base.update(data)
    #base['uids'] = uids  # i mean... its just the keys
    base.pop('uids')

    ng = createOntology(
        'ncbigeneslim',
        'NIF NCBI Gene subset',
        makePrefixes('ilxtr', 'NIFRID', 'NCBIGene', 'NCBITaxon', 'skos',
                     'owl'),
        'ncbigeneslim',
        'This subset is automatically generated from the NCBI Gene database on a subset of terms listed in %s.'
        % IDS_FILE,
        remote_base='http://ontology.neuinfo.org/NIF/')

    for k, v in base.items():
        #if k != 'uids':
        ncbi(v, ng)
    ng.write()
Esempio n. 16
0
def uri_switch(filenames, get_values):
    """ Rewrite NIF* uris to their NIFSTD equivalents across
        *filenames* in parallel, accumulating the replacement mapping
        triples into a NIF-NIFSTD-mapping ontology which is written
        out at the end.

    get_values is called with the replacement graph and must return
    (fragment_prefixes, ureps) used by the per-file workers. """
    replacement_graph = createOntology(
        'NIF-NIFSTD-mapping', 'NIF* to NIFSTD equivalents',
        makePrefixes('BIRNANN', 'BIRNOBI', 'BIRNOBO', 'NIFANN', 'NIFCELL',
                     'NIFCHEM', 'NIFDYS', 'NIFFUN', 'NIFGA', 'NIFGG', 'NIFINV',
                     'NIFMOL', 'NIFMOLINF', 'NIFMOLROLE', 'NIFNCBISLIM',
                     'NIFNEURBR', 'NIFNEURBR2', 'NIFNEURCIR', 'NIFNEURMC',
                     'NIFNEURMOR', 'NIFNEURNT', 'NIFORG', 'NIFQUAL', 'NIFRES',
                     'NIFRET', 'NIFSCID', 'NIFSUB', 'NIFUNCL', 'OBOANN',
                     'SAOCORE'))
    fragment_prefixes, ureps = get_values(replacement_graph)
    print('Start writing')
    trips_lists = Parallel(n_jobs=9)(
        delayed(do_file)(f, swapUriSwitch, ureps, fragment_prefixes)
        for f in filenames)
    print('Done writing')
    # plain loops instead of a side-effect list comprehension
    for trips in trips_lists:
        for t in trips:
            replacement_graph.g.add(t)
    replacement_graph.write()
Esempio n. 17
0
def parcellation_schemes(ontids_atlases):
    """ Build the umbrella parcellation ontology that owl:imports every
        individual scheme and adds one root concept per atlas.

    ontids_atlases  iterable of (import ontology id, atlas) pairs
    """
    ont = OntMeta(
        GENERATED, 'parcellation',
        'NIF collected parcellation schemes ontology', 'NIF Parcellations',
        'Brain parcellation schemes as represented by root concepts.', TODAY())
    ontid = ont.path + ont.filename + '.ttl'
    PREFIXES = makePrefixes('ilxtr', 'owl', 'skos', 'NIFRID')
    graph = makeGraph(ont.filename, PREFIXES, writeloc='/tmp/')
    graph.add_ont(ontid, *ont[2:])

    for import_id, atlas in sorted(ontids_atlases):
        graph.add_trip(ontid, owl.imports, import_id)
        add_triples(graph, atlas, make_atlas)

    # superclasses that every atlas/parcellation concept hangs off of
    graph.add_class(ATLAS_SUPER, label=atname)

    graph.add_class(PARC_SUPER, label=psname)
    graph.write()
Esempio n. 18
0
class HCPMMPLabels(LabelsBase):
    """ Human Connectome Project multi-modal cortical parcellation
        labels (Glasser et al. 2016). """
    filename = 'hcpmmp'
    name = 'Human Connectome Project Multi-Modal human cortical parcellation'
    shortname = 'hcpmmp'
    imports = parcCore,
    prefixes = {
        **makePrefixes('NIFRID', 'ilxtr', 'prov'), 'HCPMMP': str(HCPMMP)
    }
    sources = HCPMMPSrc,
    namespace = HCPMMP
    root = LabelRoot(
        iri=nsExact(namespace),  # ilxtr.hcpmmproot,
        label='HCPMMP label root',
        shortname=shortname,
        definingArtifacts=(s.artifact.iri for s in sources),
    )

    def _triples(self):
        """ Yield one Label per source record, splitting Other_Names
            into synonyms (>3 chars) and abbreviations (<=3 chars). """
        for source in self.sources:
            for record in source:
                (Parcellation_Index, Area_Name, Area_Description,
                 Newly_Described, Results_Section, Other_Names,
                 Key_Studies) = [r.strip() for r in record]
                iri = HCPMMP[str(Parcellation_Index)]
                onames = [
                    n.strip() for n in Other_Names.split(',') if n.strip()
                ]
                # heuristic: short alternate names are abbreviations
                syns = (n for n in onames if len(n) > 3)
                abvs = tuple(n for n in onames if len(n) <= 3)
                cites = tuple(s.strip() for s in Key_Studies.split(','))
                if Newly_Described in ('Yes*', 'Yes'):
                    # newly described areas implicitly cite the paper itself
                    cites = cites + ('Glasser and Van Essen 2016', )

                yield from Label(
                    labelRoot=self.root,
                    label=Area_Description,
                    altLabel=Area_Name,
                    synonyms=syns,
                    abbrevs=abvs,
                    #bibliographicCitation=  # XXX vs definingCitation
                    definingCitations=cites,
                    iri=iri)
Esempio n. 19
0
class parcCore(ParcOnt):
    """ Core OWL2 entities needed for parcellations """

    # setup

    path = 'ttl/generated/'
    filename = 'parcellation-core'
    name = 'Parcellation Core'
    #shortname = 'parcore'  # huehuehue
    prefixes = {**makePrefixes('skos', 'BFO'), **ParcOnt.prefixes}
    imports = NIFTTL['nif_backend.ttl'], parcArts

    # stuff

    # root classes whose class-level triples are emitted below
    parents = LabelRoot, RegionRoot

    def _triples(self):
        """ Yield the labelPartOf property axioms plus the class
            triples of every parent root. """
        # labelPartOf is a transitive subproperty of partOf
        yield ilxtr.labelPartOf, rdf.type, owl.ObjectProperty
        yield ilxtr.labelPartOf, rdf.type, owl.TransitiveProperty
        yield ilxtr.labelPartOf, rdfs.subPropertyOf, partOf
        for parent in self.parents:
            yield from parent.class_triples()
Esempio n. 20
0
class parcArts(ParcOnt):
    """ Ontology file for artifacts that define labels or
        geometry for parcellation schemes. """

    # setup

    path = 'ttl/generated/'
    filename = 'parcellation-artifacts'
    name = 'Parcellation Artifacts'
    #shortname = 'parcarts'
    prefixes = {
        **makePrefixes('NCBITaxon', 'UBERON', 'skos'),
        **ParcOnt.prefixes,
        'FSLATS': str(FSLATS),
        'paxmusver': str(paxmusver),
        'paxratver': str(paxratver),
    }

    def __call__(self):
        # passthrough kept so subclass behavior is explicit
        return super().__call__()

    @property
    def _artifacts(self):
        """ Yield artifacts from every Collector subclass defined
            outside nifstd_tools.parcellation itself. """
        for collector in subclasses(Collector):
            if collector.__module__ != 'nifstd_tools.parcellation':  # just run __main__
                yield from collector.arts()

    def _triples(self):
        """ Yield class triples for the Artifact hierarchy followed by
            the triples of each concrete artifact. """
        # imported here to avoid a circular import at module load time
        from nifstd_tools.parcellation import Artifact
        yield from Artifact.class_triples()
        # OH LOOK PYTHON IS BEING AN AWFUL LANGUAGE AGAIN
        for art_type in subclasses(
                Artifact
        ):  # this is ok because all subclasses are in this file...
            # do not comment this out it is what makes the
            # upper classes in the artifacts hierarchy
            yield from art_type.class_triples()
        for artifact in self._artifacts:
            yield from artifact
Esempio n. 21
0
class HBALabels(LabelsBase):
    """ Allen Human Brain Atlas labels. """
    filename = 'hbaslim'
    name = 'Allen Human Brain Atlas Ontology'
    shortname = 'hba'
    imports = parcCore,
    prefixes = {
        **makePrefixes('NIFRID', 'ilxtr', 'prov'), 'HBA': str(HBA),
        'ilxHBA': str(ilxHBA)
    }
    sources = HBASrc,
    namespace = HBA
    root = LabelRoot(
        iri=AIBS['human/labels/'],  # ilxtr.hbaroot,
        label='Allen Human Brain Atlas parcellation label root',
        shortname=shortname,
        definingArtifacts=(s.artifact.iri for s in sources),
    )

    def _triples(self):
        """ Yield one Label per source record plus a labelPartOf
            restriction when a parent structure is present. """
        for source in self.sources:
            for record in source:
                iri = self.namespace[str(record['id'])]
                sn = record['safe_name']
                # safe_name only counts as a synonym when it differs
                if sn and sn != record['name']: syns = sn,
                else: syns = tuple()
                yield from Label(
                    labelRoot=self.root,
                    label=record['name'],
                    synonyms=syns,
                    abbrevs=(record['acronym'], ),
                    iri=iri,
                )
                superpart = record['parent_structure_id']
                if superpart:
                    superpart_iri = self.namespace[str(superpart)]
                    yield from restriction.serialize(iri, ilxtr.labelPartOf,
                                                     superpart_iri)
Esempio n. 22
0
class Chebi(Ont):
    """ NIF ChEBI slim: the subset of ChEBI listed in the id sources,
        with bound prov metadata; deprecated/replaced classes are
        mirrored into the module-level chebi_dead graph. """
    sources = ChebiIdsSrc, ChebiOntSrc
    filename = 'chebislim'
    name = 'NIF ChEBI slim'
    shortname = 'chebislim'
    prefixes = makePrefixes('definition', 'hasRole', 'replacedBy', 'hasPart',
                            'termsMerged', 'obsReason', 'BFO', 'CHEBI',
                            'chebi', 'chebi1', 'chebi2', 'chebi3', 'ilxtr',
                            'prov', 'skos', 'oboInOwl')

    #'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),

    def _triples(self):
        """ Yield provenance triples, then the ObjectProperty triples
            (minus hasRole/hasPart), then every remaining triple of the
            source graph, routing replacedBy targets to chebi_dead. """
        (ids_raw, ids), (more, more_ids, g) = self.sources

        # deprecated-without-replacement ids that are expected to be absent
        depwor = {
            'CHEBI:33243': 'natural product',  # FIXME remove these?
            'CHEBI:36809': 'tricyclic antidepressant',
        }
        chebiiri = next(g[:rdf.type:owl.Ontology])
        oiodate = rdflib.URIRef(
            str(oboInOwl) + 'date')  # this predicate doesn't actually exist...
        chebidate = next(g[chebiiri:oiodate])
        yield self.iri, oiodate, chebidate
        # wow prov is extremely heavy weight ...
        b0, b1, b2 = [rdflib.BNode() for _ in range(3)]
        e1, e2 = self.wasDerivedFrom
        yield self.iri, prov.qualifiedDerivation, b0
        yield b0, rdf.type, prov.Derivation
        yield b0, prov.entity, e1
        yield b0, prov.hadActivity, b1
        yield b1, rdf.type, prov.Activity
        yield b1, prov.startedAtTime, rdflib.Literal(self.start_time)
        yield b1, prov.used, rdflib.URIRef(self.wasGeneratedBy)
        #yield b1, prov.generated, self.versionIRI
        # the fact that it isn't really possible to include a versionIRI
        # reveals that trying to include bound prov data rapidly encounters
        # significant issues
        yield b1, prov.wasAssociatedWith, b2
        yield b2, rdf.type, prov.SoftwareAgent
        yield b2, ilxtr.implementationOf, ilxtr['ProgrammingLanguage/Python']
        yield b2, ilxtr.versionString, rdflib.Literal(
            sys.version.replace('\n', ' '))
        # NOTE: b1 doesn't quite work if we want endedAtTime
        # because we don't actually know when the process will
        # end IF we consider the end of the process to be the
        # actual serialization of the file, in which case the
        # prov has to be external to the file itself, or the
        # last serialization cannot have bound provenance
        # or some real identifier has to be used to allow
        # a reference to the activity so that an end time can
        # be logged in another system

        # recursively expand every ObjectProperty except hasRole/hasPart
        yield from ((ss, ps, os) for s in g[:rdf.type:owl.ObjectProperty]
                    if str(s) not in (str(hasRole), str(hasPart))
                    for p, o in g[s]
                    for ss, ps, os in yield_recursive(s, p, o, g))

        for s, p, o in g:
            # skip the ontology header and the two excluded properties
            if s == chebiiri or s == rdflib.URIRef(
                    hasPart) or s == rdflib.URIRef(hasRole):
                continue
            if p == replacedBy:
                # mirror the deprecated class into the dead graph,
                # carrying over the replacement's label/synonyms
                chebi_dead.addTrip(s, rdfs.subClassOf, owl.DeprecatedClass)
                for dead_predicate in (rdfs.label, oboInOwl.hasExactSynonym):
                    for dead_object in g[o:dead_predicate]:
                        chebi_dead.addTrip(s, dead_predicate, dead_object)

            yield s, p, o

    def _old(self):
        """ Older, slower equivalent of _triples kept for reference.

        NOTE(review): g, ids, more_ids, ids_raw are not defined in this
        scope (they were locals of _triples), so this method would
        NameError if called — it appears to be dead code. """
        # the implementation below is a much slower equivalent that does needless checks
        # better simply to validate that there are no chebi ids that are missing an owl:Class
        #for id_ in sorted(set(ids_raw) | set((ug.g.namespace_manager.qname(_) for _ in mids))):
        print('more_ids', more_ids)
        for eid in sorted(ids | more_ids):
            #print(repr(eid))
            id_ = self._graph.qname(eid)
            s = rdflib.URIRef(eid)
            po = list(g.predicate_objects(s))
            if not po:
                print(s, 'not in xml')
                #looks for the id_ as a literal
                alts = list(
                    g.subjects(oboInOwl.hasAlternativeId,
                               rdflib.Literal(id_,
                                              datatype=rdflib.XSD.string)))
                if alts:
                    replaced_by = alts[0]
                    if replaced_by.toPython(
                    ) not in ids:  #  we need to add any replacment classes to the bridge
                        print('REPLACED BY NEW CLASS', id_)
                        for p, o in g.predicate_objects(replaced_by):
                            yield from yield_recursive(replaced_by, p, o, g)
                    chebi_dead.addTrip(s, rdf.type, owl.Class)
                    chebi_dead.addTrip(s, replacedBy, replaced_by)
                    chebi_dead.addTrip(s, owl.deprecated, Literal(True))
                else:
                    if self._graph.qname(eid) not in depwor:
                        raise BaseException('wtf error',
                                            self._graph.qname(eid))
            else:
                for p, o in po:
                    yield from yield_recursive(s, p, o, g)
                    if p == replacedBy:
                        chebi_dead.addTrip(s, rdfs.subClassOf,
                                           owl.DeprecatedClass)
                        oqname = self._graph.qname(o)
                        if (o, rdf.type, owl.Class) not in g:
                            print(
                                'WARNING: replaced but not in the xml subset',
                                o)
                        elif oqname not in ids and str(o) not in more_ids:
                            print(
                                'WARNING: replaced but not in ids or more_ids',
                                o)
                            for np, no in g[o]:
                                yield from yield_recursive(o, np, no, g)
                        for ro in g[o:rdfs.label]:
                            chebi_dead.addTrip(s, rdfs.label, ro)
Esempio n. 23
0
            #out = out.rstrip(' \n').rstrip(';') + '. #%s\n\n' % comment
            #except:
            out = out.rstrip('\n').rstrip(';') + '.\n\n\n\n'

        else:
            out += '.\n\n\n\n'

        outputs.append(out)

    return ''.join(natural_sort(outputs))


# TODO identifier mapping needs to happen before here

# curie prefix -> iri namespace mapping used when emitting output
PREFIXES = {
    **makePrefixes('owl', 'skos', 'BFO', 'NIFRID'),
    **{
        'FIXME': 'http://fixme.org/',  # placeholder namespace for unmapped ids
    }
}
# NOTE(review): _PREFIXES appears unused here — presumably an older
# hand-written version of PREFIXES kept for reference; confirm before
# removing
_PREFIXES = {
    'owl': 'http://www.w3.org/2002/07/owl#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
    'skos': 'http://www.w3.org/2004/02/skos/core#',
    #'NIF':'http://uri.neuinfo.org/nif/nifstd/',  # for old ids??
    #'obo_annot':'http://ontology.neuinfo.org/NIF/Backend/OBO_annotation_properties.owl#',  #FIXME OLD??
    #'oboInOwl':'http://www.geneontology.org/formats/oboInOwl#',  # these aren't really from OBO files but they will be friendly known identifiers to people in the community
}

Esempio n. 24
0
import os
import csv
import json
from pathlib import Path
from datetime import date
import rdflib
from rdflib.extras import infixowl
from pyontutils.core import makeGraph
from pyontutils.config import devconfig
from pyontutils.scigraph import Vocabulary
from pyontutils.namespaces import makePrefixes
from IPython import embed

# SciGraph vocabulary service client used by this module.
v = Vocabulary()

# Namespace table for the HBP cell ontology conversion: the standard
# neuroscience prefixes plus the HBP placeholder namespace.
PREFIXES = {
    **makePrefixes('ilx', 'owl', 'skos', 'NIFSTD', 'NIFRID', 'SAO', 'NIFEXT', 'NLXCELL'),
    'HBP_CELL': 'http://www.hbp.FIXME.org/hbp_cell_ontology/',
}


def expand(curie):
    """Expand a compact identifier (curie) into a full ``rdflib.URIRef``.

    Parameters
    ----------
    curie : str
        Identifier of the form ``prefix:suffix``.  ``prefix`` must be a key
        of the module-level ``PREFIXES`` table.

    Returns
    -------
    rdflib.URIRef
        The namespace registered for ``prefix`` concatenated with ``suffix``.

    Raises
    ------
    ValueError
        If ``curie`` contains no colon at all.
    KeyError
        If ``prefix`` is not present in ``PREFIXES``.
    """
    # maxsplit=1 so suffixes that themselves contain a colon survive intact
    # (plain split(':') would raise "too many values to unpack" on them)
    prefix, suffix = curie.split(':', 1)
    return rdflib.URIRef(PREFIXES[prefix] + suffix)


def ilx_get_start():
    """Scan ``interlex_reserved.txt`` from the local ontology repo backward.

    Each line is expected to be of the form ``<ilx id>:<label>``; iterating
    the file in reverse finds the most recently reserved entry first.

    NOTE(review): the visible body parses ``new_ilx_id`` and ``label`` but
    never returns or breaks -- this function appears truncated here; confirm
    the remainder (presumably returning the first non-empty id) upstream.
    """
    with open(Path(devconfig.ontology_local_repo,
                   'interlex_reserved.txt').as_posix(), 'rt') as f:
        for line in f.readlines()[::-1]:  # go backward to find the first non empty
            new_ilx_id, label = line.strip().split(':')
Esempio n. 25
0
class Chebi(Ont):
    """NIF ChEBI slim: the subset of ChEBI named by ChebiIdsSrc/ChebiOntSrc.

    ``_triples`` streams the slim's triples and, as a side effect, records
    deprecation bookkeeping for replaced classes into the module-level
    ``chebi_dead`` graph.
    """
    sources = ChebiIdsSrc, ChebiOntSrc
    filename = 'chebislim'
    name = 'NIF ChEBI slim'
    shortname = 'chebislim'
    prefixes = makePrefixes('definition', 'hasRole', 'replacedBy', 'hasPart',
                            'termsMerged', 'obsReason', 'BFO', 'CHEBI',
                            'chebi', 'chebi1', 'chebi2', 'chebi3', 'prov',
                            'skos', 'oboInOwl')

    #'This file is generated by pyontutils/slimgen from the full ChEBI nightly at versionIRI %s based on the list of terms in %s.' % (src_version, IDS_FILE),

    def _triples(self):
        """Yield provenance triples, object-property triples (except
        hasRole/hasPart), and every triple of the source graph ``g``.

        Side effect: for every ``replacedBy`` triple encountered, the
        deprecated subject gets subClassOf owl:DeprecatedClass plus the
        label and exact synonyms of its replacement, written to
        ``chebi_dead``.
        """
        (ids_raw, ids), (more, more_ids, g) = self.sources

        # known deprecated-without-replacement ids that are tolerated below
        depwor = {
            'CHEBI:33243': 'natural product',  # FIXME remove these?
            'CHEBI:36809': 'tricyclic antidepressant',
        }
        chebiiri = next(g[:rdf.type:owl.Ontology])
        oiodate = rdflib.URIRef(
            str(oboInOwl) + 'date')  # this predicate doesn't actually exist...
        chebidate = next(g[chebiiri:oiodate])
        # provenance: record the upstream ChEBI build date via a prov:Derivation bnode
        b0 = rdflib.BNode()
        yield self.iri, prov.qualifiedDerivation, b0
        yield b0, rdf.type, prov.Derivation
        yield b0, prov.atTime, chebidate

        # all object properties except hasRole/hasPart (those are excluded here)
        yield from ((ss, ps, os) for s in g[:rdf.type:owl.ObjectProperty]
                    if str(s) not in (str(hasRole), str(hasPart))
                    for p, o in g[s]
                    for ss, ps, os in yield_recursive(s, p, o, g))

        for s, p, o in g:
            if s == chebiiri or s == rdflib.URIRef(
                    hasPart) or s == rdflib.URIRef(hasRole):
                continue
            if p == replacedBy:
                # deprecation bookkeeping: copy label/synonyms of the
                # replacement object o onto the dead subject s in chebi_dead
                chebi_dead.addTrip(s, rdfs.subClassOf, owl.DeprecatedClass)
                for dead_predicate in (rdfs.label, oboInOwl.hasExactSynonym):
                    for dead_object in g[o:dead_predicate]:
                        chebi_dead.addTrip(s, dead_predicate, dead_object)

            yield s, p, o

        return  # the implementation below is a much slower equivalent that does needless checks
        # better simply to validate that there are no chebi ids that are missing an owl:Class
        # NOTE(review): everything below is unreachable dead code kept for reference.

        #for id_ in sorted(set(ids_raw) | set((ug.g.namespace_manager.qname(_) for _ in mids))):
        print('more_ids', more_ids)
        for eid in sorted(ids | more_ids):
            #print(repr(eid))
            id_ = self._graph.qname(eid)
            s = rdflib.URIRef(eid)
            po = list(g.predicate_objects(s))
            if not po:
                print(s, 'not in xml')
                #looks for the id_ as a literal
                alts = list(
                    g.subjects(oboInOwl.hasAlternativeId,
                               rdflib.Literal(id_,
                                              datatype=rdflib.XSD.string)))
                if alts:
                    replaced_by = alts[0]
                    if replaced_by.toPython(
                    ) not in ids:  #  we need to add any replacment classes to the bridge
                        print('REPLACED BY NEW CLASS', id_)
                        for p, o in g.predicate_objects(replaced_by):
                            yield from yield_recursive(replaced_by, p, o, g)
                    chebi_dead.addTrip(s, rdf.type, owl.Class)
                    chebi_dead.addTrip(s, replacedBy, replaced_by)
                    chebi_dead.addTrip(s, owl.deprecated, Literal(True))
                else:
                    if self._graph.qname(eid) not in depwor:
                        raise BaseException('wtf error',
                                            self._graph.qname(eid))
            else:
                for p, o in po:
                    yield from yield_recursive(s, p, o, g)
                    if p == replacedBy:
                        chebi_dead.addTrip(s, rdfs.subClassOf,
                                           owl.DeprecatedClass)
                        oqname = self._graph.qname(o)
                        if (o, rdf.type, owl.Class) not in g:
                            print(
                                'WARNING: replaced but not in the xml subset',
                                o)
                        elif oqname not in ids and str(o) not in more_ids:
                            print(
                                'WARNING: replaced but not in ids or more_ids',
                                o)
                            for np, no in g[o]:
                                yield from yield_recursive(o, np, no, g)
                        for ro in g[o:rdfs.label]:
                            chebi_dead.addTrip(s, rdfs.label, ro)
Esempio n. 26
0
class Registry(Ont):
    """Turtle export of the SciCrunch Registry.

    Records are fetched from a database via ``get_records`` (connection
    parameters are set with ``config`` before ``prepare`` is called) and
    turned into triples by ``_triples``.
    """
    path = ''
    filename = 'scicrunch-registry'
    name = 'scicrunch registry exported ontology'
    shortname = 'screxp'
    comment = 'Turtle export of the SciCrunch Registry'
    sources = RegistrySource,
    prefixes = makePrefixes(
        'definition',  # these aren't really from OBO files but they will be friendly known identifiers to people in the community
        'SCR',  # generate base from this directly?
        #'obo':'http://purl.obolibrary.org/obo/',
        #'FIXME':'http://fixme.org/',
        'NLX',
        'NIFSTD',  # for old ids??
        'NIFRID',
        'oboInOwl')
    # guards prepare() so the database is only queried once per class
    prepared = False

    @classmethod
    def config(cls, user=None, host=None, port=None, database=None):
        """Stash database connection parameters for a later prepare()."""
        cls.user = user
        cls.host = host
        cls.port = port
        cls.database = database

    @classmethod
    def prepare(cls):
        """Fetch registry records (once) and then run the normal prepare."""
        # we have to do this here because Source only supports the tuple interface right now
        if not cls.prepared:
            cls.records = get_records(user=cls.user,
                                      host=cls.host,
                                      port=cls.port,
                                      database=cls.database)
            super().prepare()
            cls.prepared = True

    def _triples(self):
        """Yield one triple per non-empty record field via make_triple.

        Skips empty values and alt/old ids that merely restate the record's
        own id.  Objects that are not already URIRefs are passed through
        ``check_thing`` when possible, otherwise wrapped as Literals.
        """
        for id_, rec in self.records.items():
            for field, value in rec:
                #print(field, value)
                if not value:  # don't add empty edges  # FIXME issue with False literal
                    logd.debug(f'caught an empty value on field {id_} {field}')
                    continue
                if field != 'id' and (str(value).replace('_', ':') in id_
                                      or str(value) in id_):
                    #if field == 'alt_id' and id_[1:] == value:
                    if field != 'old_id':
                        logd.debug(
                            f'caught a mainid appearing as altid {field} {value}'
                        )

                    continue
                s, p, o = make_triple(id_, field, value)

                if not isinstance(o, rdflib.URIRef):
                    try:
                        if o.startswith(
                                ':'
                        ) and ' ' in o:  # not a compact repr AND starts with a : because humans are insane
                            o = ' ' + o
                        o = self._graph.check_thing(o)
                    except (AttributeError, KeyError, ValueError) as e:
                        o = rdflib.Literal(o)  # trust autoconv
                #yield OntId(s), OntId(p), self._graph.check_thing(o)  # FIXME OntId(p) breaks rdflib rdf:type -> a
                yield OntId(s), p, o
Esempio n. 27
0
 def pref(inp): return makePrefixes('ilx')['ilx'] + inp
 id_ =  pref(j['ilx'])
Esempio n. 28
0
def backend_refactor_values():
    """Return the BFO 1.x / legacy-ro to BFO 2.0 / RO identifier mapping.

    The raw curie/iri pairs are expanded through ``makeGraph.check_thing``
    so the returned dict maps fully resolved identifiers to fully resolved
    identifiers.
    """
    # from https://github.com/information-artifact-ontology/IAO/blob/master/docs/BFO%201.1%20to%202.0%20conversion/mapping.txt
    raw_replacements = {
        'http://www.ifomis.org/bfo/1.1#Entity': 'BFO:0000001',
        'BFO1SNAP:Continuant': 'BFO:0000002',
        'BFO1SNAP:Disposition': 'BFO:0000016',
        'BFO1SNAP:Function': 'BFO:0000034',
        'BFO1SNAP:GenericallyDependentContinuant': 'BFO:0000031',
        'BFO1SNAP:IndependentContinuant': 'BFO:0000004',
        'BFO1SNAP:MaterialEntity': 'BFO:0000040',
        'BFO1SNAP:Quality': 'BFO:0000019',
        'BFO1SNAP:RealizableEntity': 'BFO:0000017',
        'BFO1SNAP:Role': 'BFO:0000023',
        'BFO1SNAP:Site': 'BFO:0000029',
        'BFO1SNAP:SpecificallyDependentContinuant': 'BFO:0000020',
        'BFO1SPAN:Occurrent': 'BFO:0000003',
        'BFO1SPAN:ProcessualEntity': 'BFO:0000015',
        'BFO1SPAN:Process': 'BFO:0000015',
        'BFO1SNAP:ZeroDimensionalRegion': 'BFO:0000018',
        'BFO1SNAP:OneDimensionalRegion': 'BFO:0000026',
        'BFO1SNAP:TwoDimensionalRegion': 'BFO:0000009',
        'BFO1SNAP:ThreeDimensionalRegion': 'BFO:0000028',
        'http://purl.org/obo/owl/OBO_REL#bearer_of': 'RO:0000053',
        'http://purl.org/obo/owl/OBO_REL#inheres_in': 'RO:0000052',
        'ro:has_part': 'BFO:0000051',
        'ro:part_of': 'BFO:0000050',
        'ro:has_participant': 'RO:0000057',
        'ro:participates_in': 'RO:0000056',
        'http://purl.obolibrary.org/obo/OBI_0000294': 'RO:0000059',
        'http://purl.obolibrary.org/obo/OBI_0000297': 'RO:0000058',
        'http://purl.obolibrary.org/obo/OBI_0000300': 'BFO:0000054',
        'http://purl.obolibrary.org/obo/OBI_0000308': 'BFO:0000055',

        # more bfo
        'BFO1SNAP:SpatialRegion': 'BFO:0000006',
        'BFO1SNAP:FiatObjectPart': 'BFO:0000024',
        'BFO1SNAP:ObjectAggregate': 'BFO:0000027',
        'BFO1SNAP:Object': 'BFO:0000030',
        #'BFO1SNAP:ObjectBoundary'  # no direct replacement, only occurs in unused
        #'BFO1SPAN:ProcessAggregate'  # was not replaced, could simply be a process itself??
        #'BFO1SNAP:DependentContinuant'  # was not replaced

        # other
        #'ro:participates_in'  # above
        #'ro:has_participant'  # above
        #'ro:has_part',  # above
        #'ro:part_of',  # above
        #'ro:precedes'  # unused and only in inferred
        #'ro:preceded_by'  # unused and only in inferred
        #'ro:transformation_of'  # unused and only in inferred
        #'ro:transformed_into'  # unused and only in inferred
        'http://purl.org/obo/owl/obo#inheres_in': 'RO:0000052',
        'http://purl.obolibrary.org/obo/obo#towards': 'RO:0002503',
        'http://purl.org/obo/owl/pato#towards': 'RO:0002503',
        'http://purl.obolibrary.org/obo/pato#inheres_in': 'RO:0000052',
        'BIRNLEX:17': 'RO:0000053',  # is_bearer_of
        'http://purl.obolibrary.org/obo/pato#towards': 'RO:0002503',
        'ro:adjacent_to': 'RO:0002220',
        'ro:derives_from': 'RO:0001000',
        'ro:derives_into': 'RO:0001001',
        'ro:agent_in': 'RO:0002217',
        'ro:has_agent': 'RO:0002218',
        'ro:contained_in': 'RO:0001018',
        'ro:contains': 'RO:0001019',
        'ro:located_in': 'RO:0001025',
        'ro:location_of': 'RO:0001015',
        'ro:has_proper_part': 'NIFRID:has_proper_part',
        'ro:proper_part_of':
        'NIFRID:proper_part_of',  # part of where things are not part of themsevles need to review
    }
    # a helper graph whose namespace table can resolve every curie above
    helper_graph = makeGraph('',
                             prefixes=makePrefixes('ro', 'RO', 'BIRNLEX',
                                                   'NIFRID', 'BFO',
                                                   'BFO1SNAP', 'BFO1SPAN'))
    expanded = {}
    for old, new in raw_replacements.items():
        expanded[helper_graph.check_thing(old)] = helper_graph.check_thing(new)

    return expanded
Esempio n. 29
0
class AllenCellTypes:
    """Convert Allen cell-specimen records into neuron-lang phenotypes.

    ``neuron_data`` (the ``input`` constructor argument) is iterated as a
    sequence of cell-specimen dicts; each ``*_phenotypes`` method maps one
    facet of a specimen to a list of Phenotype objects, and the ``build_*``
    methods assemble and write the resulting ontologies.
    """

    branch = auth.get('neurons-branch')

    # curie prefixes for the external strain/line catalogs plus the common
    # ontology namespaces
    prefixes = {
        **{
            'JAX': 'http://jaxmice.jax.org/strain/',
            'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
            'AllenTL': 'http://api.brain-map.org/api/v2/data/TransgenicLine/'
        },
        **makePrefixes('definition', 'ilxtr', 'owl')
    }
    # NOTE(review): same namespace as 'AllenTL' registered under a second name
    prefixes[
        'AllenTransgenicLine'] = 'http://api.brain-map.org/api/v2/data/TransgenicLine/'

    def __init__(self, input, name):
        # NOTE(review): parameter name `input` shadows the builtin
        self.name = name
        self.ns = {k: rdflib.Namespace(v) for k, v in self.prefixes.items()}
        self.neuron_data = input
        self.tag_names = set()  # accumulates every specimen tag seen
        # self.sample_neuron()

    def avoid_url_conversion(self, string):
        """Replace '/', ' ', '(' with '_' and drop ')' so the string is
        safe to embed in an identifier; falsy input is returned unchanged."""
        if not string:
            return string
        return re.sub("/| |\(", '_', string).replace(')', '')

    def sample_neuron(self, ):
        """Build a throwaway example Neuron and print the graph as ttl."""
        Neuron(
            Phenotype('ilxtr:apical',
                      'ilxtr:hasPhenotype',
                      label='apical - truncated'),
            Phenotype('JAX:12345',
                      'ilxtr:hasExperimentalPhenotype',
                      label='prefix+stock_number'),
        )
        print(graphBase.ttl())

    def cell_phenotypes(self, cell_specimen):
        """Map top-level specimen fields to phenotypes.

        Currently only 'hemisphere' is mapped (left/right -> UBERON
        laterality); raises ValueError on any other hemisphere value.
        """
        cell_mappings = {
            'hemisphere': 'ilxtr:hasSomaLocationLaterality',
            # 'name': 'ilxtr:hasPhenotype',
        }
        phenotypes = []
        for name, value in cell_specimen.items():
            mapping = cell_mappings.get(name)
            if mapping and value:
                if name == 'hemisphere':
                    if value.lower() == 'left':
                        curie = 'UBERON:0002812'
                    elif value.lower() == 'right':
                        curie = 'UBERON:0002813'
                    else:
                        raise ValueError('got stuck with unkown hemisphere ' +
                                         value)
                # NOTE(review): if a mapping other than 'hemisphere' is ever
                # added, `curie` would carry a stale value here
                phenotypes.append(Phenotype(
                    curie,
                    mapping,
                ))
        return phenotypes

    # TODO: wrong phenotype
    def structure_phenotypes(self, cell_specimen):
        """Map the specimen's brain structure to a soma-location phenotype
        (MBA id, labelled with the sanitized structure acronym)."""
        struc = cell_specimen['structure']
        phenotypes = []
        acronym = self.avoid_url_conversion(struc['acronym'])
        curie = 'MBA:' + str(struc['id'])
        # NOTE(review): this truthiness guard is effectively dead -- a falsy
        # struc would already have raised on struc['acronym'] above
        if struc:
            phenotypes.append(
                Phenotype(curie, 'ilxtr:hasSomaLocatedIn', label=acronym), )
        return phenotypes

    def donor_phenotypes(self, cell_specimen):
        """Map donor fields to phenotypes; only sex is mapped
        (female/male -> PATO), raising ValueError on any other value."""
        donor_mappings = {'sex_full_name': 'ilxtr:hasBiologicalSex'}
        phenotypes = []
        for name, value in cell_specimen['donor'].items():
            mapping = donor_mappings.get(name)
            if mapping and value:
                if name == 'sex_full_name':
                    if value.lower() == 'female':
                        curie = 'PATO:0000383'
                    elif value.lower() == 'male':
                        curie = 'PATO:0000384'
                    else:
                        raise ValueError('unkown sex ' + str(value))
                phenotypes.append(Phenotype(
                    curie,
                    mapping,
                ), )
        return phenotypes

    # TODO: Figure how to add: description, name and type
    def transgenic_lines_phenotypes(self, cell_specimen):
        """Map donor transgenic lines to expression phenotypes.

        The predicate depends on the line type (driver / inducible driver /
        reporter / other); only lines from AIBS, MMRRC or JAX catalogs
        produce a phenotype (AIBS is remapped to the AllenTL namespace).
        """
        transgenic_mappings = {}
        phenotypes = []
        for tl in cell_specimen['donor']['transgenic_lines']:
            prefix = tl['transgenic_line_source_name']
            suffix = tl['stock_number'] if tl['stock_number'] else str(
                tl['id'])
            name = self.avoid_url_conversion(tl['name'])
            _type = tl['transgenic_line_type_name']
            if _type == 'driver':
                if 'CreERT2' in name:  # FIXME from structured instead of name?
                    # NOTE(review): this branch uses an ilxtr URIRef while all
                    # the others use curie strings -- confirm both are accepted
                    pred = ilxtr.hasDriverExpressionInducedPhenotype
                else:
                    pred = 'ilxtr:hasDriverExpressionPhenotype'
            elif _type == 'reporter':
                pred = 'ilxtr:hasReporterExpressionPhenotype'
            else:
                pred = 'ilxtr:hasExpressionPhenotype'

            line_names = []  # NOTE(review): assigned but never used
            if prefix and suffix and prefix in ['AIBS', 'MMRRC', 'JAX']:
                if prefix == 'AIBS':
                    prefix = 'AllenTL'
                iri = self.ns[prefix][suffix]
                phenotypes.append(Phenotype(iri, pred))
        return phenotypes

    # TODO: search if description exists
    # TODO: Create mapping for all possible types
    # TODO: Fork negatives to NegPhenotype
    def specimen_tags_phenotypes(self, cell_specimen):
        """Map specimen tags (spiny-ness, apical dendrite state) to
        phenotypes via a fixed lookup table; unknown names raise."""
        pred = 'ilxtr:hasDendriteMorphologicalPhenotype'
        specimen_tag_mappings = {
            'spiny':
            Phenotype('ilxtr:SpinyPhenotype', pred),
            'aspiny':
            NegPhenotype('ilxtr:SpinyPhenotype', pred),
            'sparsely spiny':
            LogicalPhenotype(
                AND, Phenotype('ilxtr:SpinyPhenotype', pred),
                Phenotype('PATO:0001609', 'ilxtr:hasPhenotypeModifier')),
            'apicalIntact':
            Phenotype('ilxtr:ApicalDendritePhenotype',
                      'ilxtr:hasMorphologicalPhenotype'),
            'apicalTruncated':
            LogicalPhenotype(
                AND,
                Phenotype('ilxtr:ApicalDendritePhenotype',
                          'ilxtr:hasMorphologicalPhenotype'),
                Phenotype('PATO:0000936', 'ilxtr:hasPhenotypeModifier')),
            'apicalNa':
            NegPhenotype('ilxtr:ApicalDendritePhenotype',
                         'ilxtr:hasMorphologicalPhenotype'
                         ),  # NA means there was no apical dendrite
        }
        phenotypes = []
        for tag in cell_specimen['specimen_tags']:
            # 'dendrite type - X' tags keep X verbatim; other 'A - B' tags
            # are camel-cased into 'AB' (e.g. 'apical - intact' -> 'apicalIntact')
            if 'dendrite type' in tag['name']:
                one_two = tag['name'].split(' - ')[1]
                #if ' ' in one_two:
                #one, two = one_two.split(' ')
                #name = one + two.capitalize()
                #else:
                name = one_two
            else:
                one, two = tag['name'].split(' - ')
                #if two == 'NA':  # apical - NA
                #continue
                name = one + two.capitalize()

            self.tag_names.add(tag['name'])
            # if phenotype == '+':
            if name not in specimen_tag_mappings:
                raise ValueError(name)

            # NOTE(review): the `else Phenotype(...)` arm below is
            # unreachable -- the raise above already excluded that case
            phenotypes.append(
                specimen_tag_mappings[name] if name in
                specimen_tag_mappings else Phenotype('ilxtr:' + name, pred))
            # elif phenotype == '-': phenotypes.append(NegPhenotype(...))

        return phenotypes

    # TODO: check to see if specimen_id is really the priority
    def cell_soma_locations_phenotypes(self, cell_specimen):
        """Map cell soma location records to phenotypes (deprecated -- see
        build_phenotypes, which no longer calls this)."""
        cell_soma_mappings = {}
        phenotypes = []
        for csl in cell_specimen['cell_soma_locations']:
            location = csl['id']
            phenotypes.append(
                Phenotype(
                    'ilxtr:' + str(location),
                    'ilxtr:hasSomaLocatedIn',
                ))
        return phenotypes

    def add_mouse_lineage(self, cell_specimen):
        """All Allen cell specimens are mouse; tag with NCBITaxon:10090."""
        phenotypes = [Phenotype('NCBITaxon:10090', 'ilxtr:hasInstanceInTaxon')]
        return phenotypes

    def build_phenotypes(self, cell_specimen):
        """Concatenate the phenotype lists from every mapper for one specimen."""
        phenotype_functions = [
            self.cell_phenotypes,
            self.structure_phenotypes,
            self.donor_phenotypes,
            self.transgenic_lines_phenotypes,
            self.specimen_tags_phenotypes,
            self.add_mouse_lineage,
            # self.cell_soma_locations_phenotypes, # deprecated
        ]
        phenotypes = []
        for func in phenotype_functions:
            phenotypes.extend(func(cell_specimen))
        return phenotypes

    def make_config(self):
        """Create the neuron-lang Config for this export, importing the
        generated allen-transgenic-lines ontology."""
        # have to call Config here because transgenic lines doesn't exist
        self.config = Config(
            name=self.name,
            imports=[
                f'NIFRAW:{self.branch}/ttl/generated/allen-transgenic-lines.ttl'
            ],
            prefixes=self.prefixes,
            branch=self.branch,
            sources=tuple(),  # TODO insert the link to the query...
            source_file=relative_path(__file__, no_wd_value=__file__))

    def build_neurons(self):
        """Build a NeuronACT per specimen, write the neuron graphs, and
        emit one owl:NamedIndividual instance per specimen id."""
        instances = []
        dids = []
        for cell_specimen in self.neuron_data:
            neuron = NeuronACT(*self.build_phenotypes(cell_specimen))
            did = AIBSSPEC[str(cell_specimen['id'])]
            dids.append(did)
            instances.append((did, rdf.type, owl.NamedIndividual))
            instances.append((did, rdf.type, neuron.identifier))

        print(sorted(self.tag_names))
        NeuronACT.write()
        NeuronACT.write_python()
        self.build_instances(instances, dids)

    def build_instances(self, instances, dids):
        """Write allen-cell-instances.ttl: ontology header, the instance
        triples, and an owl:AllDifferent axiom over all specimen ids."""
        folder = Path(self.config.out_graph_path()).parent
        # WOW do I need to implement the new/better way of
        # managing writing collections of neurons to graphs
        neuron_uri = next(NeuronACT.out_graph[:rdf.type:owl.Ontology])
        name = 'allen-cell-instances.ttl'
        base, _ = neuron_uri.rsplit('/', 1)
        uri = rdflib.URIRef(base + '/' + name)
        metadata = ((uri, rdf.type, owl.Ontology), )
        instance_graph = OntGraph(path=folder / name)
        instance_graph.bind('AIBSSPEC', AIBSSPEC)
        [instance_graph.add(t) for t in metadata]
        [instance_graph.add(t) for t in instances]
        [
            instance_graph.add(t)
            for t in allDifferent(None, distinctMembers(*dids))
        ]
        instance_graph.write()

    def build_transgenic_lines(self):
        """
        init class     |  "transgenic_line_source_name":"stock_number" a Class
        add superClass |  rdfs:subClassOf ilxtr:transgenicLine
        add *order*    |  ilxtr:useObjectProperty ilxtr:<order>
        add name       |  rdfs:label "name"
        add def        |  definition: "description"
        add transtype  |  rdfs:hasTransgenicType "transgenic_line_type_name"
        """

        triples = []
        for cell_specimen in self.neuron_data:
            for tl in cell_specimen['donor']['transgenic_lines']:
                _id = tl['stock_number'] if tl['stock_number'] else tl['id']
                prefix = tl['transgenic_line_source_name']
                line_type = tl['transgenic_line_type_name']
                if line_type == 'driver' and 'CreERT2' in tl['name']:
                    line_type = 'inducibleDriver'

                # only known catalogs become classes; anything else is logged
                if prefix not in ['JAX', 'MMRRC', 'AIBS']:
                    print(tc.red('WARNING:'), 'unknown prefix', prefix,
                          json.dumps(tl, indent=4))
                    continue
                elif prefix == 'AIBS':
                    prefix = 'AllenTL'

                _class = self.ns[prefix][str(_id)]
                triples.append((_class, rdf.type, owl.Class))
                triples.append(
                    (_class, rdfs.label, rdflib.Literal(tl['name'])))
                triples.append(
                    (_class, definition, rdflib.Literal(tl['description'])))
                triples.append((_class, rdfs.subClassOf, ilxtr.transgenicLine))
                triples.append((_class, ilxtr.hasTransgenicType,
                                ilxtr[line_type + 'Line']))

        # TODO aspects.ttl?
        transgenic_lines = simpleOnt(
            filename='allen-transgenic-lines',
            local_base=graphBase.local_base,
            path='ttl/generated/',
            prefixes=self.prefixes,
            triples=triples,
            comment='Allen transgenic lines for cell types',
            branch=self.branch,
            calling__file__=__file__,
        )

        transgenic_lines._graph.write()
Esempio n. 30
0
class ChebiDead(Ont):
    filename = 'chebi-dead'
    name = 'NIF ChEBI deprecated'
    shortname = 'chebidead'
    prefixes = makePrefixes('CHEBI', 'replacedBy', 'prov', 'oboInOwl')