Beispiel #1
0
def main():
    """Build the markram-2015 config from table 1 of PMID 26451489.

    Reads the resource csv transposed (columns become rows), hands the
    result to ``table1``, and returns the config as a 1-tuple.
    """
    import csv
    from neurondm.core import auth
    table_path = auth.get_path('resources') / '26451489 table 1.csv'
    with open(table_path, 'rt') as f:
        # transpose so each csv column arrives as one row
        columns = zip(*csv.reader(f))
        rows = [list(column) for column in columns]

    config = Config('markram-2015',
                    source_file=relative_path(__file__, no_wd_value=__file__))
    table1(rows)
    return config,
Beispiel #2
0
def _ontology_data_files():
    """Return ``(resources_dir, file_paths)`` for the packaged ontology files.

    When ``RELEASE`` is set, the ttl files are copied out of the local
    ontology repo (which must exist and be on the configured neurons
    branch) into a freshly created ``resources`` directory.  Otherwise
    the relative paths are returned as-is for in-place development use.

    Returns:
        tuple: absolute ``resources`` path and a list of posix path
        strings, one per ttl file.

    Raises:
        FileNotFoundError: if the ontology local repo is missing.
        ValueError: if the repo is on the wrong git branch.
    """
    resources = 'resources'
    relpaths = [
        'ttl/phenotype-core.ttl',
        'ttl/phenotype-indicators.ttl',
        'ttl/phenotypes.ttl',
        'ttl/generated/part-of-self.ttl',
    ]
    if RELEASE:
        from augpathlib import RepoPath as Path
        ### KILL IT WITH FIRE
        try:
            from neurondm.core import auth  ### this is NOT ok
        except Exception:
            # can't catch an error that you can never import because
            # it will be raised before you can import it ... SIGH
            import orthauth as oa
            from pyontutils.config import auth as pauth
            auth = oa.configure(Path('neurondm/auth-config.py').resolve(),
                                include=pauth)
        ###

        olr = Path(auth.get_path('ontology-local-repo'))

        ### KILL IT WITH FIRE
        if not olr.exists():
            original = auth.get('ontology-local-repo')
            # FIX: the two message fragments previously concatenated with no
            # separator, running the expanded path into the explanation
            raise FileNotFoundError(
                f'ontology local repo does not exist: {olr} '
                f'path expanded from {original}')
        elif olr.repo.active_branch.name != auth.get('neurons-branch'):
            # FIXME yes indeed having to call Config in a way that is
            # invoked at import time is REALLY REALLY BAD :/
            raise ValueError('git is on the wrong branch! '
                             f'{olr.repo.active_branch}')
        ###

        resources = Path(resources)
        resources.mkdir(
        )  # if we add resources to git, this will error before we delete by accident
        paths = [olr / rp for rp in relpaths]
        for p in paths:
            p.copy_to(resources / p.name)

    else:
        from pathlib import Path
        resources = Path(resources)
        paths = [Path(rp) for rp in relpaths]

    return resources.absolute(), [(resources / p.name).as_posix()
                                  for p in paths]
Beispiel #3
0
def main():
    """Build the cut-development neuron configs.

    Loads neurons from the remote ontology config, basic-neurons, and
    neuron_data_lifted, matches them against the labels listed in
    cut-development.csv, writes a raw config and a final
    'cut-development' config, then prints a progress report.

    Returns:
        tuple: (config, unmapped, partial, nlx_missing).
    """
    branch=auth.get('neurons-branch')
    # master maps to the release endpoint; any other branch to raw git
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])

    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)

    # transpose: first column minus its header cell is the label list
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    # the only BAMSC id that is allowed through; all others are skipped
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        # NOTE(review): str() never returns None, so the check below is
        # always true — possibly n.origLabel itself was meant to be tested
        l = str(n.origLabel)
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

        if l in labels:
            n._origLabel = l
            ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    # labels still unaccounted for after the neurolex pass
    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ',
                            '').replace('Intrinsic',
                                        'intrinsic').replace('Projection',
                                                             'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # keep existing ids except TEMP ids, which are dropped so they regenerate
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel, override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndprecated currenlty triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total:            {progress[0]}\n'
                   f'from nlx:         {progress[1]}\n'
                   f'from basic:       {progress[2]}\n'
                   f'from match:       {progress[3]}\n'
                   f'TODO after nlx:   {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    # sanity checks: each pass should partition the remaining label set
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    # case-insensitive comparison of neurolex labels against mapped labels
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        # pad keys to the longest key so the values line up
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer), *n.unique_predicates)]
    if __name__ == '__main__':
        # interactive review only when run as a script
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing
Beispiel #4
0
def export_for_review(config, unmapped, partial, nlx_missing,
                      filename='cuts-review.csv',
                      with_curies=False):
    """Write a review spreadsheet for the neurons in *config*.

    One row per neuron with a column per phenotypic dimension, plus
    status / PMID / synonyms / definition columns, followed by rows for
    labels that remain unmapped.  The csv is written into the
    ``resources`` directory under *filename*.

    Args:
        config: the neuron Config whose neurons are exported.
        unmapped: labels with no mapping at all.
        partial: dict of label -> leftover unmatched fragment.
        nlx_missing: labels listed in NeuroLex but not mapped.
        filename: output csv name inside the resources directory.
        with_curies: if True, cells are ``curie|label`` instead of label.

    Returns:
        list: the header row followed by the data rows.
    """
    neurons = sorted(config.neurons())
    # union of all predicates used by any neuron, flattened across
    # logical (tuple) entries -- these become the phenotype columns
    predicates = sorted(set(e for n in neurons
                            for me in n.edges
                            for e in (me if isinstance(me, tuple) else (me,))))  # columns
    q = graphBase.core_graph.transitive_subjects(rdfs.subPropertyOf, ilxtr.hasPhenotype)
    all_predicates = set(s for s in q)
    extra_predicates = sorted(p for p in all_predicates if p not in predicates)

    # human readable column headers, keyed by predicate
    col_labels = {p.e:p.eLabel for n in neurons
                  for mp in n.pes
                  for p in (mp.pes if isinstance(mp, LogicalPhenotype) else (mp,))}

    header = (['curie', 'label'] +
              [col_labels[p] for p in predicates] +
              ['Status', 'PMID', 'synonyms', 'definition'] +
              [OntId(p).suffix for p in extra_predicates if not skip_pred(p)])

    def neuron_to_review_row(neuron, cols=predicates):  # TODO column names
        # hide generated TEMP curies from reviewers
        _curie = neuron.ng.qname(neuron.id_)
        curie = None if 'TEMP:' in _curie else _curie
        row = [curie, neuron.origLabel]
        for pdim in cols:  # pdim -> phenotypic dimension
            if pdim in neuron:
                #print('>>>>>>>>>>>>>', pdim, neuron)
                #if any(isinstance(p, LogicalPhenotype) for p in neuron):
                    #breakpoint()
                row.append(','.join(sorted([f'{_._pClass.qname}|{_.pLabel}'
                                            if with_curies else
                                            _.pLabel
                                            for _ in neuron[pdim]] if
                                           isinstance(neuron[pdim], list) else
                                           [neuron[pdim].pLabel])))
                #if col == ilxtr.hasLayerLocationPhenotype:
                    #derp = neuron[col]
                    #log = [p for p in derp if isinstance(p, LogicalPhenotype)]
                    #if log:
                        #print(log, row)
                        #breakpoint()
            else:
                row.append(None)

        return row

    #[n for n in neurons]
    resources = auth.get_path('resources')
    reviewcsv = resources / filename
    rows = [neuron_to_review_row(neuron) for neuron in neurons]

    # NOTE(review): rows hitting the unmapped/partial branch get an extra
    # status cell before the NeuroLex cell, so their columns shift right
    # relative to the header -- confirm whether this is intended
    for i, row in enumerate(rows):
        label = row[1]
        if label in unmapped:
            row.append('Unmapped')
        elif label in partial:
            rem = partial[label]
            row.append(f'Partial: {rem!r}')

        if label in nlx_missing:
            row.append('Could not find NeuroLex mapping')
        else:
            row.append(None)

        row.append(None)  # pmid
        # i < len(neurons) always holds here since rows mirrors neurons
        if i < len(neurons):
            n = neurons[i]
            # FIXME
            row.append(','.join(n.config.out_graph[n.id_:NIFRID.synonym:]))  # syn
            row.append(','.join(n.config.out_graph[n.id_:definition:]))  # def

    # labelled rows first, then unlabeled, each alphabetical by label
    rows = sorted(rows, key=lambda r:(1 if r[1] is None else 0, str(r[1])))
    incomplete = [[None, u] + [None] * (len(rows[0]) - 2) + ['Unmapped', None, None] for u in unmapped]
    incomplete = sorted(incomplete, key=lambda r:r[1])
    rows += incomplete
    with open(reviewcsv.as_posix(), 'wt') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(header)
        writer.writerows(rows)

    return [header] + rows
Beispiel #5
0
def main():
    """Derive neurons directly from phenotype definitions in the ontology.

    Parses the NIF neuron ttl sources into a temporary graph, converts
    both defined and measured neuron classes into Neuron instances, and
    writes them out as the 'phenotype-direct' config.

    Returns:
        Config: the phenotype-direct config.
    """
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    Config('temporary-graph')
    EXISTING_GRAPH = graphBase.in_graph
    #EXISTING_GRAPH = rdflib.Graph()
    #graphBase.in_graph = EXISTING_GRAPH
    #graphBase.core_graph = EXISTING_GRAPH
    local_prefix = auth.get_path('ontology-local-repo') / 'ttl'
    sources = (f'{local_prefix}/NIF-Neuron-Defined.ttl',
               f'{local_prefix}/NIF-Neuron.ttl',
               f'{local_prefix}/NIF-Neuron-Phenotype.ttl',
               f'{local_prefix}/phenotype-core.ttl',
               f'{local_prefix}/phenotypes.ttl',
               f'{local_prefix}/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])

    #graphBase.core_graph = EXISTING_GRAPH
    #graphBase.out_graph = rdflib.Graph()
    graphBase.__import_name__ = 'neurondm.lang'

    # resolve the phenotype and modifier root predicates from the graph
    proot = graphBase.core_graph.qname(PHENO_ROOT)
    mroot = graphBase.core_graph.qname(MOD_ROOT)
    graphBase._predicates, _psupers = getPhenotypePredicates(
        EXISTING_GRAPH, proot, mroot)

    g = makeGraph('merged',
                  prefixes={k: str(v)
                            for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    # direct subclasses vs. transitive closure of subclasses of Neuron
    reg_neurons = list(g.g.subjects(rdfs.subClassOf, _NEURON_CLASS))
    tc_neurons = [
        _ for (_, ) in
        g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}' %
                  g.g.qname(_NEURON_CLASS))
    ]
    def_neurons = g.get_equiv_inter(_NEURON_CLASS)

    # neurons in the transitive closure that have no equivalent-class definition
    nodef = sorted(set(tc_neurons) - set(def_neurons))
    og1 = MeasuredNeuron.out_graph = rdflib.Graph(
    )  # there is only 1 out_graph at a time, load and switch

    mns = [MeasuredNeuron(id_=n) for n in nodef]
    mnsp = [n for n in mns if n.pes]
    graphBase.out_graph = rdflib.Graph(
    )  # XXX NEVER DO THIS IT IS EVIL ZALGO WILL EAT YOUR FACE
    graphBase.ng.g = graphBase.out_graph
    # and he did, had to swtich to graphBase for exactly this reason >_<
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    # measured neurons with phenotypes are promoted to plain Neurons
    dns += [Neuron(*m.pes) for m in mns if m.pes]

    # reset everything for export
    config = Config('phenotype-direct', source_file=relative_path(__file__))
    #Neuron.out_graph = graphBase.out_graph  # each subclass of graphBase has a distinct out graph IF it was set manually
    #Neuron.out_graph = rdflib.Graph()
    #ng = makeGraph('', prefixes={}, graph=Neuron.out_graph)
    #ng.filename = Neuron.ng.filename
    Neuron.mro()[1].existing_pes = {
    }  # wow, new adventures in evil python patterns mro()[1]
    dns = [Neuron(*d.pes) for d in set(dns)
           ]  # TODO remove the set and use this to test existing bags?
    #from neurons.lang import WRITEPYTHON
    #WRITEPYTHON(sorted(dns))
    #ng.add_ont(TEMP['defined-neurons'], 'Defined Neurons', 'NIFDEFNEU',
    #'VERY EXPERIMENTAL', '0.0.0.1a')
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotype-core.ttl'))
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotypes.ttl'))
    #ng.write()
    # ontology header triples for the output graph
    ontinfo = (
        (Neuron.ng.ontid, rdf.type, owl.Ontology),
        (Neuron.ng.ontid, rdfs.label,
         rdflib.Literal('phenotype direct neurons')),
        (Neuron.ng.ontid, rdfs.comment,
         rdflib.Literal('Neurons derived directly from phenotype definitions')
         ),
    )
    [Neuron.out_graph.add(t) for t in ontinfo]
    Neuron.write()
    Neuron.write_python()
    # classes that carry only a single triple are likely dangling stubs
    bads = [
        n for n in Neuron.ng.g.subjects(rdf.type, owl.Class)
        if len(list(Neuron.ng.g.predicate_objects(n))) == 1
    ]
    if __name__ == '__main__':
        breakpoint()

    return config