Example no. 1
0
def _ontology_data_files():
    """Locate the phenotype ttl files needed for packaging.

    Returns a tuple of ``(absolute_resources_path, file_paths)`` where
    ``file_paths`` are posix-style paths under ``resources/``.  When the
    module-level ``RELEASE`` flag is set, the files are copied out of the
    local ontology repository into a freshly created ``resources/``
    directory; otherwise the relative paths are used as-is.

    Raises FileNotFoundError if the ontology local repo is missing and
    ValueError if it is checked out on the wrong branch (RELEASE only).
    """
    resources = 'resources'
    relpaths = [
        'ttl/phenotype-core.ttl',
        'ttl/phenotype-indicators.ttl',
        'ttl/phenotypes.ttl',
        'ttl/generated/part-of-self.ttl',
    ]
    if RELEASE:
        from augpathlib import RepoPath as Path
        ### KILL IT WITH FIRE
        try:
            from neurondm.core import auth  ### this is NOT ok
        except Exception:
            # can't catch an error that you can never import because
            # it will be raised before you can import it ... SIGH
            import orthauth as oa
            from pyontutils.config import auth as pauth
            auth = oa.configure(Path('neurondm/auth-config.py').resolve(),
                                include=pauth)
        ###

        olr = Path(auth.get_path('ontology-local-repo'))

        ### KILL IT WITH FIRE
        if not olr.exists():
            original = auth.get('ontology-local-repo')
            # NOTE: a separating space was missing between the two
            # f-strings, which mashed the path into the next sentence
            raise FileNotFoundError(
                f'ontology local repo does not exist: {olr} '
                f'path expanded from {original}')
        elif olr.repo.active_branch.name != auth.get('neurons-branch'):
            # FIXME yes indeed having to call Config in a way that is
            # invoked at import time is REALLY REALLY BAD :/
            raise ValueError('git is on the wrong branch! '
                             f'{olr.repo.active_branch}')
        ###

        resources = Path(resources)
        resources.mkdir(
        )  # if we add resources to git, this will error before we delete by accident
        paths = [olr / rp for rp in relpaths]
        for p in paths:
            p.copy_to(resources / p.name)

    else:
        from pathlib import Path
        resources = Path(resources)
        paths = [Path(rp) for rp in relpaths]

    return resources.absolute(), [(resources / p.name).as_posix()
                                  for p in paths]
Example no. 2
0
def config(
        remote_base='https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/',
        local_base=None,  # auth.get_path('ontology-local-repo') by default
        branch=auth.get('neurons-branch'),
        core_graph_paths=None,  # default ['ttl/phenotype-core.ttl', 'ttl/phenotypes.ttl']
        core_graph=None,
        in_graph_paths=tuple(),
        out_graph_path='/tmp/_Neurons.ttl',
        out_imports=None,  # default ['ttl/phenotype-core.ttl']
        out_graph=None,
        prefixes=tuple(),
        force_remote=False,
        checkout_ok=ont_checkout_ok,
        scigraph=None,  # defaults to auth.get('scigraph-api')
        iri=None,
        sources=tuple(),
        source_file=None,
        use_local_import_paths=True,
        ignore_existing=True):
    """ Wraps graphBase.configGraphIO to provide a set of sane defaults
        for input ontologies and output files.

        Returns graphBase._predicates so callers can get the predicate
        namespace from the call site.
    """
    # None sentinels replace the original mutable list defaults: a single
    # list object was shared across every call, so any mutation of it
    # (here or downstream) would silently change the default for all
    # later calls; each call now gets a fresh list
    if core_graph_paths is None:
        core_graph_paths = ['ttl/phenotype-core.ttl', 'ttl/phenotypes.ttl']
    if out_imports is None:
        out_imports = ['ttl/phenotype-core.ttl']

    graphBase.configGraphIO(remote_base=remote_base,
                            local_base=local_base,
                            branch=branch,
                            core_graph_paths=core_graph_paths,
                            core_graph=core_graph,
                            in_graph_paths=in_graph_paths,
                            out_graph_path=out_graph_path,
                            out_imports=out_imports,
                            out_graph=out_graph,
                            prefixes=prefixes,
                            force_remote=force_remote,
                            checkout_ok=checkout_ok,
                            scigraph=scigraph,
                            iri=iri,
                            sources=sources,
                            source_file=source_file,
                            use_local_import_paths=use_local_import_paths,
                            ignore_existing=ignore_existing)

    pred = graphBase._predicates
    return pred  # because the python module system is opinionated :/
Example no. 3
0
class AllenCellTypes:
    """Build neurondm neurons and transgenic-line classes from Allen
    Institute cell types api specimen data.

    ``input`` is the decoded json specimen list from the api; each
    ``*_phenotypes`` method maps one slice of a specimen record onto
    Phenotype objects, and ``build_phenotypes`` aggregates them.
    """

    # NOTE(review): evaluated at class-definition (import) time; see the
    # FIXMEs elsewhere in this codebase about calling auth this early
    branch = auth.get('neurons-branch')

    prefixes = {
        **{
            'JAX': 'http://jaxmice.jax.org/strain/',
            'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
            'AllenTL': 'http://api.brain-map.org/api/v2/data/TransgenicLine/'
        },
        **makePrefixes('definition', 'ilxtr', 'owl')
    }
    prefixes[
        'AllenTransgenicLine'] = 'http://api.brain-map.org/api/v2/data/TransgenicLine/'

    def __init__(self, input, name):
        """Store the api data and prebuild rdflib namespaces per prefix."""
        self.name = name
        self.ns = {k: rdflib.Namespace(v) for k, v in self.prefixes.items()}
        self.neuron_data = input
        self.tag_names = set()
        # self.sample_neuron()

    def avoid_url_conversion(self, string):
        """Make *string* iri-safe: '/', ' ' and '(' become '_', ')' is dropped."""
        if not string:
            return string
        # raw string so the escaped paren is unambiguous
        return re.sub(r"/| |\(", '_', string).replace(')', '')

    def sample_neuron(self, ):
        """Debug helper: emit one example neuron and print the graph."""
        Neuron(
            Phenotype('ilxtr:apical',
                      'ilxtr:hasPhenotype',
                      label='apical - truncated'),
            Phenotype('JAX:12345',
                      'ilxtr:hasExperimentalPhenotype',
                      label='prefix+stock_number'),
        )
        print(graphBase.ttl())

    def cell_phenotypes(self, cell_specimen):
        """Phenotypes from top-level specimen fields (currently laterality)."""
        cell_mappings = {
            'hemisphere': 'ilxtr:hasSomaLocationLaterality',
            # 'name': 'ilxtr:hasPhenotype',
        }
        phenotypes = []
        for name, value in cell_specimen.items():
            mapping = cell_mappings.get(name)
            if mapping and value:
                if name == 'hemisphere':
                    if value.lower() == 'left':
                        curie = 'UBERON:0002812'
                    elif value.lower() == 'right':
                        curie = 'UBERON:0002813'
                    else:
                        raise ValueError('got stuck with unknown hemisphere ' +
                                         value)
                phenotypes.append(Phenotype(curie, mapping))
        return phenotypes

    # TODO: wrong phenotype
    def structure_phenotypes(self, cell_specimen):
        """Soma-location phenotype from the specimen 'structure' record."""
        struc = cell_specimen['structure']
        phenotypes = []
        # guard BEFORE indexing: the original read struc['acronym'] and
        # struc['id'] first, so an empty/None structure raised instead of
        # being skipped as the `if struc:` clearly intended
        if struc:
            acronym = self.avoid_url_conversion(struc['acronym'])
            curie = 'MBA:' + str(struc['id'])
            phenotypes.append(
                Phenotype(curie, 'ilxtr:hasSomaLocatedIn', label=acronym))
        return phenotypes

    def donor_phenotypes(self, cell_specimen):
        """Biological-sex phenotype from the specimen 'donor' record."""
        donor_mappings = {'sex_full_name': 'ilxtr:hasBiologicalSex'}
        phenotypes = []
        for name, value in cell_specimen['donor'].items():
            mapping = donor_mappings.get(name)
            if mapping and value:
                if name == 'sex_full_name':
                    if value.lower() == 'female':
                        curie = 'PATO:0000383'
                    elif value.lower() == 'male':
                        curie = 'PATO:0000384'
                    else:
                        raise ValueError('unknown sex ' + str(value))
                phenotypes.append(Phenotype(curie, mapping))
        return phenotypes

    # TODO: Figure how to add: description, name and type
    def transgenic_lines_phenotypes(self, cell_specimen):
        """Expression phenotypes from the donor's transgenic lines.

        Only lines from AIBS (remapped to AllenTL), MMRRC, and JAX are
        kept; the predicate is chosen from the line type (driver /
        inducible driver / reporter / other).
        """
        phenotypes = []
        for tl in cell_specimen['donor']['transgenic_lines']:
            prefix = tl['transgenic_line_source_name']
            suffix = tl['stock_number'] if tl['stock_number'] else str(
                tl['id'])
            name = self.avoid_url_conversion(tl['name'])
            _type = tl['transgenic_line_type_name']
            if _type == 'driver':
                if 'CreERT2' in name:  # FIXME from structured instead of name?
                    pred = ilxtr.hasDriverExpressionInducedPhenotype
                else:
                    pred = 'ilxtr:hasDriverExpressionPhenotype'
            elif _type == 'reporter':
                pred = 'ilxtr:hasReporterExpressionPhenotype'
            else:
                pred = 'ilxtr:hasExpressionPhenotype'

            # (removed the unused transgenic_mappings and line_names locals)
            if prefix and suffix and prefix in ['AIBS', 'MMRRC', 'JAX']:
                if prefix == 'AIBS':
                    prefix = 'AllenTL'
                iri = self.ns[prefix][suffix]
                phenotypes.append(Phenotype(iri, pred))
        return phenotypes

    # TODO: search if description exists
    # TODO: Create mapping for all possible types
    # TODO: Fork negatives to NegPhenotype
    def specimen_tags_phenotypes(self, cell_specimen):
        """Morphological phenotypes from specimen tags (spiny/apical ...).

        Tag names are normalized to the keys of specimen_tag_mappings;
        an unrecognized name raises ValueError so new tags surface loudly.
        """
        pred = 'ilxtr:hasDendriteMorphologicalPhenotype'
        specimen_tag_mappings = {
            'spiny':
            Phenotype('ilxtr:SpinyPhenotype', pred),
            'aspiny':
            NegPhenotype('ilxtr:SpinyPhenotype', pred),
            'sparsely spiny':
            LogicalPhenotype(
                AND, Phenotype('ilxtr:SpinyPhenotype', pred),
                Phenotype('PATO:0001609', 'ilxtr:hasPhenotypeModifier')),
            'apicalIntact':
            Phenotype('ilxtr:ApicalDendritePhenotype',
                      'ilxtr:hasMorphologicalPhenotype'),
            'apicalTruncated':
            LogicalPhenotype(
                AND,
                Phenotype('ilxtr:ApicalDendritePhenotype',
                          'ilxtr:hasMorphologicalPhenotype'),
                Phenotype('PATO:0000936', 'ilxtr:hasPhenotypeModifier')),
            'apicalNa':
            NegPhenotype('ilxtr:ApicalDendritePhenotype',
                         'ilxtr:hasMorphologicalPhenotype'
                         ),  # NA means there was no apical dendrite
        }
        phenotypes = []
        for tag in cell_specimen['specimen_tags']:
            # 'X dendrite type - name' keeps the raw name;
            # 'X - name' camel-cases into e.g. 'apicalIntact'
            if 'dendrite type' in tag['name']:
                name = tag['name'].split(' - ')[1]
            else:
                one, two = tag['name'].split(' - ')
                name = one + two.capitalize()

            self.tag_names.add(tag['name'])
            if name not in specimen_tag_mappings:
                raise ValueError(name)

            # the raise above guarantees membership, so index directly
            # (the original carried an unreachable Phenotype fallback here)
            phenotypes.append(specimen_tag_mappings[name])

        return phenotypes

    # TODO: check to see if specimen_id is really the priority
    def cell_soma_locations_phenotypes(self, cell_specimen):
        """Deprecated: soma locations from the 'cell_soma_locations' list."""
        phenotypes = []
        for csl in cell_specimen['cell_soma_locations']:
            location = csl['id']
            phenotypes.append(
                Phenotype('ilxtr:' + str(location), 'ilxtr:hasSomaLocatedIn'))
        return phenotypes

    def add_mouse_lineage(self, cell_specimen):
        """All Allen cell types specimens are mouse; tag the taxon."""
        return [Phenotype('NCBITaxon:10090', 'ilxtr:hasInstanceInTaxon')]

    def build_phenotypes(self, cell_specimen):
        """Aggregate the phenotypes from every mapper for one specimen."""
        phenotype_functions = [
            self.cell_phenotypes,
            self.structure_phenotypes,
            self.donor_phenotypes,
            self.transgenic_lines_phenotypes,
            self.specimen_tags_phenotypes,
            self.add_mouse_lineage,
            # self.cell_soma_locations_phenotypes, # deprecated
        ]
        phenotypes = []
        for func in phenotype_functions:
            phenotypes.extend(func(cell_specimen))
        return phenotypes

    def make_config(self):
        """Create the neurondm Config for this run.

        Has to be called here rather than at import time because the
        transgenic lines ontology file may not exist yet.
        """
        self.config = Config(
            name=self.name,
            imports=[
                f'NIFRAW:{self.branch}/ttl/generated/allen-transgenic-lines.ttl'
            ],
            prefixes=self.prefixes,
            branch=self.branch,
            sources=tuple(),  # TODO insert the link to the query...
            source_file=relative_path(__file__, no_wd_value=__file__))

    def build_neurons(self):
        """Create one NeuronACT per specimen, write graphs, then instances."""
        instances = []
        dids = []
        for cell_specimen in self.neuron_data:
            neuron = NeuronACT(*self.build_phenotypes(cell_specimen))
            did = AIBSSPEC[str(cell_specimen['id'])]
            dids.append(did)
            instances.append((did, rdf.type, owl.NamedIndividual))
            instances.append((did, rdf.type, neuron.identifier))

        print(sorted(self.tag_names))
        NeuronACT.write()
        NeuronACT.write_python()
        self.build_instances(instances, dids)

    def build_instances(self, instances, dids):
        """Write the named-individual graph for the specimen instances."""
        folder = Path(self.config.out_graph_path()).parent
        # WOW do I need to implement the new/better way of
        # managing writing collections of neurons to graphs
        neuron_uri = next(NeuronACT.out_graph[:rdf.type:owl.Ontology])
        name = 'allen-cell-instances.ttl'
        base, _ = neuron_uri.rsplit('/', 1)
        uri = rdflib.URIRef(base + '/' + name)
        metadata = ((uri, rdf.type, owl.Ontology), )
        instance_graph = OntGraph(path=folder / name)
        instance_graph.bind('AIBSSPEC', AIBSSPEC)
        # plain loops instead of side-effect list comprehensions
        for t in metadata:
            instance_graph.add(t)
        for t in instances:
            instance_graph.add(t)
        # assert pairwise distinctness of all specimen individuals
        for t in allDifferent(None, distinctMembers(*dids)):
            instance_graph.add(t)
        instance_graph.write()

    def build_transgenic_lines(self):
        """
        init class     |  "transgenic_line_source_name":"stock_number" a Class
        add superClass |  rdfs:subClassOf ilxtr:transgenicLine
        add *order*    |  ilxtr:useObjectProperty ilxtr:<order>
        add name       |  rdfs:label "name"
        add def        |  definition: "description"
        add transtype  |  rdfs:hasTransgenicType "transgenic_line_type_name"
        """

        triples = []
        for cell_specimen in self.neuron_data:
            for tl in cell_specimen['donor']['transgenic_lines']:
                _id = tl['stock_number'] if tl['stock_number'] else tl['id']
                prefix = tl['transgenic_line_source_name']
                line_type = tl['transgenic_line_type_name']
                if line_type == 'driver' and 'CreERT2' in tl['name']:
                    line_type = 'inducibleDriver'

                # warn-and-skip unknown sources instead of crashing
                if prefix not in ['JAX', 'MMRRC', 'AIBS']:
                    print(tc.red('WARNING:'), 'unknown prefix', prefix,
                          json.dumps(tl, indent=4))
                    continue
                elif prefix == 'AIBS':
                    prefix = 'AllenTL'

                _class = self.ns[prefix][str(_id)]
                triples.append((_class, rdf.type, owl.Class))
                triples.append(
                    (_class, rdfs.label, rdflib.Literal(tl['name'])))
                triples.append(
                    (_class, definition, rdflib.Literal(tl['description'])))
                triples.append((_class, rdfs.subClassOf, ilxtr.transgenicLine))
                triples.append((_class, ilxtr.hasTransgenicType,
                                ilxtr[line_type + 'Line']))

        # TODO aspects.ttl?
        transgenic_lines = simpleOnt(
            filename='allen-transgenic-lines',
            local_base=graphBase.local_base,
            path='ttl/generated/',
            prefixes=self.prefixes,
            triples=triples,
            comment='Allen transgenic lines for cell types',
            branch=self.branch,
            calling__file__=__file__,
        )

        transgenic_lines._graph.write()
Example no. 4
0
def main():
    """Map existing neuron populations onto the cut-development labels.

    Loads neurons from the main ontology config, basic-neurons, and
    neuron_data_lifted, matches them against the labels in
    resources/cut-development.csv, writes the raw and id-updated
    cut-development configs, and prints a progress report plus review
    listings of unmapped and partially mapped labels.

    Returns:
        (config, unmapped, partial, nlx_missing)
    """
    branch=auth.get('neurons-branch')
    # release builds (master) read NIFTTL; any other branch reads raw github
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])

    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    # the curated csv of cut-development labels
    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)

    # first column of the label row is a header cell; the rest are labels
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    # the only BAMSC id allowed through; all other BAMSC neurons are skipped
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel)
        if l is not None:
            # normalize labels via the rename rules before matching
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

        if l in labels:
            n._origLabel = l
            ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    # labels still uncovered after the neurolex/ontology pass
    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    # this whole loop is deliberately disabled: every iteration hits the
    # continue, so ans/sans/missed/_bl stay empty (kept for future mapping)
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ',
                            '').replace('Intrinsic',
                                        'intrinsic').replace('Projection',
                                                             'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # keep existing non-TEMP ids; TEMP ids become None so they regenerate
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel, override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    # fuzzy/structured matching over the labels that remain
    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndprecated currenlty triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total:            {progress[0]}\n'
                   f'from nlx:         {progress[1]}\n'
                   f'from basic:       {progress[2]}\n'
                   f'from match:       {progress[3]}\n'
                   f'TODO after nlx:   {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    # the label sets must partition cleanly at each stage
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    # case-insensitive review of neurolex-sourced labels with no mapping
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    # matches that resolved to something, but not to a terminal
    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        # pad keys to the longest for aligned output
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    # neurons lacking any soma location predicate (computed for review)
    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer), *n.unique_predicates)]
    # only drop into the review/breakpoint when run as a script
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing