Ejemplo n.º 1
0
        def map(attr, predicate):
            """Yield phenotype indicators for the cell returned by ``self.<attr>()``.

            The cell's value is converted with ``self.sheet.convert_cell`` and
            every ``(iri, label)`` pair it produces is turned into either

            * a tuple ``(op, *phenotypes)`` when the iri is itself a tuple, or
            * the ``asIndicator()`` form of a single (possibly negative) phenotype.

            Nothing is yielded when the cell value is falsy, and pairs without
            an iri are skipped (apart from one label that is mapped by hand).
            """
            value = getattr(self, attr)().value
            if not value:
                return

            # the conversion is fully materialised before the first yield
            converted = list(self.sheet.convert_cell(value, predicate=predicate))
            for iri, label in converted:
                if ',' not in value and label != value:
                    log.warning(f'label mismatch {label!r} != {value!r}')

                if iri is None:
                    if label == 'bed nucleus of stria terminalis juxtacapsular nucleus':
                        # special case: this label is mapped by hand to UBERON:0011173
                        iri = OntTerm('UBERON:0011173', label='anterior division of bed nuclei of stria terminalis')
                    else:
                        log.debug(f'nothing found for {label}')
                        continue

                if isinstance(iri, tuple):
                    # TODO need combinators in future version for union/intersection of object
                    op, *members = iri
                    parts = [
                        NegPhenotype(member, predicate)
                        if isinstance(member, LacksObject) else
                        Phenotype(member, predicate)
                        for member in members
                        if member is not None
                    ]
                    yield (op, *parts)
                    continue

                if isinstance(iri, LacksObject):
                    phenotype = NegPhenotype(iri.asURIRef(), predicate)
                else:
                    phenotype = Phenotype(iri, predicate)

                yield phenotype.asIndicator()
Ejemplo n.º 2
0
    def mapCell(cls, cell, syns=False):
        """Resolve the text of a cell to an ``(iri, label)`` pair.

        A cell containing ``:`` and no space is treated as an identifier and
        handed straight to ``OntTerm``.  Any other cell is searched with
        ``cls.sgv.findByTerm`` (limited to a fixed list of prefixes,
        deprecated hits dropped), then with ``cls.query``, and finally the
        whole lookup is retried once with synonym search switched on.

        Returns ``(None, None)`` when nothing is found, or when an
        identifier-like cell contains ``http`` anywhere but at its start.
        """
        search_prefixes = (
            'UBERON',
            'CHEBI',
            'PR',
            'NCBITaxon',
            'NCBIGene',
            'ilxtr',
            'NIFEXT',
            'SAO',
            'NLXMOL',
            'BIRNLEX',
        )

        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' not in cell:
                # FIXME need better error message in ontquery
                term = OntTerm(cell, exclude_prefix=('FMA', ))
            elif cell.startswith('http'):
                term = OntTerm(iri=cell)
            else:
                return None, None  # garbage with http inline

            return term.u, term.label

        hits = []
        for record in cls.sgv.findByTerm(
                cell, searchSynonyms=syns, prefix=search_prefixes):
            if not record['deprecated']:
                hits.append(record)

        if hits:
            # several candidates are disambiguated by curie rank
            best = select_by_curie_rank(hits) if len(hits) > 1 else hits[0]
            return rdflib.URIRef(best['iri']), best['labels'][0]

        log.debug(f'{cell}')
        candidates = list(cls.query(label=cell, exclude_prefix=('FMA', )))
        if candidates:
            first = candidates[0]
            return first.u, first.label

        if syns:
            # synonyms were already searched, give up
            return None, None

        return cls.mapCell(cell, syns=True)
Ejemplo n.º 3
0
    def neuron_existing(self):
        """Return the entry of ``self.sheet.existing`` matching this row.

        The curie cell is tried first: when it has a value, the ``.u`` of an
        ``OntTerm`` built from it is used as the key.  If that yields nothing
        truthy, the alignment label (or, when that is empty, the label) is
        used instead.  The result of ``.get`` is returned unchanged, so it is
        ``None`` when there is no entry.
        """
        curie_value = self.curie().value
        if curie_value:
            term_id = OntTerm(curie_value).u
            by_id = self.sheet.existing.get(term_id)
            if by_id:
                return by_id

        alignment = self.alignment_label().value
        return self.sheet.existing.get(alignment or self.label().value)
Ejemplo n.º 4
0
    def neuron_existing(self):
        """Return the entry of ``self.sheet.existing`` matching this row.

        Lookup is by the curie cell first (via the ``.u`` of an ``OntTerm``
        built from it) and falls back to the alignment label, or the label
        when the alignment label is empty.  Whenever the row has a curie and
        an entry was found by either route, the entry's ``id_`` attribute is
        set to the curie-derived identifier before it is returned.
        """
        curie = self.curie().value
        id_ = OntTerm(curie).u if curie else None
        neuron = self.sheet.existing.get(id_) if curie else None

        if not neuron:
            # no curie, or the curie is unknown: look up by label instead
            al = self.alignment_label().value
            neuron = self.sheet.existing.get(al or self.label().value)

        if neuron and curie:
            neuron.id_ = id_

        return neuron
Ejemplo n.º 5
0
    def entailed_molecular_phenotypes(self):
        """Yield phenotypes for the labels in the ``exhasmolecularphenotype`` cell.

        The cell value is read as a comma separated list of labels.  The
        ``ilxtr:GABAReceptor`` and ``ilxtr:glutamateReceptor`` phenotypes are
        always yielded first (see the FIXME below).  Each non-blank label is
        then looked up with ``self.sheet.sgv.findByTerm``; when there is a
        hit, the first result is yielded as a phenotype, and the curies
        ``NLXMOL:1006001`` / ``SAO:1164727693`` additionally yield the GABA /
        glutamate receptor phenotype respectively.

        An empty or missing cell value contributes no labels, and blank
        entries (e.g. from a trailing comma) are skipped instead of being
        sent to the term search as an empty string.
        """
        cell = self.exhasmolecularphenotype()
        # ''.split(',') is [''] and None has no split at all, so normalise
        # the value first and drop blank entries in the loop below
        raw = cell.value or ''
        labels = [label.strip() for label in raw.split(',')]

        # FIXME hack
        yield OntTerm(curie='ilxtr:GABAReceptor').asPhenotype()
        yield OntTerm(curie='ilxtr:glutamateReceptor').asPhenotype()

        for label in labels:
            if not label:
                continue

            term = self.sheet.sgv.findByTerm(label)
            if term:
                # only the first search result is used
                ot = OntTerm(iri=term[0]['iri'])
                yield ot.asPhenotype()
                if ot.curie == 'NLXMOL:1006001':
                    yield OntTerm(curie='ilxtr:GABAReceptor').asPhenotype()
                elif ot.curie == 'SAO:1164727693':
                    yield OntTerm(curie='ilxtr:glutamateReceptor').asPhenotype()
Ejemplo n.º 6
0
    def mapCell(cls, cell, syns=False, predicate=None):
        """Resolve the text of a cell to an ``(iri, label)`` pair.

        Resolution order:

        1. two literal special cases (``contralateral`` and, ignoring case,
           ``gaba receptor role``) map to fixed ``ilxtr`` terms;
        2. a cell containing ``:`` and no space is treated as an identifier
           and handed to ``OntTerm``;
        3. otherwise ``cls.sgv.findByTerm`` is searched within a fixed list
           of prefixes and the best ranked non-deprecated hit is used;
        4. failing that ``cls.query`` is tried, and then the whole lookup is
           repeated once with synonym search enabled.

        ``predicate`` only influences the prefix ranking: for molecular
        predicates ``UBERON`` is moved from first to second-to-last place.
        Returns ``(None, None)`` when nothing is found or when an
        identifier-like cell contains ``http`` anywhere but at its start.
        """
        search_prefixes = ('UBERON', 'CHEBI', 'PR', 'NCBIGene', 'NCBITaxon',
                           'ilxtr', 'NIFEXT', 'SAO', 'NLXMOL', 'BIRNLEX',)

        if predicate and predicate in Phenotype._molecular_predicates:
            # uberon syns pollute molecular results so move it to one before birnlex
            search_prefixes = (*search_prefixes[1:-1],
                               search_prefixes[0],
                               search_prefixes[-1])

        if cell == 'contralateral':
            return ilxtr.Contralateral, cell  # XXX FIXME only BSPO has this right now

        if cell.lower() == 'gaba receptor role':
            return ilxtr.GABAReceptor, cell

        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' not in cell:
                # FIXME need better error message in ontquery
                term = OntTerm(cell, exclude_prefix=('FMA',))
            elif cell.startswith('http'):
                term = OntTerm(iri=cell)
            else:
                return None, None  # garbage with http inline

            return term.u, term.label

        if cell in ('Vertebrata', ):  # search syns
            syns = True

        def rank_mask(record):
            """Build the sort key for one search result.

            The key is a tuple of booleans: one per entry of
            ``search_prefixes`` telling whether the record's curie starts
            with that prefix, followed by a flag that is true when the cell
            text is one of the record's labels.  Sorted in descending order,
            records matching an earlier prefix come first and an exact label
            match breaks ties within the same prefix.
            """
            prefix_flags = tuple(record['curie'].startswith(prefix)
                                 for prefix in search_prefixes)
            exact_label = 'labels' in record and cell in record['labels']
            return prefix_flags + (exact_label,)

        ranked = [record
                  for record in cls.sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
                  if not record['deprecated']]
        ranked.sort(key=rank_mask, reverse=True)

        if ranked:
            # the top ranked record is taken whether there is one hit or
            # several; select_by_curie_rank is not used here
            best = ranked[0]
            return rdflib.URIRef(best['iri']), best['labels'][0]

        log.debug(f'{cell}')
        candidates = list(cls.query(label=cell, exclude_prefix=('FMA',)))
        if candidates:
            first = candidates[0]
            return first.u, first.label

        if syns:
            # synonyms were already searched, give up
            return None, None

        return cls.mapCell(cell, syns=True, predicate=predicate)
Ejemplo n.º 7
0
        def loop_internal(j, header, cell):
            nonlocal id
            nonlocal current_neuron
            nonlocal do_release
            notes = list(process_note(get_note(i + 1, j, self.cells_index)))  # + 1 since headers is removed
            if notes and not header.startswith('has'):
                _predicate = self.convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            if header == 'curie':
                id = OntId(cell).u if cell else None
                return
            elif header == 'label':
                if id == OntId('NIFEXT:66').u:
                    breakpoint()
                label_neuron = cell
                if cell in self.existing:
                    current_neuron = self.existing[cell]
                elif cell:
                    # TODO
                    self.new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                return
            elif header == 'Status':
                # TODO
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass

                return
            elif header == 'PMID':
                # TODO
                return
            elif header == 'Other reference':
                # TODO
                return
            elif header == 'Other label':
                # TODO
                return
            elif header == 'definition':
                return  # FIXME single space differences between the spreadsheet and the source

                if cell:
                    definition_neuron = rdflib.Literal(cell)

            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                    # FIXME bare comma is extremely dangerous
                                    for s in cell.split(',')]

                return
            elif header in self.skip:
                return

            objects = []
            if cell:
                predicate = self.convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in self.convert_cell(cell):
                    if predicate in NeuronCUT._molecular_predicates:
                        if isinstance(object, tuple):
                            op, *rest = object
                            rest = [OntTerm(o).asIndicator().URIRef for o in rest]
                            object = op, *rest
                        elif object:
                            log.debug(f'{object!r}')
                            object = OntTerm(object).asIndicator().URIRef

                    if isinstance(label, tuple):  # LogicalPhenotype case
                        _err = []
                        for l in label:
                            if self.lower_check(l, cell):
                                _err.append((cell, label))
                        if _err:
                            self.errors.extend(_err)
                        else:
                            objects.append(object)
                    elif self.lower_check(label, cell):
                        self.errors.append((cell, label))
                    elif str(id) == object:
                        self.errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                return

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        self.errors.append((object, predicate, cell))
            elif objects:
                self.errors.append((header, objects))
            else:
                self.errors.append((header, cell))