Beispiel #1
0
 def setUp(self):
     """Register the core prefixes and build the query object used by the tests.

     A SciGraph remote authenticated with the configured API key is used when
     ``SCICRUNCH_API_KEY`` is present in the environment; otherwise the remote
     points at a SciGraph instance on localhost.
     """
     ontquery.OntCuries(core.PREFIXES)
     # TODO provide a default query built from local/remote onts or a
     # ontquery.BasicService instead of always going through SciGraph
     if 'SCICRUNCH_API_KEY' not in os.environ:
         remote = ontquery.SciGraphRemote(
             apiEndpoint='http://localhost:9000/scigraph')
     else:
         remote = ontquery.SciGraphRemote(api_key=core.get_api_key())

     self.query = ontquery.OntQuery(remote, upstream=OntTerm)
Beispiel #2
0
    def test_query(self):
        # Smoke test: every call below only has to complete without raising,
        # none of the results are inspected.
        query = self.query

        query('brain')
        query(term='brain')
        #query(prefix='UBERON', suffix='0000955')  # only for OntId
        # will probably fail with many results to choose from
        query(search='thalamus')
        query(prefix='MBA', abbrev='TH')

        # the base query is unpacked as the positional arguments of the
        # derived, more restricted queries
        uberon_query = ontquery.OntQuery(*query, prefix='UBERON')
        # -> OntTerm('UBERON:0000955', label='brain')
        brain_term = uberon_query('brain')

        species_query = ontquery.OntQuery(*query, category='species')
        # -> OntTerm('NCBITaxon:10090', label='mouse')
        mouse_term = species_query('mouse')

        list(query.predicates)
Beispiel #3
0
    def __new__(cls, *args, **kwargs):
        """Create an instance, filling the class-level caches on first use.

        While the class has no ``existing`` attribute, the 'cut-development'
        config is loaded and ``existing``, ``query`` and ``sgv`` are stored on
        the class. Later instantiations skip straight to object creation.
        """
        if hasattr(cls, 'existing'):
            return super().__new__(cls)

        dev_config = Config('cut-development')
        dev_config.load_existing()
        # FIXME clear use case for the remaining bound to whatever query produced it rather
        # than the other way around ... how to support this use case ...
        # index first by original label, then by id (an id wins on a key clash)
        cls.existing = dict((pe.origLabel.toPython(), pe)
                            for pe in dev_config.existing_pes)
        by_id = dict((pe.id_, pe) for pe in dev_config.existing_pes)
        cls.existing.update(by_id)

        rdflib_service = oq.plugin.get('rdflib')(dev_config.core_graph)
        cls.query = oq.OntQuery(rdflib_service, instrumented=OntTerm)
        cls.sgv = Vocabulary()

        return super().__new__(cls)
Beispiel #4
0
    def __new__(cls, *args, **kwargs):
        """Create an instance, filling the class-level caches on first use.

        While the class has no ``existing`` attribute, the 'common-usage-types'
        config is loaded and ``existing``, ``query`` and ``sgv`` are stored on
        the class. Later instantiations skip straight to object creation.
        """
        if hasattr(cls, 'existing'):
            return super().__new__(cls)

        cut_config = Config('common-usage-types')
        cut_config.load_existing()
        # FIXME clear use case for the remaining bound to whatever query produced it rather
        # than the other way around ... how to support this use case ...
        # neurons indexed by the string form of their original label
        cls.existing = dict((str(neuron.origLabel), neuron)
                            for neuron in cut_config.neurons())

        rdflib_service = oq.plugin.get('rdflib')(cut_config.core_graph)
        cls.query = oq.OntQuery(rdflib_service, instrumented=OntTerm)
        cls.sgv = Vocabulary()

        return super().__new__(cls)
Beispiel #5
0
def sheet_to_neurons(values, notes_index, expect_pes):
    """Convert spreadsheet rows into ``NeuronCUT`` objects.

    The first element of ``values`` is taken as the header row; every
    remaining element is treated as one neuron. Each cell is dispatched on
    its header: bookkeeping columns ('curie', 'label', 'Status', 'synonyms',
    ...) are handled specially, every other non-empty cell is converted into
    ``Phenotype``/``LogicalPhenotype`` objects by mapping the header to a
    predicate and the cell text to ontology identifiers.

    Only rows whose label matches an already existing neuron and which
    produced at least one phenotype are turned into a ``NeuronCUT``; all
    other rows are skipped.

    Args:
        values: sequence of rows, header row first.
        notes_index: note lookup structure, passed through to ``get_note``.
        expect_pes: mapping from neuron id to the expected number of
            phenotypes; rows whose id is missing from it, or whose phenotype
            count differs, are logged as errors and skipped.

    Returns:
        A 4-tuple ``(config, errors, new, release)``: the 'cut-roundtrip'
        ``Config``, a list of tuples describing cells that could not be
        converted cleanly, the list of labels that are not among the existing
        neurons, and the list of neurons whose 'Status' cell was 'Yes'.

    Raises:
        ValueError: if a 'label' cell is empty (and not a known label).
    """
    # TODO import existing ids to register by label
    sgv = Vocabulary()
    e_config = Config('common-usage-types')
    e_config.load_existing()
    query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
    # FIXME clear use case for the remaining bound to whatever query produced it rather
    # than the other way around ... how to support this use case ...
    # already known neurons, keyed by the string form of their original label
    existing = {str(n.origLabel):n for n in e_config.neurons()}
    def convert_header(header):
        """Return the ilxtr predicate for a 'has...' header, otherwise None."""
        if header.startswith('has'):  # FIXME use a closed namespace
            return ilxtr[header]
        else:
            return None

    def convert_other(header):
        """Return the predicate used to attach notes for a non-'has' header."""
        if header == 'label':
            return rdfs.label
        elif header == 'curie':
            return rdf.type
        elif header == 'definition':
            return definition
        else:
            # any other header becomes a TEMP term, spaces replaced by underscores
            header = header.replace(' ', '_')
            return TEMP[header]  # FIXME

    def mapCell(cell, syns=False):
        """Resolve the text of one cell to an ``(iri, label)`` pair.

        Returns ``(None, None)`` when nothing could be found. ``syns`` is
        forwarded as ``searchSynonyms`` to the vocabulary lookup; when a
        lookup without synonyms finds nothing, the function retries itself
        once with ``syns=True``.
        """
        search_prefixes = ('UBERON', 'CHEBI', 'PR', 'NCBITaxon', 'NCBIGene', 'ilxtr', 'NIFEXT', 'SAO', 'NLXMOL',
                           'BIRNLEX',)

        # a colon without any space is treated as an identifier (curie or iri)
        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' in cell:
                if cell.startswith('http'):
                    t = OntTerm(iri=cell)
                else:
                    return None, None  # garbage with http inline
            else:
                t = OntTerm(cell, exclude_prefix=('FMA',))  # FIXME need better error message in ontquery

            return t.u, t.label

        # otherwise search by term, dropping deprecated results
        result = [r for r in sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
                  if not r['deprecated']]
        #printD(cell, result)
        if not result:
            log.debug(f'{cell}')
            # fall back to a label query against the local graph
            maybe = list(query(label=cell, exclude_prefix=('FMA',)))
            if maybe:
                qr = maybe[0]
                return qr.OntTerm.u, qr.label
            elif not syns:
                return mapCell(cell, syns=True)
            else:
                return None, None
        elif len(result) > 1:
            #printD('WARNING', result)
            result = select_by_curie_rank(result)
        else:
            result = result[0]

        return rdflib.URIRef(result['iri']), result['labels'][0]

    def lower_check(label, cell):
        """Return True when ``label`` does NOT occur in ``cell``.

        Both the exact and the lower-cased comparison have to fail, so a
        True result signals a mismatch between the resolved label and the
        text of the cell.
        """
        return label not in cell and label.lower() not in cell.lower()  # have to handle comma sep case

    # inverse of LogicalPhenotype.local_names, used to look up the operator
    # written in the sheet ('OR' / 'AND')
    lnlu = {v:k for k, v in LogicalPhenotype.local_names.items()}
    def convert_cell(cell_or_comma_sep):
        """Yield one ``(object, label)`` pair per comma separated entry.

        Plain entries yield ``(iri, label)``. Entries starting with '(OR' or
        '(AND' yield ``((operator, *iris), labels)`` where ``labels`` is a
        tuple. Whenever no label could be resolved, the complete original
        cell text is used in its place.
        """
        #printD('CONVERTING', cell_or_comma_sep)
        for cell_w_junk in cell_or_comma_sep.split(','):  # XXX WARNING need a way to alert people to this
            cell = cell_w_junk.strip()
            if cell.startswith('(OR') or cell.startswith('(AND'):
                # the terms are double quoted and separated by single spaces:
                # splitting on '" "' isolates them, the first piece still
                # carries the '(OPERATOR "' prefix and the last one the
                # closing '")'
                start, *middle, end = cell.split('" "')
                OPoperator, first = start.split(' "')
                operator = OPoperator[1:]  # drop the leading '('
                operator = lnlu[operator]
                last, CP = end.rsplit('"')
                iris, labels = [], []
                for term in (first, *middle, last):
                    iri, label = mapCell(term)
                    if label is None:
                        label = cell_or_comma_sep
                    iris.append(iri)
                    labels.append(label)

                yield (operator, *iris), tuple(labels)

            else:
                iri, label = mapCell(cell)
                if label is None:
                    yield iri, cell_or_comma_sep  # FIXME need a way to handle this that doesn't break things?
                else:
                    yield iri, label

    config = Config('cut-roundtrip')
    skip = 'alignment label',  # one element tuple of headers to ignore
    headers, *rows = values
    errors = []
    new = []
    release = []
    for i, neuron_row in enumerate(rows):
        # per row state, reset for every neuron
        id = None
        label_neuron  = None
        definition_neuron = None
        synonyms_neuron = None
        current_neuron = None
        phenotypes = []
        do_release = False
        predicate_notes = {}
        object_notes = {}
        other_notes = {}
        wat = {}
        for j, (header, cell) in enumerate(zip(headers, neuron_row)):
            notes = list(process_note(get_note(i + 1, j, notes_index)))  # + 1 since headers is removed
            if notes and not header.startswith('has'):
                # notes on non-phenotype columns are keyed by (predicate, literal)
                _predicate = convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            if header == 'curie':
                id = OntId(cell).u if cell else None
                continue
            elif header == 'label':
                label_neuron = cell
                if cell in existing:
                    current_neuron = existing[cell]
                elif cell:
                    # TODO
                    new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                continue
            elif header == 'Status':
                # TODO
                # only 'Yes' has an effect at the moment
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass

                continue
            elif header == 'PMID':
                # TODO
                continue
            elif header == 'Other reference':
                # TODO
                continue
            elif header == 'Other label':
                # TODO
                continue
            elif header == 'definition':
                continue  # FIXME single space differences between the spreadsheet and the source

                # NOTE: unreachable because of the unconditional continue
                # above, definition_neuron therefore always stays None
                if cell:
                    definition_neuron = rdflib.Literal(cell)

                continue

            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                    # FIXME bare comma is extremely dangerous
                                    for s in cell.split(',')]

                continue
            elif header in skip:
                continue

            # every remaining column is treated as a phenotype column
            objects = []
            if cell:
                predicate = convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in convert_cell(cell):
                    if isinstance(label, tuple):  # LogicalPhenotype case
                        # every member label has to occur in the cell text
                        _err = []
                        for l in label:
                            if lower_check(l, cell):
                                _err.append((cell, label))
                        if _err:
                            errors.extend(_err)
                        else:
                            objects.append(object)
                    elif lower_check(label, cell):
                        errors.append((cell, label))
                    elif str(id) == object:
                        # the object resolved to the id of the row itself
                        errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    # NOTE: `object` is whatever the loop above left behind
                    # for the last entry of the cell
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                continue

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        # (operator, *iris) as produced by convert_cell
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        errors.append((object, predicate, cell))
            elif objects:
                # objects were found but the header is not a known predicate
                errors.append((header, objects))
            else:
                errors.append((header, cell))
            # translate header -> predicate
            # translate cell value to ontology id

        if current_neuron and phenotypes:
            # TODO merge current with changes
            # or maybe we just replace since all the phenotypes should be there?
            log.debug(phenotypes)
            if id is not None:
                log.debug(f'{(id, bool(id))}')

            elif label_neuron:
                # no curie in the sheet, derive the id from the label
                id = make_cut_id(label_neuron)

            if id not in expect_pes:
                log.error(f'{id!r} not in cuts!?')
                continue

            if expect_pes[id] != len(phenotypes):
                log.error(f'{id!r} failed roundtrip {len(phenotypes)} != {expect_pes[id]}')
                continue

            neuron = NeuronCUT(*phenotypes, id_=id, label=label_neuron,
                               override=bool(id) or bool(label_neuron))
            neuron.adopt_meta(current_neuron)
            # FIXME occasionally this will error?!
        else:
            continue  # FIXME this pollutes everything ???
            # NOTE: unreachable because of the unconditional continue above,
            # rows without an existing neuron or without phenotypes are
            # skipped entirely
            fn = fixname(label_neuron)
            if not phenotypes and i:  # i skips header
                errors.append((i, neuron_row))  # TODO special review for phenos but not current
                phenotypes = Phenotype('TEMP:phenotype/' + fn),

            neuron = NeuronCUT(*phenotypes,
                               id_=make_cut_id(label_neuron),
                               label=label_neuron, override=True)

        # update the meta if there were any changes
        if definition_neuron is not None:
            neuron.definition = definition_neuron

        if synonyms_neuron is not None:
            neuron.synonyms = synonyms_neuron

        try:
            neuron.batchAnnotateByObject(object_notes)
            neuron.batchAnnotate(other_notes)
        except AttributeError as e:
            #embed()
            # best effort: the failure is logged and the row is kept
            log.exception(e) #'something very strange has happened\n', e)
            pass  # FIXME FIXME FIXME

        #neuron.batchAnnotateByPredicate(predicate_notes)  # TODO
        # FIXME doesn't quite work in this context, but there are other
        # cases where annotations to the general modality are still desirable
        # FIXME there may be no predicate? if the object fails to match?

        if do_release:
            release.append(neuron)

    return config, errors, new, release
Beispiel #6
0
            p = translate[p]
            if isinstance(o, rdflib.Literal):
                o = o.toPython()
            out[p] = o
        yield out


class lOntTerm(OntTerm):
    # OntTerm subclass for local use, only class level settings are changed.
    repr_arg_order = (('curie', 'label'),  # FIXME this doesn't stick?!
                      ('iri', 'label'))
    # NOTE(review): inside this class body the name below is mangled to
    # ``_lOntTerm__firsts``, so it does not replace a private ``__firsts``
    # of a parent class -- confirm that this is the intent
    __firsts = ('curie', 'iri')


# the query service is attached after the class exists because the class
# itself is passed as ``instrumented``
lOntTerm.query = ontquery.OntQuery(
    ontquery.plugin.get('rdflib')(sgraph),
    instrumented=lOntTerm,
)

# unique terms for both ends (``s`` and ``o``) of every entry in ``rests``,
# ordered by the integer value of their suffix
regions_unfilt = sorted(
    {lOntTerm(e) for r in rests for e in (r.s, r.o)},
    key=lambda t: int(t.suffix),
)
# drop every region whose label mentions 'gyrus' or 'Pineal'
regions = [r for r in regions_unfilt
           if not ('gyrus' in r.label or 'Pineal' in r.label)]
# header row of the output table
rows = [
    ['label', 'soma located in', 'projection type'],
]
with Basic:  # FIXME if this is called inside a function stack_magic fails :/
    for region in regions:
        for type in (projection, intrinsic):
            n = Neuron(
                Phenotype(region,
                          ilxtr.hasSomaLocatedIn,
                          label=region.label,
Beispiel #7
0
class lOntTerm(OntTerm):
    # OntTerm subclass bound to a query over the local rdflib graph ``sgraph``.
    repr_arg_order = (
        ('curie', 'label'),  # FIXME this doesn't stick?!
        ('iri', 'label'),
    )
    # NOTE(review): inside this class body the name below is mangled to
    # ``_lOntTerm__firsts``, so it does not replace a private ``__firsts``
    # of a parent class -- confirm that this is the intent
    __firsts = ('curie', 'iri')
    query = ontquery.OntQuery(
        ontquery.plugin.get('rdflib')(sgraph),
    )