Beispiel #1
0
 def test_load_huang(self):
     """Check that the 'huang-2017' config can load its existing content."""
     from neurondm import Config
     # FIXME this is only a placeholder for loading and roundtripping
     # neurons that have other neurons asserted as equivalent
     # or disjoint classes
     Config('huang-2017').load_existing()
Beispiel #2
0
 def test_0_write_py_after_load_none(self):
     """Write the python export of a fresh config when nothing was loaded first."""
     from neurondm import Config, Neuron, Phenotype
     # NOTE a preceding 'test-write' config used to be created and written
     # here; it is currently disabled:
     #config = Config('test-write', ttl_export_dir=tel, py_export_dir=pyel)
     #Neuron(Phenotype('TEMP:hello'))
     #config.write()
     after_same = Config('test-write-after-same',
                         ttl_export_dir=tel,
                         py_export_dir=pyel)
     phenotype = Phenotype('TEMP:after-other')
     Neuron(phenotype)  # constructed for its side effect only, result unused
     after_same.write_python()
Beispiel #3
0
    def __new__(cls, *args, **kwargs):
        """Create an instance, filling the class-level lookups on first use.

        The first time the class is instantiated (detected by the absence of
        ``cls.existing``) the 'cut-development' config is loaded and the
        class attributes ``existing``, ``query`` and ``sgv`` are set.
        """
        if hasattr(cls, 'existing'):
            return super().__new__(cls)

        e_config = Config('cut-development')
        e_config.load_existing()
        # FIXME clear use case for the remaining bound to whatever query produced it rather
        # than the other way around ... how to support this use case ...
        by_label = {pe.origLabel.toPython(): pe for pe in e_config.existing_pes}
        cls.existing = by_label
        # the same objects are also reachable by their identifier
        by_id = {pe.id_: pe for pe in e_config.existing_pes}
        cls.existing.update(by_id)

        rdflib_service = oq.plugin.get('rdflib')(e_config.core_graph)
        cls.query = oq.OntQuery(rdflib_service, instrumented=OntTerm)
        cls.sgv = Vocabulary()

        return super().__new__(cls)
Beispiel #4
0
    def __new__(cls, *args, **kwargs):
        """Create an instance, filling the class-level lookups on first use.

        The first time the class is instantiated (detected by the absence of
        ``cls.existing``) the 'common-usage-types' config is loaded and the
        class attributes ``existing``, ``query`` and ``sgv`` are set.
        """
        if hasattr(cls, 'existing'):
            return super().__new__(cls)

        e_config = Config('common-usage-types')
        e_config.load_existing()
        # FIXME clear use case for the remaining bound to whatever query produced it rather
        # than the other way around ... how to support this use case ...
        neurons_by_label = {}
        for neuron in e_config.neurons():
            neurons_by_label[str(neuron.origLabel)] = neuron

        cls.existing = neurons_by_label
        rdflib_service = oq.plugin.get('rdflib')(e_config.core_graph)
        cls.query = oq.OntQuery(rdflib_service, instrumented=OntTerm)
        cls.sgv = Vocabulary()

        return super().__new__(cls)
Beispiel #5
0
    def test_2_write_py_after_load_other(self):
        """Write the python export of a new config after another config was loaded."""
        from neurondm.lang import Config, Neuron, Phenotype

        # load an unrelated config first
        loaded_first = Config('huang-2017')
        loaded_first.load_existing()

        # then create a second config, add one neuron and export it
        after_other = Config('test-write-after-other',
                             ttl_export_dir=tel,
                             py_export_dir=pyel)
        phenotype = Phenotype('TEMP:after-other')
        Neuron(phenotype)  # constructed for its side effect only, result unused
        after_other.write_python()
Beispiel #6
0
def main():
    """Build the 'bolser-lewis' neurons and write them out.

    Returns:
        A one-element tuple holding the config.
    """
    apin = APIN()
    config = Config('bolser-lewis', source_file=relative_path(__file__))
    # materialize all bags before any neuron is constructed
    labeled_bags = list(apin.bags)
    for neuron_label, phenotype_bag in labeled_bags:
        BolserLewisNeuron(*phenotype_bag, label=neuron_label, override=True)

    Neuron.write()
    Neuron.write_python()
    return (config,)
Beispiel #7
0
 def neurons(self, expect_pes):
     """Yield whatever ``convert_row`` produces for every data row of the sheet.

     Before iterating, the bookkeeping attributes ``config``, ``skip``,
     ``errors``, ``failed``, ``new``, ``release`` and ``tomqc_check_ind``
     are (re)initialized on the instance. Because this is a generator
     that only happens once iteration starts.

     ``expect_pes`` is passed through unchanged to ``convert_row``.
     """
     # TODO import existing ids to register by label
     self.config = Config('cut-roundtrip')
     self.skip = ('alignment label',)
     self.errors, self.failed = [], {}
     self.new, self.release = [], []
     # the first row of the sheet holds the column headers
     header_row, *data_rows = self.values
     self.tomqc_check_ind = header_row.index('tomqc')
     for row_index, data_row in enumerate(data_rows):
         yield from self.convert_row(row_index, data_row, header_row, expect_pes)
Beispiel #8
0
def main():
    """Round-trip the cuts sheet through neurons and rebuild the sheet rows.

    Steps, in order:

    1. Count the phenotypes of every compiled common usage type neuron.
    2. Convert the ``CutsV1`` sheet to neurons with ``sheet_to_neurons`` and
       write the resulting config (python and default export).
    3. Re-create the neurons marked for release under a 'cut-release'
       config and write that config as well.
    4. Export the round-tripped config for review and compute ``rows``:
       the review rows reordered to the sheet's column order, with a few
       columns taken verbatim from the sheet.
    5. Drop into an IPython shell (``embed``) for inspection.
    """
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()
    from IPython import embed
    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_:len(n.pes) for n in cuts_neurons}

    sheet = CutsV1()
    config, errors, new, release = sheet_to_neurons(sheet.values, sheet.notes_index, expect_pes)
    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get get a load_graph
    from neurondm import Config, NeuronCUT
    release_config = Config('cut-release')
    for n in release:
        # re-create each released neuron under the release config
        NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n)

    release_config.write_python()
    release_config.write()
    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [], filename='cut-rt-test.csv', with_curies=True)
    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:], header=[v.replace(' ', '_') for v in review_rows[0]], to_index=['label'])
    def grow(r):
        """ look up the sheet row that has the same label as review row r """
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        return valuesC.searchIndex('label', r.label)

    def key(field_value):
        """ sort key: position of the field in the sheet header, unknown fields last """
        field, value = field_value
        try:
            # always return a tuple so that known and unknown fields stay
            # mutually comparable; returning None for unknown fields made
            # sorted() raise TypeError when comparing None with int
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError:
            print('ERROR!!!!!!!!!!!', field, value)
            return 1, 0

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        for field, value in sorted(zip(r._fields, r), key=key):
            if field in cols:
                # these columns are taken verbatim from the sheet
                value = getattr(vrow, field)

            yield '' if value is None else value  # completely overwrite the sheet

    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)
    embed()
Beispiel #9
0
def main():
    """Write the 'common-usage-types' config from the included CutsV1 rows.

    Row objects of the sheet are collected, ``neuron_cleaned()`` is called
    for every row whose ``include()`` is truthy, ``_sigh()`` is called on
    each non-None result, and the config is written (default and python
    export). When run as a script a debugger is entered at the end.
    """
    #cv1 = CutsV1Lite()
    CutsV1.fetch_grid = False
    cv1 = CutsV1()

    # rows (header row included) whose exhasmolecularphenotype value is truthy
    hrm = []
    for index, _row in enumerate(cv1.values):
        if cv1.row_object(index).exhasmolecularphenotype().value:
            hrm.append(cv1.row_object(index))

    to_sco = {term for row_obj in hrm for term in row_obj.entailed_molecular_phenotypes()}

    # row objects for the data rows only, hence the offset of one
    ros = []
    for offset, _row in enumerate(cv1.values[1:]):
        ros.append(cv1.row_object(offset + 1))

    to_fix = [row_obj for row_obj in ros
              if list(row_obj.entailed_molecular_phenotypes())]
    #maybe_fixed = [t.neuron_cleaned() for t in to_fix]
    #assert maybe_fixed != [f.neuron_existing() for f in to_fix]
    config = Config('common-usage-types')

    _final = []
    for row_obj in ros:
        if row_obj.include():
            _final.append(row_obj.neuron_cleaned())

    # FIXME there are 16 neurons marked as yes that are missing
    final = [neuron for neuron in _final if neuron is not None]
    #fixed = [f for f in final if [_ for _ in f.pes if isinstance(_, EntailedPhenotype)]]
    for neuron in final:
        neuron._sigh()

    config.write()
    config.write_python()
    if __name__ == '__main__':
        breakpoint()
Beispiel #10
0
def main():
    """Build the 'bolser-lewis' neurons, write them and strip label triples.

    After the first write every triple of ``config._written_graph`` whose
    predicate is ``rdfs.label`` is removed, the graph is written again and
    finally the python export is produced.

    Returns:
        A one-element tuple holding the config.
    """
    apin = APIN()
    source = relative_path(__file__, no_wd_value=__file__)
    config = Config('bolser-lewis', source_file=source)
    # materialize all bags before any neuron is constructed
    identified_bags = list(apin.bags)
    for identifier, neuron_label, phenotype_bag in identified_bags:
        BolserLewisNeuron(*phenotype_bag,
                          label=neuron_label,
                          id_=identifier,
                          override=True)

    config.write()
    labels = (
        rdfs.label,
        #ilxtr.genLabel, ilxtr.localLabel, ilxtr.simpleLabel,
        #ilxtr.simpleLocalLabel, skos.prefLabel
    )
    # collect first, remove afterwards, so the graph is never
    # modified while it is being iterated
    to_remove = []
    for triple in config._written_graph:
        if triple[1] in labels:
            to_remove.append(triple)

    for triple in to_remove:
        config._written_graph.remove(triple)

    config._written_graph.write()
    config.write_python()
    return (config,)
Beispiel #11
0
 def test_rewrite_source_module(self):
     """Load the 'test-madness' python module and write it back to the same export dir."""
     from neurondm import Config
     mad_config = Config('test-madness', py_export_dir=madpath.parent)
     # loading first is required before the module can be rewritten
     mad_config.load_python()
     #breakpoint()
     mad_config.write_python()  # BOOM HEADSHOT
Beispiel #12
0
def main():
    """Round-trip the cuts sheet through neurons and rebuild the sheet rows.

    Steps, in order:

    1. Collect the phenotypes of every compiled common usage type neuron.
    2. Convert the ``CutsV1`` sheet to neurons (``sheet.neurons``) and write
       the resulting config (python and default export).
    3. Re-create the neurons that failed (``sheet.failed``) under a
       'cut-failed' config on a best-effort basis and write that config.
    4. Re-create the neurons marked for release under a 'cut-release'
       config and write that config.
    5. Export the round-tripped config for review and compute ``rows``:
       the review rows reordered to the sheet's column order, with a few
       columns taken verbatim from the sheet.
    """
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()

    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_:n.pes for n in cuts_neurons}

    sheet = CutsV1()
    # the generator has to be exhausted before the attributes read
    # below (config, errors, new, release, failed) are fully populated
    _neurons = list(sheet.neurons(expect_pes))
    config = sheet.config
    errors = sheet.errors
    new = sheet.new
    release = sheet.release

    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get get a load_graph

    # FIXME we need this because _bagExisting doesn't deal with unionOf right now
    def trything(f):
        """ wrap f so that failures are logged and None is returned instead """
        @wraps(f)
        def inner(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except Exception as e:
                # still best effort, but a bare except would also swallow
                # KeyboardInterrupt/SystemExit and hide every failure
                log.exception(e)

        return inner

    from neurondm import Config, NeuronCUT

    failed_config = Config('cut-failed')
    try_neuron = trything(NeuronCUT)
    for id_, pes in sheet.failed.items():
        try_neuron(*pes, id_=id_)

    failed_config.write_python()
    failed_config.write()

    release_config = Config('cut-release')
    for n in release:
        # re-create each released neuron under the release config
        NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n)

    release_config.write_python()
    release_config.write()

    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [], filename='cut-rt-test.csv', with_curies=True)
    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:], header=[v.replace(' ', '_') for v in review_rows[0]], to_index=['label'])
    def grow(r):
        """ look up the sheet row with the same label as r, None if r has no label """
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        if r.label is not None:
            return valuesC.searchIndex('label', r.label)

    def key(field_value):
        """ sort key: position of the field in the sheet header, unknown fields last """
        field, value = field_value
        try:
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError:
            log.error(f'{field} {value}')
            return 1, 0

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        log.debug('\n'.join(r._fields))
        log.debug('\n'.join(str(_) for _ in r))
        for field, value in sorted(zip(r._fields, r), key=key):
            # grow returns None for rows without a label, in that case
            # there is nothing to copy from and the review value is kept
            if field in cols and vrow is not None:
                value = getattr(vrow, field)

            yield '' if value is None else value  # completely overwrite the sheet

    # NOTE(review): unconditional debugger stop, unlike the guarded one
    # at the end of this function; kept as is
    breakpoint()
    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)
    if __name__ == '__main__':
        breakpoint()
Beispiel #13
0
def sheet_to_neurons(values, notes_index, expect_pes):
    """Convert the rows of a neuron spreadsheet into ``NeuronCUT`` objects.

    Args:
        values: the sheet content; the first element is the header row and
            every following element is one neuron row.
        notes_index: passed unchanged to ``get_note`` to look up the notes
            attached to a cell.
        expect_pes: mapping from neuron id to the number of phenotypes the
            round-tripped neuron is expected to have. Rows whose id is
            missing from it, or whose phenotype count differs, are logged
            and skipped.

    Returns:
        A 4-tuple ``(config, errors, new, release)``:
        the 'cut-roundtrip' config created here, a list of heterogeneous
        error tuples, the list of labels that are not among the existing
        'common-usage-types' neurons, and the list of neurons whose
        Status cell is 'Yes'.

    Raises:
        ValueError: when a row has an empty label cell.
    """
    # TODO import existing ids to register by label
    sgv = Vocabulary()
    e_config = Config('common-usage-types')
    e_config.load_existing()
    query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
    # FIXME clear use case for the remaining bound to whatever query produced it rather
    # than the other way around ... how to support this use case ...
    # existing neurons keyed by their original label
    existing = {str(n.origLabel):n for n in e_config.neurons()}
    # map a phenotype column header to its predicate, None for other columns
    def convert_header(header):
        if header.startswith('has'):  # FIXME use a closed namespace
            return ilxtr[header]
        else:
            return None

    # map a non phenotype column header to the predicate used for its notes
    def convert_other(header):
        if header == 'label':
            return rdfs.label
        elif header == 'curie':
            return rdf.type
        elif header == 'definition':
            return definition
        else:
            header = header.replace(' ', '_')
            return TEMP[header]  # FIXME

    # resolve one cell value to an (iri, label) pair, (None, None) if nothing is found
    def mapCell(cell, syns=False):
        search_prefixes = ('UBERON', 'CHEBI', 'PR', 'NCBITaxon', 'NCBIGene', 'ilxtr', 'NIFEXT', 'SAO', 'NLXMOL',
                           'BIRNLEX',)

        # a value with a colon and no space is treated as a curie or an iri
        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' in cell:
                if cell.startswith('http'):
                    t = OntTerm(iri=cell)
                else:
                    return None, None  # garbage with http inline
            else:
                t = OntTerm(cell, exclude_prefix=('FMA',))  # FIXME need better error message in ontquery

            return t.u, t.label

        # otherwise search the vocabulary by term, ignoring deprecated results
        result = [r for r in sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
                  if not r['deprecated']]
        #printD(cell, result)
        if not result:
            log.debug(f'{cell}')
            # fall back to a label query against the loaded core graph
            maybe = list(query(label=cell, exclude_prefix=('FMA',)))
            if maybe:
                qr = maybe[0]
                return qr.OntTerm.u, qr.label
            elif not syns:
                # last resort, retry once with synonym search enabled
                return mapCell(cell, syns=True)
            else:
                return None, None
        elif len(result) > 1:
            #printD('WARNING', result)
            result = select_by_curie_rank(result)
        else:
            result = result[0]

        return rdflib.URIRef(result['iri']), result['labels'][0]

    # True when label does not occur in cell, neither verbatim nor ignoring case
    def lower_check(label, cell):
        return label not in cell and label.lower() not in cell.lower()  # have to handle comma sep case

    # inverse of LogicalPhenotype.local_names
    lnlu = {v:k for k, v in LogicalPhenotype.local_names.items()}
    # yield one (object, label) pair per comma separated value of a cell;
    # for (OR ...) and (AND ...) values the object is a tuple that starts
    # with the operator and the label is a tuple of labels
    def convert_cell(cell_or_comma_sep):
        #printD('CONVERTING', cell_or_comma_sep)
        for cell_w_junk in cell_or_comma_sep.split(','):  # XXX WARNING need a way to alert people to this
            cell = cell_w_junk.strip()
            if cell.startswith('(OR') or cell.startswith('(AND'):
                # the quoted terms are separated by '" "'
                start, *middle, end = cell.split('" "')
                OPoperator, first = start.split(' "')
                operator = OPoperator[1:]  # drop the leading parenthesis
                operator = lnlu[operator]
                last, CP = end.rsplit('"')
                iris, labels = [], []
                for term in (first, *middle, last):
                    iri, label = mapCell(term)
                    if label is None:
                        label = cell_or_comma_sep
                    iris.append(iri)
                    labels.append(label)

                yield (operator, *iris), tuple(labels)

            else:
                iri, label = mapCell(cell)
                if label is None:
                    yield iri, cell_or_comma_sep  # FIXME need a way to handle this that doesn't break things?
                else:
                    yield iri, label

    config = Config('cut-roundtrip')
    skip = 'alignment label',
    headers, *rows = values
    errors = []
    new = []
    release = []
    for i, neuron_row in enumerate(rows):
        # per row state, reset for every neuron
        id = None
        label_neuron  = None
        definition_neuron = None
        synonyms_neuron = None
        current_neuron = None
        phenotypes = []
        do_release = False
        predicate_notes = {}
        object_notes = {}
        other_notes = {}
        wat = {}  # written below but never read in this function
        for j, (header, cell) in enumerate(zip(headers, neuron_row)):
            notes = list(process_note(get_note(i + 1, j, notes_index)))  # + 1 since headers is removed
            # notes on non phenotype columns are collected per (predicate, object)
            if notes and not header.startswith('has'):
                _predicate = convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            # metadata columns are handled here and never reach the
            # phenotype conversion below
            if header == 'curie':
                id = OntId(cell).u if cell else None
                continue
            elif header == 'label':
                label_neuron = cell
                if cell in existing:
                    current_neuron = existing[cell]
                elif cell:
                    # TODO
                    new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                continue
            elif header == 'Status':
                # TODO
                # only 'Yes' has an effect at the moment
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass

                continue
            elif header == 'PMID':
                # TODO
                continue
            elif header == 'Other reference':
                # TODO
                continue
            elif header == 'Other label':
                # TODO
                continue
            elif header == 'definition':
                continue  # FIXME single space differences between the spreadsheet and the source

                # NOTE unreachable because of the continue above, so
                # definition_neuron always stays None
                if cell:
                    definition_neuron = rdflib.Literal(cell)

                continue

            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                    # FIXME bare comma is extremely dangerous
                                    for s in cell.split(',')]

                continue
            elif header in skip:
                continue

            # everything that gets here is treated as a phenotype column
            objects = []
            if cell:
                predicate = convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in convert_cell(cell):
                    if isinstance(label, tuple):  # LogicalPhenotype case
                        _err = []
                        for l in label:
                            if lower_check(l, cell):
                                _err.append((cell, label))
                        if _err:
                            errors.extend(_err)
                        else:
                            objects.append(object)
                    elif lower_check(label, cell):
                        # the label that was found does not occur in the cell
                        errors.append((cell, label))
                    elif str(id) == object:
                        # the cell resolved to the id of this very row
                        errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                continue

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        errors.append((object, predicate, cell))
            elif objects:
                errors.append((header, objects))
            else:
                errors.append((header, cell))
            # translate header -> predicate
            # translate cell value to ontology id

        if current_neuron and phenotypes:
            # TODO merge current with changes
            # or maybe we just replace since all the phenotypes should be there?
            log.debug(phenotypes)
            if id is not None:
                log.debug(f'{(id, bool(id))}')

            elif label_neuron:
                id = make_cut_id(label_neuron)

            if id not in expect_pes:
                log.error(f'{id!r} not in cuts!?')
                continue

            # the number of phenotypes has to match the expected count
            if expect_pes[id] != len(phenotypes):
                log.error(f'{id!r} failed roundtrip {len(phenotypes)} != {expect_pes[id]}')
                continue

            neuron = NeuronCUT(*phenotypes, id_=id, label=label_neuron,
                               override=bool(id) or bool(label_neuron))
            neuron.adopt_meta(current_neuron)
            # FIXME occasionally this will error?!
        else:
            continue  # FIXME this pollutes everything ???
            # NOTE unreachable because of the continue above, rows without
            # an existing neuron or without phenotypes are simply skipped
            fn = fixname(label_neuron)
            if not phenotypes and i:  # i skips header
                errors.append((i, neuron_row))  # TODO special review for phenos but not current
                phenotypes = Phenotype('TEMP:phenotype/' + fn),

            neuron = NeuronCUT(*phenotypes,
                               id_=make_cut_id(label_neuron),
                               label=label_neuron, override=True)

        # update the meta if there were any changes
        if definition_neuron is not None:
            neuron.definition = definition_neuron

        if synonyms_neuron is not None:
            neuron.synonyms = synonyms_neuron

        try:
            neuron.batchAnnotateByObject(object_notes)
            neuron.batchAnnotate(other_notes)
        except AttributeError as e:
            #embed()
            log.exception(e) #'something very strange has happened\n', e)
            pass  # FIXME FIXME FIXME

        #neuron.batchAnnotateByPredicate(predicate_notes)  # TODO
        # FIXME doesn't quite work in this context, but there are other
        # cases where annotations to the general modality are still desirable
        # FIXME there may be no predicate? if the object fails to match?

        if do_release:
            release.append(neuron)

    return config, errors, new, release