def map(attr, predicate):
    """Yield phenotype indicators derived from one cell of the enclosing object.

    ``attr`` names a zero-argument callable on the enclosing ``self``; the
    object it returns must expose ``.value``.  A falsy value yields nothing.
    Otherwise the value is run through ``self.sheet.convert_cell`` and each
    resulting ``(iri, label)`` pair becomes either

    * a tuple ``(op, *phenotypes)`` when ``iri`` is itself a tuple, or
    * ``p.asIndicator()`` for a single ``Phenotype`` / ``NegPhenotype``.

    Pairs whose ``iri`` is ``None`` are skipped (after a debug log), except
    for one hard-coded label that is remapped to a fixed UBERON term.
    """
    value = getattr(self, attr)().value
    if not value:
        return

    # conversion is completed eagerly before the first result is yielded
    converted = list(self.sheet.convert_cell(value, predicate=predicate))
    for iri, label in converted:
        # only single-valued cells can be compared directly with the label
        if ',' not in value and label != value:
            log.warning(f'label mismatch {label!r} != {value!r}')

        if iri is None:
            if label != 'bed nucleus of stria terminalis juxtacapsular nucleus':
                log.debug(f'nothing found for {label}')
                continue

            # known special case that is resolved by hand
            iri = OntTerm('UBERON:0011173',
                          label='anterior division of bed nuclei of stria terminalis')

        if isinstance(iri, tuple):
            # TODO need combinators in future version for union/intersection of object
            op, *rest = iri
            members = []
            for r in rest:
                if r is None:
                    continue

                if isinstance(r, LacksObject):
                    members.append(NegPhenotype(r, predicate))
                else:
                    members.append(Phenotype(r, predicate))

            yield (op, *members)
            continue

        if isinstance(iri, LacksObject):
            phenotype = NegPhenotype(iri.asURIRef(), predicate)
        else:
            phenotype = Phenotype(iri, predicate)

        yield phenotype.asIndicator()
def test_2_write_py_after_load_other(self):
    # Load one existing config, then create a second config, add a neuron
    # and write the second config out as python.
    from neurondm.lang import Config, Neuron, Phenotype

    loaded = Config('huang-2017')
    loaded.load_existing()

    export_dirs = {'ttl_export_dir': tel, 'py_export_dir': pyel}
    writer = Config('test-write-after-other', **export_dirs)

    # the neuron is created only after the second config exists
    Neuron(Phenotype('TEMP:after-other'))
    writer.write_python()
def test_0_write_py_after_load_none(self):
    # Create a config, add a neuron and write it out as python without any
    # other config having been loaded or written first in this test.
    from neurondm import Config, Neuron, Phenotype

    # earlier setup that is currently disabled:
    #config = Config('test-write', ttl_export_dir=tel, py_export_dir=pyel)
    #Neuron(Phenotype('TEMP:hello'))
    #config.write()

    export_dirs = {'ttl_export_dir': tel, 'py_export_dir': pyel}
    writer = Config('test-write-after-same', **export_dirs)

    Neuron(Phenotype('TEMP:after-other'))
    writer.write_python()
def test_nest_logical(self):
    # Build the same neuron twice, once with a logical phenotype nested
    # inside another and once with the flat equivalent, and check that both
    # generated labels contain an intersectionOf expression.
    intersection = self.AND
    make_neuron = self.Neuron
    logical = self.LogicalPhenotype
    pheno = self.Phenotype

    def taxon():
        return pheno('NCBITaxon:10090',
                     'ilxtr:hasInstanceInTaxon',
                     label='Mus musculus')

    def gene():
        return pheno('NCBIGene:50779',
                     'ilxtr:hasExpressionPhenotype',
                     label='Rgs6')

    def receptor():
        return pheno('ilxtr:GABAReceptor',
                     'ilxtr:hasExpressionPhenotype',
                     label='GABAR')

    # nested form: AND(gene, AND(receptor))
    species = taxon()
    expressed = gene()
    bound = receptor()
    inner = logical(intersection, bound)
    outer = logical(intersection, expressed, inner)
    nested = make_neuron(species, outer, label='test logical')

    # flat form: AND(gene, receptor)
    species = taxon()
    expressed = gene()
    bound = receptor()
    combined = logical(intersection, expressed, bound)
    flat = make_neuron(species, combined, label='test logical')

    nested_label = nested.genLabel
    flat_label = flat.genLabel
    for generated in (nested_label, flat_label):
        assert '(intersectionOf' in generated, generated
def loop_internal(j, header, cell): nonlocal id nonlocal current_neuron nonlocal do_release notes = list(process_note(get_note(i + 1, j, self.cells_index))) # + 1 since headers is removed if notes and not header.startswith('has'): _predicate = self.convert_other(header) if cell: _object = rdflib.Literal(cell) # FIXME curies etc. else: _object = rdf.nil other_notes[_predicate, _object] = notes if header == 'curie': id = OntId(cell).u if cell else None return elif header == 'label': if id == OntId('NIFEXT:66').u: breakpoint() label_neuron = cell if cell in self.existing: current_neuron = self.existing[cell] elif cell: # TODO self.new.append(cell) else: raise ValueError(cell) # wat return elif header == 'Status': # TODO if cell == 'Yes': do_release = True elif cell == 'Maybe': pass elif cell == 'Not yet': pass elif cell == 'Delete': pass else: pass return elif header == 'PMID': # TODO return elif header == 'Other reference': # TODO return elif header == 'Other label': # TODO return elif header == 'definition': return # FIXME single space differences between the spreadsheet and the source if cell: definition_neuron = rdflib.Literal(cell) elif header == 'synonyms': if cell: synonyms_neuron = [rdflib.Literal(s.strip()) # FIXME bare comma is extremely dangerous for s in cell.split(',')] return elif header in self.skip: return objects = [] if cell: predicate = self.convert_header(header) if predicate is None: log.debug(f'{(header, cell, notes)}') for object, label in self.convert_cell(cell): if predicate in NeuronCUT._molecular_predicates: if isinstance(object, tuple): op, *rest = object rest = [OntTerm(o).asIndicator().URIRef for o in rest] object = op, *rest elif object: log.debug(f'{object!r}') object = OntTerm(object).asIndicator().URIRef if isinstance(label, tuple): # LogicalPhenotype case _err = [] for l in label: if self.lower_check(l, cell): _err.append((cell, label)) if _err: self.errors.extend(_err) else: objects.append(object) elif self.lower_check(label, cell): 
self.errors.append((cell, label)) elif str(id) == object: self.errors.append((header, cell, object, label)) object = None else: objects.append(object) if notes: # FIXME this is a hack to only attach to the last value # since we can't distinguish at the moment wat[predicate, object] = notes if object is not None: # object aka iri can be none if we don't find anything object_notes[object] = notes else: predicate_notes[predicate] = notes # FIXME it might also be simpler in some cases # to have this be object_notes[object] = notes # because we are much less likely to have the same # phenotype appear attached to the different dimensions # FIXME comma sep is weak here because the # reference is technically ambiguous # might be an argument for the denormalized form ... # or perhaps having another sheet for cases like that else: return if predicate and objects: for object in objects: # FIXME has layer location phenotype if isinstance(object, tuple): op, *rest = object pes = (Phenotype(r, predicate) for r in rest) # FIXME nonhomogenous phenotypes phenotypes.append(LogicalPhenotype(op, *pes)) elif object: phenotypes.append(Phenotype(object, predicate)) else: self.errors.append((object, predicate, cell)) elif objects: self.errors.append((header, objects)) else: self.errors.append((header, cell))
def sheet_to_neurons(values, notes_index, expect_pes):
    """Convert tabular values into ``NeuronCUT`` objects.

    Args:
        values: sequence whose first element is the header row and whose
            remaining elements are the data rows (``headers, *rows``).
        notes_index: passed through to ``get_note`` to look up cell notes.
        expect_pes: mapping from neuron id to the expected number of
            phenotypes; rows whose id is missing or whose phenotype count
            differs are logged and skipped.

    Returns:
        A 4-tuple ``(config, errors, new, release)``: the
        ``Config('cut-roundtrip')`` instance, a list of error tuples, the
        labels that were not found among the existing neurons, and the
        neurons whose ``Status`` cell was ``'Yes'``.

    Raises:
        ValueError: when a ``label`` cell is falsy and not a key of the
            existing-neuron lookup.
    """
    # TODO import existing ids to register by label
    sgv = Vocabulary()
    e_config = Config('common-usage-types')
    e_config.load_existing()
    query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
    # FIXME clear use case for the remaining bound to whatever query produced it rather
    # than the other way around ... how to support this use case ...
    # lookup of already known neurons keyed by their original label
    existing = {str(n.origLabel):n for n in e_config.neurons()}

    def convert_header(header):
        """Return ``ilxtr[header]`` for headers starting with 'has', else None."""
        if header.startswith('has'):  # FIXME use a closed namespace
            return ilxtr[header]
        else:
            return None

    def convert_other(header):
        """Map a non-phenotype header to the predicate used for its notes."""
        if header == 'label':
            return rdfs.label
        elif header == 'curie':
            return rdf.type
        elif header == 'definition':
            return definition
        else:
            # any other header becomes a TEMP term with spaces replaced
            header = header.replace(' ', '_')
            return TEMP[header]  # FIXME

    def mapCell(cell, syns=False):
        """Resolve one cell string to an ``(iri, label)`` pair.

        Returns ``(None, None)`` when nothing can be found.  When the first
        lookup finds nothing the function retries once with ``syns=True``.
        """
        search_prefixes = ('UBERON', 'CHEBI', 'PR', 'NCBITaxon', 'NCBIGene',
                           'ilxtr', 'NIFEXT', 'SAO', 'NLXMOL', 'BIRNLEX',)

        # a colon without spaces is treated as a curie or an iri
        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' in cell:
                if cell.startswith('http'):
                    t = OntTerm(iri=cell)
                else:
                    return None, None  # garbage with http inline
            else:
                t = OntTerm(cell, exclude_prefix=('FMA',))  # FIXME need better error message in ontquery

            return t.u, t.label

        # otherwise search by term, dropping deprecated results
        result = [r for r in sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
                  if not r['deprecated']]
        #printD(cell, result)
        if not result:
            log.debug(f'{cell}')
            # fall back to a label query against the loaded graph
            maybe = list(query(label=cell, exclude_prefix=('FMA',)))
            if maybe:
                qr = maybe[0]
                return qr.OntTerm.u, qr.label
            elif not syns:
                return mapCell(cell, syns=True)
            else:
                return None, None
        elif len(result) > 1:
            #printD('WARNING', result)
            result = select_by_curie_rank(result)
        else:
            result = result[0]

        return rdflib.URIRef(result['iri']), result['labels'][0]

    def lower_check(label, cell):
        """Return True when ``label`` is NOT contained in ``cell``, also ignoring case."""
        return label not in cell and label.lower() not in cell.lower()  # have to handle comma sep case

    # inverse of LogicalPhenotype.local_names (value -> key)
    lnlu = {v:k for k, v in LogicalPhenotype.local_names.items()}

    def convert_cell(cell_or_comma_sep):
        """Yield ``(object, label)`` for every comma separated part of a cell.

        Parts starting with ``(OR`` or ``(AND`` yield
        ``((operator, *iris), labels_tuple)``; all other parts yield a
        single ``(iri, label)``.  A label that cannot be resolved is
        replaced by the whole original cell text.
        """
        #printD('CONVERTING', cell_or_comma_sep)
        for cell_w_junk in cell_or_comma_sep.split(','):  # XXX WARNING need a way to alert people to this
            cell = cell_w_junk.strip()
            if cell.startswith('(OR') or cell.startswith('(AND'):
                # presumably the text looks like (AND "a" "b" "c") -- the
                # splits below rely on double quoted, space separated terms
                start, *middle, end = cell.split('" "')
                OPoperator, first = start.split(' "')
                operator = OPoperator[1:]  # drop the leading character (the open paren)
                operator = lnlu[operator]
                last, CP = end.rsplit('"')
                iris, labels = [], []
                for term in (first, *middle, last):
                    iri, label = mapCell(term)
                    if label is None:
                        label = cell_or_comma_sep
                    iris.append(iri)
                    labels.append(label)

                yield (operator, *iris), tuple(labels)

            else:
                iri, label = mapCell(cell)
                if label is None:
                    yield iri, cell_or_comma_sep  # FIXME need a way to handle this that doesn't break things?
                else:
                    yield iri, label

    config = Config('cut-roundtrip')
    skip = 'alignment label',  # one element tuple of headers to ignore
    headers, *rows = values
    errors = []
    new = []
    release = []
    for i, neuron_row in enumerate(rows):
        # per-row state, filled in while walking the columns
        id = None
        label_neuron = None
        definition_neuron = None
        synonyms_neuron = None
        current_neuron = None
        phenotypes = []
        do_release = False
        predicate_notes = {}
        object_notes = {}
        other_notes = {}
        wat = {}

        for j, (header, cell) in enumerate(zip(headers, neuron_row)):
            notes = list(process_note(get_note(i + 1, j, notes_index)))  # + 1 since headers is removed
            if notes and not header.startswith('has'):
                # notes on non-phenotype columns are keyed by (predicate, literal)
                _predicate = convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            if header == 'curie':
                id = OntId(cell).u if cell else None
                continue
            elif header == 'label':
                label_neuron = cell
                if cell in existing:
                    current_neuron = existing[cell]
                elif cell:  # TODO
                    new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                continue
            elif header == 'Status':
                # TODO only 'Yes' has an effect, the other values are placeholders
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass
                continue
            elif header == 'PMID':
                # TODO
                continue
            elif header == 'Other reference':
                # TODO
                continue
            elif header == 'Other label':
                # TODO
                continue
            elif header == 'definition':
                continue  # FIXME single space differences between the spreadsheet and the source

                # NOTE everything below in this branch is unreachable
                # because of the continue above
                if cell:
                    definition_neuron = rdflib.Literal(cell)

                continue
            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                       # FIXME bare comma is extremely dangerous
                                       for s in cell.split(',')]

                continue
            elif header in skip:
                continue

            # every remaining header is treated as a phenotype column
            objects = []
            if cell:
                predicate = convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in convert_cell(cell):
                    if isinstance(label, tuple):  # LogicalPhenotype case
                        # one error entry per component label that fails the check
                        _err = []
                        for l in label:
                            if lower_check(l, cell):
                                _err.append((cell, label))

                        if _err:
                            errors.extend(_err)
                        else:
                            objects.append(object)
                    elif lower_check(label, cell):
                        errors.append((cell, label))
                    elif str(id) == object:
                        # the cell resolved to the neuron's own identifier
                        errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                continue

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        errors.append((object, predicate, cell))
            elif objects:
                # objects were found but the header did not map to a predicate
                errors.append((header, objects))
            else:
                errors.append((header, cell))
            # translate header -> predicate
            # translate cell value to ontology id

        if current_neuron and phenotypes:
            # TODO merge current with changes
            # or maybe we just replace since all the phenotypes should be there?
            log.debug(phenotypes)
            if id is not None:
                log.debug(f'{(id, bool(id))}')

            elif label_neuron:
                id = make_cut_id(label_neuron)

            if id not in expect_pes:
                log.error(f'{id!r} not in cuts!?')
                continue

            # the number of converted phenotypes must match the expectation
            if expect_pes[id] != len(phenotypes):
                log.error(f'{id!r} failed roundtrip {len(phenotypes)} != {expect_pes[id]}')
                continue

            neuron = NeuronCUT(*phenotypes, id_=id, label=label_neuron,
                               override=bool(id) or bool(label_neuron))
            neuron.adopt_meta(current_neuron)
            # FIXME occasionally this will error?!
        else:
            continue  # FIXME this polutes everything ???

            # NOTE the rest of this branch is unreachable because of the
            # continue above
            fn = fixname(label_neuron)
            if not phenotypes and i:  # i skips header
                errors.append((i, neuron_row))  # TODO special review for phenos but not current
                phenotypes = Phenotype('TEMP:phenotype/' + fn),

            neuron = NeuronCUT(*phenotypes,
                               id_=make_cut_id(label_neuron),
                               label=label_neuron, override=True)

        # update the meta if there were any changes
        if definition_neuron is not None:
            neuron.definition = definition_neuron

        if synonyms_neuron is not None:
            neuron.synonyms = synonyms_neuron

        try:
            neuron.batchAnnotateByObject(object_notes)
            neuron.batchAnnotate(other_notes)
        except AttributeError as e:
            # best effort: the failure is logged and the row is kept
            #embed()
            log.exception(e) #'something very strage has happened\n', e)
            pass  # FIXME FIXME FIXME

        #neuron.batchAnnotateByPredicate(predicate_notes)  # TODO
        # FIXME doesn't quite work in this context, but there are other
        # cases where annotations to the general modality are still desireable
        # FIXME there may be no predicate? if the object fails to match?

        if do_release:
            release.append(neuron)

    return config, errors, new, release