def graph(self):
    if not hasattr(self, '_graph'):
        self._graph = populateFromJsonLd(OntGraph(), self.asJsonLd())
        OntCuries.populate(self._graph)
        self.populateHeader(self._graph)

    return self._graph

def graph(self):
    g = OntGraph()
    OntCuries.populate(g)
    self.populate(g)
    g.bind('local', self.context)
    g.bind('apinatomy', readable)  # FIXME populate from store
    g.bind('elements', elements)
    return g

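# Hedged usage sketch of the pattern above: build a fresh OntGraph, register
# the known curie conventions on it, and bind extra prefixes before use. The
# 'ex' namespace and the triple are illustrative only.
def _demo_graph():
    import rdflib
    from pyontutils.core import OntGraph
    from pyontutils.namespaces import OntCuries

    g = OntGraph()
    OntCuries.populate(g)  # bind every registered curie prefix to the graph
    ex = rdflib.Namespace('https://example.org/demo/')
    g.bind('ex', ex)  # same mechanism as g.bind('apinatomy', readable) above
    g.add((ex.thing, rdflib.RDF.type, rdflib.OWL.Class))
    return g
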
def graphFromGithub(link, verbose=False):
    # FIXME no validation of the link
    # TODO caching would probably help here
    if verbose:
        log.info(link)

    g = OntGraph().parse(f'{link}?raw=true', format='turtle')
    OntCuries.populate(g)
    return g

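# Hedged usage sketch for graphFromGithub: the ?raw=true trick means `link`
# should be a GitHub *blob* URL; the URL below is illustrative, not a tested
# fixture, and the call requires network access.
def _demo_graphFromGithub():
    link = ('https://github.com/SciCrunch/NIF-Ontology'
            '/blob/master/ttl/nif.ttl')
    g = graphFromGithub(link, verbose=True)
    print(len(g), 'triples from', link)
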
def populate(self, graph=None):
    """ Populate a graph, or if no graph is provided
        populate a new empty graph from the current
        content. (Also useful for debug) """

    if graph is None:
        graph = OntGraph()

    for t in self.triples:
        graph.add(t)

    OntCuries.populate(graph)
    return graph

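# Sketch of the populate contract above, assuming `thing` is an instance whose
# .triples yields rdflib triples: with no argument a new OntGraph comes back,
# with an argument the caller's graph is filled in place and returned.
def _demo_populate(thing):
    fresh = thing.populate()             # new graph built from thing.triples
    shared = thing.populate(OntGraph())  # same content, caller-owned graph
    assert set(fresh) == set(shared)
    return fresh
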
def import_tree(graph, ontologies, **kwargs):
    for ontology in ontologies:
        thisfile = Path(ontology).name
        print(thisfile)
        OntCuries.populate(graph)
        j = graph.asOboGraph('owl:imports', restriction=False)
        try:
            t, te = creatTree(*Query(f'NIFTTL:{thisfile}', 'owl:imports', 'OUTGOING', 30),
                              json=j,
                              prefixes=dict(graph.namespace_manager),
                              **kwargs)
            #print(t)
            yield t, te
        except KeyError:
            print(tc.red('WARNING:'), 'could not find', ontology, 'in import chain')
            # TODO zap onts w/o imports

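# Hedged usage sketch: import_tree is a generator, so iterate to render each
# import tree; `graph` is assumed to already contain the owl:imports closure
# and the filename is illustrative.
def _demo_import_tree(graph):
    for t, te in import_tree(graph, ['nif.ttl']):
        print(t)  # creatTree returns the rendered tree plus extras
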
@classmethod
def fromRdf(cls, graph):
    iri = graph.boundIdentifier
    context = rdflib.Namespace(iri + '/ids/')
    # TODO removing things from the trie is not implemented ...
    #d = OntCuries._dict
    #d.pop('local', None)
    #d['local'] = str(context)
    #OntCuries.reset()
    OntCuries({'local': str(context)})
    _, id = iri.rsplit('/', 1)
    resources = {}
    for s in graph[:rdf.type:owl.NamedIndividual]:
        for element in graph[s:rdf.type]:
            if element != owl.NamedIndividual:
                _, class_ = element.rsplit('/', 1)
                resource = getattr(cls, class_).fromRdf(s, graph, context)
                # FIXME we should really keep the internal representation
                # around instead of just throwing it away
                resources[resource.id] = resource.blob

    for s in graph[:rdf.type:owl.Class]:
        # FIXME s rdf:type elements:External ??
        resource = External.fromRdf(s, graph, context)
        resources[resource.id] = resource.blob

    map = {'id': id, 'resources': resources}
    return cls(map, {})

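# Hedged round-trip sketch for fromRdf: parse a previously written turtle
# serialization back into blob form. The path is illustrative and `Graph`
# is a hypothetical stand-in for whatever enclosing class defines fromRdf.
def _demo_fromRdf(path='/tmp/apinatomy-demo.ttl'):
    g = OntGraph().parse(path, format='ttl')
    return Graph.fromRdf(g)  # hypothetical: substitute the real owning class
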
def normalize_prefixes(graph, curies):
    new_graph = OntGraph()
    oc = OntCuries.new()
    curies.pop('', None)
    curies['rdf'] = str(rdf)
    curies['rdfs'] = str(rdfs)
    oc(curies)
    oc.populate(new_graph)
    for t in graph:
        new_graph.add(t)

    return new_graph

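# Minimal sketch: normalize_prefixes rebinds only the curated curie set onto a
# fresh graph, so stale or colliding prefixes from the source graph disappear;
# the input curies dict here is illustrative.
def _demo_normalize_prefixes(graph):
    curies = {'': 'http://example.org/dropped/',  # popped by normalize_prefixes
              'UBERON': 'http://purl.obolibrary.org/obo/UBERON_'}
    new_graph = normalize_prefixes(graph, curies)
    print(dict(new_graph.namespace_manager))  # rdf/rdfs forced in, '' gone
    return new_graph
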
#!/usr/bin/env python3.7
from pathlib import Path
import rdflib
from pyontutils.core import Ont, OntGraph
from pyontutils.config import auth
from pyontutils.sheets import Sheet
from pyontutils.namespaces import owl, rdf, rdfs, ilxtr, NIFRID, OntCuries, skos, makeNamespaces
from neurondm import OntTerm, OntId
from neurondm.models.allen_cell_types import AllenCellTypes
from neurondm.core import log

log = log.getChild('indicators')

OntCuries(AllenCellTypes.prefixes)  # FIXME BAD
OntCuries({'TEMPIND': 'http://uri.interlex.org/temp/uris/phenotype-indicators/'})
NIFRAW, NIFTTL = makeNamespaces('NIFRAW', 'NIFTTL')
a = rdf.type

# TODO the proper way to do this in the future will be to write
# a populateIndicators for Neuron and Phenotype


class PhenotypeIndicators(Sheet):
    name = 'phenotype-indicators'
    sheet_name = 'indicators'

    @property
    def things(self):
        def process(k, v):

    sparc.middleName,
    sparc.lastName,
    xsd.minInclusive,
    xsd.maxInclusive,
    TEMP.hasValue,
    TEMP.hasUnit,
))

OntCuries({
    'orcid': 'https://orcid.org/',
    'ORCID': 'https://orcid.org/',
    'DOI': 'https://doi.org/',
    'ror': 'https://ror.org/',
    'dataset': 'https://api.blackfynn.io/datasets/N:dataset:',
    'package': 'https://api.blackfynn.io/packages/N:package:',
    'user': '******',
    'unit': str(unit),
    'dim': str(dim),
    'asp': str(asp),
    'tech': str(tech),
    'awards': str(TEMP['awards/']),
    'sparc': str(sparc),
})


class OntId(OIDB):
    pass
    #def atag(self, **kwargs):
        #if 'curie' in kwargs:
            #kwargs.pop('curie')

        #return hfn.atag(self.iri, self.curie, **kwargs)

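# Illustration of what the registration above buys: once a prefix is known to
# OntCuries, OntId round-trips between curie and iri forms. The DOI suffix is
# made up for the example.
def _demo_curie_roundtrip():
    i = OntId('DOI:10.1000/demo')
    assert i.iri == 'https://doi.org/10.1000/demo'
    return i
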
def new_index(self, referenceIndex, *, commit=True):
    """ reference hosts have a single incrementing primary key index
        to which everything is mapped

        in theory these indexes could also be per 'prefix' aka
        the sandboxed uri path or external uri path to which
        something is mapped I don't see any reason not to do this
        for this kind of implementation since a regular pattern
        can be developed """

    '''
        QUESTION: do we force a remapping of external id sequences
        into uris/ first? this seems like a bad idea? or rather, it is
        actually a good idea, but it will have to be done with a pattern
        based redirect instead of an actual materialization
        the alternative is to do what ontobee does and pass the external
        iri as a query parameter ... hrm tradeoffs, well we certainly
        can't make a nice /uberon/uris/obo/{UBERON_} folder if we include
        the whole uri ... so this seems a reasonable tradeoff

        http://purl.obolibrary.org/obo/ can wind up being mapped into
        multiple uri spaces ... /obo/uris/obo/ would seem to make more
        sense but how to indicate that other organizations/projects map
        there ... /uberon/uris/obo/UBERON_ could indicate the latest
        sequence

        ah, and of course in theory this gets us out of the very annoying
        situation where /uberon/uris/obo/UBERON_ really IS different than
        /doid/uris/obo/UBERON_ for some identifiers (sigh) and if they are
        all mapped and masking based on presence then we can detect the
        issues

        HOWEVER how do we enforce that in reality the _mapping_ is all
        to /obo/uris/obo/ ??
    '''

    path = self.path_index(referenceIndex)
    rrp = path.repo_relative_path
    s = sncho[rrp.with_suffix('').as_posix()]  # TODO check ownership

    if path.exists():
        raise FileExistsError(path)

    g = OntGraph(path=path)
    OntCuries.populate(g)
    # TODO these are really identified by the following:
    # base/readable/
    # {group}/uris/
    # base/ontologies/
    # {group}/ontologies/uris/
    pos = (
        (rdf.type, snchn.IndexGraph),
        (rdfs.label, rdflib.Literal(f'IndexGraph for {referenceIndex}')),
        (snchn.referenceIndex, rdflib.Literal(referenceIndex)),  # TODO HRM
        #(snchn.indexRemote, )
    )
    for po in pos:
        g.add((s, *po))  # FIXME

    g.path.parent.mkdir(parents=True)
    g.write()
    if commit:
        path.commit(f'add new index for {referenceIndex}')

    return path

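# Hedged usage sketch plus the approximate turtle that new_index writes for a
# hypothetical reference index named 'uberon' (following the triples added
# above; exact layout depends on the serializer):
#
#   <.../uberon> a snchn:IndexGraph ;
#       rdfs:label "IndexGraph for uberon" ;
#       snchn:referenceIndex "uberon" .
def _demo_new_index(host):
    path = host.new_index('uberon', commit=False)  # name is illustrative
    print('index graph written to', path)
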
    xsd.minInclusive,
    xsd.maxInclusive,
    TEMP.hasValue,
    TEMP.hasUnit,))

OntCuries({
    'orcid': 'https://orcid.org/',
    'ORCID': 'https://orcid.org/',
    'DOI': 'https://doi.org/',
    'ror': 'https://ror.org/',
    'pio.api': 'https://www.protocols.io/api/v3/protocols/',
    'dataset': 'https://api.blackfynn.io/datasets/N:dataset:',
    'package': 'https://api.blackfynn.io/packages/N:package:',
    'user': '******',
    'bibo': 'http://purl.org/ontology/bibo/',  # crossref
    'prism.basic': 'http://prismstandard.org/namespaces/basic/2.1/',  # crossref
    'unit': str(unit),
    'dim': str(dim),
    'asp': str(asp),
    'protcur': 'https://uilx.org/tgbugs/u/protcur/',
    'hyp-protcur': 'https://uilx.org/tgbugs/u/hypothesis/protcur/',
    'aspect-raw': 'https://uilx.org/tgbugs/u/aspect-raw/',
    'verb': 'https://uilx.org/tgbugs/u/executor-verb/',
    'fuzzy': 'https://uilx.org/tgbugs/u/fuzzy-quantity/',
    'tech': str(tech),
    'awards': str(TEMP['awards/']),
    'sparc': str(sparc),
})


def curies_runtime(base):
    """ base is e.g. https://api.blackfynn.io/datasets/{dataset_id}/ """

elements = rdflib.Namespace('https://apinatomy.org/uris/elements/')
readable = rdflib.Namespace('https://apinatomy.org/uris/readable/')

# add apinatomy:Graph to ttlser topClasses
tc = CustomTurtleSerializer.topClasses
if readable.Graph not in tc:
    sec = CustomTurtleSerializer.SECTIONS
    CustomTurtleSerializer.topClasses = [readable.Graph] + tc
    CustomTurtleSerializer.SECTIONS = ('',) + sec

# add apinatomy:Graph as a header section marker
OntGraph.metadata_type_markers.append(readable.Graph)

OntCuries({
    'apinatomy': str(readable),
    'elements': str(elements),  # FIXME guaranteed name collisions ...
    # also just read this from the embedded local conventions
})


class NoIdError(Exception):
    """ blob has no id """


apinscm = sc.ApiNATOMYSchema()


def make_classes(schema):
    types = {}

    def ref_to_list(ref):

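# Sketch of what the topClasses tweak above achieves: subjects typed as
# apinatomy:Graph are serialized first, so graph-level metadata leads the
# file. Assumes ttlser's CustomTurtleSerializer is registered under the
# 'nifttl' format name; the subject iri is illustrative.
def _demo_top_serialization():
    g = OntGraph()
    s = rdflib.URIRef('https://apinatomy.org/uris/models/demo')
    g.add((s, rdflib.RDF.type, readable.Graph))
    return g.serialize(format='nifttl')
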
    sparc.lastName,
    xsd.minInclusive,
    xsd.maxInclusive,
    TEMP.hasValue,
    TEMP.hasUnit,
))

OntCuries({
    'orcid': 'https://orcid.org/',
    'ORCID': 'https://orcid.org/',
    'DOI': 'https://doi.org/',
    'ror': 'https://ror.org/',
    'dataset': 'https://api.blackfynn.io/datasets/N:dataset:',
    'package': 'https://api.blackfynn.io/packages/N:package:',
    'user': '******',
    'bibo': 'http://purl.org/ontology/bibo/',  # crossref
    'prism.basic': 'http://prismstandard.org/namespaces/basic/2.1/',  # crossref
    'unit': str(unit),
    'dim': str(dim),
    'asp': str(asp),
    'tech': str(tech),
    'awards': str(TEMP['awards/']),
    'sparc': str(sparc),
})


class OntId(OIDB):
    pass
    #def atag(self, **kwargs):
        #if 'curie' in kwargs:
            #kwargs.pop('curie')

elements = rdflib.Namespace('https://apinatomy.org/uris/elements/')
readable = rdflib.Namespace('https://apinatomy.org/uris/readable/')

# add apinatomy:Graph to ttlser topClasses
tc = CustomTurtleSerializer.topClasses
if readable.Graph not in tc:
    sec = CustomTurtleSerializer.SECTIONS
    CustomTurtleSerializer.topClasses = [readable.Graph] + tc
    CustomTurtleSerializer.SECTIONS = ('',) + sec

# add apinatomy:Graph as a header section marker
OntGraph.metadata_type_markers.append(readable.Graph)

OntCuries({
    'apinatomy': str(readable),
    'elements': str(elements),  # FIXME guaranteed name collisions ...
    'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/',
    # also just read this from the embedded local conventions
})


class NoIdError(Exception):
    """ blob has no id """


apinscm = sc.ApiNATOMYSchema()


def make_classes(schema):
    types = {}

    def ref_to_list(ref):

def parse_workflow():
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp: str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)

    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag
    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {tagType: sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                      if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    complicated = sorted(
        a_given_tag
        for tt in tag_types
        for a_given_tag in cgraph[:rdf.type:tt]
        if not isinstance(a_given_tag, BNode)
        and not [successor_tag
                 for htt in has_tag_types
                 for successor_tag in chain(
                     (t for t in cgraph[a_given_tag:htt]
                      #if not isinstance(t, BNode)
                      ),
                     # we don't actually need this for terminals
                     # we will need it later
                     #(t for b in cgraph[a_given_tag:htt]
                      #if isinstance(b, BNode)
                      #for listhead in cgraph[b:owl.oneOf]
                      #for t in unlist(listhead, cgraph)),
                 )])

    def topList(node, g):
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion cases where
        # tagging can occur in either order e.g.
        # PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue

            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain

                yield out

            if not ptt and not out:
                parent_tag = None

        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {OntId(a): set(t if isinstance(t, BNode) else OntId(t)
                                   for t in cgraph.transitive_subjects(rdf.type, a)
                                   if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    _endpoint_chains = {OntId(endpoint): [[OntId(endpoint)] + [OntId(e) for e in chain]
                                          for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])
    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)
    #def invOneOf(tag, g):

    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        return {OntId(thing): [[OntId(thing)] + [OntId(e) for e in chain]
                               for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
                }

    def print_chains(thing_chains):
        print('\nstart from beginning')
        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))
        print('\nstart from end')
        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s: frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints = 'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint

    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p

        return
        # NOTE everything below the early return above is dead code, kept as-is
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):
        # duh...
        #pprint(chain)
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    valid_tagsets = frozenset((t for s in tag_transitions.values() for t in s))
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'
    tt = {}
    for endtype, chains in endpoint_chains.items():
        for *_chain, tag in chains:
            if _chain:
                next_thing = _chain[-1]
                for ets in tts:
                    if tag in ets:
                        tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions

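# Hedged driver sketch: parse_workflow prints the chain reports as a side
# effect and returns the derived structures; the unpacking below mirrors its
# return statement.
def _demo_parse_workflow():
    (tag_types, tag_tokens, partInstances,
     valid_tagsets, terminal_tagsets, tag_transitions) = parse_workflow()
    print(len(valid_tagsets), 'valid tag sets reachable from terminals')
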
                  ilxtr.hasCircuitRolePhenotype)

"""
http://ontology.neuinfo.org/trees/query/swanr:hasPart1/SWAN:1/ttl/generated/swanson.ttl?restriction=true&depth=40&direction=OUTGOING
human cns gray matter regions

http://ontology.neuinfo.org/trees/query/swanr:hasPart3/SWAN:1/ttl/generated/swanson.ttl?restriction=true&depth=40&direction=OUTGOING
surface features, handy to have around

http://ontology.neuinfo.org/trees/query/swanr:hasPart5/SWAN:629/ttl/generated/swanson.ttl?restriction=true&depth=40&direction=OUTGOING
"""

sgraph = rdflib.Graph().parse(
    (Neuron.local_base / 'ttl/generated/swanson.ttl').as_posix(),
    format='ttl')
# restriction.parse(sgraph)  # FIXME this breaks with weird error message
OntCuries({**graphBase.prefixes, **PREFIXES})
rests = [r for r in restriction.parse(graph=sgraph)
         if r.p == swanr.hasPart3]
#restriction = Restriction2(rdfs.subClassOf)


class LocalGraphService(ontquery.services.BasicService):
    def __init__(self, graph):
        self.graph = graph
        super().__init__()

    def query(self, curie=None, iri=None, label=None, term=None, search=None,