def parse_workflow():
    """Build the workflow graph and derive tag / state lookup tables from it.

    Loads ``workflow-rrid.graphml`` and ``workflow-paper-id.graphml`` from the
    ``docs`` directory that sits one level above this module's directory,
    feeds both into a single ``ConjunctiveGraph`` and hands that graph to
    ``write`` with the path ``/tmp/workflow.ttl``.  The graph's namespaces
    plus the RRID / DOI / PMID prefixes are registered with ``OntCuries``.

    Side effects: besides the ``write`` call above, the derived chains are
    printed to stdout (``print`` / ``pprint``).

    Returns:
        A 6-tuple ``(tag_types, tag_tokens, partInstances, valid_tagsets,
        terminal_tagsets, tag_transitions)`` where

        * ``tag_types`` -- set of transitive ``rdfs:subClassOf`` subjects of
          ``wf.tag``.
        * ``tag_tokens`` -- ``{tag_type: sorted instances of that type}``.
        * ``partInstances`` -- ``{OntId(part): {OntId(instance), ...}}`` for
          every class restricted by ``wf.isAttachedTo wf.annotation``.
        * ``valid_tagsets`` -- frozenset of every frozenset of tags that is
          reachable as a state in ``tag_transitions``.
        * ``terminal_tagsets`` -- maps tagsets that have no outgoing
          transition to the element that precedes the final element of an
          endpoint chain whose final element is in that tagset.
        * ``tag_transitions`` -- ``{state: frozenset(next_states)}``; the
          key ``None`` holds the initial states.
    """
    from pprint import pprint  # function-scoped, as in the original

    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    OntCuries({cp: str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)

    # Predicates of interest below: wf.hasTag, wf.hasReplyTag,
    # wf.hasTagOrReplyTag, wf.hasOutputTag
    # NOTE(review): the original evaluated these as bare no-op expressions;
    # assumes `wf` is an open namespace where that has no effect -- confirm.

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {
        tag_type: sorted({t for t in cgraph.transitive_subjects(rdf.type, tag_type)
                          if t != tag_type})
        for tag_type in tag_types
    }

    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf,
                                                   wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf,
                                                           wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    # tags (excluding blank nodes) that have no outgoing has-tag style edge
    terminals = sorted(
        tag
        for ttype in tag_types
        if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
        for tag in cgraph[:rdf.type:ttype]
        if not isinstance(tag, BNode)
        and not any(o for httype in has_tag_types for o in cgraph[tag:httype]))

    # states (excluding blank nodes) that have no outgoing next-action edge
    endpoints = sorted(
        endpoint
        for endpoint in cgraph[:rdf.type:wf.state]
        if not isinstance(endpoint, BNode)
        and not any(o for hnatype in has_next_action_types
                    for o in cgraph[endpoint:hnatype]))

    def topList(node, g):
        """Yield the rdf list cells whose rdf:rest is `node`."""
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        """Yield, per list cell holding `node`, the last cell reached by
        walking rdf:rest backwards (or the cell itself if there is none)."""
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass  # exhaust the closure; only the final value is wanted

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        """Yield subjects pointing at `node` through any has-tag predicate."""
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        """Yield subjects that point at an owl:oneOf holder of a list
        containing `node` (any predicate)."""
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getTagChains(node, g, seen=tuple()):
        """Yield tuples of ancestor tags leading to `node`.

        `seen` prevents infinite recursion in cases where tagging can occur
        in either order, e.g. PMID -> DOI.
        """
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue

            # NOTE(review): next() raises StopIteration if the parent has no
            # rdf:type, which surfaces as RuntimeError inside a generator.
            # Behavior deliberately left unchanged.
            ptt = next(g[parent_tag:rdf.type])
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = (parent_tag, *pchain)
                else:
                    # parent is not itself a tag: pass its chain through
                    out = pchain
                yield out

            if not ptt and not out:
                parent_tag = None

        # Only the state of the *last* parent decides whether the empty
        # chain is emitted (kept exactly as in the original).
        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        """Yield subjects that wf:initiatesAction `node`."""
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        """Yield subjects linked by a next-action predicate to an owl:oneOf
        holder of a list containing `node`."""
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        """Yield tuples of predecessors (actions, possibly a tag) of `node`."""
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                yield (parent_action, *pchain)

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, obj, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == obj:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {
        OntId(a): {OntId(t)
                   for t in cgraph.transitive_subjects(rdf.type, a)
                   if not isinstance(t, BNode) and t != a}
        for a in annoParts
    }

    def make_chains(things, getChains):
        """Map OntId(thing) -> list of chains, each starting with the thing."""
        return {
            OntId(thing): [[OntId(thing)] + [OntId(e) for e in links]
                           for links in getChains(thing, cgraph)]
            for thing in things
        }

    def print_chains(thing_chains):
        """Print every chain twice: first reversed, then in stored order."""
        print('\nstart from beginning')
        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(links))
                               for chains in thing_chains.values()
                               for links in chains)))

        print('\nstart from end')
        print('\n'.join(sorted(' <- '.join(e.curie for e in links)
                               for chains in thing_chains.values()
                               for links in chains)))

    def tagset_transitions(all_chains):
        """Return {state: frozenset(next states)} where a state is the
        frozenset of elements accumulated while walking a chain in reverse."""
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for chains in all_chains.values():
            for links in chains:
                valid = set()
                prior_state = None  # None keys the initial transition
                for element in reversed(links):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s: frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = tagset_transitions(terminal_chains)

    def printq(*things):
        """Print the curies of `things` on one line."""
        print(*(OntId(t).curie for t in things))

    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        """Yield predicates linking `s` to `o`, directly or through any node
        produced by ``linkerFunc(o, g)``."""
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            for p in g[s::l]:
                yield p

    def edge_to_symbol(p, rev=False):
        """Return the arrow string used to display predicate `p`."""
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        if p == wf.hasReplyTag:
            return '<' if rev else '>'
        if p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        if p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        return '<??' if rev else '??>'

    def chain_to_typed_chain(links, g, func):
        """Yield a flat stream ``s, symbol, o, s, symbol, o, ...`` for each
        adjacent pair in `links`."""
        for s, o in zip(links, links[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                # flattened on purpose: yields three items, not one triple
                yield from (s, edge_to_symbol(p), o)

            if not p:
                # nothing found forwards, so try the reverse direction
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        """Return the sorted typed chains with URIRefs rendered as curies."""
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(links)),
                                                     cgraph, func)]
                      for chains in thing_chains.values()
                      for links in chains)

    def getLinkers(node, g):
        """Yield the owl:oneOf holders of lists containing `node`."""
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(obj, graph):
        """Yield every subject pointing at `obj`, then its list linkers."""
        yield from (s for s, p in graph[::obj])
        yield from getLinkers(obj, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    valid_tagsets = frozenset(t for s in tag_transitions.values() for t in s)
    # tagsets that never appear as a source state have no outgoing transition
    tts = valid_tagsets - frozenset(tag_transitions)

    terminal_tagsets = {}
    for chains in endpoint_chains.values():
        for *preceding, tag in chains:
            if not preceding:
                # A single-element chain has nothing before `tag`. The
                # original fell through here and reused `next_thing` from a
                # previous chain (or hit an unbound local on the first one).
                continue

            next_thing = preceding[-1]
            for ets in tts:
                if tag in ets:
                    terminal_tagsets[ets] = next_thing

    return (tag_types, tag_tokens, partInstances,
            valid_tagsets, terminal_tagsets, tag_transitions)