def term_get_ancestors(self, node_type, root_iri):
    """Build the subclass graph returned by the ancestor query for a root term.

    ``self.query`` is run through the triplestore with ``root_iri`` bound to
    the ``root_uri`` template input. Every returned row yields a parent node
    and a child node (both typed ``node_type``) joined by an
    ``rdfs:subClassOf`` edge pointing from the child to the parent.

    :param node_type: type given to every node created here.
    :param root_iri: value substituted for ``root_uri`` in the query template.
    :return: tuple ``(nodes, edges)``, each a set.
    """
    rows = self.triplestore.query_template(
        template_text=self.query,
        inputs={'root_uri': root_iri},
        outputs=['parent_id', 'parent_label', 'child_id', 'child_label'])
    print('found total ', len(rows), ' results.')

    nodes, edges = set(), set()
    for row in rows:
        # Both ends of the relation get the caller-supplied type.
        parent = KNode(Text.obo_to_curie(row['parent_id']),
                       name=row['parent_label'],
                       type=node_type)
        child = KNode(Text.obo_to_curie(row['child_id']),
                      name=row['child_label'],
                      type=node_type)
        if parent.id == child.id:
            # A term must not be linked to itself.
            continue
        subclass_of = LabeledID(identifier='rdfs:subClassOf',
                                label='subclass of')
        link = self.create_edge(
            source_node=child,
            target_node=parent,
            predicate=subclass_of,
            provided_by='uberongraph.term_get_ancestors',
            input_id=child.id)
        nodes.add(child)
        nodes.add(parent)
        edges.add(link)
    return nodes, edges
def get_anatomy_parts(self, anatomy_identifier):
    """Find the UBERON terms that are parts of the given anatomy term.

    :param anatomy_identifier: identifier of the query term; a value starting
        with ``http`` is first converted with ``Text.obo_to_curie``.
    :return: the rows returned by the triplestore (keys ``part`` and
        ``partlabel``), each extended with a ``curie`` key holding the
        converted ``part`` value.
    """
    if anatomy_identifier.startswith('http'):
        anatomy_identifier = Text.obo_to_curie(anatomy_identifier)

    # Adjacent literals are joined by the parser into one query string.
    sparql = (
        " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
        " prefix UBERON: <http://purl.obolibrary.org/obo/UBERON_>"
        " prefix BFO: <http://purl.obolibrary.org/obo/BFO_>"
        " select distinct ?part ?partlabel"
        " from <http://reasoner.renci.org/nonredundant>"
        " from <http://example.org/uberon-hp-cl.ttl>"
        " where {"
        " $anatomy_id BFO:0000051 ?part ."
        " graph <http://reasoner.renci.org/redundant> {"
        " ?part rdfs:subClassOf UBERON:0001062 ."
        " }"
        " ?part rdfs:label ?partlabel ."
        " } "
    )
    rows = self.triplestore.query_template(
        inputs={'anatomy_id': anatomy_identifier},
        outputs=['part', 'partlabel'],
        template_text=sparql)

    # Annotate each row in place with the curie form of the part identifier.
    for row in rows:
        row['curie'] = Text.obo_to_curie(row['part'])
    return rows
def __init__(self, *args, **kwargs):
    """Initialise a node from a single identifier or from keyword fields.

    A lone positional string becomes ``self.id``. A lone positional list
    whose first element is a string contributes that first element as
    ``self.id``. In both cases the positional argument is consumed before the
    parent initialiser runs; everything else is forwarded to it unchanged.
    """
    self.id = None
    self.name = None
    self.type = None
    self.original_curie = None
    self.properties = {}

    if len(args) == 1:
        sole = args[0]
        if isinstance(sole, str):
            self.id = sole
            args = []
        # TODO: hack - when several curies are supplied only the first is used
        elif isinstance(sole, list) and isinstance(sole[0], str):
            self.id = sole[0]
            args = []

    super().__init__(*args, **kwargs)

    # Another hack to keep things running: a list of names collapses to
    # its first entry.
    if isinstance(self.name, list):
        self.name = self.name[0]

    if self.id.startswith('http'):
        self.id = Text.obo_to_curie(self.id)

    # Synonyms is just for CURIEs; seed it with the node's own identifier.
    self.synonyms = set()
    self.synonyms.add(LabeledID(identifier=self.id, label=self.name))

    # List of labels to attach to exports
    self.export_labels = []
def get_anatomy_by_cell_graph(self, cell_node):
    """Return ``(edge, node)`` pairs for the anatomies linked to a cell.

    :param cell_node: node whose ``identifier`` is passed to
        ``self.cell_to_anatomy``.
    :return: list of ``(KEdge, KNode)`` tuples, one per returned row.
    """
    def as_pair(row):
        # One edge/node pair per row; the node label comes from the row.
        link = KEdge('uberongraph', 'cellToAnatomy')
        anatomy = KNode(Text.obo_to_curie(row['anatomyID']),
                        node_types.ANATOMY)
        anatomy.label = row['anatomyLabel']
        return link, anatomy

    return [as_pair(row)
            for row in self.cell_to_anatomy(cell_node.identifier)]
def __init__(self, identifier, node_type, label=None):
    """Create a node with a curie identifier and a validated type.

    :param identifier: node identifier; a value starting with ``http`` is
        converted with ``Text.obo_to_curie``.
    :param node_type: must be a member of ``node_types``.
    :param label: optional label for the node.
    :raises ValueError: if ``node_type`` is not in ``node_types``.
    """
    self.identifier = (Text.obo_to_curie(identifier)
                       if identifier.startswith('http')
                       else identifier)
    self.label = label

    if node_type not in node_types:
        raise ValueError(f'node_type {node_type} unsupported.')
    self.node_type = node_type

    # Containers start out empty.
    self.properties = {}
    self.mesh_identifiers = []
    self.synonyms = set()
def create_anatomy_phenotype_edge(self, node_id, node_label, input_id,
                                  anatomy_node):
    """Create a phenotype node and the edge tying it to an anatomy node.

    :param node_id: phenotype identifier, converted with
        ``Text.obo_to_curie``.
    :param node_label: name given to the phenotype node.
    :param input_id: passed through to ``create_edge``.
    :param anatomy_node: first node argument of the new edge.
    :return: tuple ``(edge, phenotype_node)``.
    """
    relation = LabeledID(identifier='GAMMA:0000002',
                         label='inverse of has phenotype affecting')
    phenotype = KNode(Text.obo_to_curie(node_id),
                      type=node_types.PHENOTYPIC_FEATURE,
                      name=node_label)
    link = self.create_edge(
        anatomy_node,
        phenotype,
        'uberongraph.get_phenotype_by_anatomy_graph',
        input_id,
        relation)
    return link, phenotype
def get_cell_by_anatomy_graph(self, anatomy_node):
    """Return ``(edge, cell_node)`` pairs for the cells linked to an anatomy.

    :param anatomy_node: node whose ``id`` is passed to
        ``self.anatomy_to_cell`` and to ``create_edge``.
    :return: list of ``(edge, cell_node)`` tuples, one per returned row.
    """
    rows = self.anatomy_to_cell(anatomy_node.id)
    # One predicate object is shared by every edge created below.
    part_of = LabeledID(identifier='BFO:0000050', label='part_of')

    pairs = []
    for row in rows:
        cell = KNode(Text.obo_to_curie(row['cellID']),
                     type=node_types.CELL,
                     name=row['cellLabel'])
        link = self.create_edge(
            cell,
            anatomy_node,
            'uberongraph.get_cell_by_anatomy_graph',
            anatomy_node.id,
            part_of)
        pairs.append((link, cell))
    return pairs
def get_anatomy_by_cell_graph(self, cell_node):
    """Return ``(edge, anatomy_node)`` pairs for the anatomies linked to a cell.

    :param cell_node: node whose ``id`` is passed to ``self.cell_to_anatomy``
        and to ``create_edge``.
    :return: list of ``(edge, anatomy_node)`` tuples, one per returned row.
    """
    rows = self.cell_to_anatomy(cell_node.id)
    # One predicate object is shared by every edge created below.
    part_of = LabeledID(identifier='BFO:0000050', label='part_of')

    pairs = []
    for row in rows:
        anatomy = KNode(Text.obo_to_curie(row['anatomyID']),
                        type=node_types.ANATOMY,
                        name=row['anatomyLabel'])
        link = self.create_edge(
            cell_node,
            anatomy,
            'uberongraph.get_anatomy_by_cell_graph',
            cell_node.id,
            part_of)
        pairs.append((link, anatomy))
    return pairs
def __init__(self, *args, **kwargs):
    """Initialise a node from a single identifier or from keyword fields.

    A lone positional string becomes ``self.id`` and is consumed before the
    parent initialiser runs; everything else is forwarded to it unchanged.
    """
    self.id = None
    self.name = None
    self.type = None
    self.properties = {}

    if len(args) == 1 and isinstance(args[0], str):
        (self.id,) = args
        args = []

    super().__init__(*args, **kwargs)

    if self.id.startswith('http'):
        self.id = Text.obo_to_curie(self.id)

    # Synonyms is just for CURIEs; seed it with the node's own identifier.
    self.synonyms = set()
    self.synonyms.add(LabeledID(identifier=self.id, label=self.name))
def get_out_by_in(self, input_node, output_type, prefixes, subject=True,
                  object=True):
    """Find neighbours of ``input_node`` of a given type, in both directions.

    Every synonym of ``input_node`` that matches one of ``prefixes`` is
    queried through ``self.get_neighbor`` twice: once with ``subject=True``
    and once with ``subject=False``. Each distinct ``(predicate, output_id)``
    pair per direction becomes one edge and one output node.

    :param input_node: node providing ``get_synonyms_by_prefix``.
    :param output_type: type requested from ``get_neighbor`` and assigned to
        every output node.
    :param prefixes: iterable of curie prefixes used to select synonyms.
    :param subject: accepted for interface compatibility; not read by this
        method (both directions are always queried).
    :param object: accepted for interface compatibility; not read by this
        method (both directions are always queried).
    :return: list of ``(edge, output_node)`` tuples.
    """
    # Name of the calling function, passed on to create_edge for each edge.
    caller = f'uberongraph.{inspect.stack()[1][3]}'

    curies = set()
    for pre in prefixes:
        curies.update(input_node.get_synonyms_by_prefix(pre))

    # Keep every row paired with the curie whose query produced it, so each
    # edge is created with the curie that actually led to it rather than
    # whichever curie happened to be iterated last.
    rows_by_direction = {'subject': [], 'object': []}
    for curie in curies:
        for row in self.get_neighbor(curie, output_type, subject=True):
            rows_by_direction['subject'].append((curie, row))
        for row in self.get_neighbor(curie, output_type, subject=False):
            rows_by_direction['object'].append((curie, row))

    found = []
    for direction in ('subject', 'object'):
        # De-duplication is per direction: the same predicate/output pair may
        # legitimately appear once in each direction.
        seen = set()
        for source_curie, row in rows_by_direction[direction]:
            key = (row['p'], row['output_id'])
            if key in seen:
                continue
            seen.add(key)

            predicate_curie = Text.obo_to_curie(row['p'])
            prefix = Text.get_curie(predicate_curie)
            # Upper-case every prefix except the ubergraph axioms one.
            if prefix != 'ubergraph-axioms.ofn':
                prefix = prefix.upper()
            predicate = LabeledID(
                prefix + ":" + Text.un_curie(predicate_curie),
                row['pLabel'])
            output_node = KNode(row['output_id'],
                                type=output_type,
                                name=row['output_label'])

            # Argument order follows the direction of the query.
            if direction == 'subject':
                edge = self.create_edge(input_node, output_node, caller,
                                        source_curie, predicate)
            else:
                edge = self.create_edge(output_node, input_node, caller,
                                        source_curie, predicate)
            found.append((edge, output_node))
    return found
def term_get_ancestors(self, child_node):
    """Return ``(edge, ancestor_node)`` pairs for the ancestors of a node.

    The root is looked up in ``self.root_uris`` by the node's type; when no
    root is configured an empty list is returned. Otherwise ``self.query`` is
    run once per supported synonym curie of ``child_node`` and every returned
    parent other than the node itself is linked with an ``rdfs:subClassOf``
    edge from ``child_node``.

    :param child_node: node providing ``type``, ``id``, ``export_labels`` and
        ``get_synonyms_by_prefix``.
    :return: list of ``(edge, ancestor_node)`` tuples.
    """
    root_uri = self.root_uris.get(child_node.type, None)
    if not root_uri:
        return []

    # Step 1: gather the synonym curies whose prefixes are supported for any
    # of the node's export labels.
    supported_curies = set()
    for label in child_node.export_labels:
        for prefix in self.prefix_set.get(label, []):
            supported_curies.update(
                child_node.get_synonyms_by_prefix(prefix))

    # Step 2: ask the graph for the parents of each supported curie.
    pairs = []
    for curie in supported_curies:
        rows = self.triplestore.query_template(
            template_text=self.query,
            inputs={'child_curie': curie, 'root_uri': root_uri},
            outputs=['parent_id', 'label'])
        for row in rows:
            # Ancestors get the same type as the input node.
            ancestor = KNode(Text.obo_to_curie(row['parent_id']),
                             name=row['label'],
                             type=child_node.type)
            if ancestor.id == child_node.id:
                # A node must not be linked to itself.
                continue
            subclass_of = LabeledID(identifier='rdfs:subClassOf',
                                    label='subclass of')
            link = self.create_edge(
                source_node=child_node,
                target_node=ancestor,
                predicate=subclass_of,
                provided_by='uberongraph.term_get_ancestors',
                input_id=child_node.id)
            pairs.append((link, ancestor))
    return pairs
def test_obo_to_curie():
    """Check OBO URL -> curie conversion and the reverse for simple URLs."""
    expected_curies = {
        'http://purl.obolibrary.org/obo/prefix_suffix':
            'prefix:suffix',
        'http://purl.obolibrary.org/obo/prefix_suffix_suffix':
            'prefix:suffix_suffix',
        'http://purl.obolibrary.org/obo/path/prefix_suffix':
            'path_prefix:suffix',
        'http://purl.obolibrary.org/obo/path/prefix#suffix':
            'path_prefix:suffix',
        'http://purl.obolibrary.org/obo/path/prefix#suffix_more_suffix':
            'path_prefix:suffix_more_suffix',
    }
    for obo, curie in expected_curies.items():
        assert Text.obo_to_curie(obo) == curie

    # The reverse conversion is only checked for the first two entries; no
    # smart way was found to get back to the remaining URLs (e.g. those
    # containing '#').
    for obo, curie in list(expected_curies.items())[:2]:
        assert Text.curie_to_obo(curie) == f'<{obo}>'
def dep_get_anatomy_by_phenotype_graph(self, phenotype_node):
    """Return ``(edge, node)`` pairs for anatomies related to a phenotype.

    Each ``HP`` synonym of ``phenotype_node`` is passed to
    ``self.phenotype_to_anatomy``. Every returned anatomy is linked to the
    phenotype, and so is every part of that anatomy reported by
    ``self.get_anatomy_parts``, using the same predicate.

    Side effect: if ``phenotype_node.name`` is ``None`` it is filled in from
    the ``input_label`` of a returned row.

    :param phenotype_node: node providing ``get_synonyms_by_prefix``, ``id``
        and ``name``.
    :return: list of ``(edge, node)`` tuples.
    """
    provenance = 'uberongraph.get_anatomy_by_phenotype_graph'
    pairs = []
    for curie in phenotype_node.get_synonyms_by_prefix('HP'):
        for row in self.phenotype_to_anatomy(curie):
            anatomy = KNode(row['anatomy_id'],
                            type=node_types.ANATOMICAL_ENTITY,
                            name=row['anatomy_label'])
            # Try to derive the label from the relation for the new
            # ubergraph axioms when the row carries no predicate label.
            relation_label = row['predicate_label'] or '_'.join(
                row['predicate'].split('#')[-1].split('.'))
            relation = LabeledID(Text.obo_to_curie(row['predicate']),
                                 relation_label)
            link = self.create_edge(phenotype_node, anatomy, provenance,
                                    phenotype_node.id, relation)
            if phenotype_node.name is None:
                phenotype_node.name = row['input_label']
            pairs.append((link, anatomy))

            # These tend to be very high level terms. Let's also get their
            # parts to be more inclusive.
            # TODO: there ought to be a more principled way to take care of
            # this, but it highlights the uneasy relationship between the
            # high level world of smartapi and the low-level sparql-vision.
            for part in self.get_anatomy_parts(row['anatomy_id']):
                part_node = KNode(part['part'],
                                  type=node_types.ANATOMICAL_ENTITY,
                                  name=part['partlabel'])
                part_link = self.create_edge(phenotype_node, part_node,
                                             provenance, phenotype_node.id,
                                             relation)
                pairs.append((part_link, part_node))
    return pairs
def create_phenotype_anatomy_edge(self, node_id, node_label):
    """Create a phenotype-to-anatomy edge and its anatomy node.

    :param node_id: anatomy identifier, converted with
        ``Text.obo_to_curie``.
    :param node_label: label assigned to the anatomy node.
    :return: tuple ``(edge, node)``.
    """
    link = KEdge('uberongraph', 'phenotypeToAnatomy')
    anatomy = KNode(Text.obo_to_curie(node_id), node_types.ANATOMY)
    anatomy.label = node_label
    return link, anatomy