def test_clique_merge8():
    """
    Clique merge where ``same_as`` is asserted both as a node property
    and as an explicit edge between nodes.
    """
    prefix_priority = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}
    graph = NxGraph()

    # (node id, category, same_as target or None). A fresh attribute dict is
    # built for every node so that no list is shared between nodes.
    node_specs = [
        ("HGNC:1", "biolink:Gene", None),
        ("OMIM:2", "biolink:Gene", "HGNC:1"),
        ("NCBIGene:3", "biolink:NamedThing", None),
        ("ENSEMBL:4", "biolink:Gene", "HGNC:1"),
        ("ENSEMBL:6", "biolink:Gene", "NCBIGene:8"),
        ("HGNC:7", "biolink:Gene", None),
        ("NCBIGene:8", "biolink:Gene", None),
    ]
    for node_id, category, equivalent in node_specs:
        attributes = {"category": [category]}
        if equivalent is not None:
            attributes["same_as"] = [equivalent]
        graph.add_node(node_id, **attributes)

    same_as_pairs = [
        ("NCBIGene:3", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ]
    for subject_id, object_id in same_as_pairs:
        graph.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, "biolink:same_as", object_id),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    merged, clique_graph = clique_merge(
        target_graph=graph, prefix_prioritization_map=prefix_priority
    )

    assert merged.number_of_nodes() == 2
    assert merged.number_of_edges() == 0
    for leader_id in ("HGNC:1", "HGNC:7"):
        assert merged.has_node(leader_id)

    first_leader = merged.nodes()["HGNC:1"]
    for member in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4"):
        assert member in first_leader["same_as"]

    second_leader = merged.nodes()["HGNC:7"]
    for member in ("ENSEMBL:6", "NCBIGene:8"):
        assert member in second_leader["same_as"]

    # every non-leader clique member must have been folded into its leader
    for removed in ("OMIM:2", "NCBIGene:3", "ENSEMBL:4", "ENSEMBL:6", "NCBIGene:8"):
        assert not merged.has_node(removed)
def load_edge(self, edge: Dict) -> None:
    """
    Load an edge into a networkx.MultiDiGraph

    .. Note::
        Reasoner Std API format fields (``source_id``, ``target_id``,
        ``relation_label``) are copied onto their Biolink Model
        counterparts on the given ``edge`` dict before validation.

    Parameters
    ----------
    edge : dict
        An edge

    """
    # Reasoner Std API field -> Biolink Model field
    for api_field, biolink_field in (('source_id', 'subject'), ('target_id', 'object')):
        if api_field in edge:
            edge[biolink_field] = edge[api_field]
    if 'relation_label' in edge:
        # only the first element of relation_label is used as the edge label
        edge['edge_label'] = edge['relation_label'][0]

    validated = self.validate_edge(edge)
    kwargs = PandasTransformer._build_kwargs(validated.copy())

    if not ('subject' in kwargs and 'object' in kwargs):
        logging.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}".format(kwargs)
        )
        return

    subject_id = kwargs['subject']
    object_id = kwargs['object']
    edge_key = generate_edge_key(subject_id, kwargs['edge_label'], object_id)
    self.graph.add_edge(subject_id, object_id, edge_key, **kwargs)
def load_edge(self, edge_record: List) -> Tuple:
    """
    Load an edge into an instance of BaseGraph

    The edge dict found at index 1 of ``edge_record`` is updated in place
    with ``provided_by`` (taken from the graph metadata, when available)
    and a generated ``id`` if it has none.

    Parameters
    ----------
    edge_record: List
        A 4-tuple edge record; indices 0, 1 and 2 hold the subject node,
        the edge data and the object node respectively

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data

    """
    subject_node = edge_record[0]
    raw_edge = edge_record[1]
    object_node = edge_record[2]

    metadata = self.graph_metadata
    if 'provided_by' in metadata and 'provided_by' not in raw_edge:
        raw_edge['provided_by'] = metadata['provided_by']
    if 'id' not in raw_edge:
        raw_edge['id'] = generate_uuid()

    subject_id = subject_node['id']
    object_id = object_node['id']
    # the key is derived before validation/sanitization touch the edge
    edge_key = generate_edge_key(subject_id, raw_edge['predicate'], object_id)

    validated = validate_edge(raw_edge)
    edge_data = sanitize_import(validated.copy())
    self.edge_properties.update(edge_data.keys())
    return subject_id, object_id, edge_key, edge_data
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Load an edge into an instance of BaseGraph.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge data
        when the edge passes ``check_edge_filter``; ``None`` otherwise

    """
    validated = validate_edge(edge)
    edge_data = sanitize_import(validated.copy())
    if 'id' not in edge_data:
        edge_data['id'] = generate_uuid()

    subject_id = edge_data['subject']
    object_id = edge_data['object']

    metadata = self.graph_metadata
    if 'provided_by' in metadata and 'provided_by' not in edge_data:
        edge_data['provided_by'] = metadata['provided_by']

    edge_key = generate_edge_key(subject_id, edge_data['predicate'], object_id)
    # edge properties are recorded even when the edge is later filtered out
    self.edge_properties.update(list(edge_data.keys()))

    if not self.check_edge_filter(edge_data):
        return None

    # NOTE(review): edge keys are added to *node*_properties here; this looks
    # like a copy/paste slip but is preserved as-is -- confirm intent.
    self.node_properties.update(edge_data.keys())
    return subject_id, object_id, edge_key, edge_data
def load_edge(self, edge_record: List) -> Tuple:
    """
    Load an edge into an instance of BaseGraph

    Parameters
    ----------
    edge_record: List
        A 4-tuple edge record; indices 0, 1 and 2 hold the subject node,
        the edge data and the object node respectively

    Returns
    -------
    Tuple
        A tuple with subject ID, object ID, edge key, and edge data;
        an empty tuple when ``validate_edge`` returns a falsy value

    """
    subject_node, raw_edge, object_node = (
        edge_record[0],
        edge_record[1],
        edge_record[2],
    )

    self.set_edge_provenance(raw_edge)
    if "id" not in raw_edge:
        raw_edge["id"] = generate_uuid()

    subject_id = subject_node["id"]
    object_id = object_node["id"]
    # the key is derived before validation so it reflects the raw predicate
    edge_key = generate_edge_key(subject_id, raw_edge["predicate"], object_id)

    validated = self.validate_edge(raw_edge)
    if not validated:
        return ()

    edge_data = sanitize_import(validated.copy())
    self.edge_properties.update(edge_data.keys())
    return subject_id, object_id, edge_key, edge_data
def load_ontologies(self):
    """
    Load all required ontologies.

    Every ontology in ``self.ontologies`` is parsed with rdflib. Its
    ``rdfs:subClassOf`` triples become ``subclass_of`` edges in
    ``self.ontology_graph`` and its ``rdfs:label`` triples populate
    ``self.curie_map`` and the ``name`` attribute of the matching node.
    """
    for ontology in self.ontologies.values():
        rdfgraph = rdflib.Graph()
        rdfgraph.parse(ontology, format=rdflib.util.guess_format(ontology))

        subclass_triples = rdfgraph.triples((None, rdflib.RDFS.subClassOf, None))
        for child_iri, _, parent_iri in subclass_triples:
            child = contract(child_iri)
            parent = contract(parent_iri)
            self.ontology_graph.add_node(child)
            self.ontology_graph.add_node(parent)
            edge_key = generate_edge_key(child, 'subclass_of', parent)
            self.ontology_graph.add_edge(
                child,
                parent,
                edge_key,
                edge_label='subclass_of',
                relation='rdfs:subClassOf',
            )

        label_triples = rdfgraph.triples((None, rdflib.RDFS.label, None))
        for term_iri, _, label in label_triples:
            curie = contract(term_iri)
            # labels are stored with spaces replaced by underscores
            name = label.value.replace(' ', '_')
            self.curie_map[curie] = name
            self.ontology_graph.add_node(curie, name=name)
def load_edge(self, edge: Dict) -> None:
    """
    Load an edge into a networkx.MultiDiGraph

    Edges that fail ``check_edge_filter`` or that lack a ``subject`` or
    ``object`` after validation are logged and skipped.

    Parameters
    ----------
    edge : dict
        An edge

    """
    if not self.check_edge_filter(edge):
        logging.debug(f"Edge fails edge filters: {edge}")
        return

    validated = Transformer.validate_edge(edge)
    kwargs = PandasTransformer._build_kwargs(validated.copy())

    if not ('subject' in kwargs and 'object' in kwargs):
        logging.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}".format(kwargs)
        )
        return

    subject_id = kwargs['subject']
    object_id = kwargs['object']

    metadata = self.graph_metadata
    if 'provided_by' in metadata and 'provided_by' not in kwargs:
        kwargs['provided_by'] = metadata['provided_by']

    edge_key = generate_edge_key(subject_id, kwargs['edge_label'], object_id)
    self.graph.add_edge(subject_id, object_id, edge_key, **kwargs)
    self._edge_properties.update(list(kwargs.keys()))
def read_edge(self, edge: Dict) -> Optional[Tuple]:
    """
    Load an edge into an instance of BaseGraph.

    Parameters
    ----------
    edge: Dict
        An edge

    Returns
    -------
    Optional[Tuple]
        A tuple that contains subject id, object id, edge key, and edge data;
        ``None`` when validation yields a falsy value or when the edge does
        not pass ``check_edge_filter``

    """
    validated = self.validate_edge(edge)
    if not validated:
        return None

    edge_data = sanitize_import(validated.copy(), self.list_delimiter)
    if "id" not in edge_data:
        edge_data["id"] = generate_uuid()

    subject_id = edge_data["subject"]
    object_id = edge_data["object"]
    self.set_edge_provenance(edge_data)

    edge_key = generate_edge_key(subject_id, edge_data["predicate"], object_id)
    # edge properties are recorded even when the edge is later filtered out
    self.edge_properties.update(list(edge_data.keys()))

    if not self.check_edge_filter(edge_data):
        return None

    # NOTE(review): edge keys are added to *node*_properties here; this looks
    # like a copy/paste slip but is preserved as-is -- confirm intent.
    self.node_properties.update(edge_data.keys())
    return subject_id, object_id, edge_key, edge_data
def add_edge(self, subject_iri: URIRef, object_iri: URIRef, predicate_iri: URIRef) -> Tuple[str, str, str]:
    """
    Add an edge to the networkx.MultiDiGraph on behalf of derived classes.

    Subject and object are registered through ``self.add_node`` and the
    identifiers it returns are used for the edge. The edge label is derived
    from the predicate IRI: a leading ``self.BIOLINK`` prefix is removed and
    a label that is itself a CURIE is replaced by its ``curie_lookup``
    mapping, or by ``self.DEFAULT_EDGE_LABEL`` when no mapping is found.
    The edge is only added when no edge with the same key exists already.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple

    Returns
    -------
    Tuple[str, str, str]
        A 3-nary tuple (of the form subject, object, predicate) that represents the edge.
        The returned label carries no ``biolink:`` prefix, unlike the
        ``edge_label`` attribute stored on the edge.

    """
    subject_id = self.add_node(subject_iri)
    object_id = self.add_node(object_iri)
    relation = self.prefix_manager.contract(predicate_iri)
    edge_label = process_iri(predicate_iri)

    if ' ' in edge_label:
        # NOTE(review): the message announces a replacement, but no
        # replacement is performed on edge_label -- confirm which is intended.
        logging.debug("predicate IRI '{}' yields edge_label '{}' that not in snake_case form; replacing ' ' with '_'".format(predicate_iri, edge_label))

    if edge_label.startswith(self.BIOLINK):
        logging.debug("predicate IRI '{}' yields edge_label '{}' that starts with '{}'; removing IRI prefix".format(predicate_iri, edge_label, self.BIOLINK))
        edge_label = edge_label.replace(self.BIOLINK, '')

    if PrefixManager.is_curie(edge_label):
        mapped_name = curie_lookup(edge_label)
        if not mapped_name:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; defaulting back to {}".format(predicate_iri, edge_label, self.DEFAULT_EDGE_LABEL))
            edge_label = self.DEFAULT_EDGE_LABEL
        else:
            logging.debug("predicate IRI '{}' yields edge_label '{}' that is actually a CURIE; Using its mapping instead: {}".format(predicate_iri, edge_label, mapped_name))
            edge_label = mapped_name

    edge_attributes = dict(
        subject=subject_id,
        predicate=str(predicate_iri),
        object=object_id,
        relation=relation,
        edge_label=f"biolink:{edge_label}",
    )
    if 'provided_by' in self.graph_metadata:
        edge_attributes['provided_by'] = self.graph_metadata['provided_by']

    # the key is built from the un-prefixed label
    edge_key = generate_edge_key(subject_id, edge_label, object_id)
    if not self.graph.has_edge(subject_id, object_id, key=edge_key):
        self.graph.add_edge(subject_id, object_id, key=edge_key, **edge_attributes)
    # TODO: support append
    return subject_id, object_id, edge_label
def write_edge(self, record: Dict) -> None:
    """
    Write an edge record to graph.

    The edge key is taken from ``record['key']`` when present; otherwise it
    is generated from the record's subject, predicate and object. The whole
    record is stored as the edge's attributes.

    Parameters
    ----------
    record: Dict
        An edge record

    """
    subject_id = record['subject']
    object_id = record['object']
    if 'key' in record:
        edge_key = record['key']
    else:
        edge_key = generate_edge_key(subject_id, record['predicate'], object_id)
    self.graph.add_edge(subject_id, object_id, edge_key, **record)
def add_edge_attribute(self, subject_iri: Union[URIRef, str], object_iri: URIRef, predicate_iri: URIRef, key: str, value: str) -> None:
    """
    Add an attribute to an edge, delegating to ``self._add_attribute``.

    The attribute name is resolved first: if its lower-cased form is listed
    in ``is_property_multivalued`` that form is used directly, otherwise
    the key is looked up (as a ``URIRef``) in ``property_mapping``. When
    the lookup yields nothing the call is a no-op.

    The edge is located through the key built from the subject CURIE, the
    edge label derived from ``predicate_iri`` and the object CURIE.

    NOTE(review): this method does not create missing nodes or edges itself;
    if the edge is absent, ``get_edge_data`` presumably returns ``None`` and
    that is what gets passed to ``_add_attribute`` -- confirm.

    Parameters
    ----------
    subject_iri: [rdflib.URIRef, str]
        The IRI of the subject node of an edge in rdflib.Graph
    object_iri: rdflib.URIRef
        The IRI of the object node of an edge in rdflib.Graph
    predicate_iri: rdflib.URIRef
        The IRI of the predicate representing an edge in rdflib.Graph
    key: str
        The name of the attribute. Can be a rdflib.URIRef or URI string
    value: str
        The value of the attribute

    """
    lowered_key = key.lower()
    if lowered_key in is_property_multivalued:
        key = lowered_key
    else:
        uri_key = key if isinstance(key, URIRef) else URIRef(key)
        key = property_mapping.get(uri_key)

    if key is None:
        return

    subject_curie = make_curie(subject_iri)
    object_curie = make_curie(object_iri)
    edge_label = process_iri(predicate_iri)
    if is_curie(edge_label):
        edge_label = curie_lookup(edge_label)

    edge_key = generate_edge_key(subject_curie, edge_label, object_curie)
    edge_attributes = self.graph.get_edge_data(subject_curie, object_curie, key=edge_key)
    self._add_attribute(edge_attributes, key, value)
def write_edge(self, record: Dict) -> None:
    """
    Write an edge record to graph.

    An explicit ``key`` entry in the record is used as the edge key; when
    there is none, the key is generated from the record's subject,
    predicate and object. The whole record becomes the edge's attributes.

    Parameters
    ----------
    record: Dict
        An edge record

    """
    edge_key = (
        record["key"]
        if "key" in record
        else generate_edge_key(record["subject"], record["predicate"], record["object"])
    )
    self.graph.add_edge(record["subject"], record["object"], edge_key, **record)
def load_edge(self, edge: Dict) -> None:
    """
    Load an edge into a networkx.MultiDiGraph

    The edge is validated and converted to keyword arguments; it is skipped
    (with an info log) when the result lacks a ``subject`` or ``object``.

    Parameters
    ----------
    edge : dict
        An edge

    """
    validated = Transformer.validate_edge(edge)
    kwargs = PandasTransformer._build_kwargs(validated.copy())

    if not ('subject' in kwargs and 'object' in kwargs):
        logging.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}".format(kwargs)
        )
        return

    subject_id, object_id = kwargs['subject'], kwargs['object']
    edge_key = generate_edge_key(subject_id, kwargs['edge_label'], object_id)
    self.graph.add_edge(subject_id, object_id, edge_key, **kwargs)
def load_edge(self, edge: Relationship) -> None:
    """
    Load an edge from neo4jrestclient.client.Relationship into networkx.MultiDiGraph

    The relationship's properties become the edge attributes; ``subject``,
    ``object`` and ``edge_label`` are filled in from the relationship's
    endpoints and type when the properties do not provide them. Endpoint
    nodes missing from the graph are loaded first.

    Parameters
    ----------
    edge: neo4jrestclient.client.Relationship
        An edge

    """
    start_node = edge.start
    properties = edge.properties
    end_node = edge.end

    # prefer the 'id' property of a node over its internal id
    subject_id = start_node['id'] if 'id' in start_node else start_node.id
    object_id = end_node['id'] if 'id' in end_node else end_node.id

    attributes = dict(properties.items())
    attributes.setdefault('subject', subject_id)
    attributes.setdefault('object', object_id)
    if 'edge_label' not in attributes:
        attributes['edge_label'] = edge.type

    for node_id, node in ((subject_id, start_node), (object_id, end_node)):
        if not self.graph.has_node(node_id):
            self.load_node(node)

    edge_key = generate_edge_key(subject_id, attributes['edge_label'], object_id)
    self.graph.add_edge(subject_id, object_id, edge_key, **attributes)
def consolidate_edges(
    target_graph: BaseGraph, clique_graph: nx.MultiDiGraph, leader_annotation: str
) -> BaseGraph:
    """
    Move all edges from nodes in a clique to the clique leader.

    Cliques are the strongly connected components of ``clique_graph``. For
    every clique with an elected leader:

    - the leader annotation and ``election_strategy`` are copied from the
      clique graph onto the leader node in ``target_graph``;
    - every non-``SAME_AS`` edge touching a non-leader member is removed and
      re-added with the leader in place of that member. The original subject
      and object of the edge are preserved via ``ORIGINAL_SUBJECT_PROPERTY``
      and ``ORIGINAL_OBJECT_PROPERTY``. A re-pointed edge that would become a
      ``SUBCLASS_OF`` self-loop is dropped instead of re-added;
    - ``SAME_AS`` edges touching a non-leader member are removed and their
      endpoints (other than the leader) are collected as equivalent
      identifiers;
    - the leader's ``same_as`` property is set to the collected identifiers
      plus the leader's neighbors in the clique graph, and all of those
      nodes are removed from ``target_graph``.

    Cliques without a leader are skipped. ``target_graph`` is modified in place.

    Parameters
    ----------
    target_graph: kgx.graph.base_graph.BaseGraph
        The original graph
    clique_graph: networkx.MultiDiGraph
        The clique graph
    leader_annotation: str
        The field on a node that signifies that the node is the leader of a clique

    Returns
    -------
    kgx.graph.base_graph.BaseGraph
        The target graph where all edges from nodes in a clique are moved to clique leader

    """
    cliques = list(nx.strongly_connected_components(clique_graph))
    log.info(f"Consolidating edges in {len(cliques)} cliques")
    for clique in cliques:
        log.debug(f"Processing clique: {clique}")
        # a leader is a clique member carrying a truthy leader annotation
        leaders: List = [
            x
            for x in clique
            if leader_annotation in clique_graph.nodes()[x]
            and clique_graph.nodes()[x][leader_annotation]
        ]
        if len(leaders) == 0:
            log.debug("No leader elected for clique {}; skipping".format(clique))
            continue
        # if several members are annotated as leader, the first one found wins
        leader: str = leaders[0]
        # update nodes in target graph
        target_graph.set_node_attributes(
            target_graph,
            {
                leader: {
                    leader_annotation: clique_graph.nodes()[leader].get(
                        leader_annotation
                    ),
                    "election_strategy": clique_graph.nodes()[leader].get(
                        "election_strategy"
                    ),
                }
            },
        )
        # seed the equivalents with the leader's neighbors in the clique graph
        leader_equivalent_identifiers = set([x for x in clique_graph.neighbors(leader)])
        for node in clique:
            if node == leader:
                continue
            log.debug(f"Looking for in_edges for {node}")
            # assumes in_edges() returns a sized collection that can be
            # iterated more than once -- TODO confirm
            in_edges = target_graph.in_edges(node, keys=False, data=True)
            filtered_in_edges = [x for x in in_edges if x[2]["predicate"] != SAME_AS]
            equiv_in_edges = [x for x in in_edges if x[2]["predicate"] == SAME_AS]
            log.debug(f"Moving {len(in_edges)} in-edges from {node} to {leader}")
            for u, v, edge_data in filtered_in_edges:
                # drop the edge under its old key, then re-point it at the leader
                key = generate_edge_key(u, edge_data["predicate"], v)
                target_graph.remove_edge(u, v, edge_key=key)
                edge_data[ORIGINAL_SUBJECT_PROPERTY] = edge_data["subject"]
                edge_data[ORIGINAL_OBJECT_PROPERTY] = edge_data["object"]
                edge_data["object"] = leader
                key = generate_edge_key(u, edge_data["predicate"], leader)
                # a subclass_of self-loop on the leader is not re-added
                if (
                    edge_data["subject"] == edge_data["object"]
                    and edge_data["predicate"] == SUBCLASS_OF
                ):
                    continue
                target_graph.add_edge(
                    edge_data["subject"], edge_data["object"], key, **edge_data
                )

            log.debug(f"Looking for out_edges for {node}")
            # same assumption as for in_edges above -- TODO confirm
            out_edges = target_graph.out_edges(node, keys=False, data=True)
            filtered_out_edges = [x for x in out_edges if x[2]["predicate"] != SAME_AS]
            equiv_out_edges = [x for x in out_edges if x[2]["predicate"] == SAME_AS]
            log.debug(f"Moving {len(out_edges)} out-edges from {node} to {leader}")
            for u, v, edge_data in filtered_out_edges:
                # drop the edge under its old key, then re-root it at the leader
                key = generate_edge_key(u, edge_data["predicate"], v)
                target_graph.remove_edge(u, v, edge_key=key)
                edge_data[ORIGINAL_SUBJECT_PROPERTY] = edge_data["subject"]
                edge_data[ORIGINAL_OBJECT_PROPERTY] = edge_data["object"]
                edge_data["subject"] = leader
                key = generate_edge_key(leader, edge_data["predicate"], v)
                # a subclass_of self-loop on the leader is not re-added
                if (
                    edge_data["subject"] == edge_data["object"]
                    and edge_data["predicate"] == SUBCLASS_OF
                ):
                    continue
                target_graph.add_edge(
                    edge_data["subject"], edge_data["object"], key, **edge_data
                )

            # NOTE(review): this message precedes the *in*-edge loop yet logs
            # the out-edges; the same message is logged again below.
            log.debug(f"equiv out edges: {equiv_out_edges}")
            equivalent_identifiers = set()
            # same_as edges are not moved: their endpoints are recorded as
            # equivalents of the leader and the edges themselves are removed
            for u, v, edge_data in equiv_in_edges:
                if u != leader:
                    equivalent_identifiers.add(u)
                if v != leader:
                    equivalent_identifiers.add(v)
                target_graph.remove_edge(
                    u, v, edge_key=generate_edge_key(u, SAME_AS, v)
                )

            log.debug(f"equiv out edges: {equiv_out_edges}")
            for u, v, edge_data in equiv_out_edges:
                if u != leader:
                    log.debug(f"{u} is an equivalent identifier of leader {leader}")
                    equivalent_identifiers.add(u)
                if v != leader:
                    log.debug(f"{v} is an equivalent identifier of leader {leader}")
                    equivalent_identifiers.add(v)
                target_graph.remove_edge(
                    u, v, edge_key=generate_edge_key(u, SAME_AS, v)
                )

            leader_equivalent_identifiers.update(equivalent_identifiers)

        log.debug(
            f"setting same_as property to leader node with {leader_equivalent_identifiers}"
        )
        target_graph.set_node_attributes(
            target_graph, {leader: {"same_as": list(leader_equivalent_identifiers)}}
        )
        log.debug(
            f"removing equivalent nodes of leader: {leader_equivalent_identifiers}"
        )
        # the equivalents now live on the leader's same_as property only
        for n in leader_equivalent_identifiers:
            target_graph.remove_node(n)
    return target_graph
def test_generate_edge_key():
    """
    An edge key is the subject, predicate and object joined by hyphens.
    """
    expected = 'S:CURIE-related_to-O:CURIE'
    assert generate_edge_key('S:CURIE', 'related_to', 'O:CURIE') == expected
def test_clique_merge7():
    """
    Clique merge where each clique contains a node whose category is
    disjoint from the other members' categories, and that node is not
    expected to be merged even though it takes part in same_as edges.
    """
    prefix_priority = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    graph = NxGraph()

    # a fresh category list is created for every node
    node_categories = [
        ('HGNC:1', 'biolink:Gene'),
        ('OMIM:2', 'biolink:Disease'),
        ('NCBIGene:3', 'biolink:NamedThing'),
        ('ENSEMBL:4', 'biolink:Gene'),
        ('ENSEMBL:6', 'biolink:Gene'),
        ('HGNC:7', 'biolink:Disease'),
        ('NCBIGene:8', 'biolink:Gene'),
    ]
    for node_id, category in node_categories:
        graph.add_node(node_id, category=[category])

    same_as_pairs = [
        ('ENSEMBL:4', 'HGNC:1'),
        ('NCBIGene:3', 'HGNC:1'),
        ('OMIM:2', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    ]
    for subject_id, object_id in same_as_pairs:
        graph.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, 'biolink:same_as', object_id),
            predicate='biolink:same_as',
            relation='owl:equivalentClass',
        )

    merged, clique_graph = clique_merge(
        target_graph=graph, prefix_prioritization_map=prefix_priority
    )

    assert merged.number_of_nodes() == 4
    assert merged.number_of_edges() == 2
    for leader_id in ('HGNC:1', 'NCBIGene:8'):
        assert merged.has_node(leader_id)

    first_leader = merged.nodes()['HGNC:1']
    assert 'NCBIGene:3' in first_leader['same_as']
    assert 'ENSEMBL:4' in first_leader['same_as']
    assert 'OMIM:2' not in first_leader['same_as']

    second_leader = merged.nodes()['NCBIGene:8']
    assert 'ENSEMBL:6' in second_leader['same_as']

    assert merged.has_node('OMIM:2')
    assert not merged.has_node('NCBIGene:3')
    assert not merged.has_node('ENSEMBL:4')
    assert merged.has_node('HGNC:7')
def test_clique_merge9():
    """
    Clique merge where same_as appears as both node and edge properties,
    and a node with a mismatching category also carries a same_as
    property and participates in a same_as edge.
    """
    prefix_priority = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    graph = NxGraph()

    # (node id, category, same_as target or None). A fresh attribute dict is
    # built for every node so that no list is shared between nodes.
    node_specs = [
        ('HGNC:1', 'biolink:Gene', None),
        ('OMIM:2', 'biolink:Disease', 'HGNC:1'),
        ('NCBIGene:3', 'biolink:NamedThing', None),
        ('ENSEMBL:4', 'biolink:Gene', 'HGNC:1'),
        ('ENSEMBL:6', 'biolink:Gene', 'NCBIGene:8'),
        ('HGNC:7', 'biolink:Gene', None),
        ('NCBIGene:8', 'biolink:Gene', None),
    ]
    for node_id, category, equivalent in node_specs:
        attributes = {'category': [category]}
        if equivalent is not None:
            attributes['same_as'] = [equivalent]
        graph.add_node(node_id, **attributes)

    same_as_pairs = [
        ('X:00001', 'OMIM:2'),
        ('NCBIGene:3', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    ]
    for subject_id, object_id in same_as_pairs:
        graph.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, 'biolink:same_as', object_id),
            predicate='biolink:same_as',
            relation='owl:equivalentClass',
        )

    merged, clique_graph = clique_merge(
        target_graph=graph, prefix_prioritization_map=prefix_priority
    )

    assert merged.number_of_nodes() == 4
    assert merged.number_of_edges() == 1
    for leader_id in ('HGNC:1', 'HGNC:7'):
        assert merged.has_node(leader_id)

    first_leader = merged.nodes()['HGNC:1']
    assert 'OMIM:2' not in first_leader['same_as']
    assert 'NCBIGene:3' in first_leader['same_as']
    assert 'ENSEMBL:4' in first_leader['same_as']

    second_leader = merged.nodes()['HGNC:7']
    for member in ('ENSEMBL:6', 'NCBIGene:8'):
        assert member in second_leader['same_as']

    assert merged.has_node('OMIM:2')
def test_generate_edge_key():
    """
    Check that generate_edge_key joins subject, predicate and object with hyphens.
    """
    subject_id, predicate, object_id = "S:CURIE", "related_to", "O:CURIE"
    generated = generate_edge_key(subject_id, predicate, object_id)
    assert generated == "S:CURIE-related_to-O:CURIE"
def triple(self, s: URIRef, p: URIRef, o: URIRef) -> None:
    """
    Parse a triple.

    The triple is routed to a node attribute or to an edge depending on its
    subject, predicate and object (see the branch comments below). Once
    ``self.edge_cache`` holds at least ``self.CACHE_SIZE`` entries, reified
    nodes are dereified and the cached edges are validated, sanitized,
    filtered and yielded, after which the edge cache is cleared.

    NOTE(review): the body contains ``yield`` statements, so calling this
    method returns a generator despite the ``-> None`` annotation.

    Parameters
    ----------
    s: URIRef
        Subject
    p: URIRef
        Predicate
    o: URIRef
        Object

    Yields
    ------
    A 4-tuple ``(k[0], k[1], k[2], data)`` for every cached edge that passes
    ``check_edge_filter`` (only when the cache is flushed), followed by a
    final ``None``.

    """
    self.count += 1
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(p)
    # pick the most specific identifier available for the property
    if element_uri:
        prop_uri = element_uri
    elif predicate:
        prop_uri = predicate
    else:
        prop_uri = property_name

    s_curie = self.prefix_manager.contract(s)
    if s_curie.startswith('biolink') or s_curie.startswith('OBAN'):
        # triples whose subject CURIE starts with these prefixes are not loaded
        log.warning(f"Skipping {s} {p} {o}")
    elif s_curie in self.reified_nodes:
        # subject is a reified node
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif p in self.reification_predicates:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    # NOTE(review): 'predicate' is listed twice in this set literal; the
    # duplicate has no effect
    elif property_name in {'subject', 'predicate', 'object', 'predicate', 'relation'}:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif o in self.reification_types:
        # subject is a reified node
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif element_uri and element_uri in self.node_property_predicates:
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif (
        p in self.node_property_predicates
        or predicate in self.node_property_predicates
        or property_name in self.node_property_predicates
    ):
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif isinstance(o, rdflib.term.Literal):
        # a literal object is stored as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    else:
        # treating predicate as an edge
        self.add_edge(s, o, p)

    if len(self.edge_cache) >= self.CACHE_SIZE:
        # cache is full: dereify every pending reified node first
        while self.reified_nodes:
            n = self.reified_nodes.pop()
            data = self.node_cache.pop(n)
            try:
                self.dereify(n, data)
            except ValueError as e:
                # dereification failed; set the node aside so it can be retried
                log.info(e)
                self._incomplete_nodes[n] = data

        # put the nodes that could not be dereified back for a later attempt
        for n in self._incomplete_nodes.keys():
            self.node_cache[n] = self._incomplete_nodes[n]
            self.reified_nodes.add(n)
        self._incomplete_nodes.clear()

        for k in self.edge_cache.keys():
            # edges carrying neither 'id' nor 'association_id' get their edge key as id
            if 'id' not in self.edge_cache[k] and 'association_id' not in self.edge_cache[k]:
                edge_key = generate_edge_key(
                    self.edge_cache[k]['subject'],
                    self.edge_cache[k]['predicate'],
                    self.edge_cache[k]['object'],
                )
                self.edge_cache[k]['id'] = edge_key
            data = self.edge_cache[k]
            data = validate_edge(data)
            data = sanitize_import(data)
            if 'provided_by' in self.graph_metadata and 'provided_by' not in data.keys():
                data['provided_by'] = self.graph_metadata['provided_by']
            if self.check_edge_filter(data):
                self.edge_properties.update(data.keys())
                yield k[0], k[1], k[2], data
        self.edge_cache.clear()
    yield None
def test_clique_merge1():
    """
    Clique merge where every node in each clique is valid (all nodes share
    the same category).
    """
    prefix_priority = {'biolink:Gene': ['HGNC', 'NCBIGene', 'ENSEMBL', 'OMIM']}
    graph = NxGraph()

    node_ids = [
        'HGNC:1',
        'OMIM:2',
        'NCBIGene:3',
        'ENSEMBL:4',
        'ENSEMBL:6',
        'HGNC:7',
        'NCBIGene:8',
    ]
    for node_id in node_ids:
        # a fresh category list is created for every node
        graph.add_node(node_id, category=['biolink:Gene'])

    same_as_pairs = [
        ('ENSEMBL:4', 'HGNC:1'),
        ('NCBIGene:3', 'HGNC:1'),
        ('OMIM:2', 'HGNC:1'),
        ('ENSEMBL:6', 'NCBIGene:8'),
        ('HGNC:7', 'NCBIGene:8'),
    ]
    for subject_id, object_id in same_as_pairs:
        graph.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, 'biolink:same_as', object_id),
            predicate='biolink:same_as',
            relation='owl:equivalentClass',
        )

    merged, clique_graph = clique_merge(
        target_graph=graph, prefix_prioritization_map=prefix_priority
    )
    print_graph(merged)

    assert merged.number_of_nodes() == 2
    assert merged.number_of_edges() == 0
    for leader_id in ('HGNC:1', 'HGNC:7'):
        assert merged.has_node(leader_id)

    first_leader = merged.nodes()['HGNC:1']
    for member in ('OMIM:2', 'NCBIGene:3', 'ENSEMBL:4'):
        assert member in first_leader['same_as']

    second_leader = merged.nodes()['HGNC:7']
    for member in ('ENSEMBL:6', 'NCBIGene:8'):
        assert member in second_leader['same_as']

    # every non-leader clique member must have been folded into its leader
    for removed in ('OMIM:2', 'NCBIGene:3', 'ENSEMBL:4', 'ENSEMBL:6', 'NCBIGene:8'):
        assert not merged.has_node(removed)
def remap_node_identifier(graph: nx.MultiDiGraph, category: str, alternative_property: str, prefix=None) -> nx.MultiDiGraph:
    """
    Remap a node's 'id' attribute with value from a node's ``alternative_property`` attribute.

    Nodes are relabeled in place. Afterwards the ``subject``, ``object`` and
    ``edge_key`` attributes of every edge whose endpoints no longer match
    its stored subject/object are brought in line with the new node
    identifiers.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        The graph
    category: string
        category referring to nodes whose 'id' needs to be remapped
    alternative_property: string
        property name from which the new value is pulled from
    prefix: string
        signifies that the value for ``alternative_property`` is a list
        and the ``prefix`` indicates which value to pick from the list

    Returns
    -------
    networkx.MultiDiGraph
        The modified graph

    """
    mapping = {}
    for nid, node_data in graph.nodes(data=True):
        # nodes that declare categories but not the requested one are left alone
        if 'category' in node_data and category not in node_data['category']:
            continue
        if alternative_property not in node_data:
            continue

        alternative_values = node_data[alternative_property]
        if isinstance(alternative_values, (list, set, tuple)):
            if prefix:
                for v in alternative_values:
                    if prefix in v:
                        # take the first occurring value that contains the given prefix
                        mapping[nid] = v
                        break
            else:
                # no prefix defined; pick the 1st one. next(iter(...)) also
                # works for sets, which do not support indexing; an empty
                # container simply yields no remapping.
                first_value = next(iter(alternative_values), None)
                if first_value is not None:
                    mapping[nid] = first_value
        elif isinstance(alternative_values, str):
            if prefix:
                if alternative_values.startswith(prefix):
                    mapping[nid] = alternative_values
            else:
                # no prefix defined
                mapping[nid] = alternative_values
        else:
            logging.error(f"Cannot use {alternative_values} from alternative_property {alternative_property}")

    nx.set_node_attributes(graph, values=mapping, name='id')
    nx.relabel_nodes(graph, mapping, copy=False)

    # relabeling renames the nodes but leaves the 'subject'/'object'
    # attributes stored on the edges untouched; refresh the stale ones
    update_edge_keys = {}
    updated_subject_values = {}
    updated_object_values = {}
    for u, v, k, edge_data in graph.edges(keys=True, data=True):
        # compare by value: identity ('is not') on strings depends on interning
        subject_changed = u != edge_data['subject']
        object_changed = v != edge_data['object']
        if subject_changed:
            updated_subject_values[(u, v, k)] = u
        if object_changed:
            updated_object_values[(u, v, k)] = v
        if subject_changed or object_changed:
            update_edge_keys[(u, v, k)] = generate_edge_key(u, edge_data['edge_label'], v)

    nx.set_edge_attributes(graph, values=updated_subject_values, name='subject')
    nx.set_edge_attributes(graph, values=updated_object_values, name='object')
    nx.set_edge_attributes(graph, values=update_edge_keys, name='edge_key')

    return graph
def load_edge(self, edge: Dict) -> Generator:
    """
    Load an edge into an instance of BaseGraph

    The record's ``predicate_id`` is resolved to an edge predicate and
    removed from ``edge``. Fields listed in ``SSSOM_NODE_PROPERTY_MAPPING``
    are routed to the subject or object node (by their ``subject`` /
    ``object`` name prefix); all other fields, plus the graph metadata
    except ``curie_map``, become edge properties.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records: the loaded subject node, the
        loaded object node and, when the edge has both a subject and an
        object, a (subject, object, key, edge data) tuple

    """
    element_uri, canonical_uri, predicate, property_name = process_predicate(
        self.prefix_manager, edge['predicate_id'], self.predicate_mapping
    )
    # first truthy candidate wins; property_name is the fallback
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        # NOTE(review): assigns element_uri, not canonical_uri -- confirm intent
        edge_predicate = element_uri

    data = {
        'subject': edge['subject_id'],
        'predicate': edge_predicate,
        'object': edge['object_id'],
    }
    edge.pop('predicate_id')
    data = validate_edge(data)

    subject_node = {}
    object_node = {}
    for field, value in edge.items():
        if field not in SSSOM_NODE_PROPERTY_MAPPING:
            data[field] = value
            continue

        if field.startswith('subject'):
            node_record = subject_node
        elif field.startswith('object'):
            node_record = object_node
        else:
            log.info(f"Ignoring {field} {value}")
            continue

        mapped_field = SSSOM_NODE_PROPERTY_MAPPING[field]
        if mapped_field == 'category' and not PrefixManager.is_curie(value):
            # a category that is not a CURIE is replaced by a generic one
            value = "biolink:OntologyClass"
        node_record[mapped_field] = value

    records = [self.load_node(subject_node), self.load_node(object_node)]

    for meta_key, meta_value in self.graph_metadata.items():
        if meta_key != 'curie_map':
            data[meta_key] = meta_value

    edge_data = sanitize_import(data.copy())
    if 'subject' in edge_data and 'object' in edge_data:
        if 'id' not in edge_data:
            edge_data['id'] = generate_uuid()
        subject_id = edge_data['subject']
        object_id = edge_data['object']
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        edge_key = generate_edge_key(subject_id, edge_data['predicate'], object_id)
        self.edge_properties.update(list(edge_data.keys()))
        records.append((subject_id, object_id, edge_key, edge_data))
    else:
        log.info(
            "Ignoring edge with either a missing 'subject' or 'object': {}".format(edge_data)
        )

    for record in records:
        yield record
def test_generate_edge_key():
    """
    Verify the key produced by generate_edge_key for a simple triple.
    """
    triple = ('S:CURIE', 'related_to', 'O:CURIE')
    generated = generate_edge_key(*triple)
    assert generated == 'S:CURIE-related_to-O:CURIE'
def remap_node_identifier(
    graph: BaseGraph, category: str, alternative_property: str, prefix=None
) -> BaseGraph:
    """
    Remap a node's 'id' attribute with value from a node's ``alternative_property`` attribute.

    After the nodes are relabelled, every edge whose stored 'subject' or
    'object' no longer equals its endpoint gets that attribute, and its
    'edge_key' attribute, rewritten to match.

    Parameters
    ----------
    graph: kgx.graph.base_graph.BaseGraph
        The graph
    category: string
        category referring to nodes whose 'id' needs to be remapped
    alternative_property: string
        property name from which the new value is pulled from
    prefix: string
        signifies that the value for ``alternative_property`` is a list
        and the ``prefix`` indicates which value to pick from the list

    Returns
    -------
    kgx.graph.base_graph.BaseGraph
        The modified graph

    """
    mapping: Dict = {}
    for nid, node_data in graph.nodes(data=True):
        # Nodes that declare categories but not the requested one are skipped;
        # nodes without a 'category' attribute are still considered.
        if "category" in node_data and category not in node_data["category"]:
            continue
        if alternative_property not in node_data:
            continue

        alternative_values = node_data[alternative_property]
        if isinstance(alternative_values, (list, set, tuple)):
            if prefix:
                for v in alternative_values:
                    if prefix in v:
                        # take the first occurring value that contains the given prefix
                        mapping[nid] = {"id": v}
                        break
            elif alternative_values:
                # no prefix defined; pick the 1st one from list
                # (an empty collection offers nothing to remap to, so the
                # node keeps its identifier)
                mapping[nid] = {"id": next(iter(alternative_values))}
        elif isinstance(alternative_values, str):
            if prefix:
                if alternative_values.startswith(prefix):
                    mapping[nid] = {"id": alternative_values}
            else:
                # no prefix defined
                mapping[nid] = {"id": alternative_values}
        else:
            log.error(
                f"Cannot use {alternative_values} from alternative_property {alternative_property}"
            )

    graph.set_node_attributes(graph, attributes=mapping)
    graph.relabel_nodes(graph, {old_id: attrs["id"] for old_id, attrs in mapping.items()})

    # Bring the 'subject', 'object' and 'edge_key' attributes of every edge
    # back in line with its (possibly relabelled) endpoints.
    update_edge_keys = {}
    updated_subject_values = {}
    updated_object_values = {}
    for u, v, k, edge_data in graph.edges(data=True, keys=True):
        # Compare by value: equal strings are not guaranteed to be the same
        # object, so an identity check would flag unchanged edges as stale.
        subject_changed = u != edge_data["subject"]
        object_changed = v != edge_data["object"]
        if subject_changed:
            updated_subject_values[(u, v, k)] = {"subject": u}
        if object_changed:
            updated_object_values[(u, v, k)] = {"object": v}
        if subject_changed or object_changed:
            update_edge_keys[(u, v, k)] = {
                "edge_key": generate_edge_key(u, edge_data["predicate"], v)
            }

    graph.set_edge_attributes(graph, attributes=updated_subject_values)
    graph.set_edge_attributes(graph, attributes=updated_object_values)
    graph.set_edge_attributes(graph, attributes=update_edge_keys)
    return graph
def triple(self, s: URIRef, p: URIRef, o: URIRef) -> None:
    """
    Parse a triple.

    The triple is recorded either as a node attribute on the subject or as
    an edge, depending on the subject, predicate and object. Once the edge
    cache holds at least ``CACHE_SIZE`` entries, pending reified nodes are
    dereified and the cached edges are emitted.

    .. Note::
        Despite the ``None`` return annotation, this function contains
        ``yield`` statements and is therefore a generator. It yields a
        ``(k[0], k[1], k[2], data)`` tuple for every cached edge that passes
        ``check_edge_filter`` during a flush, and always ends by yielding
        ``None``.

    Parameters
    ----------
    s: URIRef
        Subject
    p: URIRef
        Predicate
    o: URIRef
        Object

    """
    self.count += 1
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(p)
    # Preference order: element URI, then predicate, then property name.
    prop_uri = element_uri or predicate or property_name

    s_curie = self.prefix_manager.contract(s)
    if s_curie.startswith(("biolink", "OBAN")):
        log.warning(f"Skipping {s} {p} {o}")
    elif s_curie in self.reified_nodes:
        # subject is a reified node
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif (
        p in self.reification_predicates
        or property_name in {"subject", "predicate", "object", "relation"}
        or o in self.reification_types
    ):
        # subject is a reified node seen for the first time
        self.reified_nodes.add(s_curie)
        self.add_node_attribute(s, key=prop_uri, value=o)
    elif (
        (element_uri and element_uri in self.node_property_predicates)
        or p in self.node_property_predicates
        or predicate in self.node_property_predicates
        or property_name in self.node_property_predicates
        or isinstance(o, rdflib.term.Literal)
    ):
        # treating predicate as a node property
        self.add_node_attribute(s, key=prop_uri, value=o)
    else:
        # treating predicate as an edge
        self.add_edge(s, o, p)

    if len(self.edge_cache) >= self.CACHE_SIZE:
        # Dereify every pending reified node; those that fail are parked.
        while self.reified_nodes:
            node_id = self.reified_nodes.pop()
            node_data = self.node_cache.pop(node_id)
            try:
                self.dereify(node_id, node_data)
            except ValueError as err:
                self.owner.log_error(
                    entity=str(node_data),
                    error_type=ErrorType.INVALID_EDGE_PROPERTY,
                    message=str(err),
                    message_level=MessageLevel.WARNING)
                self._incomplete_nodes[node_id] = node_data

        # Put the parked nodes back so a later flush can retry them.
        for node_id, node_data in self._incomplete_nodes.items():
            self.node_cache[node_id] = node_data
            self.reified_nodes.add(node_id)
        self._incomplete_nodes.clear()

        for cache_key in self.edge_cache.keys():
            cached = self.edge_cache[cache_key]
            if "id" not in cached and "association_id" not in cached:
                # Edges without an identifier get one derived from the triple.
                cached["id"] = generate_edge_key(
                    cached["subject"],
                    cached["predicate"],
                    cached["object"],
                )
            edge_record = self.validate_edge(cached)
            edge_record = sanitize_import(edge_record)
            self.set_edge_provenance(edge_record)
            if self.check_edge_filter(edge_record):
                self.edge_properties.update(edge_record.keys())
                yield cache_key[0], cache_key[1], cache_key[2], edge_record
        self.edge_cache.clear()
    yield None
def add_edge(
    self,
    subject_iri: URIRef,
    object_iri: URIRef,
    predicate_iri: URIRef,
    data: Optional[Dict[Any, Any]] = None,
) -> Dict:
    """
    Add an edge to cache.

    Subject and object nodes are taken from the node cache, or created via
    ``add_node`` when absent. If an edge with the same subject, object and
    edge key is already cached it is updated through ``update_edge``;
    otherwise a new edge record is stored.

    .. Note::
        When a non-empty ``data`` dict is given for a new edge, that same
        dict is updated in place and becomes the cached edge record.

    Parameters
    ----------
    subject_iri: rdflib.URIRef
        Subject IRI for the subject in a triple
    object_iri: rdflib.URIRef
        Object IRI for the object in a triple
    predicate_iri: rdflib.URIRef
        Predicate IRI for the predicate in a triple
    data: Optional[Dict[Any, Any]]
        Additional edge properties

    Returns
    -------
    Dict
        The edge data

    """
    (element_uri, canonical_uri, predicate, property_name) = self.process_predicate(
        predicate_iri
    )
    subject_curie = self.prefix_manager.contract(subject_iri)
    object_curie = self.prefix_manager.contract(object_iri)
    if subject_curie in self.node_cache:
        subject_node = self.node_cache[subject_curie]
    else:
        subject_node = self.add_node(subject_iri)

    if object_curie in self.node_cache:
        object_node = self.node_cache[object_curie]
    else:
        object_node = self.add_node(object_iri)

    # Preference order: element URI, then predicate, then property name.
    edge_predicate = element_uri if element_uri else predicate
    if not edge_predicate:
        edge_predicate = property_name

    if ' ' in edge_predicate:
        log.debug(
            f"predicate IRI '{predicate_iri}' yields edge_predicate '{edge_predicate}' that not in snake_case form; replacing ' ' with '_'"
        )
        # Perform the replacement that the message above announces.
        edge_predicate = edge_predicate.replace(' ', '_')

    edge_predicate_prefix = self.prefix_manager.get_prefix(edge_predicate)
    if (
        edge_predicate_prefix not in {'biolink', 'rdf', 'rdfs', 'skos', 'owl'}
        and PrefixManager.is_curie(edge_predicate)
    ):
        # A CURIE from any other namespace falls back to the default predicate.
        edge_predicate = DEFAULT_EDGE_PREDICATE

    subject_id = subject_node['id']
    object_id = object_node['id']
    edge_key = generate_edge_key(subject_id, edge_predicate, object_id)
    cache_key = (subject_id, object_id, edge_key)
    if cache_key in self.edge_cache:
        # edge already exists; process kwargs and update the edge
        edge_data = self.update_edge(subject_id, object_id, edge_key, data)
    else:
        # add a new edge
        edge_data = data if data else {}
        edge_data.update(
            {
                'subject': subject_id,
                'predicate': f"{edge_predicate}",
                'object': object_id,
            }
        )
        if 'relation' not in edge_data:
            edge_data['relation'] = predicate
        if 'provided_by' in self.graph_metadata and 'provided_by' not in edge_data:
            edge_data['provided_by'] = self.graph_metadata['provided_by']
        self.edge_cache[cache_key] = edge_data
    return edge_data
def load_edge(self, edge: Dict) -> Generator:
    """
    Load an edge into an instance of BaseGraph

    The predicate is resolved via ``process_predicate``. Every remaining
    field of ``edge`` that is listed in ``SSSOM_NODE_PROPERTY_MAPPING`` is
    routed to the subject or object node record (by its ``subject`` /
    ``object`` name prefix); all other fields become edge properties.
    Nothing is yielded when ``validate_edge`` or either ``load_node`` call
    returns a falsy value.

    .. Note::
        ``edge`` is modified in place: its ``predicate_id`` entry is deleted.

    Parameters
    ----------
    edge : Dict
        An edge

    Returns
    -------
    Generator
        A generator for node and edge records: the subject node record, the
        object node record, then a ``(subject, object, key, edge_data)``
        tuple when the sanitized edge has both a 'subject' and an 'object'

    """
    (element_uri, canonical_uri, predicate, property_name) = process_predicate(
        self.prefix_manager, edge["predicate_id"], self.predicate_mapping
    )
    # Preference order: element URI, then predicate, then property name.
    edge_predicate = element_uri or predicate or property_name
    if canonical_uri:
        edge_predicate = element_uri

    data = {
        "subject": edge["subject_id"],
        "predicate": edge_predicate,
        "object": edge["object_id"],
    }
    del edge["predicate_id"]
    data = self.validate_edge(data)
    if not data:
        # validate_edge returned a falsy value: emit nothing for this edge
        return

    subject_node = {}
    object_node = {}
    for field, value in edge.items():
        if field not in SSSOM_NODE_PROPERTY_MAPPING:
            # Not a node property: keep it on the edge itself.
            data[field] = value
            continue
        if field.startswith("subject"):
            node_record = subject_node
        elif field.startswith("object"):
            node_record = object_node
        else:
            log.info(f"Ignoring {field} {value}")
            continue
        mapped_field = SSSOM_NODE_PROPERTY_MAPPING[field]
        if mapped_field == "category" and not PrefixManager.is_curie(value):
            # A category that is not a CURIE is replaced by a fixed class.
            value = "biolink:OntologyClass"
        node_record[mapped_field] = value

    subject_record = self.load_node(subject_node)
    object_record = self.load_node(object_node)
    if not subject_record or not object_record:
        # at least one endpoint could not be loaded: emit nothing
        return
    records = [subject_record, object_record]

    # Graph-level metadata is copied onto the edge, except the CURIE map.
    for meta_key, meta_value in self.graph_metadata.items():
        if meta_key != "curie_map":
            data[meta_key] = meta_value

    edge_data = sanitize_import(data.copy())
    if "subject" not in edge_data or "object" not in edge_data:
        self.owner.log_error(
            entity=str(edge_data),
            error_type=ErrorType.MISSING_NODE,
            message="Ignoring edge with either a missing 'subject' or 'object'",
            message_level=MessageLevel.WARNING
        )
    else:
        if "id" not in edge_data:
            edge_data["id"] = generate_uuid()
        subject_id = edge_data["subject"]
        object_id = edge_data["object"]
        self.set_edge_provenance(edge_data)
        edge_key = generate_edge_key(subject_id, edge_data["predicate"], object_id)
        self.edge_properties.update(list(edge_data.keys()))
        records.append((subject_id, object_id, edge_key, edge_data))

    for record in records:
        yield record
def consolidate_edges(self) -> nx.MultiDiGraph:
    """
    Move all edges from nodes in a clique to the clique leader.

    For each connected component of the clique graph that has a leader,
    every other member's edges in the target graph are handled as follows:
    edges whose 'edge_label' is not ``SAME_AS`` are re-attached to the
    leader (the previous endpoints are kept in '_original_subject' and
    '_original_object'); ``SAME_AS`` edges are removed and their non-leader
    endpoints are recorded in the leader's 'aliases' and removed from the
    target graph. Cliques without a leader are skipped.

    Edges are removed using the key ``generate_edge_key(u, edge_label, v)``.

    Returns
    -------
    nx.MultiDiGraph
        The target graph where all edges from nodes in a clique are moved to clique leader

    """
    for members in list(nx.connected_components(self.clique_graph)):
        logging.info("processing clique: {}".format(members))
        leaders = [
            member for member in members
            if self.clique_graph.nodes[member].get(LEADER_ANNOTATION)
        ]
        if not leaders:
            logging.debug("No leader for clique {}; skipping".format(members))
            continue
        leader = leaders[0]

        # Copy the leader annotations from the clique graph to the target graph.
        leader_info = self.clique_graph.nodes[leader]
        nx.set_node_attributes(
            self.target_graph,
            {
                leader: {
                    LEADER_ANNOTATION: leader_info.get(LEADER_ANNOTATION),
                    'election_strategy': leader_info.get('election_strategy'),
                }
            },
        )

        for member in members:
            if member == leader:
                continue

            # Incoming edges: split into edges to move and equivalence edges.
            incoming = self.target_graph.in_edges(member, True)
            moved_in = []
            same_as_in = []
            for record in incoming:
                if record[2]['edge_label'] == SAME_AS:
                    same_as_in.append(record)
                else:
                    moved_in.append(record)
            logging.debug("Moving {} in-edges from {} to {}".format(len(incoming), member, leader))
            for src, dst, attrs in moved_in:
                old_key = generate_edge_key(src, attrs['edge_label'], dst)
                self.target_graph.remove_edge(src, dst, key=old_key)
                attrs['_original_subject'] = attrs['subject']
                attrs['_original_object'] = attrs['object']
                attrs['object'] = leader
                new_key = generate_edge_key(src, attrs['edge_label'], leader)
                self.target_graph.add_edge(attrs['subject'], attrs['object'], new_key, **attrs)

            # Outgoing edges: same split, computed after the in-edges moved.
            outgoing = self.target_graph.out_edges(member, True)
            moved_out = []
            same_as_out = []
            for record in outgoing:
                if record[2]['edge_label'] == SAME_AS:
                    same_as_out.append(record)
                else:
                    moved_out.append(record)
            logging.debug("Moving {} out-edges from {} to {}".format(len(outgoing), member, leader))
            for src, dst, attrs in moved_out:
                old_key = generate_edge_key(src, attrs['edge_label'], dst)
                self.target_graph.remove_edge(src, dst, key=old_key)
                attrs['_original_subject'] = attrs['subject']
                attrs['_original_object'] = attrs['object']
                attrs['subject'] = leader
                new_key = generate_edge_key(leader, attrs['edge_label'], dst)
                self.target_graph.add_edge(attrs['subject'], attrs['object'], new_key, **attrs)

            # Every non-leader endpoint of an equivalence edge is an alias.
            aliases = self.target_graph.nodes[leader].get('aliases', [])
            for src, dst, attrs in same_as_in:
                for endpoint in (src, dst):
                    if endpoint != leader:
                        aliases.append(endpoint)
                self.target_graph.remove_edge(src, dst, key=generate_edge_key(src, SAME_AS, dst))

            logging.debug("equiv out edges: {}".format(same_as_out))
            for src, dst, attrs in same_as_out:
                for endpoint in (src, dst):
                    if endpoint != leader:
                        logging.debug("{} is an alias of leader {}".format(endpoint, leader))
                        aliases.append(endpoint)
                self.target_graph.remove_edge(src, dst, key=generate_edge_key(src, SAME_AS, dst))

            # set aliases for leader
            nx.set_node_attributes(self.target_graph, {leader: {'aliases': aliases}})
            # remove all node instances of aliases
            # NOTE(review): aliases may name clique members this loop has not
            # visited yet; removing them here presumably also drops any of
            # their edges that were not moved to the leader - confirm intent.
            self.target_graph.remove_nodes_from(aliases)

    return self.target_graph
def test_clique_merge7():
    """
    Test for clique merge where each same_as-connected group contains one
    node (OMIM:2, HGNC:7) whose category is biolink:Disease while the other
    nodes are genes. Those two nodes are expected to stay in the graph as
    separate nodes instead of being merged into a leader.
    """
    ppm = {"biolink:Gene": ["HGNC", "NCBIGene", "ENSEMBL", "OMIM"]}
    node_categories = [
        ("HGNC:1", "biolink:Gene"),
        ("OMIM:2", "biolink:Disease"),
        ("NCBIGene:3", "biolink:NamedThing"),
        ("ENSEMBL:4", "biolink:Gene"),
        ("ENSEMBL:6", "biolink:Gene"),
        ("HGNC:7", "biolink:Disease"),
        ("NCBIGene:8", "biolink:Gene"),
    ]
    same_as_pairs = [
        ("ENSEMBL:4", "HGNC:1"),
        ("NCBIGene:3", "HGNC:1"),
        ("OMIM:2", "HGNC:1"),
        ("ENSEMBL:6", "NCBIGene:8"),
        ("HGNC:7", "NCBIGene:8"),
    ]

    g1 = NxGraph()
    for node_id, node_category in node_categories:
        g1.add_node(node_id, category=[node_category])
    for subject_id, object_id in same_as_pairs:
        g1.add_edge(
            subject_id,
            object_id,
            edge_key=generate_edge_key(subject_id, "biolink:same_as", object_id),
            predicate="biolink:same_as",
            relation="owl:equivalentClass",
        )

    updated_graph, clique_graph = clique_merge(
        target_graph=g1, prefix_prioritization_map=ppm
    )

    assert updated_graph.number_of_nodes() == 4
    assert updated_graph.number_of_edges() == 2
    for leader_id in ("HGNC:1", "NCBIGene:8"):
        assert updated_graph.has_node(leader_id)

    # Gene-compatible members are merged into HGNC:1; the disease node is not.
    first_leader = updated_graph.nodes()["HGNC:1"]
    for merged_id in ("NCBIGene:3", "ENSEMBL:4"):
        assert merged_id in first_leader["same_as"]
    assert "OMIM:2" not in first_leader["same_as"]

    second_leader = updated_graph.nodes()["NCBIGene:8"]
    assert "ENSEMBL:6" in second_leader["same_as"]

    assert updated_graph.has_node("OMIM:2")
    for merged_id in ("NCBIGene:3", "ENSEMBL:4"):
        assert not updated_graph.has_node(merged_id)
    assert updated_graph.has_node("HGNC:7")