def test_two(self):
    """Parse a nested two-level eNewick string and compare it with the cluster-built graph.

    The test passes when the digraph produced by `enewick_to_digraph` is
    reported isomorphic to the graph produced by `construct` for the
    clusters ``(1, 2)`` and ``(3,)``.
    """
    newick = "((1, 2), 3);"
    parsed_graph = enewick_to_digraph(newick)
    expected_graph = construct([(1, 2), (3,)])
    matcher = GraphMatcher(parsed_graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())
def is_same(self, prod_rule, ignore_order=False):
    """Decide whether `prod_rule` is the same production rule as this one.

    Cheap invariants are compared first (start-rule flag, node/edge counts,
    multisets of node and edge symbols); only when all of them agree is a
    full isomorphism search between the two right-hand sides attempted.

    Parameters
    ----------
    prod_rule : ProductionRule
        production rule to be compared with `self`
    ignore_order : bool
        forwarded to the node and edge matching callbacks

    Returns
    -------
    is_same : bool
    isomap : dict
        isomorphism of nodes and hyperedges; empty when the rules differ.
        ex) {'bond_42': 'bond_37', 'bond_2': 'bond_1',
             'e36': 'e11', 'e16': 'e12', 'e25': 'e18',
             'bond_40': 'bond_38', 'e26': 'e21', 'bond_41': 'bond_39'}.
        key comes from `prod_rule`, value comes from `self`.
    """
    def symbol_census(items, attr_of):
        # Multiset of the 'symbol' attribute over the given nodes or edges.
        return Counter(attr_of(item)['symbol'] for item in items)

    # A start rule can only equal another start rule.
    self_is_start = bool(self.is_start_rule)
    if self_is_start != bool(prod_rule.is_start_rule):
        return False, {}
    # The left-hand side is compared only for non-start rules.
    if not self_is_start and prod_rule.lhs.num_nodes != self.lhs.num_nodes:
        return False, {}

    theirs = prod_rule.rhs
    ours = self.rhs
    if theirs.num_nodes != ours.num_nodes:
        return False, {}
    if theirs.num_edges != ours.num_edges:
        return False, {}

    their_node_symbols = symbol_census(theirs.nodes, theirs.node_attr)
    our_node_symbols = symbol_census(ours.nodes, ours.node_attr)
    if their_node_symbols != our_node_symbols:
        return False, {}

    their_edge_symbols = symbol_census(theirs.edges, theirs.edge_attr)
    our_edge_symbols = symbol_census(ours.edges, ours.edge_attr)
    if their_edge_symbols != our_edge_symbols:
        return False, {}

    matcher = GraphMatcher(
        theirs.hg,
        ours.hg,
        partial(_node_match_prod_rule, ignore_order=ignore_order),
        partial(_edge_match, ignore_order=ignore_order))
    try:
        first_isomap = next(matcher.isomorphisms_iter())
    except StopIteration:
        return False, {}
    return True, first_isomap
def test_one(self):
    """A two-leaf eNewick tree must be isomorphic to the graph built from one cluster."""
    newick = "(1, 2);"
    parsed_graph = enewick_to_digraph(newick)
    expected_graph = construct([('1', '2'), ])
    matcher = GraphMatcher(parsed_graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())
def test_five(self):
    """Check node count and isomorphism for an eNewick string with a labelled hybrid node.

    The parsed digraph is expected to contain 7 nodes and to be isomorphic
    to the graph built by `construct` from the clusters (1, 2) and (2, 3).
    """
    newick = "((1, (2)h#H1)x,(h#H1,3)y)r;"
    expected_clusters = [(1, 2), (2, 3)]
    parsed_graph = enewick_to_digraph(newick)
    node_total = len(parsed_graph.nodes())
    self.assertEqual(node_total, 7)
    expected_graph = construct(expected_clusters)
    matcher = GraphMatcher(parsed_graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())
def final_state_ligand_degeneracy(self):
    """Count the mappings produced when matching `self.final_state_mol_nx` against itself.

    Returns
    -------
    int
        number of mappings yielded by ``GraphMatcher.match()`` for the
        graph matched with itself.
    """
    from networkx.algorithms.isomorphism import GraphMatcher

    self_matcher = GraphMatcher(self.final_state_mol_nx,
                                self.final_state_mol_nx)
    # Exhaust the matcher and count one per yielded mapping.
    return sum(1 for _ in self_matcher.match())
def _matcher(self, other):
    """Build the GraphMatcher used to look for `self` inside `other`.

    QueryCGRContainer < CGRContainer
    QueryContainer < QueryCGRContainer[more general]

    Raises TypeError when `other` is neither a CGRContainer nor a
    QueryCGRContainer.
    """
    if isinstance(other, CGRContainer):
        # Operands are swapped on purpose so that the second callback
        # argument is the left operand of ``==``.
        def attrs_equal(x, y):
            return y == x
    elif isinstance(other, QueryCGRContainer):
        def attrs_equal(x, y):
            return x == y
    else:
        raise TypeError('only cgr_query-cgr or cgr_query-cgr_query possible')

    # The same comparison serves as node matcher and as edge matcher.
    return GraphMatcher(other, self, attrs_equal, attrs_equal)
def test_three(self):
    """Exercise hybrid-node parsing: node count, leaf set and phylonet isomorphism."""
    newick = "((4, 5#1)2, (#1, 6)3);"

    digraph = enewick_to_digraph(newick)
    self.assertEqual(len(digraph.nodes()), 6)

    # After hybrid calculation the leaves are expected to be 1, 4 and 6.
    digraph = calc_hybrid(digraph)
    found_leaves = get_leaf_nodes(digraph)
    expected_leaves = ['1', '4', '6', ]
    self.assertItemsEqual(found_leaves, expected_leaves)

    expected_graph = construct(["1,2", "2,3"])
    phylonet_graph = enewick_to_phylonet(newick)
    matcher = GraphMatcher(phylonet_graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())
def _matcher(self, other):
    """Build the GraphMatcher used to look for `self` inside `other`.

    QueryContainer < MoleculeContainer
    QueryContainer < QueryContainer[more general]
    QueryContainer < QueryCGRContainer[more general]

    Raises TypeError for any other type of `other`.
    """
    if isinstance(other, MoleculeContainer):
        # Operands are swapped on purpose so that the second callback
        # argument is the left operand of ``==``.
        def attrs_equal(x, y):
            return y == x
    elif isinstance(other, (QueryContainer, QueryCGRContainer)):
        def attrs_equal(x, y):
            return x == y
    else:
        raise TypeError(
            'only query-molecule, query-query or query-cgr_query possible')

    # The same comparison serves as node matcher and as edge matcher.
    return GraphMatcher(other, self, attrs_equal, attrs_equal)
def test_two(self):
    """Check hard clusters and phylonet isomorphism for a network with one hybrid node."""
    newick = "((4, 5#1)2, (#1, 6)3);"

    digraph = enewick_to_digraph(newick)
    self.assertEqual(len(digraph.nodes()), 6)

    phylonet_graph = enewick_to_phylonet(newick)

    expected_hard = "1,4 1,4,6 1,6 4 6".split()
    found_hard = cluster_hard(digraph)
    self.assertItemsEqual(found_hard, expected_hard)

    self.assertEqual(len(phylonet_graph.nodes()), 7)

    expected_graph = construct([(1, 2), (2, 3), ])
    matcher = GraphMatcher(phylonet_graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())
def _matcher(self, other):
    """Build the GraphMatcher used to look for `self` inside `other`.

    CGRContainer < CGRContainer

    Raises TypeError when `other` is not a CGRContainer.
    """
    if not isinstance(other, CGRContainer):
        raise TypeError('only cgr-cgr possible')

    def attrs_equal(x, y):
        return x == y

    # The same comparison serves as node matcher and as edge matcher.
    return GraphMatcher(other, self, attrs_equal, attrs_equal)
def test_subgraph_isomorphism_undirected(self, backend):
    """The backend's networkx facade must yield as many self-monomorphisms as plain networkx.

    A triangle A-B-C is inserted into both the backend-backed graph and a
    reference ``nx.Graph``; the numbers of subgraph monomorphisms of each
    graph onto itself are then compared.
    """
    G = Graph(backend=backend(directed=False))
    nxG = nx.Graph()
    for source, target in (("A", "B"), ("B", "C"), ("C", "A")):
        G.nx.add_edge(source, target)
        nxG.add_edge(source, target)

    from networkx.algorithms.isomorphism import GraphMatcher

    backend_matches = list(
        GraphMatcher(G.nx, G.nx).subgraph_monomorphisms_iter())
    reference_matches = list(
        GraphMatcher(nxG, nxG).subgraph_monomorphisms_iter())
    assert len(backend_matches) == len(reference_matches)
def test_isomorphisms_on_undirected_random_graph(self, host, motif):
    """`find_motifs` in isomorphism/count mode must agree with GraphMatcher's subgraph isomorphisms."""
    reported_count = find_motifs(
        motif, host, isomorphisms_only=True, count_only=True)
    reference_matches = list(
        GraphMatcher(host, motif).subgraph_isomorphisms_iter())
    assert reported_count == len(reference_matches)
def get_cgr_matcher(self, g, h):
    """Return a GraphMatcher for `g` and `h` with callbacks chosen by the type of `g`.

    When `g` is a CGRContainer the plain node/edge matchers are used;
    otherwise the ``*_reagents`` variants are used.
    """
    if isinstance(g, CGRContainer):
        nm, em = self.__node_match, self.__edge_match
    else:
        nm, em = self.__node_match_reagents, self.__edge_match_reagents
    return GraphMatcher(g, h, node_match=nm, edge_match=em)
def test_one(self):
    """Check node counts, isomorphism and hard clusters for a four-taxon tree."""
    newick = "((MOUSE,(HUMAN,RAT)),CIOIN);"

    graph = enewick_to_digraph(newick)
    self.assertEqual(len(graph.nodes()), 7)

    # From here on the phylonet representation is the one under test.
    graph = enewick_to_phylonet(newick)
    self.assertEqual(len(graph.nodes()), 7)

    expected_graph = construct([(1, 2, 3), (2, 3), (4,)])
    matcher = GraphMatcher(graph, expected_graph)
    self.assertTrue(matcher.is_isomorphic())

    expected_hard = [
        'CIOIN,HUMAN,MOUSE,RAT',
        'CIOIN',
        'HUMAN',
        'MOUSE',
        'RAT',
        'HUMAN,MOUSE,RAT',
        'HUMAN,RAT',
    ]
    found_hard = cluster_hard(graph)
    self.assertEqual(len(found_hard), 7)
    self.assertItemsEqual(found_hard, expected_hard)
def check_test_graph(given_graph):
    """Return 1 when `given_graph` is planar and has no subgraph isomorphic to K4, else 0."""
    # Planarity check: the first element of the result is the verdict.
    is_planar = nx.check_planarity(given_graph)[0]
    if not is_planar:
        return 0

    # Reject graphs with a subgraph isomorphic to the complete graph on 4 nodes.
    k4_matcher = GraphMatcher(given_graph, nx.complete_graph(4))
    contains_k4 = k4_matcher.subgraph_is_isomorphic()

    # Some other random checks!
    return 0 if contains_k4 else 1
def test_two_hop_count_matches_nx(self):
    """`find_motifs` must find as many two-hop paths as networkx finds monomorphisms."""
    host = nx.fast_gnp_random_graph(10, 0.5, directed=False)

    # Motif: the path A - B - C.
    motif = nx.Graph()
    for source, target in (("A", "B"), ("B", "C")):
        motif.add_edge(source, target)

    found = find_motifs(motif, host)
    reference_matches = list(
        GraphMatcher(host, motif).subgraph_monomorphisms_iter())
    assert len(found) == len(reference_matches)
def _matcher(self, other):
    """Return the VF2 GraphMatcher used to look for `self` inside `other`.

    MoleculeContainer < MoleculeContainer
    MoleculeContainer < CGRContainer

    Raises
    ------
    TypeError
        when `other` is neither a MoleculeContainer nor the class returned
        by ``self._get_subclass('CGRContainer')``.
    """
    accepted = (self._get_subclass('CGRContainer'), MoleculeContainer)
    if not isinstance(other, accepted):
        # The message previously read 'only cgr-cgr possible', which did not
        # describe the combinations this method actually accepts.
        raise TypeError('only molecule-molecule or molecule-cgr possible')

    # Node and edge attributes are both compared with plain equality.
    return GraphMatcher(other, self, lambda x, y: x == y, lambda x, y: x == y)
def test_iso(self):
    """Every hypergraph rebuilt from its learned rule sequence must match its source.

    The grammar is learned from ``test.smi`` located next to this file. For
    each learned production-rule sequence the hypergraph is reconstructed
    and compared with the input at the same index: node count, edge count
    and existence of an isomorphism between the underlying graphs.
    """
    base = os.path.dirname(os.path.abspath(__file__))
    # Materialise once; the list is indexed repeatedly below, so copying it
    # again on every access (as before) was pure overhead.
    hg_list = list(HGGen(os.path.join(base, "test.smi")))
    hrg = HyperedgeReplacementGrammar()
    prod_rule_seq_list = hrg.learn(hg_list)

    not_iso = 0
    for idx, each_prod_rule_seq in enumerate(prod_rule_seq_list):
        hg = hrg.construct(each_prod_rule_seq)
        expected_hg = hg_list[idx]
        self.assertEqual(len(hg.nodes), len(expected_hg.nodes))
        self.assertEqual(len(hg.edges), len(expected_hg.edges))

        gm = GraphMatcher(hg.hg, expected_hg.hg)
        # None signals that the matcher yielded no isomorphism at all.
        isomap = next(gm.isomorphisms_iter(), None)
        if isomap is None:
            print("not isomorphic")
            not_iso += 1

    self.assertEqual(not_iso, 0)
    print("not_iso = {}".format(not_iso))
def sparse_orbigraph_nx(n, H):
    """Enumerate automorphisms of the graph described by `H` and report node orbits.

    A graph on nodes ``0..n-1`` is built with ``H[i, i]`` stored as the
    node attribute ``syndrome`` and an edge ``i - j`` wherever
    ``H.get((i, j))`` is truthy. Every isomorphism of that graph onto
    itself that preserves ``syndrome`` is enumerated; each pair ``(i, j)``
    of such a mapping becomes an edge of a second graph, whose connected
    components are then counted.

    Parameters
    ----------
    n : int
        number of nodes.
    H : mapping keyed by ``(i, j)`` tuples
        must support ``H[i, i]`` and ``H.get((i, j))``.

    Returns
    -------
    None
        the isomorphism count and component count are printed.
    """
    import networkx as nx
    from networkx.algorithms.isomorphism import GraphMatcher

    bag = nx.Graph()
    for i in range(n):
        bag.add_node(i, syndrome=H[i, i])
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            if H.get((i, j)):
                bag.add_edge(i, j)

    def node_match(n0, n1):
        return n0['syndrome'] == n1['syndrome']

    matcher = GraphMatcher(bag, bag, node_match=node_match)

    print("search...")

    graph = nx.Graph()
    for i in range(n):
        graph.add_node(i)

    count = 0
    for iso in matcher.isomorphisms_iter():  # too slow :P
        write('.')
        for i, j in iso.items():
            graph.add_edge(i, j)
        count += 1
    print()

    # connected_components returns a generator in current networkx, so it is
    # materialised before taking its length (len() on it raised TypeError).
    equs = list(nx.connected_components(graph))
    m = len(equs)

    print("isomorphisms:", count)
    print("components:", m)
def check_patterns(G1, G2, saveisolist=False, readisolist=False, plot=False):
    """Test whether a subgraph of `G1` is isomorphic to `G2`, optionally caching and plotting matches.

    Parameters
    ----------
    G1, G2 : graphs accepted by GraphMatcher
        `G2` is the pattern looked for inside `G1`.
    saveisolist : bool
        when True, all subgraph isomorphisms are listed and pickled to
        ``pickles/arch2_patt8.pickle`` (the folder is created if missing).
    readisolist : bool
        when True, the isomorphism list is loaded from that same pickle.
    plot : bool
        when True, the first isomorphism is drawn next to `G2`. If no list
        was saved or read in this call, the isomorphisms are enumerated on
        demand; if there are none, nothing is drawn.

    Returns
    -------
    bool
        result of ``GraphMatcher.subgraph_is_isomorphic()``.
    """
    pickle_path = 'pickles/arch2_patt8.pickle'

    # Check isomorphism
    GM = GraphMatcher(G1=G1, G2=G2, node_match=None, edge_match=None)
    isomorph = GM.subgraph_is_isomorphic()

    # Stays None unless a list is saved, read, or needed for plotting.
    isomorph_list = None

    if saveisolist:
        # Check if the pickles folder exists
        if not os.path.isdir("./pickles/"):
            os.makedirs("./pickles/")
        # List all isomorphisms between the two graphs
        isomorph_list = list(GM.subgraph_isomorphisms_iter())
        # Save isomorphism list; the context manager closes the file even
        # if pickling fails.
        with open(pickle_path, "wb") as pickling_on:
            pickle.dump(isomorph_list, pickling_on)

    if readisolist:
        # NOTE: unpickling can execute arbitrary code; only read files that
        # this function wrote itself.
        with open(pickle_path, "rb") as pickle_off:
            isomorph_list = pickle.load(pickle_off)

    if plot:
        if isomorph_list is None:
            # Previously this path failed with an unbound local variable.
            isomorph_list = list(GM.subgraph_isomorphisms_iter())
        if isomorph_list:
            # Plot a sample isomorph
            options = {
                'line_color': 'grey',
                'font_size': 10,
                'node_size': 10,
                'with_labels': True
            }
            G3 = G1.subgraph(isomorph_list[0])
            plt.figure(1)
            nx.draw(G3, **options)
            plt.figure(2)
            nx.draw(G2, **options)
            plt.show()

    return isomorph
def small_molecule_degeneracy(graph):
    """Count attribute-preserving self-mappings of `graph` and derive the rotational entropy.

    Nodes match when their ``atomic_num`` (compared as int) and
    ``hybridization`` agree; edges match when their ``bond_type`` (compared
    as int) agrees.

    Returns
    -------
    tuple
        ``(calc_rot_entropy(degeneracy), degeneracy)``.
    """
    from networkx.algorithms.isomorphism import GraphMatcher

    def equ_node(n1, n2):
        if int(n1["atomic_num"]) != int(n2["atomic_num"]):
            return False
        return bool(n1["hybridization"] == n2["hybridization"])

    def equ_edge(n1, n2):
        return int(n1["bond_type"]) == int(n2["bond_type"])

    self_matcher = GraphMatcher(graph, graph,
                                node_match=equ_node,
                                edge_match=equ_edge)
    # Exhaust the matcher and count one per yielded mapping.
    degeneracy = sum(1 for _ in self_matcher.match())
    return calc_rot_entropy(degeneracy), degeneracy
def hypergraphs_are_equivalent(graph1, graph2, isomorphy=True):
    """Compare two hypergraphs and return a node-id mapping, or None if they differ.

    Both graphs are first passed through `put_parent_node_first`; graphs of
    different length never match.

    Parameters
    ----------
    graph1, graph2 : hypergraph objects or None
        if either is None the result is None.
    isomorphy : bool
        * True  -- both graphs are converted with ``to_nx()`` and matched
          with networkx's GraphMatcher, using the node/edge predicates
          defined below.
        * False -- nodes are paired strictly in the iteration order of
          ``graph.node`` and every pair must agree (parent flag, string
          form, number of edges, and edge type/data in order).

    Returns
    -------
    dict or None
        a mapping from node ids of `graph1` to node ids of `graph2`
        (``GM.mapping`` in the isomorphy case), or None when the graphs do
        not match.
    """
    if graph1 is None or graph2 is None:
        return None

    # NOTE(review): presumably reorders nodes so the parent node comes
    # first -- confirm against the helper's definition.
    graph1 = put_parent_node_first(graph1)
    graph2 = put_parent_node_first(graph2)

    if len(graph1) != len(graph2):
        return None
    # index_cache_1 = {}
    # index_cache_2 = {}

    if not isomorphy:
        # node type must match as well as node ordering, nx is just needed to check that the edges also align
        edge2nodes1 = graph1.node_ids_by_edge_id()
        edge2nodes2 = graph2.node_ids_by_edge_id()

        # Return the node at the other end of `edge_id` as seen from
        # `node_id`. The assert requires exactly one other node on the edge.
        def other_node1(node_id, edge_id):
            nodes = edge2nodes1[edge_id]
            tmp = [n for n in nodes if n != node_id]
            assert len(tmp) == 1, "This node is not connected to this edge!"
            return tmp[0]

        # Same as other_node1, but for graph2.
        def other_node2(node_id, edge_id):
            nodes = edge2nodes2[edge_id]
            tmp = [n for n in nodes if n != node_id]
            assert len(tmp) == 1, "This node is not connected to this edge!"
            return tmp[0]

        # Candidate mapping: i-th node id of graph1 -> i-th node id of graph2.
        mapping = {
            id1: id2
            for id1, id2 in zip(graph1.node.keys(), graph2.node.keys())
        }
        # `i` is not used in the loop body.
        for i, (node1_id, node1), (node2_id, node2) in zip(range(len(graph1)), graph1.node.items(), graph2.node.items()):
            if not (graph1.is_parent_node(node1) == graph2.is_parent_node(node2)):
                return None
            elif str(node1) != str(node2) or len(node1.edges) != len(
                    node2.edges):
                return None
            else:
                # Edges are compared pairwise in their stored order.
                for edge1_id, edge2_id in zip(node1.edge_ids, node2.edge_ids):
                    edge1 = graph1.edges[edge1_id]
                    edge2 = graph2.edges[edge2_id]
                    if edge1.type != edge2.type or edge1.data != edge2.data:
                        return None
                    # now test that matching edges lead to matching nodes
                    candidate_node_id1 = other_node1(node1_id, edge1_id)
                    candidate_node_id2 = other_node2(node2_id, edge2_id)
                    if mapping[candidate_node_id1] != candidate_node_id2:
                        return None
        return mapping
    else:
        def nodes_match(node1, node2):
            # parent nodes must be aligned
            if not graph1.is_parent_node(
                    node1['node']) == graph2.is_parent_node(node2['node']):
                return False
            # and for all nodes the content must match as well as the ordering
            return str(node1['node']) == str(node2['node'])

        # Edges match when both the type and the data of their payload agree.
        def edges_match(edge1, edge2):
            return edge1['data'].type == edge2['data'].type and edge1[
                'data'].data == edge2['data'].data

        from networkx.algorithms.isomorphism import GraphMatcher
        graph1_nx = graph1.to_nx()
        graph2_nx = graph2.to_nx()
        GM = GraphMatcher(graph1_nx, graph2_nx, edge_match=edges_match, node_match=nodes_match)
        if GM.is_isomorphic():
            # assert str(graph1) == str(graph2)
            # for id1, id2
            # test = {id1: id2 for id1, id2 in zip(graph1.node.keys(), graph2.node.keys())}
            # for id1 in test:
            #     if test[id1] != GM.mapping[id1]:
            #         print("what?")
            # Sanity check: when graph1 has a parent node, it must be mapped
            # onto graph2's parent node.
            assert graph1.parent_node_id is None or GM.mapping[
                graph1.parent_node_id] == graph2.parent_node_id
            return GM.mapping
        else:
            return None
def test_empty_hints(self, host, motif):
    """With an empty `hints` list, `find_motifs` must count the same matches as networkx."""
    reported_count = find_motifs(motif, host, count_only=True, hints=[])
    reference_matches = list(
        GraphMatcher(host, motif).subgraph_monomorphisms_iter())
    assert reported_count == len(reference_matches)
def add_subhg(self, subhg):
    """Register `subhg`, reusing a stored isomorphic sub-hypergraph when one exists.

    Each stored sub-hypergraph is first compared with `subhg` on cheap
    invariants (node count, edge count, multisets of node and edge
    symbols). Only when all agree is an isomorphism searched. On the first
    isomorphism found, the stored graph's ``order4hrg`` (and ``ext_id``,
    when present) node attributes are copied onto the corresponding nodes
    of `subhg`. Otherwise `subhg` receives a fresh ``order4hrg`` numbering
    and is appended to ``self.subhg_list``.

    Parameters
    ----------
    subhg : Hypergraph
        sub-hypergraph to register; its node attributes are modified.

    Returns
    -------
    index : int
        position in ``self.subhg_list`` of the matching (or newly added)
        sub-hypergraph.
    is_new : bool
        True when `subhg` was appended, False when an existing entry matched.
    """
    def _assign_fresh_order(hg):
        # Number the nodes of `hg` in ascending order of their symbol's hash
        # (ties keep the iteration order of `hg.nodes`).
        # NOTE(review): if symbols are str, their hashes vary between
        # interpreter runs unless PYTHONHASHSEED is fixed -- confirm that a
        # run-dependent order is acceptable.
        hash_by_node = {
            node: hg.node_attr(node)['symbol'].__hash__()
            for node in hg.nodes
        }
        ordered_nodes = sorted(hash_by_node, key=hash_by_node.get)
        for order, node in enumerate(ordered_nodes):
            hg.node_attr(node)['order4hrg'] = order

    # These depend only on `subhg`, so they are computed once instead of
    # once per stored candidate.
    subhg_bond_symbol_counter = Counter(
        subhg.node_attr(each_node)['symbol'] for each_node in subhg.nodes)
    subhg_atom_symbol_counter = Counter(
        subhg.edge_attr(each_edge).get('symbol', None)
        for each_edge in subhg.edges)

    for each_idx, each_subhg in enumerate(self.subhg_list):
        # Cheap rejections before the expensive isomorphism search.
        if subhg.num_nodes != each_subhg.num_nodes \
                or subhg.num_edges != each_subhg.num_edges:
            continue
        each_bond_symbol_counter = Counter(
            each_subhg.node_attr(each_node)['symbol']
            for each_node in each_subhg.nodes)
        if subhg_bond_symbol_counter != each_bond_symbol_counter:
            continue
        each_atom_symbol_counter = Counter(
            each_subhg.edge_attr(each_edge).get('symbol', None)
            for each_edge in each_subhg.edges)
        if subhg_atom_symbol_counter != each_atom_symbol_counter:
            continue

        gm = GraphMatcher(each_subhg.hg, subhg.hg,
                          node_match=_easy_node_match,
                          edge_match=_edge_match)
        try:
            # Keys come from `each_subhg`, values from `subhg`.
            isomap = next(gm.isomorphisms_iter())
        except StopIteration:
            continue

        # Carry the stored ordering (and external ids) over to `subhg`.
        for each_node in each_subhg.nodes:
            stored_attr = each_subhg.node_attr(each_node)
            target_attr = subhg.node_attr(isomap[each_node])
            target_attr['order4hrg'] = stored_attr['order4hrg']
            if 'ext_id' in stored_attr:
                target_attr['ext_id'] = stored_attr['ext_id']
        return each_idx, False

    # No stored sub-hypergraph matched (or none is stored yet).
    _assign_fresh_order(subhg)
    self.subhg_list.append(subhg)
    return len(self.subhg_list) - 1, True
# Search the union of two entity graphs for occurrences of a combined
# source/target pattern and print the entities taking part in each match.
rev_tm, train_map = Partition.get_train_node_sets(dataset)[1]

# One graph per triple set; the first and last element of each triple are
# used as edge endpoints. Endpoints of triple2 are translated via train_map.
g1 = nx.Graph()
g1.add_edges_from([(x[0], x[-1]) for x in dataset.triple1])
g2 = nx.Graph()
g2.add_edges_from([(train_map[x[0]], train_map[x[-1]]) for x in dataset.triple2])

# Tag every edge with the graph it came from.
nx.set_edge_attributes(g1, True, 'g1')
nx.set_edge_attributes(g2, True, 'g2')

pattern = nx.compose(src_pattern, trg_pattern)
print(pattern.edges)
g = nx.compose(g1, g2)

# An edge of `g` matches a pattern edge only when both agree on the presence
# of the 'g1' tag and of the 'g2' tag (a missing tag counts as False).
gm = GraphMatcher(g, pattern,
                  edge_match=lambda x, y: (x.get('g1', False) == y.get('g1', False)) and (x.get('g2', False) == y.get('g2', False)))
import utils

e1, e2 = map(utils.mp2list, dataset.ents)
for subgraph in gm.subgraph_isomorphisms_iter():
    # print('src is', subgraph.keys())
    # target_nodes = [train_map[0][x] for x in subgraph.keys()]
    # Order the matched nodes of `g` by the pattern node they map to.
    subkeys = sorted(subgraph.keys(), key=lambda x: subgraph[x])
    print('--------')
    print('e1s are: \n', '\n'.join(e1[i] for i in subkeys))
    # rev_tm translates each matched node id before indexing e2.
    print('\ne2s are: ', '\n'.join(e2[rev_tm[i]] for i in subkeys))
def revert(self, hg: Hypergraph, return_subhg=False):
    ''' revert applying this production rule.
    i.e., if there exists a subhypergraph that matches the r.h.s. of this production rule,
    this method replaces the subhypergraph with a non-terminal hyperedge.

    Parameters
    ----------
    hg : Hypergraph
        hypergraph to be reverted; it is modified in place on success.
    return_subhg : bool
        if True, the removed subhypergraph will be returned.

    Returns
    -------
    hg : Hypergraph
        the resultant hypergraph. if it cannot be reverted, the original one is returned without any replacement.
    success : bool
        this indicates whether reverting succeeded or not.
    subhg : Hypergraph
        only returned when `return_subhg` is True: the removed
        subhypergraph on success, an empty Hypergraph on failure.
    '''
    gm = GraphMatcher(hg.hg, self.rhs.hg, node_match=_node_match_prod_rule,
                      edge_match=_edge_match)
    try:
        # in case when the matched subhg is connected to the other part via external nodes and more.
        # NOTE: a new iterator is created on every pass, so `next` always
        # yields the first match; because the else branch below returns,
        # the loop body runs at most once.
        not_iso = True
        while not_iso:
            isomap = next(gm.subgraph_isomorphisms_iter())
            adj_node_set = set([]) # reachable nodes from the internal nodes
            subhg_node_set = set(isomap.keys()) # nodes in subhg
            for each_node in subhg_node_set:
                adj_node_set.add(each_node)
                # Neighbours are collected only for nodes that do not map to
                # an external node of this rule.
                if isomap[each_node] not in self.ext_node.values():
                    adj_node_set.update(hg.hg.adj[each_node])
            # The match is accepted only when no such node reaches outside
            # the matched subhypergraph.
            if adj_node_set == subhg_node_set:
                not_iso = False
            else:
                if return_subhg:
                    return hg, False, Hypergraph()
                else:
                    return hg, False
        # Reverse lookup: element of `self.rhs` -> element of `hg`.
        inv_isomap = {v: k for k, v in isomap.items()}
        ''' isomap = {'e35': 'e8', 'bond_13': 'bond_18', 'bond_14': 'bond_19', 'bond_15': 'bond_17', 'e29': 'e23', 'bond_12': 'bond_20'} where keys come from `hg` and values come from `self.rhs` '''
    except StopIteration:
        # No subhypergraph of `hg` matches the r.h.s. at all.
        if return_subhg:
            return hg, False, Hypergraph()
        else:
            return hg, False

    if return_subhg:
        # Copy the matched nodes and edges out of `hg` before removing them.
        subhg = Hypergraph()
        for each_node in hg.nodes:
            if each_node in isomap:
                subhg.add_node(each_node, attr_dict=hg.node_attr(each_node))
        for each_edge in hg.edges:
            if each_edge in isomap:
                subhg.add_edge(hg.nodes_in_edge(each_edge),
                               attr_dict=hg.edge_attr(each_edge),
                               edge_name=each_edge)
        subhg.edge_idx = hg.edge_idx

    # remove subhg except for the external nodes
    # Keys starting with 'e' are treated as hyperedges, keys starting with
    # 'bond_' as nodes (see the isomap example above).
    for each_key, each_val in isomap.items():
        if each_key.startswith('e'):
            hg.remove_edge(each_key)
    for each_key, each_val in isomap.items():
        if each_key.startswith('bond_'):
            if each_val not in self.ext_node.values():
                hg.remove_node(each_key)

    # add non-terminal hyperedge
    # It connects the nodes of `hg` matched to this rule's external nodes,
    # in the iteration order of `self.ext_node`.
    nt_node_list = []
    for each_ext_id in self.ext_node.keys():
        nt_node_list.append(inv_isomap[self.ext_node[each_ext_id]])

    hg.add_edge(nt_node_list,
                attr_dict=dict(
                    terminal=False,
                    symbol=self.lhs_nt_symbol))
    if return_subhg:
        return hg, True, subhg
    else:
        return hg, True
def subgraph_match(self):
    """Print whether `self.complete_graph` contains a subgraph isomorphic to a complete graph.

    The size of the complete graph searched for is taken from
    ``self.args.sub_graph_size``.
    """
    clique = nx.complete_graph(self.args.sub_graph_size)
    matcher = GraphMatcher(self.complete_graph, clique)
    found = matcher.subgraph_is_isomorphic()
    print("Is isomorphic?: {}".format(found))
def clone_subgraphs(self, g):
    """Return a copy of `g` patched with clones of R groups found in other components.

    The graph is split into components; components holding the value side
    of a lost bond are stored as X groups, those holding the key side as
    R groups. For every remaining component, the first R group with a
    suitable subgraph isomorphism into it is remapped and composed into
    the result, after which the X group belonging to each of its lost
    bonds is added and connected with a new edge.

    Parameters
    ----------
    g : CGRContainer

    Returns
    -------
    a copy of the patched graph; when no R group was cloned this is a copy
    of `g` itself.

    Raises
    ------
    InvalidData
        if `g` is not a CGRContainer.
    """
    if not isinstance(g, CGRContainer):
        raise InvalidData('only CGRContainer acceptable')

    r_group = []
    x_group = {}
    r_group_clones = []
    newcomponents = []

    ''' search bond breaks and creations '''
    # NOTE(review): presumably `lost_bonds` is an iterable of atom pairs and
    # `term_atoms` a container of terminal atoms -- confirm in __split_graph.
    components, lost_bonds, term_atoms = self.__split_graph(g)
    lost_map = {x: y for x, y in lost_bonds}

    ''' extract subgraphs and sort by group type (R or X) '''
    x_terminals = set(lost_map.values())
    r_terminals = set(lost_map)

    for i in components:
        # A component containing the value side of a lost bond is an X
        # group, keyed by one such atom.
        x_terminal_atom = x_terminals.intersection(i)
        if x_terminal_atom:
            x_group[x_terminal_atom.pop()] = i
            continue

        # A component containing the key side of a lost bond is an R group,
        # stored together with all such atoms it contains.
        r_terminal_atom = r_terminals.intersection(i)
        if r_terminal_atom:
            r_group.append([r_terminal_atom, i])
            continue

        newcomponents.append(i)
    ''' search similar R groups and patch. '''
    tmp = g
    for i in newcomponents:
        for k, j in r_group:
            gm = GraphMatcher(j, i, node_match=self.__node_match_products,
                              edge_match=self.__edge_match_products)
            ''' search for similar R-groups started from bond breaks. '''
            # First mapping that covers every atom in `k` and sends each of
            # them to a member of `term_atoms`; None when there is none.
            mapping = next((x for x in gm.subgraph_isomorphisms_iter() if k.issubset(x) and
                            all(x[y] in term_atoms for y in k)), None)
            if mapping:
                r_group_clones.append([k, mapping])
                tmp = compose(tmp, self.__remap_group(j, tmp, mapping)[0])
                # Only the first matching R group is used per component.
                break

    ''' add lose X groups to R groups '''
    for i, j in r_group_clones:
        for k in i:
            remappedgroup, mapping = self.__remap_group(x_group[lost_map[k]], tmp, {})
            tmp = CGRcore.union(tmp, remappedgroup)
            # Connect the cloned atom to the remapped X-group atom.
            tmp.add_edge(j[k], mapping[lost_map[k]], s_bond=1, sp_bond=(1, None))

    if r_group_clones:
        tmp.meta.update(g.meta)

    return tmp.copy()