def hash_graph(graph):
    """
    Returns a hash for the graph based on PyNauty's certificate fn.

    The 'power' value is read from the instance dictionary of 'graph'
    (default 1 when absent).  When it is greater than 1 the graph is
    converted twice, with the 'member' and the 'family' partition, and the
    hashes of both certificates are combined.  Otherwise the graph is
    converted once with the default arguments and the hash of that single
    certificate is returned.

    NOTE: the result comes from Python's built-in hash(); for bytes/str
    input that value is salted per interpreter process unless
    PYTHONHASHSEED is fixed, so compare these hashes within one run only.
    """
    if not graph.__dict__.get('power', 1) > 1:
        # Plain graph: one conversion, one certificate.
        plain_graph, _ = convert_nx_to_pyn(graph)
        return hash(pyn.certificate(plain_graph))

    # Higher-power graph: certify both partitions and hash the pair.
    member_graph, _ = convert_nx_to_pyn(graph, partition='member')
    family_graph, _ = convert_nx_to_pyn(graph, partition='family')
    member_hash = hash(pyn.certificate(member_graph))
    family_hash = hash(pyn.certificate(family_graph))
    return hash((member_hash, family_hash))
def find_type(graph, na_graphlet_cert_dict):
    """
    Find the index of the known graphlet that 'graph' is isomorphic to.

    Graphs with one to four nodes are classified without running nauty: the
    key (node count, edge count) -- extended with the maximum degree for
    four-node graphs that have three or four edges -- is looked up in
    'SMALL_ISOMORPHIC_GRAPHS_DICT'.  Every other graph is identified by its
    pynauty certificate, which is looked up in 'na_graphlet_cert_dict'; the
    values of that dictionary are pairs whose second item is the index.

    Returns the graphlet index, or None for a four-node graph whose number
    of edges is not 3, 4, 5 or 6.  Raises KeyError when the graph is missing
    from the lookup table that applies to it.
    """
    node_num = graph.number_of_nodes()
    edge_num = graph.number_of_edges()

    if node_num in (1, 2, 3):
        return SMALL_ISOMORPHIC_GRAPHS_DICT[(node_num, edge_num)]

    if node_num == 4:
        if edge_num in (3, 4):
            # Three or four edges are ambiguous on four nodes; the maximum
            # degree is the extra key component that tells them apart.
            max_degree = max(graph.degree(node) for node in graph.nodes())
            return SMALL_ISOMORPHIC_GRAPHS_DICT[(node_num, edge_num, max_degree)]
        if edge_num in (5, 6):
            return SMALL_ISOMORPHIC_GRAPHS_DICT[(node_num, edge_num)]
        # Any other edge count has no table entry.  The result has always
        # been None here; it is spelled out so the contract is visible.
        return None

    # pynauty is only needed on this path, so it is imported here instead of
    # on every call.
    import pynauty as na

    graph_cert = na.certificate(nxgraph_to_relabeled_nagraph(graph))
    _, graph_index = na_graphlet_cert_dict[graph_cert]
    return graph_index
def get_graphlet(window, num_graphlets):
    """Compute the Nauty certificate of the graphlet within a window of the adjacency matrix

    Each row of ``window`` is read as the adjacency row of one vertex: every
    non-zero column other than the vertex's own index becomes a neighbour.
    The resulting undirected graph on ``num_graphlets`` vertices is handed to
    nauty and its certificate is returned.

    The certificate is meant to be compared with a bank of known
    certificates (the original note refers to get_maps(), which is not
    visible from here).

    Parameters
    ----------
    window : numpy ndarray
        submatrix inside the adjacency matrix of a graph
    num_graphlets: int
        the size of the graphlets to extract; passed to pynauty as the
        number of vertices

    Returns
    -------
    cert : byte str
        certificate of the graphlet produced by finding its canonical
        representation with Nauty.
    """
    neighbours = {}
    for vertex, row in enumerate(window):
        nonzero_columns = np.where(row)[0]
        # Drop the diagonal entry so no self-loop is created.
        neighbours[vertex] = [col for col in nonzero_columns if col != vertex]

    graphlet = pynauty.Graph(
        number_of_vertices=num_graphlets,
        directed=False,
        adjacency_dict=neighbours,
    )
    return pynauty.certificate(graphlet)
def get_isomorphic_signature(graph: DiGraph) -> str:
    """
    Generate an isomorphism-invariant id for a directed graph with pynauty.

    The graph's adjacency lists are relabelled to the consecutive integers
    0..n-1 (in node iteration order), turned into a directed pynauty graph,
    and the MD5 hex digest of its nauty certificate is returned.  Isomorphic
    graphs therefore share the same id regardless of how their nodes are
    named.

    :param graph: directed graph with hashable node labels of any kind
    :return: 32-character hexadecimal MD5 digest of the certificate
    """
    adjacency = nx.to_dict_of_lists(graph)

    # pynauty addresses vertices by the integers 0..n-1, so arbitrary node
    # labels (strings, tuples, non-contiguous ints) must be mapped first.
    # The certificate is a canonical form, so graphs that were already
    # labelled 0..n-1 keep exactly the same signature.
    vertex_of = {node: vertex for vertex, node in enumerate(adjacency)}
    relabeled = {
        vertex_of[node]: [vertex_of[neighbor] for neighbor in neighbors]
        for node, neighbors in adjacency.items()
    }

    nauty_graph = pynauty.Graph(
        len(vertex_of), directed=True, adjacency_dict=relabeled
    )
    return hashlib.md5(pynauty.certificate(nauty_graph)).hexdigest()
def cl(g6):
    """
    Compute the pynauty certificate of a graph given as graph6 bytes.

    Note that the value returned is what pynauty.certificate() produces,
    not a canonical labelling (vertex permutation).
    """
    graph = nx.from_graph6_bytes(g6)
    neighbours = {
        vertex: list(adjacent) for vertex, adjacent in graph.adjacency()
    }
    certificate = pynauty.certificate(
        pynauty.Graph(graph.order(), adjacency_dict=neighbours)
    )
    # Empty the decoded graph right away, as the original code did.
    graph.clear()
    return certificate
def get_graphlet(window, nsize):
    """
    Compute the pynauty certificate of the graph described by 'window'.

    Every row of 'window' is treated as the adjacency row of one vertex;
    non-zero columns other than the diagonal become its neighbours.  The
    graph is built as an undirected pynauty graph with 'nsize' vertices.
    """
    def off_diagonal_neighbours(vertex, row):
        # Non-zero columns of this row, excluding the vertex itself.
        return [col for col in np.where(row)[0] if col != vertex]

    adjacency = {}
    for vertex, row in enumerate(window):
        adjacency[vertex] = off_diagonal_neighbours(vertex, row)

    graph = pynauty.Graph(
        number_of_vertices=nsize, directed=False, adjacency_dict=adjacency
    )
    return pynauty.certificate(graph)
def get_motif(motif_am, size):
    """
    Return the pynauty certificate of the motif given by an adjacency matrix.

    'motif_am' is iterated row by row; for each row the non-zero columns,
    minus the row's own index, form the neighbour list of that vertex.  The
    motif is certified as an undirected graph on 'size' vertices.
    """
    neighbour_lists = {}
    for vertex, row in enumerate(motif_am):
        connected = np.where(row)[0]
        neighbour_lists[vertex] = [
            other for other in connected if other != vertex
        ]

    motif = pynauty.Graph(
        number_of_vertices=size,
        directed=False,
        adjacency_dict=neighbour_lists,
    )
    return pynauty.certificate(motif)
def get_graphlet_cert_dict(na_graphlet_dict):
    """
    Builds the certificate dictionary for the pynauty graphlets.

    'na_graphlet_dict' maps a node count to a sequence of pynauty graphs.
    The result maps each graph's certificate to the pair
    (node count, position of the graph within its sequence).  If two graphs
    share a certificate, the one visited last wins.
    """
    import pynauty as na

    return {
        na.certificate(graphlet): (size, position)
        for size, graphlets in na_graphlet_dict.items()
        for position, graphlet in enumerate(graphlets)
    }
def get_graphlet(window, nsize):
    """
    Build an undirected pynauty graph from the rows of 'window' and return
    its certificate.

    Row i lists the neighbours of vertex i as its non-zero columns; the
    diagonal entry is ignored.  'nsize' is the number of vertices given to
    pynauty.
    """
    adjacency = dict(
        (vertex, [other for other in np.where(row)[0] if other != vertex])
        for vertex, row in enumerate(window)
    )
    return pynauty.certificate(
        pynauty.Graph(
            number_of_vertices=nsize,
            directed=False,
            adjacency_dict=adjacency,
        )
    )
def get_canonical_map(g):
    """
    Return a pynauty certificate for 'g' that distinguishes node order.

    The adjacency matrix of 'g' is densified and each row's non-zero,
    off-diagonal columns become the out-neighbours of that vertex in a
    directed pynauty graph.  Every vertex is placed in its own colour class.

    Returns the certificate produced by pynauty, or the empty string '' when
    the graph has no nodes.
    """
    node_count = len(g.nodes())
    if not node_count > 0:
        return ''

    window = np.array(nx.adjacency_matrix(g).todense())
    neighbours = {}
    for vertex, row in enumerate(window):
        neighbours[vertex] = [col for col in np.where(row)[0] if col != vertex]

    # Without a colouring the order of nodes is not taken into account and
    # the same canonical map is produced for these graphs:
    #   0-->1 2,   0 1-->2,   0-->2 1
    # Giving each vertex its own colour class keeps them apart.
    singleton_colours = [set([vertex]) for vertex in range(node_count)]
    labelled_graph = pynauty.Graph(
        number_of_vertices=node_count,
        directed=True,
        adjacency_dict=neighbours,
        vertex_coloring=singleton_colours,
    )
    return pynauty.certificate(labelled_graph)
def HASH(graph):
    """
    see https://stackoverflow.com/questions/46999771/
    use with caution...

    Hash a graph up to isomorphism while keeping nodes with different
    "symbol" attributes distinguishable.

    All nodes are grouped by their "symbol" attribute and the groups are
    passed to pynauty as ONE vertex colouring.  The previous code called
    set_vertex_coloring() once per symbol, so each call replaced the one
    before it and the symbols never influenced the certificate.  It also
    printed the whole symbol table on every call; that debug output is gone.

    Nodes are expected to be the integers 0..n-1, as pynauty requires.
    The value is built with Python's hash(), which is salted per process for
    bytes/str, so only compare results obtained within the same run.

    :param graph: networkx graph whose nodes all carry a "symbol" attribute
    :return: int hash of (symbol composition, coloured nauty certificate)
    :raises KeyError: if a node has no "symbol" attribute
    """
    g: nx.Graph = graph
    pnGraph = pn.Graph(g.number_of_nodes())
    for edge in g.edges:
        pnGraph.connect_vertex(edge[0], edge[1])

    # Group every node (isolated ones included) by its symbol.
    color_classes = {}
    for node in g.nodes:
        color_classes.setdefault(g.nodes[node]["symbol"], set()).add(node)

    # The colouring is an ordered list of vertex sets, so the order must be
    # derived from the symbols themselves to be the same for every graph.
    ordered_symbols = sorted(color_classes, key=str)
    if ordered_symbols:
        pnGraph.set_vertex_coloring(
            [color_classes[symbol] for symbol in ordered_symbols]
        )

    # nauty only sees the cells, not their names or sizes; hashing the
    # (symbol, count) sequence as well keeps e.g. C-O apart from C-N and
    # C-C-O apart from C-O-O.
    composition = tuple(
        (symbol, len(color_classes[symbol])) for symbol in ordered_symbols
    )
    return hash((composition, pn.certificate(pnGraph)))
def make_identifier_hash_linelist(npoints, line_list):
    """
    Return the pynauty certificate identifying a design given as a line list.

    The graph is produced by make_bipartite_for_design_linelist(npoints,
    line_list) -- presumably a bipartite incidence graph; confirm against
    that helper.  Despite the name, the certificate itself is returned, not
    a hash of it.
    """
    design_graph = make_bipartite_for_design_linelist(npoints, line_list)
    return pynauty.certificate(design_graph)
def make_identifier_hash(pd):
    """
    Return the pynauty certificate identifying the design 'pd'.

    The graph is produced by make_bipartite_for_design(pd) -- presumably a
    bipartite incidence graph; confirm against that helper.  Despite the
    name, the certificate itself is returned, not a hash of it.
    """
    design_graph = make_bipartite_for_design(pd)
    return pynauty.certificate(design_graph)
def get_certificate(self):
    """
    Return the pynauty certificate of self.covers_graph(), computing it once.

    A truthy value already stored in self.certificate is returned as is;
    otherwise the certificate is computed, stored on the instance and
    returned.  pynauty is imported only when the computation is needed.
    """
    cached = self.certificate
    if cached:
        return cached

    from pynauty import certificate as compute_certificate

    self.certificate = compute_certificate(self.covers_graph())
    return self.certificate
def find_type_match(nx_graphlet_dict, na_graphlet_cert_dict, graph):
    """
    Given a graph, find an isomorphism with one of the canonical graphlets.

    Returns a pair (index, match): 'index' is the position of the matching
    canonical graphlet among the graphlets with the same number of nodes and
    'match' is a dictionary {u_i: v_i} where 'u_i' are nodes of 'graph' and
    'v_i' are nodes of the canonical graphlet.

    Graphs with up to four nodes are matched by hand from edge counts and
    degrees.  Everything else is identified through its pynauty certificate
    in 'na_graphlet_cert_dict' and matched with networkx's GraphMatcher
    against 'nx_graphlet_dict[node_num][index]'; both libraries are imported
    only on that path.

    Raises ValueError("wrong graphlet format") for a four-node graph that is
    none of the six handled shapes, and KeyError when a larger graph is not
    present in the lookup dictionaries.

    Helper function for 'prob_functions' for unordered method.
    """
    nodes = graph.nodes()
    node_num = len(nodes)

    if node_num == 1:
        # trivial graph: relabel the node to zero.
        return (0, {u: 0 for u in nodes})

    if node_num == 2:
        # 2-path graph: graph is symmetric, choose one of two isomorphisms.
        return (0, {u: i for i, u in enumerate(nodes)})

    if node_num == 3:
        e_num = graph.number_of_edges()
        if e_num == 2:
            # 3-path (or wedge): map the root to zero, the other two are
            # interchangeable.
            u0 = next(node for node in nodes if graph.degree(node) == 2)
            u1, u2 = graph.neighbors(u0)
            return (0, {u0: 0, u1: 1, u2: 2})
        if e_num == 3:
            # 3-clique (or triangle): all three are interchangeable.
            return (1, {u: i for i, u in enumerate(nodes)})
        # Any other 3-node graph is handled by the generic path below,
        # exactly as before.

    if node_num == 4:
        e_num = graph.number_of_edges()
        max_degree = max(graph.degree(node) for node in nodes)
        # 3-star
        if e_num == 3 and max_degree == 3:
            u3 = next(node for node in nodes if graph.degree(node) == 3)
            u0, u1, u2 = tuple(graph.neighbors(u3))
            return (0, {u0: 0, u1: 1, u2: 2, u3: 3})
        # 4-path
        if e_num == 3 and max_degree == 2:
            u0, u1 = (node for node in nodes if graph.degree(node) == 2)
            u2 = next(node for node in graph.neighbors(u1) if node != u0)
            u3 = next(node for node in graph.neighbors(u0) if node != u1)
            return (1, {u0: 0, u1: 1, u2: 2, u3: 3})
        # 4-tailedtriangle
        if e_num == 4 and max_degree == 3:
            u3 = next(node for node in nodes if graph.degree(node) == 3)
            u1, u2 = (node for node in nodes if graph.degree(node) == 2)
            u0 = next(node for node in nodes if graph.degree(node) == 1)
            return (2, {u0: 0, u1: 1, u2: 2, u3: 3})
        # 4-cycle
        if e_num == 4 and max_degree == 2:
            u0 = next(iter(nodes))
            u1, u3 = tuple(graph.neighbors(u0))
            u2 = next(node for node in graph.neighbors(u1) if node != u0)
            return (3, {u0: 0, u1: 1, u2: 2, u3: 3})
        # 4-chordcycle
        if e_num == 5:
            u0, u2 = (node for node in nodes if graph.degree(node) == 3)
            u1, u3 = (node for node in nodes if graph.degree(node) == 2)
            return (4, {u0: 0, u1: 1, u2: 2, u3: 3})
        # 4-clique
        if e_num == 6:
            u0, u1, u2, u3 = tuple(nodes)
            return (5, {u0: 0, u1: 1, u2: 2, u3: 3})
        raise ValueError("wrong graphlet format")

    # Generic path (n > 4, plus the unhandled small cases noted above):
    # use pynauty to identify the graphlet and networkx to build the match.
    # The imports and the graphlet list lookup are only needed here.
    import networkx.algorithms.isomorphism as iso
    import pynauty as na

    nx_graphlet_list = nx_graphlet_dict[node_num]
    na_graph = nxgraph_to_relabeled_nagraph(graph)
    _, ind = na_graphlet_cert_dict[na.certificate(na_graph)]
    matcher = iso.GraphMatcher(graph, nx_graphlet_list[ind])
    mapping = next(matcher.match())
    return (ind, mapping)
def canonicalize(self):
    """
    This method will use `pynauty` library to generate a
    canonical label for the pattern. This pattern will be stored
    in `canonical_label` attribute.

    The pattern is encoded as a graph with one vertex per molecule and one
    vertex per component.  Each component is connected to its molecule, and
    the two components of every bond with exactly two end points are
    connected to each other.  Vertices are coloured by
    (molecule name, None, None) for molecules and by
    (molecule name, component name, component state) for components.

    Besides `canonical_label`, the method sets `self.nautyG`,
    `self.canonical_certificate`, a `canonical_order` attribute on every
    molecule and component, and `canonical_bonds` on bonded components.

    If pynauty cannot be imported a warning is logged and the method
    returns without setting any of these attributes.
    """
    # set a location for logging
    loc = f"{__file__} : Pattern.canonicalize()"
    # try importing pynauty to canonicalize the labeling
    try:
        import pynauty
    except ImportError:
        logger.warning(
            f"Importing pynauty failed, cannot canonicalize. Pattern equality checking is not guaranteed to work for highly symmetrical species.",
            loc=loc,
        )
        return
    # find how many vertices we need: one per molecule plus one per component
    lmol = len(self.molecules)
    lcomp = sum([len(x.components) for x in self.molecules])
    node_cnt = lmol + lcomp
    # initialize our pynauty graph
    G = pynauty.Graph(node_cnt)
    # going to need to figure out bonding: bond -> list of component group IDs
    bond_dict = {}
    # save our IDs: group ID tuple -> vertex number (and the reverse)
    # NOTE(review): rev_grpIds is filled in but never read in this method
    rev_grpIds = {}
    grpIds = {}
    # also pointers to each object: vertex number -> molecule/component
    node_ptrs = {}
    # NOTE(review): bond_node_ptrs is never used in this method
    bond_node_ptrs = {}
    # we'll need to set up coloring: color key -> set of vertex numbers
    colors = {}
    # next free vertex number
    currId = 0
    # copy counters, bumped whenever a group ID tuple is already taken so
    # that repeated molecules/components still get distinct IDs
    mCopyId = 0
    cCopyId = 0
    # let's loop over everything in the pattern
    for molec in self.molecules:
        # setting colors
        color_id = (molec.name, None, None)
        if color_id in colors:
            colors[color_id].add(currId)
        else:
            colors[color_id] = set([currId])
        # saving IDs
        parent_id = (molec.name, None, mCopyId, cCopyId)
        if parent_id in grpIds:
            mCopyId += 1
            parent_id = (molec.name, None, mCopyId, cCopyId)
            grpIds[parent_id] = currId
        else:
            grpIds[parent_id] = currId
        rev_grpIds[currId] = parent_id
        node_ptrs[currId] = molec
        currId += 1
        # now looping over components
        for comp in molec.components:
            # saving component coloring
            comp_color_id = (molec.name, comp.name, comp.state)
            if comp_color_id in colors:
                colors[comp_color_id].add(currId)
            else:
                colors[comp_color_id] = set([currId])
            chid_id = (molec.name, comp.name, mCopyId, cCopyId)
            # connecting the component to the molecule
            G.connect_vertex(grpIds[parent_id], [currId])
            # saving component IDs
            if chid_id in grpIds:
                cCopyId += 1
                chid_id = (molec.name, comp.name, mCopyId, cCopyId)
                grpIds[chid_id] = currId
            else:
                grpIds[chid_id] = currId
            rev_grpIds[currId] = chid_id
            node_ptrs[currId] = comp
            currId += 1
            # saving bonds: remember which components share each bond
            if len(comp._bonds) != 0:
                for bond in comp._bonds:
                    if bond not in bond_dict.keys():
                        bond_dict[bond] = [chid_id]
                    else:
                        bond_dict[bond].append(chid_id)
    # now we got everything, we implement it in the graph
    for bond in bond_dict:
        # check if each of our bonds have exactly two end points
        if len(bond_dict[bond]) == 2:
            # translate both group IDs back to vertex numbers and connect them
            id1 = bond_dict[bond][0]
            id1 = grpIds[id1]
            id2 = bond_dict[bond][1]
            id2 = grpIds[id2]
            G.connect_vertex(id1, [id2])
        else:
            # raise a warning
            logger.warning(
                f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                loc=loc,
            )
    # we get our color sets
    # NOTE(review): the classes are listed in the order the colors were first
    # seen while walking the pattern -- confirm that this order is what the
    # certificate comparison between patterns expects
    color_sets = list(colors.values())
    # set vertex coloring
    G.set_vertex_coloring(color_sets)
    # save our graph
    self.nautyG = G
    # generate the canonical certificate for the entire graph
    self.canonical_certificate = pynauty.certificate(self.nautyG)
    # generate the canonical label for the entire graph
    # first, we give every node their canonical order: the vertex found at
    # position iordr of the canonical labeling gets canonical_order iordr
    canon_order = pynauty.canon_label(self.nautyG)
    for iordr, ordr in enumerate(canon_order):
        node_ptrs[ordr].canonical_order = iordr
    # relabeling bonds: key each bond by the smallest canonical order of its
    # two parent molecules and of its two components
    # NOTE(review): two bonds that produce the same key would overwrite each
    # other here -- confirm this cannot happen
    relabeling_bond_dict = {}
    for bond in bond_dict:
        # check if each of our bonds have exactly two end points
        if len(bond_dict[bond]) == 2:
            id1 = bond_dict[bond][0]
            id1 = grpIds[id1]
            comp1 = node_ptrs[id1]
            id2 = bond_dict[bond][1]
            id2 = grpIds[id2]
            comp2 = node_ptrs[id2]
            parent_order = min(
                comp1.parent_molecule.canonical_order,
                comp2.parent_molecule.canonical_order,
            )
            comp_order = min(comp1.canonical_order, comp2.canonical_order)
            relabeling_bond_dict[(parent_order, comp_order)] = (comp1, comp2)
        else:
            # raise a warning (the same bond was already reported by the
            # first pass over bond_dict above)
            logger.warning(
                f"Bond {bond} doesn't have exactly 2 end points, please check that you don't have any dangling bonds.",
                loc=loc,
            )
    # this will give us the keys to canonically sorted bonds
    sorted_order = sorted(relabeling_bond_dict.keys())
    for ibond, sbond in enumerate(sorted_order):
        # now we add a canonical bond ID (1-based, stored as a string) to
        # each component
        c1, c2 = relabeling_bond_dict[sbond]
        if c1.canonical_bonds is None:
            c1.canonical_bonds = [str(ibond + 1)]
        else:
            c1.canonical_bonds.append(str(ibond + 1))
        if c2.canonical_bonds is None:
            c2.canonical_bonds = [str(ibond + 1)]
        else:
            c2.canonical_bonds.append(str(ibond + 1))
    # and now we can get the canonical label
    self.canonical_label = self.print_canonical()