def edgefeat(g, norm=False, fil='ricci'):
    """
    Wrapper that fills a dense node-by-node matrix with one value per edge.

    The graph is relabeled to consecutive integers first, so row/column ``i``
    of the result refers to the i-th node of the relabeled graph.

    :param g: networkx graph; must be connected (asserted)
    :param norm: whether to divide the matrix by its largest absolute entry.
        A matrix that is entirely zero is returned unchanged.
    :param fil: edge_p/ricci/jaccard. Any other value prints a notice and
        yields an all-zero matrix.
    :return: gp, a dense numpy array of shape (n_node, n_node)
    """
    g = nx.convert_node_labels_to_integers(g)
    assert nx.is_connected(g)
    n_node = len(g)
    gp = np.zeros((n_node, n_node))

    try:
        if fil == 'edge_p':
            # The adjacency matrix is only needed by this branch.
            adj_m = nx.adj_matrix(g).todense()  # dense matrix
            gp = np.array(smoother(adj_m, h=0.3))
            # Mask the smoothed values with the adjacency pattern; np.asarray
            # keeps the return type a plain ndarray instead of np.matrix.
            gp = np.asarray(np.multiply(adj_m, gp))
        elif fil == 'ricci':
            g = ricciCurvature(g, alpha=0.5, weight='weight')
            ricci_dict = nx.get_edge_attributes(g, 'ricciCurvature')
            for (u, v), curvature in ricci_dict.items():
                gp[u, v] = curvature
            # Only one orientation of each edge was written above; mirror it.
            gp += gp.T
        elif fil == 'jaccard':
            # Restrict to g.edges(): important since jaccard can also be
            # defined on non edges.
            for u, v, jac in nx.jaccard_coefficient(g, g.edges()):
                gp[u, v] = jac
            gp += gp.T
        else:
            print('Have not implemented fil %s. Treat as all zeros' % fil)
    except AssertionError:
        # Best effort: an assertion raised inside a helper degrades to zeros.
        print('Have not implemented fil %s. Treat as all zeros' % fil)
        gp = np.zeros((n_node, n_node))

    assert (gp == gp.T).all()
    if norm:
        # Builtin max() over a 2-D array compares whole rows and raises
        # ValueError, so reduce with numpy instead.
        scale = float(np.max(np.abs(gp)))
        if scale > 0:
            gp = gp / scale
    return gp
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute one scalar feature per node of a connected graph.

    :param g: networkx graph; must be connected (asserted). The 'hop' and
        'ricci' filtrations index nodes as 0..n_node-1.
    :param fil: deg, cc, random, hop, fiedler, ricci
    :param norm: whether to divide the feature by its largest absolute value.
        An all-zero feature is returned unchanged.
    :param kwargs: ``base`` (int), the source node; required for fil == 'hop'
    :return: node feature (np.array of shape (n_node, 1))
    :raises Exception: if ``fil`` is not one of the supported filtrations
    """
    # g = nx.random_geometric_graph(100, 0.2)
    assert nx.is_connected(g)
    n_node = len(g)

    if fil == 'deg':
        feat = np.array(list(dict(nx.degree(g)).values()))
    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
    elif fil == 'random':
        feat = np.random.random((n_node, 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        length = nx.single_source_dijkstra_path_length(g, base)  # dict
        # Build the per-node list explicitly; without it the array below was
        # constructed from an unbound local.
        feat = np.array([length[i] for i in range(n_node)])
    elif fil == 'fiedler':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
    elif fil == 'ricci':
        g = ricciCurvature(g, alpha=0.5, weight='weight')
        ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
        feat = np.array([ricci_dict[i] for i in range(n_node)])
    else:
        raise Exception('No such filtration: %s' % fil)

    feat = feat.reshape(n_node, 1)
    assert feat.shape == (n_node, 1)

    # normalize
    if norm:
        scale = float(np.max(np.abs(feat)))
        if scale > 0:  # avoid dividing an all-zero feature by zero
            feat = feat / scale
    return feat
def function_basis(g, allowed, norm_flag='no', recomputation_flag=False, transformation_flag=True):
    """
    Attach filtration functions to the nodes of a graph as node attributes.

    :param g: nx graph; must be connected (asserted)
    :param allowed: filtration type, allowed = ['ricci', 'deg', 'hop', 'cc', 'fiedler'].
        The keys actually tested below are 'ricciCurvature', 'deg', 'hop',
        'cc', 'fiedler', 'label', 'cc_min' and 'ricciCurvature_min'.
    :param norm_flag: normalization flag, 'yes' or 'no'
    :param recomputation_flag: when False, features already stored on the
        first node are not computed again
    :param transformation_flag: if apply linear/nonlinear transformation of
        filtration function (not used in this function body)
    :return: g with ricci, deg, hop, cc, fiedler computed, or None when the
        graph has fewer than 3 nodes
    """
    # to save recomputation. Look at the existing feature at first and then
    # simply compute the new one.
    assert nx.is_connected(g)
    if len(g) < 3:
        return

    first_node = list(g.nodes())[0]
    # 'label' is an input attribute rather than a computed feature, so its
    # presence must not suppress the label aggregation requested via `allowed`.
    existing_features = set(g.nodes[first_node].keys()) - {'label'}
    if not recomputation_flag:
        allowed = [
            feature for feature in allowed if feature not in existing_features
        ]

    def norm(g_, key, flag=norm_flag):
        """Return the divisor for attribute `key`: 1, or max |value| + 1e-6."""
        if flag == 'no':
            return 1
        elif flag == 'yes':
            values = list(nx.get_node_attributes(g_, key).values())
            return np.max(np.abs(values)) + 1e-6
        else:
            raise ValueError("norm flag has to be yes or no, got %r" % (flag,))

    # ricci
    g_ricci = g
    if 'ricciCurvature' in allowed:
        try:
            g_ricci = ricciCurvature(g, alpha=0.5, weight='weight')
            # Later steps index matrices by position, so node order must match.
            assert list(g_ricci.nodes()) == list(g.nodes())
            ricci_norm = norm(g_ricci, 'ricciCurvature', norm_flag)
            for n_ in g_ricci.nodes():
                g_ricci.nodes[n_]['ricciCurvature'] /= ricci_norm
        except Exception:
            # Best effort: any failure here degrades to zero curvature.
            print('RicciCurvature Error for graph, set 0 for all nodes')
            for n in g_ricci.nodes():
                g_ricci.nodes[n]['ricciCurvature'] = 0

    # degree
    if 'deg' in allowed:
        deg_dict = dict(nx.degree(g_ricci))
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['deg'] = deg_dict[n]
        deg_norm = float(norm(g_ricci, 'deg', norm_flag))
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['deg'] /= deg_norm

    # hop
    if 'hop' in allowed:
        distance = np.array(nx.floyd_warshall_numpy(g))  # return a matrix
        distance = distance.astype(int)
        if norm_flag == 'no':
            hop_norm = 1
        elif norm_flag == 'yes':
            hop_norm = np.max(distance)
        else:
            raise Exception('norm flag has to be yes or no')
        # if g_ricci has non consecutive nodes, n_idx is the index of hop
        # distance matrix
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['hop'] = distance[n_idx] / float(hop_norm)

    # closeness_centrality
    if 'cc' in allowed:
        cc = nx.closeness_centrality(g)  # dict
        cc_min = min(cc.values())
        # Scale by the minimum, then invert (no normalization for debug use).
        cc = {k: 1.0 / (v / cc_min) for k, v in cc.items()}
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['cc'] = cc[n]

    # fiedler
    if 'fiedler' in allowed:
        fiedler = fiedler_vector(g, normalized=False)  # np.ndarray
        assert max(fiedler) > 0
        fiedler = fiedler / max(np.abs(fiedler))
        assert max(np.abs(fiedler)) == 1
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['fiedler'] = fiedler[n_idx]

    any_node = list(g_ricci.nodes())[0]
    if 'label' not in g_ricci.nodes[any_node]:
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] = 0  # add dummy
    else:
        # contains label key; labels are rescaled in place on every call
        label_norm = 40
        # NOTE(review): `graph` is not defined inside this function;
        # presumably a module-level dataset name -- confirm.
        if graph == 'dd_test':
            label_norm = 90
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] /= float(label_norm)

    if 'deg' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='deg', cutoff=1, iteration=0)
        # better normalization, used to include 1_0_deg_std/ deleted now
        if norm_flag == 'yes':
            for attr in ['1_0_deg_sum']:
                norm_ = float(norm(g_ricci, attr, norm_flag))
                for n in g_ricci.nodes():
                    g_ricci.nodes[n][attr] = g_ricci.nodes[n][attr] / norm_

    if 'label' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=0)
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=1)

    if 'cc_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='cc')

    if 'ricciCurvature_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='ricciCurvature')

    return g_ricci
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute one scalar feature per node of a connected graph.

    :param g: networkx graph; must be connected (asserted). The 'hop',
        'ricci' and 'ricci_w' filtrations index nodes as 0..n_node-1.
        NOTE: fil == 'fiedler_w' adds 1e-6 to every existing 'dist' edge
        attribute of ``g`` in place.
    :param fil: deg, cc, cc_w, random, hop, fiedler, fiedler_w, fiedler_s,
        ricci, ricci_w, or hks_<t> where <t> parses as a float
    :param norm: whether to divide the feature by its largest absolute value.
        An all-zero feature is returned unchanged.
    :param kwargs: ``base`` (int), the source node; required for fil == 'hop'
    :return: node feature (np.array of shape (n_node, 1))
    :raises Exception: if ``fil`` is not one of the supported filtrations
    """
    # g = nx.random_geometric_graph(100, 0.2)
    t0 = time.time()
    assert nx.is_connected(g)
    n_node = len(g)

    if fil == 'deg':
        feat = np.array(list(dict(nx.degree(g)).values()))
    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
    elif fil == 'cc_w':
        feat = np.array(
            list(nx.closeness_centrality(g, distance='dist').values()))
    elif fil == 'random':
        feat = np.random.random((n_node, 1))
    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        length = nx.single_source_dijkstra_path_length(g, base)  # dict
        # Build the per-node list explicitly; without it the array below was
        # constructed from an unbound local.
        feat = np.array([length[i] for i in range(n_node)])
    elif fil == 'fiedler':
        if len(g.edges) == 2 * n_node:
            # HACK: fiedler is very slow when n_edges == 2 * n_nodes, so the
            # degree is used as a stand-in.
            feat = np.array(list(dict(nx.degree(g)).values()))
        else:
            feat = fiedler_vector(g, normalized=False)  # np.ndarray
    elif fil == 'fiedler_w':
        # Nudge every 'dist' weight by a small epsilon before the solve;
        # edges without a 'dist' attribute are left untouched.
        for u, v in g.edges():
            if 'dist' in g[u][v]:
                g[u][v]['dist'] += 1e-6
        print(f'bottleneck graph {len(g)}/{len(g.edges())}')
        print('-' * 50)
        feat = fiedler_vector(g,
                              normalized=False,
                              weight='dist',
                              method='tracemin_lu')  # np.ndarray
        print('after true fiedler')
    elif fil == 'fiedler_s':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
        feat = np.multiply(feat, feat)
    elif fil in ('ricci', 'ricci_w'):
        weight = 'weight' if fil == 'ricci' else 'dist'
        try:
            g = ricciCurvature(g, alpha=0.5, weight=weight)
            ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
            feat = np.array([ricci_dict[i] for i in range(n_node)])
        except Exception as err:
            # Deliberate best effort, e.g. cvxpy.error.SolverError: Solver
            # 'ECOS' failed. Fall back to random features, but say so.
            print(f'ricciCurvature failed for fil {fil} ({err!r}); '
                  'using random node features')
            feat = np.random.random((n_node, 1))
    elif fil.startswith('hks_'):
        t = float(fil[4:])
        from Esme.dgms.hks import hks
        feat = hks(g, t)
    else:
        raise Exception('No such filtration: %s' % fil)

    feat = feat.reshape(n_node, 1)
    assert feat.shape == (n_node, 1)

    # normalize
    if norm:
        scale = float(np.max(np.abs(feat)))
        if scale > 0:  # avoid dividing an all-zero feature by zero
            feat = feat / scale

    # Report unusually slow computations (more than 3 seconds).
    elapsed = time.time() - t0
    if elapsed > 3:
        from Esme.helper.time import precision_format
        print(
            f'nodefeat takes {precision_format(elapsed, 2)} for g {len(g)}/{len(g.edges)}'
        )
    return feat
if __name__ == '__main__':
    # Scratch driver: a small weighted graph that is a bad example for fiedler.
    edge_lines = [
        "0 1 {'dist': 1.3296398}",
        "0 2 {'dist': 0.9401972}",
        "0 3 {'dist': 0.94019735}",
        "1 2 {'dist': 0.94019735}",
        "1 3 {'dist': 0.9401972}",
        "1 4 {'dist': 0.9402065}",
        "3 4 {'dist': 1.3296462}",
        "3 5 {'dist': 0.9402065}",
        "4 5 {'dist': 0.9401972}",
    ]
    g = nx.parse_edgelist(edge_lines, nodetype=int)
    print(g.edges(data=True))

    # Alternatives tried on the same graph:
    # v_weight = fiedler_vector(g, normalized=False, weight='dist')  # np.ndarray
    # v_weight = list(nx.closeness_centrality(g, distance='dist').values())
    g = ricciCurvature(g, alpha=0.5, weight='dist')
    curvature_by_node = nx.get_node_attributes(g, 'ricciCurvature')
    node_curvature = []
    for node_id in range(len(g)):
        node_curvature.append(curvature_by_node[node_id])
    print(node_curvature)
    sys.exit()

    # Everything below sys.exit() is never reached; kept as scratch code.
    # g = nx.circulant_graph(10, offsets=[1]*10)
    w_name = 'weightd'
    random.seed(43)
    g = nx.random_tree(20, seed=42)
    for src, dst in g.edges():
        g[src][dst][w_name] = random.random()
        # print(g[src][dst])
    print(g.edges)