def _calculate(self, include: set, is_regression=False):
    """Store, for every node, its entry in the Fiedler vector of its component.

    The graph ``self._gnx`` is processed one connected component at a time
    and the result is written to ``self._features`` as ``{node: float}``.
    Nodes in components with fewer than two nodes get ``0.``.

    :param include: not used by this implementation.
    :param is_regression: not used by this implementation.
    """
    self._features = {}
    # ``nx.connected_component_subgraphs`` no longer exists in recent
    # NetworkX releases; build each component subgraph explicitly instead.
    for component in nx.connected_components(self._gnx):
        graph = self._gnx.subgraph(component)
        if len(graph) < 2:
            # The Fiedler vector needs at least two nodes.
            self._features.update(zip(graph.nodes(), [0.] * len(graph)))
        else:
            self._features.update(
                zip(graph.nodes(),
                    map(float, alg_connectivity.fiedler_vector(graph))))
def _calculate(self, include: set):
    """Fill ``self._features`` with per-component Fiedler-vector entries.

    Each connected component of ``self._gnx`` is handled separately; nodes of
    components with fewer than two nodes are mapped to ``0.``.

    :param include: not used by this implementation.
    """
    self._features = features = {}
    for members in nx.connected_components(self._gnx):
        component = self._gnx.subgraph(members)
        if len(component) >= 2:
            values = map(float, alg_connectivity.fiedler_vector(component))
        else:
            values = [0.] * len(component)
        features.update(zip(component.nodes(), values))
def fiedlerVector(gnx, f, ft):
    """Compute the Fiedler vector of ``gnx``, dump it to ``f`` and return it.

    :param gnx: graph passed to ``nx.fiedler_vector``.
    :param f: writable file-like object; receives one ``node,value`` line per
        node.
    :param ft: handle forwarded to ``timer.start`` / ``timer.stop``.
    :return: dict mapping each node to its Fiedler-vector entry.
    """
    start = timer.start(ft, 'fiedler_vector')
    fiedler_values = nx.fiedler_vector(gnx)
    timer.stop(ft, start)

    # Pair values with nodes positionally. Indexing ``gnx.nodes()[i]`` is a
    # lookup by node key on NetworkX 2.x, which is only right when the nodes
    # happen to be 0..n-1; iterating the node sequence works on every version.
    # Assumes the vector follows the graph's node iteration order, as the
    # original indexing did.
    nodes = list(gnx.nodes())
    fiedler_map = {}
    lines = []
    for node, value in zip(nodes, fiedler_values):
        lines.append(str(node) + ',' + str(value) + '\n')
        fiedler_map[node] = value
    # One batched call instead of feeding a bare string to ``writelines``.
    f.writelines(lines)
    return fiedler_map
def _basic_partitioning(G, n1, n2):
    """Return graph G divided in two parts of specified sizes.

    Two candidate node sets are taken from the sorted Fiedler-vector mapping:
    the first ``n1`` entries and the first ``n2`` entries. The candidate with
    the strictly smaller cut size is used; on a tie the second one wins.

    :param G: graph to split.
    :param n1: size of the first candidate node set.
    :param n2: size of the second candidate node set.
    :return: whatever ``graph_division`` returns for the chosen node set.
    """
    # The original carried a disabled block of precondition checks (no
    # self-loops, unweighted, undirected, non-empty, connected, len(G) >= 2,
    # n1 + n2 == len(G)). They are not enforced here either.

    # Prepare the vector from which the components will be extracted.
    # Its entries should sum to approximately 0.
    fiedler = fiedler_vector(G)
    ordered = get_sorted_vector(get_mapped_vector(fiedler))

    # Build and inspect the two candidate classes and their cut sets.
    component_test1 = set(ordered[:n1, 0].flat)
    component_test2 = set(ordered[:n2, 0].flat)
    print("Questo è component_test1:\n", component_test1)
    print("Questo è component_test2:\n", component_test2)

    cut_size_1 = cut_size(G, component_test1)
    cut_size_2 = cut_size(G, component_test2)
    print("Il primo cut size vale: ", cut_size_1)
    print("Il secondo cut size vale: ", cut_size_2)

    # Remove the graph edges that belong to the cut set.
    chosen = component_test1 if cut_size_1 < cut_size_2 else component_test2
    return graph_division(G, chosen)
def nx_fiedler_communities(M):
    """Print the two node groups given by the sign of the Fiedler vector.

    The graph is built from ``M`` with ``get_undirected_nx_network``; nodes of
    two previously detected communities are removed before the weighted
    Fiedler vector is computed.

    :param M: input forwarded to ``get_undirected_nx_network``.
    :return: None; the two groups are printed.
    """
    nx_graph = get_undirected_nx_network(M)

    # Remove detected communities
    community_1 = ['Argentina', 'Venezuela']
    community_2 = ['Italy', 'France', 'Belgium', 'Germany', 'Spain',
                   'United States', 'Portugal', 'United Kingdom', 'Greece']
    for node in community_1 + community_2:
        nx_graph.remove_node(node)

    # Materialise the node view so NumPy sees a plain sequence.
    nodes = np.array(list(nx_graph.nodes()))
    vector = fiedler_vector(nx_graph, weight='weight')

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("-----")
    print(nodes[vector >= 0])
    print(nodes[vector < 0])
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute a one-column node feature for a connected graph.

    :param g: connected networkx graph. The 'hop' and 'ricci' filtrations
        look nodes up by the integers ``0..len(g)-1``.
    :param fil: one of 'deg', 'cc', 'random', 'hop', 'fiedler', 'ricci'
    :param norm: if True, divide the feature by its largest absolute value
    :param kwargs: ``base`` (int) is required when ``fil == 'hop'``
    :return: node feature (np.array of shape (n_node, 1))
    :raises Exception: if ``fil`` is not a known filtration
    """
    assert nx.is_connected(g)
    n_node = len(g)

    if fil == 'deg':
        feat = np.array(list(dict(nx.degree(g)).values())).reshape(n_node, 1)

    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
        feat = feat.reshape(n_node, 1)

    elif fil == 'random':
        feat = np.random.random((n_node, 1))

    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        length = nx.single_source_dijkstra_path_length(g, base)  # dict
        # Without this list the branch would use the feature before it is
        # assigned.
        feat = np.array([length[i] for i in range(n_node)]).reshape(n_node, 1)

    elif fil == 'fiedler':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
        feat = feat.reshape(n_node, 1)

    elif fil == 'ricci':
        g = ricciCurvature(g, alpha=0.5, weight='weight')
        ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
        ricci_list = [ricci_dict[i] for i in range(len(g))]
        feat = np.array(ricci_list).reshape((len(g), 1))

    else:
        raise Exception('No such filtration: %s' % fil)

    assert feat.shape == (len(g), 1)

    # normalize
    if norm:
        # Array-level max avoids calling float() on a 1-element array.
        feat = feat / float(np.abs(feat).max())
    return feat
def forced_split_communities_qds(adj, c, cluster_size, normalize,
                                 evd_method, tolerence, seed):
    """Force splits the communities in graph, if the size of the community
    is greater than the threshold, such that the splitting least compromises
    modularity density.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as
        ordered by the adjacency matrix.
    cluster_size : integer
        Threshold/maximum size (number of nodes) of a cluster.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    Integer array
        Array of community labels, as a result of splitting, for the nodes
        in the graph as ordered by the adjacency matrix.
    """
    # Array of unique community labels (sorted by np.unique)
    unique_clusters = np.unique(c)

    # Tracks the nodes in each community
    dict_bool = {label: (c == label) for label in unique_clusters}
    # Tracks the clusters that are connected to each community
    dict_connected = {label: set() for label in unique_clusters}

    for i, comm1 in enumerate(unique_clusters[:-1]):
        adj_comm1 = adj[dict_bool[comm1]]
        for comm2 in unique_clusters[i + 1:]:
            # 0/1 indicator of the nodes of 'comm2'
            indicator = dict_bool[comm2].astype(int)
            # Non-zero sum: at least one edge between 'comm1' and 'comm2'
            if adj_comm1.dot(indicator).sum() != 0:
                dict_connected[comm1].add(comm2)
                dict_connected[comm2].add(comm1)

    # Create a copy of cluster labels
    c_new = c.copy()

    # NOTE(review): 'dict_bool' and 'dict_connected' are not refreshed after
    # a community has been split, so later iterations pass pre-split
    # information to 'modularity_density' - confirm this is intended.
    for cluster_num in unique_clusters:
        bool_r = dict_bool[cluster_num]
        # Sparse adjacency matrix corresponding to 'cluster_num'
        sub_adj = adj[bool_r].T[bool_r]
        # NOTE(review): 'from_scipy_sparse_matrix' is unavailable in recent
        # NetworkX releases - confirm the pinned version.
        g = nx.from_scipy_sparse_matrix(sub_adj)
        len_g = len(g)

        # Connectivity is needed for both the warning and the skip decision;
        # compute it once.
        if not nx.is_connected(g):
            # Adjacent literals instead of backslash continuations, which
            # embedded source indentation in the printed text.
            print("Warning: Check your data as an earliar iteration "
                  "resulted in a cluster with "
                  "internal disconnected components")
            continue
        # Don't split singleton communities or communities of size lower
        # than the threshold
        if len_g == 1 or len_g <= cluster_size:
            continue

        # Community labels for the nodes in 'cluster_num'
        c_sub = np.zeros(len_g, dtype=int)
        # Indices of the nodes in 'sub_adj'
        sub_index = np.arange(len_g)

        f_vector = fiedler_vector(g, weight='weight', normalized=normalize,
                                  tol=tolerence, method=evd_method, seed=seed)
        # Nodes of 'sub_adj' in decreasing order of Fiedler-vector entries
        nodeIds = [i for _, i in
                   sorted(zip(f_vector, sub_index), reverse=True)]

        # 'c_new' is untouched while the candidate splits are evaluated, so
        # the new label and the connected-cluster array are loop invariant.
        new_label = max(c_new) + 1
        conn_clusters = np.array(
            list(dict_connected[cluster_num] | set([cluster_num, new_label])))

        # Records the splitting information: metric value -> split position
        split_info = {}
        c_latest = c_new.copy()
        dict_bool_copy = dict_bool.copy()

        # Possible splits of 'cluster_num' along the Fiedler ordering
        for j in range(len(nodeIds) - 1):
            first_community = nodeIds[:j + 1]
            second_community = nodeIds[j + 1:]

            # Only splits whose two sides are each connected are considered
            if not (nx.is_connected(g.subgraph(first_community))
                    and nx.is_connected(g.subgraph(second_community))):
                continue

            c_sub[first_community] = cluster_num
            c_sub[second_community] = new_label
            c_latest[bool_r] = c_sub

            dict_bool_copy[cluster_num] = (c_latest == cluster_num)
            dict_bool_copy[new_label] = (c_latest == new_label)

            # Modularity density after splitting 'cluster_num' at 'j'
            div_metric = modularity_density(adj, c_latest, np.unique(c_sub),
                                            dict_bool_copy, conn_clusters)
            split_info[div_metric] = j

        if split_info:
            # Use the division with the highest modularity density
            best_split = split_info[max(split_info)]
            c_sub[nodeIds[:best_split + 1]] = cluster_num
            c_sub[nodeIds[best_split + 1:]] = new_label
            c_new[bool_r] = c_sub
        else:
            print("No split possible for cluster num: {}, "
                  "as any further split results in disconnected modules"
                  .format(cluster_num))

    # Community labels, as a result of splitting, for the nodes in the graph
    # as ordered by the adjacency matrix
    return c_new
def nodefeat(g, fil, norm=False, **kwargs):
    """
    Compute a one-column node feature for a connected graph.

    :param g: connected networkx graph. The 'hop', 'ricci' and 'ricci_w'
        filtrations look nodes up by the integers ``0..len(g)-1``. For
        ``fil == 'fiedler_w'`` every edge 'dist' attribute of ``g`` is
        increased by 1e-6 in place.
    :param fil: 'deg', 'cc', 'cc_w', 'random', 'hop', 'fiedler', 'fiedler_w',
        'fiedler_s', 'ricci', 'ricci_w' or 'hks_<t>' with a float ``t``
    :param norm: if True, divide the feature by its largest absolute value
    :param kwargs: ``base`` (int) is required when ``fil == 'hop'``
    :return: node feature (np.array of shape (n_node, 1))
    :raises Exception: if ``fil`` is not a known filtration
    """
    t0 = time.time()
    assert nx.is_connected(g)
    n_node = len(g)

    if fil == 'deg':
        feat = np.array(list(dict(nx.degree(g)).values())).reshape(n_node, 1)

    elif fil == 'cc':
        feat = np.array(list(nx.closeness_centrality(g).values()))
        feat = feat.reshape(n_node, 1)

    elif fil == 'cc_w':
        feat = np.array(
            list(nx.closeness_centrality(g, distance='dist').values()))
        feat = feat.reshape(n_node, 1)

    elif fil == 'random':
        feat = np.random.random((n_node, 1))

    elif fil == 'hop':
        base = kwargs['base']
        assert isinstance(base, int)
        length = nx.single_source_dijkstra_path_length(g, base)  # dict
        # Without this list the branch would use the feature before it is
        # assigned.
        feat = np.array([length[i] for i in range(n_node)]).reshape(n_node, 1)

    elif fil == 'fiedler':
        if len(g.edges) == 2 * len(g):
            # todo hack here. fiedler is very slow when n_edges = 2 * n_nodes,
            # so the degree is used instead
            feat = np.array(list(dict(nx.degree(g)).values()))
            feat = feat.reshape(n_node, 1)
        else:
            feat = fiedler_vector(g, normalized=False)  # np.ndarray
            feat = feat.reshape(n_node, 1)

    elif fil == 'fiedler_w':
        # Nudge every existing 'dist' weight; edges without one are left
        # alone (the original expressed this with an assert/except pair).
        for u, v in g.edges():
            if 'dist' in g[u][v]:
                g[u][v]['dist'] += 1e-6
        print(f'bottleneck graph {len(g)}/{len(g.edges())}')
        print('-' * 50)
        feat = fiedler_vector(g, normalized=False, weight='dist',
                              method='tracemin_lu')  # np.ndarray
        print('after true fiedler')
        feat = feat.reshape(n_node, 1)

    elif fil == 'fiedler_s':
        feat = fiedler_vector(g, normalized=False)  # np.ndarray
        feat = feat.reshape(n_node, 1)
        feat = np.multiply(feat, feat)

    elif fil in ('ricci', 'ricci_w'):
        weight = 'weight' if fil == 'ricci' else 'dist'
        try:
            g = ricciCurvature(g, alpha=0.5, weight=weight)
            ricci_dict = nx.get_node_attributes(g, 'ricciCurvature')
            ricci_list = [ricci_dict[i] for i in range(len(g))]
            feat = np.array(ricci_list).reshape((len(g), 1))
        except Exception:
            # Best-effort fallback, e.g. for
            # cvxpy.error.SolverError: Solver 'ECOS' failed.
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate.
            feat = np.random.random((len(g), 1))

    elif fil[:3] == 'hks':
        assert fil[3] == '_'
        t = float(fil[4:])
        from Esme.dgms.hks import hks
        feat = hks(g, t)

    else:
        raise Exception('No such filtration: %s' % fil)

    assert feat.shape == (len(g), 1)

    # normalize
    if norm:
        # Array-level max avoids calling float() on a 1-element array.
        feat = feat / float(np.abs(feat).max())

    # Report slow computations
    if time.time() - t0 > 3:
        from Esme.helper.time import precision_format
        print(
            f'nodefeat takes {precision_format(time.time()-t0, 2)} for g {len(g)}/{len(g.edges)}'
        )
    return feat
def split_communities_mqds(adj, c, normalize, evd_method, tolerence, seed):
    """Splits one community in graph if the splitting improves modularity
    density.

    Every bipartition of every community along its Fiedler ordering is
    scored against the unsplit labelling ``c``; the single best improving
    bipartition, if any, is applied.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as
        ordered by the adjacency matrix.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    tuple
        ``(split, labels)``: ``split`` is True when an improving split was
        found; ``labels`` is the resulting array of community labels (a copy
        of ``c`` when nothing was split).
    """
    unique_clusters = np.unique(c)
    curr_modularity = mula_modularity_density(adj, c)

    # Only the best candidate is kept instead of every improving label array.
    best_value = None
    best_c = None

    for cluster_num in unique_clusters:
        # Nodes of the current community
        bool_r = (c == cluster_num)
        sub_adj = adj[bool_r].T[bool_r]
        len_g = sub_adj.shape[0]
        if len_g == 1:
            continue

        # NOTE(review): 'from_scipy_sparse_matrix' is unavailable in recent
        # NetworkX releases - confirm the pinned version.
        g = nx.from_scipy_sparse_matrix(sub_adj)
        if not nx.is_connected(g):
            # Adjacent literals instead of backslash continuations, which
            # embedded source indentation in the printed text.
            print("Warning: Check your data as an earliar iteration "
                  "resulted in a cluster with "
                  "internal disconnected components")
            continue

        f_vector = fiedler_vector(g, weight='weight', normalized=normalize,
                                  tol=tolerence, method=evd_method, seed=seed)
        # Node indices in increasing order of their Fiedler-vector entries
        nodeIds = [i for _, i in sorted(zip(f_vector, np.arange(len_g)),
                                        key=lambda pair: pair[0])]

        # Every candidate is scored against the original labelling, so the
        # label given to the second half is the same for all of them.
        new_label = max(c) + 1

        first_community = []
        second_community = list(nodeIds)
        c_sub = np.zeros(len_g, dtype=int)

        for _ in range(len_g - 1):
            # Move the node with the largest remaining entry across
            first_community.append(second_community.pop())

            if not (nx.is_connected(g.subgraph(first_community))
                    and nx.is_connected(g.subgraph(second_community))):
                continue

            c_sub[first_community] = cluster_num
            c_sub[second_community] = new_label
            scratch_c = c.copy()
            scratch_c[bool_r] = c_sub

            split_value = mula_modularity_density(adj, scratch_c)
            # Strict comparisons keep the first of equally good candidates,
            # matching max() over the candidates in encounter order.
            if split_value > curr_modularity and (
                    best_value is None or split_value > best_value):
                best_value = split_value
                best_c = scratch_c

    if best_c is None:
        return False, c.copy()
    return True, best_c
# Spectral clustering of one processed edge list: embed the nodes with the
# Fiedler vector, cluster the embedding with k-means and print the summed
# cut-size / cluster-size cost.
FILENAME = "soc-Epinions1.txt"

# The fifth whitespace-separated token of the first line is used as the
# number of clusters.
with open("graphs_processed/" + FILENAME) as f:
    line = f.readline()
first_line = line.split()
k = first_line[4]
num_clusters = int(k)

# Timing covers graph loading, the eigen-solve and the clustering.
start_time = time.time()
G = nx.read_edgelist("graphs_processed/" + FILENAME)
vec = algebraicconnectivity.fiedler_vector(G, method="tracemin_lu")
vec = np.asarray(vec).reshape(-1, 1)
clusters = cluster.KMeans(num_clusters).fit_predict(vec)

cost = 0
# Public node view instead of the private ``G.nodes._nodes`` mapping.
nodes = np.asarray(list(G.nodes))
for i in range(num_clusters):
    members = clusters == i
    size = np.count_nonzero(members)
    print(size)
    cost += algo.cut_size(G, nodes[members]) / size
print("Cost: ", cost)
print("--- %s seconds ---" % (time.time() - start_time))

# NOTE(review): the handle is left open and unused in the visible code;
# presumably later code writes the results to it - confirm and close it.
f = open("results/" + FILENAME, "w+")
def split_communities_q(adj, c, split_track, merge_track, r, normalize,
                        evd_method, tolerence, seed):
    """Splits the communities in graph if the splitting improves modularity.

    Parameters
    ----------
    adj : SciPy sparse matrix (csr or csc)
        The N x N Adjacency matrix of the graph.
    c : Integer array
        Current array of community labels for the nodes in the graph as
        ordered by the adjacency matrix.
    split_track : dictionary
        Tracks the communities fit for splitting; contains cluster labels as
        dictionary keys, and corresponding binary values (0 or 1) as values;
        1 indicates the community is fit for splitting, 0 indicates the
        community is not fit for splitting.
    merge_track : dictionary
        Tracks the communities fit for merging; contains cluster labels as
        dictionary keys, and corresponding binary values (0 or 1) as values;
        1 indicates the community is fit for merging, 0 indicates the
        community is not fit for merging.
    r : float
        Resolution of the topology: smaller 'r' favors forming larger
        communities, while larger 'r' favors forming smaller communities.
    normalize : bool
        Whether the normalized Laplacian matrix is used.
    evd_method : string
        Method of eigenvalue computation. It should be one of 'tracemin'
        (TraceMIN), 'lanczos' (Lanczos iteration) and 'lobpcg' (LOBPCG).
    tolerence : float
        Tolerance of relative residual in eigenvalue computation.
    seed : integer, random_state, or None
        Indicator of random number generation state.

    Returns
    -------
    tuple
        Tuple of the array of community labels of the nodes (as a result of
        splitting), and updated copies of 'split_track' and 'merge_track'.
        The input dictionaries are not modified.
    """
    # Array of unique cluster labels (sorted by np.unique)
    unique_clusters = np.unique(c)

    # Tracks the nodes in each community
    dict_bool = {label: (c == label) for label in unique_clusters}

    # Contribution of each community to modularity, aligned with
    # 'unique_clusters'
    comm_metric = np.array([
        modularity_r(adj, c, [cluster_num], r, dict_bool)
        for cluster_num in unique_clusters
    ])

    c_new = c.copy()
    split_change = split_track.copy()
    merge_change = merge_track.copy()

    for position, cluster_num in enumerate(unique_clusters):
        bool_r = dict_bool[cluster_num]
        # Sparse adjacency matrix corresponding to 'cluster_num'
        sub_adj = adj[bool_r].T[bool_r]
        # NOTE(review): 'from_scipy_sparse_matrix' is unavailable in recent
        # NetworkX releases - confirm the pinned version.
        g = nx.from_scipy_sparse_matrix(sub_adj)
        len_g = len(g)

        # Connectivity is needed for both the warning and the skip decision;
        # compute it once.
        connected = nx.is_connected(g)
        if not connected:
            # Adjacent literals instead of backslash continuations, which
            # embedded source indentation in the printed text.
            print("Warning: Check your data as an earliar iteration "
                  "resulted in a cluster with "
                  "internal disconnected components")
        # Don't split singleton communities, communities with disconnected
        # modules, or communities not marked fit for splitting
        if len_g == 1 or not connected or split_change[cluster_num] != 1:
            continue

        # Community labels for the nodes in 'cluster_num'
        c_sub = np.zeros(len_g, dtype=int)
        # Indices of the nodes in 'sub_adj'
        sub_index = np.arange(len_g)

        f_vector = fiedler_vector(g, weight='weight', normalized=normalize,
                                  tol=tolerence, method=evd_method, seed=seed)
        # Nodes of 'sub_adj' in decreasing order of Fiedler-vector entries
        nodeIds = [i for _, i in
                   sorted(zip(f_vector, sub_index), reverse=True)]

        # Modularity metric value for 'cluster_num'
        curr_metric = comm_metric[position]
        # 'c_new' is untouched while candidates are evaluated, so the label
        # for the second half is loop invariant.
        new_label = max(c_new) + 1

        # Records the splitting information: improvement -> split position
        split_info = {}
        c_latest = c_new.copy()

        # Possible splits of 'cluster_num' along the Fiedler ordering
        for j in range(len(nodeIds) - 1):
            first_community = nodeIds[:j + 1]
            second_community = nodeIds[j + 1:]

            # Only splits whose two sides are each connected are considered
            if not (nx.is_connected(g.subgraph(first_community))
                    and nx.is_connected(g.subgraph(second_community))):
                continue

            c_sub[first_community] = cluster_num
            c_sub[second_community] = new_label
            c_latest[bool_r] = c_sub

            # Node masks of the two split communities only
            dict_bool_copy = {
                cluster_num: (c_latest == cluster_num),
                new_label: (c_latest == new_label),
            }

            # Difference in modularity for splitting 'cluster_num' at 'j'
            div_metric = (modularity_r(adj, c_latest, np.unique(c_sub), r,
                                       dict_bool_copy) - curr_metric)

            # Record the split only if it improves the modularity
            if div_metric > 0:
                split_info[div_metric] = j

        if split_info:
            # Use the division that best improves modularity
            best_split = split_info[max(split_info)]
            c_sub[nodeIds[:best_split + 1]] = cluster_num
            c_sub[nodeIds[best_split + 1:]] = new_label
            c_new[bool_r] = c_sub

            # Both halves stay candidates for further splitting and merging
            split_change[cluster_num] = 1
            split_change[new_label] = 1
            merge_change[cluster_num] = 1
            merge_change[new_label] = 1
        else:
            # 'cluster_num' did not split; mark it not fit for splitting
            split_change[cluster_num] = 0

    return (c_new, split_change, merge_change)
def _calculate_dep(self, include: set):
    """Map every node of ``self._gnx`` to its Fiedler-vector entry.

    The Fiedler vector is computed once for the whole graph; connected
    components are not treated separately here.

    :param include: not used by this implementation.
    """
    graph = self._gnx
    fiedler = alg_connectivity.fiedler_vector(graph)
    self._features = {node: value for node, value in zip(graph, fiedler)}
""" sbm graph classification """

from Esme.dgms.fil import nodefeat
from Esme.graph.function import fil_strategy
from Esme.graph.generativemodel import sbms
from networkx.linalg.algebraicconnectivity import fiedler_vector

if __name__ == '__main__':
    n = 1
    p, q = 0.5, 0.1
    gs = sbms(n=n, n1=100, n2=50, p=p, q=q)

    for i, g in enumerate(gs):
        # The feature is computed directly here; the imported ``nodefeat``
        # helper is an alternative: nodefeat(g, 'fiedler', norm=True).
        # The local is named ``lapfeat`` so it does not rebind that import.
        lapfeat = fiedler_vector(g, normalized=False)  # np.ndarray
        lapfeat = lapfeat.reshape(len(g), 1)
        gs[i] = fil_strategy(g, lapfeat, method='node', viz_flag=False)

    print('Finish computing lapfeat')
def function_basis(g, allowed, norm_flag='no', recomputation_flag=False,
                   transformation_flag=True):
    """
    Attach the requested filtration functions to the nodes of ``g``.

    :param g: nx graph; must be connected
    :param allowed: filtration types to compute. The names tested below are
        'ricciCurvature', 'deg', 'hop', 'cc', 'fiedler', 'label', 'cc_min'
        and 'ricciCurvature_min'
    :param norm_flag: normalization flag, 'yes' or 'no'
    :param recomputation_flag: if False, ``allowed`` is filtered against the
        features already present on the first node (see the note below)
    :param transformation_flag: not used by this implementation
    :return: the graph with the node attributes set, or None when ``g`` has
        fewer than 3 nodes
    """
    assert nx.is_connected(g)
    if len(g) < 3:
        return

    # To save recomputation: look at the existing features first.
    # NOTE(review): this is a one-element list holding a keys view, so the
    # membership test below never matches a feature name and nothing is
    # filtered out. Left as is, because a real filter would also drop
    # 'label' for labelled graphs - confirm the intent before changing it.
    first_node = list(g.nodes())[0]
    existing_features = [g.nodes[first_node].keys()]
    if not recomputation_flag:
        allowed = [
            feature for feature in allowed
            if feature not in existing_features
        ]

    def norm(g_, key, flag=norm_flag):
        """Return the divisor for attribute ``key``: 1, or max |value| + 1e-6."""
        if flag == 'no':
            return 1
        elif flag == 'yes':
            # Materialise the values; np.abs cannot take a dict view.
            values = list(nx.get_node_attributes(g_, key).values())
            return np.max(np.abs(values)) + 1e-6
        else:
            # The original raised a bare string, which is itself a TypeError.
            raise Exception('Error')

    # ricci
    g_ricci = g
    if 'ricciCurvature' in allowed:
        try:
            g_ricci = ricciCurvature(g, alpha=0.5, weight='weight')
            # Compare node sequences as lists; a keys view never equals a list.
            assert list(g_ricci.nodes()) == list(g.nodes())
            # Normalise over the graph that carries the curvature values.
            ricci_norm = norm(g_ricci, 'ricciCurvature', norm_flag)
            for n_ in g_ricci.nodes():
                g_ricci.nodes[n_]['ricciCurvature'] /= ricci_norm
        except Exception:
            # Best-effort fallback; narrowed from a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            print('RicciCurvature Error for graph, set 0 for all nodes')
            for n in g_ricci.nodes():
                g_ricci.nodes[n]['ricciCurvature'] = 0

    # degree
    if 'deg' in allowed:
        deg_dict = dict(nx.degree(g_ricci))
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['deg'] = deg_dict[n]
        deg_norm = norm(g_ricci, 'deg', norm_flag)
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['deg'] /= float(deg_norm)

    # hop
    if 'hop' in allowed:
        distance = nx.floyd_warshall_numpy(g)  # return a matrix
        distance = np.array(distance).astype(int)
        if norm_flag == 'no':
            hop_norm = 1
        elif norm_flag == 'yes':
            hop_norm = np.max(distance)
        else:
            raise Exception('norm flag has to be yes or no')
        # If g_ricci has non consecutive nodes, n_idx is the row of the hop
        # distance matrix; enumerate replaces a per-node list search.
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['hop'] = distance[n_idx][:] / float(hop_norm)

    # closeness_centrality
    if 'cc' in allowed:
        cc = nx.closeness_centrality(g)  # dict
        cc_min = min(cc.values())
        # Ratio to the minimum, then inverted (no normalization, debug use)
        cc = {k: 1.0 / (v / cc_min) for k, v in cc.items()}
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['cc'] = cc[n]

    # fiedler
    if 'fiedler' in allowed:
        fiedler = fiedler_vector(g, normalized=False)  # np.ndarray
        assert max(fiedler) > 0
        fiedler = fiedler / max(np.abs(fiedler))
        assert max(np.abs(fiedler)) == 1
        for n_idx, n in enumerate(g_ricci.nodes()):
            g_ricci.nodes[n]['fiedler'] = fiedler[n_idx]

    # label
    any_node = list(g_ricci.nodes)[0]
    if 'label' not in g_ricci.nodes[any_node].keys():
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] = 0  # add dummy
    else:  # contains label key
        # NOTE(review): 'graph' is not defined in this function; presumably
        # a module-level dataset name - confirm it exists where this runs.
        label_norm = 40
        if graph == 'dd_test':
            label_norm = 90
        # Labels are rescaled in place on every call.
        for n in g_ricci.nodes():
            g_ricci.nodes[n]['label'] /= float(label_norm)

    if 'deg' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='deg', cutoff=1, iteration=0)
        # better normalization (used to include 1_0_deg_std, deleted now)
        if norm_flag == 'yes':
            for attr in ['1_0_deg_sum']:
                norm_ = norm(g_ricci, attr, norm_flag)
                for n in g_ricci.nodes():
                    g_ricci.nodes[n][attr] = (
                        g_ricci.nodes[n][attr] / float(norm_))

    if 'label' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=0)
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='label', cutoff=1, iteration=1)

    if 'cc_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='cc')

    if 'ricciCurvature_min' in allowed:
        for n in g_ricci.nodes():
            attribute_mean(g_ricci, n, key='ricciCurvature')

    return g_ricci
"4 5 {'dist': 0.9401972}" ] g = nx.parse_edgelist(lines, nodetype=int) print(g.edges(data=True)) # v_weight = fiedler_vector(g, normalized=False, weight='dist') # np.ndarray # v_weight = list(nx.closeness_centrality(g, distance='dist').values()) g = ricciCurvature(g, alpha=0.5, weight='dist') ricci_dict = nx.get_node_attributes(g, 'ricciCurvature') v_weight = [ricci_dict[i] for i in range(len(g))] print(v_weight) sys.exit() # g = nx.circulant_graph(10, offsets=[1]*10) w_name = 'weightd' random.seed(43) g = nx.random_tree(20, seed=42) for u, v in g.edges(): g[u][v][w_name] = random.random() # print(g[u][v]) print(g.edges) v_noweight = fiedler_vector(g, normalized=False) # np.ndarray v_weight = fiedler_vector(g, normalized=False, weight=w_name) # np.ndarray v_fake_weight = fiedler_vector(g, normalized=False, weight='abcdefg') # np.ndarray print('no weight', v_noweight) print('weight', v_weight) print('fake weight', v_fake_weight)