def eig_calc_OLD(G,normalize=None):
    '''
    Legacy version of eig_calc: dominant eigenvector of a graph.

    Parameters
    ----------
    G - networkx graph; its adjacency matrix is obtained with
        nx.to_scipy_sparse_matrix
    normalize - int or None; when truthy, the sum-normalized eigenvector is
        multiplied by float(n)/normalize, n being the number of nodes of G

    Returns
    -------
    results_dict - dict mapping every node of G to the real part of its
        eigenvector entry. For a 2-node graph the result of power_iteration
        is returned unchanged.
    '''
    # Import the submodule explicitly: "import scipy" alone does not
    # guarantee that scipy.sparse.linalg is loaded, and the old
    # scipy.sparse.linalg.eigen.arpack path is private.
    import scipy.sparse.linalg
    startTime = datetime.now()
    n = G.number_of_nodes()
    if n == 1:
        eigvec = np.array([1])
    elif n == 2:
        # eigs cannot handle this size
        # ("ValueError: k must be less than ndim(A)-1, k=1")
        return power_iteration(G,normalize=normalize)
    else:
        # the graph contains more than 2 nodes
        A = nx.to_scipy_sparse_matrix(G)
        try:
            # shift-invert around sigma=1: eigenvalue closest to 1
            eigval,eigvec = scipy.sparse.linalg.eigs(A.T, k=1, sigma=1, which='LM')
        except RuntimeError:
            # Retry on the graph returned by add_noise (defined elsewhere).
            B = nx.to_scipy_sparse_matrix(add_noise(G))
            eigval,eigvec = scipy.sparse.linalg.eigs(B.T, k=1, sigma=1, which='LM')
    eigvec = eigvec/eigvec.sum()
    if normalize:
        norm_factor = float(n)/normalize
        eigvec = eigvec*norm_factor
    # eigs returns an (n, 1) column; flatten it so float() receives scalars.
    # Only the real part is kept.
    results_dict = dict(zip(G.nodes(),map(float,np.ravel(eigvec.real))))
    if n > 100:
        # The share of the graph can only be reported when normalize is given;
        # dividing by None used to crash here.
        if normalize:
            share = '('+str(float(n)/normalize)+'% of the non-isolates nodes from the graph) '
        else:
            share = ''
        print('--- eig_calc: calc of class contains '+str(n)+' nodes, '+share+'took-'+str(datetime.now()-startTime))
        sys.stdout.flush()
    return results_dict
def gRa(g, w):
    '''Build a dense matrix of per-pair values from edge betweenness.

    w is a number of edges: the number of edges that should remain in the
    data after the edge-removal perturbation.

    Existing edges get their (unnormalized) edge betweenness rescaled so
    that the whole matrix sums to 2*w; pairs without an edge get the
    constant q = (e_num - w) / (number of non-edges); the diagonal is 0.

    The input graph g is left untouched: betweenness weights are written to
    a copy.
    '''
    tg = g.copy()
    # Unweighted adjacency, used only to find the pairs without an edge.
    Rq = nx.to_scipy_sparse_matrix(g).toarray()
    bw = nx.edge_betweenness_centrality(g, normalized=False)
    e_num = len(g.edges())
    n = len(g)
    N = (n * (n - 1)) // 2  # number of unordered node pairs
    for k, v in bw.items():
        # store the betweenness as edge weight on the copy, not on g
        tg.add_edge(*k, weight=v)
    Rp = nx.to_scipy_sparse_matrix(tg, weight='weight').toarray()
    Rp = w * Rp * 2.0 / Rp.sum()
    non_edges = N - e_num
    # A complete graph has no non-edges, so q is never used there;
    # avoid the division by zero.
    q = float(e_num - w) / non_edges if non_edges else 0.0
    Rp[Rq == 0] = q  # fill every pair that has no edge with q
    diagonal = list(range(n))
    Rp[diagonal, diagonal] = 0  # remove the q values written on the diagonal
    return Rp
def test_weight_keyword(self):
    # Path on 4 nodes whose edges all carry weight=0.5 and other=0.3.
    weighted_edges = [(n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3)]
    WP4 = nx.Graph()
    WP4.add_edges_from(weighted_edges)

    # Reference: adjacency of the unweighted path graph.
    unweighted = nx.to_scipy_sparse_matrix(path_graph(4))

    # weight=None ignores the attributes entirely.
    no_weight = nx.to_scipy_sparse_matrix(WP4, weight=None)
    np_assert_equal(unweighted.todense(), no_weight.todense())

    # The default keyword picks up the "weight" attribute.
    default_weight = nx.to_scipy_sparse_matrix(WP4)
    np_assert_equal(0.5 * unweighted.todense(), default_weight.todense())

    # An explicit attribute name selects that attribute.
    other_weight = nx.to_scipy_sparse_matrix(WP4, weight="other")
    np_assert_equal(0.3 * unweighted.todense(), other_weight.todense())
def dists(G, nbunch = None):
    """Collect degree and weight distributions of a directed graph.

    Works on a copy of G. "gross" quantities are taken with self-loops
    present, "net" quantities after the self-loops have been removed.

    Returns a dict with float32 arrays (ordered like nbunch) under
    'out-degree', 'in-degree', 'gross_out-weight', 'gross_in-weight',
    'net_out-weight', 'net_in-weight', plus 'gross_cells' and 'net_cells',
    the third value returned by extract.find on the adjacency matrix.
    """
    G = G.copy()
    if nbunch is None:
        nbunch = G.nodes()

    def weighted_degrees():
        # Try the ``weighted=True`` keyword first and fall back to
        # ``weight='weight'`` when it is rejected with TypeError.
        try:
            return (G.out_degree(weighted = True, nbunch = nbunch),
                    G.in_degree(weighted = True, nbunch = nbunch))
        except TypeError:
            return (G.out_degree(weight = 'weight', nbunch = nbunch),
                    G.in_degree(weight = 'weight', nbunch = nbunch))

    def as_array(per_node):
        return np.array([per_node[node] for node in nbunch], dtype = np.float32)

    out_degree = G.out_degree(nbunch = nbunch)
    in_degree = G.in_degree(nbunch = nbunch)
    gross_out_weight, gross_in_weight = weighted_degrees()
    _, _, grosscells = extract.find(nx.to_scipy_sparse_matrix(G, nodelist = nbunch))

    # Drop the self-loops, then measure the "net" quantities.
    G.remove_edges_from(G.selfloop_edges(data = True))
    net_out_weight, net_in_weight = weighted_degrees()
    _, _, netcells = extract.find(nx.to_scipy_sparse_matrix(G, nodelist = nbunch))

    # Arrays are materialised only here, after the self-loop removal,
    # exactly where the original built them.
    result = {}
    result['out-degree'] = as_array(out_degree)
    result['in-degree'] = as_array(in_degree)
    result['gross_out-weight'] = as_array(gross_out_weight)
    result['gross_in-weight'] = as_array(gross_in_weight)
    result['net_out-weight'] = as_array(net_out_weight)
    result['net_in-weight'] = as_array(net_in_weight)
    result['gross_cells'] = grosscells
    result['net_cells'] = netcells
    return result
def to_sparse( G ):
    """
    DiGraph to scipy sparse matrix (integer dtype, CSR format).

    G may be either a wrapper object exposing the networkx graph as
    ``G.graph``, or the networkx graph itself.
    """
    # networkx graphs also have a ``.graph`` attribute (the graph-attribute
    # dict), so "try G.graph first" only worked when networkx happened to
    # fail with AttributeError on that dict. Decide explicitly instead.
    if isinstance( G, nx.Graph ):
        graph = G
    else:
        # wrapper object; fall back to G itself if it has no ``.graph``
        graph = getattr( G, 'graph', G )
    return nx.to_scipy_sparse_matrix( graph, dtype=int, format='csr' )
def eig_calc(G,normalize=None,nstart_norm=None):
    '''
    Calculates the dominant eigenvector of graph (the one related to
    eigenvalue = 1).

    Parameters
    ----------
    G - networkx directed graph, the strongly connected component (subGraph)
        in our case
    normalize - int, the number of nodes in the original (entire) graph- for
        normalizing the resulting eigenvector as per the proportion of the
        component from the entire (original) graph
    nstart_norm - float, the weight [0,1] for normalizing the resulting
        eigenvector (for referring the risk proportion of the component from
        the entire (original) graph).
    *NOTE: normalize and nstart_norm cannot come together!! only one of them
        can be different from None! (This is not enforced: when both are
        given, normalize wins.)

    Returns
    -------
    results_dict - a dict of the (normalized) dominant eigenvector (the keys
        are G nodes names- basically integer). For a 2-node graph the result
        of power_iteration is returned unchanged.
    '''
    # Import the submodule explicitly: "import scipy" alone does not
    # guarantee that scipy.sparse.linalg is loaded, and the old
    # scipy.sparse.linalg.eigen.arpack path is private.
    import scipy.sparse.linalg
    startTime = datetime.now()
    n = G.number_of_nodes()
    if n == 1:
        eigvec = np.array([1])
    elif n == 2:
        # eigs cannot handle this size
        # ("ValueError: k must be less than ndim(A)-1, k=1")
        return power_iteration(G,normalize=normalize,nstart_norm=nstart_norm)
    else:
        # the graph contains more than 2 nodes
        A = nx.to_scipy_sparse_matrix(G)
        try:
            # shift-invert around sigma=1: eigenvalue closest to 1
            eigval,eigvec = scipy.sparse.linalg.eigs(A.T, k=1, sigma=1, which='LM')
        except RuntimeError:
            # Retry on the graph returned by add_noise (defined elsewhere).
            B = nx.to_scipy_sparse_matrix(add_noise(G))
            eigval,eigvec = scipy.sparse.linalg.eigs(B.T, k=1, sigma=1, which='LM')
    eigvec = eigvec/eigvec.sum()
    if normalize:
        norm_factor = float(n)/normalize
        eigvec = eigvec*norm_factor
        if n > 100:
            # Items are joined with single spaces, matching what the former
            # multi-argument print statement produced.
            print(' '.join(map(str, ('--- eig_calc: calc of class contains ',n,' nodes, (',float(n)/normalize,'% of the non-isolates nodes from the graph) took-',datetime.now()-startTime))))
            sys.stdout.flush()
    elif nstart_norm is not None:
        eigvec = eigvec*nstart_norm
        if n > 100:
            print(' '.join(map(str, ('--- eig_calc: calc of class contains ',n,' nodes, took-',datetime.now()-startTime))))
            sys.stdout.flush()
    # eigs returns an (n, 1) column; flatten it so float() receives scalars.
    # Only the real part is kept.
    results_dict = dict(zip(G.nodes(),map(float,np.ravel(eigvec.real))))
    return results_dict
def unroll_adjacency_matrix(G):
    '''Flatten the upper triangle of G's adjacency matrix into a row vector.

    The entries strictly above the diagonal of the n x n adjacency matrix
    are laid out row by row in a 1 x n(n - 1)/2 sparse (LIL) matrix.

    Example: [[0, 1, 0], [1, 0, 1], [0, 1, 0]] gives [1, 0, 1].'''
    n = len(G)
    unrolled = sp.sparse.lil_matrix((1, n*(n - 1)//2))
    adjacency = nx.to_scipy_sparse_matrix(G, format = "coo")
    for row, col, value in zip(adjacency.row, adjacency.col, adjacency.data):
        if col > row:
            # Position of (row, col) when the upper triangle is read
            # row by row.
            position = row*n - (row*(row+1))//2 + col - row - 1
            unrolled[0, position] = value
    return unrolled
def r_perturbR(g,R):
    '''Random perturbation with per-pair (variable) parameters.

    For every node pair i < j one Bernoulli trial decides whether the edge
    is present in the result. The success probability is R[i,j] when that
    value is below 1; otherwise it is 1 for an existing edge (entry equal
    to 1) and 0 for any other pair.

    Returns a new undirected nx.Graph built from the perturbed matrix.
    '''
    upper = sparse.triu(nx.to_scipy_sparse_matrix(g)).toarray()
    size = len(g)
    for i in range(size):
        for j in range(i + 1, size):
            if R[i,j] < 1:
                prob = R[i,j]
            elif upper[i,j] == 1:
                prob = 1  # existing edge is always kept
            else:
                prob = 0  # missing edge is never added
            # The trial is drawn even for prob 0 or 1, as before.
            upper[i,j] = stats.bernoulli.rvs(prob)
    # rebuild the result as a Graph object
    return nx.from_numpy_matrix(upper,create_using=nx.Graph())
def page_rank_scipy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.

    PageRank computes the largest eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration always starts
    from the uniform vector and stops once the l1 change between two
    iterates drops below n*tol.

    Raises NetworkXError when max_iter iterations were not enough.

    This implementation requires scipy.
    """
    import scipy.sparse
    M=NX.to_scipy_sparse_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    S=scipy.array(M.sum(axis=1)).flatten()
    # "!=" replaces the "<>" operator, which Python 3 no longer accepts.
    index=scipy.where(S!=0)[0]
    for row in index:
        # make every non-empty row sum to 1
        M[row,:]*=1.0/S[row]
    x=scipy.ones((n))/n # initial guess
    # rows that still sum to 0 have no out-links ("dangling")
    dangle=scipy.array(scipy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    for i in range(max_iter):
        xlast=x
        x=alpha*(M.rmatvec(x)+scipy.dot(dangle,xlast))+(1-alpha)*xlast.sum()/n
        # check convergence, l1 norm
        err=scipy.absolute(x-xlast).sum()
        if err < n*tol:
            return x

    # Report max_iter directly: the loop variable is undefined (or a stale
    # row index) when max_iter is 0.
    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%max_iter)
def modularity_matrix(G, nodelist=None, weight=None):
    """Return the modularity matrix of G.

    The modularity matrix is B = A - <A>, where A is the adjacency matrix
    and <A> is the expected adjacency matrix under the configuration model.

    Element-wise,

        B_ij = A_ij - k_i k_j / (2 m)

    where k_i is the degree of node i and m is the number of edges in the
    graph. When weight names an edge attribute, A_ij, k_i, k_j and m are
    computed from its values.

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> import networkx as nx
    >>> k =[3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> B = nx.modularity_matrix(G)

    See Also
    --------
    to_numpy_matrix
    adjacency_matrix
    laplacian_matrix
    directed_modularity_matrix

    References
    ----------
    .. [1] M. E. J. Newman, "Modularity and community structure in networks",
       Proc. Natl. Acad. Sci. USA, vol. 103, pp. 8577-8582, 2006.
    """
    nodes = list(G) if nodelist is None else nodelist
    adjacency = nx.to_scipy_sparse_matrix(G, nodelist=nodes, weight=weight,
                                          format='csr')
    degrees = adjacency.sum(axis=1)      # column of (weighted) degrees
    edge_total = degrees.sum() * 0.5     # every edge is counted twice
    # Outer product of the degree column with itself, scaled by 2m.
    expected = degrees * degrees.T / (2 * edge_total)
    return adjacency - expected
def compute_slice_matrices(self,G):
    """Build per-edge-type adjacency matrices and degree columns for G.

    Nodes are grouped by their first element (n[0]) and edges by their
    'etype' attribute. Sets on self:
      OrderedNodes - all nodes, concatenated group by group
      NodeIndex    - node -> position in OrderedNodes
      AdjMat       - etype -> CSR adjacency of graph_slice(G, etypes=etype)
      Degs         - etype -> (len(OrderedNodes), 1) array of
                     max(1, degree in the slice)
    """
    # Group nodes and edges into layers.
    nodes_by_type = defaultdict(list)
    for node in G.nodes():
        nodes_by_type[node[0]].append(node)
    edges_by_type = defaultdict(list)
    for edge in G.edges(data=True):
        edges_by_type[edge[2]['etype']].append(edge)
    node_types = list(nodes_by_type)
    edge_types = list(edges_by_type)

    # Fix a global node ordering and its inverse lookup.
    self.OrderedNodes = [node for ntype in node_types
                         for node in nodes_by_type[ntype]]
    self.NodeIndex = dict((node, position)
                          for position, node in enumerate(self.OrderedNodes))

    # One adjacency matrix / degree column per edge type.
    self.AdjMat = {}
    self.Degs = {}
    for etype in edge_types:
        print('--computing slice for edge type "'+etype+'"')
        layer = graph_slice(G,etypes=etype)
        self.AdjMat[etype] = nx.to_scipy_sparse_matrix(layer,self.OrderedNodes,format='csr')
        # degrees are clamped to at least 1
        degree_rows = [[max(1,float(layer.degree(node)))] for node in self.OrderedNodes]
        self.Degs[etype] = np.array(degree_rows)
def r_perturbSa(g,p=None):
    '''Random perturbation with fixed parameters.

    p is the Bernoulli success probability used for existing edges (the
    chance an edge is kept). Pairs without an edge are switched on with
    probability q = e_num * (1 - p) / (number of non-edges).

    Returns a new undirected nx.Graph built from the perturbed matrix.
    Raises TypeError when p is not given.
    '''
    if p is None:
        # "1 - None" raised an opaque TypeError before; keep the type,
        # improve the message.
        raise TypeError("r_perturbSa requires the probability p")
    A=nx.to_scipy_sparse_matrix(g)
    B=sparse.triu(A).toarray()
    n=len(g)
    e_num=len(g.edges()) # number of edges present in the graph
    # Integer count of node pairs without an edge; "//" keeps it an int so
    # it can be used as a sample size under true division as well.
    non_edges=(n * (n - 1)) // 2 - e_num
    # float() avoids integer truncation of q when p is an int; a complete
    # graph has no non-edges, so q is irrelevant there.
    q = e_num * (1 - p) / float(non_edges) if non_edges > 0 else 0.0
    # Draw all trials up front: first the edge trials, then the non-edge ones.
    listp=stats.bernoulli.rvs(p,size=e_num).tolist()
    listq=stats.bernoulli.rvs(q,size=max(non_edges, 0)).tolist()
    for i in range(n):
        for j in range(i + 1, n): # skip the zeros on the diagonal
            if B[i,j]==1:
                B[i,j] = listp.pop() # trial with success probability p
            else:
                B[i,j] = listq.pop() # trial with success probability q
    # rebuild the result as a Graph object
    return nx.from_numpy_matrix(B,create_using=nx.Graph())
def test_ordering(self):
    # Directed 3-cycle 1 -> 2 -> 3 -> 1.
    G = nx.DiGraph()
    for source, target in [(1, 2), (2, 3), (3, 1)]:
        G.add_edge(source, target)
    # With nodelist [3, 2, 1] the rows/columns follow that order.
    expected = np.matrix([[0,0,1],[1,0,0],[0,1,0]])
    M = nx.to_scipy_sparse_matrix(G,nodelist=[3,2,1])
    np_assert_equal(M.todense(), expected)
def classify_samples(data, labels, unmarked_idxs, sample_size, n_runs, n_clusters):
    """Classify a random sample of the unmarked points on a reduced graph.

    sample_size of the entries of unmarked_idxs are kept at random; the
    remaining unmarked nodes are removed from the graph built from data
    (a scipy sparse adjacency matrix) and from labels. A
    TransductiveClassifier(n_runs, n_clusters) is fitted on what is left.

    unmarked_idxs is indexed with lists, so it is expected to be a numpy
    array. Rows of labels whose first column is -1 are treated as unmarked.

    Returns a dict mapping each kept unmarked index to its row of
    clf.transduction_.
    """
    unmarked_point_probs = {}
    # list(...) is required: random.shuffle needs a mutable sequence and
    # range() is immutable on Python 3.
    all_idxs = list(range(len(unmarked_idxs)))
    random.shuffle(all_idxs)
    keep_raw_idxs = sorted(all_idxs[:sample_size])
    delete_raw_idxs = sorted(all_idxs[sample_size:])
    keep_idxs, delete_idxs = (unmarked_idxs[keep_raw_idxs],
                              unmarked_idxs[delete_raw_idxs])

    # Remove the unsampled unmarked nodes from both graph and labels.
    bagging_graph = nx.from_scipy_sparse_matrix(data)
    bagging_graph.remove_nodes_from(delete_idxs)
    bagging_adj_matrix = nx.to_scipy_sparse_matrix(bagging_graph)
    bagging_labels = np.delete(labels, delete_idxs, 0)
    bagging_unmarked_idxs = np.where(bagging_labels[:, 0] == -1)[0]

    clf = TransductiveClassifier(n_runs, n_clusters)
    clf.fit(bagging_adj_matrix, bagging_labels)

    # Internal invariant: the i-th kept index corresponds to the i-th
    # unmarked row of the reduced problem.
    assert len(keep_idxs) == len(bagging_unmarked_idxs)
    for i, idx in enumerate(keep_idxs):
        unmarked_point_probs[idx] = clf.transduction_[bagging_unmarked_idxs[i]]
    return unmarked_point_probs
def mypr(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, weight='weight'):
    """PageRank of G by power iteration on a scipy sparse matrix.

    Parameters
    ----------
    G : networkx graph
    alpha : damping factor
    personalization : accepted for signature compatibility but NOT used;
        teleportation is always uniform
    max_iter : maximum number of power iterations
    tol : convergence is declared when the l1 change is below n*tol
    weight : edge attribute used as weight

    Returns
    -------
    dict node -> score, or None (after printing 'Failed to converge') when
    max_iter iterations were not enough.
    """
    nodelist=list(G.nodes())
    M=nx.to_scipy_sparse_matrix(G,nodelist=nodelist,weight=weight)
    (n,m)=M.shape # should be square
    # Row-normalise M by left-multiplying with diag(1/row_sum).
    S = scipy.array(M.sum(axis=1))
    S[S>0] = 1.0 / S[S>0]
    Sm = scipy.sparse.lil_matrix((n,n))
    Sm.setdiag(S.flat)
    Sm = Sm.tocsr()
    M = Sm * M
    x=scipy.ones((n))/n # initial guess
    # rows that sum to 0 have no out-links ("dangling")
    dangle=scipy.array(scipy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    # uniform "teleportation" vector
    v=x
    # power iteration: make up to max_iter iterations (the former
    # "while i <= max_iter" ran one iteration too many)
    for _ in range(max_iter):
        xlast=x
        x=alpha*(x*M+scipy.dot(dangle,xlast))+(1-alpha)*v
        x=x/x.sum()
        # check convergence, l1 norm
        err=scipy.absolute(x-xlast).sum()
        if err < n*tol:
            return dict(zip(nodelist,x))
    print('Failed to converge')
def _backward(T, edge_to_P, root, root_prior_distn1d, node_to_data_lmap):
    """
    This is the first pass of a forward-backward algorithm.

    Parameters
    ----------
    {params}

    """
    # Toposorted node order starting at the root, plus the matching matrix.
    nodes = nx.topological_sort(T, [root])
    node_to_idx = {}
    for position, node in enumerate(nodes):
        node_to_idx[node] = position
    m = nx.to_scipy_sparse_matrix(T, nodes)
    nnodes = len(nodes)
    nstates = root_prior_distn1d.shape[0]

    # One transition matrix per edge, stored at (index of the edge's
    # second endpoint) - 1.
    trans = np.empty((nnodes-1, nstates, nstates), dtype=float)
    for (_, nb), P in edge_to_P.items():
        trans[node_to_idx[nb] - 1] = P

    # One data row per node, in toposort order.
    data = np.empty((nnodes, nstates), dtype=float)
    for position, node in enumerate(nodes):
        data[position] = node_to_data_lmap[node]

    # _wrapped_first_pass fills lhood in place from the matrix structure.
    lhood = np.empty((nnodes, nstates), dtype=float)
    validation = 0
    _wrapped_first_pass(m.indices, m.indptr, trans, data, lhood, validation)
    # Row 0 belongs to the root: weight it by the prior.
    lhood[0, :] *= root_prior_distn1d

    # Hand the rows back keyed by node.
    result = {}
    for position, node in enumerate(nodes):
        result[node] = lhood[position, :]
    return result
def compute_pagerank(network : nx.DiGraph, damping : float=0.85):
    """Compute PageRank of ``network`` via ``pagerank_power``.

    The adjacency matrix is row-normalised by out-degree and transposed
    before being handed to ``pagerank_power`` together with ``damping``.
    Nodes without out-edges keep an all-zero row instead of triggering a
    division by zero.

    Returns whatever ``pagerank_power`` returns.
    """
    Adj = nx.to_scipy_sparse_matrix(network, dtype='float', format='csr')
    deg = np.ravel(Adj.sum(axis=1))
    # Reciprocal only where the degree is non-zero; "1 / deg" produced inf
    # (and a RuntimeWarning) for nodes without out-edges.
    inv_deg = np.zeros_like(deg)
    np.divide(1.0, deg, out=inv_deg, where=deg != 0)
    Dinv = sparse.diags(inv_deg)
    Trans = (Dinv @ Adj).T
    pr = pagerank_power(Trans, damping=damping)
    return pr
def configuration_model(self, return_copy=False):
    """
        Replace every snapshot matrix by an edge-randomized version.

        Each matrix is converted to a networkx graph (directed or not,
        following self.is_directed), passed through
        gwh.randomize_network and converted back to an integer sparse
        matrix.

        With return_copy=True the work is done on self[:] and that object
        is returned; otherwise self is modified in place and None is
        returned.
    """
    nx_creator = nx.DiGraph() if self.is_directed else nx.Graph()
    x = self[:] if return_copy else self

    for i in range(len(self)):
        # two spaces: same text the former two-argument print produced
        print("configuration model:  " + str(i))
        randomized = gwh.randomize_network(
            nx.from_scipy_sparse_matrix(x[i], create_using=nx_creator))
        x[i] = nx.to_scipy_sparse_matrix(randomized, dtype="int")

    if not return_copy:
        return
    return x
def main111():
    # Scratch driver. With the current "if 1"/"if 0" switches it reads the
    # edge list from infname, dumps its adjacency matrix to
    # Results/test.mtx and exits.
    # NOTE(review): the original indentation was lost; the nesting below is
    # reconstructed - confirm against the real file.
    if 1:
        G = nx.read_edgelist(infname)
        print nx.info(G)
        # Graph adjacency matrix
        A = nx.to_scipy_sparse_matrix(G)
        print type(A)
        from scipy import sparse, io
        io.mmwrite("Results/test.mtx", A)
        # Everything below this exit() is unreachable while the branch is on.
        exit()
        # write to disk clustering coeffs for this graph
        snm.get_clust_coeff([G], 'orig', 'mmonth')
        # write to disk eigenvalue
        snm.network_value_distribution([G], [], 'origMmonth')

    # Disabled experiment: build a graph from a random sample of 100 edges.
    if 0:
        edgelist = np.loadtxt(infname, dtype=str, delimiter='\t')
        print edgelist[:4]
        idx = np.arange(len(edgelist))
        np.random.shuffle(idx)
        subsamp_edgelist = edgelist[idx[:100]]
        G = nx.Graph()
        G.add_edges_from([(long(x), long(y)) for x, y in subsamp_edgelist])
        # visualize this graph
        # visualize_graph(G)
        exit()
        G = nx.Graph()
        G.add_edges_from([(long(x), long(y)) for x, y in edgelist])
        print nx.info(G)
    print 'Done'
def list_directed_cc (H):
    """Label the weakly connected components of the directed graph H.

    Prints and returns the result of
    scipy.sparse.csgraph.connected_components on H's adjacency matrix
    (called with return_labels=True).
    """
    # graph adjacency matrix as a SciPy sparse matrix
    adjacency = nx.to_scipy_sparse_matrix(H)
    components = sp.sparse.csgraph.connected_components(
        adjacency,
        directed=True,
        connection='weak',
        return_labels=True,
    )
    print(" All cc: ", components)
    return components
def get_matrix_norm_by_col(G,sparse=False):
    """Return G's matrix with every column l1-normalised.

    The matrix is built as a scipy sparse matrix when sparse == True and
    as a numpy matrix otherwise. Columns are normalised by normalising the
    rows of the transpose and transposing back.
    """
    # NOTE(review): `normalize` comes from the enclosing module; presumably
    # sklearn.preprocessing.normalize - confirm.
    convert = nx.to_scipy_sparse_matrix if sparse == True else nx.to_numpy_matrix
    L = convert(G,nodelist=G.nodes())
    transposed = L.copy().T
    row_normalised = normalize(transposed, axis=1, norm='l1')
    return row_normalised.T
def get_graph(filepath):
    """
    Load the matrix saved at filepath.

    The loader is chosen from the file extension:
      .mat        MATLAB file, variable 'mat'
      .csv        dense comma-separated matrix
      .gml        GML graph (via networkx)
      .dat        edge list "src dst [weight]", 0- or 1-indexed
      .gz / .txt  space-separated weighted edge list (via networkx)

    Args:
        filepath: path to file holding a sparse matrix
    Returns:
        A: SciPy CSR matrix
    Raises:
        IOError: if the extension is not one of the above
    """
    _, ending = os.path.splitext(filepath)
    if ending == '.mat':
        from scipy import io
        A = sparse.csr_matrix(io.loadmat(filepath)['mat'], dtype=float)
    elif ending == '.csv':
        A = sparse.csr_matrix(np.genfromtxt(filepath, delimiter=','), dtype=float)
    elif ending == '.gml':
        import networkx as nx
        A = nx.to_scipy_sparse_matrix(nx.read_gml(filepath), dtype=float)
    elif ending == '.dat':
        # atleast_2d: a file with a single edge is parsed as a 1-D array.
        adjlist = np.atleast_2d(np.genfromtxt(filepath))
        if adjlist.shape[1] == 2:
            data = np.ones(adjlist.shape[0])
            endpoints = adjlist
        else:
            data = adjlist[:, 2]
            # Only the two endpoint columns; the weight column must not
            # take part in the indexing check or shift.
            endpoints = adjlist[:, :2]
        if np.min(endpoints) == 1:
            endpoints = endpoints - 1  # 1-based -> 0-based indexing
        rows = endpoints[:, 0].astype(int)
        cols = endpoints[:, 1].astype(int)
        # Force a square matrix; without an explicit shape the result is
        # (max row + 1) x (max col + 1), which need not be square.
        n = int(max(rows.max(), cols.max())) + 1
        A = sparse.coo_matrix((data, (rows, cols)), shape=(n, n),
                              dtype=float).tocsr()
    elif ending == '.gz' or ending == '.txt':
        import networkx as nx
        A = nx.to_scipy_sparse_matrix(
            nx.read_weighted_edgelist(filepath, delimiter=' '),
            dtype=float)
    else:
        raise IOError("Could not parse file")
    return A
def get_overlay_topology(N, f_name=None):
    """Generate an overlay topology and return its adjacency matrix.

    A scale-free graph with N nodes is generated, made undirected and
    reduced to its largest connected component. When f_name is given the
    component is also passed to save_graph(component, f_name).

    Returns the component's adjacency matrix as a scipy sparse matrix.
    """
    g = nx.scale_free_graph(N).to_undirected()
    # Pick the largest component explicitly. Indexing the result of
    # connected_component_subgraphs with [0] only works where that function
    # returns a size-sorted list.
    largest_nodes = max(nx.connected_components(g), key=len)
    largest = g.subgraph(largest_nodes).copy()
    if f_name:
        save_graph(largest, f_name)
    return nx.to_scipy_sparse_matrix(largest)
def test_partition_graph(rand_lowrank_g, spark_context):
    # Show the 'sign'-weighted adjacency matrix for debugging.
    sign_matrix = nx.to_scipy_sparse_matrix(rand_lowrank_g, weight='sign')
    print(sign_matrix.todense())
    predicted = partition_graph(
        rand_lowrank_g,
        k=rank,
        sc=spark_context,
        iterations=20,
        lambda_=0.1,
        seed=random_seed,
    )
    # The partition must match the reference labels exactly
    # (up to relabelling).
    score = adjusted_rand_score(predicted, true_labels)
    assert score == 1.0
def predict(self, beta=0.001, max_power=5, weight='weight', dtype=None):
    """Predict by Katz (1953) measure

    Let `A` be an adjacency matrix for the directed network `G`.
    Then, each element `a_{ij}` of `A^k` (the `k`-th power of `A`) has a
    value equal to the number of walks with length `k` from `i` to `j`.

    The probability of a link rapidly decreases as the walks grow longer.
    Katz therefore introduces an extra parameter (here beta) to weigh
    longer walks less.

    Parameters
    ----------
    beta : a float
        the value of beta in the formula of the Katz equation

    max_power : an int
        the maximum number of powers to take into account

    weight : string or None
        The edge attribute that holds the numerical value used for
        the edge weight.  If None then treat as unweighted.

    dtype : a data type
        data type of edge weights (default numpy.int32)

    Returns
    -------
    A Scoresheet mapping eligible node pairs (u, v) to their score.

    """
    try:
        # Python 2: lazy zip
        from itertools import izip as lazy_zip
    except ImportError:
        # Python 3: the builtin zip is already lazy
        lazy_zip = zip

    if dtype is None:
        import numpy
        dtype = numpy.int32

    # Materialise the node order once and use it for the matrix as well,
    # so matrix indices can be mapped back to nodes by position.
    nodelist = list(self.G.nodes())
    adj = nx.to_scipy_sparse_matrix(
        self.G, nodelist=nodelist, dtype=dtype, weight=weight)
    res = Scoresheet()

    power = None
    for k in progressbar(range(1, max_power + 1),
                         "Computing matrix powers: "):
        # Build A^k incrementally instead of recomputing it from scratch.
        power = adj if power is None else power * adj
        # The below method is found to be fastest for iterating through a
        # sparse matrix, see
        # http://stackoverflow.com/questions/4319014/iterating-through-a-scipy-sparse-vector-or-matrix
        matrix = power.tocoo()
        for i, j, d in lazy_zip(matrix.row, matrix.col, matrix.data):
            if i == j:
                continue
            u, v = nodelist[i], nodelist[j]
            if self.eligible(u, v):
                w = d * (beta ** k)
                res[(u, v)] += w

    # We count double in case of undirected networks ((i, j) and (j, i))
    if not self.G.is_directed():
        for pair in res:
            res[pair] /= 2

    return res
def get_iid_lhoods(T, edge_to_P, root, root_prior_distn1d, node_to_data_lmaps):
    """
    Compute the likelihood of every iid site for the given parameters.

    Parameters
    ----------
    T : directed networkx tree graph
        Edge and node annotations are ignored.
    edge_to_P : dict of 2d float ndarrays
        A map from directed edges of the tree graph
        to 2d float ndarrays representing state transition probabilities.
    root : hashable
        This is the root node.
        Following networkx convention, this may be anything hashable.
    root_prior_distn1d : 1d ndarray
        Prior state distribution at the root.
    node_to_data_lmaps : sequence of dicts of 1d float ndarrays
        Observed data.
        For each iid site, a dict mapping each node to a 1d array
        giving the observation likelihood for each state.
        This parameter is similar to the sample_histories output.

    Returns
    -------
    lhoods : 1d float array
        Likelihood for each iid site.

    """
    nsites = len(node_to_data_lmaps)

    # Toposorted node order starting at the root, plus the matching matrix.
    nodes = nx.topological_sort(T, [root])
    node_to_idx = {}
    for position, node in enumerate(nodes):
        node_to_idx[node] = position
    m = nx.to_scipy_sparse_matrix(T, nodes)
    nnodes = len(nodes)
    nstates = root_prior_distn1d.shape[0]

    # One transition matrix per edge, stored at (index of the edge's
    # second endpoint) - 1.
    trans = np.empty((nnodes-1, nstates, nstates), dtype=float)
    for (_, nb), P in edge_to_P.items():
        trans[node_to_idx[nb] - 1] = P

    # data[site, node position] holds that node's per-state data row.
    data = np.empty((nsites, nnodes, nstates), dtype=float)
    for site, lmap in enumerate(node_to_data_lmaps):
        for position, node in enumerate(nodes):
            data[site, position] = lmap[node]

    # iid_likelihoods fills lhoods in place.
    lhoods = np.empty(nsites, dtype=float)
    validation = 0
    iid_likelihoods(
            m.indices, m.indptr,
            trans, data, root_prior_distn1d, lhoods, validation)
    return lhoods
def __test_save_and_load_graph_npz(self, x):
    '''Test save and load a Networkx DiGraph in npz format with np-array wrapping.

    x is the graph to round-trip. Its sparse adjacency matrix is wrapped
    in a one-element numpy array, written with np.savez, read back and
    turned into a DiGraph again; nodes and edges must be unchanged.
    '''
    # The context manager closes the temporary file; it used to be left
    # open.
    with tempfile.TemporaryFile() as out_file:
        np.savez(out_file, x=np.array([nx.to_scipy_sparse_matrix(x)]))
        out_file.seek(0) # Only needed here to simulate closing & reopening file
        x2 = np.load(out_file)
        # npz members are read lazily, so access them while the file is open
        y = nx.from_scipy_sparse_matrix(x2['x'][0], nx.DiGraph())
    assert_equal(x.nodes(), y.nodes(), 'Saving and loading did not restore the original object')
    assert_equal(x.edges(), y.edges(), 'Saving and loading did not restore the original object')
def nx_graph_nbrw(G):
    """Build an mkm.MarkovChain from the networkx graph G.

    The transition matrix comes from mkm.graph_nbrw_transition_matrix
    applied to G's sparse adjacency matrix; the chain's stationary
    distribution is set to mkm.uniform_distribution over its states.
    """
    import networkx as nx
    adjacency = nx.to_scipy_sparse_matrix(G)
    chain = mkm.MarkovChain(mkm.graph_nbrw_transition_matrix(adjacency))
    uniform = mkm.uniform_distribution(chain.get_n())
    chain.set_stationary_distribution(uniform)
    return chain
def sparse_laplacian(G, weight='weight'):
    """Return D - A for G as a sparse matrix.

    A is the CSR adjacency matrix of G (edge attribute `weight`), D the
    diagonal matrix of A's row sums.
    """
    import scipy.sparse
    adjacency = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes(), weight=weight,
                                          format='csr')
    n_rows, n_cols = adjacency.shape
    # row sums, flattened into a single row for spdiags
    row_sums = adjacency.sum(axis=1).flatten()
    degree = scipy.sparse.spdiags(row_sums, [0], n_cols, n_rows, format='csr')
    return degree - adjacency
def adjacency_matrix(G, nodelist=None, weight='weight'):
    """Return the adjacency matrix of G as a SciPy sparse matrix.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       Order of the rows and columns. When None, the order of G.nodes()
       is used.

    weight : string or None, optional (default='weight')
       Edge data key whose value fills the matrix. When None, every edge
       counts as 1.

    Returns
    -------
    A : SciPy sparse matrix
      Adjacency matrix representation of G.

    Notes
    -----
    For directed graphs, entry i,j corresponds to an edge from i to j.

    For a pure Python adjacency representation see
    networkx.convert.to_dict_of_dicts, which returns a
    dictionary-of-dictionaries that can be addressed like a sparse matrix.

    For MultiGraph/MultiDiGraph with parallel edges the weights are summed.
    See to_numpy_matrix for other options.

    Self-loop edges put the edge weight attribute (or 1 if the edge has no
    weight attribute) on the diagonal. If the alternate convention of
    doubling the edge weight is desired, the resulting Scipy sparse matrix
    can be modified as follows:

    >>> import scipy as sp
    >>> G = nx.Graph([(1,1)])
    >>> A = nx.adjacency_matrix(G)
    >>> print(A.todense())
    [[1]]
    >>> A.setdiag(A.diagonal()*2)
    >>> print(A.todense())
    [[2]]

    See Also
    --------
    to_numpy_matrix
    to_scipy_sparse_matrix
    to_dict_of_dicts
    adjacency_spectrum
    """
    # Thin wrapper: the conversion itself lives in to_scipy_sparse_matrix.
    matrix = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight)
    return matrix
def adj_mat(graph, weight=None):
    """Return the sparse adjacency matrix of graph.

    weight is forwarded to to_scipy_sparse_matrix; the default None is
    passed through as is.
    """
    matrix = to_scipy_sparse_matrix(graph, weight=weight)
    return matrix
# Build the directed graph from `nodes` and `edges` and display it.
graph = nx.DiGraph()
graph.add_nodes_from(nodes)
graph.add_edges_from(edges)
nx.draw(graph, with_labels=True)
plt.show()

# Column k holds the 24 consecutive integers starting at k.
data = {k: np.arange(24) + k for k in range(6)}
val_data = {k: np.arange(24) + k for k in (6, 7)}

# Edge list as a 2 x n_edges index tensor (row indices on top, column
# indices below), moved to the GPU.
# (alternative kept for reference: dense_adjacency = nx.to_pandas_adjacency(graph))
sparse_adj = nx.to_scipy_sparse_matrix(graph).tocoo()
sparse_adj_in_coo_format = np.stack([sparse_adj.row, sparse_adj.col])
sparse_adj_in_coo_format_tensor = torch.tensor(
    sparse_adj_in_coo_format, dtype=torch.long
).cuda()

frame_data = pd.DataFrame.from_dict(data)
valframe = pd.DataFrame.from_dict(val_data)

# One sample per pair of consecutive rows: row i is the input,
# row i + 1 the target.
data_graphs = []
for i in range(len(frame_data) - 1):
    # permute turns the (1, columns) tensor into (nodes, features)
    x = torch.tensor([frame_data.iloc[i]], dtype=torch.double).cuda().permute(1, 0)
    y = torch.tensor([frame_data.iloc[i + 1]], dtype=torch.double).cuda().permute(1, 0)
    data_entry = Data(x=x, y=y, edge_index=sparse_adj_in_coo_format_tensor)
    data_graphs.append(data_entry)

loader = DataLoader(data_graphs, batch_size=1)
def directed_modularity_matrix(G, nodelist=None, weight=None):
    """Return the directed modularity matrix of G.

    The modularity matrix is B = A - <A>, where A is the adjacency matrix
    and <A> is the expected adjacency matrix under the configuration model.

    Element-wise,

        B_ij = A_ij - k_i(out) k_j(in) / m

    where k_i(out) is the out degree of node i, k_j(in) is the in degree
    of node j, and m is the number of edges in the graph. When weight names
    an edge attribute, A_ij, the degrees and m are computed from its
    values.

    Parameters
    ----------
    G : DiGraph
       A NetworkX DiGraph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.DiGraph()
    >>> G.add_edges_from(((1,2), (1,3), (3,1), (3,2), (3,5), (4,5), (4,6),
    ...                   (5,4), (5,6), (6,4)))
    >>> B = nx.directed_modularity_matrix(G)


    Notes
    -----
    NetworkX defines the element A_ij of the adjacency matrix as 1 if there
    is a link going from node i to node j. Leicht and Newman use the opposite
    definition. This explains the different expression for B_ij.

    See Also
    --------
    to_numpy_matrix
    adjacency_matrix
    laplacian_matrix
    modularity_matrix

    References
    ----------
    .. [1] E. A. Leicht, M. E. J. Newman,
       "Community structure in directed networks",
        Phys. Rev Lett., vol. 100, no. 11, p. 118703, 2008.
    """
    nodes = list(G) if nodelist is None else nodelist
    adjacency = nx.to_scipy_sparse_matrix(G, nodelist=nodes, weight=weight,
                                          format='csr')
    in_degrees = adjacency.sum(axis=0)    # row vector: column sums
    out_degrees = adjacency.sum(axis=1)   # column vector: row sums
    edge_total = in_degrees.sum()
    # Outer product (out-degree column times in-degree row), scaled by m.
    expected = out_degrees * in_degrees / edge_total
    return adjacency - expected
def transversal_network_mucha_original(dyn_graph: tn.DynGraphSN, om=0.5, form="local", elapsed_time=False, matlab_session=None):
    """
    Multiplex community detection, Mucha et al.

    Algorithm described in :
    `Mucha, P. J., Richardson, T., Macon, K., Porter, M. A., & Onnela, J. P. (2010). Community structure in time-dependent, multiscale, and multiplex networks. science, 328(5980), 876-878.`

    Brief summary: a single network is created by adding links between
    copies of the same node in different snapshots. A modified modularity
    optimization algorithm is run on this network.

    For this function, it is necessary to have Matlab installed
    and to set up the matlab for python engine, see how to there
    https://fr.mathworks.com/help/matlab/matlab_external/install-the-matlab-engine-for-python.html
    (you can find the value of matlabroot by typing matlabroot in your matlab console)

    :param dyn_graph: dynamic network (snapshot based)
    :param om: value written into the matrix for every link between two
        copies of the same node
    :param form: which copies of a node are linked: "global" (every pair of
        appearances, both directions), "local" (consecutive appearances,
        only when they fall in adjacent snapshots) or "local_relaxed"
        (consecutive appearances, whatever the gap)
    :param elapsed_time: if True, also return {"total": duration}
    :param matlab_session: forwarded to _runMatlabCode
    :return: a tn.DynCommunitiesSN, or (DynCommunitiesSN, {"total": duration})
        when elapsed_time is True
    """
    print("preprocessing MUCHA ")

    # Original example from the genlouvain website (MATLAB), for reference:
    #N = length(A{1});
    #T = length(A);
    #B = spalloc(N * T, N * T, N * N * T + 2 * N * T);
    #twomu = 0;
    #for s=1:T
    #    k = sum(A{s});
    #    twom = sum(k);
    #    twomu = twomu + twom;
    #    indx = [1:N]+(s - 1) * N;
    #    B(indx, indx) = A{s} - gamma * k'*k/twom;
    #end
    #twomu = twomu + 2 * omega * N * (T - 1);
    #B = B + omega * spdiags(ones(N * T, 2), [-N, N], N * T, N * T);
    #[S, Q] = genlouvain(B);
    #Q = Q / twomu
    #S = reshape(S, N, T);

    graphs = dyn_graph.snapshots()

    # (snapshot position, node) for every row/column of the final matrix
    nodeOrderAllSN = []
    listModularityMatrices = []

    #for each graph in order
    # NOTE(review): original indentation was lost; the matrix construction
    # is assumed to sit inside the non-empty check - confirm.
    for t, gT in enumerate(graphs):
        g = graphs[gT]
        nodeOrder = list(g.nodes())
        if len(nodeOrder) > 0:
            nodeOrderAllSN += [(t, n) for n in nodeOrder]

            gmat = nx.to_scipy_sparse_matrix(g, nodelist=nodeOrder, format="dok")
            k = gmat.sum(axis=0) #degrees of nodes
            twom = k.sum(axis=1) #sum of degrees
            nullModel = k.transpose() * k / twom
            listModularityMatrices.append(gmat - nullModel)

    #Concatenate all per-snapshot modularity matrices on the block diagonal
    #B = scipy.sparse.block_diag(*listModularityMatrices)
    B = scipy.sparse.block_diag(listModularityMatrices, format="dok")
    listModularityMatrices = None  # drop the reference to the per-snapshot matrices

    #B = scipy.sparse.dok_matrix(B)

    #add the link between same nodes in different timestamps
    multipleAppearances = {} #for each node, list of (matrix index, snapshot position) where it appears

    ordered_real_times = dyn_graph.snapshots_timesteps()
    for (i, (t, n)) in enumerate(nodeOrderAllSN):
        multipleAppearances.setdefault(n, []).append((i, t))

    if form == "global":
        # link every pair of appearances of a node, in both directions
        for (n, nAppearences) in multipleAppearances.items():
            for (i, t) in nAppearences:
                for (j, t) in nAppearences:
                    if i != j:
                        B[i, j] = om

    if form == "local":
        #print(multipleAppearances)
        for (n, orderedAppearences) in multipleAppearances.items():
            #print(orderedAppearences)
            for i in range(0, len(orderedAppearences) - 1):
                #BE CAREFUL, modified recently
                ii, t = orderedAppearences[i]
                ii_next, t_next = orderedAppearences[i + 1]
                #index_t = ordered_real_times.index(t)

                # link only when the next appearance is in the very next snapshot;
                # note that only the entry (ii, ii_next) is written
                if ordered_real_times[t + 1] == ordered_real_times[t_next]:
                    B[ii, ii_next] = om

    if form == "local_relaxed":
        #print(multipleAppearances)
        # link consecutive appearances regardless of the gap between them
        for (n, orderedAppearences) in multipleAppearances.items():
            for i in range(0, len(orderedAppearences) - 1):
                ii, t = orderedAppearences[i]
                ii_next, t_next = orderedAppearences[i + 1]
                B[ii, ii_next] = om

    #print("saving temp file")
    #numpy.savetxt("test.csv", B, fmt="%.2f", delimiter=",")
    #print("file saved")

    #B = scipy.sparse.coo_matrix(B)

    print("calling external code")

    # S is indexed like nodeOrderAllSN in the conversion loop below
    (S, duration) = _runMatlabCode(B, matlab_session=matlab_session)
    #print("transforming back to dynamic net")

    # translate the flat result back into per-snapshot affiliations
    DCSN = tn.DynCommunitiesSN()
    times = dyn_graph.snapshots_timesteps()
    for i in range(len(S)):
        DCSN.add_affiliation(nodeOrderAllSN[i][1], S[i], times[nodeOrderAllSN[i][0]])

    print("sucessfully finished MUCHA ")

    if elapsed_time:
        return (DCSN, {"total": duration})
    return DCSN
def pagerank_scipy(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specified, a nodes personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.
      Iteration stops once the l1 change is below ``len(G) * tol``.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified) This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    # Array helpers come from NumPy directly: the same names on the SciPy
    # root namespace (scipy.array, scipy.repeat, ...) are deprecated aliases.
    import numpy as np
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)

    # Row-normalise M; rows that sum to zero (dangling nodes) are left as-is.
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist],
                     dtype=float)
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    # S still holds 0 exactly for the rows that had no out-weight.
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + x[is_dangling].sum() * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
import pickle
from node2vec import node2vec
from gensim.models import Word2Vec
from node2vec.preprocessing import mask_test_edges

# Pickled (adjacency, features) pair describing the network.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# with a trusted pickle.
network_dir = './GraphPickle/HPO-Orphanet.pkl'
with open(network_dir, 'rb') as f:
    adj, features = pickle.load(f)

g = nx.Graph(adj)

# Seed NumPy's global RNG before the edge split below.
# NOTE(review): presumably mask_test_edges draws from np.random -- confirm.
np.random.seed(0)
adj_sparse = nx.to_scipy_sparse_matrix(g)

# Perform train-test split
adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = mask_test_edges(adj_sparse, test_frac=.3, val_frac=.1)

# Graph rebuilt from the training adjacency returned by mask_test_edges
g_train = nx.from_scipy_sparse_matrix(adj_train)

# node2vec settings
P = 1  # Return hyperparameter
Q = 0.05  # In-out hyperparameter
WINDOW_SIZE = 10  # Context size for optimization
NUM_WALKS = 10  # Number of walks per source
WALK_LENGTH = 5  # Length of walk per source
DIMENSIONS = 128  # Embedding dimension
DIRECTED = False  # Graph directed/undirected
WORKERS = 8  # Num. parallel workers
def test_empty(self):
    """A graph with one isolated node converts to a 1x1 zero matrix."""
    single_node = nx.Graph()
    single_node.add_node(1)
    dense = nx.to_scipy_sparse_matrix(single_node).todense()
    npt.assert_equal(dense, np.matrix([[0]]))
def test_identity_graph_matrix(self):
    """Round trip: graph -> sparse matrix -> graph."""
    sparse = nx.to_scipy_sparse_matrix(self.G1)
    self.identity_conversion(self.G1, sparse, nx.Graph())
def gen_er(args):
    """Generate a G(n, p) random graph and save its unweighted CSR adjacency.

    Reads ``args.er_n`` and ``args.er_p`` for the generator and writes the
    matrix through ``graph_io.save_graph`` to ``args.out``.
    """
    random_graph = nx.fast_gnp_random_graph(args.er_n, args.er_p)
    adjacency = nx.to_scipy_sparse_matrix(random_graph,
                                          weight=None,
                                          format='csr')
    graph_io.save_graph(args.out, adjacency)
# -*- coding: utf-8 -*- """ Created on Mon Oct 29 00:48:07 2018 @author: Sami """ import networkx as nx hep_graph = nx.read_gml('C:/Users/samiu/Desktop/8009 LAB/hep-th.gml') hep_graph.remove_nodes_from(list(nx.isolates(hep_graph))) gMatrix = nx.to_scipy_sparse_matrix(hep_graph) #graph_dict = nx.to_dict_of_lists(hep_graph) sparse_matrix = gMatrix.todense() def prim(graph, root): assert type(graph)==dict nodes = list(graph) nodes.remove(root) visited = [root] path = [] next = None while nodes: distance = float('inf') for s in visited: for d in graph[s]: if d in visited or s == d: continue if graph[s][d] < distance: distance = graph[s][d] pre = s
def test_identity_digraph_matrix(self):
    """Round trip: digraph -> sparse matrix -> digraph."""
    sparse = nx.to_scipy_sparse_matrix(self.G2)
    self.identity_conversion(self.G2, sparse, nx.DiGraph())
def test_identity_weighted_digraph_matrix(self):
    """Round trip: weighted digraph -> sparse matrix -> weighted digraph."""
    sparse = nx.to_scipy_sparse_matrix(self.G4)
    self.identity_conversion(self.G4, sparse, nx.DiGraph())
def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.
      Iteration stops once the l1 change of the authority vector is below
      ``tol``.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.  Both are empty for an empty graph.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  At most ``max_iter`` iterations
    are performed.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError("hits_scipy() requires SciPy and NumPy:"
                          "http://scipy.org/ http://numpy.org/") from e
    if len(G) == 0:
        return {}, {}
    M = nx.to_scipy_sparse_matrix(G, nodelist=list(G))
    n = M.shape[0]  # M is square
    A = M.T * M  # authority matrix
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix, capped at exactly max_iter rounds
    for _ in range(max_iter):
        xlast = x
        x = A * x
        x = x / x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
    else:
        # The loop ran out of iterations without meeting the tolerance.
        raise nx.PowerIterationFailedConvergence(max_iter)

    a = np.asarray(x).flatten()
    # h=M*a
    h = np.asarray(M * a).flatten()
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
def spectral_layout(G, weight='weight', scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs are symmetrised (``A + A^T``) before positioning.

    Graphs with fewer than 500 nodes use the dense solver; larger graphs
    try the sparse solver first and fall back to the dense one when it
    raises ``ImportError`` or ``ValueError``.  Graphs with at most two
    nodes are positioned directly, without rescaling.
    """
    import numpy as np

    G, center = _process_params(G, center, dim)
    node_count = len(G)

    # Tiny graphs break the eigensolvers, so they get fixed positions.
    if node_count == 0:
        return dict(zip(G, np.array([])))
    if node_count == 1:
        return dict(zip(G, np.array([center])))
    if node_count == 2:
        pair = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pair))

    # The dense solver is faster for small graphs.
    use_dense = node_count < 500
    if not use_dense:
        try:
            adjacency = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='d')
            if G.is_directed():
                adjacency = adjacency + np.transpose(adjacency)
            coords = _sparse_spectral(adjacency, dim)
        except (ImportError, ValueError):
            use_dense = True

    if use_dense:
        adjacency = nx.to_numpy_matrix(G, weight=weight)
        if G.is_directed():
            adjacency = adjacency + np.transpose(adjacency)
        coords = _spectral(adjacency, dim)

    coords = rescale_layout(coords, scale) + center
    return dict(zip(G, coords))
def calculate(min_degree, file_path="graph.graph", analyse="no", classifier='SVM'):
    """Run the link-prediction benchmark on one edge-list file.

    Steps, in order:

    1. Read the space-delimited edge list at ``file_path``, keep nodes whose
       degree is at least ``min_degree``, then keep the largest connected
       component.
    2. Split edges with ``mask_test_edges`` (``test_frac=.3``,
       ``val_frac=.1``).
    3. Score the Adamic-Adar, Jaccard and preferential-attachment baselines
       on the training graph with ``get_roc_score``.
    4. Obtain four embeddings from ``ModelFactory`` ("node2vec_32",
       "UMAP_16", "PCA_16", "node2vec_16") and run
       ``link_prediction_on_embedding`` on each.
    5. Append accuracy results to ``latex_results.txt`` / ``csv_results.txt``
       and timing results to ``latex_time.txt`` / ``csv_time.txt``.

    :param min_degree: minimum degree a node needs to be kept
    :param file_path: edge-list file; also selects the result caption
    :param analyse: LIME analysis and plotting are enabled only when this is
        one of ``'y'``, ``'yes'``, ``'true'``
    :param classifier: forwarded to ``link_prediction_on_embedding`` and to
        the result objects
    :return: None; results are written to the files listed above
    """
    graph = nx.read_edgelist(file_path, delimiter=" ")
    # NOTE(review): graph.degree().items() requires degree() to return a
    # dict (NetworkX 1.x style) -- confirm the pinned version.
    nodes = [
        node for node, degree in graph.degree().items() if degree >= min_degree
    ]
    graph = graph.subgraph(nodes)

    connected_components = nx.connected_components(graph)
    largest_cc_nodes = max(connected_components, key=len)
    graph = graph.subgraph(largest_cc_nodes)

    adj_sparse = nx.to_scipy_sparse_matrix(graph)
    adj = nx.to_numpy_matrix(graph)  # only its shape is used below

    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
        test_edges, test_edges_false = mask_test_edges(adj_sparse, test_frac=.3, val_frac=.1)
    g_train = nx.from_scipy_sparse_matrix(
        adj_train)  # new graph object with only non-hidden edges

    # --- Baseline 1: Adamic-Adar ---
    aa_matrix = np.zeros(adj.shape)
    for u, v, p in nx.adamic_adar_index(
            g_train):  # (u, v) = node indices, p = Adamic-Adar index
        aa_matrix[u][v] = p
        aa_matrix[v][u] = p  # make sure it's symmetric
    # Normalize array
    aa_matrix = aa_matrix / aa_matrix.max()
    aa_roc, aa_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  aa_matrix)

    # --- Baseline 2: Jaccard coefficient ---
    jc_matrix = np.zeros(adj.shape)
    for u, v, p in nx.jaccard_coefficient(
            g_train):  # (u, v) = node indices, p = Jaccard coefficient
        jc_matrix[u][v] = p
        jc_matrix[v][u] = p  # make sure it's symmetric
    jc_matrix = jc_matrix / jc_matrix.max()
    # Calculate ROC AUC and Average Precision
    jc_roc, jc_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  jc_matrix)

    # --- Baseline 3: preferential attachment ---
    pa_matrix = np.zeros(adj.shape)
    for u, v, p in nx.preferential_attachment(
            g_train):  # (u, v) = node indices, p = preferential-attachment score
        pa_matrix[u][v] = p
        pa_matrix[v][u] = p  # make sure it's symmetric
    # Normalize array
    pa_matrix = pa_matrix / pa_matrix.max()
    # Calculate ROC AUC and Average Precision
    pa_roc, pa_ap = get_roc_score(adj_sparse, test_edges, test_edges_false,
                                  pa_matrix)

    import time

    # --- node2vec, 32 dimensions (timing covers get_model only) ---
    time_before_node2vec32 = time.time()
    model_factory = ModelFactory(g_train)
    model = model_factory.get_model("node2vec_32")
    time_after_node2vec32 = time.time()
    node2vec32_time = time_after_node2vec32 - time_before_node2vec32

    #TODO: refactor these three calls. Make a function out of it
    # Store embeddings mapping
    # (the two stacking timestamps are recorded but not used afterwards)
    time_before_stacking_embedding = time.time()
    emb_mappings = model.wv
    emb_list = []
    # Embeddings are looked up by the node index rendered as a string
    for node_index in range(0, adj_sparse.shape[0]):
        node_str = str(node_index)
        node_emb = emb_mappings[node_str]
        emb_list.append(node_emb)
    emb_matrix = np.vstack(emb_list)
    time_after_stacking_embedding = time.time()

    # --- node2vec (32) reduced with UMAP ---
    time_before_UMAP16 = time.time()
    umap_obj = model_factory.get_model("UMAP_16")
    emb_mappings_umap = umap_obj.fit_transform(emb_matrix)
    time_after_UMAP16 = time.time()
    umap16_time = time_after_UMAP16 - time_before_UMAP16

    emb_list_umap = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_emb = emb_mappings_umap[node_index]
        emb_list_umap.append(node_emb)
    emb_matrix_umap = np.vstack(emb_list_umap)

    # --- node2vec (32) reduced with PCA ---
    time_before_PCA = time.time()
    pca_obj = model_factory.get_model("PCA_16")
    emb_mappings_pca = pca_obj.fit_transform(emb_matrix)
    time_after_PCA = time.time()
    pca16_time = time_after_PCA - time_before_PCA

    emb_list_pca = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_emb = emb_mappings_pca[node_index]
        emb_list_pca.append(node_emb)
    emb_matrix_pca = np.vstack(emb_list_pca)

    # --- node2vec, 16 dimensions (timing covers get_model and .wv) ---
    time_before_node2vec16 = time.time()
    node2vec16_model = model_factory.get_model("node2vec_16")
    emb_mappings_node2vec16 = node2vec16_model.wv
    time_after_node2vec16 = time.time()
    node2vec16_time = time_after_node2vec16 - time_before_node2vec16

    emb_list_node2vec_16 = []
    for node_index in range(0, adj_sparse.shape[0]):
        node_str = str(node_index)
        node_emb = emb_mappings_node2vec16[node_str]
        emb_list_node2vec_16.append(node_emb)
    emb_matrix_node2vec16 = np.vstack(emb_list_node2vec_16)

    # One argument bundle per embedding; only emb_mappings and matrix differ
    lp_arg = LP_arguments(emb_mappings=emb_mappings, adj_sparse = adj_sparse, train_edges = train_edges, \
        train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
        test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix)

    lp_arg_umap = LP_arguments(emb_mappings=emb_mappings_umap, adj_sparse=adj_sparse, train_edges = train_edges, \
        train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
        test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_umap)

    lp_arg_pca = LP_arguments(emb_mappings=emb_mappings_pca, adj_sparse=adj_sparse, train_edges = train_edges, \
        train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
        test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_pca)

    lp_arg_node2vec16 = LP_arguments(emb_mappings=emb_mappings_node2vec16, adj_sparse=adj_sparse, train_edges = train_edges,
        train_edges_false = train_edges_false, val_edges = val_edges, val_edges_false = val_edges_false, \
        test_edges = test_edges, test_edges_false = test_edges_false, matrix=emb_matrix_node2vec16)

    methods = {
        "node2vec (32)": lp_arg,
        "node2vec (16)": lp_arg_node2vec16,
        "node2vec+UMAP (16)": lp_arg_umap,
        "node2vec+PCA (16)": lp_arg_pca
    }

    adamic_adard_result = MethodResult('Adamic-Adar', aa_roc, aa_ap)
    jc_result = MethodResult('Jaccard Coefficient', jc_roc, jc_ap)
    pa_result = MethodResult('Preferential Attachment', pa_roc, pa_ap)

    lime = False
    if analyse in ['y', 'yes', 'true']:
        lime = True

    methods_list = [adamic_adard_result, jc_result, pa_result]
    lime_results = []
    for key, value in methods.items():
        # NOTE(review): training_time is overwritten on every pass, so the
        # value used further down belongs to the last method only.
        val_roc, val_ap, test_roc, test_ap, lime_explanations,\
            training_time = link_prediction_on_embedding(key, value, lime, classifier)
        methods_list.append(MethodResult(key, test_roc, test_ap))
        lime_results.append(lime_explanations)

    if lime:
        import os
        if not os.path.exists('plots'):
            os.makedirs('plots')
        lime_plotter = LimeExplainer.LimeExplainerPlotter(
            lime_results, adj_sparse.shape[0])
        lime_plotter.plot_feature_importance()

    # The caption is chosen from the input file name
    if file_path == "graph.graph":
        caption = "Link prediction on Wikipedia dataset containing"
    elif file_path == "soc_hamsterster.edges":
        caption = "Link prediction on network of the friendships between users of hamsterster.com"
    elif file_path == "external_graph.csv":
        caption = "Link prediction on DBLP dataset"
    else:
        caption = "Unknown caption"

    result = ModelAccuracyResults(adj_sparse.shape[0], len(train_edges),
                                  len(test_edges), methods_list, caption,
                                  classifier, training_time)

    # Results are appended, so repeated runs accumulate in the same files
    with open("latex_results.txt", "a") as file:
        file.write(result.get_latex_representation())
    with open("csv_results.txt", "a") as file:
        file.write(result.get_csv_representation())

    # NOTE(review): the first label reads "nodevec (32)" while the methods
    # dict uses "node2vec (32)" -- confirm whether the mismatch is intended.
    methods_time = [
        MethodTime("nodevec (32)", node2vec32_time),
        MethodTime("node2vec (16)", node2vec16_time),
        MethodTime("node2vec+UMAP (16)", umap16_time),
        MethodTime("node2vec+PCA (16)", pca16_time)
    ]
    # NOTE(review): this caption is fixed to the Wikipedia dataset whatever
    # file_path was given.
    time_results = ModelTimeResults(
        methods_time, adj_sparse.shape[0], len(train_edges), len(test_edges),
        "Time of the training of algorithms on Wikipedia dataset", classifier,
        training_time)

    with open("latex_time.txt", "a") as file:
        file.write(time_results.get_latex_representation())
    with open("csv_time.txt", "a") as file:
        file.write(time_results.get_csv_representation())
def spectral_layout(G, dim=2, weighted=True, scale=1):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Parameters
    ----------
    G : NetworkX graph

    dim : int
       Dimension of layout

    weighted : boolean
        If True, use edge weights in layout

    scale : float
        Scale factor for positions

    Returns
    -------
    dict :
       A dictionary of positions keyed by node

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> pos=nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).

    Graphs with at most two nodes get fixed positions that are not
    rescaled: a single node sits at ``(1, ..., 1)`` and two nodes sit at
    ``(0, 0.5, ..., 0.5)`` and ``(1, 0.5, ..., 0.5)``, each with ``dim``
    coordinates.
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "spectral_layout() requires numpy: http://scipy.org/ ")

    # handle some special cases that break the eigensolvers
    node_count = len(G)
    if node_count <= 2:
        if node_count == 0:
            pos = np.array([])
        elif node_count == 1:
            # dim coordinates (previously always two); integer as before
            pos = np.ones((1, dim), dtype=int)
        else:
            # Spread the pair along the first axis, centred on the others.
            # Slice assignment keeps this valid for any dim.
            pos = np.full((2, dim), 0.5)
            pos[:, :1] = [[0.0], [1.0]]
        return dict(zip(G, pos))

    try:
        # Sparse matrix
        if node_count < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G)
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim=dim, weighted=weighted)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_matrix(G)
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _spectral(A, dim=dim, weighted=weighted)

    pos = _rescale_layout(pos, scale=scale)
    return dict(zip(G, pos))
def test_null_raise(self):
    """Converting the null graph must raise NetworkXError."""
    null_graph = nx.Graph()
    with pytest.raises(nx.NetworkXError):
        nx.to_scipy_sparse_matrix(null_graph)
def b_lexrank(G, baseline_score, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, weight='weight', seed_weight=1):
    """
    Return the biased Lexrank scores of the nodes in the graph

    This program is based upon the pagerank_scipy program from the networkx
    source.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    baseline_score : vector, float
        similarity scores between the seed and sentences within the graph.
        Only read when ``personalization == 'biased'``.

    alpha : float, optional
        A damping parameter for PageRank, default = 0.85

    personalization : dict, 'biased' or None, optional
        ``None`` uses a uniform teleportation vector. A dict is used as the
        "personalization vector" with a key for every graph node and nonzero
        personalization value for each node; its key order also defines the
        matrix ordering. The string ``'biased'`` uses ``baseline_score``
        (normalised, then multiplied by ``seed_weight``) instead.

    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
        Error tolerance used to check convergence in power method solver.

    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.

    seed_weight : number, optional
        Multiplier applied to the normalised ``baseline_score`` vector.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Raises
    ------
    ImportError
        If SciPy, NumPy or NetworkX cannot be imported.
    NetworkXError
        If the power iteration does not converge.

    Examples
    --------
    >>> G=nx.DiGraph(nx.path_graph(4))
    >>> pr=nx.pagerank_scipy(G,alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    .. [3] Otterbacher, Erkan and Radev, Biased LexRank: Passage Retrieval
       using Random Walks with Question-Based Priors (2008)
    """
    try:
        # NumPy is used directly: the same helpers on the SciPy root
        # namespace (scipy.array, scipy.ones, ...) are deprecated aliases.
        import numpy as np
        import scipy.sparse
        import networkx as nx
        from networkx.exception import NetworkXError
    except ImportError:
        raise ImportError("pagerank_scipy() requires SciPy: http://scipy.org/")

    if len(G) == 0:
        return {}

    # Compare by value: identity against a string literal is not reliable.
    biased = isinstance(personalization, str) and personalization == 'biased'

    # choose ordering in matrix
    if personalization is None or biased:
        # use G.nodes() ordering
        nodelist = G.nodes()
    else:
        # use personalization "vector" ordering
        nodelist = personalization.keys()

    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype='f')
    n = M.shape[0]  # M is square

    # Row-normalise M through a sparse diagonal matrix of 1/row-sum.
    S = np.array(M.sum(axis=1)).flatten()
    S[S > 0] = 1.0 / S[S > 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M

    x = np.ones((n)) / n  # initial guess
    # 1/n for rows without out-weight, 0 elsewhere
    dangle = np.array(np.where(M.sum(axis=1) == 0, 1.0 / n, 0)).flatten()

    # add "teleportation"/personalization
    if biased:
        v = np.array(baseline_score)
        v = v / v.sum()
        v = seed_weight * v / v.sum()
    elif personalization is not None:
        v = np.array(list(personalization.values()), dtype=float)
        v = v / v.sum()
    else:
        v = x

    i = 0
    while i <= max_iter:
        # power iteration: make up to max_iter iterations
        xlast = x
        x = alpha * (x * M + np.dot(dangle, xlast)) + (1 - alpha) * v
        x = x / x.sum()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < n * tol:
            return dict(zip(nodelist, map(float, x)))
        i += 1
    raise NetworkXError('pagerank_scipy: power iteration failed to converge '
                        'in %d iterations.' % (i + 1))
def fruchterman_reingold_layout(G, k=None, pos=None, fixed=None, iterations=50, threshold=1e-4, weight='weight', scale=1, center=None, dim=2, random_state=None):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    random_state : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `random_state` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `random_state` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.
        NOTE(review): this body calls ``random_state.rand`` directly, so the
        conversion described above presumably happens in a decorator that is
        not part of this block -- confirm.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Raises
    ------
    ValueError
        If `fixed` is given while both `pos` and `k` are None, because the
        default `k` is derived from the extent of `pos`.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        nfixed = dict(zip(G, range(len(G))))
        fixed = np.asarray([nfixed[v] for v in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        shape = (len(G), dim)
        pos_arr = random_state.rand(*shape) * dom_size + center
        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    if k is None and fixed is not None:
        if pos is None:
            # Without pos there is no domain size to derive k from; fail
            # clearly instead of hitting an undefined name further down.
            raise ValueError("nodes are fixed without positions given; "
                             "pass `pos` or an explicit `k`")
        # We must adjust k by domain size for layouts not near 1x1
        k = dom_size / np.sqrt(len(G))

    try:
        # Sparse matrix
        if len(G) < 500:  # sparse solver for large graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='f')
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed,
                                           iterations, threshold,
                                           dim, random_state)
    except Exception:
        # Best-effort fallback to the dense solver for any ordinary failure
        # of the sparse path; KeyboardInterrupt/SystemExit still propagate.
        A = nx.to_numpy_matrix(G, weight=weight)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, random_state)
    if fixed is None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=False):
    """Train the autoencoder on a graph and return the node embedding.

    Either ``graph`` or ``edge_f`` must be given; when ``graph`` is falsy it
    is loaded from ``edge_f`` with ``graph_util.loadGraphFromEdgeListTxt``.
    ``is_weighted`` and ``no_python`` are not read by this method.

    The returned array has ``max(node id) + 1`` rows: row ``i`` holds the
    embedding of the node whose integer id is ``i``, and rows for ids that
    are not in the graph are zero.

    Returns a tuple ``(self._Y, elapsed)`` where ``elapsed`` is ``t2 - t1``
    as measured with ``time()`` around model construction, training and
    prediction.

    Raises ``Exception`` when neither ``graph`` nor ``edge_f`` is supplied.
    """
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    t1 = time()
    S = (S + S.T) / 2  # symmetrise the adjacency matrix
    self._node_num = graph.number_of_nodes()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    # If cannot use previous step information, initialize new models
    self._encoder = get_encoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._K,
                                self._n_units, self._nu1, self._nu2,
                                self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input: two vectors of length node_num concatenated side by side
    x_in = Input(shape=(2 * self._node_num, ), name='x_in')
    x1 = Lambda(lambda x: x[:, 0:self._node_num],
                output_shape=(self._node_num, ))(x_in)
    x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                output_shape=(self._node_num, ))(x_in)
    # Process inputs: the same autoencoder is applied to both halves
    [x_hat1, y1] = self._autoencoder(x1)
    [x_hat2, y2] = self._autoencoder(x2)
    # Outputs: reconstruction differences and embedding difference
    x_diff1 = merge([x_hat1, x1], mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    x_diff2 = merge([x_hat2, x2], mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    y_diff = merge([y2, y1], mode=lambda ab: ab[0] - ab[1],
                   output_shape=lambda L: L[1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        ''' Hack: This fn doesn't accept additional arguments.
            We use y_true to pass them.
            y_pred: Contains x_hat - x
            y_true: Contains [b, deg]
        '''
        return KBack.sum(KBack.square(
            y_pred * y_true[:, 0:self._node_num]),
            axis=-1) / y_true[:, self._node_num]

    def weighted_mse_y(y_true, y_pred):
        ''' Hack: This fn doesn't accept additional arguments.
            We use y_true to pass them.
            y_pred: Contains y2 - y1
            y_true: Contains s12
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                             [min_batch_size, 1]) * y_true

    # Model
    self._model = Model(input=x_in, output=[x_diff1, x_diff2, y_diff])
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(
        optimizer=sgd,
        loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
        loss_weights=[1, 1, self._alpha])

    self._model.fit_generator(
        generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
        verbose=1)
    # Get embedding for all points
    self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()

    # Save the autoencoder and its weights
    if (self._weightfile is not None):
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if (self._modelfile is not None):
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if (self._savefilesuffix is not None):
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
        # Save the embedding
        np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)

    # Block needed when the dataset's node IDs are not compact (contiguous):
    # re-index the embedding so that row number == node id.
    listNodes = graph.nodes()
    listNodes = list(set(listNodes))  # Remove duplicates from the list
    listNodes.sort(
    )  # Sort the list of all IDs contained in the original graph
    nA = np.asarray(listNodes, dtype=int)
    dE = self._d
    nR = (nA.max()) + 1
    XX = np.zeros((nR, dE))
    # NOTE(review): assumes row i of self._Y belongs to the i-th smallest
    # node id, i.e. that graph.nodes() iterates in sorted order -- confirm.
    for i in range(0, nA.__len__()):
        XX[nA[i]] = cp.copy(self._Y[i])
    self._Y = np.zeros((nR, dE))  # immediately replaced by the next line
    self._Y = cp.copy(XX)
    return self._Y, (t2 - t1)
def geodesic_matrix(x, tn_ids=None, directed=False, weight='weight'):
    """ Generates geodesic ("along-the-arbor") distance matrix for treenodes
    of given neuron.

    Parameters
    ----------
    x :         CatmaidNeuron | CatmaidNeuronList
                If list, must contain a SINGLE neuron.
    tn_ids :    list | numpy.ndarray, optional
                Treenode IDs. If provided, will compute distances only FROM
                this subset to all other nodes.
    directed :  bool, optional
                If True, pairs without a child->parent path will be returned
                with ``distance = "inf"``.
    weight :    'weight' | None, optional
                If ``weight`` distances are given as physical length.
                If ``None`` distances is number of nodes.

    Returns
    -------
    pd.SparseDataFrame
            Geodesic distance matrix. Distances in nanometres. Columns are
            all treenode IDs; rows are ``tn_ids`` (in graph order) when
            given, otherwise all treenode IDs.

    Raises
    ------
    ValueError
            If ``x`` is a list with more than one neuron or is not a
            CatmaidNeuron/CatmaidNeuronList.

    See Also
    --------
    :func:`~pymaid.distal_to`
        Check if a node A is distal to node B.
    :func:`~pymaid.dist_between`
        Get point-to-point geodesic distances.

    """
    # Unwrap a single-neuron list; reject everything that is not a neuron.
    if isinstance(x, core.CatmaidNeuronList):
        if len(x) != 1:
            raise ValueError('Cannot process more than a single neuron.')
        x = x[0]
    elif not isinstance(x, core.CatmaidNeuron):
        raise ValueError(
            'Unable to process data of type "{0}"'.format(type(x)))

    # Build the adjacency matrix from igraph when available and enabled,
    # otherwise from the networkx graph.
    if x.igraph and config.use_igraph:
        nodeList = x.igraph.vs.get_attribute_values('node_id')
        # Matrix is ordered by vertex number
        m = _igraph_to_sparse(x.igraph, weight_attr=weight)
    else:
        nodeList = tuple(x.graph.nodes())
        m = nx.to_scipy_sparse_matrix(x.graph, nodeList, weight=weight)

    if tn_ids is None:
        tn_indices = None
        ix = nodeList
    else:
        tn_ids = set(utils._make_iterable(tn_ids))
        # Matrix row indices of the requested treenodes, in matrix order
        tn_indices = tuple(
            position for position, node in enumerate(nodeList)
            if node in tn_ids)
        ix = [nodeList[position] for position in tn_indices]

    dmat = csgraph.dijkstra(m, directed=directed, indices=tn_indices)

    return pd.SparseDataFrame(dmat,
                              columns=nodeList,
                              index=ix,
                              default_fill_value=float('inf'))
def test_format_keyword(self):
    """Each sparse ``format`` must produce the same adjacency matrix.

    The unweighted 4-node path is converted once per format and compared
    with the weighted 4-node path converted with ``weight=None``.
    """
    weighted_path = nx.Graph()
    weighted_path.add_edges_from(
        (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
    plain_path = path_graph(4)

    for fmt in ("csr", "csc", "coo", "bsr", "lil", "dia", "dok"):
        actual = nx.to_scipy_sparse_matrix(plain_path, format=fmt)
        expected = nx.to_scipy_sparse_matrix(weighted_path, weight=None)
        npt.assert_equal(actual.todense(), expected.todense())
def fruchterman_reingold_layout(G, dim=2, pos=None, fixed=None,
                                iterations=50, weighted=True, scale=1):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    Parameters
    ----------
    G : NetworkX graph

    dim : int
       Dimension of layout

    pos : dict
       Initial positions for nodes as a dictionary with node as keys
       and values as a list or tuple. Nodes missing from ``pos`` start
       at a random position.

    fixed : list
      Nodes to keep fixed at initial position. When given, the final
      layout is not rescaled.

    iterations : int
       Number of iterations of spring-force relaxation

    weighted : boolean
        If True, use edge weights in layout

    scale : float
        Scale factor for positions

    Returns
    -------
    dict :
       A dictionary of positions keyed by node

    Raises
    ------
    ImportError
        If numpy cannot be imported.

    Examples
    --------
    >>> G=nx.path_graph(4)
    >>> pos=nx.spring_layout(G)

    # The same using longer function name
    >>> pos=nx.fruchterman_reingold_layout(G)
    """
    try:
        import numpy as np
    except ImportError:
        raise ImportError(
            "fruchterman_reingold_layout() requires numpy: http://scipy.org/ ")

    if fixed is not None:
        # Translate the fixed nodes into their row index in iteration order.
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[v] for v in fixed])

    if pos is not None:
        # Start from random positions and overwrite the ones supplied.
        pos_arr = np.asarray(np.random.random((len(G), dim)))
        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None

    if len(G) == 0:
        return {}
    if len(G) == 1:
        # next(iter(G)) yields the only node without requiring that
        # G.nodes() return an indexable list.
        return {next(iter(G)): (1, ) * dim}

    try:
        # Sparse matrix
        if len(G) < 500:  # sparse solver for large graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G)
        pos = _sparse_fruchterman_reingold(A,
                                           pos=pos_arr,
                                           fixed=fixed,
                                           dim=dim,
                                           iterations=iterations,
                                           weighted=weighted)
    except Exception:
        # Any failure of the sparse path (or a small graph) falls back to
        # the dense solver.  Catching Exception instead of using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        A = nx.to_numpy_matrix(G)
        pos = _fruchterman_reingold(A,
                                    pos=pos_arr,
                                    fixed=fixed,
                                    dim=dim,
                                    iterations=iterations,
                                    weighted=weighted)
    if fixed is None:
        pos = _rescale_layout(pos, scale=scale)
    return dict(zip(G, pos))
# Report how many class labels were loaded, followed by the labels.
print("Loaded labels ({} classes): ".format(len(Config.labels)), end='')
print(Config.labels)

# In[93]:

# Zero every similarity above the threshold, working on a flat copy so
# that `sim` itself stays untouched.
threshold = 0.75
adjmat = sim.reshape(-1).copy()
adjmat[adjmat > threshold] = 0
#adjmat[adjmat > 0] = 1
zero_count = len(adjmat[adjmat == 0])
print("{} out of {} values set to zero".format(zero_count, len(adjmat)))
adjmat = adjmat.reshape(sim.shape)

# In[94]:

# Build the graph from the thresholded matrix and draw it.
G = make_graph(adjmat, labels=Config.labels)
nx.draw_spring(G, with_labels=True)

# In[95]:

# Cluster the graph with MCL and draw the clusters.
matrix = nx.to_scipy_sparse_matrix(G)
result = mc.run_mcl(matrix, inflation=2)
clusters = mc.get_clusters(result)
print("There are {} clusters.".format(len(clusters)))
mc.draw_graph(matrix, clusters, with_labels=True, edge_color="silver")

# In[77]:

# Load the reference labels and show one entry.
ref = np.genfromtxt(labelfilename, delimiter=',', dtype=None)
print(ref[19])
@author: Seokyong Hong ''' import os import time import networkx from community.SCAN import SCAN input_path = '/Input/Com-Amazon.txt' if __name__ == '__main__': start = time.time() digraph = networkx.read_edgelist(path=input_path, delimiter='\t', create_using=networkx.DiGraph()) graph = networkx.to_scipy_sparse_matrix(digraph.to_undirected()) t0 = time.time() labels = SCAN().detect(graph=graph, epsilon=0.7, mu=2) print 'Community Detection Time: ' + str(time.time() - t0) ''' community_labels = set() community_count = 0 hub_count = 0 outlier_count = 0 community = {} max_label = int(max(labels)) + 1 for index in range(len(labels)): if labels[index] >= 0: community[list(digraph.nodes(data = False))[index]] = int(labels[index])
def learn_embeddings(self, graph=None, edge_f=None):
    """Train the autoencoder on a graph and return its node embeddings.

    Parameters
    ----------
    graph : optional
        Graph to embed; it is converted with
        ``nx.to_scipy_sparse_matrix``.
    edge_f : optional
        Passed to ``graph_util.loadGraphFromEdgeListTxt`` to load the
        graph when ``graph`` is not supplied.

    Returns
    -------
    tuple
        ``(self._Y, t2 - t1)``: the embedding and the difference between
        the two ``time()`` readings taken around model construction,
        training and prediction.

    Raises
    ------
    Exception
        If neither ``graph`` nor ``edge_f`` is given.  Errors raised
        while predicting the embedding propagate to the caller.
    """
    # Validate the arguments first so a bad call fails before the global
    # Keras/TensorFlow session is replaced.
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')

    # TensorFlow wizardry
    config = tf.ConfigProto()
    # Don't pre-allocate memory; allocate as-needed
    config.gpu_options.allow_growth = True
    # Cap this process at a 0.1 fraction of the GPU memory
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    # Create a session with the above options specified.
    KBack.tensorflow_backend.set_session(tf.Session(config=config))

    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    self._node_num = graph.number_of_nodes()
    t1 = time()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    self._encoder = get_encoder(self._node_num, self._d,
                                self._n_units,
                                self._nu1, self._nu2,
                                self._actfn)
    self._decoder = get_decoder(self._node_num, self._d,
                                self._n_units,
                                self._nu1, self._nu2,
                                self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(self._node_num,), name='x_in')
    # Process inputs
    [x_hat, y] = self._autoencoder(x_in)
    # Outputs
    x_diff = Subtract()([x_hat, x_in])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        """ Hack: This fn doesn't accept additional arguments.
                  We use y_true to pass them.
            y_pred: Contains x_hat - x
            y_true: Contains b
        """
        return KBack.sum(
            KBack.square(y_pred * y_true[:, 0:self._node_num]),
            axis=-1
        )

    # Model
    self._model = Model(input=x_in, output=x_diff)
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    self._model.compile(optimizer=sgd, loss=weighted_mse_x)

    history = self._model.fit_generator(
        generator=batch_generator_ae(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.shape[0] // self._n_batch,
        verbose=1,
        # callbacks=[tensorboard]
        # callbacks=[callbacks.TerminateOnNaN()]
    )
    loss = history.history['loss']

    # Get embedding for all points
    if loss[0] == np.inf or np.isnan(loss[0]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        # Prediction errors are allowed to propagate: previously they were
        # trapped in an interactive debugger, after which the return below
        # could hand back a stale or missing embedding.
        self._Y, self._next_adj = model_batch_predictor_v2(
            self._autoencoder, S, self._n_batch)
    t2 = time()

    # NOTE: saving the autoencoder, its weights and the embedding is
    # currently disabled; the original code is kept for reference.
    #
    # if self._weightfile is not None:
    #     saveweights(self._encoder, self._weightfile[0])
    #     saveweights(self._decoder, self._weightfile[1])
    # if self._modelfile is not None:
    #     savemodel(self._encoder, self._modelfile[0])
    #     savemodel(self._decoder, self._modelfile[1])
    # if self._savefilesuffix is not None:
    #     saveweights(self._encoder,
    #                 'encoder_weights_' + self._savefilesuffix + '.hdf5')
    #     saveweights(self._decoder,
    #                 'decoder_weights_' + self._savefilesuffix + '.hdf5')
    #     savemodel(self._encoder,
    #               'encoder_model_' + self._savefilesuffix + '.json')
    #     savemodel(self._decoder,
    #               'decoder_model_' + self._savefilesuffix + '.json')
    #     # Save the embedding
    #     np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)
import copy

import matplotlib

# Matplotlib parameters.
# Select the non-interactive backend BEFORE pyplot is imported so the choice
# takes effect regardless of the installed matplotlib version.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from networkx.drawing.nx_pydot import graphviz_layout

comm = MPI.COMM_WORLD

# Steady state of an L-QSW on a full binary tree of depth 5:
Graph = nx.balanced_tree(2, 5)
G = nx.to_scipy_sparse_matrix(Graph)
H = qsw_mpi.operators.transition(1.0, G)

# Local-interaction Lindblad operators are derived from the canonical
# Markov chain transition matrix.
M = qsw_mpi.operators.markov_chain(G)
L = qsw_mpi.operators.local_lindblads(M)
omega = 0.5
QSW = qsw_mpi.MPI.LQSW(omega, H, L, comm)

# The system begins in a maximally mixed state.
QSW.initial_state('mixed')
def gen_ba(args):
    """Generate a Barabasi-Albert graph and save its adjacency matrix.

    Reads ``args.ba_n`` and ``args.ba_m`` as the two arguments of
    ``nx.barabasi_albert_graph``, converts the graph to an unweighted CSR
    matrix and hands it to ``graph_io.save_graph`` together with
    ``args.out``.
    """
    ba_graph = nx.barabasi_albert_graph(args.ba_n, args.ba_m)
    adjacency = nx.to_scipy_sparse_matrix(ba_graph, weight=None,
                                          format='csr')
    graph_io.save_graph(args.out, adjacency)
import json

import scipy.sparse as sp
from networkx.readwrite import json_graph

# Karate-club graph with every undirected edge present in both directions.
g = nx.karate_club_graph().to_undirected().to_directed()

# Split the directed edges into parallel source / destination lists.
src = []
dst = []
for u, v in g.edges():
    src.append(u)
    dst.append(v)

# Persist the edge list as text and as two tensors.
with open('edges.txt', 'w') as f:
    f.writelines('{} {}\n'.format(u, v) for u, v in zip(src, dst))
torch.save(torch.tensor(src), 'src.pt')
torch.save(torch.tensor(dst), 'dst.pt')

# Persist the adjacency matrix in SciPy's .npz format.
spmat = nx.to_scipy_sparse_matrix(g)
print(spmat)
sp.save_npz('scipy_adj.npz', spmat)

# Persist the adjacency structure as JSON.
with open('adj.json', 'w') as f:
    json.dump(json_graph.adjacency_data(g), f)

# One feature row per node and one value per directed edge.  The sizes are
# taken from the graph and edge list instead of the hard-coded 34 and 156
# so they cannot drift from what was saved above.
node_feat = torch.randn((len(g), 5)) / 10.
edge_feat = torch.ones((len(src), ))
torch.save(node_feat, 'node_feat.pt')
torch.save(edge_feat, 'edge_feat.pt')
def fruchterman_reingold_layout(
    G,
    k=None,
    pos=None,
    fixed=None,
    iterations=50,
    threshold=1e-4,
    weight="weight",
    scale=1,
    center=None,
    dim=2,
    seed=None,
):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    The algorithm simulates a force-directed representation of the network
    treating edges as springs holding nodes close, while treating nodes
    as repelling objects, sometimes called an anti-gravity force.
    Simulation continues until the positions are close to an equilibrium.

    There are some hard-coded values: minimal distance between
    nodes (0.01) and "temperature" of 0.1 to ensure nodes don't fly away.
    During the simulation, `k` helps determine the distance between nodes,
    though `scale` and `center` determine the size and place after
    rescaling occurs at the end of the simulation.

    Fixing some nodes doesn't allow them to move in the simulation.
    It also turns off the rescaling feature at the simulation's end.
    In addition, setting `scale` to `None` turns off rescaling.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.  An empty dict is treated as "no node
        has an initial position".

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.
        ValueError raised if `fixed` specified and `pos` not.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number or None (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.
        If scale is None, no rescaling is performed.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Raises
    ------
    ValueError
        If `fixed` is given and `pos` is None or lacks a fixed node.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    # `seed.rand` is called below, so honor the documented contract for
    # None and int seeds here; RandomState instances pass through.
    # NOTE(review): upstream networkx performs this conversion in a
    # decorator -- confirm whether this file applies one; if so this is a
    # harmless no-op.
    if seed is None:
        seed = np.random.mtrand._rand
    elif isinstance(seed, (int, np.integer)):
        seed = np.random.RandomState(seed)

    if fixed is not None:
        if pos is None:
            raise ValueError("nodes are fixed without positions given")
        for node in fixed:
            if node not in pos:
                raise ValueError("nodes are fixed without positions given")
        # Translate the fixed nodes into their row index in iteration order.
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[node] for node in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions.
        # default=0 keeps an empty `pos` from raising inside max().
        dom_size = max(
            (coord for pos_tup in pos.values() for coord in pos_tup),
            default=0,
        )
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # sparse solver for large graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype="f")
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(
            A, k, pos_arr, fixed, iterations, threshold, dim, seed
        )
    except ValueError:
        # Small graphs, or a ValueError from the sparse path, use the
        # dense solver.
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(
            A, k, pos_arr, fixed, iterations, threshold, dim, seed
        )
    if fixed is None and scale is not None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos