def get_laplacian(A, normalization_mode=None):
    """
    Compute the Laplacian of a graph given its adjacency matrix A, with
    optional symmetric ('sym') or random-walk ('rw') normalization.
    Code inspired by the networkx python library.
    """
    import scipy.sparse
    A = scipy.sparse.csr_matrix(A)
    diags = A.sum(axis=1).flatten()  # degrees
    n, m = A.shape
    D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - A
    if normalization_mode not in ['sym', 'rw', None]:
        raise Exception('Normalisation mode {} unknown'.format(normalization_mode))
    elif normalization_mode is None:
        return L
    elif normalization_mode == 'sym':
        with scipy.errstate(divide='ignore'):
            diags_sqrt = 1.0 / scipy.sqrt(diags)
        diags_sqrt[scipy.isinf(diags_sqrt)] = 0
        DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
        return DH.dot(L.dot(DH))
    elif normalization_mode == 'rw':
        with scipy.errstate(divide='ignore'):
            diags_inverse = 1.0 / diags
        diags_inverse[scipy.isinf(diags_inverse)] = 0
        DH = scipy.sparse.spdiags(diags_inverse, [0], m, n, format='csr')
        return DH.dot(L)
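# A minimal usage sketch of get_laplacian on a 3-node path graph. Hedged:
# like the function above, it assumes an older SciPy where scipy.sqrt and
# scipy.isinf are still re-exported from numpy.
import numpy as np

A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]])
L = get_laplacian(A)               # combinatorial Laplacian D - A
L_sym = get_laplacian(A, 'sym')    # D^{-1/2} (D - A) D^{-1/2}
L_rw = get_laplacian(A, 'rw')      # D^{-1} (D - A)
print(L.toarray())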
def evaluations_scipy(ty, pv):
    """
    evaluations_scipy(ty, pv) -> (ACC, MSE, SCC)

    ty, pv: ndarray

    Calculate accuracy, mean squared error and squared correlation
    coefficient using the true values (ty) and predicted values (pv).
    """
    if not (scipy is not None and isinstance(ty, scipy.ndarray) and isinstance(pv, scipy.ndarray)):
        raise TypeError("type of ty and pv must be ndarray")
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    ACC = 100.0 * (ty == pv).mean()
    MSE = ((ty - pv) ** 2).mean()
    l = len(ty)
    sumv = pv.sum()
    sumy = ty.sum()
    sumvy = (pv * ty).sum()
    sumvv = (pv * pv).sum()
    sumyy = (ty * ty).sum()
    with scipy.errstate(all='raise'):
        try:
            SCC = ((l * sumvy - sumv * sumy) * (l * sumvy - sumv * sumy)) / \
                  ((l * sumvv - sumv * sumv) * (l * sumyy - sumy * sumy))
        except:
            SCC = float('nan')
    return (float(ACC), float(MSE), float(SCC))
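# Hedged quick check of evaluations_scipy on toy data. In the older SciPy
# this snippet targets, scipy.ndarray is the numpy.ndarray re-export, so
# plain numpy arrays pass the isinstance test.
import numpy as np
import scipy

ty = np.array([1.0, 2.0, 3.0, 4.0])
pv = np.array([1.0, 2.5, 3.0, 3.5])
acc, mse, scc = evaluations_scipy(ty, pv)
print(acc)   # 50.0 -- two of four predictions match exactly
print(mse)   # 0.125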
def generate(cls, trueY, forecastY, missing=True):
    nz_mask = trueY != 0
    diff = forecastY - trueY
    abs_true = sp.absolute(trueY)
    abs_diff = sp.absolute(diff)

    def my_mean(x):
        tmp = x[sp.isfinite(x)]
        assert len(tmp) != 0
        return tmp.mean()

    with sp.errstate(divide='ignore'):
        nrmse = sp.sqrt((diff ** 2).mean()) / abs_true.mean()
        m_nrmse = my_mean(sp.sqrt((diff ** 2).mean(axis=0)) / abs_true.mean(axis=0))
        nd = abs_diff.sum() / abs_true.sum()
        m_nd = my_mean(abs_diff.sum(axis=0) / abs_true.sum(axis=0))
        abs_baseline = sp.absolute(trueY[1:, :] - trueY[:-1, :])
        mase = abs_diff.mean() / abs_baseline.mean()
        m_mase = my_mean(abs_diff.mean(axis=0) / abs_baseline.mean(axis=0))
        mape = my_mean(sp.divide(abs_diff, abs_true, where=nz_mask))
    return cls(nd=nd, mase=mase, nrmse=nrmse,
               m_nd=m_nd, m_mase=m_mase, m_nrmse=m_nrmse, mape=mape)
def proba_matrix(self):
    # random-walk transition matrix D^{-1} A; note that rows of all-zero
    # degree leave inf entries in diags_inv here
    n, m = self.A.shape
    diags = self.A.sum(axis=1).flatten()
    with scipy.errstate(divide='ignore'):
        diags_inv = 1.0 / diags
    D_inv = scipy.sparse.spdiags(diags_inv, [0], m, n)
    return D_inv.dot(self.A)
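# Hedged sketch: proba_matrix returns the row-stochastic random-walk matrix
# D^{-1} A. The stub class below exists only to give the method the self.A
# attribute it expects.
import numpy as np
import scipy
import scipy.sparse

class _Graph:
    proba_matrix = proba_matrix   # reuse the function above as a method

g = _Graph()
g.A = scipy.sparse.csr_matrix(np.array([[0.0, 2.0],
                                        [1.0, 1.0]]))
print(g.proba_matrix().toarray())   # rows sum to 1: [[0. 1.], [0.5 0.5]]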
def e_step(self, X):
    """Compute the e-step of the algorithm

    Parameters
    ----------
    X : array-like, shape (n_samples, n_dimensions)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    LL : float
        The log-likelihood of X under the current model.
    """
    # Get some parameters
    n = X.shape[0]

    # Compute the membership function
    K = self.score_samples(X)

    # Compute the log-likelihood
    K *= 0.5
    Km = K.max(axis=1)
    Km.shape = (n, 1)
    # logsumexp trick
    LL = (sp.log(sp.exp(K - Km).sum(axis=1))[:, sp.newaxis] + Km).sum()

    # Compute the posterior
    with sp.errstate(over='ignore'):
        for c in range(self.C):
            self.T[:, c] = 1 / sp.exp(K - K[:, c][:, sp.newaxis]).sum(axis=1)
    return LL
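# The posterior loop in e_step is a numerically stable column-wise softmax:
# 1 / sum_j exp(K_j - K_c) == exp(K_c) / sum_j exp(K_j). A standalone check
# of that identity (the names here are illustrative only):
import numpy as np

K = np.array([[0.0, 1.0],
              [2.0, -1.0]])
T = np.empty_like(K)
for c in range(K.shape[1]):
    T[:, c] = 1.0 / np.exp(K - K[:, c][:, np.newaxis]).sum(axis=1)
assert np.allclose(T, np.exp(K) / np.exp(K).sum(axis=1, keepdims=True))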
def laplacian_layout(G, norm=False, dim=2, bad=False):
    A = nx.to_scipy_sparse_matrix(G, format='csr')
    A = np.array(A.todense())
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    D = np.diag(diags)
    L = D - A
    B = np.eye(n)
    if norm == False:
        layout = eig_layout(G, L, B)
    else:
        if bad:
            with scipy.errstate(divide='ignore'):
                # diags_sqrt = 1.0 / scipy.power(diags, 1)
                diags_sqrt = 1.0 / scipy.sqrt(diags)
            diags_sqrt[scipy.isinf(diags_sqrt)] = 0
            DH = np.diag(diags_sqrt)
            L = np.dot(DH, np.dot(L, DH))
            eigenvalues, eigenvectors = scipy.linalg.eigh(L)
            index = np.argsort(eigenvalues)[1:dim + 1]
            pos = np.real(eigenvectors[:, index])
            pos = np.dot(DH, pos)
            pos = dict(zip(G, pos))
            layout = pos
        else:
            B = D
            layout = eig_layout(G, L, B)
    # print(L)
    # print(B)
    return layout
def generate_stats(trueY, forecastY, missing=True):
    """ From TRMF code """
    nz_mask = trueY != 0
    diff = forecastY - trueY
    abs_true = sp.absolute(trueY)
    abs_diff = sp.absolute(diff)

    def my_mean(x):
        tmp = x[sp.isfinite(x)]
        assert len(tmp) != 0
        return tmp.mean()

    with sp.errstate(divide='ignore'):
        # root mean squared error
        rmse = sp.sqrt((diff ** 2).mean())
        # normalized root mean squared error
        nrmse = sp.sqrt((diff ** 2).mean()) / abs_true.mean()
        # naive one-step baseline for MASE
        abs_baseline = sp.absolute(trueY[1:, :] - trueY[:-1, :])
        mase = abs_diff.mean() / abs_baseline.mean()
        m_mase = my_mean(abs_diff.mean(axis=0) / abs_baseline.mean(axis=0))
        mape = my_mean(sp.divide(abs_diff, abs_true, where=nz_mask))
    return mape, mase, rmse
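# Hedged toy check of generate_stats (assumes the older SciPy that still
# re-exports numpy functions such as sp.absolute and sp.errstate):
import numpy as np

trueY = np.array([[1.0, 10.0],
                  [2.0, 20.0],
                  [4.0, 40.0]])       # rows = time steps, columns = series
forecastY = 1.1 * trueY              # uniform 10% over-forecast
mape, mase, rmse = generate_stats(trueY, forecastY)
print(mape)                          # ~0.1, since every value is off by 10%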
def predict_proba(self, X):
    """Predict the membership probabilities for the data samples in X
    using the trained model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    proba : array, shape (n_samples, n_clusters)
    """
    X = check_array(X, copy=False, order='C', dtype=sp.float64)
    K = self.score_samples(X)
    T = sp.empty_like(K)

    # Compute the log-likelihood
    K *= 0.5

    # Compute the posterior
    with sp.errstate(over='ignore'):
        for c in range(self.C):
            T[:, c] = 1 / sp.exp(K - K[:, c][:, sp.newaxis]).sum(axis=1)
    return T
def __regularized_laplacian_matrix(adj_matrix, tau):
    """
    Compute the regularised normalised Laplacian

        L = I - D^{-1/2} A D^{-1/2}

    where the degrees are regularised with the constant tau, following [2].

    [2] Kamalika Chaudhuri, Fan Chung, and Alexander Tsiatas, 2012.
        Spectral clustering of graphs with general degrees in the
        extended planted partition model.

    :param adj_matrix: adjacency matrix representation of the graph,
        where [m][n] > 0 if there is an edge and [m][n] = weight
    :param tau: the regularisation constant
    :return: the regularised normalised Laplacian matrix
    """
    import scipy.sparse
    # Code inspired by nx.normalized_laplacian_matrix, with changes to
    # allow regularisation
    n, m = adj_matrix.shape
    I = np.eye(n, m)
    diags = adj_matrix.sum(axis=1).flatten()
    # add tau to the degrees to produce regularised degrees
    if tau != 0:
        diags = np.add(diags, tau)
    # degrees are zero where a node has no edges; ignore the divide
    # error and zero out the resulting infinities afterwards
    with scipy.errstate(divide="ignore"):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    D = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format="csr")
    L = I - (D.dot(adj_matrix.dot(D)))
    return L
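# Hedged usage sketch for the regularised Laplacian: with tau > 0 even a
# zero-degree node gets a finite scaling, which is the point of the
# regularisation in Chaudhuri et al. Assumes the same older SciPy API as
# the function above, and a module-level (not class-mangled) definition.
import numpy as np
import scipy.sparse

A = scipy.sparse.csr_matrix(np.array([[0.0, 1.0, 0.0],
                                      [1.0, 0.0, 0.0],
                                      [0.0, 0.0, 0.0]]))   # node 2 isolated
L_reg = __regularized_laplacian_matrix(A, tau=0.1)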
def loglike(self, x, T=None):
    """Compute the log-likelihood given a set of samples.

    :param x: the sample matrix, of size n x d, where n is the number
        of samples and d is the number of variables
    """
    flag = False

    # Get some parameters
    n = x.shape[0]

    # Compute the membership function
    K = self.predict(x, out='ki')

    # Compute the log-likelihood
    K *= (-0.5)
    Km = K.max(axis=1).reshape(n, 1)
    LL = (sp.log(sp.exp(K - Km).sum(axis=1)).reshape(n, 1) + Km).sum()  # logsumexp trick

    # Compute the posterior
    if T is None:
        flag = True
        T = sp.empty_like(K)
    with sp.errstate(over='ignore'):
        for i in range(K.shape[1]):
            T[:, i] = 1 / sp.exp(K - K[:, i][:, sp.newaxis]).sum(axis=1)
    if flag:
        return LL, T
    else:
        return LL
def clean_p_values(counts, lambdas):
    # log survival function of the Poisson model; divide='ignore' silences
    # the log-of-zero warning when the survival probability underflows
    with scipy.errstate(divide='ignore'):
        p_values = poisson.logsf(counts, lambdas)
    # convert from natural log to -log10 p-values
    p_values /= -baseEtoTen
    # bins with no counts are never significant
    p_values[counts == 0] = 0
    # cap the -log10 p-values whose survival probability underflowed to zero
    p_values[np.isinf(p_values)] = 1000
    return p_values
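# Hedged usage; assumes baseEtoTen is a module-level constant equal to
# np.log(10), so that dividing logsf values by -baseEtoTen yields -log10
# p-values.
import numpy as np
from scipy.stats import poisson

baseEtoTen = np.log(10)
counts = np.array([0, 5, 50])
lambdas = np.array([1.0, 1.0, 1.0])
print(clean_p_values(counts, lambdas))
# -> 0 where counts == 0, and large -log10 p-values for enriched bins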
def get_DH(A):
    # D^{-1/2}
    diags = A.sum(axis=1).flatten()
    C, R = A.shape
    with scipy.errstate(divide='ignore'):
        diag_s = 1.0 / scipy.sqrt(diags)
    diag_s[scipy.isinf(diag_s)] = 0
    DH = scipy.sparse.spdiags(diag_s, [0], C, R, format='csr')
    return DH
def normalized_laplacian(W):
    n, m = W.shape
    diags = W.sum(axis=0)            # degree vector
    L = np.diag(diags) - W           # combinatorial Laplacian D - W
    with sc.errstate(divide='ignore'):
        diags_sqrt = 1.0 / sc.sqrt(diags)
    diags_sqrt[sc.isinf(diags_sqrt)] = 0
    DH = sp.spdiags(diags_sqrt, [0], m, n, format='csr')
    DH = DH.toarray()
    L = DH.dot(L.dot(DH))            # D^{-1/2} L D^{-1/2}
    return L
def make_matrices(self, G):
    self.adj_matrix = nx.to_scipy_sparse_matrix(G, nodelist=self.pool_of_nodes)
    n, m = self.adj_matrix.shape
    D = self.adj_matrix.sum(axis=1).flatten()
    D = scipy.sparse.spdiags(D, [0], n, m, format='csr')
    self.laplacian = csr_matrix(D - self.adj_matrix)
    with scipy.errstate(divide='ignore', invalid='ignore'):
        DI = spdiags(1.0 / scipy.array(self.adj_matrix.sum(axis=1).flat),
                     [0], n, m)
    self.normed_adj_matrix = DI * self.adj_matrix
def _normalize_diffusion_matrix(A):
    n, m = A.shape
    A_with_selfloop = A
    diags = A_with_selfloop.sum(axis=1).flatten()
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = sp.spdiags(diags_sqrt, [0], m, n, format='csc')
    d = DH.dot(A_with_selfloop.dot(DH))
    return d
def calibrateTempCurve(self):
    self.resistances = numpy.array([2.46, 0.318])
    self.temperatures = numpy.array([20, 80])
    with scipy.errstate(divide='ignore'):
        slope, intercept, r_value, p_value, std_err = \
            stats.linregress(self.temperatures, self.resistances)
    self.t = Symbol('t')
    self.a = Symbol('a')
    self.calibrationcurve = solve(
        (intercept + slope * self.t) /
        (intercept + slope * self.t + 2.49) * 1024 - self.a,
        self.t)
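# Hedged standalone sketch of the expression solved above: it models a
# 10-bit ADC reading of a voltage divider, a = 1024 * R(t) / (R(t) + 2.49),
# with a linear fit R(t) = intercept + slope * t. The 2.49 fixed divider
# resistance and the fit numbers below are taken/derived from the two
# calibration points in the method.
from sympy import Symbol, solve

t, a = Symbol('t'), Symbol('a')
slope, intercept = -0.0357, 3.174   # linear fit through (20, 2.46), (80, 0.318)
curve = solve((intercept + slope * t) / (intercept + slope * t + 2.49) * 1024 - a, t)
print(curve[0].subs(a, 512))        # temperature at a half-scale ADC reading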
def cocitation_modularity(partition, adjacency_matrix, resolution=1.0):
    """Compute the modularity of a node partition of a cocitation graph.

    Parameters
    ----------
    partition : dict
        The partition of the nodes. The keys of the dictionary correspond
        to the nodes and the values to the communities.
    adjacency_matrix : scipy.csr_matrix or np.ndarray
        The adjacency matrix of the graph (sparse or dense).
    resolution : double, optional
        The resolution parameter in the modularity function (default=1.).

    Returns
    -------
    modularity : float
        The modularity.
    """
    if type(adjacency_matrix) == sparse.csr_matrix:
        adj_matrix = adjacency_matrix
    elif type(adjacency_matrix) == np.ndarray:
        adj_matrix = sparse.csr_matrix(adjacency_matrix)
    else:
        raise TypeError(
            "The argument should be a NumPy array or a SciPy Compressed Sparse Row matrix.")
    n_nodes = adj_matrix.shape[0]
    out_degree = np.array(adj_matrix.sum(axis=1).flatten())
    in_degree = adj_matrix.sum(axis=0).flatten()
    total_weight = out_degree.sum()
    with errstate(divide='ignore'):
        in_degree_sqrt = 1.0 / sqrt(in_degree)
    in_degree_sqrt[isinf(in_degree_sqrt)] = 0
    in_degree_sqrt = sparse.spdiags(in_degree_sqrt, [0], adj_matrix.shape[1],
                                    adj_matrix.shape[1], format='csr')
    normalized_adjacency = (adj_matrix.dot(in_degree_sqrt)).T
    communities = lab2com(partition)
    mod = 0.
    for community in communities:
        indicator_vector = np.zeros(n_nodes)
        indicator_vector[list(community)] = 1
        mod += np.linalg.norm(normalized_adjacency.dot(indicator_vector)) ** 2
        mod -= (resolution / total_weight) * (np.dot(out_degree, indicator_vector)) ** 2
    return float(mod / total_weight)
def learn_embeddings(self):
    n, m = self.adj_matrix.shape
    diags = self.adj_matrix.sum(axis=1).flatten()
    D = sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - self.adj_matrix
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    laplacian = DH.dot(L.dot(DH))
    _, v = sparse.linalg.eigs(laplacian, k=self.dim + 1, which='SM')
    embeddings = v[:, 1:].real
    return embeddings
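# Hedged usage sketch: spectral embedding of a 10-node ring, dropping the
# trivial first eigenvector as the method above does. The stub class only
# supplies the adj_matrix and dim attributes the method expects; assumes
# the older networkx/SciPy versions the snippet targets.
import networkx as nx
import scipy
import scipy.sparse.linalg
from scipy import sparse

class _Embedder:
    learn_embeddings = learn_embeddings   # reuse the function above

emb = _Embedder()
emb.adj_matrix = nx.to_scipy_sparse_matrix(nx.cycle_graph(10), format='csr')
emb.dim = 2
print(emb.learn_embeddings().shape)   # (10, 2)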
def train(self, train_graph, S=None):
    """Train a FIGRL model.

    Returns the trained FIGRL model and a pandas dataframe containing
    the embeddings generated for the train nodes.

    Parameters
    ----------
    train_graph : NetworkX Object
        The graph on which the training step is done.
    S : numpy randn matrix of size (number of nodes in train_graph,
        intermediate dimension)
    """
    A = nx.adjacency_matrix(train_graph)
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    D = scipy.sparse.spdiags(diags, [0], n, n, format='csr')
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / np.lib.scimath.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    DH = scipy.sparse.spdiags(diags_sqrt, [0], n, n, format='csr')
    Normalized_random_walk = DH.dot(A.dot(DH))
    if S is None:
        S = np.random.randn(n, self.intermediate_dimension) / np.sqrt(
            self.intermediate_dimension)
        np.savetxt("S_train_matrix.csv", S, delimiter=",")
        # S = np.array(pd.read_csv('S_train_matrix.csv', header=None))
    C = Normalized_random_walk.dot(S)
    from scipy import sparse
    sC = sparse.csr_matrix(C)
    U, self.sigma, self.V = scipy.sparse.linalg.svds(
        sC, k=self.embedding_size, tol=0, which='LM')
    self.V = self.V.transpose()
    self.sigma = np.diag(self.sigma)
    figrl_train_emb = pd.DataFrame(U)
    figrl_train_emb = figrl_train_emb.set_index(figrl_train_emb.index)
    self.sigma = np.array(self.sigma)
    self.V = np.array(self.V)
    self.St = np.array(S)
    return figrl_train_emb
def fit(self, adjacency_matrix):
    """Fits the model from data in adjacency_matrix.

    Parameters
    ----------
    adjacency_matrix : Scipy csr matrix or numpy ndarray
        Adjacency matrix of the graph
    """
    if type(adjacency_matrix) == sparse.csr_matrix:
        adj_matrix = adjacency_matrix
    elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray:
        adj_matrix = sparse.csr_matrix(adjacency_matrix)
    else:
        raise TypeError(
            "The argument must be a NumPy array or a SciPy Sparse matrix.")
    n_nodes, m_nodes = adj_matrix.shape
    if n_nodes != m_nodes:
        raise ValueError("The adjacency matrix must be a square matrix.")
    # if csgraph.connected_components(adj_matrix, directed=False)[0] > 1:
    #     raise ValueError("The graph must be connected.")
    if (adj_matrix != adj_matrix.maximum(adj_matrix.T)).nnz != 0:
        raise ValueError("The adjacency matrix is not symmetric.")
    # builds standard laplacian
    degrees = adj_matrix.dot(np.ones(n_nodes))
    degree_matrix = sparse.diags(degrees, format='csr')
    laplacian = degree_matrix - adj_matrix
    # applies normalization by node degrees
    with errstate(divide='ignore'):
        degrees_inv_sqrt = 1.0 / sqrt(degrees)
    degrees_inv_sqrt[isinf(degrees_inv_sqrt)] = 0
    weight_matrix = sparse.diags(degrees_inv_sqrt, format='csr')
    laplacian = weight_matrix.dot(laplacian.dot(weight_matrix))
    # spectral decomposition
    eigenvalues, eigenvectors = eigsh(laplacian,
                                      min(self.embedding_dimension + 1, n_nodes - 1),
                                      which='SM')
    self.eigenvalues_ = eigenvalues[1:]
    self.embedding_ = np.array(weight_matrix.dot(eigenvectors[:, 1:]))
    return self
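# Hedged usage of the spectral-embedding fit above; `SpectralEmbedding`
# stands in for whatever the surrounding class is actually called, and the
# constructor argument is an assumption.
import networkx as nx

adj = nx.to_scipy_sparse_matrix(nx.karate_club_graph())
model = SpectralEmbedding(embedding_dimension=2)   # hypothetical constructor
model.fit(adj)
print(model.embedding_.shape)                      # (34, 2)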
def fit(self, train_graph, S=None):
    """Train a FIGRL model.

    Returns the trained FIGRL model and a pandas dataframe containing
    the embeddings generated for the train nodes.

    Parameters
    ----------
    train_graph : NetworkX Object
        The graph on which the training step is done, containing only
        the seen training nodes.
    S : ndarray, shape (number of training nodes, intermediate dimension)
        A random matrix used to create the normalized random walk
        matrix; if None, fit creates a new one.

    Returns
    -------
    figrl_train_emb : pandas DataFrame
        The embeddings created during the training step for the
        training nodes.
    """
    A = nx.adjacency_matrix(train_graph)
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / np.lib.scimath.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    DH = scipy.sparse.spdiags(diags_sqrt, [0], n, n, format='csr')
    Normalized_random_walk = DH.dot(A.dot(DH))
    if S is None:
        S = np.random.randn(n, self.intermediate_dimension) / np.sqrt(
            self.intermediate_dimension)
        # np.savetxt("S_train_matrix.csv", S, delimiter=",")
    C = Normalized_random_walk.dot(S)
    from scipy import sparse
    sC = sparse.csr_matrix(C)
    U, self.sigma, self.V = scipy.sparse.linalg.svds(
        sC, k=self.embedding_size, tol=0, which='LM')
    self.V = self.V.transpose()
    self.sigma = np.diag(self.sigma)
    figrl_train_emb = pd.DataFrame(U)
    figrl_train_emb = figrl_train_emb.set_index(figrl_train_emb.index)
    self.sigma = np.array(self.sigma)
    self.V = np.array(self.V)
    self.St = np.array(S)
    return figrl_train_emb
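# Hedged sketch of the FIGRL training call; `FIGRL` and its constructor
# arguments are assumptions about the surrounding class.
import networkx as nx

train_graph = nx.fast_gnp_random_graph(100, 0.05, seed=1)
figrl = FIGRL(intermediate_dimension=50, embedding_size=16)  # hypothetical
train_emb = figrl.fit(train_graph)
print(train_emb.shape)   # (100, 16): one row of embeddings per train node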
def __init__(self, layer_multiplexes, bipartite_files=None):
    # TODO check and verify inputs
    self.multiplexes = layer_multiplexes
    print("\nGenerating bipartite matrix...")
    if len(layer_multiplexes.keys()) == 1 or bipartite_files is None:
        k = list(layer_multiplexes.keys())
        self.total_nodes = layer_multiplexes[k[0]].num_nodes
        self.num_layers = 1
        self.pool_of_nodes = list(layer_multiplexes[k[0]].pool_of_nodes)
        self.supra_adjacency_matrix = layer_multiplexes[k[0]].layers[0].adj_matrix
        self.normed_supra_adjacency_matrix = layer_multiplexes[k[0]].layers[0].normed_adj_matrix
        self.supra_transition_matrix = layer_multiplexes[k[0]].layers[0].normed_adj_matrix
    else:
        self.bipartite_matrix = dict.fromkeys(bipartite_files, None)
        self.bipartite_G = dict.fromkeys(bipartite_files, None)
        for key, value in bipartite_files.items():
            if len(value.columns) == 2:
                value.columns = ["source", "target"]
                value['weight'] = [1.0] * value.shape[0]
            elif len(value.columns) == 3:
                value.columns = ["source", "target", "weight"]
            bipartite_rel = value
            self.bipartite_G[key], self.bipartite_matrix[key] = self.get_bipartite_graph(
                layer_multiplexes[key.split("-")[0]],
                layer_multiplexes[key.split("-")[1]],
                bipartite_rel)
        print("Expanding bipartite matrix to fit the multiplex network...")
        self.supra_adjacency_matrix = self.compute_adjacency_matrix(
            self.multiplexes, self.bipartite_matrix)
        with scipy.errstate(divide='ignore', invalid='ignore'):
            DI = spdiags(
                1.0 / scipy.array(self.supra_adjacency_matrix.sum(axis=1).flat),
                [0], self.total_nodes, self.total_nodes)
        self.normed_supra_adjacency_matrix = DI * self.supra_adjacency_matrix
        self.supra_transition_matrix = self.compute_transition_matrix()
def normalized_laplacian_matrix(G, nodelist=None, weight='weight'):
    r"""Return the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : NumPy matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edge weights are summed.
    See to_numpy_matrix for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A, 1)),
    where A is the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import scipy
    import scipy.sparse
    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  format='csr')
    n, m = A.shape
    diags = A.sum(axis=1).flatten()
    D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
    L = D - A
    with scipy.errstate(divide='ignore'):
        diags_sqrt = 1.0 / scipy.sqrt(diags)
    diags_sqrt[scipy.isinf(diags_sqrt)] = 0
    DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    return DH.dot(L.dot(DH))
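# Quick check of the formula N = D^{-1/2} L D^{-1/2} on a path graph,
# runnable with the older networkx/SciPy versions this snippet targets:
# diagonal entries are 1 for every node of nonzero degree.
import networkx as nx

N = normalized_laplacian_matrix(nx.path_graph(4))
print(N.toarray().round(3))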
import networkx as nx
import numpy as np
import scipy
import scipy.linalg
import scipy.sparse

G = nx.read_edgelist('/home/rafael/googledrive/DOC/data/figrl/edgelist')
A = nx.adjacency_matrix(G)
A[1, 1]
dim = 10
n, m = A.shape
diags = A.sum(axis=1).flatten()
D = scipy.sparse.spdiags(diags, [0], m, n, format='csr')
# L = D - A
with scipy.errstate(divide='ignore'):
    diags_sqrt = 1.0 / scipy.sqrt(diags)
diags_sqrt[scipy.isinf(diags_sqrt)] = 0
DH = scipy.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
Normalized_random_walk = DH.dot(A.dot(DH))
S = np.random.randn(n, dim) / np.sqrt(dim)
C = Normalized_random_walk.dot(S)
C.shape
np.linalg.svd(C)
scipy.linalg.svd(C, lapack_driver='gesvd')
def compute_transition_matrix(self):
    total_edges = 0
    delta = {}  # per-layer jump weights; `compute_weights` is assumed to be a module-level flag
    if compute_weights:
        for key in self.multiplexes.keys():
            total_edges += self.multiplexes[key].layers[0].edge_df.shape[0]
        for key in self.multiplexes.keys():
            delta[key] = round(
                self.multiplexes[key].layers[0].edge_df.shape[0] / total_edges, 4)
    L = len(self.multiplexes.keys())
    self.supra_transition_matrix = self.supra_adjacency_matrix
    sort_keys = sorted(self.multiplexes.keys())
    if len(sort_keys) == 1:
        print("No bipartite relation possible!!")
        exit(-2)
    elif len(sort_keys) == 2:
        k1 = sort_keys[0]
        n1 = self.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = self.node_num_dict[k2]
        self.supra_transition_matrix[0:n1, 0:n1] = \
            delta[k1] * self.supra_transition_matrix[0:n1, 0:n1]
        if k1 + "-" + k2 in self.bipartite_matrix:
            if self.bipartite_matrix[k1 + "-" + k2] is not None:
                self.supra_transition_matrix[0:n1, n1:n1 + n2] = \
                    (1.0 - delta[k1]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[0:n1, n1:n1 + n2]
        if k2 + "-" + k1 in self.bipartite_matrix:
            if self.bipartite_matrix[k2 + "-" + k1] is not None:
                self.supra_transition_matrix[n1:n1 + n2, 0:n1] = \
                    (1.0 - delta[k2]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[n1:n1 + n2, 0:n1]
        self.supra_transition_matrix[n1:n1 + n2, n1:n1 + n2] = \
            delta[k2] * self.supra_transition_matrix[n1:n1 + n2, n1:n1 + n2]
    elif len(sort_keys) == 3:
        k1 = sort_keys[0]
        n1 = self.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = self.node_num_dict[k2]
        k3 = sort_keys[2]
        n3 = self.node_num_dict[k3]
        self.supra_transition_matrix[0:n1, 0:n1] = \
            delta[k1] * self.supra_transition_matrix[0:n1, 0:n1]
        if k1 + "-" + k2 in self.bipartite_matrix:
            if self.bipartite_matrix[k1 + "-" + k2] is not None:
                self.supra_transition_matrix[0:n1, n1:n1 + n2] = \
                    (1.0 - delta[k1]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[0:n1, n1:n1 + n2]
        if k1 + "-" + k3 in self.bipartite_matrix:
            if self.bipartite_matrix[k1 + "-" + k3] is not None:
                self.supra_transition_matrix[0:n1, n1 + n2:n1 + n2 + n3] = \
                    (1.0 - delta[k1]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[0:n1, n1 + n2:n1 + n2 + n3]
        if k2 + "-" + k1 in self.bipartite_matrix:
            if self.bipartite_matrix[k2 + "-" + k1] is not None:
                self.supra_transition_matrix[n1:n1 + n2, 0:n1] = \
                    (1.0 - delta[k2]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[n1:n1 + n2, 0:n1]
        self.supra_transition_matrix[n1:n1 + n2, n1:n1 + n2] = \
            delta[k2] * self.supra_transition_matrix[n1:n1 + n2, n1:n1 + n2]
        if k2 + "-" + k3 in self.bipartite_matrix:
            if self.bipartite_matrix[k2 + "-" + k3] is not None:
                self.supra_transition_matrix[n1:n1 + n2, n1 + n2:n1 + n2 + n3] = \
                    (1.0 - delta[k2]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[n1:n1 + n2, n1 + n2:n1 + n2 + n3]
        if k3 + "-" + k1 in self.bipartite_matrix:
            if self.bipartite_matrix[k3 + "-" + k1] is not None:
                self.supra_transition_matrix[n1 + n2:n1 + n2 + n3, 0:n1] = \
                    (1.0 - delta[k3]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[n1 + n2:n1 + n2 + n3, 0:n1]
        if k3 + "-" + k2 in self.bipartite_matrix:
            if self.bipartite_matrix[k3 + "-" + k2] is not None:
                self.supra_transition_matrix[n1 + n2:n1 + n2 + n3, n1:n1 + n2] = \
                    (1.0 - delta[k3]) / (L - 1) * 1.0 * \
                    self.supra_transition_matrix[n1 + n2:n1 + n2 + n3, n1:n1 + n2]
        self.supra_transition_matrix[n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 + n3] = \
            delta[k3] * self.supra_transition_matrix[n1 + n2:n1 + n2 + n3,
                                                     n1 + n2:n1 + n2 + n3]
    else:
        print("Invalid number of layers!!")
        exit(-2)
    with scipy.errstate(divide='ignore', invalid='ignore'):
        DI = spdiags(
            1.0 / scipy.array(self.supra_transition_matrix.sum(axis=1).flat),
            [0], self.total_nodes, self.total_nodes)
    self.supra_transition_matrix = DI * self.supra_transition_matrix
    return self.supra_transition_matrix
def preprocess_data(data_home, **kwargs):
    bucket_size = kwargs.get('bucket', 300)
    encoding = kwargs.get('encoding', 'utf-8')
    celebrity_threshold = kwargs.get('celebrity', 10)
    mindf = kwargs.get('mindf', 10)
    dtype = kwargs.get('dtype', 'float32')
    one_hot_label = kwargs.get('onehot', False)
    vocab_file = os.path.join(data_home, 'vocab.pkl')
    dump_file = os.path.join(data_home, 'dump.pkl')
    if os.path.exists(dump_file) and not model_args.builddata:
        logging.info('loading data from dumped file...')
        data = load_obj(dump_file)
        logging.info('loading data finished!')
        return data
    dl = DataLoader(data_home=data_home, bucket_size=bucket_size,
                    encoding=encoding,
                    celebrity_threshold=celebrity_threshold,
                    one_hot_labels=one_hot_label, mindf=mindf,
                    token_pattern=r'(?u)(?<![@])#?\b\w\w+\b')
    dl.load_data()
    dl.assignClasses()
    dl.tfidf()
    vocab = dl.vectorizer.vocabulary_
    logging.info('saving vocab in {}'.format(vocab_file))
    dump_obj(vocab, vocab_file)
    logging.info('vocab dumped successfully!')
    U_test = dl.df_test.index.tolist()
    U_dev = dl.df_dev.index.tolist()
    U_train = dl.df_train.index.tolist()
    dl.get_graph()
    logging.info('creating adjacency matrix...')
    adj = nx.adjacency_matrix(dl.graph,
                              nodelist=range(len(U_train + U_dev + U_test)),
                              weight='w')
    adj.setdiag(0)
    # selfloop_value = np.asarray(adj.sum(axis=1)).reshape(-1,)
    selfloop_value = 1
    adj.setdiag(selfloop_value)
    n, m = adj.shape
    diags = adj.sum(axis=1).flatten()
    with sp.errstate(divide='ignore'):
        diags_sqrt = 1.0 / sp.sqrt(diags)
    diags_sqrt[sp.isinf(diags_sqrt)] = 0
    D_pow_neghalf = sp.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
    A = D_pow_neghalf * adj * D_pow_neghalf
    A = A.astype(dtype)
    logging.info('adjacency matrix created.')
    X_train = dl.X_train
    X_dev = dl.X_dev
    X_test = dl.X_test
    Y_test = dl.test_classes
    Y_train = dl.train_classes
    Y_dev = dl.dev_classes
    classLatMedian = {str(c): dl.cluster_median[c][0] for c in dl.cluster_median}
    classLonMedian = {str(c): dl.cluster_median[c][1] for c in dl.cluster_median}
    P_test = [str(a[0]) + ',' + str(a[1])
              for a in dl.df_test[['lat', 'lon']].values.tolist()]
    P_train = [str(a[0]) + ',' + str(a[1])
               for a in dl.df_train[['lat', 'lon']].values.tolist()]
    P_dev = [str(a[0]) + ',' + str(a[1])
             for a in dl.df_dev[['lat', 'lon']].values.tolist()]
    userLocation = {}
    for i, u in enumerate(U_train):
        userLocation[u] = P_train[i]
    for i, u in enumerate(U_test):
        userLocation[u] = P_test[i]
    for i, u in enumerate(U_dev):
        userLocation[u] = P_dev[i]
    data = (A, X_train, Y_train, X_dev, Y_dev, X_test, Y_test, U_train,
            U_dev, U_test, classLatMedian, classLonMedian, userLocation)
    if not model_args.builddata:
        logging.info('dumping data in {} ...'.format(str(dump_file)))
        dump_obj(data, dump_file)
        logging.info('data dump finished!')
    return data
def main2(data_home, **kwargs):
    bucket_size = kwargs.get('bucket', 300)
    batch_size = kwargs.get('batch', 500)
    hidden_size = kwargs.get('hidden', 500)
    encoding = kwargs.get('encoding', 'utf-8')
    regul = kwargs.get('regularization', 1e-6)
    celebrity_threshold = kwargs.get('celebrity', 10)
    convolution = kwargs.get('conv', False)
    dl = DataLoader(data_home=data_home, bucket_size=bucket_size,
                    encoding=encoding,
                    celebrity_threshold=celebrity_threshold)
    dl.load_data()
    dl.get_graph()
    dl.assignClasses()
    dl.tfidf()
    U_test = dl.df_test.index.tolist()
    U_dev = dl.df_dev.index.tolist()
    U_train = dl.df_train.index.tolist()
    if convolution:
        logging.info('creating adjacency matrix...')
        adj = nx.adjacency_matrix(dl.graph,
                                  nodelist=range(len(U_train + U_dev + U_test)),
                                  weight='w')
        # adj[adj > 0] = 1
        adj.setdiag(1)
        n, m = adj.shape
        diags = adj.sum(axis=1).flatten()
        with sp.errstate(divide='ignore'):
            diags_sqrt = 1.0 / sp.sqrt(diags)
        diags_sqrt[sp.isinf(diags_sqrt)] = 0
        D_pow_neghalf = sp.sparse.spdiags(diags_sqrt, [0], m, n, format='csr')
        H = D_pow_neghalf * adj * D_pow_neghalf
        # logging.info('normalizing adjacency matrix...')
        # normalize(adj, axis=1, norm='l1', copy=False)
        # adj = adj.astype('float32')
        logging.info('vstacking...')
        X = sp.sparse.vstack([dl.X_train, dl.X_dev, dl.X_test])
        logging.info('convolution...')
        X_conv = H * X
        X_conv = X_conv.tocsr().astype('float32')
        X_train = X_conv[0:dl.X_train.shape[0], :]
        X_dev = X_conv[dl.X_train.shape[0]:dl.X_train.shape[0] + dl.X_dev.shape[0], :]
        X_test = X_conv[dl.X_train.shape[0] + dl.X_dev.shape[0]:, :]
    else:
        X_train = dl.X_train
        X_dev = dl.X_dev
        X_test = dl.X_test
    Y_test = dl.test_classes
    Y_train = dl.train_classes
    Y_dev = dl.dev_classes
    classLatMedian = {str(c): dl.cluster_median[c][0] for c in dl.cluster_median}
    classLonMedian = {str(c): dl.cluster_median[c][1] for c in dl.cluster_median}
    P_test = [str(a[0]) + ',' + str(a[1])
              for a in dl.df_test[['lat', 'lon']].values.tolist()]
    P_train = [str(a[0]) + ',' + str(a[1])
               for a in dl.df_train[['lat', 'lon']].values.tolist()]
    P_dev = [str(a[0]) + ',' + str(a[1])
             for a in dl.df_dev[['lat', 'lon']].values.tolist()]
    userLocation = {}
    for i, u in enumerate(U_train):
        userLocation[u] = P_train[i]
    for i, u in enumerate(U_test):
        userLocation[u] = P_test[i]
    for i, u in enumerate(U_dev):
        userLocation[u] = P_dev[i]
    clf = MLP(n_epochs=200, batch_size=batch_size, init_parameters=None,
              complete_prob=False, add_hidden=True,
              regul_coefs=[regul, regul], save_results=False,
              hidden_layer_size=hidden_size, drop_out=True,
              drop_out_coefs=[0.5, 0.5], early_stopping_max_down=10,
              loss_name='log', nonlinearity='rectify')
    clf.fit(X_train, Y_train, X_dev, Y_dev)
    print('Test classification accuracy is %f' % clf.accuracy(X_test, Y_test))
    y_pred = clf.predict(X_test)
    geo_eval(Y_test, y_pred, U_test, classLatMedian, classLonMedian, userLocation)
    print('Dev classification accuracy is %f' % clf.accuracy(X_dev, Y_dev))
    y_pred = clf.predict(X_dev)
    mean, median, acc161 = geo_eval(Y_dev, y_pred, U_dev, classLatMedian,
                                    classLonMedian, userLocation)
    return mean, median, acc161
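# The convolution branch above applies the GCN-style propagation
# H = D^{-1/2} (A + I) D^{-1/2}. A minimal standalone sketch of that step:
import numpy as np
import scipy.sparse as sparse

A = sparse.lil_matrix(np.array([[0.0, 1.0],
                                [1.0, 0.0]]))
A.setdiag(1)                                   # self-loops, as in the code above
d = np.asarray(A.sum(axis=1)).flatten()
D_pow_neghalf = sparse.spdiags(1.0 / np.sqrt(d), [0], *A.shape)
H = D_pow_neghalf * A * D_pow_neghalf
print(H.toarray())                             # [[0.5, 0.5], [0.5, 0.5]]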
def _updateInternals(self):
    """Update internal attributes related to likelihood.

    Should be called any time branch lengths or model parameters
    are changed.
    """
    rootnode = self.nnodes - 1
    if self._distributionmodel:
        catweights = self.model.catweights
    else:
        catweights = scipy.ones(1, dtype='float')
    # When there are multiple categories, it is acceptable for some
    # (but not all) of them to have underflow at any given site. Note
    # that we still include a check for underflow by ensuring that
    # none of the site likelihoods is zero.
    undererrstate = 'ignore' if len(catweights) > 1 else 'raise'
    with scipy.errstate(over='raise', under=undererrstate,
                        divide='raise', invalid='raise'):
        self.underflowlogscale.fill(0.0)
        self._computePartialLikelihoods()
        sitelik = scipy.zeros(self.nsites, dtype='float')
        assert (self.L[rootnode] >= 0).all(), str(self.L[rootnode])
        for k in self._catindices:
            sitelik += scipy.sum(self._stationarystate(k) *
                                 self.L[rootnode][k], axis=1) * catweights[k]
        assert (sitelik > 0).all(), "Underflow:\n{0}\n{1}".format(
            sitelik, self.underflowlogscale)
        self.siteloglik = scipy.log(sitelik) + self.underflowlogscale
        self.loglik = scipy.sum(self.siteloglik) + self.model.logprior
        if self.dparamscurrent:
            self._dloglik = {}
            for param in self.model.freeparams:
                if self._distributionmodel and (
                        param in self.model.distributionparams):
                    name = self.model.distributedparam
                    weighted_dk = (self.model.d_distributionparams[param] *
                                   catweights)
                else:
                    name = param
                    weighted_dk = catweights
                dsiteloglik = 0
                for k in self._catindices:
                    dsiteloglik += (scipy.sum(
                        self._dstationarystate(k, name) * self.L[rootnode][k]
                        + self.dL[name][rootnode][k] *
                        self._stationarystate(k), axis=-1) * weighted_dk[k])
                dsiteloglik /= sitelik
                self._dloglik[param] = (scipy.sum(dsiteloglik, axis=-1) +
                                        self.model.dlogprior(param))
        if self.dtcurrent:
            self._dloglik_dt = 0
            dLnroot_dt = scipy.array([
                self.dL_dt[n2][rootnode] for n2 in sorted(self.dL_dt.keys())])
            for k in self._catindices:
                if isinstance(k, int):
                    dLnrootk_dt = dLnroot_dt.swapaxes(0, 1)[k]
                else:
                    assert k == slice(None)
                    dLnrootk_dt = dLnroot_dt
                self._dloglik_dt += catweights[k] * scipy.sum(
                    self._stationarystate(k) * dLnrootk_dt, axis=-1)
            self._dloglik_dt /= sitelik
            self._dloglik_dt = scipy.sum(self._dloglik_dt, axis=-1)
            assert self._dloglik_dt.shape == self.t.shape
def fit(self, adjacency_matrix, tol=1e-6, n_iter='auto',
        power_iteration_normalizer='auto', random_state=None):
    """Fits the model from data in adjacency_matrix.

    Parameters
    ----------
    adjacency_matrix : array-like, shape = (n, m)
        Adjacency matrix, where n = m = |V| for a standard graph,
        n = |V1|, m = |V2| for a bipartite graph.
    tol : float, optional
        Tolerance for pseudo-inverse of singular values (default=1e-6).
    n_iter : int or 'auto' (default is 'auto')
        Number of power iterations. It can be used to deal with very
        noisy problems. When 'auto', it is set to 4, unless
        `n_components` is small (< .1 * min(X.shape)), in which case
        `n_iter` is set to 7. This improves precision with few components.
    power_iteration_normalizer : 'auto' (default), 'QR', 'LU', 'none'
        Whether the power iterations are normalized with step-by-step
        QR factorization (the slowest but most accurate), 'none' (the
        fastest but numerically unstable when `n_iter` is large, e.g.
        typically 5 or larger), or 'LU' factorization (numerically
        stable but can lose slightly in accuracy). The 'auto' mode
        applies no normalization if `n_iter` <= 2 and switches to LU
        otherwise.
    random_state : int, RandomState instance or None, optional (default=None)
        The seed of the pseudo random number generator to use when
        shuffling the data. If int, random_state is the seed used by
        the random number generator; if RandomState instance,
        random_state is the random number generator; if None, the
        random number generator is the RandomState instance used by
        `np.random`.

    Returns
    -------
    self
    """
    if type(adjacency_matrix) == sparse.csr_matrix:
        adj_matrix = adjacency_matrix
    elif type(adjacency_matrix) == np.ndarray:
        adj_matrix = sparse.csr_matrix(adjacency_matrix)
    else:
        raise TypeError(
            "The argument should be a NumPy array or a SciPy Compressed Sparse Row matrix.")
    n_nodes, m_nodes = adj_matrix.shape
    # out-degree vector
    dou = adj_matrix.sum(axis=1).flatten()
    # in-degree vector
    din = adj_matrix.sum(axis=0).flatten()
    with errstate(divide='ignore'):
        dou_sqrt = 1.0 / sqrt(dou)
        din_sqrt = 1.0 / sqrt(din)
    dou_sqrt[isinf(dou_sqrt)] = 0
    din_sqrt[isinf(din_sqrt)] = 0
    # pseudo-inverse square-root out-degree matrix
    dhou = sparse.spdiags(dou_sqrt, [0], n_nodes, n_nodes, format='csr')
    # pseudo-inverse square-root in-degree matrix
    dhin = sparse.spdiags(din_sqrt, [0], m_nodes, m_nodes, format='csr')
    laplacian = dhou.dot(adj_matrix.dot(dhin))
    u, sigma, vt = randomized_svd(laplacian, self.n_components,
                                  n_iter=n_iter,
                                  power_iteration_normalizer=power_iteration_normalizer,
                                  random_state=random_state)
    self.singular_values_ = sigma
    gamma = 1 - sigma ** 2
    gamma_sqrt = np.diag(np.piecewise(gamma, [gamma > tol, gamma <= tol],
                                      [lambda x: 1 / np.sqrt(x), 0]))
    self.embedding_ = dhou.dot(u).dot(gamma_sqrt)
    self.backward_embedding_ = dhin.dot(vt.T).dot(gamma_sqrt)
    return self
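# Hedged usage sketch of the SVD-based embedding above on a random
# bipartite biadjacency matrix; `GSVDEmbedding` and its constructor
# argument are assumptions about the surrounding class.
import numpy as np
from scipy import sparse

biadjacency = sparse.random(20, 12, density=0.3, format='csr', random_state=0)
model = GSVDEmbedding(n_components=4)     # hypothetical constructor
model.fit(biadjacency)
print(model.embedding_.shape, model.backward_embedding_.shape)  # (20, 4) (12, 4)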
def MultiRank_Nodes_Layers(H, alpha, gamma, s, a):
    v_quadratic_error = 0.001
    z = np.ones(H.num_layers, )
    g = H.supra_adjacency_matrix
    # g = lil_matrix((H.total_nodes, H.total_nodes), dtype=float)  # without bipartite connection
    sort_keys = sorted(H.multiplexes.keys())
    if len(sort_keys) == 2:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
        g[n1:n1 + n2, n1:n1 + n2] = \
            H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
    elif len(sort_keys) == 3:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        k3 = sort_keys[2]
        n3 = H.node_num_dict[k3]
        g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
        g[n1:n1 + n2, n1:n1 + n2] = \
            H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
        g[n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 + n3] = \
            H.supra_adjacency_matrix[n1 + n2:n1 + n2 + n3,
                                     n1 + n2:n1 + n2 + n3].T * z[2]
    B_in = lil_matrix((H.num_layers, H.total_nodes), dtype=float)
    W = np.zeros(H.num_layers)
    if len(sort_keys) == 2:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        W[0] = H.multiplexes[k1].layers[0].adj_matrix.sum()
        W[1] = H.multiplexes[k2].layers[0].adj_matrix.sum()
        if k1 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k2].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = np.array(tmp / W[0])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                B_in[0:1, 0:n1] = lil_matrix(n_tmp)
        if k2 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k1].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = np.array(tmp / W[1])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                B_in[1:, n1:] = lil_matrix(n_tmp)
    elif len(sort_keys) == 3:
        k1 = sort_keys[0]
        n1 = H.node_num_dict[k1]
        k2 = sort_keys[1]
        n2 = H.node_num_dict[k2]
        k3 = sort_keys[2]
        n3 = H.node_num_dict[k3]
        W[0] = H.multiplexes[k1].layers[0].adj_matrix.sum()
        W[1] = H.multiplexes[k2].layers[0].adj_matrix.sum()
        W[2] = H.multiplexes[k3].layers[0].adj_matrix.sum()
        if k1 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k2].T.sum(axis=0).ravel()
                B_in[0:1, 0:n1] = tmp
        if k1 + "-" + k3 in H.bipartite_matrix:
            if H.bipartite_matrix[k1 + "-" + k3] is not None:
                tmp = H.bipartite_matrix[k1 + "-" + k3].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[0:1, 0:n1] + tmp
                    n_tmp = np.array(n_tmp / W[0])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                B_in[0:1, 0:n1] = lil_matrix(n_tmp)
        if k2 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k1].T.sum(axis=0).ravel()
                B_in[1:2, n1:n1 + n2] = tmp
        if k2 + "-" + k3 in H.bipartite_matrix:
            if H.bipartite_matrix[k2 + "-" + k3] is not None:
                tmp = H.bipartite_matrix[k2 + "-" + k3].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[1:2, n1:n1 + n2] + tmp
                    n_tmp = np.array(n_tmp / W[1])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                B_in[1:2, n1:n1 + n2] = lil_matrix(n_tmp)
        if k3 + "-" + k1 in H.bipartite_matrix:
            if H.bipartite_matrix[k3 + "-" + k1] is not None:
                tmp = H.bipartite_matrix[k3 + "-" + k1].T.sum(axis=0).ravel()
                B_in[2:, n1 + n2:] = tmp
        if k3 + "-" + k2 in H.bipartite_matrix:
            if H.bipartite_matrix[k3 + "-" + k2] is not None:
                tmp = H.bipartite_matrix[k3 + "-" + k2].T.sum(axis=0).ravel()
                with scipy.errstate(divide='ignore', invalid='ignore'):
                    n_tmp = B_in[2:, n1 + n2:] + tmp
                    n_tmp = np.array(n_tmp / W[2])
                    n_tmp[n_tmp == np.inf] = 0
                    n_tmp[np.where(np.isnan(n_tmp))] = 0
                B_in[2:, n1 + n2:] = lil_matrix(n_tmp)
    D = g.sum(axis=1)
    D[D < 1.0] = 1.0
    with scipy.errstate(divide='ignore', invalid='ignore'):
        D = spdiags(1.0 / scipy.array(D.flat), [0], H.total_nodes, H.total_nodes)
    x0 = g.sum(axis=0) + g.sum(axis=1).T
    x0 = scipy.array(x0)
    with scipy.errstate(divide='ignore', invalid='ignore'):
        x0 = x0.T / np.count_nonzero(x0)
        x0[x0 == np.inf] = 0
        x0[np.where(np.isnan(x0))] = 0
    x0 = scipy.array(x0)
    l = scipy.array(g.sum(axis=0))
    jump = scipy.array(alpha * l.T)
    jump = np.divide(jump, jump.sum())
    x = x0
    x = g.dot(D).dot(np.multiply(x, jump)) + \
        np.multiply(x, 1 - jump).sum(axis=0) * x0
    x = np.divide(x, x.sum())
    z1 = np.power(B_in.sum(axis=1), a)
    with np.errstate(divide='ignore'):
        z2 = B_in.dot(np.power(x, (s * gamma)))
        z2[z2 == np.inf] = 0
        z2[np.where(np.isnan(z2))] = 0
    with scipy.errstate(divide='ignore', invalid='ignore'):
        n_tmp = z2
        n_tmp = np.array(n_tmp / B_in.sum(axis=1))
        n_tmp[n_tmp == np.inf] = 0
        n_tmp[np.where(np.isnan(n_tmp))] = 0
        z2 = n_tmp
    z = np.multiply(z1, (np.power(z2, s)))
    z = np.divide(z, z.sum())
    # normalized = (x - x.min()) / (x.max() - x.min())
    count = 0
    last_x = np.ones(H.total_nodes, ) * np.inf
    while True:
        last_x = x
        g = lil_matrix((H.total_nodes, H.total_nodes), dtype=float)
        sort_keys = sorted(H.multiplexes.keys())
        n_z = list()
        for item in z.tolist():
            n_z.append(item)
        z = np.array(n_z)
        z = z.reshape(H.num_layers, )
        if len(sort_keys) == 2:
            k1 = sort_keys[0]
            n1 = H.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = H.node_num_dict[k2]
            g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
            g[n1:n1 + n2, n1:n1 + n2] = \
                H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
        elif len(sort_keys) == 3:
            k1 = sort_keys[0]
            n1 = H.node_num_dict[k1]
            k2 = sort_keys[1]
            n2 = H.node_num_dict[k2]
            k3 = sort_keys[2]
            n3 = H.node_num_dict[k3]
            g[0:n1, 0:n1] = H.supra_adjacency_matrix[0:n1, 0:n1].T * z[0]
            g[n1:n1 + n2, n1:n1 + n2] = \
                H.supra_adjacency_matrix[n1:n1 + n2, n1:n1 + n2].T * z[1]
            g[n1 + n2:n1 + n2 + n3, n1 + n2:n1 + n2 + n3] = \
                H.supra_adjacency_matrix[n1 + n2:n1 + n2 + n3,
                                         n1 + n2:n1 + n2 + n3].T * z[2]
        D = g.sum(axis=1)
        D[D < 1.0] = 1.0
        with scipy.errstate(divide='ignore', invalid='ignore'):
            D = spdiags(1.0 / scipy.array(D.flat), [0],
                        H.total_nodes, H.total_nodes)
        x0 = g.sum(axis=0) + g.sum(axis=1).T
        with scipy.errstate(divide='ignore', invalid='ignore'):
            x0 = x0.T / np.count_nonzero(x0)
            x0[x0 == np.inf] = 0
            x0[np.where(np.isnan(x0))] = 0
        l = scipy.array(g.sum(axis=0))
        jump = scipy.array(alpha * l.T)
        jump = np.divide(jump, jump.sum())
        x = g.dot(D).dot(np.multiply(x, jump)) + \
            np.multiply(np.multiply(x, 1 - jump).sum(axis=0), x0)
        x = np.divide(x, x.sum())
        z1 = np.power(B_in.sum(axis=1), a)
        with np.errstate(divide='ignore'):
            z2 = B_in.dot(np.power(x, (s * gamma)))
            z2[z2 == np.inf] = 0
            z2[np.where(np.isnan(z2))] = 0
        with scipy.errstate(divide='ignore', invalid='ignore'):
            n_tmp = z2
            n_tmp = np.array(n_tmp / B_in.sum(axis=1))
            n_tmp[n_tmp == np.inf] = 0
            n_tmp[np.where(np.isnan(n_tmp))] = 0
            z2 = n_tmp
        with np.errstate(divide='ignore'):
            z = np.multiply(z1, (np.power(z2, s)))
            z[z == np.inf] = 0
            z[np.where(np.isnan(z))] = 0
        z = np.divide(z, z.sum())
        try:
            normed = norm(x - last_x)
        except:
            print(count)
            break
        if normed < v_quadratic_error:
            break
        elif count > 100:
            break
        count = count + 1
    return x, z
def normalized_laplacian_matrix(G, nodelist=None, weight="weight"):
    r"""Returns the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : Scipy sparse matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edge weights are summed.
    See to_numpy_array for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A, 1)),
    where A is the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix
    normalized_laplacian_spectrum

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr")
    n, m = A.shape
    diags = A.sum(axis=1)
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(sp.sparse.spdiags(diags, 0, m, n, format="csr"))
    L = D - A
    with sp.errstate(divide="ignore"):
        diags_sqrt = 1.0 / np.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    DH = sp.sparse.csr_array(sp.sparse.spdiags(diags_sqrt, 0, m, n, format="csr"))
    import warnings
    warnings.warn(
        "normalized_laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm csr_matrix wrapper for NX 3.0
    return sp.sparse.csr_matrix(DH @ (L @ DH))
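# Usage sketch for this newer networkx variant (networkx 2.8-era); it emits
# the FutureWarning about the scipy sparse-array migration and still
# returns a csr_matrix, per the final wrapper.
import networkx as nx

N = normalized_laplacian_matrix(nx.path_graph(4))
print(type(N))   # <class 'scipy.sparse...csr_matrix'>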