def _check_symmetric_graph_laplacian(mat, normed, copy=True):
    if not hasattr(mat, 'shape'):
        mat = eval(mat, dict(np=np, sparse=sparse))

    if sparse.issparse(mat):
        sp_mat = mat
        mat = sp_mat.toarray()
    else:
        sp_mat = sparse.csr_matrix(mat)

    mat_copy = np.copy(mat)
    sp_mat_copy = sparse.csr_matrix(sp_mat, copy=True)

    n_nodes = mat.shape[0]
    explicit_laplacian = _explicit_laplacian(mat, normed=normed)
    laplacian = csgraph.laplacian(mat, normed=normed, copy=copy)
    sp_laplacian = csgraph.laplacian(sp_mat, normed=normed, copy=copy)

    if copy:
        assert_allclose(mat, mat_copy)
        _assert_allclose_sparse(sp_mat, sp_mat_copy)
    else:
        if not (normed and check_int_type(mat)):
            assert_allclose(laplacian, mat)
            if sp_mat.format == 'coo':
                _assert_allclose_sparse(sp_laplacian, sp_mat)

    assert_allclose(laplacian, sp_laplacian.toarray())

    for tested in [laplacian, sp_laplacian.toarray()]:
        if not normed:
            assert_allclose(tested.sum(axis=0), np.zeros(n_nodes))
        assert_allclose(tested.T, tested)
        assert_allclose(tested, explicit_laplacian)

def _check_laplacian(A, desired_L, desired_d, normed, use_out_degree):
    for arr_type in np.array, sparse.csr_matrix, sparse.coo_matrix:
        for t in int, float, complex:
            adj = arr_type(A, dtype=t)
            L = csgraph.laplacian(adj, normed=normed, return_diag=False,
                                  use_out_degree=use_out_degree)
            _assert_allclose_sparse(L, desired_L, atol=1e-12)
            L, d = csgraph.laplacian(adj, normed=normed, return_diag=True,
                                     use_out_degree=use_out_degree)
            _assert_allclose_sparse(L, desired_L, atol=1e-12)
            _assert_allclose_sparse(d, desired_d, atol=1e-12)

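# A minimal, self-contained sketch (not part of the original test suite)
# showing the property these helpers verify: for an undirected graph with
# adjacency matrix A, csgraph.laplacian returns L = D - A, where D is the
# diagonal degree matrix.
import numpy as np
from scipy.sparse import csgraph

A = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [1., 0., 0.]])
D = np.diag(A.sum(axis=0))
assert np.allclose(csgraph.laplacian(A, normed=False), D - A)
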
def compute_laplacian(self, laplacian_type='simple', square=False):
    am = nx.adjacency_matrix(self.graph)
    if square:
        am = am.dot(am)
    if laplacian_type == 'simple':
        return laplacian(am).asfptype()
    elif laplacian_type == 'normalized':
        return laplacian(am, normed=True).asfptype()
    raise ValueError('Unknown Laplacian type: {}'.format(laplacian_type))

def ler(X, Y, n_components=2, affinity='nearest_neighbors',
        n_neighbors=None, gamma=None, mu=1.0, y_gamma=None,
        eigen_solver='auto', tol=1e-6, max_iter=100, random_state=None):
    if eigen_solver not in ('auto', 'arpack', 'dense'):
        raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)

    nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1)
    nbrs.fit(X)
    X = nbrs._fit_X

    Nx, d_in = X.shape
    Ny = Y.shape[0]

    if n_components > d_in:
        raise ValueError("output dimension must be less than or equal "
                         "to input dimension")
    if Nx != Ny:
        raise ValueError("X and Y must have the same number of points")
    if affinity == 'nearest_neighbors':
        # check for None first; comparing None with >= raises a TypeError
        if n_neighbors is None or n_neighbors <= 0:
            raise ValueError("n_neighbors must be positive")
        if n_neighbors >= Nx:
            raise ValueError("n_neighbors must be less than the number of points")
    elif affinity == 'rbf':
        if gamma is not None and gamma <= 0:
            raise ValueError("gamma must be positive")
    else:
        raise ValueError("affinity must be 'nearest_neighbors' or 'rbf'")

    if Y.ndim == 1:
        Y = Y[:, None]

    if y_gamma is None:
        dists = pairwise_distances(Y)
        y_gamma = 1.0 / median(dists)

    if affinity == 'nearest_neighbors':
        affinity = kneighbors_graph(X, n_neighbors, include_self=True)
    else:
        if gamma is None:
            dists = pairwise_distances(X)
            gamma = 1.0 / median(dists)
        affinity = kneighbors_graph(X, n_neighbors, mode='distance',
                                    include_self=True)
        affinity.data = exp(-gamma * affinity.data ** 2)

    K = rbf_kernel(Y, gamma=y_gamma)
    lap = laplacian(affinity, normed=True)
    lapK = laplacian(K, normed=True)
    embedding, _ = null_space(lap + mu * lapK, n_components, k_skip=1,
                              eigen_solver=eigen_solver, tol=tol,
                              max_iter=max_iter, random_state=random_state)
    return embedding

def _isoPartition(img_graph, ground=0, algCode='full', verbose=False):
    """Returns the isoperimetric partition."""
    n_comp, dummy = connected_components(img_graph)
    d = img_graph.sum(axis=1)
    ground = np.argmax(d)

    # Get the Laplacian on which to calculate the solution, based on algCode
    if algCode == 'full':
        img_laplacian = csr_matrix(laplacian(img_graph))
    elif algCode == 'umst':
        img_graph_umst = get_umst(img_graph)
        img_laplacian = csr_matrix(laplacian(img_graph_umst))
    elif algCode in ('mst', 'mstGrady'):
        # note: the original condition `algCode == 'mst' or 'mstGrady'`
        # was always true because a non-empty string is truthy
        img_graph_mst = get_mst(img_graph)
        img_laplacian = csr_matrix(laplacian(img_graph_mst))
    else:
        raise ValueError("algCode should be one of "
                         "{'full', 'umst', 'mst', 'mstGrady'}")

    # Get the seeded Laplacian: remove the row and column indicated by ground
    ind = np.arange(img_graph.shape[0], dtype=np.int32)
    ind = np.hstack([ind[:ground], ind[(ground + 1):]])
    img_laplacian_seeded = (img_laplacian[ind]).transpose()[ind]

    # Solve the isoperimetric equation
    d = np.ones(img_laplacian_seeded.shape[0], dtype=np.float64)
    if algCode == 'mstGrady':
        x0 = solve(img_laplacian, ground)
        x0 = x0[ind]
    else:
        x0 = spsolve(img_laplacian_seeded, d)

    minVal = np.min(x0)
    if minVal < 0:
        x0[x0 < 0] = np.max(x0) + 1

    if verbose:
        print("Error is {:4f}".format(
            norm(img_laplacian_seeded.dot(x0) - d) / norm(d)))

    x0 = x0 - np.min(x0) + 1e-6
    x0 = x0 / np.max(x0)  # Normalize to get values in [0, 1]

    # Assemble the full answer
    ans = np.zeros(img_graph.shape[0], dtype=np.float64)
    ans[ind] = x0

    # Compute the threshold
    img_laplacian = csr_matrix(laplacian(img_graph))
    part1, part2, val = _compute_isoperimetric_partition(ans, img_laplacian)
    return part1, part2, val, ans

def spectral_distance(G1, G2):
    adj1 = nx.to_numpy_array(G1)
    adj2 = nx.to_numpy_array(G2)
    L1 = laplacian(adj1, normed=False)
    L2 = laplacian(adj2, normed=False)
    v1 = np.sort(eigvals(L1))
    v2 = np.sort(eigvals(L2))
    u1 = np.sort(eigvals(adj1))
    u2 = np.sort(eigvals(adj2))
    return np.abs(np.sqrt(np.sum(np.square(u1 - u2) + np.square(v1 - v2))))

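# Hedged usage sketch (assumed, not from the original source): both graphs
# must have the same number of nodes so the sorted spectra can be subtracted
# elementwise. Assumes the enclosing module imports networkx as nx, numpy as
# np, and laplacian/eigvals as used by spectral_distance above.
import networkx as nx

print(spectral_distance(nx.path_graph(5), nx.cycle_graph(5)))
# 0.0 only for cospectral graphs; positive here
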
def get_laplacian(A, type="unnormalized"):
    """Return a sparse Laplacian from a given adjacency matrix.

    The Laplacian can be of type unnormalized, normalized, or random-walk.
    """
    if type == "unnormalized":
        return laplacian(A, normed=False)
    elif type == "normalized":
        return laplacian(A, normed=True)
    elif type == "randomwalk":
        D_inv = spdiags(1.0 / A.sum(axis=1).flatten(), [0],
                        A.shape[0], A.shape[0], format='csr')
        # use sparse matrix multiplication; np.dot does not handle
        # scipy sparse matrices correctly
        return eye(A.shape[0], A.shape[0]) - D_inv @ A

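# Hedged usage sketch (assumed, not from the original source): the
# random-walk Laplacian is L_rw = I - D^{-1} A, which is what the
# 'randomwalk' branch builds. Assumes the enclosing module imports
# spdiags and eye from scipy.sparse.
import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0., 1., 1.],
                         [1., 0., 0.],
                         [1., 0., 0.]]))
L_rw = get_laplacian(A, type="randomwalk")
# node 0 has degree 2; nodes 1 and 2 have degree 1
expected = np.array([[1., -0.5, -0.5],
                     [-1., 1., 0.],
                     [-1., 0., 1.]])
assert np.allclose(L_rw.todense(), expected)
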
def von_Neumann_distance(G1, G2):
    A1 = __get_adjacency_sparse(G1)
    A2 = __get_adjacency_sparse(G2)
    L1 = csgraph.laplacian(A1, normed=False)
    L2 = csgraph.laplacian(A2, normed=False)
    L = L1 / L1.diagonal().sum() + L2 / L2.diagonal().sum()
    S1 = __von_Neumann_entropy(L1)
    S2 = __von_Neumann_entropy(L2)
    S12 = __von_Neumann_entropy(L)
    return np.sqrt(S12 - (S1 + S2) / 2)

def spectral_clustering(adj, sim, alpha, ncluster=10, v0=None):
    """Individually fair spectral clustering.

    :param adj: adjacency matrix
    :param sim: similarity matrix
    :param alpha: regularization parameter
    :param ncluster: number of clusters
    :param v0: starting vector for the eigen-decomposition
    :return: soft cluster membership matrix of fair spectral clustering
    """
    lap = laplacian(adj) + alpha * laplacian(sim)
    lap *= -1
    _, u = eigsh(lap, which='LM', k=ncluster, sigma=1.0, v0=v0)
    return u

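# Hedged usage sketch (assumed, not from the original source). Negating the
# combined Laplacian and running shift-invert eigsh around sigma=1.0
# retrieves the eigenvectors of the *smallest* Laplacian eigenvalues.
# Assumes the enclosing module imports laplacian (scipy.sparse.csgraph) and
# eigsh (scipy.sparse.linalg).
import numpy as np

rng = np.random.default_rng(0)
adj = rng.random((30, 30)); adj = (adj + adj.T) / 2; np.fill_diagonal(adj, 0)
sim = rng.random((30, 30)); sim = (sim + sim.T) / 2; np.fill_diagonal(sim, 0)
u = spectral_clustering(adj, sim, alpha=0.5, ncluster=4)
print(u.shape)  # (30, 4)
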
def _spectral_embedding(self, affinity_matrix):
    """Compute the spectral embedding.

    First calculates the normalized Laplacian, then does the eigenvalue
    decomposition.
    """
    numComponents, labels = connected_components(affinity_matrix)
    if numComponents > 1:
        # Presumed intent (the original Python 2 loop body was a no-op):
        # embed each connected component's subgraph separately and return
        # the list of embeddings.
        embedding = []
        for i in range(numComponents):
            idx = np.flatnonzero(labels == i)
            connected_component = affinity_matrix[np.ix_(idx, idx)]
            embedding.append(self._spectral_embedding(connected_component))
        return embedding

    self.n_components += 1
    L, diag_vector = laplacian(affinity_matrix, normed=True, return_diag=True)
    D = np.diag(diag_vector)
    # eigvals, eigvects = eigsh(-L, k=self.n_components, sigma=1.0, which='LM')
    eigvals, eigvects = eigh(L)
    embedding = eigvects.T[:self.n_components] * diag_vector
    return embedding[1:self.n_components].T

def compute_tda_for_graphs(graph_folder, filtrations):
    diag_repo = graph_folder + "diagrams/"
    if os.path.exists(diag_repo) and os.path.isdir(diag_repo):
        shutil.rmtree(diag_repo)
    [os.makedirs(diag_repo + dtype) for dtype in [""] + graph_dtypes]

    pad_size = 1
    for graph_name in os.listdir(graph_folder + "mat/"):
        A = np.array(loadmat(graph_folder + "mat/" + graph_name)["A"],
                     dtype=np.float32)
        pad_size = np.max((A.shape[0], pad_size))
    print("Pad size for eigenvalues in this dataset is: %i" % pad_size)

    for graph_name in os.listdir(graph_folder + "mat/"):
        A = np.array(loadmat(graph_folder + "mat/" + graph_name)["A"],
                     dtype=np.float32)
        name = graph_name.split("_")
        gid = int(name[name.index("gid") + 1]) - 1
        egvals, egvectors = eigh(csgraph.laplacian(A, normed=True))
        for filtration in filtrations:
            time = float(filtration.split("-")[0])
            filtration_val = np.square(egvectors).dot(
                np.diag(np.exp(-time * egvals))).sum(axis=1)
            dgmOrd0, dgmExt0, dgmRel1, dgmExt1 = apply_graph_extended_persistence(
                A, filtration_val, get_base_simplex(A))
            [np.savetxt(diag_repo + "%s/graph_%06i_filt_%s.csv"
                        % (dtype, gid, filtration), diag, delimiter=',')
             for diag, dtype in zip([dgmOrd0, dgmExt0, dgmRel1, dgmExt1],
                                    graph_dtypes)]
    return

def main(args: list[str]) -> None:
    '''
    Generate an eigenvalue distribution chart from graph <input-file>.

    The input format of the graph, which consists of directed edges
    [(a_1, b_1), ... (a_n, b_n)], where a_i, b_i are 0-indexed integers:

        a_1 b_1
        a_2 b_2
        ...
        a_n b_n

    The output will be generated in <input-file>-eigval.svg.
    '''
    input_filename = args[0]
    edges = []
    max_node = 0
    with open(input_filename, 'r') as inp:
        for line in inp.readlines():
            f, t = map(int, line.split())
            edges.append((f, t))
            max_node = max(max_node, f, t)
    nodes = max_node + 1
    mtx = np.zeros(shape=(nodes, nodes))
    for f, t in edges:
        mtx[f, t] += 1.0
        mtx[t, f] += 1.0
    laplacian_mtx = laplacian(mtx)
    print(f'Laplacian: \n{laplacian_mtx}')
    print(f'Nodes in graph: {nodes}')
    vals = eigvals(laplacian_mtx)
    print(f'Eigenvalues: {vals}')
    plt.plot(sorted(vals))
    # plt.show()
    plt.savefig(input_filename + '-eigval.svg')

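# Hedged usage sketch (assumed, not from the original source): a 3-node path
# graph in the edge-list format the docstring describes. Assumes the
# enclosing module imports numpy as np, laplacian, eigvals, and
# matplotlib.pyplot as plt; 'tiny-graph.txt' is a hypothetical file name.
with open('tiny-graph.txt', 'w') as f:
    f.write('0 1\n1 2\n')
main(['tiny-graph.txt'])  # writes tiny-graph.txt-eigval.svg
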
def calc(self):
    x, y, val = [], [], []
    for item in self._wlist.get_all():
        item.onlyFloatOn()
        p = item.get_P()
        if 800 < p < 1300:
            x.append(item.get_lon())
            y.append(item.get_lat())
            val.append(item.get_P())
    val = np.array(val)
    lon = np.array(x)
    lat = np.array(y)
    self._xi_l = np.linspace(-179, 179, self._numcols)
    self._yi_l = np.linspace(-88, 88, self._numrows)
    xi, yi = np.meshgrid(self._xi_l, self._yi_l)
    self._lon_lat = np.c_[lon.ravel(), lat.ravel()]
    zi = griddata(self._lon_lat, val.ravel(), (xi, yi), method='linear')
    self._lap = csgraph.laplacian(zi, normed=False)
    # print(zi[56.3,45.4])
    self._zi = zi
    self._xi = xi
    self._yi = yi
    self._val = val
    self._lon = lon
    self._lat = lat
    # print(self._lap)
    return self

def RGG(node_num, dimension=2):
    RS = np.random.RandomState(seed=100)
    features = RS.uniform(low=0, high=1, size=(node_num, dimension))
    adj_matrix = rbf_kernel(features, gamma=1 / (2 * 0.5 ** 2))
    np.fill_diagonal(adj_matrix, 0)
    laplacian = csgraph.laplacian(adj_matrix, normed=False)
    return adj_matrix, laplacian, features

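# Minimal usage sketch (assumed, not from the original source): build a
# 10-node random geometric graph and confirm that the unnormalized Laplacian
# rows sum to zero, i.e. L = D - A.
import numpy as np

adj, lap, feats = RGG(10)
assert np.allclose(lap.sum(axis=1), 0)
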
def initializeGW(self, G, Gepsilon):
    n = len(self.users)
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n=G.shape[0])
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print('GW', GW)
    return GW.T

def _build_A(self, force=False):
    s_dis = self.settings['s_scheme']
    network = self.project.network
    phase = self.project.phases()[self.settings['phase']]
    conns = network['throat.conns']
    P = phase[self.settings['pressure']]
    gh = phase[self.settings['hydraulic_conductance']]
    gd = phase[self.settings['diffusive_conductance']]
    gd = np.tile(gd, 2)
    Qij = -gh * np.diff(P[conns], axis=1).squeeze()
    Qij = np.append(Qij, -Qij)
    if force:
        self._pure_A = None
    if self._pure_A is None:
        if s_dis == 'upwind':
            w = gd + np.maximum(0, -Qij)
            A = network.create_adjacency_matrix(weights=w)
        elif s_dis == 'hybrid':
            w = np.maximum(0, np.maximum(-Qij, gd - Qij / 2))
            A = network.create_adjacency_matrix(weights=w)
        elif s_dis == 'powerlaw':
            Peij = np.absolute(Qij / gd)
            w = gd * np.maximum(0, (1 - 0.1 * Peij) ** 5) + np.maximum(0, -Qij)
            A = network.create_adjacency_matrix(weights=w)
        A = laplacian(A)
        self._pure_A = A
    self.A = self._pure_A.copy()

def initializeGW0(self, Gepsilon):
    G0 = self.W0
    L = csgraph.laplacian(G0, normed=False)
    I = np.identity(n=G0.shape[0])
    GW0 = I + Gepsilon * L  # W is a double stochastic matrix
    print('GW0', GW0)
    return GW0

def manifold_regularize(feature, predict, size):
    alpha = 0.99
    sigma = 0.2
    dm = cdist(feature, feature, 'euclidean')
    matrix = laplacian(dm, normed=False)
    u_matrix = np.diag(matrix)
    s_1 = np.dot(np.dot(np.transpose(predict), matrix), predict)
    s_1_value = np.sum(np.diag(s_1)) / size
    manifold_value = np.full(size, s_1_value, dtype=np.float32)
    # rbf = lambda x, sigma: math.exp((-predict) / (2 * (math.pow(sigma, 2))))
    # vfunc = np.vectorize(rbf)
    # W = vfunc(dm, sigma)
    # np.fill_diagonal(W, 0)
    # def calculate_S(W):
    #     d = np.sum(W, axis=1)
    #     D = np.sqrt(d * d[:, np.newaxis])
    #     return np.divide(W, D, where=D != 0)
    # S = calculate_S(W)
    # F = np.dot(S, predict) * alpha + (1 - alpha) * predict
    # n_iter = 400
    # for t in range(n_iter):
    #     F = np.dot(S, F) * alpha + (1 - alpha) * predict
    # print('s_1_value', np.shape(manifold_value))
    return manifold_value

def num_spanning_trees(A):
    """Compute the number of spanning trees in an undirected graph."""
    from scipy.sparse import csgraph
    L = csgraph.laplacian(A)
    return np.linalg.det(L[1:, 1:])

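# Worked check (assumed, not from the original source): by Kirchhoff's
# matrix-tree theorem, any cofactor of the Laplacian counts the spanning
# trees. The triangle K3 has exactly 3 spanning trees, and the cofactor is
# det([[2, -1], [-1, 2]]) = 3.
import numpy as np

A = np.ones((3, 3)) - np.eye(3)
print(round(num_spanning_trees(A)))  # 3
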
def direct_compute_deepwalk_matrix(A, args, logger):
    res = {}
    try:
        windows = args["prox_params"]['window']
        b = args["prox_params"]['negative']
        transform = args["prox_params"]['transform']
        threshold = args["prox_params"]['threshold']
    except KeyError:
        raise MissingParamError
    for window in windows:
        n = A.shape[0]
        vol = float(A.sum())
        L, d_rt = csgraph.laplacian(A, normed=True, return_diag=True)
        # X = D^{-1/2} A D^{-1/2}
        X = sparse.identity(n) - L
        S = np.zeros_like(X)
        X_power = sparse.identity(n)
        for i in range(window):
            logger.info("Deep Walk %d-th power, %d/%d", i + 1, i + 1, window)
            X_power = X_power.dot(X)
            S += X_power
        S *= vol / window / b
        D_rt_inv = sparse.diags(d_rt ** -1)
        M = D_rt_inv.dot(D_rt_inv.dot(S).T)
        if transform == 1:
            logger.info("log transform")
            res[window] = log_filter(M, threshold)
            # res[window] = log_filter(M)
        elif transform == 2:
            res[window] = binary_filter(M, logger, threshold)
        else:
            logger.info("no transform")
            res[window] = sparse.csr_matrix(M)
    return res

def harmonic_function(W, fl):
    # 'l' is the number of labeled points
    l = len(fl)
    print(l)
    # 'n' is the total number of points
    n = len(W)
    print(n)
    # compute the Laplacian
    W = np.matrix(W)
    # G = np.arange(3) * np.arange(3)[:, np.newaxis]
    L = csgraph.laplacian(W, normed=False)
    print(len(L), L[0])
    # compute the harmonic function
    # fu = np.reshape(L, ())
    fl = np.matrix(fl)
    fu = -(linalg.inv(L[l:n, l:n])).dot(L[l:n, 0:l]).dot(fl)
    # compute the CMN solution: the unnormalized class proportion estimate
    # from labeled data, with Laplace smoothing
    # q = sumColumn(fl) + 1
    # fu_CMN = fu * np.kron(np.ones((n - 1, 1)), q / sumColumn(fu))
    # print(fu)
    return fu

def model_constrain(self, fixId, handleId, deformationMatrix):
    # Add the constraints
    deformationMatrix_list = list(map(np.matrix, deformationMatrix))
    self.deformedVerts = []
    for vert in range(self.point_num):
        if vert in fixId:
            # Fixed point
            self.deformedVerts.append((vert, self.vertex[vert]))
        elif vert in handleId:
            # First apply the deformation matrix
            deformedVert = np.append(self.vertex[vert], 1)
            deformedVert = deformedVert.dot(
                deformationMatrix_list[handleId.index(vert)])
            deformedVert = np.delete(deformedVert, 3).flatten()
            deformedVert = np.squeeze(np.asarray(deformedVert))
            self.deformedVerts.append((vert, deformedVert))

    # Compute the Laplacian
    process_num = len(self.deformedVerts)
    self.laplacian = np.zeros(
        (self.point_num + process_num, self.point_num + process_num),
        dtype=np.float32)
    self.laplacian[:self.point_num, :self.point_num] = csgraph.laplacian(
        self.weight)
    for i in range(process_num):
        vert = self.deformedVerts[i][0]
        newi = i + self.point_num
        self.laplacian[newi, vert] = 1
        self.laplacian[vert, newi] = 1

    process_num = len(self.deformedVerts)
    self.bArray = np.zeros((self.point_num + process_num, 3))
    for i in range(process_num):
        self.bArray[self.point_num + i] = self.deformedVerts[i][1]
    self.compute_P()

def spectral_embedding(self, X, rad):
    """Spectral clustering.

    :param X: numpy.ndarray - input data matrix (m x n), m data points
        with n dimensions
    :param rad: float - radius for the neighbor search
    :return y_spec: numpy.ndarray - binary partition labels for the m points
    """
    # Get the adjacency matrix / nearest-neighbor graph:
    # neighbors within the given radius
    A = radius_neighbors_graph(
        X.T,
        rad,
        mode="distance",
        metric="minkowski",
        p=2,
        metric_params=None,
        include_self=False,
    )
    A = A.toarray()
    # Find the Laplacian of the neighbor graph:
    # L = D - A, where D is the diagonal degree matrix
    L = csgraph.laplacian(A, normed=False)
    # Embed the data points in low dimension using the eigenvalues/vectors
    # of the graph Laplacian to get the most optimal partition of the graph.
    eigval, eigvec = np.linalg.eig(L)
    # The second-smallest eigenvalue represents the sparsest cut of the
    # graph. np.linalg.eig does not sort its output, so locate the Fiedler
    # vector explicitly.
    fiedler_idx = np.argsort(eigval)[1]
    # Partition the graph using the Fiedler vector
    y_spec = eigvec[:, fiedler_idx].copy()
    y_spec[y_spec < 0] = 0
    y_spec[y_spec > 0] = 1
    return y_spec

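# Hedged usage sketch (assumed, not from the original source): two
# well-separated clusters; `model` is a hypothetical instance of the
# enclosing class. Note the method builds the graph on X.T, so the data is
# passed features-by-samples here.
import numpy as np

rng = np.random.default_rng(0)
pts = np.vstack([rng.normal(0.0, 0.05, (10, 2)),
                 rng.normal(5.0, 0.05, (10, 2))])
labels = model.spectral_embedding(pts.T, rad=1.0)
print(labels)  # 0/1 indicator of the two clusters
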
def compute_rfa(features, k_neighbours=15, distfn='sym',
                connected=False, sigma=1.0):
    """Compute the target RFA similarity matrix.

    The RFA matrix of similarities relates to the commute time between
    pairs of nodes, and it is built on top of the Laplacian of a single
    connected component k-nearest-neighbour graph of the data.
    """
    KNN = kneighbors_graph(features,
                           k_neighbours,
                           mode='distance',
                           include_self=False).toarray()

    if 'sym' in distfn.lower():
        KNN = np.maximum(KNN, KNN.T)
    else:
        KNN = np.minimum(KNN, KNN.T)

    n_components, labels = csgraph.connected_components(KNN)
    if connected and (n_components > 1):
        from sklearn.metrics import pairwise_distances
        distances = pairwise_distances(features, metric='euclidean')
        KNN = connect_knn(KNN, distances, n_components, labels)

    S = np.exp(-KNN / (sigma * features.size(1)))
    S[KNN == 0] = 0
    L = csgraph.laplacian(S, normed=False)
    return torch.Tensor(np.linalg.inv(L + np.eye(L.shape[0])))

def HeatKernelSignature(adjacency_matrix, ntime_stamps=10):
    """Compute node features as heat diffusion on the graph."""
    # Example 12-node adjacency matrix kept from the original for reference:
    # adjacency_matrix = np.array([[0,1,1,1,0,0,0,0,0,0,0,0],
    #                              [1,0,1,1,0,0,0,0,0,0,0,0],
    #                              [1,1,0,1,0,0,0,0,0,0,0,0],
    #                              [1,1,1,0,1,1,0,0,0,0,0,0],
    #                              [0,0,0,1,0,1,0,1,0,0,0,0],
    #                              [0,0,0,1,1,0,1,0,0,1,1,1],
    #                              [0,0,0,0,0,1,0,1,1,0,0,0],
    #                              [0,0,0,0,1,0,1,0,1,0,0,0],
    #                              [0,0,0,0,0,0,1,1,0,0,0,0],
    #                              [0,0,0,0,0,1,0,0,0,0,1,1],
    #                              [0,0,0,0,0,1,0,0,0,1,0,1],
    #                              [0,0,0,0,0,1,0,0,0,1,1,0]])
    HKS = np.zeros((len(adjacency_matrix), ntime_stamps))
    graph_laplacian = csgraph.laplacian(adjacency_matrix)
    eigval, eigvec = np.linalg.eigh(graph_laplacian)
    time = np.logspace(math.log10(.5 / eigval[-1]),
                       math.log10(1 / eigval[1]),
                       num=ntime_stamps)
    for t in range(ntime_stamps):
        HKS[:, t] = np.diag(
            np.matmul(eigvec,
                      np.matmul(np.diagflat(np.exp(-time[t] * eigval)),
                                eigvec.T)))
    return HKS

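# Hedged usage sketch (assumed, not from the original source): the HKS of a
# small cycle graph; each row is one node's heat signature sampled at
# ntime_stamps diffusion times. Assumes the enclosing module imports numpy
# as np, math, and csgraph as the function above requires.
import networkx as nx
import numpy as np

A = nx.to_numpy_array(nx.cycle_graph(6))
hks = HeatKernelSignature(A, ntime_stamps=5)
print(hks.shape)  # (6, 5); all rows equal, by the symmetry of the cycle
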
def update_graph(self):
    adj = rbf_kernel(self.user_feature_matrix)
    self.L = csgraph.laplacian(adj, normed=True)
    A_t_1 = self.A.copy()
    self.A = np.kron(self.L + 0.01 * np.identity(self.user_num),
                     np.identity(self.dimension))
    self.cov += self.alpha * (self.A - A_t_1)

def compute(self, data, target, class_samples):
    """Compute the difference matrix and the eigenvalues.

    Args:
        data: data samples, ndarray (n_samples, n_features)
        target: target samples, ndarray (n_samples)
        class_samples: class samples, ndarray (n_class, M, n_features)
    """
    # Compute E_{p(x|C_i)}[p(x|C_j)]
    self.S, self.similarity_arrays = compute_expectation_with_monte_carlo(
        data,
        target,
        class_samples,
        class_indices=self.class_indices,
        n_class=self.n_class,
        k_nearest=self.k_nearest,
        distance=self.distance,
    )
    # Compute the W matrix
    self.W = np.empty([self.n_class, self.n_class])
    for i, j in product(range(self.n_class), range(self.n_class)):
        self.W[i, j] = 1 - scipy.spatial.distance.braycurtis(self.S[i],
                                                             self.S[j])
    self.difference = 1 - self.W
    # Get the Laplacian and its eigenvalues
    self.L_mat, dd = laplacian(self.W, normed=False, return_diag=True)
    self.evals, self.evecs = np.linalg.eigh(self.L_mat)
    self.csg = self._csg_from_evals(self.evals)

def fit(
    self,
    affinity_matrix,
    labels,
    n_labeled,
    n_eig_vecs=10,
    classifier_type='knn',  # ['knn', 'svc']
    n_neighbors=5,  # Only necessary if classifier_type == 'knn'
):
    if issparse(affinity_matrix):
        affinity_matrix = affinity_matrix.todense()
    # Symmetrize the affinity matrix
    self.affinity_matrix = np.maximum(affinity_matrix, affinity_matrix.T)
    laplace = laplacian(self.affinity_matrix, normed=True)
    # eigsh gets eigenvectors and values from the graph Laplacian. We get an
    # extra eigenvector because the first one corresponds to eigenvalue 0.
    _, eig_vecs = eigsh(laplace, k=n_eig_vecs + 1, which='SM', maxiter=5000)
    # Don't consider the eigenvector corresponding to eigenvalue 0
    self.embeddings = eig_vecs[:, 1:]
    embeddings_labeled = self.embeddings[:n_labeled, :]
    self.classifier = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
    self.classifier.fit(embeddings_labeled, labels[:n_labeled])

def _check_laplacian_dtype_none(
    A, desired_L, desired_d, normed, use_out_degree, copy, dtype, arr_type
):
    mat = arr_type(A, dtype=dtype)
    L, d = csgraph.laplacian(
        mat,
        normed=normed,
        return_diag=True,
        use_out_degree=use_out_degree,
        copy=copy,
        dtype=None,
    )
    if normed and check_int_type(mat):
        assert L.dtype == np.float64
        assert d.dtype == np.float64
        _assert_allclose_sparse(L, desired_L, atol=1e-12)
        _assert_allclose_sparse(d, desired_d, atol=1e-12)
    else:
        assert L.dtype == dtype
        assert d.dtype == dtype
        desired_L = np.asarray(desired_L).astype(dtype)
        desired_d = np.asarray(desired_d).astype(dtype)
        _assert_allclose_sparse(L, desired_L, atol=1e-12)
        _assert_allclose_sparse(d, desired_d, atol=1e-12)

    if not copy:
        if not (normed and check_int_type(mat)):
            if type(mat) is np.ndarray:
                assert_allclose(L, mat)
            elif mat.format == "coo":
                _assert_allclose_sparse(L, mat)

def learn_knn_graph(signals, node_num, k=5):
    # print('Learning KNN Graph')
    adj = rbf_kernel(signals.T)
    np.fill_diagonal(adj, 0)
    knn_adj = filter_graph_to_knn(adj, node_num, k=k)
    knn_lap = csgraph.laplacian(knn_adj, normed=False)
    return knn_adj, knn_lap

def _build_A(self):
    r"""
    Builds the coefficient matrix based on throat conductance values.

    Notes
    -----
    The conductance to use is specified in the algorithm's settings
    under ``alg.settings['conductance']``. In subclasses, conductance is
    set by default. For instance, in ``FickianDiffusion``, it is set to
    ``throat.diffusive_conductance``, although it can be changed.
    """
    gvals = self.settings['conductance']
    # FIXME: this needs to be properly addressed (see issue #1548)
    try:
        if gvals in self._get_iterative_props():
            self.settings._update({"cache_A": False, "cache_b": False})
    except AttributeError:
        pass
    if not self.settings['cache_A']:
        self._pure_A = None
    if self._pure_A is None:
        phase = self.project[self.settings.phase]
        g = phase[gvals]
        am = self.network.create_adjacency_matrix(weights=g, fmt='coo')
        self._pure_A = spgr.laplacian(am).astype(float)
    self.A = self._pure_A.copy()

def spectral_decomposition(matrix):
    '''
    Perform an eigendecomposition of a weighted graph matrix, after
    converting it to a normalized graph Laplacian.

    Parameters
    ----------
    matrix: 2-d numpy array
        Weighted graph adjacency matrix.

    Returns
    -------
    eigen_vals: numpy array
        Vector of the graph Laplacian eigenvalues, sorted from largest
        to smallest.
    eigen_vecs: 2-d numpy array
        Matrix of the graph Laplacian eigenvectors, with columns sorted
        from largest to smallest eigenvalue.
    '''
    affinity = compute_similarity(matrix)
    laplacian = csgraph.laplacian(affinity, normed=True)
    eigen_vals, eigen_vecs = eigh(laplacian)
    # eigh returns ascending order; reverse the eigenvalues and the matching
    # eigenvector *columns* (flipping both axes would scramble the vectors)
    eigen_vals = np.flip(eigen_vals)
    eigen_vecs = np.flip(eigen_vecs, axis=1)
    return eigen_vals, eigen_vecs

def fit(self, X):
    """Learn the model using the training data.

    :param X: Training data.
    """
    print('Just fitting')
    W = (self.alpha * self.W_s) + (self.gamma * self.W_F)
    L, diag_p = csgraph.laplacian(W, normed=self.normed, return_diag=True)
    # - Formulate the eigenproblem.
    lhs_matrix = X.T.dot(L.dot(X))
    rhs_matrix = None
    # - Solve the problem.
    eigval, eigvec = eigh(a=lhs_matrix,
                          b=rhs_matrix,
                          overwrite_a=True,
                          overwrite_b=True,
                          check_finite=True)
    eigval = np.real(eigval)
    # - Select eigenvectors based on eigenvalues:
    # -- get indices of the k smallest eigenvalues
    k_eig_ixs = np.argpartition(eigval, self.k)[:self.k]
    # -- columns of eigvec are the eigenvectors corresponding to the
    #    eigenvalues; select the columns for the k smallest eigenvalues
    self.V = eigvec[:, k_eig_ixs]

def initializeGW_clustering(Gepsilon, relationFileName, newW):
    G = newW
    n = newW.shape[0]
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print(GW)
    return GW.T

def _prepare_inputs(self, X, W):
    self.X = X
    # set up prior M
    if self.params['use_cov']:
        self.M = np.cov(X.T)
    else:
        self.M = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    self.loss_matrix = self.X.T.dot(L.dot(self.X))

def _check_symmetric_graph_laplacian(mat, normed):
    if not hasattr(mat, "shape"):
        mat = eval(mat, dict(np=np, sparse=sparse))

    if sparse.issparse(mat):
        sp_mat = mat
        mat = sp_mat.todense()
    else:
        sp_mat = sparse.csr_matrix(mat)

    laplacian = csgraph.laplacian(mat, normed=normed)
    n_nodes = mat.shape[0]
    if not normed:
        assert_array_almost_equal(laplacian.sum(axis=0), np.zeros(n_nodes))
    assert_array_almost_equal(laplacian.T, laplacian)
    assert_array_almost_equal(
        laplacian, csgraph.laplacian(sp_mat, normed=normed).todense())
    assert_array_almost_equal(
        laplacian, _explicit_laplacian(mat, normed=normed))

def test_graph_laplacian():
    for mat in (np.arange(10) * np.arange(10)[:, np.newaxis],
                np.ones((7, 7)),
                np.eye(19),
                np.vander(np.arange(4)) + np.vander(np.arange(4)).T):
        sp_mat = sparse.csr_matrix(mat)
        for normed in (True, False):
            laplacian = csgraph.laplacian(mat, normed=normed)
            n_nodes = mat.shape[0]
            if not normed:
                np.testing.assert_array_almost_equal(laplacian.sum(axis=0),
                                                     np.zeros(n_nodes))
            np.testing.assert_array_almost_equal(laplacian.T, laplacian)
            np.testing.assert_array_almost_equal(
                laplacian,
                csgraph.laplacian(sp_mat, normed=normed).todense())

def generate_laplacian(df):
    # Nodes are indexed from 0: rows first, columns second
    G = np.zeros((sum(df.shape), sum(df.shape)))
    for i in range(df.shape[0]):
        for j in range(df.shape[1]):
            if df.iat[i, j] > 0:
                connect(G, (i, df.shape[0] + j, 1))
    G = csgraph.laplacian(G)
    return G

def __init__(self, X, W, use_cov=True):
    self.X = X
    # set up prior M
    if use_cov:
        self.M = np.cov(X.T)
    else:
        self.M = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    self.loss_matrix = self.X.T.dot(L).dot(self.X)

def _prepare_inputs(self, X, W):
    self.X_ = X = check_array(X)
    W = check_array(W, accept_sparse=True)
    # set up prior M
    if self.use_cov:
        self.M_ = pinvh(np.cov(X, rowvar=False))
    else:
        self.M_ = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    return X.T.dot(L.dot(X))

def compute_clusters(graph):
    lap_data = csgraph.laplacian(
        asarray(graph.get_adjacency(attribute="weight").data), normed=False)
    lap_cov = covariate_laplacian(graph, 'department')
    lap_total = (1 * lap_data + 10000000000 * lap_cov
                 + 1000 * eye(len(graph.vs)) + 1000000)
    evals, evecs = eig(lap_total)
    coords = array((log(evecs[2] / sqrt(evals[2])),
                    log(evecs[1] / sqrt(evals[1]))))
    coords = transpose(abs(coords))
    return coords

def _compute_reg_neighbors(n_ch_x, n_delays, reg_type, method='direct',
                           normed=False):
    """Compute regularization parameter from neighbors."""
    from scipy.sparse.csgraph import laplacian
    known_types = ('ridge', 'laplacian')
    if isinstance(reg_type, str):
        reg_type = (reg_type,) * 2
    if len(reg_type) != 2:
        raise ValueError('reg_type must have two elements, got %s'
                         % (len(reg_type),))
    for r in reg_type:
        if r not in known_types:
            raise ValueError('reg_type entries must be one of %s, got %s'
                             % (known_types, r))
    reg_time = (reg_type[0] == 'laplacian' and n_delays > 1)
    reg_chs = (reg_type[1] == 'laplacian' and n_ch_x > 1)
    if not reg_time and not reg_chs:
        return np.eye(n_ch_x * n_delays)
    # regularize time
    if reg_time:
        reg = np.eye(n_delays)
        stride = n_delays + 1
        reg.flat[1::stride] += -1
        reg.flat[n_delays::stride] += -1
        reg.flat[n_delays + 1:-n_delays - 1:stride] += 1
        args = [reg] * n_ch_x
        reg = linalg.block_diag(*args)
    else:
        reg = np.zeros((n_delays * n_ch_x,) * 2)
    # regularize features
    if reg_chs:
        block = n_delays * n_delays
        row_offset = block * n_ch_x
        stride = n_delays * n_ch_x + 1
        reg.flat[n_delays:-row_offset:stride] += -1
        reg.flat[n_delays + row_offset::stride] += 1
        reg.flat[row_offset:-n_delays:stride] += -1
        reg.flat[:-(n_delays + row_offset):stride] += 1
    assert np.array_equal(reg[::-1, ::-1], reg)

    if method == 'direct':
        if normed:
            norm = np.sqrt(np.diag(reg))
            reg /= norm
            reg /= norm[:, np.newaxis]
        return reg
    else:
        # Use csgraph. Note that our -1's above are really the neighbors!
        # If we ever want to allow arbitrary adjacency matrices, this is how
        # we'd want to do it.
        reg = laplacian(-reg, normed=normed)
        return reg

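# Hedged consistency check (assumed, not from the original source): the
# 'direct' construction and the csgraph route should agree, because the -1
# off-diagonal entries above define exactly the neighbor graph whose
# Laplacian is being built. Assumes the enclosing module imports numpy as np
# and scipy.linalg as linalg, as the function requires.
import numpy as np

reg_direct = _compute_reg_neighbors(2, 3, ('laplacian', 'laplacian'),
                                    method='direct')
reg_csgraph = _compute_reg_neighbors(2, 3, ('laplacian', 'laplacian'),
                                     method='csgraph')
assert np.allclose(reg_direct, reg_csgraph)
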
def load_dataset(self, interactions, user_count=150, item_count=200, split=0.5):
    # interactions = [us, is, rs]
    user_range = len(interactions[0])
    user_idx = interactions[0]
    item_idx = interactions[1]
    ratings = interactions[2]

    print("Initialising model variables ...")
    uig = UserItemGraph(user_count, item_count)

    print("Building dataset ...")
    for j in range(user_range):
        u = user_idx[j] - 1
        i = item_idx[j] - 1
        uig.M[u, i] = ratings[j]
        uig.O[u, i] = 1

    print("Computing Leave-one-out test split ...")
    for u in user_idx:
        heldout = np.random.choice(list(uig.O[u - 1, :].nonzero())[0], 1)
        uig.Otest[u - 1, heldout] = 1
    uig.Otraining = uig.O - uig.Otest

    print("Building user interaction matrix ...")
    # User interactions (dtype=int: np.int was removed from modern NumPy)
    Wrow = np.zeros((user_count, user_count), dtype=int)
    Wrow = interaction_matrix(Wrow, uig.O, 1)

    print("Building item interaction matrix ...")
    # Item interactions
    Wcol = np.zeros((item_count, item_count), dtype=int)
    Wcol = interaction_matrix(Wcol, uig.O, 0)

    print("Computing Laplacian of interactions ...")
    uig.Lrow = csgraph.laplacian(Wrow, normed=True)
    uig.Lcol = csgraph.laplacian(Wcol, normed=True)

    self.graph = uig

def initializeGW(Gepsilon, n, relationFileName):
    W = np.identity(n)
    with open(relationFileName) as f:
        for line in f:
            line = line.split('\t')
            if line[0] != 'userID':
                if int(line[0]) <= n and int(line[1]) <= n:
                    W[int(line[0])][int(line[1])] += 1
    G = W
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print(GW)
    return GW.T

def kwok_lau(graph, v, k, epsilon, path_length_scaler, volbd_scaler):
    num_vertices = graph.shape[0]
    volbd = k * volbd_scaler
    vertices = list(range(num_vertices))
    print("N: {0}\nVolume bound: {1}\n".format(num_vertices, volbd))
    p = [np.zeros(num_vertices, int)]
    p[0][v] = 1
    last = p[-1]
    # length of walk to compute
    # need to get W!
    I = mlib.identity(num_vertices, int)
    # assuming symmetric here
    L, D_vector = csglib.laplacian(graph, return_diag=True)
    D = mlib.diags(D_vector, (0), format='csc')
    lazy_walk = 0.5 * (I + lalib.inv(D) * graph)
    num_iterations = math.ceil(num_vertices ** 2 * math.log(num_vertices, 2))
    for t in range(1, num_iterations):
        p.append(last * lazy_walk)
        last = p[-1]
    # value function for sorting:
    sortkey = lambda t: (lambda vertex: p[t][vertex] / D_vector[vertex])
    # initialize the set now
    S = dict()
    S[0, 1] = p[0][v]
    outset = S[0, 1]
    # when S has one element, the conductance is 1;
    # conductance is <= 1
    outcond = 2
    for t in range(1, num_iterations):
        # sort all at once here?
        p[t] = p[t - 1] * lazy_walk
        for j in range(1, num_vertices):
            # compute new S[t, j]
            # don't want to include the entire graph;
            # should also put another bound in here for later
            S[t, j] = computeS(sortkey(t), vertices, j, num_vertices)
            # find the smallest S_{t,j}
            currcond = conductance(S[t, j], L, D)
            if currcond < outcond and volume(S[t, j], D) <= volbd:
                outset = S[t, j]
                outcond = currcond
    return outset

def constructLaplacianMatrix(self, W, Gepsilon):
    G = W.copy()
    # Convert the adjacency matrix of the weighted graph to the adjacency
    # matrix of an unweighted graph
    for i in self.users:
        for j in self.users:
            if G[i.id][j.id] > 0:
                G[i.id][j.id] = 1
    L = csgraph.laplacian(G, normed=False)
    print(L)
    I = np.identity(n=G.shape[0])
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print('GW', GW)
    return GW.T

def initializeGW(W, epsilon):
    n = len(W)
    # print('W', W)
    G = np.zeros(shape=(n, n))
    for i in range(n):
        for j in range(n):
            if W[i][j] > 0:
                G[i][j] = 1
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + epsilon * L
    print(GW)
    # showheatmap(GW)
    return GW

def _build_graph(self):
    """Graph matrix for Label Spreading: computes the graph Laplacian."""
    # compute affinity matrix (or gram matrix)
    if self.kernel == 'knn':
        self.nn_fit = None
    n_samples = self.X_.shape[0]
    affinity_matrix = self._get_kernel(self.X_)
    laplacian = csgraph.laplacian(affinity_matrix, normed=True)
    laplacian = -laplacian
    if sparse.isspmatrix(laplacian):
        diag_mask = (laplacian.row == laplacian.col)
        laplacian.data[diag_mask] = 0.0
    else:
        laplacian.flat[::n_samples + 1] = 0.0  # set diag to 0.0
    return laplacian

def initializeGW(self, Gepsilon):
    n = len(self.users)
    a = np.ones(n - 1)
    b = np.ones(n)
    c = np.ones(n - 1)
    k1, k2, k3 = -1, 0, 1
    A = np.diag(a, k1) + np.diag(b, k2) + np.diag(c, k3)
    G = A
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print(GW)
    return GW.T

def fit(self, X):
    '''Obtain the top-k eigensystem of the graph Laplacian.

    The eigen solver adopts shift-invert mode, as described in
    http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
    '''
    nbrs = NearestNeighbors(n_neighbors=self.n_nbrs).fit(X)
    # NOTE: W is stored densely and may use a lot of memory on large data
    W = nbrs.kneighbors_graph(X).toarray()
    W_sym = np.maximum(W, W.T)
    L = csr_matrix(csgraph.laplacian(W_sym, normed=True))
    Sigma, U = eigsh(L, self.n_clusters + 1, sigma=0, which='LM')
    # remove the trivial (smallest) eigenvalue & vector
    self.Sigma, self.U = Sigma[1:], U[:, 1:]

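# Self-contained sketch (assumed, not from the original source) of the
# shift-invert idea used above: asking ARPACK for the eigenvalues nearest a
# small shift recovers the smallest eigenvalues of the normalized Laplacian.
# A tiny negative sigma is used here because the Laplacian itself is exactly
# singular at 0.
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh

rng = np.random.default_rng(1)
W = (rng.random((40, 40)) < 0.2).astype(float)
W = np.maximum(W, W.T)
np.fill_diagonal(W, 0)
L = csr_matrix(laplacian(W, normed=True))
small_si, _ = eigsh(L, k=3, sigma=-1e-6, which='LM')  # shift-invert route
small_direct = np.sort(np.linalg.eigvalsh(L.toarray()))[:3]
assert np.allclose(np.sort(small_si), small_direct, atol=1e-6)
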
def test_arpack_eigsh_initialization():
    # Non-regression test that shows null-space computation is better with
    # initialization of eigsh from [-1, 1] instead of [0, 1]
    random_state = check_random_state(42)

    A = random_state.rand(50, 50)
    A = np.dot(A.T, A)  # create s.p.d. matrix
    A = laplacian(A) + 1e-7 * np.identity(A.shape[0])
    k = 5

    # Test if eigsh is working correctly.
    # New initialization [-1, 1] (as in original ARPACK);
    # was [0, 1] before, with which this test could fail
    v0 = random_state.uniform(-1, 1, A.shape[0])
    w, _ = eigsh(A, k=k, sigma=0.0, v0=v0)

    # Eigenvalues of s.p.d. matrix should be nonnegative, w[0] is smallest
    assert_greater_equal(w[0], 0)

def initializeGW_label(Gepsilon, n, relationFileName, label, diagnol):
    W = np.identity(n)
    with open(relationFileName) as f:
        for line in f:
            line = line.split('\t')
            # label value 10000 means the user is not a top-100 user
            if (line[0] != 'userID' and label[int(line[0])] != 10000
                    and label[int(line[1])] != 10000):
                W[label[int(line[0])]][label[int(line[1])]] += 1
    # not needed:
    # if diagnol == '1' or diagnol == '0':
    #     for i in range(n):
    #         W[i][i] = int(diagnol)
    G = W
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print(GW)
    return GW.T

def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized
    # laplacian correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = csgraph.laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components]
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)

def laplacian_eigs(csr_matrix):
    """Compute the eigenvalues and eigenvectors of the Laplacian matrix.

    Parameters
    ----------
    csr_matrix : scipy csr_matrix
        The adjacency matrix of the graph.

    Returns
    -------
    eig_vals : (M,) double or complex ndarray
        The eigenvalues, each repeated according to its multiplicity.
    eig_vecs : (M, M) double or complex ndarray
        The normalized left eigenvector corresponding to the eigenvalue
        ``w[i]`` is the column ``v[:, i]``.
    """
    stderr.write('Computing eigenvectors...')
    lap = csgraph.laplacian(csr_matrix, normed=False)
    eig_vals, eig_vecs = linalg.eigh(lap.todense(), type=3)
    stderr.write('\rEigenvector decomposition complete.\n')
    return eig_vals, eig_vecs

def low_rank_align(X, Y, Cxy, d=None, mu=0.8):
    """Input: data matrices X, Y, correspondence matrix Cxy,
    embedding dimension d, and correspondence weight mu.

    Output: embedded X and embedded Y.
    """
    nx, dx = X.shape
    ny, dy = Y.shape
    assert Cxy.shape == (nx, ny), \
        'Correspondence matrix must be shape num_X_samples x num_Y_samples.'
    C = np.fliplr(block_diag(np.fliplr(Cxy), np.fliplr(Cxy.T)))
    if d is None:
        d = min(dx, dy)
    Rx = low_rank_repr(X, d)
    Ry = low_rank_repr(Y, d)
    R = block_diag(Rx, Ry)
    tmp = np.eye(R.shape[0]) - R
    M = tmp.T.dot(tmp)
    L = laplacian(C)
    eigen_prob = (1 - mu) * M + 2 * mu * L
    _, F = eigh(eigen_prob, eigvals=(1, d), overwrite_a=True)
    Xembed = F[:nx]
    Yembed = F[nx:]
    return Xembed, Yembed

def initializeGW(FeatureVectors, Gepsilon):
    n = len(FeatureVectors)
    W = np.zeros(shape=(n, n))
    for i in range(n):
        sSim = 0
        for j in range(n):
            sim = np.dot(FeatureVectors[i], FeatureVectors[j])
            print('sim', sim)
            if i == j:
                sim += 1
            W[i][j] = sim
            sSim += sim
        W[i] /= sSim
        for a in range(n):
            print('%.3f' % W[i][a], end=' ')
        print('')
    G = W
    L = csgraph.laplacian(G, normed=False)
    I = np.identity(n)
    GW = I + Gepsilon * L  # W is a double stochastic matrix
    print(GW)
    return GW.T

def _build_A(self, force=False):
    r"""
    Builds the coefficient matrix based on conductances between pores.

    The conductance to use is specified in the algorithm's ``settings``
    under ``conductance``. In subclasses (e.g. ``FickianDiffusion``) this
    is set by default, though it can be overwritten.

    Parameters
    ----------
    force : bool (default is ``False``)
        If ``True``, the A matrix is built from scratch. If ``False``
        (the default), a cached version of A is returned. The cached
        version is *clean* in the sense that no boundary conditions or
        source terms have been added to it.
    """
    if force:
        self._pure_A = None
    if self._pure_A is None:
        network = self.project.network
        phase = self.project.phases()[self.settings['phase']]
        g = phase[self.settings['conductance']]
        am = network.create_adjacency_matrix(weights=g, fmt='coo')
        self._pure_A = spgr.laplacian(am).astype(float)
    self.A = self._pure_A.copy()

def formulate_graph_laplacian(self, X):
    """
    Input:
        X --> the feature set of essays, of size N * D

    Output:
        L --> Laplacian matrix of the graph formed by the essay set X
        D --> the degree matrix of W

    We form the graph W from X by computing the similarity between x and y
    for all x and y belonging to X. Then we find the degree matrix D of W,
    which is a diagonal matrix. The Laplacian is defined as L = D - W, and
    the normalized Laplacian as L_norm = D^(-1/2) * L * D^(-1/2).
    Normalized Laplacians are known to work marginally better in graph
    diffusion.
    """
    W = self.similarity_measure(X)
    W = self.sparsify(W, 100, 0)
    D = self.calculate_degree_matrix(W)
    return csgraph.laplacian(W, normed=True), D

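# Hedged check (assumed, not from the original source): csgraph's normed
# Laplacian matches the definition L_norm = D^{-1/2} (D - W) D^{-1/2} on a
# small weighted graph.
import numpy as np
from scipy.sparse import csgraph

W = np.array([[0., 2., 1.],
              [2., 0., 0.],
              [1., 0., 0.]])
D = np.diag(W.sum(axis=1))
D_inv_sqrt = np.diag(1.0 / np.sqrt(W.sum(axis=1)))
L_norm = D_inv_sqrt @ (D - W) @ D_inv_sqrt
assert np.allclose(csgraph.laplacian(W, normed=True), L_norm)
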