def forward(self, weights):
    """Compute the MST given the edge weights.

    The behaviour is the same as that of ``minimum_spanning_tree`` in
    ``scipy.sparse.csgraph``, namely i) the edges are assumed non-negative,
    ii) if ``weights[i, j]`` and ``weights[j, i]`` are both non-negative,
    their minimum is taken as the edge weight.

    Arguments
    ---------
    weights: :class:`torch:torch.Tensor`
        The adjacency matrix of size ``(n, n)``. It may live on any device
        and may require grad; it is detached and copied to the CPU before
        being handed to SciPy.

    Returns
    -------
    :class:`torch:torch.Tensor`
        An ``(n, n)`` adjacency matrix of the minimum spanning tree.
        Indices corresponding to the edges in the MST are set to one, rest
        are set to zero. If both ``weights[i, j]`` and ``weights[j, i]`` are
        non-zero, then the one will be located in whichever holds the
        *smaller* value (ties broken arbitrarily). The result is built with
        ``torch.Tensor`` from a NumPy array, so it carries no gradient
        history.
    """
    # ``Tensor.numpy()`` refuses tensors that require grad, so detach first.
    dense_weights = weights.detach().cpu().numpy()
    mst_matrix = mst(dense_weights).toarray() > 0
    # A spanning tree over n nodes has exactly n - 1 edges.
    assert int(mst_matrix.sum()) + 1 == weights.size(0), \
        "MST does not span all nodes (graph disconnected or zero-weight edges)"
    return torch.Tensor(mst_matrix.astype(float))
def prepare_mst(tri): csr_matrix = np.zeros((tri.points.shape[0], tri.points.shape[0]), dtype=np.float64) n_indices, n_indptr = tri.vertex_neighbor_vertices for i in range(tri.points.shape[0]): point_i = tri.points[i, :] neighbors = n_indptr[n_indices[i]:n_indices[i + 1]] for j in neighbors: if i > j: sep = point_i - tri.points[j, :] sep = np.sqrt(sep.dot(sep)) csr_matrix[i, j] = sep csr_matrix[j, i] = sep # noinspection PyTypeChecker min_sp_tree = mst(csr_matrix, overwrite=True).toarray() graph = {} edges = {} for i in range(tri.points.shape[0]): p0 = tuple(tri.points[i]) for j in range(tri.points.shape[0]): if min_sp_tree[i, j] != 0: p1 = tuple(tri.points[j]) if p0 in graph: graph[p0].append(p1) else: graph[p0] = [p1] if p1 in graph: graph[p1].append(p0) else: graph[p1] = [p0] fs = frozenset({p0, p1}) if fs not in edges: edges[fs] = min_sp_tree[i, j] return graph, edges
def fit_chowliu(self, data, penalty=0, weights=None):
    """Fit a Chow-Liu (maximum likelihood tree) model and re-initialise self.

    data: (n,m) nparray of m data points; values {0,1}
    penalty: offset applied to the pairwise mutual information; the spanning
        tree is computed on ``penalty - MI`` and edges with positive weight
        are dropped
    weights: optional per-sample weights, forwarded to ``np.average`` and
        ``np.cov``

    Nothing is returned: ``self.__init__`` is called with the fitted factors.
    """
    # TODO: add score f'n parameter, default to empirical MI? or too complicated?
    from scipy.sparse.csgraph import minimum_spanning_tree as mst

    def pairwise_mi(samples, sample_weights):
        """Estimate mutual information between all pairs of *binary* {0,1} variables"""
        marg = np.average(samples.astype(float), axis=1,
                          weights=sample_weights)[np.newaxis, :]
        indep = marg.T.dot(marg)  # p(i=1) * p(j=1)
        joint = np.cov(samples, ddof=0, aweights=sample_weights) + indep

        def four_cells(p11):
            # Stack the four cells of every pairwise table along a new last axis.
            return np.stack(
                (p11, marg - p11, marg.T - p11, 1 + p11 - marg - marg.T), axis=2)

        p = four_cells(joint)
        q = four_cells(indep)
        mi = (p * (np.log(p + 1e-10) - np.log(q + 1e-10))).sum(axis=2)
        return mi, joint, marg[0]

    n_vars, _ = data.shape
    # MI, pij,pi = MI2(to01(data), weights)
    mi, pij, pi = pairwise_mi(data, weights)  # data should be 0/1, not -1/+1

    tree = mst(penalty - mi).tocoo()

    # One unary factor per variable ...
    factors = [Factor([Var(v, 2)], [1 - pi[v], pi[v]]) for v in range(n_vars)]
    # ... plus one pairwise factor per retained tree edge.
    for row, col, w in zip(tree.row, tree.col, tree.data):
        if w > 0:
            continue
        i, j = sorted((int(row), int(col)))
        table = [
            1 + pij[i, j] - pi[i] - pi[j],
            pi[i] - pij[i, j],
            pi[j] - pij[i, j],
            pij[i, j],
        ]
        pair = Factor([Var(i, 2), Var(j, 2)], table)
        # Both sums are taken from the undivided table.
        sum_i = pair.sum([i])
        sum_j = pair.sum([j])
        factors.append(pair / sum_i / sum_j)

    self.__init__(factors)
def get_mst(df, neighbors):
    """Compute the Minimum Spanning Tree (MST) from positions.

    This function takes a pandas dataframe of positions and computes the
    distances to k-neighbors for all positions given, then finds the MST
    using ``scipy.sparse.csgraph``. Finally it finds the non-zero elements
    in the returned sparse matrix.

    Args:
        df: A pandas dataframe of all positions with longitude as ``RA`` and
            latitude as ``DEC``.
        neighbors(int): The number of neighbors used when computing trees.

    Returns:
        A pandas dataframe for all edges in the MST and a tuple of arrays
        storing the indexes and values of non-zero elements in the MST
        sparse matrix.
    """
    df = df[['RA', 'DEC']]
    # DataFrame.as_matrix was removed in pandas 1.0; to_numpy replaces it.
    numA = df.to_numpy()
    G = kng(numA, n_neighbors=neighbors, mode='distance')
    T = mst(G)
    index = find(T)
    row_ls = index[0].tolist()
    col_ls = index[1].tolist()
    # The MST indices are row positions in numA, so select positionally
    # (.ix was removed in pandas 1.0 and was label-based on integer indexes).
    df1 = df.iloc[row_ls].reset_index()
    df2 = df.iloc[col_ls].reset_index()
    df1 = df1.rename(columns={'RA': 'RA1', 'DEC': 'DEC1', 'index': 'index1'})
    df2 = df2.rename(columns={'RA': 'RA2', 'DEC': 'DEC2', 'index': 'index2'})
    final = pd.concat([df1, df2], axis=1)
    final['edges'] = pd.Series(index[2], index=final.index)
    final.reset_index(
        inplace=True)  # take index into columns for later filtering in JS
    final = final.rename(columns={'index': 'line_index'})
    return final, index
def fit_chowliu(data, penalty=0, weights=None):
    """Estimate an Ising model with Chow-Liu's maximum likelihood tree.

    data: (m,n) nparray of m data points; values {0,1}
    penalty: non-negative penalty on the MI (may give a disconnected / forest graph)
    weights: optional per-sample weights, forwarded to ``np.average`` and
        ``np.cov``

    Returns ``Ising(factors)`` built from one unary factor per variable and
    one pairwise factor per retained tree edge.
    """
    # TODO: add score f'n parameter, default to empirical MI? or too complicated?
    from scipy.sparse.csgraph import minimum_spanning_tree as mst

    def pairwise_mi(samples, sample_weights, eps=1e-10):
        """Estimate mutual information between all pairs of *binary* {0,1} variables"""
        # TODO: expects (n,m) shape data
        marg = np.average(samples.astype(float), axis=1,
                          weights=sample_weights)[np.newaxis, :]
        indep = marg.T.dot(marg)  # p(i=1) * p(j=1)
        joint = np.cov(samples, ddof=0, aweights=sample_weights) + indep

        def four_cells(p11):
            # Stack the four cells of every pairwise table along a new last axis.
            return np.stack(
                (p11, marg - p11, marg.T - p11, 1 + p11 - marg - marg.T), axis=2)

        p = four_cells(joint)
        q = four_cells(indep)
        mi = (p * (np.log(p + eps) - np.log(q + eps))).sum(axis=2)
        return mi, joint, marg[0]

    _, n_vars = data.shape
    # The estimator wants variables along the first axis, hence the transpose.
    mi, pij, pi = pairwise_mi(data.T, weights)  # data should be 0/1, not -1/+1

    tree = mst(penalty - mi).tocoo()

    factors = [Factor([Var(v, 2)], [1 - pi[v], pi[v]]) for v in range(n_vars)]
    for row, col, w in zip(tree.row, tree.col, tree.data):
        if w > 0:
            continue
        i, j = sorted((int(row), int(col)))
        table = [
            1 + pij[i, j] - pi[i] - pi[j],
            pi[i] - pij[i, j],
            pi[j] - pij[i, j],
            pij[i, j],
        ]
        pair = Factor([Var(i, 2), Var(j, 2)], table)
        # Both sums are taken from the undivided table.
        sum_i = pair.sum([i])
        sum_j = pair.sum([j])
        factors.append(pair / sum_i / sum_j)

    return Ising(factors)
def test_mst():
    """Plot the MST of the first 100 rows of ``get_carina()``.

    The points are triangulated with ``Delaunay``, the neighbour edges are
    weighted by Euclidean length, and the resulting minimum spanning tree is
    drawn as dashed black segments over a red scatter of the points.
    """
    tri = Delaunay(get_carina()[:100, :])
    points = tri.points
    n_points = points.shape[0]

    # Dense symmetric matrix holding the length of every neighbour edge.
    lengths = np.zeros((n_points, n_points), dtype=np.float64)
    indptr, indices = tri.vertex_neighbor_vertices
    for a in range(n_points):
        for b in indices[indptr[a]:indptr[a + 1]]:
            if a > b:
                diff = points[a, :] - points[b, :]
                length = np.sqrt(diff.dot(diff))
                lengths[a, b] = length
                lengths[b, a] = length

    # noinspection PyTypeChecker
    tree = mst(lengths, overwrite=True).toarray()

    # One [[x0, x1], [y0, y1]] entry per stored MST edge, in row-major order.
    segments = []
    for a in range(n_points):
        x_a, y_a = points[a]
        for b in np.nonzero(tree[a])[0]:
            other = points[b]
            segments.append([[x_a, other[0]], [y_a, other[1]]])

    plt.scatter(points[:, 0], points[:, 1], c='r')
    for xs, ys in segments:
        plt.plot(xs, ys, '--', color='k')
    plt.show()
def connect_stars(coords: List[Tuple[int, int]]) -> Iterable[List[int]]:
    """Return the MST edges joining the given 2-D points.

    The full pairwise Euclidean distance matrix is built and passed to the
    minimum spanning tree routine. Each edge is reported as a sorted pair of
    point indices, and the list of pairs is itself sorted.
    """
    n_points = len(coords)
    # Row-major flattening of the n x n distance table.
    flat = [hypot(ax - bx, ay - by) for ax, ay in coords for bx, by in coords]
    distances = array(flat).reshape(n_points, n_points)

    rows, cols = mst(distances).nonzero()
    pairs = [sorted((int(r), int(c))) for r, c in zip(rows, cols)]
    pairs.sort()
    return pairs
def minimum_spanning_tree(cluster_means):
    """L1 single linkage, minimum spanning tree.

    Computes the pairwise Minkowski (p=1) distances between the rows of
    ``cluster_means``, expands them to a square matrix and returns the
    result of the spanning tree routine on it (input left untouched).
    """
    condensed = pdist(cluster_means, metric='minkowski', p=1)
    square = squareform(condensed)
    return mst(square, overwrite=False)
def _calculate_junctions_air_dist_mst_weight(self, junctions: Set[Junction]) -> float:
    """Return the total weight of the MST spanning ``junctions``.

    A dense symmetric matrix is filled with
    ``self._get_distance_between_junctions`` for every pair of junctions and
    the sum of the minimum spanning tree edges over that matrix is returned.

    Args:
        junctions: the junctions to span.

    Returns:
        Sum of the MST edge weights; ``0.0`` when there are fewer than two
        junctions (no edge is needed).
    """
    ordered = list(junctions)
    nr_junctions = len(ordered)
    if nr_junctions < 2:
        return 0.0

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same dtype.
    distances_matrix = np.zeros((nr_junctions, nr_junctions), dtype=float)
    for hi_idx in range(1, nr_junctions):
        for lo_idx in range(hi_idx):
            # One call per unordered pair, using the (higher index, lower
            # index) argument order so the stored value is unchanged.
            dist = self._get_distance_between_junctions(ordered[hi_idx], ordered[lo_idx])
            distances_matrix[hi_idx, lo_idx] = dist
            distances_matrix[lo_idx, hi_idx] = dist
    return mst(distances_matrix).sum()
def prepare_mst_simple(tri):
    """Return the dense MST matrix of a triangulation's neighbour graph.

    Args:
        tri: object exposing ``points`` (2-D coordinate array) and
            ``vertex_neighbor_vertices`` (a pair of arrays used here as
            CSR-style row pointers and neighbour indices).

    Returns:
        Dense ``(n, n)`` array; non-zero entries are the Euclidean lengths
        of the edges kept in the minimum spanning tree.
    """
    points = tri.points
    n_points = points.shape[0]
    lengths = np.zeros((n_points, n_points), dtype=np.float64)

    indptr, indices = tri.vertex_neighbor_vertices
    for row in range(n_points):
        start, stop = indptr[row], indptr[row + 1]
        for col in indices[start:stop]:
            if not row > col:
                continue  # each undirected edge is filled in once
            delta = points[row, :] - points[col, :]
            length = np.sqrt(delta.dot(delta))
            lengths[row, col] = length
            lengths[col, row] = length

    # noinspection PyTypeChecker
    return mst(lengths, overwrite=True).toarray()
def fit_chowliu(data, penalty=0, weights=None):
    """Fit a Chow-Liu (maximum likelihood tree) model over discrete variables.

    data: (m,n) nparray of m data points (values castable to int)
    penalty: offset applied to the pairwise mutual information; the spanning
        tree is computed on ``penalty - MI`` and edges with positive weight
        are dropped
    weights: accepted for interface compatibility; not used by this
        implementation

    Returns ``GraphModel(factors)`` built from one normalised unary factor
    per variable and one pairwise factor per retained tree edge.
    """
    # TODO: add score f'n parameter, default to empirical MI? or too complicated?
    from scipy.sparse.csgraph import minimum_spanning_tree as mst

    def pairwise_mi(samples):
        """Estimate mutual information between all pairs of discrete variables"""
        _, n_cols = samples.shape
        card = samples.max(0) + 1
        table = np.zeros((n_cols, n_cols))
        for i in range(n_cols):
            for j in range(i + 1, n_cols):
                pij = empirical([[Var(i, card[i]), Var(j, card[j])]], samples)[0]
                pij += 1e-30  # keep the log finite for empty cells
                pij /= pij.sum()
                ratio = pij / pij.sum([i]) / pij.sum([j])
                table[i, j] = (pij * ratio.log()).sum()
                table[j, i] = table[i, j]
        return table

    _, n = data.shape
    d = data.max(0) + 1  # number of states per variable
    mi = pairwise_mi(data)

    tree = mst(penalty - mi).tocoo()

    # Normalised single-variable factors (normalised in place).
    factors = [empirical([[Var(v, d[v])]], data)[0] for v in range(n)]
    for unary in factors:
        unary /= unary.sum()

    for row, col, w in zip(tree.row, tree.col, tree.data):
        if w > 0:
            continue
        i, j = sorted((int(row), int(col)))
        pair = empirical([[Var(i, d[i]), Var(j, d[j])]], data)[0]
        pair /= pair.sum()
        # Both sums are taken from the normalised, undivided table.
        sum_i = pair.sum([i])
        sum_j = pair.sum([j])
        factors.append(pair / sum_i / sum_j)

    return GraphModel(factors)
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree as mst
from scipy.sparse.csgraph import shortest_path as sp


def _symmetric_adjacency(n_nodes, weighted_edges):
    """Return an n_nodes x n_nodes list-of-lists with each edge mirrored."""
    rows = [[0] * n_nodes for _ in range(n_nodes)]
    for a, b, weight in weighted_edges:
        rows[a][b] = weight
        rows[b][a] = weight
    return rows


# Undirected weighted graph on 12 nodes, given as (node, node, weight).
grafo = csr_matrix(_symmetric_adjacency(12, [
    (0, 1, 6), (0, 2, 6), (0, 3, 6),
    (1, 2, 1), (1, 4, 2),
    (2, 3, 2), (2, 4, 7), (2, 6, 2),
    (3, 9, 18),
    (4, 5, 4),
    (5, 6, 11), (5, 7, 10),
    (6, 7, 22), (6, 8, 2),
    (7, 8, 12), (7, 10, 25),
    (8, 9, 1), (8, 10, 16),
    (9, 11, 8),
    (10, 11, 3),
]))

# Minimum spanning tree, printed in sparse form and as a dense integer matrix.
arbol = mst(grafo)
print(arbol)
print(arbol.toarray().astype(int))
def main():
    """Run spectral clustering on three graph constructions and log results.

    Loads the feature matrix through ``pp.get_M_fromDB``, builds a TF-IDF
    similarity matrix, and then, for a fully connected graph, a kNN graph
    and an epsilon graph in turn, runs both normalised spectral clustering
    variants and passes the outcome to ``printResults``, which receives the
    open ``statistics_ppmi.txt`` file handle.
    """
    # ``with`` guarantees the statistics file is closed even if a step raises.
    with open(MAINPATH + "/" + "statistics_ppmi.txt", "w") as statistics:
        M, labels, label_names, relations, nounDict = pp.get_M_fromDB()

        # Choose a method to build your similarity matrix
        # Term Frequency-Inverse Document Frequency
        M_ppmi = sim.get_tf_idf_M(M, "raw", "c", norm_samps=True)
        similarity = "tfidf"
        # Jensen Shanon Divergence
        # M_ppmi = sim.JensenShanon(M)
        # similarity = "jsd"
        # Positive Pointwise Mutual Information
        # M_ppmi = sim.raw2ppmi(M)
        # similarity = "ppmi"

        # Change this value according to expected number of clusters required
        # We tested with 50, 100, 200, 300 based on our dataset
        k = 300

        print("Length features and labels:", len(M_ppmi), len(labels))
        c = spectral.spectral(M_ppmi, labels, sim.cos_s, dist.euclidean)
        # c = spectral.spectral(X, Y, sim.gauss_s, dist.euclidean)

        def cluster_and_report():
            """Run both spectral variants on the current graph and report each."""
            for algo in [c.norm_rw_sc, c.norm_sym_sc]:
                kmeans, kmeans_pred = algo(k)
                print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
                # ``np.int`` was removed from NumPy; builtin ``int`` is equivalent.
                labels_train_pred = kmeans.labels_.astype(int)
                print(c.clustering)
                # NOTE(review): reporting is assumed to happen once per
                # algorithm (inside the loop) - confirm against the original
                # indentation.
                printResults(similarity, label_names, labels_train_pred,
                             nounDict, k, c.clustering, c.graph, statistics)

        # Fully connected
        c.full_graph("cosine")
        print(c.graph)
        cluster_and_report()

        n = M.shape[0]
        # cosine and knn mutual / gauss mutual
        number = int(2 * (n / np.log(n)))
        # gaus non-mutual
        # number = int((n/np.log(n)))

        # K nearest neighbors
        c.kNN_graph(number, "euclidean", False)
        print(c.graph)
        cluster_and_report()

        # Epsilon: threshold taken as the smallest non-zero MST edge of c.W
        T = mst(c.W)
        A = T.toarray().astype(float)
        eps = np.min(A[np.nonzero(A)])
        print("eps", eps)
        c.eps_graph(eps)
        print(c.graph)
        cluster_and_report()
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree as mst
from sklearn.neighbors import kneighbors_graph as kng
import matplotlib.pyplot as plt
import pandas as pd
import json
import csv

# Load object ids and sky positions. read_csv ignores the order given in
# usecols, so re-select the columns to pin the order the positional rename
# further down relies on.
df = pd.read_csv('DES2131+0043.csv', usecols=['COADD_OBJECTS_ID', 'RA', 'DEC'])
df = df[['COADD_OBJECTS_ID', 'RA', 'DEC']]

# DataFrame.as_matrix was removed in pandas 1.0; to_numpy replaces it.
numA = df[['RA', 'DEC']].to_numpy()

# MST over the 20-nearest-neighbour distance graph.
G = kng(numA, n_neighbors=20, mode='distance')
T = mst(G)
B = T.toarray().astype(bool)

# Row/column positions of the MST edges.
index1, index2 = np.where(B)

# DataFrame.append was removed in pandas 2.0; positional selection followed
# by a fresh 0..n-1 index gives the same rows as appending to an empty frame.
df1 = df.iloc[index1].reset_index(drop=True)
df2 = df.iloc[index2].reset_index(drop=True)

# Side-by-side endpoints of every edge; columns are renumbered 0..5 and then
# given explicit names.
final = pd.concat([df1, df2], axis=1, ignore_index=True)
final2 = final.rename(columns={
    0: 'COADD_OBJECTS_ID_1', 1: 'RA1', 2: 'DEC1',
    3: 'COADD_OBJECTS_ID_2', 4: 'RA2', 5: 'DEC2',
})

# Squared coordinate differences along each edge.
K = (final2['RA1'].sub(final2['RA2'])) ** 2
F = (final2['DEC1'].sub(final2['DEC2'])) ** 2