def forward(self, weights):
        """Compute the MST given the edge weights.

        The behaviour is the same as that of ``minimum_spanning_tree`` in
        ``scipy.sparse.csgraph``, namely i) the edges are assumed non-negative,
        ii) if ``weights[i, j]`` and ``weights[j, i]`` are both non-zero,
        their minimum is taken as the edge weight.

        Arguments
        ---------
        weights: :class:`torch:torch.Tensor`
            The adjacency matrix of size ``(n, n)``. It may live on any device
            and may require gradients; it is detached and copied to the CPU
            before being handed to SciPy.

        Returns
        -------
        :class:`torch:torch.Tensor`
            An ``(n, n)`` adjacency matrix of the minimum spanning tree,
            created on the CPU.

            Indices corresponding to the edges in the MST are set to one, rest
            are set to zero.

            If both weights[i, j] and weights[j, i] are non-zero, then the one
            will be located in whichever holds the *smaller* value (ties broken
            arbitrarily).

        Raises
        ------
        AssertionError
            If the number of selected edges is not ``n - 1``.
        """
        # ``Tensor.numpy()`` refuses tensors that are part of an autograd
        # graph, so detach first. The result is a 0/1 mask and carries no
        # gradient anyway.
        dense_weights = weights.detach().cpu().numpy()
        # Only strictly positive entries of the SciPy result count as edges.
        mst_matrix = mst(dense_weights).toarray() > 0
        assert int(mst_matrix.sum()) + 1 == weights.size(0), (
            "expected n - 1 edges in the spanning tree")
        return torch.Tensor(mst_matrix.astype(float))
Beispiel #2
0
def prepare_mst(tri):
    """Build the minimum spanning tree of a triangulation's edge graph.

    Every pair of neighbouring vertices reported by
    ``tri.vertex_neighbor_vertices`` becomes an edge weighted by the Euclidean
    distance between the two points; the MST of that dense weight matrix is
    then converted to plain Python containers.

    Args:
        tri: object exposing ``points`` (2-D array of coordinates) and
            ``vertex_neighbor_vertices`` (a pair of index arrays).

    Returns:
        ``(graph, edges)`` where ``graph`` maps each point (as a tuple) to the
        list of points it is joined to in the MST, and ``edges`` maps
        ``frozenset({p0, p1})`` to the weight of that MST edge.
    """
    points = tri.points
    n_points = points.shape[0]
    weights = np.zeros((n_points, n_points), dtype=np.float64)

    # First array holds the per-vertex slice bounds, second the neighbours.
    indptr, indices = tri.vertex_neighbor_vertices
    for i in range(n_points):
        for j in indices[indptr[i]:indptr[i + 1]]:
            # Each undirected edge is filled in once, from its larger index.
            if j >= i:
                continue
            delta = points[i, :] - points[j, :]
            length = np.sqrt(delta.dot(delta))
            weights[i, j] = length
            weights[j, i] = length

    # noinspection PyTypeChecker
    tree = mst(weights, overwrite=True).toarray()

    graph = {}
    edges = {}
    # np.nonzero walks the matrix row by row, matching a nested i/j scan.
    for i, j in zip(*np.nonzero(tree)):
        p0 = tuple(points[i])
        p1 = tuple(points[j])
        graph.setdefault(p0, []).append(p1)
        graph.setdefault(p1, []).append(p0)
        edges.setdefault(frozenset({p0, p1}), tree[i, j])
    return graph, edges
Beispiel #3
0
    def fit_chowliu(self, data, penalty=0, weights=None):
        """Fit a tree-structured model over binary variables and re-initialise self.

        Pairwise mutual information is estimated from ``data``, a minimum
        spanning tree is taken over ``penalty - MI``, and the instance is
        re-initialised from one single-variable factor per variable plus one
        pairwise factor per kept tree edge.

          data: (n,m) nparray of m data points; values {0,1}
          penalty: value the mutual information is subtracted from; tree edges
            whose resulting weight is positive are skipped
          weights: optional per-data-point weights, forwarded to
            ``np.average`` / ``np.cov``
        """

        # TODO: add score f'n parameter, default to empirical MI?  or too complicated?
        def pairwise_binary_mi(samples, sample_weights):
            """Estimate mutual information between all pairs of *binary* {0,1} variables"""
            marginal = np.average(samples.astype(float), axis=1,
                                  weights=sample_weights)[np.newaxis, :]
            # Outer product of the marginals: P(x_i=1) * P(x_j=1).
            independent11 = marginal.T.dot(marginal)
            joint11 = np.cov(samples, ddof=0,
                             aweights=sample_weights) + independent11
            joint = np.stack(
                (joint11, marginal - joint11, marginal.T - joint11,
                 1 + joint11 - marginal - marginal.T),
                axis=2)
            independent = np.stack(
                (independent11, marginal - independent11,
                 marginal.T - independent11,
                 1 + independent11 - marginal - marginal.T),
                axis=2)
            log_ratio = np.log(joint + 1e-10) - np.log(independent + 1e-10)
            return (joint * log_ratio).sum(axis=2), joint11, marginal[0]

        n_vars, _ = data.shape
        # data should be 0/1, not -1/+1
        mi, pair_on, single_on = pairwise_binary_mi(data, weights)
        from scipy.sparse.csgraph import minimum_spanning_tree as mst
        tree = mst(penalty - mi).tocoo()

        factors = [
            Factor([Var(v, 2)], [1 - single_on[v], single_on[v]])
            for v in range(n_vars)
        ]
        for row, col, cost in zip(tree.row, tree.col, tree.data):
            if cost > 0:
                continue
            # Order the pair so the smaller variable index comes first.
            if row < col:
                lo, hi = int(row), int(col)
            else:
                lo, hi = int(col), int(row)
            table = [
                1 + pair_on[lo, hi] - single_on[lo] - single_on[hi],
                single_on[lo] - pair_on[lo, hi],
                single_on[hi] - pair_on[lo, hi],
                pair_on[lo, hi],
            ]
            pair = Factor([Var(lo, 2), Var(hi, 2)], table)
            # Divide the joint table by both of its own sums.
            pair = pair / pair.sum([lo]) / pair.sum([hi])
            factors.append(pair)
        self.__init__(factors)
Beispiel #4
0
def get_mst(df, neighbors):
    """Compute the Minimum Spanning Tree (MST) from positions.

    This function takes a pandas dataframe of positions and computes the
    distances to k-neighbors for all positions given, then finds the MST using
    ``scipy.sparse.csgraph``. Finally it finds the non-zero elements in the
    returned sparse matrix.

    Args:
        df: A pandas dataframe of all positions with longitude as ``RA`` and
            latitude as ``DEC``.
        neighbors(int): The number of neighbors used when computing trees.

    Returns:
        A pandas dataframe for all edges in the MST and a tuple of arrays
        storing the indexes and values of non-zero elements in the MST sparse
        matrix.
    """
    df = df[['RA', 'DEC']]
    # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``to_numpy`` is its
    # replacement.
    numA = df.to_numpy()
    G = kng(numA, n_neighbors=neighbors, mode='distance')
    T = mst(G)
    index = find(T)
    row_ls = index[0].tolist()
    col_ls = index[1].tolist()
    # The sparse-matrix indices are row *positions* in ``numA``, so select
    # positionally. ``.ix`` was removed in pandas 1.0 and would have looked
    # these up as labels on an integer index.
    df1 = df.iloc[row_ls].reset_index()
    df2 = df.iloc[col_ls].reset_index()
    df1 = df1.rename(columns={'RA': 'RA1', 'DEC': 'DEC1', 'index': 'index1'})
    df2 = df2.rename(columns={'RA': 'RA2', 'DEC': 'DEC2', 'index': 'index2'})
    final = pd.concat([df1, df2], axis=1)
    final['edges'] = pd.Series(index[2], index=final.index)
    final.reset_index(
        inplace=True)  # take index into columns for later filtering in JS
    final = final.rename(columns={'index': 'line_index'})
    return final, index
Beispiel #5
0
def fit_chowliu(data, penalty=0, weights=None):
    """Estimate an Ising model using Chow-Liu's max likelihood tree structure & parameters

      data: (m,n) nparray of m data points; values {0,1}
      penalty: non-negative penalty on the MI (may give a disconnected / forest graph)
      weights: optional per-data-point weights, forwarded to
        ``np.average`` / ``np.cov``

    Returns an ``Ising`` built from one single-variable factor per variable
    plus one pairwise factor per kept tree edge.
    """

    # TODO: add score f'n parameter, default to empirical MI?  or too complicated?
    def pairwise_binary_mi(samples, sample_weights, eps=1e-10):
        """Estimate mutual information between all pairs of *binary* {0,1} variables"""
        # TODO: expects (n,m) shape data
        marginal = np.average(samples.astype(float), axis=1,
                              weights=sample_weights)[np.newaxis, :]
        # Outer product of the marginals: P(x_i=1) * P(x_j=1).
        independent11 = marginal.T.dot(marginal)
        joint11 = np.cov(samples, ddof=0,
                         aweights=sample_weights) + independent11
        joint = np.stack(
            (joint11, marginal - joint11, marginal.T - joint11,
             1 + joint11 - marginal - marginal.T),
            axis=2)
        independent = np.stack(
            (independent11, marginal - independent11,
             marginal.T - independent11,
             1 + independent11 - marginal - marginal.T),
            axis=2)
        log_ratio = np.log(joint + eps) - np.log(independent + eps)
        return (joint * log_ratio).sum(axis=2), joint11, marginal[0]

    _, n_vars = data.shape
    # data should be 0/1, not -1/+1; the helper wants variables along axis 0.
    mi, pair_on, single_on = pairwise_binary_mi(data.T, weights)
    from scipy.sparse.csgraph import minimum_spanning_tree as mst
    tree = mst(penalty - mi).tocoo()

    factors = [
        Factor([Var(v, 2)], [1 - single_on[v], single_on[v]])
        for v in range(n_vars)
    ]
    for row, col, cost in zip(tree.row, tree.col, tree.data):
        if cost > 0:
            continue
        # Order the pair so the smaller variable index comes first.
        if row < col:
            lo, hi = int(row), int(col)
        else:
            lo, hi = int(col), int(row)
        table = [
            1 + pair_on[lo, hi] - single_on[lo] - single_on[hi],
            single_on[lo] - pair_on[lo, hi],
            single_on[hi] - pair_on[lo, hi],
            pair_on[lo, hi],
        ]
        pair = Factor([Var(lo, 2), Var(hi, 2)], table)
        # Divide the joint table by both of its own sums.
        pair = pair / pair.sum([lo]) / pair.sum([hi])
        factors.append(pair)
    return Ising(factors)
Beispiel #6
0
def test_mst():
    """Plot the MST of the Delaunay graph of the first 100 ``get_carina()`` rows.

    Points are drawn as a red scatter and every MST edge as a dashed black
    segment; the figure is shown with ``plt.show()``. Nothing is asserted or
    returned.
    """
    sample = get_carina()[:100, :]
    tri = Delaunay(sample)
    points = tri.points
    n_points = points.shape[0]

    weights = np.zeros((n_points, n_points), dtype=np.float64)
    # First array holds the per-vertex slice bounds, second the neighbours.
    indptr, indices = tri.vertex_neighbor_vertices
    for i in range(n_points):
        for j in indices[indptr[i]:indptr[i + 1]]:
            # Each undirected edge is filled in once, from its larger index.
            if j >= i:
                continue
            delta = points[i, :] - points[j, :]
            length = np.sqrt(delta.dot(delta))
            weights[i, j] = length
            weights[j, i] = length

    # noinspection PyTypeChecker
    tree = mst(weights, overwrite=True).toarray()

    # One [[x0, x1], [y0, y1]] entry per non-zero MST cell.
    segments = []
    for i in range(n_points):
        x1, y1 = points[i]
        for j in np.nonzero(tree[i])[0]:
            other = points[j]
            segments.append([[x1, other[0]], [y1, other[1]]])

    plt.scatter(points[:, 0], points[:, 1], c='r')
    for xs, ys in segments:
        plt.plot(xs, ys, '--', color='k')
    plt.show()
Beispiel #7
0
def connect_stars(coords: List[Tuple[int, int]]) -> Iterable[List[int]]:
    """Return the MST edges of the complete Euclidean graph over ``coords``.

    Each edge is a two-element list of point indices in ascending order, and
    the list of edges is itself sorted.
    """
    n_stars = len(coords)
    # Flattened all-pairs distance table, row-major over (first, second).
    flat_distances = [
        hypot(ax - bx, ay - by)
        for (ax, ay), (bx, by) in product(coords, repeat=2)
    ]
    distances = array(flat_distances).reshape(n_stars, n_stars)
    rows, cols = mst(distances).nonzero()
    edges = [sorted((int(r), int(c))) for r, c in zip(rows, cols)]
    edges.sort()
    return edges
Beispiel #8
0
def minimum_spanning_tree(cluster_means):
    """
    L1 single linkage, minimum spanning tree

    Pairwise Minkowski (p=1) distances between the rows of ``cluster_means``
    are expanded to a square matrix and passed to ``mst``; the sparse result
    is returned as-is.
    """
    condensed = pdist(cluster_means, metric='minkowski', p=1)
    square = squareform(condensed)
    return mst(square, overwrite=False)
Beispiel #9
0
 def _calculate_junctions_air_dist_mst_weight(self, junctions: Set[Junction]) -> float:
     """Return the total weight of the MST over the given junctions.

     A dense symmetric matrix is filled with
     ``self._get_distance_between_junctions`` for every pair of junctions and
     the weights of its minimum spanning tree are summed.

     Args:
         junctions: the junctions to connect.

     Returns:
         Sum of the MST edge weights (0 when there are fewer than two
         junctions, since the matrix then has no off-diagonal entries).
     """
     ordered_junctions = list(junctions)
     nr_junctions = len(ordered_junctions)
     # ``np.float`` was only an alias of the builtin ``float`` and was removed
     # in NumPy 1.24; the builtin yields the same float64 dtype.
     distances_matrix = np.zeros((nr_junctions, nr_junctions), dtype=float)
     for j1_idx in range(nr_junctions):
         # Visit each unordered pair once. The argument order (larger index
         # first) is the one whose result ended up in the matrix when every
         # ordered pair was evaluated.
         for j2_idx in range(j1_idx):
             dist = self._get_distance_between_junctions(
                 ordered_junctions[j1_idx], ordered_junctions[j2_idx])
             distances_matrix[j1_idx, j2_idx] = dist
             distances_matrix[j2_idx, j1_idx] = dist
     return mst(distances_matrix).sum()
Beispiel #10
0
def prepare_mst_simple(tri):
    """Return the dense MST matrix of a triangulation's edge graph.

    Every pair of neighbouring vertices reported by
    ``tri.vertex_neighbor_vertices`` becomes an edge weighted by the Euclidean
    distance between the two points.

    Args:
        tri: object exposing ``points`` (2-D array of coordinates) and
            ``vertex_neighbor_vertices`` (a pair of index arrays).

    Returns:
        An ``(n, n)`` array whose non-zero cells are the MST edge weights.
    """
    points = tri.points
    n_points = points.shape[0]
    weights = np.zeros((n_points, n_points), dtype=np.float64)

    # First array holds the per-vertex slice bounds, second the neighbours.
    indptr, indices = tri.vertex_neighbor_vertices
    for i in range(n_points):
        nearby = indices[indptr[i]:indptr[i + 1]]
        # Each undirected edge is filled in once, from its larger index.
        for j in nearby[nearby < i]:
            delta = points[i, :] - points[j, :]
            length = np.sqrt(delta.dot(delta))
            weights[i, j] = length
            weights[j, i] = length

    # noinspection PyTypeChecker
    return mst(weights, overwrite=True).toarray()
Beispiel #11
0
def fit_chowliu(data, penalty=0, weights=None):
    """Select a maximum likelihood tree-structured graph & parameters

      data: (m,n) nparray of m data points (values castable to int)
      penalty: value the mutual information is subtracted from; tree edges
        whose resulting weight is positive are skipped
      weights: passed to the MI helper, which does not read it

    Returns a ``GraphModel`` built from one normalised single-variable factor
    per variable plus one pairwise factor per kept tree edge.
    """

    # TODO: add score f'n parameter, default to empirical MI?  or too complicated?
    def pairwise_discrete_mi(samples, sample_weights):
        """Estimate mutual information between all pairs of discrete variables"""
        _, n_vars = samples.shape
        # Number of states per variable: largest observed value plus one.
        cards = samples.max(0) + 1
        mi = np.zeros((n_vars, n_vars))
        for a in range(n_vars):
            for b in range(a + 1, n_vars):
                joint = empirical([[Var(a, cards[a]), Var(b, cards[b])]],
                                  samples)[0]
                # Tiny offset keeps empty cells out of log(0).
                joint += 1e-30
                joint /= joint.sum()
                ratio = joint / joint.sum([a]) / joint.sum([b])
                mi[a, b] = (joint * ratio.log()).sum()
                mi[b, a] = mi[a, b]
        return mi, None, None

    _, n_vars = data.shape
    cards = data.max(0) + 1
    mi, _, _ = pairwise_discrete_mi(data, weights)
    from scipy.sparse.csgraph import minimum_spanning_tree as mst
    tree = mst(penalty - mi).tocoo()

    factors = [
        empirical([[Var(v, cards[v])]], data)[0] for v in range(n_vars)
    ]
    for single in factors:
        single /= single.sum()

    for row, col, cost in zip(tree.row, tree.col, tree.data):
        if cost > 0:
            continue
        # Order the pair so the smaller variable index comes first.
        if row < col:
            lo, hi = int(row), int(col)
        else:
            lo, hi = int(col), int(row)
        pair = empirical([[Var(lo, cards[lo]), Var(hi, cards[hi])]], data)[0]
        pair /= pair.sum()
        # Divide the joint table by both of its own sums.
        pair = pair / pair.sum([lo]) / pair.sum([hi])
        factors.append(pair)
    return GraphModel(factors)
Beispiel #12
0
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree as mst

from scipy.sparse.csgraph import shortest_path as sp

# Weighted undirected graph on 12 nodes, written as a symmetric adjacency
# matrix: entry [i][j] is the weight of the edge between nodes i and j, and a
# 0 means the two nodes are not joined.
grafo = csr_matrix([
    [0,6,6,6,0,0,0,0,0,0,0,0],
    [6,0,1,0,2,0,0,0,0,0,0,0],
    [6,1,0,2,7,0,2,0,0,0,0,0],
    [6,0,2,0,0,0,0,0,0,18,0,0],
    [0,2,7,0,0,4,0,0,0,0,0,0],
    [0,0,0,0,4,0,11,10,0,0,0,0],
    [0,0,2,0,0,11,0,22,2,0,0,0],
    [0,0,0,0,0,10,22,0,12,0,25,0],
    [0,0,0,0,0,0,2,12,0,1,16,0],
    [0,0,0,18,0,0,0,0,1,0,0,8],
    [0,0,0,0,0,0,0,25,16,0,0,3],
    [0,0,0,0,0,0,0,0,0,8,3,0]
])

# Minimum spanning tree of the graph, returned as a sparse matrix.
arbol = mst(grafo)
# Sparse view first (coordinates and weights of the kept edges), then the
# same tree as a dense integer matrix.
print(arbol)
print(arbol.toarray().astype(int))
def _cluster_and_report(c, k, similarity, label_names, nounDict, statistics):
    """Run both clustering routines of ``c`` on its current graph and report each."""
    for algo in [c.norm_rw_sc, c.norm_sym_sc]:
        kmeans, kmeans_pred = algo(k)
        print("Kmeans pred:", kmeans_pred, len(kmeans_pred))
        # ``np.int`` was only an alias of the builtin ``int`` and was removed
        # in NumPy 1.24.
        labels_train_pred = kmeans.labels_.astype(int)
        print(c.clustering)
        printResults(similarity, label_names, labels_train_pred, nounDict, k,
                     c.clustering, c.graph, statistics)


def main():
    """Cluster the data from ``pp.get_M_fromDB()`` on three graph types.

    A similarity matrix is built, then the clustering routines of the
    ``spectral.spectral`` object are run and reported on, in turn, the full
    graph, a k-nearest-neighbour graph and an epsilon graph. Results are
    passed to ``printResults`` together with an open handle on
    ``statistics_ppmi.txt`` under ``MAINPATH``.
    """
    # The context manager closes the statistics file even if a step raises.
    with open(MAINPATH + "/" + "statistics_ppmi.txt", "w") as statistics:
        M, labels, label_names, relations, nounDict = pp.get_M_fromDB()

        #Choose a method to build your similarity matrix
        #Term Frequency-Inverse Document Frequency
        M_ppmi = sim.get_tf_idf_M(M, "raw", "c", norm_samps=True)
        similarity = "tfidf"

        #Jensen Shanon Divergence
        #M_ppmi = sim.JensenShanon(M)
        #similarity = "jsd"

        #Positive Pointwise Mutual Information
        #M_ppmi = sim.raw2ppmi(M)
        #similarity = "ppmi"

        #Change this value according to expected number of clusters required
        #We tested with 50, 100, 200, 300 based on our dataset
        k = 300
        print("Length features and labels:", len(M_ppmi), len(labels))

        c = spectral.spectral(M_ppmi, labels, sim.cos_s, dist.euclidean)
        #c = spectral.spectral(X, Y, sim.gauss_s, dist.euclidean)

        #Fully connected
        c.full_graph("cosine")
        print(c.graph)
        _cluster_and_report(c, k, similarity, label_names, nounDict,
                            statistics)

        n = M.shape[0]
        # cosine and knn mutual / gauss mutual
        number = int(2 * (n / np.log(n)))
        # gauss non-mutual
        #number = int((n/np.log(n)))

        #K nearest neighbors
        c.kNN_graph(number, "euclidean", False)
        print(c.graph)
        _cluster_and_report(c, k, similarity, label_names, nounDict,
                            statistics)

        #Epsilon: threshold is the smallest non-zero weight in the MST of c.W
        T = mst(c.W)
        A = T.toarray().astype(float)
        eps = np.min(A[np.nonzero(A)])
        print("eps", eps)
        c.eps_graph(eps)
        print(c.graph)
        _cluster_and_report(c, k, similarity, label_names, nounDict,
                            statistics)
Beispiel #14
0
import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree as mst
from sklearn.neighbors import kneighbors_graph as kng

import matplotlib.pyplot as plt
import pandas as pd
import json
import csv

# Load the catalogue and pin the column order, because the rename below
# identifies columns by position.
df = pd.read_csv('DES2131+0043.csv',
                 usecols=['COADD_OBJECTS_ID', 'RA', 'DEC'])
df = df[['COADD_OBJECTS_ID', 'RA', 'DEC']]

# ``DataFrame.as_matrix`` was removed in pandas 1.0; ``to_numpy`` is its
# replacement.
numA = df[['RA', 'DEC']].to_numpy()
G = kng(numA, n_neighbors=20, mode='distance')
T = mst(G)
B = T.toarray().astype(bool)

# Row and column positions of every MST edge, taken from a single scan.
index1, index2 = np.where(B)

# ``DataFrame.append`` was removed in pandas 2.0. Selecting the rows by
# position and renumbering them gives the same endpoint tables.
df1 = df.iloc[index1].reset_index(drop=True)
df2 = df.iloc[index2].reset_index(drop=True)

# Side-by-side endpoints; ignore_index renumbers the six columns 0..5.
final = pd.concat([df1, df2], axis=1, ignore_index=True)

final2 = final.rename(columns={
    0: 'COADD_OBJECTS_ID_1', 1: 'RA1', 2: 'DEC1',
    3: 'COADD_OBJECTS_ID_2', 4: 'RA2', 5: 'DEC2',
})


# Squared coordinate differences between the two endpoints of each edge.
K = (final2['RA1'].sub(final2['RA2'])) ** 2
F = (final2['DEC1'].sub(final2['DEC2'])) ** 2