Example #1
def construct_A(X, k, binary=False):

    nbrs = NearestNeighbors(n_neighbors=1 + k).fit(X)
    if binary:
        return nbrs.kneighbors_graph(X)
    else:
        return nbrs.kneighbors_graph(X, mode='distance')
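A quick usage sketch for construct_A (made-up data; assumes numpy and sklearn.neighbors.NearestNeighbors are imported). n_neighbors=1 + k is used because kneighbors_graph(X) counts each training point as its own nearest neighbor:

import numpy as np

X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [5.0, 5.0]])
A_bin = construct_A(X, k=2, binary=True)   # 0/1 connectivity, sparse CSR
A_dist = construct_A(X, k=2)               # edge weights are Euclidean distances
print(A_bin.toarray())
print(A_dist.toarray())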
Example #2
 def _compute_neighbors(self):
     V,dim = self.data_frame.shape
     neighbors = NearestNeighbors(n_neighbors=self.num_neighbors,algorithm='auto').fit(self.data_frame)
     _,indices = neighbors.kneighbors(self.data_frame)
     self._adjacency_graph = neighbors.kneighbors_graph(self.data_frame,mode='connectivity')
     self._knn_graph = neighbors.kneighbors_graph(self.data_frame,mode='distance')
     self._neighbors = indices
Example #3
    def kNN_graph(self, k, metric, mutual=False):
#        self.latex = []
        nn = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric, n_jobs=-1).fit(self.X)
        UAM = nn.kneighbors_graph(self.X).toarray() #unweighted adjacency matrix
        m = UAM.shape[0]
        self.W = np.zeros((m, m)) #(weighted) adjacency matrix
        self.D = np.zeros((m, m)) #degree matrix
        if not mutual:
            if self.full_calculated:
                indices = np.where(UAM == 1)
                self.W[indices] = self.full_W[indices]
                self.D[np.diag_indices(m)] = np.sum(self.W, 1)
            else:
                for i in range(m):
                    for j in range(m):
                        if UAM[i,j] == 1:
                            sim = self.s(self.X[i], self.X[j], self.d)
                            self.W[i,j] = sim
                            self.D[i,i] += sim
        else:
            if self.full_calculated:
                indices = np.where(np.logical_and(UAM == 1, UAM.T == 1).astype(int) == 1)
                self.W[indices] = self.full_W[indices]
                self.D[np.diag_indices(m)] = np.sum(self.W != 0, 1)
            else:
                for i in range(m):
                    for j in range(m):
                        if UAM[i,j] == 1 and UAM[j,i] == 1:
                            sim = self.s(self.X[i], self.X[j], self.d)
                            self.W[i,j] = sim
                            self.D[i,i] += sim
        self.W = np.nan_to_num(self.W)
        self.graph = "kNN graph, k = " + str(k) + ", mutual:" + str(mutual)
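The mutual branch above keeps an edge (i, j) only when i and j each appear among the other's k nearest neighbors. A standalone sketch of that idea on synthetic data:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).rand(20, 3)
UAM = NearestNeighbors(n_neighbors=3).fit(X).kneighbors_graph(X).toarray()
mutual = np.logical_and(UAM == 1, UAM.T == 1).astype(float)  # symmetric by construction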
Example #4
def test_connectivity_propagation():
    """
    Check that connectivity in the ward tree is propagated correctly during
    merging.
    """
    from sklearn.neighbors import NearestNeighbors

    X = np.array(
        [
            (0.014, 0.120),
            (0.014, 0.099),
            (0.014, 0.097),
            (0.017, 0.153),
            (0.017, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.152),
            (0.018, 0.149),
            (0.018, 0.144),
        ]
    )
    nn = NearestNeighbors(n_neighbors=10).fit(X)
    connectivity = nn.kneighbors_graph(X)
    ward = Ward(n_clusters=4, connectivity=connectivity)
    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    ward.fit(X)
Example #5
def _sparse_neighbor_graph(X, k, binary=False):
  '''Construct a sparse adjacency matrix from a matrix of points (one per row).
  Non-zeros are either binary (unweighted) or distance values, depending on
  the binary arg. Doesn't include self-edges.'''
  knn = NearestNeighbors(n_neighbors=k).fit(X)
  mode = 'connectivity' if binary else 'distance'
  try:
    adj = knn.kneighbors_graph(None, mode=mode)
  except IndexError:
    # XXX: we must be running an old (<0.16) version of sklearn
    #  We have to hack around an old bug:
    if binary:
      adj = knn.kneighbors_graph(X, k+1, mode=mode)
      adj.setdiag(0)
    else:
      adj = knn.kneighbors_graph(X, k, mode=mode)
  return Graph.from_adj_matrix(adj)
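One detail worth noting in the except branch above: setdiag(0) on a SciPy sparse matrix leaves explicit zeros in the structure, so a follow-up eliminate_zeros() is what actually drops the self-edges. A small sketch on synthetic data:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).rand(10, 2)
adj = NearestNeighbors(n_neighbors=4).fit(X).kneighbors_graph(X)
adj.setdiag(0)
adj.eliminate_zeros()  # remove the stored zeros left behind by setdiag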
Example #6
def diffusionKernel(X, eps, knn):
    nbrs = NearestNeighbors(n_neighbors=knn, algorithm='ball_tree').fit(X)
    D = nbrs.kneighbors_graph(X, mode='distance')
    # Gaussian kernel on the kNN distances: G_ij = exp(-d_ij**2 / eps)
    term = D.multiply(D) / -eps
    G = np.exp(term.toarray())
    # entries with d_ij == 0 (non-neighbors and self-edges) became exp(0) = 1;
    # zero them out, then restore the diagonal explicitly
    G[np.where(G == 1)] = 0
    G = G + np.eye(G.shape[0])
    # column-normalize into a stochastic diffusion operator
    deg = np.sum(G, axis=0)
    P = G / deg
    return P, D
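Since deg is the column sum of G, each column of P sums to 1. A quick sanity check on made-up data (assumes the function above with its numpy and scikit-learn imports):

import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.normal(size=(30, 2))
P, D = diffusionKernel(X_demo, eps=0.5, knn=5)
print(np.allclose(P.sum(axis=0), 1.0))  # True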
Example #7
def test_lle_with_sklearn():
    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    G = geom.Geometry()
    G.set_data_matrix(X)
    G.set_adjacency_matrix(knn.kneighbors_graph(X, mode = 'distance'))
    sk_Y_lle = manifold.LocallyLinearEmbedding(n_neighbors, n_components, method = 'standard').fit_transform(X)
    (mm_Y_lle, err) = lle.locally_linear_embedding(G, n_components)
    assert(_check_with_col_sign_flipping(sk_Y_lle, mm_Y_lle, 0.05))
Example #8
def test_isomap_with_sklearn():
    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    # Assign the geometry matrix to get the same answer, since sklearn uses k-neighbors instead of radius-neighbors
    g = geom.Geometry(X)
    g.set_adjacency_matrix(knn.kneighbors_graph(X, mode = 'distance'))
    # test Isomap with sklearn
    sk_Y_iso = manifold.Isomap(n_neighbors, n_components, eigen_solver = 'arpack').fit_transform(X)
    mm_Y_iso = iso.isomap(g, n_components)
    assert(_check_with_col_sign_flipping(sk_Y_iso, mm_Y_iso, 0.05))
Example #9
    def getNeighborStatistics(self,data,samples,pcntl):
        neigh =  NearestNeighbors(n_neighbors=samples)
        neigh.fit(data)
        A = neigh.kneighbors_graph(data,mode='distance')
        b = A.nonzero()
        c = np.log10(np.array(A[b[0],b[1]]))
        mean = c[0].mean()
        std = c[0].std()
        pc = np.percentile(c[0],pcntl)

        n,bins,patches = plt.hist(c[0],50)
        plt.show()
        mx = bins[n.argmax()]
        ret = {'mean':np.power(10,mean),'std':np.power(10,std),'pcntl':np.power(10,pc), 'max':np.power(10,mx)}

        return ret
Example #10
def test_ltsa_with_sklearn():
    from sklearn import manifold
    from sklearn import datasets
    from sklearn.neighbors import NearestNeighbors
    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    Geometry = geom.Geometry(X)
    Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode = 'distance'))
    sk_Y_ltsa = manifold.LocallyLinearEmbedding(n_neighbors, n_components, 
                                                method = 'ltsa',
                                                eigen_solver = 'arpack').fit_transform(X)
    (mm_Y_ltsa, err) = ltsa.ltsa(Geometry, n_components, eigen_solver = 'arpack')
    assert(_check_with_col_sign_flipping(sk_Y_ltsa, mm_Y_ltsa, 0.05))
Example #11
    def getCollectionStatistics(self,samples):
        neigh =  NearestNeighbors(n_neighbors=samples)
        neigh.fit(self.coordinates)
        A = neigh.kneighbors_graph(self.coordinates,mode='distance')
        b = A.nonzero()
        c = np.log10(np.array(A[b[0],b[1]]))

        mean = c[0].mean()
        std = c[0].std()
        pc = np.percentile(c[0],50)

        n,bins,patches = plt.hist(c[0],80)
        plt.show()
        mx = bins[n.argmax()]

        self.collection_stats = {'mean':np.power(10,mean),'std':np.power(10,std),'pcntl':np.power(10,pc), 'max':np.power(10,mx)}
        return self.collection_stats
Example #12
    def fit(self, X):
        '''Obtain the top-k eigensystem of the graph Laplacian

        The eigen solver adopts shift-invert mode as described in
        http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        '''
        nbrs = NearestNeighbors(n_neighbors=self.n_nbrs).fit(X)

        # NOTE: W is a dense matrix here, which can exhaust memory for large n
        W = nbrs.kneighbors_graph(X).toarray()
        W_sym = np.maximum(W, W.T)

        L = csr_matrix(csgraph.laplacian(W_sym, normed=True))
        [Sigma, U] = eigsh(L, self.n_clusters+1, sigma=0, which='LM')

        # remove the trivial (smallest) eigenvalues & vectors
        self.Sigma, self.U = Sigma[1:], U[:,1:]
Example #13
def netview(matrix, k, mst, algorithm, tree):

    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm=algorithm).fit(matrix)
    adj_knn = nbrs.kneighbors_graph(matrix).toarray()
    np.fill_diagonal(adj_knn, 0)
    adj_mknn = (adj_knn == adj_knn.T) * adj_knn

    if tree:
        adj = mst + adj_mknn
    else:
        adj = adj_mknn

    adjacency = np.tril(adj)
    mst_edges = np.argwhere(adjacency < 1)
    adjacency[adjacency > 0] = 1.0
    edges = np.argwhere(adjacency != 0)
    weights = matrix[edges[:, 0], edges[:, 1]]

    return [k, edges, weights, adjacency, mst_edges]
Example #14
def test_isomap_with_sklearn():
    try:
        from sklearn import manifold
        from sklearn import datasets
        from sklearn.neighbors import NearestNeighbors
        N = 10
        X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
        n_components = 2
        n_neighbors = 3
        knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
        # Assign the geometry matrix to get the same answer, since sklearn uses k-neighbors instead of radius-neighbors
        Geometry = geom.Geometry(X)
        Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode = 'distance'))    
        # test Isomap with sklearn
        sk_Y_iso = manifold.Isomap(n_neighbors, n_components, eigen_solver = 'arpack').fit_transform(X)
        mm_Y_iso = iso.isomap(Geometry, n_components)
        assert(_check_with_col_sign_flipping(sk_Y_iso, mm_Y_iso, 0.05))
    except ImportError:
        return True
Example #15
def entropy_batch_mixing(latent_space, batches):
    def entropy(hist_data):
        n_batches = len(np.unique(hist_data))
        if n_batches > 2:
            raise ValueError("Should be only two clusters for this metric")
        frequency = np.mean(hist_data == 1)
        if frequency == 0 or frequency == 1:
            return 0
        return -frequency * np.log(frequency) - (1 - frequency) * np.log(1 - frequency)

    nne = NearestNeighbors(n_neighbors=51, n_jobs=8)
    nne.fit(latent_space)
    kmatrix = nne.kneighbors_graph(latent_space) - scipy.sparse.identity(latent_space.shape[0])

    score = 0
    for t in range(50):
        indices = np.random.choice(np.arange(latent_space.shape[0]), size=100)
        score += np.mean([entropy(batches[kmatrix[indices].nonzero()[1]\
                                 [kmatrix[indices].nonzero()[0] == i]]) for i in range(100)])
    return score / 50.
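The inner helper is the binary entropy H(f) = -f*log(f) - (1 - f)*log(1 - f) of the batch-1 frequency among a cell's 50 neighbors; it peaks at log 2 ≈ 0.693 for a perfectly mixed neighborhood. A quick numeric check on made-up data:

import numpy as np

hist_data = np.array([0, 1, 0, 1])  # perfectly mixed batches
f = np.mean(hist_data == 1)         # 0.5
print(-f * np.log(f) - (1 - f) * np.log(1 - f))  # 0.6931...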
Example #16
def test_precomputed_nearest_neighbors_filtering():
    # Test precomputed graph filtering when containing too many neighbors
    X, y = make_blobs(n_samples=200,
                      random_state=0,
                      centers=[[1, 1], [-1, -1]],
                      cluster_std=0.01)

    n_neighbors = 2
    results = []
    for additional_neighbors in [0, 10]:
        nn = NearestNeighbors(n_neighbors=n_neighbors +
                              additional_neighbors).fit(X)
        graph = nn.kneighbors_graph(X, mode='connectivity')
        labels = SpectralClustering(random_state=0,
                                    n_clusters=2,
                                    affinity='precomputed_nearest_neighbors',
                                    n_neighbors=n_neighbors).fit(graph).labels_
        results.append(labels)

    assert_array_equal(results[0], results[1])
Example #17
    def search(self, collection, topicNum = 100):
        topicId = []
        topicArray = []
        print('start collect')
        for item in collection.find():
            topicId.append(item['url'])
            topics = [0] * int(topicNum)
            if item.get('topics') is not None:
                for tuple in item['topics']:
                    topics[tuple[0]] = tuple[1]
                topicArray.append(topics)

        print('start nns')
        nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(topicArray)
        print('judge')
        nnset = [[i for i, doc in enumerate(vector) if doc == 1] for vector in nbrs.kneighbors_graph(topicArray).toarray()]
        print('update')
        for i, recs in enumerate(nnset):
            print(i, ':', recs)
            collection.update({'url': topicId[i]}, {"$set": {"rec": [topicId[key] for key in recs]}})
Example #18
def get_knn_graph(data_file, data_format, k, d, N, alg):
  if data_format == "binary":
    a = np.fromfile(data_file, dtype=float).reshape((N,d))
  elif data_format == "libsvm":
    x, labels = load_svmlight_file(data_file)
    del labels
    a = x.todense()
    del x
  else:
    print "wrong data format!"
    return 0
  k_plus_1 = k+1
  t_start = time.time()
  nbrs = NearestNeighbors(n_neighbors=(k_plus_1), algorithm=alg, leaf_size=1).fit(a)
  t_tree = time.time()
  knn_graph = nbrs.kneighbors_graph(a)
  t_graph = time.time() - t_tree
  t = time.time() - t_start
  print('overall time = ' + str(t) + " seconds")
  return knn_graph
Example #19
def order_border(border):
    '''
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line
    '''
    n_points = border.shape[0]
    clf = NearestNeighbors(n_neighbors=2).fit(border)
    G = clf.kneighbors_graph()
    T = nx.from_scipy_sparse_matrix(G)

    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(n_points)]
    min_idx, min_dist = 0, np.inf

    for idx, path in enumerate(paths):
        ordered = border[path]  # ordered nodes
        cost = np.sum(np.diff(ordered, axis=0)**2)
        if cost < min_dist:
            min_idx, min_dist = idx, cost

    opt_order = paths[min_idx]
    return border[opt_order][:-1]
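A usage sketch for order_border (assumes the imports its body relies on: numpy as np, networkx as nx, and sklearn's NearestNeighbors). Shuffled points on a circle should come back in traversal order; note the function drops the final point:

import numpy as np

theta = np.linspace(0, 2 * np.pi, 30, endpoint=False)
border = np.c_[np.cos(theta), np.sin(theta)]
np.random.RandomState(0).shuffle(border)
ordered = order_border(border)  # shape (29, 2)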
Example #21
def run_swiss():
    # training data
    data, t = load_swiss_data(train_batch_size)
    data = normalize(data)
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(data)
    nbr_graph = nbrs.kneighbors_graph(data).toarray()
    global nbr_graph_tensor
    nbr_graph_tensor = torch.tensor(nbr_graph)
    data = torch.from_numpy(data).float()
    net = Net()
    # loss function
    loss_func = nn.L1Loss()
    # optimizer
    opti = torch.optim.Adam(net.parameters(), weight_decay=1e-3)
    # train net
    train_swiss_dmvu(epoch, data, net, loss_func, opti, t)
    data, t = load_swiss_data(test_batch_size)
    data = normalize_2(data)
    data = torch.from_numpy(data).float()
    test_swiss_dmvu(data, net, t)
Example #22
def distancetree_metric(centroid_path):
    ## calculating the distance of only centroid based on the spanning tree
    centroid_list = joblib.load(log_path + "{}/{}/{}/{}".
                                format(chose_dataset, chose_model, model_layer, centroid_path))

    centroids = list(centroid_list.values())
    neigh = NearestNeighbors(n_neighbors=len(centroids))
    neigh.fit(centroids)
    A = neigh.kneighbors_graph(centroids, mode='distance')
    X = csr_matrix(A)
    Tcsr = minimum_spanning_tree(X)
    distance = Tcsr.toarray().sum()

    a = Tcsr.toarray()
    b = np.reshape(a, (-1,))
    b = np.where(b == 0, np.inf, b)
    minimum_dist = b.min()

    return minimum_dist, distance
Example #23
    def fit(self, x):
        """Fit model to data.

        Args:
            x(BaseDataset): Dataset to fit.

        """
        x_np, _ = x.numpy()

        # Determine neighborhood parameters
        if x_np.shape[1] > 100:
            print('Computing PCA before knn search...')
            x_np = PCA(n_components=100).fit_transform(x_np)

        nbrs = NearestNeighbors(n_neighbors=self.n_neighbors,
                                algorithm='auto').fit(x_np)

        self.knn_graph = nbrs.kneighbors_graph()

        super().fit(x)
Example #25
    def top_3(self, keywords):
        """
        unormalised vector used to calculated knn.
        KNN calculated with Sklearn
        out

        :return: knn sparse graph matrix
        """
        kws_len = len(keywords)

        vecs = np.zeros((kws_len, self.vec_len), dtype=float)
        for i, kw in enumerate(keywords):
            word = self.nlp(kw)
            vec = np.array(word.vector)
            vecs[i] = vec

        nbrs = NearestNeighbors(n_neighbors=self.k + 1,
                                algorithm='ball_tree').fit(vecs)
        graph = nbrs.kneighbors_graph(vecs).toarray()

        return graph
Example #26
    def get_connectivity(self, x):

        if self.degree == 0:
            a_net = self.dist2_mat(x)
            a_net = (a_net < self.comm_radius2).astype(float)
        else:
            neigh = NearestNeighbors(n_neighbors=self.degree)
            neigh.fit(x[:, 2:4])
            a_net = np.array(
                neigh.kneighbors_graph(mode='connectivity').todense())

        if self.mean_pooling:
            # Normalize the adjacency matrix by the number of neighbors - results in mean pooling, instead of sum pooling
            n_neighbors = np.reshape(
                np.sum(a_net, axis=1),
                (self.n_agents,
                 1))  # TODO or axis=0? Is the mean in the correct direction?
            n_neighbors[n_neighbors == 0] = 1
            a_net = a_net / n_neighbors

        return a_net
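The mean_pooling branch divides each row of the adjacency matrix by that agent's neighbor count, turning sum aggregation into mean aggregation. A standalone sketch of just that normalisation step:

import numpy as np

a_net = np.array([[0., 1., 1.],
                  [1., 0., 0.],
                  [1., 0., 0.]])
n_neighbors = a_net.sum(axis=1, keepdims=True)
n_neighbors[n_neighbors == 0] = 1   # isolated agents: avoid division by zero
a_net_mean = a_net / n_neighbors    # each non-empty row now sums to 1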
Example #27
def passl_local_graph_partial(site, loc_param_indices, params):
    X = site.buff[loc_param_indices[0]]
    K, rbf_sigma, local_graph_index, n_cluster, centers_index, point_cluster_index, inter_graph_index, member_id_index = params
    nins = NN(n_neighbors=K + 1, metric='euclidean').fit(X)
    W = nins.kneighbors_graph(nins._fit_X, K + 1, mode='distance')
    # Gaussian (RBF) weights on the kNN distances
    W.data = np.exp(-W.data**2 / rbf_sigma)
    W[np.diag_indices(W.shape[0])] = 0
    site.buff[local_graph_index] = W
    kins = KM(n_cluster)
    point_cluster = kins.fit_predict(X)
    site.buff[point_cluster_index] = point_cluster
    site.buff[centers_index] = kins.cluster_centers_
    #print(kins.cluster_centers_)
    site.buff[inter_graph_index] = {}
    member_id = []
    for i in range(n_cluster):
        member_id.append(np.where(point_cluster == i)[0])
        #print(member_id[-1])
    site.buff[member_id_index] = member_id
Example #28
 def calculate_adjacency_matrix(self, X):
     n_samples = X.shape[1]
     adjacency_matrix = np.zeros((n_samples, n_samples))
     knn = KNN(n_neighbors=self.n_neighbors,
               algorithm='kd_tree',
               n_jobs=self.n_jobs)
     knn.fit(X=self.X.T)
     # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
     # the following function gives n_samples*n_samples matrix, and puts 0 for where points are not connected directly in KNN graph
     connectivity_matrix = knn.kneighbors_graph(
         X=X.T, n_neighbors=self.n_neighbors + 1,
         mode='connectivity')  #+1 because the point itself is also counted
     connectivity_matrix = connectivity_matrix.toarray()
     for point_index in range(connectivity_matrix.shape[0]):
         for point_index_2 in range(connectivity_matrix.shape[1]):
             if connectivity_matrix[point_index, point_index_2] == 1:
                 x1 = X[:, point_index]
                 x2 = X[:, point_index_2]
                 adjacency_matrix[point_index, point_index_2] = math.exp(
                     -(LA.norm(x1 - x2))**2)
     return adjacency_matrix
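The double loop above evaluates exp(-||x1 - x2||**2) pair by pair; a vectorised equivalent (a sketch, assuming X holds samples in columns and connectivity_matrix is the dense 0/1 matrix from the method):

import numpy as np
from scipy.spatial.distance import cdist

D2 = cdist(X.T, X.T, metric='sqeuclidean')             # all pairwise squared distances
adjacency_matrix = connectivity_matrix * np.exp(-D2)   # keep weights only on kNN edges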
Example #29
def similarity_regression(X, y, n_neighbors=None):
    """
    Calculates similarity based on labels using X (data) y (labels)
    
    this considers X, by use knn first and then a distance metric - in this setting
    we will use the rbf kernel for similarity. 
    
    Then if X is "far" in the knn sense we will set to 0
    we can determine "distance" based on clusters? that is if we build
    a cluster around this obs, which other observations are closest. 
    
    
    """
    from sklearn.neighbors import NearestNeighbors
    if n_neighbors is None:
        n_neighbors = max(int(X.shape[0] * 0.05)+1, 2)
    
    # use NearestNeighbors to determine closest obs
    y_ = np.array(y).reshape(-1,1)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(y_)
    return np.multiply(nbrs.kneighbors_graph(y_).toarray(), rbf_kernel(X, gamma=1))
Example #30
def locality_preserving_loss(local_rep, target_rep, locality_preserving_k=5):
    # norm2 = lambda u, v: ((u-v)**2).sum()
    nbrs = NearestNeighbors(n_neighbors=locality_preserving_k + 1,
                            algorithm='ball_tree',
                            metric="euclidean",
                            # metric="pyfunc",
                            # metric_params={"func": norm2}
                            )
    nbrs = nbrs.fit(target_rep)
    alpha = nbrs.kneighbors_graph(target_rep, mode='distance')
    # g = g.eliminate_zeros()
    sigma = 10
    alpha.data = np.exp(-np.power(alpha.data, 2)/(sigma**2))
    alphaT = torch.tensor(alpha.toarray(), device=local_rep.device)

    # dists = scidist.squareform(scidist.cdist(local_rep, norm2))

    dists = torch.cdist(local_rep, local_rep, p=2)
    dists = dists.pow(2)
    losses = torch.mul(dists, alphaT)
    return torch.sum(losses) / local_rep.shape[0]#, alpha, alphaT, dists, losses
Example #32
def k_nearest_network(phase_space, k=5):
	"""
--------------------------------------------
Convert a phase space into a k nearest neighbor network, a directed one
--------------------------------------------
phase_space:	Array. The phase space representation in numpy array format
k:				Number. The k nearest neighbors will be connected
--------------------------------------------
Return a graph object, using igraph representation
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import phase_space as phs

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5, false_nn_threshold=0.2, noise_perc=2)

graph = phs.k_nearest_network(rc, k=5)

	"""
	from scipy.spatial.distance import pdist, squareform
	from igraph import Graph
	from igraph import ADJ_UNDIRECTED, ADJ_DIRECTED
	from sklearn.neighbors import NearestNeighbors

	# TODO allow other algorithms
	# it's passed k+1 because each node is considered the nearest neighbor of itself
	nbrs = NearestNeighbors(n_neighbors=k+1, algorithm='kd_tree').fit(phase_space)
	
	adj_mat = nbrs.kneighbors_graph(phase_space, mode='connectivity').toarray()
	diag = range(len(adj_mat))
	adj_mat[diag, diag] = 0
	return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_DIRECTED)
Example #33
def SNN(x, k=3, verbose=True, metric='minkowski'):
    '''
    x: n x m matrix, n is #sample, m is #feature
    '''
    n, m = x.shape
    # Find a ranklist of neighbors for each sample
    timestamp = timer()
    if verbose:
        print('Create KNN matrix...')
    knn = NearestNeighbors(n_neighbors=n, metric=metric)
    knn.fit(x)
    A = knn.kneighbors_graph(x, mode='distance')
    A = A.toarray()
    A_rank = A
    for i in range(n):
        A_rank[i, :] = np.argsort(A[i, :])
    A_rank = np.array(A_rank, dtype='int')
    A_knn = A_rank[:, :k]
    if verbose:
        print("Time elapsed:\t", timer() - timestamp)

    # Create weighted edges between samples
    timestamp = timer()
    if verbose:
        print('Generate edges...')
    edge = []
    for i in range(n):
        for j in range(i + 1, n):
            shared = set(A_knn[i, :]).intersection(set(A_knn[j, :]))
            shared = np.array(list(shared))
            if (len(shared) > 0):  # When i and j have shared knn
                strength = k - (match1d(shared, A_knn[i, :]) +
                                match1d(shared, A_knn[j, :]) + 2) / 2
                strength = max(strength)
                if (strength > 0):
                    edge = edge + [i + 1, j + 1, strength]
    edge = np.array(edge).reshape(-1, 3)
    if verbose:
        print("Time elapsed:\t", timer() - timestamp)
    return (edge)
Example #35
def order_points(points):
    """
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line
    """

    clf = NearestNeighbors(n_neighbors=2).fit(points)  # calc nearest neighbour
    G = clf.kneighbors_graph()  #create sparse matrix
    T = nx.from_scipy_sparse_matrix(G)  #construct graph from sparse matrix
    # order paths
    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(len(points))]
    mindist = np.inf
    minidx = 0
    for i in range(len(points)):
        p = paths[i]  # order of nodes
        ordered = points[p]  # ordered nodes
        # find cost of that order by the sum of euclidean distances between points (i) and (i+1)
        cost = (((ordered[:-1] - ordered[1:])**2).sum(1)).sum()
        if cost < mindist:
            mindist = cost
            minidx = i

    return paths[minidx]
Example #36
 def find_KNN_distance_matrix(self, X, n_neighbors):
     # X: column-wise samples
     # returns KNN_distance_matrix: row-wise --> shape: (n_samples, n_samples) where zero for not neighbors
     # returns neighbors_indices: row-wise --> shape: (n_samples, n_neighbors)
     knn = KNN(n_neighbors=n_neighbors + 1, algorithm='kd_tree',
               n_jobs=-1)  #+1 because the point itself is also counted
     knn.fit(X=X.T)
     # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
     # the following function gives n_samples*n_samples matrix, and puts 0 for diagonal and also where points are not connected directly in KNN graph
     # if K=n_samples, only diagonal is zero.
     Euclidean_distance_matrix = knn.kneighbors_graph(
         X=X.T, n_neighbors=n_neighbors + 1,
         mode='distance')  #--> gives Euclidean distances
     KNN_distance_matrix = Euclidean_distance_matrix.toarray()
     neighbors_indices = np.zeros(
         (KNN_distance_matrix.shape[0], n_neighbors))
     for sample_index in range(KNN_distance_matrix.shape[0]):
         neighbors_indices[sample_index, :] = np.ravel(
             np.asarray(
                 np.where(KNN_distance_matrix[sample_index, :] != 0)))
     neighbors_indices = neighbors_indices.astype(int)
     return KNN_distance_matrix, neighbors_indices
Example #37
def knn_distance_matrix(X, n_neighbors=10, nn_radius='halfk', leaf_size=30):
    knn = NearestNeighbors(n_neighbors=n_neighbors,
                           algorithm='auto',
                           metric='sqeuclidean',
                           leaf_size=leaf_size,
                           n_jobs=-1)
    knn.fit(X)
    W = knn.kneighbors_graph(n_neighbors=n_neighbors, mode='distance')

    if nn_radius == 'halfk':
        nn_radius = n_neighbors // 2

    distances, _ = knn.kneighbors(n_neighbors=nn_radius)
    half_k_neighbors_distance = np.sqrt(distances[:, -1].squeeze())
    # normalization based on each point's "neighborhood radius"
    for i in range(W.shape[0]):
        W[i, :] /= half_k_neighbors_distance[i]
    W = W.tocsc()
    for j in range(W.shape[0]):
        W[:, j] /= half_k_neighbors_distance[j]

    return W
Example #38
def generateHeidiMatrixResults_noorder(inputData,k=20):
    factor=1
    knn=k
    bit_subspace={}
    row=inputData.shape[0]
    count=0
    heidi_matrix=np.zeros(shape=(row,row),dtype=np.uint64)
    max_count=int(math.pow(2,inputData.shape[1]-1))
    allsubspaces=range(1,max_count)
    f=lambda a:sorted(a,key=lambda x:sum(int(d)for d in bin(x)[2:]))
    allsubspaces=f(allsubspaces)
    #print(allsubspaces)
    frmt=str(inputData.shape[1]-1)+'b'
    factor=1
    bit_subspace={}
    count=0
    #print('knn:',knn)

    for i in allsubspaces:
        bin_value=str(format(i,frmt))
        bin_value=bin_value[::-1]
        subspace_col=[index for index,value in enumerate(bin_value) if value=='1']

        filtered_data=inputData.iloc[:,subspace_col+[-1]] #NEED TO CHANGE IF COL IS A LIST
        filtered_data['classLabel_orig']=filtered_data['classLabel'].values
        sorted_data=filtered_data
        subspace=sorted_data.iloc[:,:-2]
        np_subspace=subspace.values#NEED TO CHANGE IF COL IS A LIST
        #print(np_subspace.shape)
        nbrs=NearestNeighbors(n_neighbors=knn,algorithm='ball_tree').fit(np_subspace)
        temp=nbrs.kneighbors_graph(np_subspace).toarray()
        temp=temp.astype(np.uint64)
        heidi_matrix=heidi_matrix + temp*factor
        factor=factor*2
        subspace_col_name=[inputData.columns[j] for j in subspace_col]
        #print(i,subspace_col_name)
        bit_subspace[count]=subspace_col_name
        count+=1
    return heidi_matrix,bit_subspace,sorted_data
Example #39
def make_graph_knn(coords, layers, sim_indices, k):
    
    nbrs = NearestNeighbors(algorithm='kd_tree').fit(coords)
    nbrs_sm = nbrs.kneighbors_graph(coords, k)
    nbrs_sm.setdiag(0) #remove self-loop edges
    nbrs_sm.eliminate_zeros() 
    nbrs_sm = nbrs_sm + nbrs_sm.T
    pairs_sel = np.array(nbrs_sm.nonzero()).T
    first,second = pairs_sel[:,0],pairs_sel[:,1]  
    #selected index pair list that we label as connected
    data_sel = np.ones(pairs_sel.shape[0])
    
    #prepare the input and output matrices (already need to store sparse)
    r_shape = (coords.shape[0],pairs_sel.shape[0])
    eye_edges = np.arange(pairs_sel.shape[0])
    
    R_i = csr_matrix((data_sel,(pairs_sel[:,1],eye_edges)),r_shape,dtype=np.uint8)
    R_o = csr_matrix((data_sel,(pairs_sel[:,0],eye_edges)),r_shape,dtype=np.uint8)
        
    #now make truth graph y (i.e. both hits are sim-matched)    
    y = (np.isin(pairs_sel,sim_indices).astype(np.int8).sum(axis=-1) == 2)
    return R_i,R_o,y    
Example #40
class Density:

    def __init__(self, X, n_neighbors = 10, theta = 1):
        self.neigh = NearestNeighbors(p=1)
        self.neigh.fit(X)
        self.Pij = -self.neigh.kneighbors_graph(X, n_neighbors, mode='distance') / (2 * theta**2)
        self.Pij.data[:] = np.exp(self.Pij.data)
        self.Wij = self.Pij.sum(0)
        counts = np.bincount(self.Pij.indices, minlength = self.Pij.shape[0])
        counts[np.where(counts ==0)[0]] = 1
        #print(counts)
        self.Gra = np.array(self.Wij / counts).reshape(-1)

    def pick(self,i):
        indices = self.Pij.getrow(i).indices
        temp = self.Gra[i]
        self.Gra[indices] = self.Gra[indices] - temp
        return temp
    
    def getDensity(self):
        return self.Gra
Example #41
def get_heidi_input_subspace_noorder(df, bin_value, factor=1, classLabelname='classLabel'):
    #bin_value = [True, False, True, False]
    #factor =1
    #classLabelname='classLabel'
    row=df.shape[0]
    heidi_matrix=np.zeros(shape=(row,row),dtype=np.uint64)
    subspace_col = [i for i,x in enumerate(bin_value) if x]
    filtered_data=df.iloc[:,subspace_col] #NEED TO CHANGE IF COL IS A LIST
    filtered_data[classLabelname]=df[classLabelname].values
    filtered_data['classLabel_orig']=filtered_data[classLabelname].values
    sorted_data=filtered_data
    subspace=sorted_data.iloc[:,:-2]
    np_subspace=subspace.values
    nbrs=NearestNeighbors(n_neighbors=knn,algorithm='ball_tree').fit(np_subspace)
    temp=nbrs.kneighbors_graph(np_subspace).toarray()
    temp=temp.astype(np.uint64)
    heidi_matrix=heidi_matrix + temp*factor
    factor=factor*2
    subspace_col_name=[df.columns[j] for j in subspace_col]
    output='.'
    img,bit_subspace=generateHeidiMatrixResults_noorder_helper(heidi_matrix,bs,output,sorted_data,'legend_heidi')
    return output+'/consolidated_img.png'
Example #42
def order_coords(coords, idx_start=0):
    
    clf = NearestNeighbors(n_neighbors=2).fit(coords)
    G = clf.kneighbors_graph()

    # New sorting: shortest-path distances fed to a greedy TSP solver
    from scipy.sparse.csgraph import shortest_path
    dist_matrix, predecessors = shortest_path(csgraph=G, directed=False, return_predecessors=True)


    from tsp_solver.greedy import solve_tsp
    path = solve_tsp(dist_matrix, endpoints=(0, len(coords) - 1))
    
    sorted_coords = coords[path[::1]]
    organized_coords = sorted_coords
    
    ### old sorting below
    # T = nx.from_scipy_sparse_matrix(G)
    # order = list(nx.dfs_preorder_nodes(T, 0))
    # organized_coords = coords[order]   # SORT BY ORDER
    
    return organized_coords
Example #43
 def create_network(self):
     data_path = 'all_stocks_5yr.csv'
     data = pd.read_csv(data_path)
     Name = data['Name']
     companies = list(set(Name))
     time_series = []
     valid_companies = []
     for index, company in enumerate(companies):
         all_time_series = data.loc[data['Name'] == company]
         ts_open = np.array(all_time_series['open'])
         ts_open = ts_open[~np.isnan(ts_open)]
         size = ts_open.shape[0]
         if size > 1100:
             valid_companies.append(company)
             ts_open.resize(1259)
             time_series.append(ts_open)
     time_series = np.array(time_series)
     nbrs = NearestNeighbors(n_neighbors=self.k, algorithm='ball_tree', metric=self.mydist).fit(time_series)
     knn_graph = nbrs.kneighbors_graph(time_series).toarray()
     np.fill_diagonal(knn_graph, 0)
     g = igraph.Graph.Adjacency(knn_graph.tolist(), mode="undirected")
     print('Network created.....')
     g.write_pajek(self.output)
Example #44
 def find_geodesic_distance_matrix(self):
     # ----- find k-nearest neighbor graph (distance matrix):
     if self.n_neighbors is None:
         n_samples = self.X.shape[1]
         self.n_neighbors = n_samples
     knn = KNN(
         n_neighbors=self.n_neighbors + 1,
         algorithm='kd_tree',
         n_jobs=self.n_jobs)  #+1 because the point itself is also counted
     knn.fit(X=self.X.T)
     # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
     # the following function gives n_samples*n_samples matrix, and puts 0 for diagonal and also where points are not connected directly in KNN graph
     # if K=n_samples, only diagonal is zero.
     Euclidean_distance_matrix = knn.kneighbors_graph(
         X=self.X.T, n_neighbors=self.n_neighbors,
         mode='distance')  #--> gives Euclidean distances
     #Euclidean_distance_matrix = Euclidean_distance_matrix.toarray()
     # ----- find geodesic distance graph:
     # https://scikit-learn.org/stable/modules/generated/sklearn.utils.graph_shortest_path.graph_shortest_path.html
     self.geodesic_dist_matrix = graph_shortest_path(
         dist_matrix=Euclidean_distance_matrix,
         method="auto",
         directed=False)
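graph_shortest_path was later removed from scikit-learn's utils; the same geodesic distances can be computed with SciPy instead. A sketch under that assumption, reusing a sparse kNN distance matrix like the one built above:

from scipy.sparse.csgraph import shortest_path

geodesic_dist_matrix = shortest_path(Euclidean_distance_matrix,
                                     method='auto', directed=False)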
Example #45
    def getHeidiImageForSubspace(self, subspace, outputpath):
        row = self.inputData.shape[0]
        heidi_matrix = np.zeros(shape=(row, row), dtype=np.uint64)
        subspace_col = [i for i, x in enumerate(subspace) if x]
        filtered_data = self.inputData.iloc[:, subspace_col]
        np_subspace = filtered_data.values
        nbrs = NearestNeighbors(n_neighbors=knn,
                                algorithm='ball_tree').fit(np_subspace)
        temp = nbrs.kneighbors_graph(np_subspace).toarray()
        temp = temp.astype(np.uint64)
        heidi_matrix = temp
        arr = np.zeros((heidi_matrix.shape[0], heidi_matrix.shape[1], 3))
        for i in range(heidi_matrix.shape[0]):
            for j in range(heidi_matrix.shape[1]):
                if (heidi_matrix[i][j] == 1):
                    arr[i][j] = self.subspaceColors[tuple(subspace_col)]
                else:
                    arr[i][j] = [255, 255, 255]

        tmp = arr.astype(np.uint8)
        img = Image.fromarray(tmp)
        img.save(outputpath)
        return
Example #46
def get_knn_graph(X, k):
    '''
    parameters
    ----------
    X : 2-D array
     input data matrix
    k : int
     the number of nearest neighbors
    
    Notes
    ----------
    The kNN graph's element (i, j) is the distance between x_i and x_j
    if x_j is among the k nearest neighbors of x_i.
    
    Returns
    ----------
    knn : csr_matrix, shape (len(X), len(X))
     pairwise distance matrix of samples
    
    '''
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(X)

    return neigh.kneighbors_graph(mode='distance')
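A usage sketch for get_knn_graph (made-up data; assumes numpy and the NearestNeighbors import used above). Called without X, kneighbors_graph builds the graph over the training points and excludes each point itself:

import numpy as np

X = np.random.RandomState(0).rand(8, 2)
G = get_knn_graph(X, k=3)
print(G.shape)  # (8, 8)
print(G.nnz)    # 8 * 3 = 24 stored distances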
Example #47
    def _affinity_mat(self, X):

        r'''
        Computes the affinity matrix based on the selected
        kernel type.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The data matrix from which we will compute the
            affinity matrix.

        Returns
        -------
        sims : array-like, shape (n_samples, n_samples)
            The resulting affinity kernel.

        '''

        sims = None

        # If gamma is None, then compute default gamma value for this view
        gamma = self.gamma
        if self.gamma is None:
            distances = cdist(X, X)
            gamma = 1 / (2 * np.median(distances) ** 2)
        # Produce the affinity matrix based on the selected kernel type
        if (self.affinity == 'rbf'):
            sims = rbf_kernel(X, gamma=gamma)
        elif(self.affinity == 'nearest_neighbors'):
            neighbor = NearestNeighbors(n_neighbors=self.n_neighbors)
            neighbor.fit(X)
            sims = neighbor.kneighbors_graph(X).toarray()
        else:
            sims = polynomial_kernel(X, gamma=gamma)

        return sims
Example #48
def get_kneighbors_graph(
    points: NDArray[(Any, Any), Number],
    n_farthest_samples: Union[int, float] = 0.3,
    n_random_samples: Union[int, float] = 0.1,
    dmax: int = 500,
    n_neighbors: int = 5,
    n_jobs: Optional[int] = None,
) -> spmatrix:
    """
    Get a graph generated by KNN on given points.

    Args:
        points: array containing point coordinates.
        n_farthest_samples: number of points to keep using farthest points sampling. If
            a float is given, represents the proportion of points used instead.
        n_random_samples: number of points to keep using random sampling. If a float is
            given, represents the proportion of points used instead.
        dmax: maximum distance in pixels between two adjacent nodes.
        n_neighbors: number of neighbors to use for KNN algorithm.
        n_jobs: number of parallel jobs to run for neighbors search. None means 1.

    Returns:
        Sparse distance matrix representing the graph.
    """
    idxs = random_farthest_point_sampling(
        points,
        n_farthest_samples=n_farthest_samples,
        n_random_samples=n_random_samples,
    )
    X = points[idxs]

    knn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=n_jobs).fit(X)

    A = knn.kneighbors_graph(mode="distance")
    Abool = A.astype(bool) - (A > dmax)
    A = A.multiply(Abool)
    return A.maximum(A.T)
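The final A.maximum(A.T) symmetrises the thresholded kNN graph, keeping an edge whenever it exists in either direction (with the larger weight). A tiny sketch:

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0., 2., 0.],
                         [0., 0., 3.],
                         [1., 0., 0.]]))
print(A.maximum(A.T).toarray())  # symmetric; edges 2, 3 and 1 appear in both directions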
Example #49
from scipy.spatial.distance import cosine


names = np.load("C:\\Users\\Will\\Desktop\\ml-1m\\DataSet\\NameMatrix.npy")
valuematrix = np.load("C:\\Users\\Will\\Desktop\\ml-1m\\DataSet\\ArrangedMatrix.npy")


#df = pd.DataFrame(valuematrix, columns=np.array(names).tolist())


nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(valuematrix)
distances,indeces = nbrs.kneighbors(valuematrix)
#print indeces
#print distances

temp = nbrs.kneighbors_graph(valuematrix).toarray()
print(temp)

Example #50
 def construct_A(self, X, k=1, binary=False): #might generate sparse matrix
     nbrs = NearestNeighbors(n_neighbors=1 + k).fit(X)
     if binary:
         return nbrs.kneighbors_graph(X)
     else:
         return nbrs.kneighbors_graph(X, mode='distance')
Example #51
from sklearn.neighbors import NearestNeighbors
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X)
test = np.array([[0,0],[0.1,0.9]])
distances, indices = nbrs.kneighbors(test)
print(indices)
print(distances)
print(nbrs.kneighbors_graph(X).toarray())

from sklearn.neighbors import KDTree
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
kdt = KDTree(X, leaf_size=30, metric='euclidean')
print(kdt.query(X, k=2, return_distance=False))
print(kdt.valid_metrics)

from sklearn.neighbors import NearestNeighbors
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X)
distances, indices = nbrs.kneighbors(X)

print(nbrs.kneighbors_graph(X).toarray())
Example #53
# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while (not cfg_dnn.test_sets.is_finish()):  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    batch_num = int(math.ceil(cfg_dnn.test_sets.cur_frame_num / batch_size))

    for batch_index in range(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        end_index = min((batch_index+1) * batch_size, cfg_dnn.test_sets.cur_frame_num)  # the residue may be smaller than a mini-batch
        output = extract_func(cfg_dnn.test_x.get_value()[start_index:end_index])
        if output_mat is None:
            output_mat = output
        else:
            output_mat = np.concatenate((output_mat, output))  # this is not efficient

log('> ... fitting a KNN cluster')
knn = KNN(n_neighbors=3)
knn.fit(output_mat)
log('> ... computing the graph of class')
results = knn.kneighbors_graph(output_mat)
print(results)
# results.toarray()
# print(results.toarray())
Example #54
XTE, YTE = get_data(fullTestFile)
x_new_tr = sparse.lil_matrix(sparse.csr_matrix(XTR)[:,list(range(upcStart-1,nextStart-1))])
x_new_te = sparse.lil_matrix(sparse.csr_matrix(XTE)[:,list(range(upcStart-1,nextStart-1))])
x_new_stack_T = vstack([x_new_tr,x_new_te]).T ## see boundry elements- print(sparse.csr_matrix(x_new_stack_T)[0])
##
#x_new = sparse.lil_matrix(sparse.csr_matrix(XD)[:,list(range(47,115))])
#x_new_T = x_new.T ##
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.graph_shortest_path import graph_shortest_path
import networkx as nx
import pickle
feat='upc'
k=3
nbrs = NearestNeighbors(n_neighbors=k+1,metric='cosine',algorithm='brute').fit(x_new_stack_T) # k=(n_neighbors-1) (first neighbour is 'v' itself)
#distances, indices = nbrs.kneighbors(x_new_T) # not directly needed, for now
knnmatrix = nbrs.kneighbors_graph(x_new_stack_T,mode='distance') # sparse matrix(68x68) with nearest KNeighbours for each of the 68 pt
knnmatrix.data[np.where(knnmatrix.data<0)]=0
sp = graph_shortest_path(knnmatrix,directed=False) # shortest-path-edge-weight from (v_i to v_j), (doc-https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/graph_shortest_path.pyx)
G = nx.Graph(knnmatrix)
spl = nx.shortest_path(G, weight='weight') # shortest-path dict-array from each v_i to v_j, do len(array) to find path-length
## spl = nx.shortest_path(G) # Without weight (just connections-1/0)
pickle.dump(knnmatrix,open('knn_'+feat+'_k_'+str(k)+'.pickle.dump','wb')) # used to smooth out features
pickle.dump(sp,open('sp_all_'+feat+'_k_'+str(k)+'.pickle.dump','wb'))
#np.savetxt('sp_all_'+feat+'_k_'+str(k)+'.np.save', sp)
pickle.dump(spl,open('spl_all_'+feat+'_k_'+str(k)+'.pickle.dump','wb'))
##
knnmatrix_all = pickle.load(open('knn_'+feat+'_k_'+str(k)+'.pickle.dump','rb'))
sp_all = pickle.load(open('sp_all_'+feat+'_k_'+str(k)+'.pickle.dump','rb'))
#sp_all = np.loadtxt('sp_all_'+feat+'_k_'+str(k)+'.np.save')
spl_all = pickle.load(open('spl_all_'+feat+'_k_'+str(k)+'.pickle.dump','rb'))
#
Example #55
def wishbone(data, s, k=15, l=15, num_graphs=1, num_waypoints=250, 
	verbose=True, metric='euclidean', voting_scheme='exponential', 
	branch=True, flock_waypoints=2, band_sample=False, partial_order=[],
	search_connected_components=True):

	if verbose:
		print('Building lNN graph...')
	

	# Construct nearest neighbors graph
	start = time.process_time()
	nbrs = NearestNeighbors(n_neighbors=l+1, metric=metric).fit(data)  
	lnn = nbrs.kneighbors_graph(data, mode='distance' ) 
	lnn = np.transpose(lnn)
	print('lNN computed in : %.2f seconds' % (time.process_time()-start))

	#set up return structure
	trajectory = []
	waypoints = []
	branches = []
	bas = []

	# generate klNN graphs and iteratively refine a trajectory in each
	for graph_iter in range(num_graphs):
		if k!=l:
			klnn = _spdists_klnn(lnn, k, verbose)
		else:
			klnn = lnn

		# Make the graph undirected
		klnn = _spdists_undirected(klnn)
		klnn.setdiag(0)
		klnn.eliminate_zeros()

		#run traj. landmarks
		traj, dist, iter_l, paths_l2l = _trajectory_landmarks( klnn, data, [s], num_waypoints, partial_order, verbose, metric, flock_waypoints, band_sample, branch)
		if branch:
			if verbose:
				print ('Determining branch point and branch associations...')
			RNK, bp, diffdists, Y = _splittobranches(traj, traj[0], data, iter_l, dist, paths_l2l)


		# calculate weighted trajectory
		W_full = _weighting_scheme(voting_scheme, dist)

		if branch:
			W = _muteCrossBranchVoting(W_full, RNK, RNK[s], iter_l, Y)
		else:
			W = W_full

		
		# save initial solution - start point's shortest path distances
		t = traj[0, :]
		t = [t, np.sum(np.multiply(traj, W), axis=0)]

		# iteratively realign trajectory (because landmarks moved)
		converged, user_break, realign_iter = False, False, 1
		if verbose:
			print('Running iterations...')

		while not converged and not user_break:
			realign_iter = realign_iter + 1
			print('Iteration: %d' % realign_iter)

			np.copyto(traj, dist)
			traj = _realign_trajectory(t, dist, iter_l, traj, 0, len(dist), realign_iter)

			if branch:
				RNK, bp, diffdists, Y = _splittobranches(traj, traj[0],data, iter_l, dist,paths_l2l)
				W = _muteCrossBranchVoting(W_full, RNK, RNK[s], iter_l,Y)
			# calculate weighted trajectory
			t.append(np.sum(np.multiply(traj, W), axis=0))
			
			#check for convergence
			fpoint_corr = stats.pearsonr(np.transpose(t[realign_iter]), np.transpose(t[realign_iter - 1]))[0]
			if verbose:
				print('Correlation with previous iteration:  %.4f' % fpoint_corr)
			converged = fpoint_corr > 0.9999
		
			if (realign_iter % 16) == 0:
				# break after too many alignments - something is wrong
				user_break = True
				print('\nWarning: Force exit after ' + str(realign_iter) + ' iterations')

		print(str(realign_iter-1) + ' realignment iterations')

		# save final trajectory for this graph			
		iter_traj = t[realign_iter][:]
		# Normalize the iter_trajectory
		iter_traj = (iter_traj - iter_traj.min()) / (iter_traj.max() - iter_traj.min())
		trajectory.append(iter_traj)
		waypoints.append(iter_l)
		
		if branch:
			# Recalculate branches post reassignments
			RNK, bp, diffdists, Y = _splittobranches(traj, traj[0], data, iter_l, dist,paths_l2l)
			branches.append(RNK)
			bas.append(Y)

		else:
			branches = trajectory # branch
	
		return dict(zip(['Trajectory', 'Waypoints', 'Branches', 'BAS'],
		  [trajectory[0], waypoints[0], branches[0], bas[0]]))
Example #56
def get_nearest_neighbor_graph(k, X):
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(X) 
    A = neigh.kneighbors_graph(X)
    distances, indices = neigh.kneighbors(X)
    return distances, indices, A
Example #57
#! /usr/bin/python3
#For the simple task of finding the nearest neighbors between two sets of data, the unsupervised algorithms within sklearn.neighbors can be used:
from sklearn.neighbors import NearestNeighbors
import numpy as np
#                 0        1         2       3       4       5
X = np.array([[-1, -1], [-2,-1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='auto').fit(X)
distances, indices = nbrs.kneighbors(X)
print(indices)
print(distances)

nearestConnectionMatrix = nbrs.kneighbors_graph(X).toarray()
print(nearestConnectionMatrix)

# use KD-tree or Ball-tree
from sklearn.neighbors import KDTree
import numpy as np
kdt = KDTree(X, leaf_size=30, metric='euclidean')
result = kdt.query(X, k = 2, return_distance=False)
print(result)
Example #58
                         (((A[:, column_idx] +
                            B[:, column_idx]) ** 2).mean() <= tol ** 2))
        if not sign:
            return False
    return True

N = 10
X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
n_components = 2
n_neighbors = 3

knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)

# Assign the geometry matrix to get the same answer, since sklearn uses k-neighbors instead of radius-neighbors
Geometry = geom.Geometry(X)
Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode = 'distance'))

from sklearn import manifold

# test LTSA with sklearn
sk_Y_ltsa = manifold.LocallyLinearEmbedding(n_neighbors, n_components, 
                                            method = 'ltsa', eigen_solver = 'arpack').fit_transform(X)
import Mmani.embedding.ltsa as ltsa
(mm_Y_ltsa, err) = ltsa.ltsa(Geometry, n_components, eigen_solver = 'arpack')
assert(_check_with_col_sign_flipping(sk_Y_ltsa, mm_Y_ltsa, 0.05))

# test LLE with sklearn
sk_Y_lle = manifold.LocallyLinearEmbedding(n_neighbors, n_components, method = 'standard').fit_transform(X)
import Mmani.embedding.locally_linear_ as lle
(mm_Y_lle, err) = lle.locally_linear_embedding(Geometry, n_components)
assert(_check_with_col_sign_flipping(sk_Y_lle, mm_Y_lle, 0.05))