Example 1
def test_pipeline_spectral_clustering(seed=36):
    # Test using pipeline to do spectral clustering
    random_state = np.random.RandomState(seed)
    se_rbf = SpectralEmbedding(n_components=n_clusters,
                               affinity="rbf",
                               random_state=random_state)
    se_knn = SpectralEmbedding(n_components=n_clusters,
                               affinity="nearest_neighbors",
                               n_neighbors=5,
                               random_state=random_state)
    for se in [se_rbf, se_knn]:
        km = KMeans(n_clusters=n_clusters, random_state=random_state)
        km.fit(se.fit_transform(S))
        assert_array_almost_equal(
            normalized_mutual_info_score(km.labels_, true_labels), 1.0, 2)
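This test (and the later ones that reference S, true_labels and n_clusters) relies on module-level fixtures defined elsewhere in scikit-learn's test module. A minimal sketch of setup in that spirit, with illustrative values rather than the exact upstream ones:

import numpy as np
from sklearn.datasets import make_blobs

# Illustrative fixtures: a blob dataset with known cluster labels.
n_clusters = 3
centers = np.eye(n_clusters, 5)
S, true_labels = make_blobs(n_samples=1000, centers=centers,
                            cluster_std=1.0, random_state=42)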
Example 2
def test_spectral_embedding_precomputed_affinity(X, seed=36):
    # Test spectral embedding with precomputed kernel
    gamma = 1.0
    se_precomp = SpectralEmbedding(n_components=2,
                                   affinity="precomputed",
                                   random_state=np.random.RandomState(seed))
    se_rbf = SpectralEmbedding(n_components=2,
                               affinity="rbf",
                               gamma=gamma,
                               random_state=np.random.RandomState(seed))
    embed_precomp = se_precomp.fit_transform(rbf_kernel(X, gamma=gamma))
    embed_rbf = se_rbf.fit_transform(X)
    assert_array_almost_equal(se_precomp.affinity_matrix_,
                              se_rbf.affinity_matrix_)
    _assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)
Example 3
def se(X_train_scaled, X_test_scaled, num_components):
    # Spectral Embedding (Laplacian eigenmaps)
    embedding = SpectralEmbedding(n_components=num_components)
    X_train_se = embedding.fit_transform(X_train_scaled)
    # Caution: this refits on the test set, so the two embeddings do not
    # share a coordinate system (SpectralEmbedding has no transform method).
    X_test_se = embedding.fit_transform(X_test_scaled)

    return X_train_se, X_test_se
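As the comment above notes, separate fit_transform calls yield embeddings in unrelated coordinate systems. A hedged sketch of the usual workaround, embedding the stacked data and splitting afterwards (Example 23 below does the same; the function name se_joint is my own):

import numpy as np
from sklearn.manifold import SpectralEmbedding

def se_joint(X_train_scaled, X_test_scaled, num_components):
    # Embed train and test together so they share one coordinate system.
    stacked = np.concatenate((X_train_scaled, X_test_scaled))
    embedded = SpectralEmbedding(n_components=num_components).fit_transform(stacked)
    return embedded[:len(X_train_scaled)], embedded[len(X_train_scaled):]

The trade-off is that the test data then influences the fitted embedding, which matters for strict train/test separation.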
Example 4
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        y : Ignored.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        logging.debug('Computing locally adjusted affinities.')
        d = dist.squareform(dist.pdist(X, metric=self.distance))

        # values of n_components in [0, 1] are treated as a fraction of the
        # feature count (at least one component)
        if 0 <= self.n_components <= 1:
            n_components = max(int(self.n_components * X.shape[1]), 1)
        else:
            n_components = self.n_components

        logging.debug('Computing embedding of affinities.')
        embedder = SpectralEmbedding(n_components=n_components,
                                     affinity='precomputed_nearest_neighbors',
                                     gamma=None,
                                     random_state=self.random_state,
                                     eigen_solver=self.eigen_solver,
                                     n_neighbors=self.n_neighbors,
                                     n_jobs=self.n_jobs)
        self.embedding_ = embedder.fit_transform(d)
        return self
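For context, affinity='precomputed_nearest_neighbors' (available in newer scikit-learn releases) interprets its input as a matrix of precomputed distances and builds a binary kNN affinity from it, which is why fit above can pass squareform(pdist(X)) directly. A minimal standalone sketch of the same call on illustrative data:

import numpy as np
import scipy.spatial.distance as dist
from sklearn.manifold import SpectralEmbedding

# Dense pairwise distances in, kNN affinity graph and Laplacian eigenmaps out.
X = np.random.RandomState(0).normal(size=(100, 8))
d = dist.squareform(dist.pdist(X, metric='euclidean'))
emb = SpectralEmbedding(n_components=2,
                        affinity='precomputed_nearest_neighbors',
                        n_neighbors=10).fit_transform(d)
print(emb.shape)  # (100, 2)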
Example 5
def perform_spectral_embedding(superreads, min_cv_start, max_cv_end):
    X = get_score_matrix(superreads, min_cv_start, max_cv_end)
    return SpectralEmbedding(
        n_components=2,
        random_state=0,
        affinity='precomputed'
    ).fit_transform(X.toarray())  # toarray(): newer scikit-learn rejects the np.matrix that todense() returns
Example 6
    def _find_add_idx_cluster(self, gram_matrix):
        ''' find representative samples from a pool using a clustering method
        :gram_matrix: gram matrix of the pool samples
        :return: list of idx
        '''
        embedding = SpectralEmbedding(
            n_components=self.add_size,
            affinity='precomputed').fit_transform(gram_matrix)

        cluster_result = KMeans(n_clusters=self.add_size,
                                random_state=0).fit_predict(embedding)
        # find all center of clustering
        center = np.array([
            embedding[cluster_result == i].mean(axis=0)
            for i in range(self.add_size)
        ])
        total_distance = defaultdict(dict)  # cluster_idx -> {score: sample idx}
        for i in range(len(cluster_result)):
            cluster_class = cluster_result[i]
            # score: sum of inverse distances from this sample to the centres
            # of the *other* clusters; taking the minimum below therefore
            # keeps the sample farthest, on average, from the other clusters
            score = (np.square(
                embedding[i] - np.delete(center, cluster_class, axis=0)
            ).sum(axis=1) ** -0.5).sum()
            total_distance[cluster_class][score] = i
        add_idx = [
            total_distance[i][min(total_distance[i].keys())]
            for i in range(self.add_size)
        ]
        return add_idx
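A hedged standalone sketch of the same idea on illustrative data, using an RBF kernel as the gram matrix. This simplified variant keeps the sample closest to its own cluster centre, where the method above instead scores samples by inverse distance to the other centres:

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.manifold import SpectralEmbedding
from sklearn.cluster import KMeans

# Embed the gram matrix, cluster it, and keep one representative per cluster.
pool = np.random.RandomState(0).normal(size=(200, 5))
gram = rbf_kernel(pool)
emb = SpectralEmbedding(n_components=4, affinity='precomputed').fit_transform(gram)
labels = KMeans(n_clusters=4, random_state=0, n_init=10).fit_predict(emb)
centers = np.array([emb[labels == i].mean(axis=0) for i in range(4)])
reps = [int(np.argmin(np.where(labels == i,
                               np.linalg.norm(emb - centers[i], axis=1),
                               np.inf)))
        for i in range(4)]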
Example 7
    def classifiers(self):
        graph_builder = LabelCooccurrenceGraphBuilder(weighted=True,
                                                      include_self_edges=False)

        param_dicts = {
            'GraphFactorization': dict(epoch=1),
            'GraRep': dict(Kstep=2),
            'HOPE': dict(),
            'LaplacianEigenmaps': dict(),
            'LINE': dict(epoch=1, order=1),
            'LLE': dict(),
        }

        if not (sys.version_info[0] == 2
                or platform.architecture()[0] == '32bit'):
            for embedding in OpenNetworkEmbedder._EMBEDDINGS:
                if embedding == 'LLE':
                    dimension = 3
                else:
                    dimension = 4

                yield EmbeddingClassifier(
                    OpenNetworkEmbedder(copy(graph_builder), embedding,
                                        dimension, 'add', True,
                                        param_dicts[embedding]),
                    LinearRegression(), MLkNN(k=2))

        yield EmbeddingClassifier(
            SKLearnEmbedder(SpectralEmbedding(n_components=2)),
            LinearRegression(), MLkNN(k=2))

        yield EmbeddingClassifier(CLEMS(metrics.accuracy_score, True),
                                  LinearRegression(), MLkNN(k=2), True)
Example 8
    def __find_distant_samples(self, gram_matrix: List[List[float]]) -> List[int]:
        """Find distant samples from a pool using clustering method.

        Parameters
        ----------
        gram_matrix: gram matrix of the samples.

        Returns
        -------
        List of idx
        """
        embedding = SpectralEmbedding(
            n_components=self.args.add_size,
            affinity='precomputed'
        ).fit_transform(gram_matrix)

        cluster_result = KMeans(
            n_clusters=self.args.add_size,
            # random_state=self.args.seed
        ).fit_predict(embedding)
        # find all center of clustering
        center = np.array([embedding[cluster_result == i].mean(axis=0)
                           for i in range(self.args.add_size)])
        total_distance = defaultdict(dict)  # cluster_idx -> {score: sample idx}
        for i in range(len(cluster_result)):
            cluster_class = cluster_result[i]
            # score: sum of inverse distances to the other cluster centres;
            # the minimum keeps the sample farthest from the other clusters
            score = (np.square(
                embedding[i] - np.delete(center, cluster_class, axis=0)
            ).sum(axis=1) ** -0.5).sum()
            total_distance[cluster_class][score] = i
        add_idx = [total_distance[i][min(total_distance[i].keys())]
                   for i in range(self.args.add_size)]
        return add_idx
Example 9
 def sub_window_creation(self, images, kernels):
     gb_all_sw = []
     label = []
     for i in range(0, 100, 11):
         for j in range(0, 50, 11):
             for k in range(len(images)):
                 image = images[k]
                 sw_image = image[i:i+50, j:j+50]
                 sw_image = cv2.resize(sw_image, dsize=(12, 12), interpolation=cv2.INTER_NEAREST)
                 # print('sw size', sw_image.shape)
                 gabored_image = Preprocessing.process(self, sw_image, kernels)
                 # print('gab size', gabored_image.shape)
                 # model = SpectralEmbedding(n_components=100, n_neighbors=10)
                 # reduced_sw = model.fit_transform(gabored_image.reshape(-1, 1))
                 # print('gab size', gabored_image.reshape(1, -1).shape)
                 # gb_all_sw.append(gabored_image)
                 gb_all_sw.append(gabored_image)
                 label.append(int(k/4))
                 # print('red size', reduced_sw.reshape(-1, 1).shape)
                 # plt.imshow(image[i:i+50, j:j+50], cmap='gray')
                 # plt.show()
                 # plt.imshow(gabored_image, cmap='gray')
                 # plt.show()
     print(len(gb_all_sw))
     print(len(gb_all_sw[0]))
     # LEM (Laplacian eigenmaps) dimension reduction
     model = SpectralEmbedding(n_components=100, n_neighbors=10)
     reduced_sw = model.fit_transform(gb_all_sw)
     print('final', len(reduced_sw))
     print('final', reduced_sw[0].shape)
     print(label)
Example 10
def test_spectral_embedding_unknown_eigensolver(seed=36):
    # Test that SpectralEmbedding fails with an unknown eigen_solver
    se = SpectralEmbedding(n_components=1, affinity="precomputed",
                           random_state=np.random.RandomState(seed),
                           eigen_solver="<unknown>")
    with pytest.raises(ValueError):
        se.fit(S)
Example 11
 def initial_embed(self, reduce, d):
     reduce = reduce.lower()
     assert reduce in ['isomap', 'ltsa', 'mds', 'lle', 'se', 'pca', 'none']
     if reduce == 'isomap':
         from sklearn.manifold import Isomap
         embed = Isomap(n_components=d)
     elif reduce == 'ltsa':
         from sklearn.manifold import LocallyLinearEmbedding
         embed = LocallyLinearEmbedding(n_components=d,
                                        n_neighbors=5,
                                        method='ltsa')
     elif reduce == 'mds':
         from sklearn.manifold import MDS
         embed = MDS(n_components=d, metric=False)
     elif reduce == 'lle':
         from sklearn.manifold import LocallyLinearEmbedding
         embed = LocallyLinearEmbedding(n_components=d,
                                        n_neighbors=5,
                                        eigen_solver='dense')
     elif reduce == 'se':
         from sklearn.manifold import SpectralEmbedding
         embed = SpectralEmbedding(n_components=d)
     elif reduce == 'pca':
         from sklearn.decomposition import PCA
         embed = PCA(n_components=d)
     if reduce == 'none':
         self.embed = lambda x: x
     else:
         self.embed = lambda x: embed.fit_transform(x)
Example 12
def get_dim_reds_scikit(pct_features):
    n_components = max(int(pct_features * num_features), 1)
    return [
        LinearDiscriminantAnalysis(n_components=n_components),
        TruncatedSVD(n_components=n_components),
        #SparseCoder(n_components=n_components),
        DictionaryLearning(n_components=n_components),
        FactorAnalysis(n_components=n_components),
        SparsePCA(n_components=n_components),
        NMF(n_components=n_components),
        PCA(n_components=n_components),
        RandomizedPCA(n_components=n_components),  # removed in modern scikit-learn; PCA(svd_solver='randomized') replaces it
        KernelPCA(kernel="linear", n_components=n_components),
        KernelPCA(kernel="poly", n_components=n_components),
        KernelPCA(kernel="rbf", n_components=n_components),
        KernelPCA(kernel="sigmoid", n_components=n_components),
        KernelPCA(kernel="cosine", n_components=n_components),
        Isomap(n_components=n_components),
        LocallyLinearEmbedding(n_components=n_components,
                               eigen_solver='auto',
                               method='standard'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='modified'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='ltsa'),
        SpectralEmbedding(n_components=n_components)
    ]
Example 13
def plot_md_scaling(mdist, nd=3):
    """
    Plots the dimensionality rescaling of the distance matrix

    :param mdist: precomputed pairwise distance matrix
    :param nd: number of embedding dimensions (2 or 3)
    :return:
    """
    #mds = MDS(n_components=3, dissimilarity='precomputed')

    mds = SpectralEmbedding(n_components=nd,
                            affinity='precomputed',
                            n_neighbors=3)
    pdata = mds.fit_transform(mdist)

    fig = plt.figure(figsize=(10, 10))
    if nd == 2:
        ax = fig.add_subplot(111)
        ax.scatter(pdata[:, 0], pdata[:, 1], cmap=plt.get_cmap("Blues"))
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(pdata[:, 0],
                   pdata[:, 1],
                   pdata[:, 2],
                   depthshade=False,
                   cmap=plt.get_cmap("Blues"))

    plt.show()
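One caveat worth flagging: with affinity='precomputed', SpectralEmbedding expects a similarity (affinity) matrix, while mdist above is a distance matrix, so large distances are read as strong connections. A hedged sketch of the usual Gaussian conversion, with the bandwidth sigma as an illustrative choice:

import numpy as np
from sklearn.manifold import SpectralEmbedding

def md_scaling_affinity(mdist, nd=3, sigma=None):
    # Convert distances to similarities before spectral embedding.
    sigma = sigma if sigma is not None else np.median(mdist)
    affinity = np.exp(-np.asarray(mdist) ** 2 / (2.0 * sigma ** 2))
    return SpectralEmbedding(n_components=nd,
                             affinity='precomputed').fit_transform(affinity)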
Example 14
    def test_spectral_embedding_transform(self):
        """
        Test the unsupervised transformation of molecules in
        MoleculeSet using SpectralEmbedding.

        """
        n_features = 20
        features = np.random.normal(size=(len(self.test_smiles), n_features))
        csv_fpath = self.smiles_seq_to_xl_or_csv(
            ftype="csv", feature_arr=features)
        molecule_set = MoleculeSet(
            molecule_database_src=csv_fpath,
            molecule_database_src_type="csv",
            similarity_measure="l0_similarity",
            is_verbose=True,

        )
        features = StandardScaler().fit_transform(features)
        features = SpectralEmbedding().fit_transform(features)
        error_matrix = features - \
            molecule_set.get_transformed_descriptors(method_="spectral_embedding")
        error_threshold = 1e-6
        self.assertLessEqual(
            error_matrix.min(),
            error_threshold,
            "Expected transformed molecular descriptors to be "
            "equal to SpectralEmbedding decomposed features",
        )
        remove(csv_fpath)
Example 15
def plotSpectralEmbedding(X, y, filenames=None, dim=3, num=-1, savefile='se.npy', logdir='plots'):
    """Plot the spectral embedding to TensorBoard.

    Params:
        X: {ndarray(n_samples, n_features)}
        y: {ndarray(n_samples)}
        logdir: {str}
    """
    print("dir: ", logdir)

    if num != -1:
        print("Randomly choose...")
        index = np.random.choice(list(range(X.shape[0])), num, replace=False)
        filenames = np.array(filenames)[index] if filenames is not None else None
        X = X[index]; y = y[index]

    print("SE...")
    X = SpectralEmbedding(n_components=dim).fit_transform(X)

    if filenames is None:
        images = None
    else:
        filenames = list(map(lambda x: x if os.path.isfile(x) else '{}/{}'.format(x, '1.jpg'), filenames))
        images = list(map(lambda x: cv2.imread(x, cv2.IMREAD_COLOR), filenames))
        images = torch.ByteTensor(np.array(list(map(lambda x: np.transpose(x, axes=[2, 0, 1]), images))))

    print("Ploting...")
    with SummaryWriter(logdir) as writer:
        writer.add_embedding(mat=X, metadata=y, label_img=images)
    print("------ Done ------")

    return X, y
Example 16
def cluster_responses(response_mat, n_clusters, corr_cut=0.6):
    """
    Clusters the neuron responses using spectral clustering
    :param response_mat: The response matrix with all neuron responses
    :param n_clusters: The desired number of clusters
    :param corr_cut: The correlation cutoff to consider a given neuron to be part of a cluster
    :return:
        [0]: The cluster ids
        [1]: 3D embedding coordinates for plotting
    """
    # create trial average
    response_mat = trial_average(response_mat, 3)
    # compute pairwise correlations
    pw_corrs = np.corrcoef(response_mat.T)
    pw_corrs[np.isnan(pw_corrs)] = 0
    pw_corrs[pw_corrs < 0.2] = 0
    # perform spectral clustering
    spec_clust = SpectralClustering(n_clusters, affinity="precomputed")
    clust_ids = spec_clust.fit_predict(pw_corrs)
    spec_emb = SpectralEmbedding(3, affinity="precomputed")
    coords = spec_emb.fit_transform(pw_corrs)
    # use correlation to cluster centroids to determine final cluster membership
    regressors = np.zeros((response_mat.shape[0], n_clusters))
    for i in range(n_clusters):
        regressors[:, i] = np.mean(response_mat[:, clust_ids == i], 1)
    for i in range(response_mat.shape[1]):
        max_ix = -1
        max_corr = 0
        for j in range(n_clusters):
            c = np.corrcoef(response_mat[:, i], regressors[:, j])[0, 1]
            if c >= corr_cut and c > max_corr:
                max_ix = j
                max_corr = c
        clust_ids[i] = max_ix
    return clust_ids, coords
Example 17
def test_spectral_embedding_amg_solver_failure(seed=36):
    # Test spectral embedding with amg solver failure, see issue #13393
    pytest.importorskip('pyamg')

    # The generated graph below is NOT fully connected if n_neighbors=3
    n_samples = 200
    n_clusters = 3
    n_features = 3
    centers = np.eye(n_clusters, n_features)
    S, true_labels = make_blobs(n_samples=n_samples,
                                centers=centers,
                                cluster_std=1.,
                                random_state=42)

    se_amg0 = SpectralEmbedding(n_components=3,
                                affinity="nearest_neighbors",
                                eigen_solver="amg",
                                n_neighbors=3,
                                random_state=np.random.RandomState(seed))
    embed_amg0 = se_amg0.fit_transform(S)

    for i in range(10):
        se_amg0.set_params(random_state=np.random.RandomState(seed + 1))
        embed_amg1 = se_amg0.fit_transform(S)

        assert _check_with_col_sign_flipping(embed_amg0, embed_amg1, 0.05)
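The helper _check_with_col_sign_flipping comes from the same scikit-learn test module; the point is that eigenvector-based embeddings are only defined up to a per-column sign. A minimal sketch of such a check (not the exact upstream implementation):

import numpy as np

def _check_with_col_sign_flipping(A, B, tol=0.0):
    # Two embeddings agree if every column matches up to a global sign flip.
    return all(np.allclose(A[:, i], B[:, i], atol=tol) or
               np.allclose(A[:, i], -B[:, i], atol=tol)
               for i in range(A.shape[1]))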
Example 18
def main():
    dataset = datasets.load_digits()

    X = dataset.data
    y = dataset.target

    plt.figure(figsize=(12, 8))

    # The main manifold-learning algorithms (plus PCA for comparison)
    manifolders = {
        'PCA': PCA(),
        'MDS': MDS(),
        'Isomap': Isomap(),
        'LLE': LocallyLinearEmbedding(),
        'Laplacian Eigenmaps': SpectralEmbedding(),
        't-SNE': TSNE(),
    }
    for i, (name, manifolder) in enumerate(manifolders.items()):
        plt.subplot(2, 3, i + 1)

        # Reduce the data to 2 dimensions with the manifold learner
        X_transformed = manifolder.fit_transform(X)

        # Plot the reduced result as a 2-D scatter plot, one colour per class
        plt.title(name)
        for label in np.unique(y):
            plt.scatter(X_transformed[y == label, 0], X_transformed[y == label, 1])

    plt.show()
Example 19
def test_spectral_embedding_unknown_affinity(seed=36):
    # Test that SpectralEmbedding fails with an unknown affinity type
    se = SpectralEmbedding(n_components=1,
                           affinity="<unknown>",
                           random_state=np.random.RandomState(seed))
    with pytest.raises(ValueError):
        se.fit(S)
Example 20
    def runSpectralEmbedding(self, X, n_components=2, n_clusters=2,
                             k_means_=False):

        # Create distance matrix
        self.create_distance_matrix(X)

        # Run spectral embedding for n_components
        embedding = SpectralEmbedding(n_components=n_components,
                                      affinity='precomputed',
                                      random_state=42,
                                      n_jobs=-1).fit(self.adjacencyMatrix)
        # Alternative way
        # embedding_otherapp = spectral_embedding(self.adjacencyMatrix,
        # n_components=n_components, norm_laplacian=True, random_state=42,
        # drop_first=True)

        # Run k means if set to True
        if k_means_:
            _, kmeans_labels, _ = k_means(X=embedding.embedding_,
                                          n_clusters=n_clusters,
                                          random_state=42, n_init=10)

            # Alternative embedding - More freedom, but slower
            # _, kmeans_labels2, _ = k_means(X=embedding_otherapp, n_clusters=
            # n_clusters, random_state=42, n_init=10)

            return kmeans_labels, embedding.embedding_
        else:
            return embedding.embedding_
Example 21
def draw_feature_vecs(X, model, n_samples):
    # create data from same and different classes
    c, data1 = create_1_data(n_samples, X)
    _, data0 = create_0_data(n_samples, X, category=c)

    # isolate last layer of model before dense layer
    layer_output = model.layers[-2].output
    activation_model = keras.models.Model(inputs=model.input,
                                          outputs=layer_output)

    features1 = activation_model.predict(data1)
    features0 = activation_model.predict(data0)
    features = np.concatenate((features1, features0), axis=0)

    # embed the features with spectral embedding (Laplacian eigenmaps)
    embedding = SpectralEmbedding(n_components=2)
    features_transformed = embedding.fit_transform(features)

    # plot classes
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111)
    ax.scatter(features_transformed[:n_samples, 0],
               features_transformed[:n_samples, 1],
               c='r',
               s=10,
               label='same class (1)')
    ax.scatter(features_transformed[n_samples:, 0],
               features_transformed[n_samples:, 1],
               c='b',
               s=10,
               label='diff class (0)')
    plt.title('Feature vectors for inputs from same/different classes')
    ax.legend()
    plt.show()
Example 22
def pltPer(X, y, W):
    f = plt.figure()
    if X.shape[1] > 3:
        # reduce dimensions for display purposes
        Xw = np.append(X, np.reshape(W, (1, len(W))),
                       0)  # append the weight vector W as an extra row
        #Xs = TSNE(n_components=2, random_state=0, verbose=1).fit_transform(Xw)
        #Xs = Isomap(n_components=2).fit_transform(Xw)
        Xs = SpectralEmbedding(n_components=2).fit_transform(Xw)
        #Xs = PCA(n_components=2, random_state=0).fit_transform(Xw)

        Xs = np.append(np.ones((Xs.shape[0], 1)), Xs,
                       1)  # add a column of ones
        X = Xs[:-1, :]
        W = Xs[-1, :]
        #print(W.shape)

    for n in range(len(y)):
        if y[n] == -1:
            plt.plot(X[n, 1], X[n, 2], 'r*')
        else:
            plt.plot(X[n, 1], X[n, 2], 'b.')
    m, b = -W[1] / W[2], -W[0] / W[2]
    l = np.linspace(min(X[:, 1]), max(X[:, 1]))
    plt.plot(l, m * l + b, 'k-')
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")
    plt.title("Linear Regression")
Example 23
 def embed_spectral(train, test):
     traintest = np.concatenate((train, test))
     from sklearn.manifold import SpectralEmbedding
     se = SpectralEmbedding(n_components=2, eigen_solver="arpack")
     X2d = se.fit_transform(traintest)
     X2d = MinMaxScaler().fit_transform(X2d)
     return X2d[:train.shape[0]], X2d[train.shape[0]:]
Example 24
 def __init__(self, n_nodes=None, 
              npcs=0.8, 
              embedding_dims=2,
              cov_estimator='corpcor', 
              cov_reg=None, 
              cov_indices=None,
              max_iter=10,
              sigma=0.01, 
              lam=1., 
              gamma=1.,
              n_neighbors=30,
              just_tree=False):
     self.n_nodes = n_nodes
     self.cov_reg = cov_reg
     self.cov_estimator = cov_estimator
     self.cov_indices = cov_indices
     self.max_iter = max_iter
     self.sigma = sigma
     self.lam = lam
     self.gamma = gamma
     self.npcs = npcs
     self.n_neighbors = n_neighbors
     self.embedding = SpectralEmbedding(n_components=embedding_dims, 
                                        affinity='precomputed')
     self.pca = PCA(n_components=self.npcs)
     self.just_tree = just_tree
Example 25
def mcfs(X, n_selected_features, i, n_emb, n_neighbors):
    """
    This function implements unsupervised feature selection for multi-cluster data.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    n_selected_features: {int}
        number of features to select
    i: {int: 0, 1}
        0: use MCFS
        1: use MCFS-I
    n_emb: {int}
        The dimension of the projected subspace.
    n_neighbors: {int}
        number of neighbors; 0 uses the estimator's default

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix
    """

    n_sample, n_feature = X.shape

    if i == 0:
        if n_neighbors == 0:
            spe = SpectralEmbedding(n_components=n_emb)
        else:
            spe = SpectralEmbedding(n_components=n_emb,
                                    n_neighbors=n_neighbors)
        Y = spe.fit_transform(X)
    elif i == 1:
        if n_neighbors == 0:
            iso = Isomap(n_components=n_emb)
        else:
            iso = Isomap(n_components=n_emb, n_neighbors=n_neighbors)
        Y = iso.fit_transform(X)

    # solve n_emb L1-regularized regression problems with LARS, each with a
    # cardinality constraint of n_selected_features (fresh loop variable so
    # the method-selector argument `i` is not shadowed)
    W = np.zeros((n_feature, n_emb))
    for k in range(n_emb):
        clf = linear_model.Lars(n_nonzero_coefs=n_selected_features)
        clf.fit(X, Y[:, k])
        W[:, k] = clf.coef_
    return W
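To turn the weight matrix W into a feature ranking, MCFS-style selection typically scores each feature by its largest absolute coefficient across the embedding dimensions. A short usage sketch, assuming X is the data matrix passed to mcfs above:

import numpy as np

# Rank features by their largest absolute LARS coefficient, keep the top ones.
W = mcfs(X, n_selected_features=50, i=0, n_emb=5, n_neighbors=0)
scores = np.max(np.abs(W), axis=1)
selected = np.argsort(scores)[::-1][:50]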
Example 26
    def Spectral_Embed(self, components):
        se_data = SpectralEmbedding(n_components=components).fit_transform(
            self.scaled)

        plt.scatter(se_data[:, 0], se_data[:, 1], s=0.05)
        plt.title("Phi-Psi Spectral Embedding")
        plt.savefig(self.sdir + "Spectral Embedding - " + self.fname + ".png")
        return se_data
Example 27
def spectralembed(X, finaldim):
    from sklearn.manifold import SpectralEmbedding
    # Affinity options for SpectralEmbedding:
    # - 'nearest_neighbors': build the affinity matrix from a knn graph
    # - 'rbf': build the affinity matrix with an RBF kernel
    # - 'precomputed': interpret the input as a precomputed affinity matrix
    embedding = SpectralEmbedding(n_components=finaldim, affinity='rbf')
    X_transformed = embedding.fit_transform(X.todense().transpose())
    return X_transformed, X.todense() * X_transformed
Example 28
 def _spectral_dbscan(fcd, n_dim=2, eps=0.3, min_samples=50):
     fcd = fcd - fcd.min()
     se = SpectralEmbedding(n_dim, affinity="precomputed")
     xi = se.fit_transform(fcd)
     pd = pdist(xi)
     eps = np.percentile(pd, int(100 * eps))
     db = DBSCAN(eps=eps, min_samples=min_samples).fit(xi)
     return xi.T, db.labels_
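A hedged usage sketch, assuming the imports the function relies on (SpectralEmbedding, pdist, DBSCAN) are in scope and substituting a correlation matrix for a real FCD matrix. The fcd - fcd.min() shift in the function makes the matrix non-negative so it can serve as a precomputed affinity:

import numpy as np

# Illustrative symmetric, correlation-like stand-in for an FCD matrix.
rng = np.random.RandomState(0)
ts = rng.normal(size=(300, 20))
fcd = np.corrcoef(ts)  # (300, 300), values in [-1, 1]
coords, labels = _spectral_dbscan(fcd, n_dim=2, eps=0.3, min_samples=5)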
Example 29
def LapEigenmap(affinity_matrix, dim, random_state):
    if random_state is None:
        random_state = np.random.RandomState()
    component_embedding = SpectralEmbedding(
        n_components=dim, affinity="precomputed",
        random_state=random_state).fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()
    return component_embedding
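A short usage sketch for LapEigenmap, assuming an RBF kernel matrix as the precomputed affinity (illustrative data):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.RandomState(0).normal(size=(100, 6))
emb = LapEigenmap(rbf_kernel(X), dim=2, random_state=np.random.RandomState(0))
print(emb.shape)  # (100, 2)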
Example 30
def component_layout(data,
                     n_components,
                     component_labels,
                     dim,
                     metric="euclidean",
                     metric_kwds={}):
    """Provide a layout relating the separate connected components. This is done
    by taking the centroid of each component and then performing a spectral embedding
    of the centroids.

    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The source data -- required so we can generate centroids for each
        connected component of the graph.

    n_components: int
        The number of distinct components to be laid out.

    component_labels: array of shape (n_samples)
        For each vertex in the graph the label of the component to
        which the vertex belongs.

    dim: int
        The chosen embedding dimension.

    metric: string or callable (optional, default 'euclidean')
        The metric used to measure distances among the source data points.

    metric_kwds: dict (optional, default {})
        Keyword arguments to be passed to the metric function.

    Returns
    -------
    component_embedding: array of shape (n_components, dim)
        The ``dim``-dimensional embedding of the ``n_components``-many
        connected components.
    """

    component_centroids = np.empty((n_components, data.shape[1]),
                                   dtype=np.float64)

    for label in range(n_components):
        component_centroids[label] = data[component_labels == label].mean(
            axis=0)

    distance_matrix = pairwise_distances(component_centroids,
                                         metric=metric,
                                         **metric_kwds)
    affinity_matrix = np.exp(-distance_matrix**2)

    component_embedding = SpectralEmbedding(
        n_components=dim,
        affinity="precomputed").fit_transform(affinity_matrix)
    component_embedding /= component_embedding.max()

    return component_embedding
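A hedged usage sketch for component_layout, assuming its own imports (numpy, pairwise_distances, SpectralEmbedding) are in scope and using toy labelled components:

import numpy as np

# Five labelled "connected components" drawn around staggered centres.
rng = np.random.RandomState(0)
data = np.vstack([rng.normal(loc=c, size=(40, 4)) for c in np.linspace(0, 2, 5)])
component_labels = np.repeat(np.arange(5), 40)
layout = component_layout(data, n_components=5,
                          component_labels=component_labels, dim=2)
print(layout.shape)  # (5, 2)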