Example #1
    def test_affinity_with_precomputed_neighbors(self):
        nn = NearestNeighbors(n_neighbors=30)
        nn.fit(self.x)
        distances, neighbors = nn.kneighbors(n_neighbors=30)

        knn_index = nearest_neighbors.PrecomputedNeighbors(
            neighbors, distances)
        init = initialization.random(self.x, random_state=0)

        for aff in [
                affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
                affinity.Uniform(knn_index=knn_index, k_neighbors=30),
                affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
                affinity.Multiscale(knn_index=knn_index, perplexities=[10,
                                                                       20]),
                affinity.MultiscaleMixture(knn_index=knn_index,
                                           perplexities=[10, 20]),
        ]:
            # Without initialization
            embedding = TSNE().fit(affinities=aff)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)

            # With initialization
            embedding = TSNE().fit(affinities=aff, initialization=init)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)
Example #2
    def test_affinity_with_queryable_knn_index(self):
        knn_index = nearest_neighbors.Sklearn(self.x, k=30)
        init = initialization.random(self.x, random_state=0)

        for aff in [
                affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
                affinity.Uniform(knn_index=knn_index, k_neighbors=30),
                affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
                affinity.Multiscale(knn_index=knn_index, perplexities=[10,
                                                                       20]),
                affinity.MultiscaleMixture(knn_index=knn_index,
                                           perplexities=[10, 20]),
        ]:
            # Without initialization
            embedding = TSNE().fit(affinities=aff)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)
            new_embedding = embedding.prepare_partial(self.x)
            new_embedding.optimize(50, learning_rate=1, inplace=True)
            self.eval_embedding(new_embedding, self.y,
                                f"transform::{aff.__class__.__name__}")

            # With initialization
            embedding = TSNE().fit(affinities=aff, initialization=init)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)
            new_embedding = embedding.prepare_partial(self.x)
            new_embedding.optimize(50, learning_rate=1, inplace=True)
            self.eval_embedding(new_embedding, self.y,
                                f"transform::{aff.__class__.__name__}")
Example #3
 def test_pca_init_with_only_affinities_passed(self):
     aff = affinity.PerplexityBasedNN(self.x, 5, method="exact")
     desired_init = initialization.spectral(aff.P)
     embedding = TSNE(early_exaggeration_iter=0,
                      n_iter=0,
                      initialization="pca").fit(affinities=aff)
     np.testing.assert_array_equal(embedding, desired_init)
Example #4
    def test_precomputed_dist_matrix_via_affinities_uses_spectral_init(self):
        x = np.random.normal(0, 1, (200, 5))
        d = squareform(pdist(x))

        aff = affinity.PerplexityBasedNN(d, metric="precomputed")
        desired_init = initialization.spectral(aff.P)
        embedding = TSNE(early_exaggeration_iter=0,
                         n_iter=0).fit(affinities=aff)
        np.testing.assert_array_equal(embedding, desired_init)
Example #5
 def test_1(self):
     init = initialization.pca(self.x)
     aff = affinity.PerplexityBasedNN(self.x, perplexity=30)
     embedding = openTSNE.TSNEEmbedding(init, aff)
     embedding.optimize(25, exaggeration=12, momentum=0.5, inplace=True)
     embedding.optimize(50, exaggeration=1, momentum=0.8, inplace=True)
     self.eval_embedding(embedding, self.y)
     new_embedding = embedding.transform(self.x)
     self.eval_embedding(new_embedding, self.y, "transform")
Example #6
    def test_affinity_with_precomputed_distances(self):
        d = squareform(pdist(self.x))
        knn_index = nearest_neighbors.PrecomputedDistanceMatrix(d, k=30)
        init = initialization.random(self.x, random_state=0)

        for aff in [
                affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
                affinity.Uniform(knn_index=knn_index, k_neighbors=30),
                affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
                affinity.Multiscale(knn_index=knn_index, perplexities=[10,
                                                                       20]),
                affinity.MultiscaleMixture(knn_index=knn_index,
                                           perplexities=[10, 20]),
        ]:
            # Without initialization
            embedding = TSNE().fit(affinities=aff)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)

            # With initialization
            embedding = TSNE().fit(affinities=aff, initialization=init)
            self.eval_embedding(embedding, self.y, aff.__class__.__name__)
Example #7
 def test_precomputed_affinity_is_passed_to_embedding_object(self):
     aff = affinity.PerplexityBasedNN(self.x, 5, method="exact")
     embedding = TSNE(early_exaggeration_iter=0,
                      n_iter=0,
                      initialization=self.init).fit(affinities=aff)
     self.assertIs(embedding.affinities, aff)
Example #8
import logging
from typing import Callable, Union

import numpy as np
from openTSNE import TSNEEmbedding, affinity, initialization


def art_of_tsne(X: np.ndarray,
                metric: Union[str, Callable] = "euclidean",
                exaggeration: float = -1,
                perplexity: int = 30,
                n_jobs: int = -1) -> TSNEEmbedding:
    """
    Implementation of Dmitry Kobak and Philipp Berens
    "The art of using t-SNE for single-cell transcriptomics" based on openTSNE.
    See https://doi.org/10.1038/s41467-019-13056-x | www.nature.com/naturecommunications
    Args:
        X				The data matrix of shape (n_cells, n_genes) i.e. (n_samples, n_features)
        metric			Any metric allowed by PyNNDescent (default: 'euclidean')
        exaggeration	The exaggeration to use for the embedding
        perplexity		The perplexity to use for the embedding

    Returns:
        The embedding as an opentsne.TSNEEmbedding object (which can be cast to an np.ndarray)
    """
    n = X.shape[0]
    if n > 100_000:
        if exaggeration == -1:
            exaggeration = 1 + n / 333_333
        # Subsample, optimize, then add the remaining cells and optimize again
        # Also, use an exaggeration that grows with the dataset size (1 + n / 333,333)
        logging.info(f"Creating subset of {n // 40} elements")
        # Subsample and run a regular art_of_tsne on the subset
        indices = np.random.permutation(n)
        reverse = np.argsort(indices)
        X_sample, X_rest = X[indices[:n // 40]], X[indices[n // 40:]]
        logging.info(f"Embedding subset")
        Z_sample = art_of_tsne(X_sample)

        logging.info(
            f"Preparing partial initial embedding of the {n - n // 40} remaining elements"
        )
        if isinstance(Z_sample.affinities, affinity.Multiscale):
            rest_init = Z_sample.prepare_partial(X_rest,
                                                 k=1,
                                                 perplexities=[1 / 3, 1 / 3])
        else:
            rest_init = Z_sample.prepare_partial(X_rest, k=1, perplexity=1 / 3)
        logging.info(f"Combining the initial embeddings, and standardizing")
        init_full = np.vstack((Z_sample, rest_init))[reverse]
        init_full = init_full / (np.std(init_full[:, 0]) * 10000)

        logging.info(f"Creating multiscale affinities")
        affinities = affinity.PerplexityBasedNN(X,
                                                perplexity=perplexity,
                                                metric=metric,
                                                method="approx",
                                                n_jobs=n_jobs)
        logging.info(f"Creating TSNE embedding")
        Z = TSNEEmbedding(init_full,
                          affinities,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        logging.info(f"Optimizing, stage 1")
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        logging.info(f"Optimizing, stage 2")
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    elif n > 3_000:
        if exaggeration == -1:
            exaggeration = 1
        # Use multiscale perplexity
        affinities_multiscale_mixture = affinity.Multiscale(
            X,
            perplexities=[perplexity, n / 100],
            metric=metric,
            method="approx",
            n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          affinities_multiscale_mixture,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    else:
        if exaggeration == -1:
            exaggeration = 1
        # Just a plain TSNE with high learning rate
        lr = max(200, n / 12)
        aff = affinity.PerplexityBasedNN(X,
                                         perplexity=perplexity,
                                         metric=metric,
                                         method="approx",
                                         n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          aff,
                          learning_rate=lr,
                          n_jobs=n_jobs,
                          negative_gradient_method="fft")
        Z.optimize(250,
                   exaggeration=12,
                   momentum=0.5,
                   inplace=True,
                   n_jobs=n_jobs)
        Z.optimize(750,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   inplace=True,
                   n_jobs=n_jobs)
    return Z
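A minimal usage sketch for the art_of_tsne helper above, assuming a dense matrix of PCA-reduced single-cell data is already available as a NumPy array; the input here is synthetic and purely illustrative:

import numpy as np

# Synthetic stand-in for PCA-reduced expression data: 10,000 cells x 50 components.
X = np.random.normal(0, 1, (10_000, 50))

# With 10,000 cells this takes the 3,000 < n <= 100,000 branch above,
# i.e. multiscale affinities with perplexities [30, n / 100] and PCA initialization.
Z = art_of_tsne(X, metric="euclidean", perplexity=30, n_jobs=-1)

# TSNEEmbedding behaves like an ndarray, so the coordinates can be used directly.
coords = np.asarray(Z)
print(coords.shape)  # (10000, 2)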
Example #9
 def test_optimize_runs_properly(self):
     x = np.random.normal(100, 50, (25, 4))
     aff = affinity.PerplexityBasedNN(x, 5, method="exact")
     tsne = TSNE(affinities=aff)
     embedding = tsne.fit(x)
     self.assertIs(embedding.affinities, aff)
Example #10
 def test_affinities_passed_to_embedding(self):
     x = np.random.normal(100, 50, (25, 4))
     aff = affinity.PerplexityBasedNN(x, 5, method="exact")
     tsne = TSNE(affinities=aff)
     embedding = tsne.prepare_initial(x)
     self.assertIs(embedding.affinities, aff)