def test_affinity_with_precomputed_neighbors(self):
    """Every affinity class should accept a ``PrecomputedNeighbors`` index.

    Builds the k-NN graph up front with sklearn, wraps it in a
    ``PrecomputedNeighbors`` index, and checks the resulting embedding
    both with and without an explicit initialization.
    """
    nn = NearestNeighbors(n_neighbors=30)
    nn.fit(self.x)
    distances, neighbors = nn.kneighbors(n_neighbors=30)
    knn_index = nearest_neighbors.PrecomputedNeighbors(neighbors, distances)
    init = initialization.random(self.x, random_state=0)

    affinities_to_check = (
        affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
        affinity.Uniform(knn_index=knn_index, k_neighbors=30),
        affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
        affinity.Multiscale(knn_index=knn_index, perplexities=[10, 20]),
        affinity.MultiscaleMixture(knn_index=knn_index, perplexities=[10, 20]),
    )
    for aff in affinities_to_check:
        name = aff.__class__.__name__
        # Without explicit initialization
        embedding = TSNE().fit(affinities=aff)
        self.eval_embedding(embedding, self.y, name)
        # With explicit initialization
        embedding = TSNE().fit(affinities=aff, initialization=init)
        self.eval_embedding(embedding, self.y, name)
def test_affinity_with_queryable_knn_index(self):
    """Affinities built on a queryable knn index should support ``transform``.

    A queryable index (here ``Sklearn``) can answer neighbor queries for
    new points, so after fitting we also verify ``prepare_partial`` plus
    optimization on the same data — with and without an explicit
    initialization.
    """
    knn_index = nearest_neighbors.Sklearn(self.x, k=30)
    init = initialization.random(self.x, random_state=0)

    for aff in (
        affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
        affinity.Uniform(knn_index=knn_index, k_neighbors=30),
        affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
        affinity.Multiscale(knn_index=knn_index, perplexities=[10, 20]),
        affinity.MultiscaleMixture(knn_index=knn_index, perplexities=[10, 20]),
    ):
        name = aff.__class__.__name__
        # Run the identical fit/transform cycle once without and once with
        # an explicit initialization.
        for fit_kwargs in ({}, {"initialization": init}):
            embedding = TSNE().fit(affinities=aff, **fit_kwargs)
            self.eval_embedding(embedding, self.y, name)
            partial = embedding.prepare_partial(self.x)
            partial.optimize(50, learning_rate=1, inplace=True)
            self.eval_embedding(partial, self.y, f"transform::{name}")
def test_pca_init_with_only_affinities_passed(self):
    """With only affinities (no data), "pca" init should fall back to spectral.

    PCA initialization needs the raw data matrix; when ``fit`` receives
    affinities alone, the expected behavior (asserted here) is that the
    embedding equals the spectral initialization of the affinity matrix.
    """
    aff = affinity.PerplexityBasedNN(self.x, 5, method="exact")
    expected = initialization.spectral(aff.P)
    # Zero optimization iterations so the returned embedding is exactly
    # the initialization.
    tsne = TSNE(early_exaggeration_iter=0, n_iter=0, initialization="pca")
    embedding = tsne.fit(affinities=aff)
    np.testing.assert_array_equal(embedding, expected)
def test_precomputed_dist_matrix_via_affinities_uses_spectral_init(self):
    """Precomputed-distance affinities should default to spectral init."""
    data = np.random.normal(0, 1, (200, 5))
    dist_matrix = squareform(pdist(data))
    aff = affinity.PerplexityBasedNN(dist_matrix, metric="precomputed")
    expected = initialization.spectral(aff.P)
    # No optimization iterations: the fit result must be the bare
    # initialization, which should equal the spectral layout.
    embedding = TSNE(early_exaggeration_iter=0, n_iter=0).fit(affinities=aff)
    np.testing.assert_array_equal(embedding, expected)
def test_1(self):
    """End-to-end: manual TSNEEmbedding optimization followed by transform."""
    init = initialization.pca(self.x)
    aff = affinity.PerplexityBasedNN(self.x, perplexity=30)
    embedding = openTSNE.TSNEEmbedding(init, aff)

    # Early-exaggeration phase, then the standard phase.
    embedding.optimize(25, exaggeration=12, momentum=0.5, inplace=True)
    embedding.optimize(50, exaggeration=1, momentum=0.8, inplace=True)
    self.eval_embedding(embedding, self.y)

    # Embedding new (here: the same) points via transform.
    transformed = embedding.transform(self.x)
    self.eval_embedding(transformed, self.y, "transform")
def test_affinity_with_precomputed_distances(self):
    """Every affinity class should accept a ``PrecomputedDistanceMatrix`` index.

    Fix: ``eval_embedding`` was previously called without the label array
    ``self.y``, which shifted the affinity-class name into the label slot —
    inconsistent with the sibling precomputed-neighbors / queryable-index
    tests, which all pass ``(embedding, self.y, name)``.
    """
    d = squareform(pdist(self.x))
    knn_index = nearest_neighbors.PrecomputedDistanceMatrix(d, k=30)
    init = initialization.random(self.x, random_state=0)
    for aff in [
        affinity.PerplexityBasedNN(knn_index=knn_index, perplexity=30),
        affinity.Uniform(knn_index=knn_index, k_neighbors=30),
        affinity.FixedSigmaNN(knn_index=knn_index, sigma=1),
        affinity.Multiscale(knn_index=knn_index, perplexities=[10, 20]),
        affinity.MultiscaleMixture(knn_index=knn_index, perplexities=[10, 20]),
    ]:
        # Without initialization
        embedding = TSNE().fit(affinities=aff)
        self.eval_embedding(embedding, self.y, aff.__class__.__name__)
        # With initialization
        embedding = TSNE().fit(affinities=aff, initialization=init)
        self.eval_embedding(embedding, self.y, aff.__class__.__name__)
def test_precomputed_affinity_is_passed_to_embedding_object(self):
    """The exact affinity object given to ``fit`` must end up on the embedding."""
    aff = affinity.PerplexityBasedNN(self.x, 5, method="exact")
    tsne = TSNE(
        early_exaggeration_iter=0,
        n_iter=0,
        initialization=self.init,
    )
    embedding = tsne.fit(affinities=aff)
    # Identity, not equality: no copy or re-computation may happen.
    self.assertIs(embedding.affinities, aff)
def art_of_tsne(X: np.ndarray,
                metric: Union[str, Callable] = "euclidean",
                exaggeration: float = -1,
                perplexity: int = 30,
                n_jobs: int = -1) -> TSNEEmbedding:
    """
    Implementation of Dmitry Kobak and Philipp Berens "The art of using t-SNE
    for single-cell transcriptomics" based on openTSNE.
    See https://doi.org/10.1038/s41467-019-13056-x | www.nature.com/naturecommunications

    Args:
        X            The data matrix of shape (n_cells, n_genes) i.e. (n_samples, n_features)
        metric       Any metric allowed by PyNNDescent (default: 'euclidean')
        exaggeration The exaggeration to use for the embedding; -1 selects a
                     size-dependent default (1 + n / 333_333 for n > 100_000,
                     otherwise 1)
        perplexity   The perplexity to use for the embedding
        n_jobs       Number of parallel jobs (default: -1, i.e. all cores)

    Returns:
        The embedding as an opentsne.TSNEEmbedding object (which can be cast
        to an np.ndarray)
    """
    n = X.shape[0]
    if n > 100_000:
        # Very large dataset: subsample (1/40th), embed the subsample, place
        # each remaining cell at its nearest subsample neighbor, then optimize
        # the full embedding with a size-dependent exaggeration.
        if exaggeration == -1:
            exaggeration = 1 + n / 333_333
        logging.info("Creating subset of %d elements", n // 40)
        # Subsample and run a regular art_of_tsne on the subset
        indices = np.random.permutation(n)
        reverse = np.argsort(indices)
        X_sample, X_rest = X[indices[:n // 40]], X[indices[n // 40:]]
        logging.info("Embedding subset")
        Z_sample = art_of_tsne(X_sample)
        logging.info(
            "Preparing partial initial embedding of the %d remaining elements",
            n - n // 40,
        )
        # Multiscale affinities take a list of perplexities; the others take
        # a single value.
        if isinstance(Z_sample.affinities, affinity.Multiscale):
            rest_init = Z_sample.prepare_partial(X_rest, k=1, perplexities=[1 / 3, 1 / 3])
        else:
            rest_init = Z_sample.prepare_partial(X_rest, k=1, perplexity=1 / 3)
        logging.info("Combining the initial embeddings, and standardizing")
        init_full = np.vstack((Z_sample, rest_init))[reverse]
        # Shrink the initialization so optimization starts from a tight layout.
        init_full = init_full / (np.std(init_full[:, 0]) * 10000)

        # NOTE(review): the original log line said "multiscale affinities",
        # but a plain perplexity-based affinity is constructed here.
        logging.info("Creating perplexity-based affinities")
        affinities = affinity.PerplexityBasedNN(X,
                                                perplexity=perplexity,
                                                metric=metric,
                                                method="approx",
                                                n_jobs=n_jobs)
        logging.info("Creating TSNE embedding")
        Z = TSNEEmbedding(init_full,
                          affinities,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        logging.info("Optimizing, stage 1")
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        logging.info("Optimizing, stage 2")
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    elif n > 3_000:
        if exaggeration == -1:
            exaggeration = 1
        # Use multiscale perplexities: the user's perplexity plus a coarse
        # n / 100 scale to preserve global structure.
        affinities_multiscale_mixture = affinity.Multiscale(X,
                                                            perplexities=[perplexity, n / 100],
                                                            metric=metric,
                                                            method="approx",
                                                            n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          affinities_multiscale_mixture,
                          negative_gradient_method="fft",
                          n_jobs=n_jobs)
        Z.optimize(n_iter=250,
                   inplace=True,
                   exaggeration=12,
                   momentum=0.5,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
        Z.optimize(n_iter=750,
                   inplace=True,
                   exaggeration=exaggeration,
                   momentum=0.8,
                   learning_rate=n / 12,
                   n_jobs=n_jobs)
    else:
        if exaggeration == -1:
            exaggeration = 1
        # Small dataset: plain t-SNE with a high learning rate
        lr = max(200, n / 12)
        aff = affinity.PerplexityBasedNN(X,
                                         perplexity=perplexity,
                                         metric=metric,
                                         method="approx",
                                         n_jobs=n_jobs)
        init = initialization.pca(X)
        Z = TSNEEmbedding(init,
                          aff,
                          learning_rate=lr,
                          n_jobs=n_jobs,
                          negative_gradient_method="fft")
        Z.optimize(250, exaggeration=12, momentum=0.5, inplace=True, n_jobs=n_jobs)
        Z.optimize(750, exaggeration=exaggeration, momentum=0.8, inplace=True, n_jobs=n_jobs)
    return Z
def test_optimize_runs_properly(self):
    """Affinities passed to the TSNE constructor must be used by ``fit``."""
    data = np.random.normal(100, 50, (25, 4))
    precomputed_aff = affinity.PerplexityBasedNN(data, 5, method="exact")
    embedding = TSNE(affinities=precomputed_aff).fit(data)
    # Identity check: fit must reuse the object, not rebuild affinities.
    self.assertIs(embedding.affinities, precomputed_aff)
def test_affinities_passed_to_embedding(self):
    """Constructor affinities must propagate through ``prepare_initial``."""
    data = np.random.normal(100, 50, (25, 4))
    precomputed_aff = affinity.PerplexityBasedNN(data, 5, method="exact")
    embedding = TSNE(affinities=precomputed_aff).prepare_initial(data)
    # The prepared embedding must carry the exact same affinity object.
    self.assertIs(embedding.affinities, precomputed_aff)