def transform(self):
    affinities = Affinities()
    affinities.P = self.data.tocsr()

    if self.learning_rate == "auto":
        n = self.init.shape[0]
        self.learning_rate = n / self.early_exaggeration

    emb = openTSNE.TSNEEmbedding(
        embedding=self.init,
        affinities=affinities,
        negative_gradient_method=self.negative_gradient_method,
        learning_rate=self.learning_rate,
        n_jobs=self.n_jobs,
        max_step_norm=self.max_step_norm,
        random_state=self.random_state,
        callbacks_every_iters=self.save_iter_freq,
        callbacks=self.saver,
        optimizer=GDAnneal(),
    )
    self.data_ = emb.optimize(
        n_iter=self.n_iter,
        exaggeration=self.exaggeration,
        momentum=self.momentum,
        eps=self.eps,
        a=self.a,
        r=self.r,
        elastic_const=self.elastic_const,
        inplace=True,
        n_jobs=self.n_jobs,
        propagate_exception=True,
    )
    return self.data_
def run(self, n_samples=1000):
    x, y = self.load_data(n_samples=n_samples)
    print("-" * 80)

    start = time.time()

    start_aff = time.time()
    affinity = openTSNE.affinity.PerplexityBasedNN(
        x,
        perplexity=self.perplexity,
        method="approx",
        n_jobs=self.n_jobs,
    )
    print("openTSNE: NN search", time.time() - start_aff)

    init = openTSNE.initialization.random(x.shape[0], n_components=2)

    start_optim = time.time()
    embedding = openTSNE.TSNEEmbedding(
        init,
        affinity,
        learning_rate=self.learning_rate,
        n_jobs=self.n_jobs,
        negative_gradient_method="fft",
        theta=0.5,
        min_num_intervals=10,
        ints_in_interval=1,
    )
    embedding.optimize(250, exaggeration=12, momentum=0.8, inplace=True)
    embedding.optimize(750, momentum=0.5, inplace=True)
    print("openTSNE: Optimization", time.time() - start_optim)

    print("openTSNE: Full", time.time() - start)
def test_transform_with_standard_affinity(self):
    init = openTSNE.initialization.random(self.x)
    aff = openTSNE.affinity.PerplexityBasedNN(self.x, 5, method="exact")
    embedding = openTSNE.TSNEEmbedding(init, aff, negative_gradient_method="bh")
    embedding.optimize(100, inplace=True)

    # This should not raise an error
    embedding.transform(self.x_test)
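# Aside (assumed addition, not part of the original test suite): `transform`
# is the convenience path; openTSNE also exposes `prepare_partial`, which
# returns a partial embedding of the new points that can then be optimized
# explicitly, e.g. under the same fixtures as the test above:
#
#     partial = embedding.prepare_partial(self.x_test, perplexity=5)
#     partial.optimize(n_iter=100, learning_rate=0.1)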
def test_2(self):
    init = initialization.pca(self.x)
    aff = affinity.MultiscaleMixture(self.x, perplexities=[5, 30])
    embedding = openTSNE.TSNEEmbedding(init, aff)
    embedding.optimize(25, exaggeration=12, momentum=0.5, inplace=True)
    embedding.optimize(50, exaggeration=1, momentum=0.8, inplace=True)
    self.eval_embedding(embedding, self.y)

    new_embedding = embedding.transform(self.x)
    self.eval_embedding(new_embedding, self.y, "transform")
def test_1(self):
    init = initialization.pca(self.x)
    aff = affinity.PerplexityBasedNN(self.x, perplexity=30)
    embedding = openTSNE.TSNEEmbedding(init, aff)
    embedding.optimize(25, exaggeration=12, momentum=0.5, inplace=True)
    embedding.optimize(50, exaggeration=1, momentum=0.8, inplace=True)
    self.eval_embedding(embedding, self.y)

    new_embedding = embedding.transform(self.x)
    self.eval_embedding(new_embedding, self.y, "transform")
def test_transform_with_multiscale_affinity(self):
    init = openTSNE.initialization.random(self.x)
    aff = openTSNE.affinity.Multiscale(self.x, [2, 5], method="exact")
    embedding = openTSNE.TSNEEmbedding(init, aff, negative_gradient_method="bh")
    embedding.optimize(100, inplace=True)

    # This should not raise an error
    embedding.transform(self.x_test)
def test_transform_with_nonstandard_affinity(self):
    """Should raise an informative error when a non-standard affinity is
    used with `transform`."""
    init = openTSNE.initialization.random(self.x)
    aff = openTSNE.affinity.Multiscale(self.x, [2, 5], method="exact")
    embedding = openTSNE.TSNEEmbedding(init, aff, negative_gradient_method="bh")
    embedding.optimize(100, inplace=True)

    with self.assertRaises(TypeError):
        embedding.transform(self.x_test)
def prepare_embedding(self, affinities, initialization):
    """Prepare an embedding object with appropriate parameters, given some
    affinities and initialization."""
    return openTSNE.TSNEEmbedding(
        initialization,
        affinities,
        learning_rate=self.learning_rate,
        theta=self.theta,
        min_num_intervals=self.min_num_intervals,
        ints_in_interval=self.ints_in_interval,
        n_jobs=self.n_jobs,
        negative_gradient_method=self.negative_gradient_method,
        callbacks=self.callbacks,
        callbacks_every_iters=self.callbacks_every_iters,
    )
def test_fft_transform_with_point_subsets_using_perplexity_nn(self):
    x_train, x_test = train_test_split(
        self.iris.data, test_size=0.33, random_state=42
    )

    # Set up the initial embedding
    init = openTSNE.initialization.pca(x_train)
    affinity = openTSNE.affinity.PerplexityBasedNN(x_train, method="exact")
    embedding = openTSNE.TSNEEmbedding(
        init, affinity, negative_gradient_method="fft", random_state=42
    )
    embedding.optimize(n_iter=100, inplace=True)

    # The test set contains 50 samples, so let's verify on half of those
    transform_params = dict(n_iter=0)
    new_embedding_1 = embedding.transform(x_test, **transform_params)[:25]
    new_embedding_2 = embedding.transform(x_test[:25], **transform_params)

    np.testing.assert_equal(new_embedding_1, new_embedding_2)
def run(self, n_samples=1000, random_state=None):
    x, y = self.load_data(n_samples=n_samples)

    print("-" * 80)
    print("Random state", random_state)
    print("-" * 80, flush=True)

    random_state = check_random_state(random_state)

    start = time.time()

    start_aff = time.time()
    affinity = openTSNE.affinity.PerplexityBasedNN(
        x,
        perplexity=self.perplexity,
        method="annoy",
        n_jobs=self.n_jobs,
        random_state=random_state,
        verbose=True,
    )
    print("openTSNE: NN search", time.time() - start_aff, flush=True)

    init = openTSNE.initialization.random(
        x, n_components=2, random_state=random_state, verbose=True
    )

    start_optim = time.time()
    embedding = openTSNE.TSNEEmbedding(
        init,
        affinity,
        learning_rate=self.learning_rate,
        n_jobs=self.n_jobs,
        negative_gradient_method="fft",
        random_state=random_state,
        verbose=True,
    )
    embedding.optimize(250, exaggeration=12, momentum=0.8, inplace=True)
    embedding.optimize(750, momentum=0.5, inplace=True)
    print("openTSNE: Optimization", time.time() - start_optim)

    print("openTSNE: Full", time.time() - start, flush=True)
# fast_tsne(x, nthreads=1)

affinities = openTSNE.affinity.PerplexityBasedNN(
    x,
    perplexity=30,
    metric="cosine",
    method="approx",
    n_jobs=-1,
    random_state=0,
    verbose=True,
)
init = openTSNE.initialization.spectral(affinities.P, verbose=True)
embedding = openTSNE.TSNEEmbedding(
    init,
    affinities,
    negative_gradient_method="fft",
    n_jobs=-1,
    random_state=0,
    verbose=True,
)
embedding.optimize(n_iter=250, exaggeration=12, momentum=0.5, inplace=True)
embedding.optimize(n_iter=500, momentum=0.8, inplace=True)

import matplotlib.pyplot as plt

plt.scatter(embedding[:, 0], embedding[:, 1], s=1)
plt.show()
def fit(self, X: np.ndarray, Y: np.ndarray = None) -> openTSNE.TSNEEmbedding:
    # Sparse data are not supported
    if sp.issparse(X):
        raise TypeError(
            "A sparse matrix was passed, but dense data is required. Use "
            "X.toarray() to convert to a dense numpy array."
        )

    # Build up the affinity matrix, using multiscale if needed
    if self.multiscale:
        # The local perplexity should be on the order of ~50, while the
        # higher perplexity should be on the order of ~N/50
        if not isinstance(self.perplexity, Iterable):
            raise ValueError(
                "Perplexity should be an instance of `Iterable`, `%s` "
                "given." % type(self.perplexity).__name__
            )
        affinities = openTSNE.affinity.Multiscale(
            X,
            perplexities=self.perplexity,
            metric=self.metric,
            method=self.neighbors,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
    else:
        if isinstance(self.perplexity, Iterable):
            raise ValueError(
                "Perplexity should be an instance of `float`, `%s` "
                "given." % type(self.perplexity).__name__
            )
        affinities = openTSNE.affinity.PerplexityBasedNN(
            X,
            perplexity=self.perplexity,
            metric=self.metric,
            method=self.neighbors,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )

    # Create an initial embedding
    if isinstance(self.initialization, np.ndarray):
        initialization = self.initialization
    elif self.initialization == "pca":
        initialization = openTSNE.initialization.pca(
            X, self.n_components, random_state=self.random_state
        )
    elif self.initialization == "random":
        initialization = openTSNE.initialization.random(
            X, self.n_components, random_state=self.random_state
        )
    else:
        raise ValueError(
            "Invalid initialization `%s`. Please use either `pca` or "
            "`random`, or provide a numpy array." % self.initialization
        )

    embedding = openTSNE.TSNEEmbedding(
        initialization,
        affinities,
        learning_rate=self.learning_rate,
        theta=self.theta,
        min_num_intervals=self.min_num_intervals,
        ints_in_interval=self.ints_in_interval,
        n_jobs=self.n_jobs,
        negative_gradient_method=self.negative_gradient_method,
        callbacks=self.callbacks,
        callbacks_every_iters=self.callbacks_every_iters,
    )

    # Run standard t-SNE optimization: early exaggeration phase, then the
    # regular optimization phase
    embedding.optimize(
        n_iter=self.early_exaggeration_iter,
        exaggeration=self.early_exaggeration,
        inplace=True,
        momentum=0.5,
        propagate_exception=True,
    )
    embedding.optimize(
        n_iter=self.n_iter,
        exaggeration=self.exaggeration,
        inplace=True,
        momentum=0.8,
        propagate_exception=True,
    )

    return embedding
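# Self-contained sketch (assumed addition, not from the original source) of
# the same two-phase optimization the `fit` method above performs, using the
# openTSNE API directly on random data; parameter values are illustrative
# rather than the wrapper's configured ones.
import numpy as np
import openTSNE

rng = np.random.RandomState(42)
X = rng.normal(size=(500, 50))

affinities = openTSNE.affinity.PerplexityBasedNN(X, perplexity=30, random_state=42)
init = openTSNE.initialization.pca(X, random_state=42)

embedding = openTSNE.TSNEEmbedding(init, affinities, negative_gradient_method="fft")
# Early exaggeration phase, followed by the regular optimization phase
embedding.optimize(n_iter=250, exaggeration=12, momentum=0.5, inplace=True)
embedding.optimize(n_iter=500, momentum=0.8, inplace=True)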
with Timer("Loading data..."): with gzip.open("../examples/data/macosko_2015.pkl.gz", "rb") as f: data = pickle.load(f) x = data["pca_50"] y, cluster_ids = data["CellType1"], data["CellType2"] with Timer("Finding nearest neighbors..."): affinities = openTSNE.affinity.PerplexityBasedNN( x, perplexity=30, method="approx", n_jobs=8, random_state=3 ) with Timer("Creating initial embedding..."): init = openTSNE.initialization.random(x, random_state=3) with Timer("Creating embedding object..."): embedding = openTSNE.TSNEEmbedding( init, affinities, negative_gradient_method="fft", n_jobs=8, callbacks=openTSNE.callbacks.ErrorLogger(), random_state=3, ) with Timer("Running optimization..."): embedding.optimize(n_iter=250, exaggeration=12, momentum=0.5, inplace=True) embedding.optimize(n_iter=750, momentum=0.8, inplace=True)