Esempio n. 1
0
    def transform(self):
        """Optimise the initial layout against the stored affinity matrix.

        ``self.data`` is converted to CSR and attached to a fresh
        ``Affinities`` object as ``P``.  A ``TSNEEmbedding`` is then created
        from ``self.init`` with the ``GDAnneal`` optimizer and optimised in a
        single ``optimize`` call.

        Returns
        -------
        The result of ``optimize``, which is also stored in ``self.data_``.
        """
        precomputed = Affinities()
        precomputed.P = self.data.tocsr()

        # NOTE: the resolved value is written back to the instance, so after
        # the first call ``self.learning_rate`` is numeric rather than "auto".
        if self.learning_rate == "auto":
            self.learning_rate = self.init.shape[0] / self.early_exaggeration

        embedding_options = dict(
            embedding=self.init,
            affinities=precomputed,
            negative_gradient_method=self.negative_gradient_method,
            learning_rate=self.learning_rate,
            n_jobs=self.n_jobs,
            max_step_norm=self.max_step_norm,
            random_state=self.random_state,
            callbacks_every_iters=self.save_iter_freq,
            callbacks=self.saver,
            optimizer=GDAnneal(),
        )
        tsne = openTSNE.TSNEEmbedding(**embedding_options)

        optimize_options = dict(
            n_iter=self.n_iter,
            exaggeration=self.exaggeration,
            momentum=self.momentum,
            eps=self.eps,
            a=self.a,
            r=self.r,
            elastic_const=self.elastic_const,
            inplace=True,
            n_jobs=self.n_jobs,
            propagate_exception=True,
        )
        self.data_ = tsne.optimize(**optimize_options)

        return self.data_
Esempio n. 2
0
    def run(self, n_samples=1000):
        """Benchmark a complete openTSNE run and print the elapsed times.

        Data are obtained from ``self.load_data``; perplexity-based
        affinities are computed, a random two-dimensional initialisation is
        created, and the embedding is optimised in two phases (250 iterations
        with exaggeration 12, then 750 more).  Timings for the neighbour
        search, the optimisation and the whole run are printed in seconds.

        Parameters
        ----------
        n_samples : int
            Forwarded to ``self.load_data``.
        """
        # The second value returned by load_data is not used here.
        x, _ = self.load_data(n_samples=n_samples)

        print("-" * 80)
        # perf_counter is monotonic, so the measured intervals cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = start_aff = time.perf_counter()
        affinity = openTSNE.affinity.PerplexityBasedNN(
            x,
            perplexity=self.perplexity,
            method="approx",
            n_jobs=self.n_jobs,
        )
        print("openTSNE: NN search", time.perf_counter() - start_aff)

        init = openTSNE.initialization.random(x.shape[0], n_components=2)

        start_optim = time.perf_counter()
        embedding = openTSNE.TSNEEmbedding(
            init,
            affinity,
            learning_rate=self.learning_rate,
            n_jobs=self.n_jobs,
            negative_gradient_method="fft",
            theta=0.5,
            min_num_intervals=10,
            ints_in_interval=1,
        )
        # Early exaggeration phase followed by the regular phase.
        embedding.optimize(250, exaggeration=12, momentum=0.8, inplace=True)
        embedding.optimize(750, momentum=0.5, inplace=True)
        print("openTSNE: Optimization", time.perf_counter() - start_optim)
        print("openTSNE: Full", time.perf_counter() - start)
Esempio n. 3
0
    def test_transform_with_standard_affinity(self):
        """`transform` should work on an embedding built from a
        `PerplexityBasedNN` affinity."""
        start_positions = openTSNE.initialization.random(self.x)
        affinities = openTSNE.affinity.PerplexityBasedNN(
            self.x, 5, method="exact"
        )
        tsne = openTSNE.TSNEEmbedding(
            start_positions, affinities, negative_gradient_method="bh"
        )
        tsne.optimize(100, inplace=True)

        # The test passes as long as this call completes without raising.
        tsne.transform(self.x_test)
Esempio n. 4
0
 def test_2(self):
     """Evaluate an embedding built from a `MultiscaleMixture` affinity,
     both after optimisation and after re-embedding the training points
     with `transform`."""
     embedding = openTSNE.TSNEEmbedding(
         initialization.pca(self.x),
         affinity.MultiscaleMixture(self.x, perplexities=[5, 30]),
     )

     # (iterations, exaggeration, momentum) for each optimisation phase.
     for n_steps, exaggeration, momentum in ((25, 12, 0.5), (50, 1, 0.8)):
         embedding.optimize(
             n_steps, exaggeration=exaggeration, momentum=momentum, inplace=True
         )
     self.eval_embedding(embedding, self.y)

     transformed = embedding.transform(self.x)
     self.eval_embedding(transformed, self.y, "transform")
Esempio n. 5
0
 def test_1(self):
     """Evaluate an embedding built from a `PerplexityBasedNN` affinity
     (perplexity 30), both after optimisation and after re-embedding the
     training points with `transform`."""
     embedding = openTSNE.TSNEEmbedding(
         initialization.pca(self.x),
         affinity.PerplexityBasedNN(self.x, perplexity=30),
     )

     # (iterations, exaggeration, momentum) for each optimisation phase.
     for n_steps, exaggeration, momentum in ((25, 12, 0.5), (50, 1, 0.8)):
         embedding.optimize(
             n_steps, exaggeration=exaggeration, momentum=momentum, inplace=True
         )
     self.eval_embedding(embedding, self.y)

     transformed = embedding.transform(self.x)
     self.eval_embedding(transformed, self.y, "transform")
Esempio n. 6
0
    def test_transform_with_multiscale_affinity(self):
        """`transform` should work on an embedding built from a `Multiscale`
        affinity."""
        start_positions = openTSNE.initialization.random(self.x)
        affinities = openTSNE.affinity.Multiscale(
            self.x, [2, 5], method="exact"
        )
        tsne = openTSNE.TSNEEmbedding(
            start_positions, affinities, negative_gradient_method="bh"
        )
        tsne.optimize(100, inplace=True)

        # The test passes as long as this call completes without raising.
        tsne.transform(self.x_test)
Esempio n. 7
0
    def test_transform_with_nonstandard_affinity(self):
        """`transform` should raise a `TypeError` when the embedding was
        built with a non-standard affinity."""
        # NOTE(review): `Multiscale` is treated here as the non-standard
        # affinity; whether `transform` rejects it presumably depends on the
        # openTSNE version under test - confirm.
        start_positions = openTSNE.initialization.random(self.x)
        affinities = openTSNE.affinity.Multiscale(
            self.x, [2, 5], method="exact"
        )
        tsne = openTSNE.TSNEEmbedding(
            start_positions, affinities, negative_gradient_method="bh"
        )
        tsne.optimize(100, inplace=True)

        with self.assertRaises(TypeError):
            tsne.transform(self.x_test)
Esempio n. 8
0
 def prepare_embedding(self, affinities, initialization):
     """Build a `TSNEEmbedding` from the given initialization and
     affinities, configured with the parameters stored on this object."""
     settings = {
         "learning_rate": self.learning_rate,
         "theta": self.theta,
         "min_num_intervals": self.min_num_intervals,
         "ints_in_interval": self.ints_in_interval,
         "n_jobs": self.n_jobs,
         "negative_gradient_method": self.negative_gradient_method,
         "callbacks": self.callbacks,
         "callbacks_every_iters": self.callbacks_every_iters,
     }
     return openTSNE.TSNEEmbedding(initialization, affinities, **settings)
    def test_fft_transform_with_point_subsets_using_perplexity_nn(self):
        """Transforming a subset of points must give the same positions as
        transforming the full set and slicing the result afterwards."""
        x_train, x_test = train_test_split(
            self.iris.data, test_size=0.33, random_state=42
        )

        # Fit the reference embedding on the training split.
        reference = openTSNE.TSNEEmbedding(
            openTSNE.initialization.pca(x_train),
            openTSNE.affinity.PerplexityBasedNN(x_train, method="exact"),
            negative_gradient_method="fft",
            random_state=42,
        )
        reference.optimize(n_iter=100, inplace=True)

        # The test split is expected to hold 50 samples; compare on the
        # first half of them, with no optimisation iterations.
        half = 25
        sliced_after = reference.transform(x_test, n_iter=0)[:half]
        sliced_before = reference.transform(x_test[:half], n_iter=0)

        np.testing.assert_equal(sliced_after, sliced_before)
Esempio n. 10
0
    def run(self, n_samples=1000, random_state=None):
        """Benchmark a complete openTSNE run and print the elapsed times.

        Data are obtained from ``self.load_data``; perplexity-based
        affinities are computed with the "annoy" method, a random
        two-dimensional initialisation is created, and the embedding is
        optimised in two phases (250 iterations with exaggeration 12, then
        750 more).  Timings for the neighbour search, the optimisation and
        the whole run are printed in seconds.

        Parameters
        ----------
        n_samples : int
            Forwarded to ``self.load_data``.
        random_state
            Printed as given, then normalised with ``check_random_state``
            and shared by the affinity, initialisation and embedding steps.
        """
        # The second value returned by load_data is not used here.
        x, _ = self.load_data(n_samples=n_samples)

        print("-" * 80)
        print("Random state", random_state)
        print("-" * 80, flush=True)

        random_state = check_random_state(random_state)

        # perf_counter is monotonic, so the measured intervals cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = start_aff = time.perf_counter()
        affinity = openTSNE.affinity.PerplexityBasedNN(
            x,
            perplexity=self.perplexity,
            method="annoy",
            n_jobs=self.n_jobs,
            random_state=random_state,
            verbose=True,
        )
        print("openTSNE: NN search", time.perf_counter() - start_aff, flush=True)

        init = openTSNE.initialization.random(
            x,
            n_components=2,
            random_state=random_state,
            verbose=True,
        )

        start_optim = time.perf_counter()
        embedding = openTSNE.TSNEEmbedding(
            init,
            affinity,
            learning_rate=self.learning_rate,
            n_jobs=self.n_jobs,
            negative_gradient_method="fft",
            random_state=random_state,
            verbose=True,
        )
        # Early exaggeration phase followed by the regular phase.
        embedding.optimize(250, exaggeration=12, momentum=0.8, inplace=True)
        embedding.optimize(750, momentum=0.5, inplace=True)
        print("openTSNE: Optimization", time.perf_counter() - start_optim)
        print("openTSNE: Full", time.perf_counter() - start, flush=True)
Esempio n. 11
0
#     fast_tsne(x, nthreads=1)

# Settings shared by the nearest-neighbour search and the embedding object.
shared_options = dict(n_jobs=-1, random_state=0, verbose=True)

# Perplexity-based affinities using cosine distances.
affinities = openTSNE.affinity.PerplexityBasedNN(
    x, perplexity=30, metric="cosine", method="approx", **shared_options
)

# Spectral initialisation computed from the affinity matrix itself.
init = openTSNE.initialization.spectral(affinities.P, verbose=True)

embedding = openTSNE.TSNEEmbedding(
    init, affinities, negative_gradient_method="fft", **shared_options
)

# Two optimisation phases: 250 iterations with exaggeration 12, then 500
# iterations with no exaggeration argument and a higher momentum.
for phase_options in (
    dict(n_iter=250, exaggeration=12, momentum=0.5),
    dict(n_iter=500, momentum=0.8),
):
    embedding.optimize(inplace=True, **phase_options)

import matplotlib.pyplot as plt

# Scatter plot of the first two embedding columns.
first_axis = embedding[:, 0]
second_axis = embedding[:, 1]
plt.scatter(first_axis, second_axis, s=1)
plt.show()
Esempio n. 12
0
    def fit(self,
            X: np.ndarray,
            Y: np.ndarray = None) -> openTSNE.TSNEEmbedding:
        """Compute a t-SNE embedding of ``X``.

        Parameters
        ----------
        X : np.ndarray
            Dense data to embed.
        Y : np.ndarray, optional
            Not used by this method.

        Returns
        -------
        openTSNE.TSNEEmbedding
            The embedding after the early-exaggeration phase and the regular
            optimisation phase.

        Raises
        ------
        TypeError
            If ``X`` is a sparse matrix.
        ValueError
            If ``self.perplexity`` does not match ``self.multiscale`` (an
            iterable is required when multiscale is on, and rejected when it
            is off), or if ``self.initialization`` is not a numpy array,
            ``"pca"`` or ``"random"``.
        """
        # Only dense input is accepted.
        if sp.issparse(X):
            raise TypeError(
                "A sparse matrix was passed, but dense data is required. Use "
                "X.toarray() to convert to a dense numpy array.")

        # Choose the affinity class and check that the perplexity has the
        # shape that class expects.
        if self.multiscale:
            # The local perplexity should be on the order ~50 while the
            # higher perplexity should be on the order ~N/50
            if not isinstance(self.perplexity, Iterable):
                raise ValueError(
                    "Perplexity should be an instance of `Iterable`, `%s` "
                    "given." % type(self.perplexity).__name__)
            affinity_cls = openTSNE.affinity.Multiscale
            perplexity_keyword = "perplexities"
        else:
            if isinstance(self.perplexity, Iterable):
                raise ValueError(
                    "Perplexity should be an instance of `float`, `%s` "
                    "given." % type(self.perplexity).__name__)
            affinity_cls = openTSNE.affinity.PerplexityBasedNN
            perplexity_keyword = "perplexity"

        affinity_options = {
            perplexity_keyword: self.perplexity,
            "metric": self.metric,
            "method": self.neighbors,
            "random_state": self.random_state,
            "n_jobs": self.n_jobs,
        }
        affinities = affinity_cls(X, **affinity_options)

        # Resolve the starting positions: a ready-made array is used as is,
        # otherwise the named initialisation scheme is run.
        requested_init = self.initialization
        if isinstance(requested_init, np.ndarray):
            initialization = requested_init
        elif requested_init == "pca":
            initialization = openTSNE.initialization.pca(
                X, self.n_components, random_state=self.random_state)
        elif requested_init == "random":
            initialization = openTSNE.initialization.random(
                X, self.n_components, random_state=self.random_state)
        else:
            raise ValueError(
                "Invalid initialization `%s`. Please use either `pca` or "
                "`random` or provide a numpy array." % requested_init)

        embedding_options = dict(
            learning_rate=self.learning_rate,
            theta=self.theta,
            min_num_intervals=self.min_num_intervals,
            ints_in_interval=self.ints_in_interval,
            n_jobs=self.n_jobs,
            negative_gradient_method=self.negative_gradient_method,
            callbacks=self.callbacks,
            callbacks_every_iters=self.callbacks_every_iters,
        )
        embedding = openTSNE.TSNEEmbedding(
            initialization, affinities, **embedding_options)

        # Both phases update the embedding in place and let optimiser
        # exceptions propagate to the caller.
        common_phase_options = dict(inplace=True, propagate_exception=True)

        # Early exaggeration phase.
        embedding.optimize(
            n_iter=self.early_exaggeration_iter,
            exaggeration=self.early_exaggeration,
            momentum=0.5,
            **common_phase_options)
        # Regular phase.
        embedding.optimize(
            n_iter=self.n_iter,
            exaggeration=self.exaggeration,
            momentum=0.8,
            **common_phase_options)

        return embedding
Esempio n. 13
0
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# data files from a trusted source.
with Timer("Loading data..."), gzip.open(
    "../examples/data/macosko_2015.pkl.gz", "rb"
) as f:
    data = pickle.load(f)

x = data["pca_50"]
y = data["CellType1"]
cluster_ids = data["CellType2"]

with Timer("Finding nearest neighbors..."):
    affinities = openTSNE.affinity.PerplexityBasedNN(
        x,
        perplexity=30,
        method="approx",
        n_jobs=8,
        random_state=3,
    )

with Timer("Creating initial embedding..."):
    init = openTSNE.initialization.random(x, random_state=3)

with Timer("Creating embedding object..."):
    error_logger = openTSNE.callbacks.ErrorLogger()
    embedding = openTSNE.TSNEEmbedding(
        init,
        affinities,
        negative_gradient_method="fft",
        n_jobs=8,
        callbacks=error_logger,
        random_state=3,
    )


with Timer("Running optimization..."):
    # 250 iterations with exaggeration 12, then 750 iterations with no
    # exaggeration argument and a higher momentum.
    for phase_options in (
        dict(n_iter=250, exaggeration=12, momentum=0.5),
        dict(n_iter=750, momentum=0.8),
    ):
        embedding.optimize(inplace=True, **phase_options)