Ejemplo n.º 1
0
    def __tsne_test_helper(self, data, n_com):
        """Check the embedding shape for three ways of selecting the metric.

        The metric is passed as the string ``"euclidean"``, as the
        ``Euclidean`` object, and as ``"precomputed"`` together with
        ``Euclidean(data)``. Every resulting ``embedding_`` must have the
        shape ``(data.X.shape[0], n_com)``.
        """
        by_name = TSNE(n_components=n_com, metric="euclidean")(data)
        by_object = TSNE(n_components=n_com, metric=Euclidean)(data)
        from_distances = TSNE(n_components=n_com, metric="precomputed")(
            Euclidean(data))

        expected_shape = (data.X.shape[0], n_com)
        for fitted in (by_name, by_object, from_distances):
            self.assertEqual(expected_shape, fitted.embedding_.shape)
    def _reduce_dimensions(data, method="MDS", use_cosine=False):
        """
        Reduce the dimensionality of the data to 2D.

        Parameters
        ----------
        data: Orange.data.Table
            The image embeddings (vectors of length 2048).
        method: string
            The method to use (default MDS).
        use_cosine: bool
            Precompute cosine distances and pass them to MDS.

        Returns
        -------
        array-like
            The data, reduced to 2 dimensions.

        """
        if method == "MDS":
            if use_cosine:
                mds = MDS(n_init=1, dissimilarity="precomputed")
                dist_matrix = Cosine(data)
                return mds(dist_matrix).embedding_
            else:
                mds = MDS(n_init=1, init_type="PCA")
                return mds(data).embedding_

        elif method == "PCA":
            pca = PCA(n_components=2)
            return pca(data)(data)

        elif method == "TSNE":
            tsne = TSNE(init="pca")
            return tsne(data).embedding_
Ejemplo n.º 3
0
    def test_transform(self):
        """Applying a fitted model to held-out rows must not yield NaNs."""
        # An explicit perplexity keeps the fit free of warnings
        learner = TSNE(perplexity=10)
        fitted = learner(self.iris[::2])
        projected = fitted(self.iris[1::2])

        # No coordinate of the new embedding may be NaN
        has_nan = np.any(np.isnan(projected.X))
        self.assertFalse(has_nan)
Ejemplo n.º 4
0
def compute_tsne(data, perplexity, iter, init):
    """Configure a ``TSNE`` instance and apply it to ``data``.

    When ``data.X`` has more than 10000 rows, the ``'fft'`` negative
    gradient method and ``'approx'`` neighbors are selected; otherwise
    ``'bh'`` and ``'exact'`` are used. ``perplexity``, ``iter`` and ``init``
    are forwarded as ``perplexity``, ``n_iter`` and ``initialization``.
    The result of calling the configured ``TSNE`` on ``data`` is returned.
    """
    if len(data.X) > 10000:
        gradient_choice, neighbor_choice = 'fft', 'approx'
    else:
        gradient_choice, neighbor_choice = 'bh', 'exact'

    settings = dict(
        perplexity=perplexity,
        n_iter=iter,
        initialization=init,
        theta=.8,
        early_exaggeration_iter=0,
        negative_gradient_method=gradient_choice,
        neighbors=neighbor_choice,
        random_state=0,
    )
    return TSNE(**settings)(data)
Ejemplo n.º 5
0
    def test_continue_optimization_inplace(self):
        """In-place optimization must hand back the very same objects."""
        fitted = TSNE(n_iter=100)(self.iris)
        continued = fitted.optimize(100, inplace=True)

        # With inplace=True the model and both embedding views are reused,
        # not copied
        self.assertIs(fitted, continued)
        self.assertIs(fitted.embedding, continued.embedding)
        self.assertIs(fitted.embedding_, continued.embedding_)

        # The table's X must agree with the embedding object
        np.testing.assert_equal(continued.embedding.X, continued.embedding_)
Ejemplo n.º 6
0
    def test_fit(self):
        """A fitted model exposes a NaN-free embedding of the right shape."""
        n_components = 2
        fitted = TSNE(n_components=n_components)(self.iris)

        # One row per input instance, one column per requested component
        expected_shape = (self.iris.X.shape[0], n_components)
        self.assertEqual(fitted.embedding.X.shape, expected_shape)

        # No coordinate may be NaN
        has_nan = np.any(np.isnan(fitted.embedding.X))
        self.assertFalse(has_nan)

        # The table's X must agree with the embedding object
        np.testing.assert_equal(fitted.embedding.X, fitted.embedding_)
Ejemplo n.º 7
0
    def test_continue_optimization(self):
        """Optimizing with ``inplace=False`` must produce new, changed objects."""
        original = TSNE(n_iter=100)(self.iris)
        continued = original.optimize(100, inplace=False)

        # Without inplace, neither the model nor its embeddings are shared
        self.assertIsNot(original, continued)
        self.assertIsNot(original.embedding, continued.embedding)
        self.assertIsNot(original.embedding_, continued.embedding_)

        unchanged = np.allclose(original.embedding.X, continued.embedding.X)
        self.assertFalse(
            unchanged,
            'Embedding should change after further optimization.')

        # The table's X must agree with the embedding object
        np.testing.assert_equal(continued.embedding.X, continued.embedding_)
Ejemplo n.º 8
0
    def test_pickle(self):
        """A model restored from a pickle must still embed new data usefully.

        For each neighbor method the model is fitted on every other iris
        row, round-tripped through ``pickle``, and applied to the remaining
        rows. A 5-NN classifier fitted and scored on that embedding must
        exceed 0.95 accuracy.
        """
        for neighbors in ("exact", "approx"):
            learner = TSNE(
                early_exaggeration_iter=0,
                n_iter=10,
                perplexity=30,
                neighbors=neighbors,
                random_state=0,
            )
            fitted = learner(self.iris[::2])

            restored = pickle.loads(pickle.dumps(fitted))

            coords = restored(self.iris[1::2]).X

            classifier = KNeighborsClassifier(n_neighbors=5)
            classifier.fit(coords, self.iris[1::2].Y)
            guesses = classifier.predict(coords)
            accuracy = accuracy_score(guesses, self.iris[1::2].Y)
            self.assertTrue(
                accuracy > 0.95,
                msg=f"Pickling failed with `neighbors={neighbors}`",
            )
Ejemplo n.º 9
0
    def test_fft_correctness(self):
        """The FFT gradient method must turn a random start into a good embedding.

        A 5-NN classifier is fitted and scored on the embedding twice: once
        on the untouched random initialization (accuracy must stay below
        0.6) and once after 100 in-place optimization iterations (accuracy
        must exceed 0.95).
        """
        knn = KNeighborsClassifier(n_neighbors=5)

        def knn_accuracy(embedding):
            # Fit and score on the same points, exactly as both checks need
            knn.fit(embedding, self.iris.Y)
            predicted = knn.predict(embedding)
            return accuracy_score(predicted, self.iris.Y)

        # Set iterations to 0 so we check that the initialization is fairly random
        tsne = TSNE(early_exaggeration_iter=0, n_iter=0, perplexity=30,
                    negative_gradient_method='fft', initialization='random',
                    random_state=0)
        model = tsne(self.iris)

        # Evaluate KNN on the random initialization; assertLess reports the
        # actual accuracy on failure, unlike assertTrue(a < b)
        self.assertLess(knn_accuracy(model.embedding_), 0.6)

        # 100 iterations should be enough for iris
        model.optimize(n_iter=100, inplace=True)

        # Evaluate KNN on the tSNE embedding
        self.assertGreater(knn_accuracy(model.embedding_), 0.95)
Ejemplo n.º 10
0
    def __start(self):
        """Build the initial t-SNE projection and start the update loop.

        Runs PCA preprocessing, builds a ``TSNE`` projection with zero
        iterations on ``self.pca_data``, wraps it in a ``TSNERunner`` and
        registers the runner's optimization iterator as the update loop
        before initializing the progress bar.
        """
        self.pca_preprocessing()

        # The initialization is obtained through fastTSNE because it involves
        # scaling; the library takes care of that for us.
        start_coords = TSNE.default_initialization(
            self.pca_data.X, n_components=2, random_state=0)

        # Perplexity: a pair of values for multiscale, a single one otherwise
        n_samples = self.pca_data.X.shape[0]
        if self.multiscale:
            perplexity_cap = (n_samples - 1) / 3
            perplexity = (min(perplexity_cap, 50), min(perplexity_cap, 500))
        else:
            perplexity = self.perplexity

        # Large data sets switch to the approximate / FFT settings
        is_large = n_samples > 10_000
        neighbor_method = "approx" if is_large else "exact"
        gradient_method = "fft" if is_large else "bh"

        # Zero iterations here - the optimization steps are run subsequently
        learner = TSNE(
            n_components=2,
            perplexity=perplexity,
            multiscale=self.multiscale,
            early_exaggeration_iter=0,
            n_iter=0,
            initialization=start_coords,
            exaggeration=self.exaggeration,
            neighbors=neighbor_method,
            negative_gradient_method=gradient_method,
            random_state=0,
        )
        self.projection = learner(self.pca_data)

        self.tsne_runner = TSNERunner(self.projection, step_size=50)
        self.tsne_iterator = self.tsne_runner.run_optimization()
        self.__set_update_loop(self.tsne_iterator)
        self.progressBarInit(processEvents=None)
Ejemplo n.º 11
0
 def test_multiscale(self):
     """A multiscale model applied to held-out rows must not yield NaNs."""
     learner = TSNE(perplexity=(10, 10), multiscale=True)
     fitted = learner(self.iris[::2])
     projected = fitted(self.iris[1::2])

     has_nan = np.any(np.isnan(projected.X))
     self.assertFalse(has_nan)