Example #1
    def test_can_pass_callbacks_to_tsne_object(self):
        callback = MagicMock()
        callback2 = MagicMock()
        # We don't want individual callbacks to be iterable; deleting __iter__
        # makes each mock a plain callable, so TSNE treats it as a single
        # callback rather than a list of callbacks
        del callback.__iter__
        del callback2.__iter__

        # Should be able to pass a single callback
        TSNE(callbacks=callback,
             callbacks_every_iters=1,
             early_exaggeration_iter=0,
             n_iter=1).fit(self.x)
        self.assertEqual(callback.call_count, 1)

        # Should be able to pass a list of callbacks
        callback.reset_mock()
        TSNE(callbacks=[callback],
             callbacks_every_iters=1,
             early_exaggeration_iter=0,
             n_iter=1).fit(self.x)
        self.assertEqual(callback.call_count, 1)

        # Should be able to change the callback on the object
        callback.reset_mock()
        tsne = TSNE(callbacks=callback,
                    callbacks_every_iters=1,
                    early_exaggeration_iter=0,
                    n_iter=1)
        tsne.callbacks = callback2
        tsne.fit(self.x)
        callback.assert_not_called()
        self.assertEqual(callback2.call_count, 1)
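A note on the protocol these assertions rely on: in openTSNE, a callback is any callable invoked as callback(iteration, error, embedding), and returning True interrupts the optimization. A minimal concrete callback, written as a sketch under that assumption:

class ProgressLogger:
    """Log the current error and stop once it falls below a threshold."""

    def __init__(self, stop_below=1e-4):
        self.stop_below = stop_below

    def __call__(self, iteration, error, embedding):
        print('iteration %d: error %.4f' % (iteration, error))
        # Returning True interrupts the optimization
        return error < self.stop_below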
Example #2
    def test_raises_error_on_unrecognized_metric(self):
        """Unknown distance metric should raise error"""
        tsne = TSNE(metric='imaginary', neighbors='exact')
        with self.assertRaises(ValueError):
            tsne.prepare_initial(self.x)

        tsne = TSNE(metric='imaginary', neighbors='approx')
        with self.assertRaises(ValueError):
            tsne.prepare_initial(self.x)
Example #3
    def test_same_results_on_fixed_random_state_pca_init(self):
        """Results should be exactly the same if we provide a random state."""
        tsne1 = TSNE(random_state=1, initialization='pca')
        embedding1 = tsne1.fit(self.x)

        tsne2 = TSNE(random_state=1, initialization='pca')
        embedding2 = tsne2.fit(self.x)

        np.testing.assert_array_equal(
            embedding1, embedding2,
            'Same random state produced different initial embeddings')
Example #4
    def test_embedding_transform(self, param_name, param_value,
                                 gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Make sure mock still conforms to signature
        gradient_descent.return_value = (1, MagicMock())

        # Perform initial embedding - this is tested above
        tsne = TSNE()
        embedding = tsne.fit(self.x)
        gradient_descent.reset_mock()

        embedding.transform(self.x_test, **{param_name: param_value})

        # Early exaggeration training loop
        if param_name in ('early_exaggeration_iter', 'early_exaggeration'):
            call_idx = 0
        # Main training loop
        elif param_name in ('n_iter', 'final_momentum'):
            call_idx = 1

        # If general parameter, should be applied to every call
        else:
            call_idx = 0

        self.assertEqual(2, gradient_descent.call_count)
        check_call_contains_kwargs(
            gradient_descent.mock_calls[call_idx],
            {param_name: param_value},
        )
Example #5
 @classmethod
 def setUpClass(cls):
     cls.tsne = TSNE(early_exaggeration_iter=20, n_iter=100)
     # Set up two modalities so we can visually inspect test results
     cls.x = np.vstack((
         np.random.normal(+1, 1, (100, 4)),
         np.random.normal(-1, 1, (100, 4)),
     ))
     cls.x_test = np.random.normal(0, 1, (25, 4))
Example #6
 def test_unfitted_pca_model(self):
     """Using PCA initialization in `transform` should fail when the initial
     embedding was initialized with PCA."""
     tsne = TSNE(initialization='random')
     embedding = tsne.fit(self.x)
     # Transforming with `pca` init should fail on an embedding that did not
     # use `pca` init
     with self.assertRaises(AssertionError):
         embedding.transform(self.x_test, initialization='pca')
Example #7
    def test_same_partial_embedding_on_fixed_random_state(self):
        tsne = TSNE(random_state=1, initialization='random')
        embedding = tsne.fit(self.x)

        partial1 = embedding.prepare_partial(self.x_test,
                                             initialization='random')
        partial2 = embedding.prepare_partial(self.x_test,
                                             initialization='random')

        np.testing.assert_array_equal(
            partial1, partial2,
            'Same random state produced different partial embeddings')
Example #8
    def test_mismatching_embedding_dimensions_simple_api(self):
        # Fit
        tsne = TSNE(n_components=2, initialization=self.x[:10, :2])
        with self.assertRaises(ValueError,
                               msg='fit::incorrect number of points'):
            tsne.fit(self.x[:25])

        with self.assertRaises(ValueError,
                               msg='fit::incorrect number of dimensions'):
            TSNE(n_components=2, initialization=self.x[:10, :4])

        # Transform
        tsne = TSNE(n_components=2, initialization='random')
        embedding = tsne.fit(self.x)
        with self.assertRaises(ValueError,
                               msg='transform::incorrect number of points'):
            embedding.transform(X=self.x[:5], initialization=self.x[:10, :2])

        with self.assertRaises(
                ValueError, msg='transform::incorrect number of dimensions'):
            embedding.transform(X=self.x, initialization=self.x[:, :4])
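For contrast, a custom initialization that does match is accepted: the array needs one row per input point and n_components columns. A minimal sketch, assuming the openTSNE API used throughout these examples (values are illustrative):

import numpy as np
from openTSNE import TSNE

x = np.random.randn(25, 4)
# The initialization must have shape (n_samples, n_components)
init = np.random.normal(0, 1e-4, (25, 2))
embedding = TSNE(n_components=2, initialization=init).fit(x)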
Example #9
    def test_low_variance(self):
        """Low variance of the initial embedding is very important for the
        convergence of tSNE."""
        # Cycle through various initializations
        initializations = ['random', 'pca']
        allowed = 1e-3

        for init in initializations:
            tsne = TSNE(initialization=init, perplexity=2)
            embedding = tsne.prepare_initial(self.x)
            np.testing.assert_array_less(
                np.var(embedding, axis=0), allowed,
                'using the `%s` initialization' % init)
Example #10
def check_error_approx():
    x, y = get_mouse_60k(1500)

    tsne = TSNE(
        perplexity=20,
        learning_rate=100,
        early_exaggeration=12,
        n_jobs=4,
        theta=0.5,
        initialization='pca',
        metric='euclidean',
        n_components=2,
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors='exact',
        negative_gradient_method='bh',
        min_num_intervals=10,
        ints_in_interval=2,
        late_exaggeration_iter=0,
        late_exaggeration=4,
        callbacks=ErrorLogger(),
    )
    embedding = tsne.prepare_initial(x, initialization='random')

    errors = ErrorApproximations(embedding.affinities.P)
    logger = ErrorLogger()
    embedding.optimize(
        250,
        exaggeration=12,
        callbacks=[errors, logger],
        callbacks_every_iters=5,
        inplace=True,
    )
    embedding.optimize(
        750,
        exaggeration=None,
        callbacks=[errors, logger],
        callbacks_every_iters=5,
        inplace=True,
    )
    errors.report()

    plot(embedding, y)

    x = list(range(len(errors.exact_errors)))
    plt.semilogy(x, errors.exact_errors, label='Exact')
    plt.semilogy(x, errors.bh_errors, label='BH')
    plt.semilogy(x, errors.fft_errors, label='FFT')
    plt.legend()
    plt.show()
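ErrorApproximations is a helper from this example's own benchmarking code, not part of openTSNE, so it is undefined here. A minimal stand-in that merely records the reported error, assuming the (iteration, error, embedding) callback signature used above:

class ErrorHistory:
    def __init__(self):
        self.errors = []

    def __call__(self, iteration, error, embedding):
        # Record the error reported at every callback invocation
        self.errors.append((iteration, error))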
Example #11
def transform(n_jobs=4, grad='bh', neighbors='approx'):
    # iris = datasets.load_iris()
    # x, y = iris['data'], iris['target']
    x, y = get_mnist(20000)

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    tsne = TSNE(
        n_components=2,
        perplexity=30,
        learning_rate=100,
        early_exaggeration=12,
        n_jobs=n_jobs,
        theta=0.5,
        initialization='random',
        metric='euclidean',
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors=neighbors,
        negative_gradient_method=grad,
        min_num_intervals=10,
        ints_in_interval=2,
        late_exaggeration_iter=0,
        late_exaggeration=4,
        callbacks=[ErrorLogger()],
    )
    start = time.time()
    embedding = tsne.fit(x_train)
    print('tsne train', time.time() - start)

    plt.subplot(121)
    plot(embedding, y_train, show=False, ms=3)

    start = time.time()
    partial_embedding = embedding.transform(x_test, perplexity=20)
    # partial_embedding = embedding.get_partial_embedding_for(
    #     x_test, perplexity=10, initialization='random')
    # partial_embedding.optimize(200, exaggeration=2, inplace=True, momentum=0.1)
    print('tsne transform', time.time() - start)

    plt.subplot(122)
    plot(embedding, y_train, show=False, ms=3, alpha=0.25)
    plt.gca().set_prop_cycle(None)  # reset color cycle (set_color_cycle was removed in matplotlib 2.0)
    plot(partial_embedding, y_test, show=False, ms=3, alpha=0.8)

    plt.show()
Example #12
    def test_embedding_optimize(self, param_name, param_value,
                                gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Make sure mock still conforms to signature
        gradient_descent.return_value = (1, MagicMock())

        # `optimize` requires us to specify the `n_iter`
        params = {'n_iter': 50, param_name: param_value}

        tsne = TSNE()
        embedding = tsne.prepare_initial(self.x)
        embedding.optimize(**params, inplace=True)

        self.assertEqual(1, gradient_descent.call_count)
        check_call_contains_kwargs(gradient_descent.mock_calls[0], params)
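Without the mocked gradient_descent, the two-stage flow this test exercises looks roughly like the following sketch (assuming the openTSNE API; parameter values are illustrative):

import numpy as np
from openTSNE import TSNE

x = np.random.randn(100, 4)
embedding = TSNE().prepare_initial(x)
# Early exaggeration phase, then the standard phase
embedding.optimize(n_iter=250, exaggeration=12, inplace=True)
embedding.optimize(n_iter=750, inplace=True)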
Example #13
    def test_nndescent_distances(self, param_name, metric,
                                 nndescent: MagicMock):
        """Distance metrics should be properly passed down to NN descent"""
        assert param_name == 'metric'
        tsne = TSNE(metric=metric, neighbors='approx')

        # We don't care about what happens later, just that the NN method is
        # properly called
        nndescent.side_effect = InterruptedError()
        try:
            # Haversine distance only supports two dimensions
            tsne.prepare_initial(self.x[:, :2])
        except InterruptedError:
            pass

        self.assertEqual(nndescent.call_count, 1)
        check_call_contains_kwargs(nndescent.mock_calls[0], {'metric': metric})
Example #14
    def test_nndescent_mahalanobis_distance(self, nndescent: MagicMock):
        """Distance metrics and additional params should be correctly passed down to NN descent"""
        metric = 'mahalanobis'
        C = np.cov(self.x.T)  # feature covariance; np.cov treats rows as variables, so transpose

        tsne = TSNE(metric=metric, metric_params={'V': C}, neighbors='approx')

        # We don't care about what happens later, just that the NN method is
        # properly called
        nndescent.side_effect = InterruptedError()
        try:
            tsne.prepare_initial(self.x)
        except InterruptedError:
            pass

        self.assertEqual(nndescent.call_count, 1)
        check_call_contains_kwargs(nndescent.mock_calls[0], {'metric': metric})
Example #15
def run_graph():
    graph = nx.read_edgelist(join(DATA_DIR, 'dolphins.edges'))
    affinities = NxGraphAffinities(graph)

    tsne = TSNE()
    y_coords = tsne.generate_initial_coordinates(affinities.P,
                                                 initialization='random')
    embedding = TSNEEmbedding(
        y_coords, affinities, {
            'callbacks': None,
            'negative_gradient_method': 'bh',
            'dof': 1,
            'momentum': 0,
            'learning_rate': 100
        })

    embedding.optimize(1000)

    plt.plot(embedding[:, 0], embedding[:, 1], 'o')
    plt.show()
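NxGraphAffinities is not shipped with openTSNE, so this example assumes a custom affinity class. A minimal sketch of what it might look like, built on openTSNE's Affinities base class; the essential requirement is a sparse affinity matrix P normalized to sum to 1:

import networkx as nx
import scipy.sparse as sp
from openTSNE.affinity import Affinities

class NxGraphAffinities(Affinities):
    def __init__(self, graph):
        super().__init__()
        # Use the symmetrized adjacency matrix as raw affinities
        adj = nx.adjacency_matrix(graph).astype(float)
        symmetric = (adj + adj.T) / 2
        # Normalize so all affinities sum to 1, as t-SNE expects of P
        self.P = sp.csr_matrix(symmetric / symmetric.sum())

    def to_new(self, data, return_distances=False):
        # Adding new points to a fixed graph is not well defined here
        raise NotImplementedError()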
Example #16
    def test_constructor(self, param_name, param_value, gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Make sure mock still conforms to signature
        gradient_descent.return_value = (1, MagicMock())

        # Early exaggeration training loop
        if param_name == 'early_exaggeration_iter':
            check_param_name = 'n_iter'
            call_idx = 0
        elif param_name == 'early_exaggeration':
            check_param_name = 'exaggeration'
            call_idx = 0
        elif param_name == 'initial_momentum':
            check_param_name = 'momentum'
            call_idx = 0
        # Main training loop
        elif param_name == 'n_iter':
            check_param_name = param_name
            call_idx = 1
        elif param_name == 'final_momentum':
            check_param_name = 'momentum'
            call_idx = 1
        # Late exaggeration training loop
        elif param_name == 'late_exaggeration_iter':
            check_param_name = 'n_iter'
            call_idx = 2
        elif param_name == 'late_exaggeration':
            check_param_name = 'exaggeration'
            call_idx = 2

        # If general parameter, should be applied to every call
        else:
            check_param_name = param_name
            call_idx = 0

        TSNE(**{param_name: param_value}).fit(self.x)

        self.assertEqual(3, gradient_descent.call_count)
        check_call_contains_kwargs(gradient_descent.mock_calls[call_idx],
                                   {check_param_name: param_value})
Example #17
    def test_iris(self):
        iris = datasets.load_iris()
        x, y = iris['data'], iris['target']

        # Evaluate tSNE optimization using a KNN classifier
        knn = KNeighborsClassifier(n_neighbors=10)
        tsne = TSNE(perplexity=30, initialization='random', random_state=0)

        # Prepare a random initialization
        embedding = tsne.prepare_initial(x)

        # KNN should do poorly on a random initialization
        knn.fit(embedding, y)
        predictions = knn.predict(embedding)
        self.assertTrue(accuracy_score(predictions, y) < .5)

        # Optimize the embedding for a small number of steps so tests run fast
        embedding.optimize(50, inplace=True)

        # Similar points should be grouped together, therefore KNN should do well
        knn.fit(embedding, y)
        predictions = knn.predict(embedding)
        self.assertTrue(accuracy_score(predictions, y) > .95)
Example #18
 @classmethod
 def setUpClass(cls):
     cls.tsne = TSNE()
     random_state = np.random.RandomState(42)
     cls.x = random_state.randn(100, 4)
     cls.x_test = random_state.randn(25, 4)
Example #19
def run(perplexity=30, learning_rate=100, n_jobs=4):
    x, y = get_mouse_60k()
    # x, y = get_fashion_mnist()

    angle = 0.5
    ee = 12
    metric = 'euclidean'

    print(x.shape)

    start = time.time()
    tsne = TSNE(
        perplexity=perplexity,
        learning_rate=learning_rate,
        early_exaggeration=ee,
        n_jobs=n_jobs,
        theta=angle,
        initialization='random',
        metric=metric,
        n_components=2,
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors='approx',
        negative_gradient_method='fft',
        min_num_intervals=10,
        ints_in_interval=1,
        late_exaggeration_iter=0,
        late_exaggeration=2.,
        callbacks=ErrorLogger(),
    )
    # x = PCA(n_components=50).fit_transform(x)
    embedding = tsne.fit(x)
    print('-' * 80)
    print('tsne', time.time() - start)
    plt.title('tsne')
    plot(embedding, y)
    # Early return; remove it to also run the FIt-SNE, MulticoreTSNE, and
    # scikit-learn comparisons below
    return

    x = np.ascontiguousarray(x.astype(np.float64))
    from fitsne import FItSNE
    start = time.time()
    embedding = FItSNE(
        x,
        2,
        perplexity=perplexity,
        stop_lying_iter=250,
        ann_not_vptree=True,
        early_exag_coeff=ee,
        nthreads=n_jobs,
        theta=angle,
    )
    print('-' * 80)
    print('fft interp %.4f' % (time.time() - start))
    plt.title('fft interp')
    plot(embedding, y)
    plt.show()
    return

    init = PCA(n_components=2).fit_transform(x)
    start = time.time()
    embedding = MulticoreTSNE(early_exaggeration=ee,
                              learning_rate=learning_rate,
                              perplexity=perplexity,
                              n_jobs=n_jobs,
                              cheat_metric=False,
                              angle=angle,
                              init=init,
                              metric=metric,
                              verbose=True).fit_transform(x)
    print('-' * 80)
    print('mctsne', time.time() - start)
    plt.title('mctsne')
    plot(embedding, y)
    plt.show()

    start = time.time()
    embedding = SKLTSNE(
        early_exaggeration=ee,
        learning_rate=learning_rate,
        angle=angle,
        perplexity=perplexity,
        init='pca',
        metric=metric,
    ).fit_transform(x)
    print('-' * 80)
    print('sklearn', time.time() - start)
    plt.title('sklearn')
    plot(embedding, y)
    plt.show()
Example #20
 def test_fitted_pca_model(self):
     """Using PCA initialization in `transform` should work when the initial
     embedding was initialized with PCA."""
     tsne = TSNE(initialization='pca')
     embedding = tsne.fit(self.x)
     embedding.transform(self.x_test, initialization='pca')
Example #21
 @classmethod
 def setUpClass(cls):
     cls.tsne = TSNE()
     cls.x = np.random.randn(100, 4)
     cls.x_test = np.random.randn(25, 4)