예제 #1
0
    def test_can_pass_callbacks_to_tsne_object(self):
        """Callbacks may be given as a single callable, as a list, or be
        replaced on the `TSNE` object before fitting."""
        first_cb, second_cb = MagicMock(), MagicMock()
        # A lone callback must not itself look like an iterable of callbacks
        for mock in (first_cb, second_cb):
            del mock.__iter__

        # Settings under which each callback is expected to fire exactly once
        single_step = dict(callbacks_every_iters=1,
                           early_exaggeration_iter=0,
                           n_iter=1)

        # A bare callable is accepted
        TSNE(callbacks=first_cb, **single_step).fit(self.x)
        self.assertEqual(first_cb.call_count, 1)

        # A list of callables is accepted
        first_cb.reset_mock()
        TSNE(callbacks=[first_cb], **single_step).fit(self.x)
        self.assertEqual(first_cb.call_count, 1)

        # Reassigning `callbacks` on the object replaces the original one
        first_cb.reset_mock()
        model = TSNE(callbacks=first_cb, **single_step)
        model.callbacks = second_cb
        model.fit(self.x)
        first_cb.assert_not_called()
        self.assertEqual(second_cb.call_count, 1)
예제 #2
0
    def test_same_results_on_fixed_random_state_pca_init(self):
        """A fixed random state with PCA init must reproduce the result."""
        settings = {'random_state': 1, 'initialization': 'pca'}
        first_run = TSNE(**settings).fit(self.x)
        second_run = TSNE(**settings).fit(self.x)

        # Exact equality is required, not just closeness
        np.testing.assert_array_equal(
            first_run,
            second_run,
            err_msg='Same random state produced different initial embeddings',
        )
예제 #3
0
 def test_unfitted_pca_model(self):
     """`transform` with PCA initialization must be rejected when the
     embedding itself was not initialized with PCA."""
     randomly_initialized = TSNE(initialization='random').fit(self.x)
     # Requesting `pca` init on a non-`pca` embedding has to raise
     with self.assertRaises(AssertionError):
         randomly_initialized.transform(self.x_test, initialization='pca')
예제 #4
0
def transform(n_jobs=4, grad='bh', neighbors='approx'):
    """Fit t-SNE on a training split, embed the held-out split and plot both.

    The data returned by `get_mnist(20000)` is split 67/33 with a fixed seed.
    The left panel shows the training embedding; the right panel shows the
    same embedding faded out with the transformed test points drawn on top.
    Wall-clock times for fitting and for transforming are printed.

    Parameters
    ----------
    n_jobs : int
        Forwarded to `TSNE` as `n_jobs`.
    grad : str
        Forwarded to `TSNE` as `negative_gradient_method`.
    neighbors : str
        Forwarded to `TSNE` as `neighbors`.
    """
    # iris = datasets.load_iris()
    # x, y = iris['data'], iris['target']
    x, y = get_mnist(20000)

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    tsne = TSNE(
        n_components=2,
        perplexity=30,
        learning_rate=100,
        early_exaggeration=12,
        n_jobs=n_jobs,
        theta=0.5,
        initialization='random',
        metric='euclidean',
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors=neighbors,
        negative_gradient_method=grad,
        min_num_intervals=10,
        ints_in_interval=2,
        late_exaggeration_iter=0,
        late_exaggeration=4,
        callbacks=[ErrorLogger()],
    )
    start = time.time()
    embedding = tsne.fit(x_train)
    print('tsne train', time.time() - start)

    plt.subplot(121)
    plot(embedding, y_train, show=False, ms=3)

    start = time.time()
    partial_embedding = embedding.transform(x_test, perplexity=20)
    # partial_embedding = embedding.get_partial_embedding_for(
    #     x_test, perplexity=10, initialization='random')
    # partial_embedding.optimize(200, exaggeration=2, inplace=True, momentum=0.1)
    print('tsne transform', time.time() - start)

    plt.subplot(122)
    plot(embedding, y_train, show=False, ms=3, alpha=0.25)
    # Restart the axes' property cycle before the second `plot` call,
    # presumably so the test points reuse the training points' colors.
    # `set_color_cycle` is deprecated in favor of `set_prop_cycle` and is
    # missing from recent matplotlib releases.
    plt.gca().set_prop_cycle(None)
    plot(partial_embedding, y_test, show=False, ms=3, alpha=0.8)

    plt.show()
예제 #5
0
    def test_partial_embedding_optimize(self, param_name, param_value,
                                        gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Checks that a parameter handed to `optimize` on a partial embedding
        # shows up in the single resulting `gradient_descent` call.

        # Keep the mock's return value compatible with what callers expect
        gradient_descent.return_value = (1, MagicMock())

        # Fit the reference embedding (covered by other tests), then discard
        # the calls recorded while doing so
        reference = TSNE().fit(self.x)
        gradient_descent.reset_mock()

        # `n_iter` is mandatory for `optimize`
        expected_kwargs = {'n_iter': 50}
        expected_kwargs[param_name] = param_value

        partial = reference.prepare_partial(self.x_test)
        partial.optimize(**expected_kwargs, inplace=True)

        self.assertEqual(1, gradient_descent.call_count)
        only_call = gradient_descent.mock_calls[0]
        check_call_contains_kwargs(only_call, expected_kwargs)
예제 #6
0
    def test_embedding_transform(self, param_name, param_value,
                                 gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Checks that a parameter given to `transform` on a fitted embedding
        # reaches the expected `gradient_descent` call under the right name.

        # Keep the mock's return value compatible with what callers expect
        gradient_descent.return_value = (1, MagicMock())

        # Fit the reference embedding (covered by other tests), then discard
        # the calls recorded while doing so
        fitted = TSNE().fit(self.x)
        gradient_descent.reset_mock()

        fitted.transform(self.x_test, **{param_name: param_value})

        # Maps a `transform` parameter to the keyword it is expected to become
        # in `gradient_descent` and to the index of the call receiving it
        # (0: early exaggeration loop, 1: main training loop).
        forwarded_as = {
            'early_exaggeration_iter': ('n_iter', 0),
            'early_exaggeration': ('exaggeration', 0),
            'initial_momentum': ('momentum', 0),
            'n_iter': ('n_iter', 1),
            'final_momentum': ('momentum', 1),
        }
        # Any other parameter keeps its name and is checked on the first call
        expected_name, call_position = forwarded_as.get(
            param_name, (param_name, 0))

        self.assertEqual(2, gradient_descent.call_count)
        check_call_contains_kwargs(gradient_descent.mock_calls[call_position],
                                   {expected_name: param_value})
예제 #7
0
def run(perplexity=30, learning_rate=100, n_jobs=4, method='tsne'):
    """Embed the `get_mouse_60k` data with one t-SNE implementation and plot it.

    The alternative implementations used to sit behind a bare `return` and
    could never execute. The `method` argument makes each of them reachable;
    its default reproduces the previous behaviour exactly.

    Parameters
    ----------
    perplexity
        Perplexity handed to the selected implementation.
    learning_rate
        Learning rate handed to the selected implementation (not used by
        the `'fitsne'` call).
    n_jobs
        Number of jobs/threads handed to the selected implementation (not
        used by the `'sklearn'` call).
    method : str
        Which implementation to run: `'tsne'` (default), `'fitsne'`,
        `'mctsne'` or `'sklearn'`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names.
    """
    # Fail before loading any data if the request cannot be served
    if method not in ('tsne', 'fitsne', 'mctsne', 'sklearn'):
        raise ValueError('Unrecognized method `%s`' % method)

    x, y = get_mouse_60k()
    # x, y = get_fashion_mnist()

    # Settings shared by all implementations
    angle = 0.5
    ee = 12
    metric = 'euclidean'

    print(x.shape)

    if method == 'tsne':
        start = time.time()
        tsne = TSNE(
            perplexity=perplexity,
            learning_rate=learning_rate,
            early_exaggeration=ee,
            n_jobs=n_jobs,
            theta=angle,
            initialization='random',
            metric=metric,
            n_components=2,
            n_iter=750,
            early_exaggeration_iter=250,
            neighbors='approx',
            negative_gradient_method='fft',
            min_num_intervals=10,
            ints_in_interval=1,
            late_exaggeration_iter=0,
            late_exaggeration=2.,
            callbacks=ErrorLogger(),
        )
        # x = PCA(n_components=50).fit_transform(x)
        embedding = tsne.fit(x)
        print('-' * 80)
        print('tsne', time.time() - start)
        plt.title('tsne')
        plot(embedding, y)

    elif method == 'fitsne':
        # Imported here, as in the original, so that the other methods do
        # not require `fitsne` to be importable
        from fitsne import FItSNE
        x = np.ascontiguousarray(x.astype(np.float64))
        start = time.time()
        embedding = FItSNE(
            x,
            2,
            perplexity=perplexity,
            stop_lying_iter=250,
            ann_not_vptree=True,
            early_exag_coeff=ee,
            nthreads=n_jobs,
            theta=angle,
        )
        print('-' * 80)
        print('fft interp %.4f' % (time.time() - start))
        plt.title('fft interp')
        plot(embedding, y)
        plt.show()

    elif method == 'mctsne':
        # The PCA initialization is computed outside the timed section
        init = PCA(n_components=2).fit_transform(x)
        start = time.time()
        embedding = MulticoreTSNE(early_exaggeration=ee,
                                  learning_rate=learning_rate,
                                  perplexity=perplexity,
                                  n_jobs=n_jobs,
                                  cheat_metric=False,
                                  angle=angle,
                                  init=init,
                                  metric=metric,
                                  verbose=True).fit_transform(x)
        print('-' * 80)
        print('mctsne', time.time() - start)
        plt.title('mctsne')
        plot(embedding, y)
        plt.show()

    else:  # method == 'sklearn'
        start = time.time()
        embedding = SKLTSNE(
            early_exaggeration=ee,
            learning_rate=learning_rate,
            angle=angle,
            perplexity=perplexity,
            init='pca',
            metric=metric,
        ).fit_transform(x)
        print('-' * 80)
        print('sklearn', time.time() - start)
        plt.title('sklearn')
        plot(embedding, y)
        plt.show()
예제 #8
0
 def test_fitted_pca_model(self):
     """`transform` with PCA initialization must succeed when the embedding
     itself was initialized with PCA."""
     pca_initialized = TSNE(initialization='pca').fit(self.x)
     # The test passes as long as this call does not raise
     pca_initialized.transform(self.x_test, initialization='pca')