def test_low_variance(self):
    """Low variance of the initial embedding is very important for the convergence of tSNE."""
    max_allowed_variance = 1e-3
    # Both supported initialization schemes must produce low-variance embeddings.
    for scheme in ('random', 'pca'):
        tsne = TSNE(initialization=scheme, perplexity=2)
        embedding = tsne.prepare_initial(self.x)
        per_dimension_variance = np.var(embedding, axis=0)
        np.testing.assert_array_less(
            per_dimension_variance,
            max_allowed_variance,
            'using the `%s` initialization' % scheme,
        )
def check_error_approx():
    """Run a full optimization on a Mouse-60k subsample while logging the
    KL-divergence error under the exact, Barnes-Hut and FFT approximations,
    then plot the embedding and the three error curves on a log scale.
    """
    data, labels = get_mouse_60k(1500)
    # NOTE(review): several constructor kwargs (`callbacks`, `initialization`,
    # the iteration counts) appear to be overridden by the explicit
    # `prepare_initial`/`optimize` calls below — presumably intentional in
    # this benchmark script; verify against the TSNE API.
    tsne = TSNE(
        perplexity=20, learning_rate=100, early_exaggeration=12, n_jobs=4,
        theta=0.5, initialization='pca', metric='euclidean', n_components=2,
        n_iter=750, early_exaggeration_iter=250, neighbors='exact',
        negative_gradient_method='bh', min_num_intervals=10,
        ints_in_interval=2, late_exaggeration_iter=0, late_exaggeration=4,
        callbacks=ErrorLogger(),
    )
    embedding = tsne.prepare_initial(data, initialization='random')

    approximation_errors = ErrorApproximations(embedding.affinities.P)
    progress_logger = ErrorLogger()
    shared_optimize_kwargs = dict(
        callbacks=[approximation_errors, progress_logger],
        callbacks_every_iters=5,
        inplace=True,
    )
    # Early-exaggeration phase followed by the regular phase.
    embedding.optimize(250, exaggeration=12, **shared_optimize_kwargs)
    embedding.optimize(750, exaggeration=None, **shared_optimize_kwargs)

    approximation_errors.report()
    plot(embedding, labels)

    iterations = list(range(len(approximation_errors.exact_errors)))
    for series, name in (
        (approximation_errors.exact_errors, 'Exact'),
        (approximation_errors.bh_errors, 'BH'),
        (approximation_errors.fft_errors, 'FFT'),
    ):
        plt.semilogy(iterations, series, label=name)
    plt.legend()
    plt.show()
def test_embedding_optimize(self, param_name, param_value, gradient_descent):
    # type: (str, Any, MagicMock) -> None
    """Check that `optimize` forwards its keyword arguments to gradient descent."""
    # Keep the mocked gradient descent compatible with the real signature.
    gradient_descent.return_value = (1, MagicMock())

    # `optimize` requires `n_iter`; add the parameterized kwarg on top of it.
    optimize_kwargs = {'n_iter': 50, param_name: param_value}
    embedding = TSNE().prepare_initial(self.x)
    embedding.optimize(**optimize_kwargs, inplace=True)

    self.assertEqual(1, gradient_descent.call_count)
    check_call_contains_kwargs(gradient_descent.mock_calls[0], optimize_kwargs)
def test_iris(self):
    """A random initialization should score poorly under KNN; a briefly
    optimized embedding should group similar points and score well."""
    dataset = datasets.load_iris()
    points, labels = dataset['data'], dataset['target']

    # Evaluate tSNE optimization using a KNN classifier
    classifier = KNeighborsClassifier(n_neighbors=10)
    tsne = TSNE(perplexity=30, initialization='random', random_state=0)

    # Prepare a random initialization
    embedding = tsne.prepare_initial(points)

    # KNN should do poorly on a random initialization.
    # NOTE(review): `accuracy_score(predictions, labels)` swaps the usual
    # (y_true, y_pred) order, but accuracy is symmetric so the value is the same.
    classifier.fit(embedding, labels)
    self.assertTrue(accuracy_score(classifier.predict(embedding), labels) < .5)

    # Optimize the embedding for a small number of steps so tests run fast
    embedding.optimize(50, inplace=True)

    # Similar points should now be grouped together, so KNN should do well
    classifier.fit(embedding, labels)
    self.assertTrue(accuracy_score(classifier.predict(embedding), labels) > .95)