def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        data = random_state.randn(n_samples, n_features)
        distances = pairwise_distances(data)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        n_neighbors = n_samples - 1
        distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
            n_neighbors=n_neighbors, mode='distance')
        P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
def fit(self):
    """
    Performs the t-SNE transformation.

    Uses the joint probability distribution to create the reduced feature space.

    :return: t-SNE transformed embedding
    """
    # Store the number of samples.
    n_samples = self.X.shape[0]

    # Compute the squared Euclidean distance between each pair of data points.
    distances = pairwise_distances(self.X, metric='euclidean', squared=True)

    # Compute joint probabilities p_ij from the distances.
    P = _joint_probabilities(distances=distances,
                             desired_perplexity=self.perplexity,
                             verbose=False)

    # Create the reduced feature space from randomly drawn Gaussian values.
    # The embedding is initialized with iid samples from Gaussians with
    # standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(
        n_samples, self.n_components).astype(np.float32)

    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(self.n_components - 1, 1)

    return self.tsne(P, degrees_of_freedom, n_samples, X_emb=X_embedded)
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = distances.dot(distances.T)
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, False)
        kl, gradex = _kl_divergence(params, P, degrees_of_freedom,
                                    n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        Pbh = _joint_probabilities_nn(distances, neighbors_nn,
                                      perplexity, False)
        kl, gradbh = _kl_divergence_bh(params, Pbh, neighbors_nn,
                                       degrees_of_freedom, n_samples,
                                       n_components, angle=angle,
                                       skip_num_points=0, verbose=False)

        assert_array_almost_equal(Pbh, P, decimal=5)
        assert_array_almost_equal(gradex, gradbh, decimal=5)
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)
    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def similarity_matrix():
    def _joint_probabilities_constant_sigma(D, sigma):
        P = np.exp(-D**2 / 2 * sigma**2)
        P /= np.sum(P, axis=1)
        return P

    # Pairwise distances between all data points.
    D = pairwise_distances(X, squared=True)
    # Similarity with constant sigma.
    P_constant = _joint_probabilities_constant_sigma(D, .002)
    # Similarity with variable sigma.
    P_binary = _joint_probabilities(D, 30., False)
    # The output of this function needs to be reshaped to a square matrix.
    P_binary_s = squareform(P_binary)

    plt.figure(figsize=(12, 4))
    pal = sns.light_palette("blue", as_cmap=True)

    plt.subplot(131)
    plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title('Distance matrix', fontdict={'fontsize': 16})

    plt.subplot(132)
    plt.imshow(P_constant[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title(r"$p_{j|i}$ (constant $\sigma$)", fontdict={'fontsize': 16})

    plt.subplot(133)
    plt.imshow(P_binary_s[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title(r"$p_{j|i}$ (variable $\sigma$)", fontdict={'fontsize': 16})
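# Aside (not from the snippet above): the textbook fixed-sigma conditional
# probability is p_{j|i} = exp(-d_ij / (2 * sigma**2)) / sum_{k != i} exp(-d_ik / (2 * sigma**2)),
# where d_ij are *squared* distances. A minimal sketch, with names of our own
# choosing, that normalizes each row and excludes the diagonal:
import numpy as np

def conditional_probabilities_fixed_sigma(D, sigma):
    """D is an (n, n) matrix of squared pairwise distances."""
    P = np.exp(-D / (2.0 * sigma ** 2))
    np.fill_diagonal(P, 0.0)            # a point is not its own neighbour
    P /= P.sum(axis=1, keepdims=True)   # each row sums to one
    return P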
def test_gradient():
    # Test gradient of Kullback-Leibler divergence.
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features).astype(np.float32)
    distances = np.abs(distances.dot(distances.T))
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)

    P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)

    def fun(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[0]

    def grad(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[1]

    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)
def calc_latent_kl(vis_latents, aud_latents, perplexity):
    logging.info(
        "Calculating joint probability distribution of visual latent space...")
    vis_dists = calc_dists(vis_latents)
    vis_distr = tsne._joint_probabilities(distances=vis_dists,
                                          desired_perplexity=perplexity,
                                          verbose=True)
    logging.info(
        "Calculating joint probability distribution of auditive latent space...")
    aud_dists = calc_dists(aud_latents)
    aud_distr = tsne._joint_probabilities(distances=aud_dists,
                                          desired_perplexity=perplexity,
                                          verbose=True)

    kl_va = 2.0 * np.dot(vis_distr, np.log(vis_distr / aud_distr))
    kl_av = 2.0 * np.dot(aud_distr, np.log(aud_distr / vis_distr))

    logging.info(
        "Calculated KL divergences of audio-visual latent spaces with "
        "perplexity %d: %.2f VA / %.2f AV." % (perplexity, kl_va, kl_av))
    return kl_va, kl_av
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)
    _barnes_hut_tsne.gradient(pij_input, pos_output, neighbors,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def fit(X):
    n_samples = X.shape[0]

    # Compute squared Euclidean distances.
    distances = pairwise_distances(X, metric='euclidean', squared=True)

    # Compute joint probabilities p_ij from distances.
    P = _joint_probabilities(distances=distances,
                             desired_perplexity=perplexity, verbose=False)

    # The embedding is initialized with iid samples from Gaussians with
    # standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(
        n_samples, n_components).astype(np.float32)

    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(n_components - 1, 1)

    return _tsne(P, degrees_of_freedom, n_samples, X_embedded=X_embedded)
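# A self-contained sketch of the same P computation on random data, useful for
# sanity-checking the snippets above. The private import path is an assumption:
# it is sklearn.manifold.t_sne in older releases and sklearn.manifold._t_sne
# from 0.22 onwards, and may change between versions.
import numpy as np
from scipy.spatial.distance import squareform
from sklearn.metrics import pairwise_distances
from sklearn.manifold._t_sne import _joint_probabilities  # older: sklearn.manifold.t_sne

X_demo = np.random.RandomState(0).randn(100, 5)
D_demo = pairwise_distances(X_demo, metric='euclidean', squared=True)
P_demo = _joint_probabilities(D_demo, desired_perplexity=30.0, verbose=False)
P_square = squareform(P_demo)          # condensed vector -> symmetric (100, 100) matrix
print(P_square.shape, P_square.sum())  # (100, 100), approximately 1.0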
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]), \
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])

        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
def fit_transform(self, data):
    n_samples = data.shape[0]
    distances = pairwise_distances(data, metric=self.metric)
    P = _joint_probabilities(distances=distances,
                             desired_perplexity=self.perplexity,
                             verbose=False)

    # Reduced feature space
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(
        n_samples, self.n_components).astype(np.float32)

    degrees_of_freedom = max(self.n_components - 1, 1)

    return self._tsne(P, degrees_of_freedom, n_samples, X_embedded=X_embedded)
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)
    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def fit(X):
    # Store the number of samples for later reference.
    n_samples = X.shape[0]

    # Euclidean distances.
    distances = pairwise_distances(X, metric='euclidean', squared=True)

    # Joint probabilities p_ij from the distances.
    P = _joint_probabilities(distances=distances,
                             desired_perplexity=perplexity, verbose=False)

    # The embeddings are initialized with iid samples from Gaussians with
    # standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(
        n_samples, n_components).astype(np.float32)

    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(n_components - 1, 1)

    return _tsne(P, degrees_of_freedom, n_samples, X_embedded=X_embedded)
def test_gradient():
    """Test gradient of Kullback-Leibler divergence."""
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features)
    distances = distances.dot(distances.T)
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components)

    P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)
    fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                        n_components)[0]
    grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                         n_components)[1]
    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)
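# Aside on what check_grad verifies in the tests above: it compares an analytic
# gradient against a finite-difference approximation and returns the 2-norm of
# the difference. A minimal self-contained sketch on a toy quadratic (the names
# below are ours, not from the snippet):
import numpy as np
from scipy.optimize import check_grad

def toy_cost(w):
    return np.sum(w ** 2)

def toy_grad(w):
    return 2.0 * w

err = check_grad(toy_cost, toy_grad, np.array([1.0, -2.0, 3.0]))
print(err)  # close to 0, since toy_grad is the exact gradient of toy_cost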
def fit_transform(self, X, perplexity=30, n_components=2):
    """
    Fits a TSNE model to the data.

    :param X: data to be reduced in dimensionality
    """
    self.n_samples = X.shape[0]
    self.n_components = n_components

    # Compute pairwise distances.
    distances = pairwise_distances(X, metric="euclidean", squared=True)

    # Compute joint probabilities p_ij from distances.
    P = _joint_probabilities(distances=distances,
                             desired_perplexity=perplexity, verbose=False)

    # Init low-dimensional embeddings with standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(
        self.n_samples, n_components).astype(np.float32)

    degrees_of_freedom = max(n_components - 1, 1)

    return self.__tsne(P, degrees_of_freedom, X_embedded=X_embedded)
                  for i in range(10)])

def _joint_probabilities_constant_sigma(D, sigma):
    P = np.exp(-D**2/2 * sigma**2)
    P /= np.sum(P, axis=1)
    return P

# Pairwise distances between all data points.
D = pairwise_distances(X, squared=True)
# Similarity with constant sigma.
P_constant = _joint_probabilities_constant_sigma(D, .002)
# Similarity with variable sigma.
P_binary = _joint_probabilities(D, 30., False)
# Output of this function needs to be reshaped to a square matrix.
P_binary_s = squareform(P_binary)

# Plot this similarity matrix.
plt.figure(figsize=(12, 4))
pal = sns.light_palette("blue", as_cmap=True)

plt.subplot(131)
plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
plt.axis('off')
plt.title("Distance matrix", fontdict={'fontsize': 16})

plt.subplot(132)
plt.imshow(P_constant[::10, ::10], interpolation='none', cmap=pal)
#!/usr/bin/env python2
from sklearn.metrics import euclidean_distances
from sklearn.manifold import t_sne
import numpy as np

import _snack as snack

for i in xrange(10):
    X = np.random.randn(1000, 2) * 10
    params = X.ravel()
    D = euclidean_distances(X)

    probs1 = t_sne._joint_probabilities(D, 30, False)
    probs2 = snack.my_joint_probabilities(D, 30, False)
    c1, grad1 = t_sne._kl_divergence(params, probs1, 1.0, len(X), 2)
    c2, grad2 = snack.my_kl_divergence(params, probs1, 1.0, len(X), 2.0)

    print "Test", i
    print "Probability difference norm:", np.linalg.norm(probs1 - probs2)
    print "Gradient difference norm:", np.linalg.norm(grad1 - grad2)
    print "Cost difference:", c1 - c2

    assert np.allclose(probs1, probs2)
    assert np.allclose(grad1, grad2)
    assert np.allclose(c1, c2)
def compute_joint_probabilities(X, perplexity=30, metric='euclidean',
                                method='exact', adj=None, verbose=0):
    """
    Computes the joint probability matrix P from a feature matrix X of size n x f.

    Adapted from sklearn.manifold.t_sne.
    """
    # Compute pairwise distances.
    if verbose > 0:
        print('Computing pairwise distances...')

    if method == 'exact':
        if metric == 'precomputed':
            D = X
        elif metric == 'euclidean':
            D = pairwise_distances(X, metric=metric, squared=True)
        elif metric == 'cosine':
            D = pairwise_distances(X, metric=metric)
        elif metric == 'shortest_path':
            assert adj is not None
            D = get_shortest_path_matrix(adj, verbose=verbose)

        P = _joint_probabilities(D, desired_perplexity=perplexity,
                                 verbose=verbose)
        assert np.all(np.isfinite(P)), "All probabilities should be finite"
        assert np.all(P >= 0), "All probabilities should be non-negative"
        assert np.all(P <= 1), ("All probabilities should be less than "
                                "or equal to one")
        P = squareform(P)
    else:
        # Compute the number of nearest neighbors to find.
        # LvdM uses 3 * perplexity as the number of neighbors.
        # In the event that we have very few points,
        # set the number of neighbors to n - 1.
        n_samples = X.shape[0]
        k = min(n_samples - 1, int(3. * perplexity + 1))

        # Find the nearest neighbors for every point.
        knn = NearestNeighbors(algorithm='auto', n_neighbors=k, metric=metric)
        t0 = time()
        knn.fit(X)
        duration = time() - t0
        if verbose:
            print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
                n_samples, duration))

        t0 = time()
        distances_nn, neighbors_nn = knn.kneighbors(None, n_neighbors=k)
        duration = time() - t0
        if verbose:
            print("[t-SNE] Computed neighbors for {} samples in {:.3f}s..."
                  .format(n_samples, duration))

        # Free the memory used by the ball_tree.
        del knn

        if metric == "euclidean":
            # knn returns the Euclidean distance but we need it squared to be
            # consistent with the 'exact' method. Note that the method was
            # derived using the Euclidean metric in the input space; the
            # implications of using a different metric are unclear.
            distances_nn **= 2

        # Compute the joint probability distribution for the input space.
        P = _joint_probabilities_nn(distances_nn, neighbors_nn, perplexity,
                                    verbose)
        P = P.toarray()

    # Convert to a torch tensor.
    P = torch.from_numpy(P).type(dtypeFloat)
    return P
def _fit(self, X, skip_num_points=0):
    """Fit the model using X as training data.

    Note that sparse arrays can only be handled by method='exact'. It is
    recommended that you convert your sparse array to dense (e.g.
    `X.toarray()`) if it fits in memory, or otherwise use a dimensionality
    reduction technique (e.g. TruncatedSVD).

    Parameters
    ----------
    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
        If the metric is 'precomputed' X must be a square distance matrix.
        Otherwise it contains a sample per row. Note that when
        method='barnes_hut', X cannot be a sparse array and if need be will
        be converted to a 32 bit float array. method='exact' allows sparse
        arrays and 64 bit floating point inputs.

    skip_num_points : int (optional, default: 0)
        This does not compute the gradient for points with indices below
        `skip_num_points`. This is useful when computing transforms of new
        data where you'd like to keep the old data fixed.
    """
    if self.method not in ['barnes_hut', 'exact']:
        raise ValueError("'method' must be 'barnes_hut' or 'exact'")
    if self.angle < 0.0 or self.angle > 1.0:
        raise ValueError("'angle' must be between 0.0 - 1.0")
    if self.method == 'barnes_hut' and sp.issparse(X):
        raise TypeError('A sparse matrix was passed, but dense '
                        'data is required for method="barnes_hut". Use '
                        'X.toarray() to convert to a dense numpy array if '
                        'the array is small enough for it to fit in '
                        'memory. Otherwise consider dimensionality '
                        'reduction techniques (e.g. TruncatedSVD)')
    else:
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=np.float64)
    random_state = check_random_state(self.random_state)

    if self.early_exaggeration < 1.0:
        raise ValueError("early_exaggeration must be at least 1, but is "
                         "%f" % self.early_exaggeration)

    if self.n_iter < 200:
        raise ValueError("n_iter should be at least 200")

    if self.metric == "precomputed":
        if isinstance(self.init, string_types) and self.init == 'pca':
            raise ValueError("The parameter init=\"pca\" cannot be used "
                             "with metric=\"precomputed\".")
        if X.shape[0] != X.shape[1]:
            raise ValueError("X should be a square distance matrix")
        distances = X
    else:
        if self.verbose:
            print("[t-SNE] Computing pairwise distances...")

        if self.metric == "euclidean":
            distances = pairwise_distances(X, metric=self.metric,
                                           squared=True)
        else:
            distances = pairwise_distances(X, metric=self.metric)

    if not np.all(distances >= 0):
        raise ValueError("All distances should be positive, either "
                         "the metric or precomputed distances given "
                         "as X are not correct")

    # Degrees of freedom of the Student's t-distribution. The suggestion
    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(self.n_components - 1.0, 1)
    n_samples = X.shape[0]
    # The number of nearest neighbors to find.
    k = min(n_samples - 1, int(3. * self.perplexity + 1))

    neighbors_nn = None
    if self.method == 'barnes_hut':
        if self.verbose:
            print("[t-SNE] Computing %i nearest neighbors..." % k)
        if self.metric == 'precomputed':
            # Use the precomputed distances to find
            # the k nearest neighbors and their distances.
            neighbors_nn = np.argsort(distances, axis=1)[:, :k]
        elif self.rho >= 1:
            # Find the nearest neighbors for every point.
            bt = BallTree(X)
            # LvdM uses 3 * perplexity as the number of neighbors,
            # and we add one to not count the data point itself.
            # In the event that we have very few points,
            # set the neighbors to n - 1.
            distances_nn, neighbors_nn = bt.query(X, k=k + 1)
            neighbors_nn = neighbors_nn[:, 1:]
        elif self.rho < 1:
            # Use pyFLANN to find the nearest neighbors.
            myflann = FLANN()
            testset = X
            params = myflann.build_index(testset, algorithm="autotuned",
                                         target_precision=self.rho,
                                         log_level='info')
            neighbors_nn, distances = myflann.nn_index(
                testset, k + 1, checks=params["checks"])
            neighbors_nn = neighbors_nn[:, 1:]
        P = _joint_probabilities_nn(distances, neighbors_nn,
                                    self.perplexity, self.verbose)
    else:
        P = _joint_probabilities(distances, self.perplexity, self.verbose)

    assert np.all(np.isfinite(P)), "All probabilities should be finite"
    assert np.all(P >= 0), "All probabilities should be zero or positive"
    assert np.all(P <= 1), ("All probabilities should be less than "
                            "or equal to one")

    if isinstance(self.init, np.ndarray):
        X_embedded = self.init
    elif self.init == 'pca':
        pca = PCA(n_components=self.n_components, svd_solver='randomized',
                  random_state=random_state)
        X_embedded = pca.fit_transform(X)
    elif self.init == 'random':
        X_embedded = None
    else:
        raise ValueError("Unsupported initialization scheme: %s" % self.init)

    return self._tsne(P, degrees_of_freedom, n_samples, random_state,
                      X_embedded=X_embedded,
                      neighbors=neighbors_nn,
                      skip_num_points=skip_num_points)
def plot_tsne_result(X, y, n_components):
    positions = []
    errors = []

    def _gradient_descent(objective, p0, it, n_iter, n_iter_check=1,
                          n_iter_without_progress=300, momentum=0.8,
                          learning_rate=200.0, min_gain=0.01,
                          min_grad_norm=1e-7, verbose=0, args=None,
                          kwargs=None):
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        p = p0.copy().ravel()
        update = np.zeros_like(p)
        gains = np.ones_like(p)
        error = np.finfo(np.float).max
        best_error = np.finfo(np.float).max
        best_iter = i = it

        tic = time()
        for i in range(it, n_iter):
            positions.append(p.copy())

            error, grad = objective(p, *args, **kwargs)
            errors.append(error)
            grad_norm = linalg.norm(grad)

            inc = update * grad < 0.0
            dec = np.invert(inc)
            gains[inc] += 0.2
            gains[dec] *= 0.8
            np.clip(gains, min_gain, np.inf, out=gains)
            grad *= gains
            update = momentum * update - learning_rate * grad
            p += update

            if (i + 1) % n_iter_check == 0:
                toc = time()
                duration = toc - tic
                tic = toc

                if verbose >= 2:
                    print("[t-SNE] Iteration %d: error = %.7f,"
                          " gradient norm = %.7f"
                          " (%s iterations in %0.3fs)"
                          % (i + 1, error, grad_norm, n_iter_check, duration))

                if error < best_error:
                    best_error = error
                    best_iter = i
                elif i - best_iter > n_iter_without_progress:
                    if verbose >= 2:
                        print("[t-SNE] Iteration %d: did not make any progress "
                              "during the last %d episodes. Finished."
                              % (i + 1, n_iter_without_progress))
                    break
                if grad_norm <= min_grad_norm:
                    if verbose >= 2:
                        print("[t-SNE] Iteration %d: gradient norm %f. Finished."
                              % (i + 1, grad_norm))
                    break

        return p, error, i

    D = pairwise_distances(X, squared=True)
    P_binary = _joint_probabilities(D, 30., False)
    P_binary_s = squareform(P_binary)

    positions.clear()
    errors.clear()
    manifold.t_sne._gradient_descent = _gradient_descent
    manifold.TSNE(n_components=n_components, random_state=100).fit_transform(X)

    if n_components == 3:
        X_iter = np.dstack(position.reshape(-1, 3) for position in positions)
    elif n_components == 2:
        X_iter = np.dstack(position.reshape(-1, 2) for position in positions)

    cmap = sns.light_palette("blue", as_cmap=True)
    fig = plt.figure(figsize=(12, 12))

    if X.shape[1] == 3:
        ax1 = fig.add_subplot(3, 4, 1, projection='3d')
        plot_data_3d_classification(X, y, ax=ax1, new_window=False,
                                    title="Original Data")
    elif X.shape[1] == 2:
        ax1 = fig.add_subplot(3, 4, 1)
        plot_data_2d_classification(X, y, ax=ax1, new_window=False,
                                    title="Original Data")

    ax2 = fig.add_subplot(3, 4, 2)
    plot_distance_matrix(P_binary_s, ax2, cmap, 'Pairwise Similarities')

    iter_size = int(len(positions) / 5)
    k = 2
    for i in range(5):
        iter_index = i * iter_size
        tmp = X_iter[..., iter_index]
        err = round(errors[iter_index], 2)
        title = "Iter: " + str(iter_index) + " Loss:" + str(err)

        k = k + 1
        if X_iter.shape[1] == 3:
            ax3 = fig.add_subplot(3, 4, k, projection='3d')
            plot_data_3d_classification(tmp, y, ax=ax3, new_window=False,
                                        title=title)
        elif X_iter.shape[1] == 2:
            ax3 = fig.add_subplot(3, 4, k)
            plot_data_2d_classification(tmp, y, ax=ax3, new_window=False,
                                        title=title)

        k = k + 1
        ax4 = fig.add_subplot(3, 4, k)
        n = 1. / (pdist(tmp, "sqeuclidean") + 1)
        Q = n / (2.0 * np.sum(n))
        Q = squareform(Q)
        plot_distance_matrix(Q, ax4, cmap, title=title)

    plt.subplots_adjust(wspace=0.1, hspace=0.5)
def preprocess(x, metric='euclidean', perplexity=30):
    dist = pairwise_distances(x, metric=metric, squared=True)
    p = _joint_probabilities(dist, perplexity, 0)
    return p
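# A hedged usage sketch for preprocess() above; the digits data and the
# reshape back to a square matrix are our additions, not part of the snippet,
# and assume pairwise_distances and _joint_probabilities are imported as in
# the other examples.
from scipy.spatial.distance import squareform
from sklearn.datasets import load_digits

digits = load_digits()
p_condensed = preprocess(digits.data, metric='euclidean', perplexity=30)
P_matrix = squareform(p_condensed)  # symmetric (n_samples, n_samples) joint probabilities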
creator.create("FitnessMin", base.Fitness, weights=(-1.0,) * tsnedata.nobj)
creator.create("Individual", list, fitness=creator.FitnessMin, pset=pset)

toolbox = ParallelToolbox()
toolbox.register("compile", gp.compile, pset=pset)
toolbox.register("evaluate", evalTSNEMO, tsnedata.data_t, toolbox)

tree1 = from_string_np_terms(tree_1_str, pset)
tree2 = from_string_np_terms(tree_2_str, pset)
print(str(tree1))
print(str(tree2))
ind = creator.Individual([tree1, tree2])
print(ind)

tsnedata.fitnessCache = cachetools.LRUCache(maxsize=1e6)
tsnedata.outdir = args.outdir
tsnedata.dataset = args.dataset
tsnedata.degrees_of_freedom = max(num_trees - 1, 1)
tsnedata._DOF = (tsnedata.degrees_of_freedom + 1.0) / -2.0

dists = t_sne.pairwise_distances(data, metric="euclidean", squared=True)
tsnedata.P_tsne = t_sne._joint_probabilities(dists, perplexity, verbose=True)
tsnedata.max_P_tsne = np.maximum(tsnedata.P_tsne, MACHINE_EPSILON)

best, reference = do_pso(ind, toolbox, tsnedata.data_t, args.gens)
print(best)

# We still want to output individuals that have no constants
# (even though the values didn't change!)
if best is not None:
    ephemerals_indxs = collect_ephemeral_indices(ind)  # [(0, 5), ..., (1, 4), ...]
    update_ercs(ephemerals_indxs, ind, best, reference)

ind.fitness.setValues(evalTSNEMO(tsnedata.data_t, toolbox, ind))
output_ind(ind, toolbox, tsnedata, suffix="-pso", compress=False)
def _fit(self, X, skip_num_points=0):
    if self.method not in ['barnes_hut', 'exact']:
        raise ValueError("'method' must be 'barnes_hut' or 'exact'")
    if self.angle < 0.0 or self.angle > 1.0:
        raise ValueError("'angle' must be between 0.0 - 1.0")
    if self.metric == "precomputed":
        if isinstance(self.init, string_types) and self.init == 'pca':
            raise ValueError("The parameter init=\"pca\" cannot be "
                             "used with metric=\"precomputed\".")
        if X.shape[0] != X.shape[1]:
            raise ValueError("X should be a square distance matrix")
        if np.any(X < 0):
            raise ValueError("All distances should be positive, the "
                             "precomputed distances given as X is not "
                             "correct")
    if self.method == 'barnes_hut' and sp.issparse(X):
        raise TypeError('A sparse matrix was passed, but dense '
                        'data is required for method="barnes_hut". Use '
                        'X.toarray() to convert to a dense numpy array if '
                        'the array is small enough for it to fit in '
                        'memory. Otherwise consider dimensionality '
                        'reduction techniques (e.g. TruncatedSVD)')
    else:
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=[np.float32, np.float64])
    if self.method == 'barnes_hut' and self.n_components > 3:
        raise ValueError("'n_components' should be inferior to 4 for the "
                         "barnes_hut algorithm as it relies on "
                         "quad-tree or oct-tree.")
    random_state = check_random_state(self.random_state)

    if self.early_exaggeration < 1.0:
        raise ValueError(
            "early_exaggeration must be at least 1, but is {}".format(
                self.early_exaggeration))

    if self.n_iter < 250:
        raise ValueError("n_iter should be at least 250")

    n_samples = X.shape[0]

    neighbors_nn = None
    if self.method == "exact":
        if self.metric == "precomputed":
            distances = X
        else:
            if self.verbose:
                print("[t-SNE] Computing pairwise distances...")

            if self.metric == "euclidean":
                distances = pairwise_distances(
                    X, metric=self.metric, squared=True,
                    **self.metric_params)  # <=ADDED
            else:
                distances = pairwise_distances(
                    X, metric=self.metric, **self.metric_params)  # <=ADDED

            if np.any(distances < 0):
                raise ValueError("All distances should be positive, the "
                                 "metric given is not correct")

        P = _joint_probabilities(distances, self.perplexity, self.verbose)
        assert np.all(np.isfinite(P)), "All probabilities should be finite"
        assert np.all(P >= 0), "All probabilities should be non-negative"
        assert np.all(P <= 1), ("All probabilities should be less than "
                                "or equal to one")
    else:
        k = min(n_samples - 1, int(3. * self.perplexity + 1))

        if self.verbose:
            print("[t-SNE] Computing {} nearest neighbors...".format(k))

        knn = NearestNeighbors(algorithm='auto', n_neighbors=k,
                               metric=self.metric,
                               metric_params=self.metric_params)  # <=ADDED
        t0 = time()
        knn.fit(X)
        duration = time() - t0
        if self.verbose:
            print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
                n_samples, duration))

        t0 = time()
        distances_nn, neighbors_nn = knn.kneighbors(None, n_neighbors=k)
        duration = time() - t0
        if self.verbose:
            print("[t-SNE] Computed neighbors for {} samples in {:.3f}s..."
                  .format(n_samples, duration))

        del knn

        if self.metric == "euclidean":
            distances_nn **= 2

        P = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                    self.perplexity, self.verbose)

    if isinstance(self.init, np.ndarray):
        X_embedded = self.init
    elif self.init == 'pca':
        pca = PCA(n_components=self.n_components, svd_solver='randomized',
                  random_state=random_state)
        X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
    elif self.init == 'random':
        X_embedded = 1e-4 * random_state.randn(
            n_samples, self.n_components).astype(np.float32)
    else:
        raise ValueError("'init' must be 'pca', 'random', or "
                         "a numpy array")

    degrees_of_freedom = max(self.n_components - 1.0, 1)

    return self._tsne(P, degrees_of_freedom, n_samples,
                      X_embedded=X_embedded,
                      neighbors=neighbors_nn,
                      skip_num_points=skip_num_points)
# plt.savefig('digits_tsne-generated.png', dpi=120)

# This algorithm is implemented in the _joint_probabilities private function
# in scikit-learn's code.
# The following function computes the similarity with a constant sigma.
def _joint_probabilities_constant_sigma(D, sigma):
    P = np.exp(-D**2 / 2 * sigma**2)
    P /= np.sum(P, axis=1)
    return P

# Pairwise distances between all data points.
D = pairwise_distances(X, squared=True)
# Similarity with constant sigma.
P_constant = _joint_probabilities_constant_sigma(D, .002)
# Similarity with variable sigma.
P_binary = _joint_probabilities(D, 30., False)
# The output of this function needs to be reshaped to a square matrix.
P_binary_s = squareform(P_binary)

# We can now display the distance matrix of the data points, and the
# similarity matrix with both a constant and variable sigma.
plt.figure(figsize=(12, 4))
pal = sns.light_palette("blue", as_cmap=True)

plt.subplot(131)
plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
plt.axis('off')
plt.title("Distance matrix", fontdict={'fontsize': 16})

plt.subplot(132)