def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        data = random_state.randn(n_samples, n_features)
        distances = pairwise_distances(data)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        n_neighbors = n_samples - 1
        distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
            n_neighbors=n_neighbors, mode='distance')
        P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #2
    def fit(self):
        """
        Performs the t-SNE transformation. Uses the joint probability
        distribution of the input to create a reduced feature space.
        :return: tSNE transformed embedding
        """
        # store the number of samples
        n_samples = self.X.shape[0]

        # Compute the squared Euclidean distance between each pair of data points
        distances = pairwise_distances(self.X,
                                       metric='euclidean',
                                       squared=True)

        # Compute joint probabilities p_ij from distances.
        P = _joint_probabilities(distances=distances,
                                 desired_perplexity=self.perplexity,
                                 verbose=False)

        # create reduced feature space using randomly selected Gaussian values
        # The embedding is initialized with iid samples from Gaussians with standard deviation 1e-4.
        X_embedded = 1e-4 * np.random.mtrand._rand.randn(
            n_samples, self.n_components).astype(np.float32)

        # degrees_of_freedom = n_components - 1 comes from
        # "Learning a Parametric Embedding by Preserving Local Structure"
        # Laurens van der Maaten, 2009.
        degrees_of_freedom = max(self.n_components - 1, 1)

        return self.tsne(P, degrees_of_freedom, n_samples, X_emb=X_embedded)
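The fit() method above hands the optimization off to a tsne() helper that is not shown here. A minimal sketch of what such a step might look like, built on scikit-learn's private _kl_divergence and _gradient_descent helpers (the helper name and import path are assumptions; the private module is sklearn.manifold.t_sne in older releases and sklearn.manifold._t_sne in newer ones):

import numpy as np
from sklearn.manifold._t_sne import _kl_divergence, _gradient_descent


def tsne_step(P, degrees_of_freedom, n_samples, X_emb, n_components=2, n_iter=1000):
    # Flatten the embedding; _gradient_descent works on a 1-D parameter vector.
    params = X_emb.ravel()
    # _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components)
    # returns (error, gradient), which is what _gradient_descent expects.
    params, kl, n_iter_done = _gradient_descent(
        _kl_divergence, params, 0, n_iter,
        args=[P, degrees_of_freedom, n_samples, n_components])
    return params.reshape(n_samples, n_components)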
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = distances.dot(distances.T)
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, False)
        kl, gradex = _kl_divergence(params, P, degrees_of_freedom, n_samples,
                                    n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        Pbh = _joint_probabilities_nn(distances, neighbors_nn,
                                      perplexity, False)
        kl, gradbh = _kl_divergence_bh(params, Pbh, neighbors_nn,
                                       degrees_of_freedom, n_samples,
                                       n_components, angle=angle,
                                       skip_num_points=0, verbose=False)
        assert_array_almost_equal(Pbh, P, decimal=5)
        assert_array_almost_equal(gradex, gradbh, decimal=5)
Example #4
def _run_answer_test(pos_input,
                     pos_output,
                     neighbors,
                     grad_output,
                     verbose=False,
                     perplexity=0.1,
                     skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data,
                              pos_output,
                              neighbors,
                              indptr,
                              grad_bh,
                              0.5,
                              2,
                              1,
                              skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
Example #5
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = distances.dot(distances.T)
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, False)
        kl, gradex = _kl_divergence(params, P, degrees_of_freedom, n_samples,
                                    n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        Pbh = _joint_probabilities_nn(distances, neighbors_nn,
                                      perplexity, False)
        kl, gradbh = _kl_divergence_bh(params, Pbh, neighbors_nn,
                                       degrees_of_freedom, n_samples,
                                       n_components, angle=angle,
                                       skip_num_points=0, verbose=False)
        assert_array_almost_equal(Pbh, P, decimal=5)
        assert_array_almost_equal(gradex, gradbh, decimal=5)
Example #6
def similarity_matrix():
    def _joint_probabilities_constant_sigma(D, sigma):
        P = np.exp(-D**2/2 * sigma**2)
        P /= np.sum(P, axis=1)
        return P

    # Pairwise_distances between all data points
    D = pairwise_distances(X, squared=True)
    # Similarity with constant sigma
    P_constant = _joint_probabilities_constant_sigma(D, .002)
    # Similarity with variable sigma
    P_binary = _joint_probabilities(D, 30., False)
    # The output of this function needs to be reshaped to a square matrix
    P_binary_s = squareform(P_binary)

    plt.figure(figsize=(12, 4))
    pal = sns.light_palette("blue", as_cmap=True)

    plt.subplot(131)
    plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title('Distance matrix', fontdict={'fontsize':16})

    plt.subplot(132)
    plt.imshow(P_constant[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title("$p_{j|i}$ (constant $\sigma$)", fontdict={'fontsize':16})

    plt.subplot(133)
    plt.imshow(P_binary_s[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title("$p_{j|i}$ (variable $\sigma$)", fontdict={'fontsize':16})
Example #7
def test_gradient():
    # Test gradient of Kullback-Leibler divergence.
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features).astype(np.float32)
    distances = np.abs(distances.dot(distances.T))
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)

    P = _joint_probabilities(distances, desired_perplexity=25.0,
                             verbose=0)

    def fun(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[0]

    def grad(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[1]

    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)
Example #8
def calc_latent_kl(vis_latents, aud_latents, perplexity):
    logging.info(
        "Calculating joint probability distribution of visual latent space...")
    vis_dists = calc_dists(vis_latents)
    vis_distr = tsne._joint_probabilities(distances=vis_dists,
                                          desired_perplexity=perplexity,
                                          verbose=True)
    logging.info(
        "Calculating joint probability distribution of auditive latent space..."
    )
    aud_dists = calc_dists(aud_latents)
    aud_distr = tsne._joint_probabilities(distances=aud_dists,
                                          desired_perplexity=perplexity,
                                          verbose=True)
    kl_va = 2.0 * np.dot(vis_distr, np.log(vis_distr / aud_distr))
    kl_av = 2.0 * np.dot(aud_distr, np.log(aud_distr / vis_distr))
    logging.info(
        "Calculated KL divergences of audio-visual latent spaces with perplexity %d: %.2f VA / %.2f AV."
        % (perplexity, kl_va, kl_av))
    return kl_va, kl_av
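The factor of 2.0 above comes from the condensed form: _joint_probabilities returns each symmetric pair (i, j) once, while the full KL sum visits it twice. A small sketch (assuming vis_distr and aud_distr are the condensed vectors computed inside the function above) that makes the equivalence explicit:

import numpy as np
from scipy.spatial.distance import squareform


def kl_full_matrix(p_condensed, q_condensed):
    # Expand both condensed vectors to symmetric square matrices (zero diagonal)
    # and sum P * log(P / Q) over all off-diagonal entries.
    P = squareform(p_condensed)
    Q = squareform(q_condensed)
    mask = P > 0
    return np.sum(P[mask] * np.log(P[mask] / Q[mask]))

# kl_full_matrix(vis_distr, aud_distr) matches
# 2.0 * np.dot(vis_distr, np.log(vis_distr / aud_distr)) up to floating point.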
def _run_answer_test(pos_input, pos_output, neighbors, grad_output, verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    _barnes_hut_tsne.gradient(pij_input, pos_output, neighbors, grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def fit(X):
    n_samples = X.shape[0]
    
    # Compute euclidean distance
    distances = pairwise_distances(X, metric='euclidean', squared=True)
    
    # Compute joint probabilities p_ij from distances.
    P = _joint_probabilities(distances=distances, desired_perplexity=perplexity, verbose=False)
    
    # The embedding is initialized with iid samples from Gaussians with standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(n_samples, n_components).astype(np.float32)
    
    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(n_components - 1, 1)
    
    return _tsne(P, degrees_of_freedom, n_samples, X_embedded=X_embedded)
Example #11
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array(
            [distances[i, neighbors_nn[i]] for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn,
                                       neighbors_nn,
                                       perplexity,
                                       verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params,
                                           P_bh,
                                           degrees_of_freedom,
                                           n_samples,
                                           n_components,
                                           angle=angle,
                                           skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)
Example #12
    def fit_transform(self, data):
        n_samples = data.shape[0]

        distances = pairwise_distances(data, metric=self.metric)

        P = _joint_probabilities(distances=distances,
                                 desired_perplexity=self.perplexity,
                                 verbose=False)

        # Reduced feature space
        X_embedded = 1e-4 * np.random.mtrand._rand.randn(
            n_samples, self.n_components).astype(np.float32)

        degrees_of_freedom = max(self.n_components - 1, 1)

        return self._tsne(P,
                          degrees_of_freedom,
                          n_samples,
                          X_embedded=X_embedded)
Example #13
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1, skip_num_points=0)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def fit(X):
    # Store the number of samples for future reference
    n_samples = X.shape[0]
    
    # Euclidean distance
    distances = pairwise_distances(X, metric='euclidean', squared=True)
    
    # Joint probabilities p_ij from the distances
    P = _joint_probabilities(distances=distances, desired_perplexity=perplexity, verbose=False)
    
    # The embedding is initialized with iid samples from Gaussians with standard deviation 1e-4.
    X_embedded = 1e-4 * np.random.mtrand._rand.randn(n_samples, n_components).astype(np.float32)
    
    # degrees_of_freedom = n_components - 1 comes from
    # "Learning a Parametric Embedding by Preserving Local Structure"
    # Laurens van der Maaten, 2009.
    degrees_of_freedom = max(n_components - 1, 1)
    
    return _tsne(P, degrees_of_freedom, n_samples, X_embedded=X_embedded)
Example #15
def test_gradient():
    """Test gradient of Kullback-Leibler divergence."""
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features)
    distances = distances.dot(distances.T)
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components)

    P = _joint_probabilities(distances, desired_perplexity=25.0,
                             verbose=0)
    fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                        n_components)[0]
    grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                         n_components)[1]
    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)
Example #16
    def fit_transform(self, X, perplexity=30, n_components=2):
        """
        Fits a TSNE model to the data.
        
        :param X:           data to be reduced in dimensionality
        """
        self.n_samples = X.shape[0]
        self.n_components = n_components

        # compute pairwise distances
        distances = pairwise_distances(X, metric="euclidean", squared=True)

        # compute joint probabilities p_ij from distances
        P = _joint_probabilities(distances=distances, \
            desired_perplexity=perplexity, verbose=False)

        # init low-dim embeddings with standard deviation 1e-4
        X_embedded = 1e-4 * np.random.mtrand._rand.randn(self.n_samples, n_components) \
            .astype(np.float32)
        degrees_of_freedom = max(n_components - 1, 1)

        return self.__tsne(P, degrees_of_freedom, X_embedded=X_embedded)
Example #17
def test_gradient():
    """Test gradient of Kullback-Leibler divergence."""
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features)
    distances = distances.dot(distances.T)
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components)

    P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)
    fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                        n_components)[0]
    grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                         n_components)[1]
    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()),
                        0.0,
                        decimal=5)
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        distances = random_state.randn(n_samples, n_features)
        distances = distances.astype(np.float32)
        distances = abs(distances.dot(distances.T))
        np.fill_diagonal(distances, 0.0)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        k = n_samples - 1
        bt = BallTree(distances)
        distances_nn, neighbors_nn = bt.query(distances, k=k + 1)
        neighbors_nn = neighbors_nn[:, 1:]
        distances_nn = np.array([distances[i, neighbors_nn[i]]
                                 for i in range(n_samples)])
        assert np.all(distances[0, neighbors_nn[0]] == distances_nn[0]),\
            abs(distances[0, neighbors_nn[0]] - distances_nn[0])
        P_bh = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                       perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)


def _joint_probabilities_constant_sigma(D, sigma):
    P = np.exp(-D**2/2 * sigma**2)
    P /= np.sum(P, axis=1)
    return P

# pairwise distances between all data points
D = pairwise_distances(X, squared=True)

# Similarity with constant sigma
P_constant = _joint_probabilities_constant_sigma(D, .002)

# Similarity with variable sigma
P_binary = _joint_probabilities(D, 30., False)

# output of this function needs to be reshaped to a square matrix
P_binary_s = squareform(P_binary)

# plot this similarity matrix
plt.figure(figsize=(12, 4))
pal = sns.light_palette("blue", as_cmap=True)

plt.subplot(131)
plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
plt.axis('off')
plt.title("Distance matrix", fontdict={'fontsize': 16})

plt.subplot(132)
plt.imshow(P_constant[::10, ::10], interpolation='none', cmap=pal)
Example #20
#!/usr/bin/env python2

from sklearn.metrics import euclidean_distances
from sklearn.manifold import t_sne
import numpy as np
import _snack as snack

for i in xrange(10):
    X = np.random.randn(1000, 2) * 10
    params = X.ravel()
    D = euclidean_distances(X)

    probs1 = t_sne._joint_probabilities(D, 30, False)
    probs2 = snack.my_joint_probabilities(D, 30, False)
    c1,grad1 = t_sne._kl_divergence(params, probs1, 1.0, len(X), 2)
    c2,grad2 = snack.my_kl_divergence(params, probs1, 1.0, len(X), 2.0)
    print "Test", i
    print "Difference norm:", np.linalg.norm(probs1 - probs2)
    print "Difference norm:", np.linalg.norm(grad1 - grad2)
    print "Difference norm:", c1-c2

    assert np.allclose(probs1, probs2)
    assert np.allclose(grad1, grad2)
    assert np.allclose(c1, c2)
def compute_joint_probabilities(X, perplexity=30, metric='euclidean', method='exact', adj=None, verbose=0):
    """
    Computes the joint probability matrix P from a feature matrix X of size n x f
    Adapted from sklearn.manifold.t_sne
    """

    # Compute pairwise distances
    if verbose > 0: print('Computing pairwise distances...')

    if method == 'exact':
        if metric == 'precomputed':
            D = X
        elif metric == 'euclidean':
            D = pairwise_distances(X, metric=metric, squared=True)
        elif metric == 'cosine':
            D = pairwise_distances(X, metric=metric)
        elif metric == 'shortest_path':
            assert adj is not None
            D = get_shortest_path_matrix(adj, verbose=verbose)

        P = _joint_probabilities(D, desired_perplexity=perplexity, verbose=verbose)
        assert np.all(np.isfinite(P)), "All probabilities should be finite"
        assert np.all(P >= 0), "All probabilities should be non-negative"
        assert np.all(P <= 1), ("All probabilities should be less "
                                "than or equal to one")

        P = squareform(P)

    else:
        # Compute the number of nearest neighbors to find.
        # LvdM uses 3 * perplexity as the number of neighbors.
        # In the event that we have very small # of points
        # set the neighbors to n - 1.
        n_samples = X.shape[0]
        k = min(n_samples - 1, int(3. * perplexity + 1))

        # Find the nearest neighbors for every point
        knn = NearestNeighbors(algorithm='auto', n_neighbors=k,
                               metric=metric)
        t0 = time()
        knn.fit(X)
        duration = time() - t0
        if verbose:
            print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
                n_samples, duration))

        t0 = time()
        distances_nn, neighbors_nn = knn.kneighbors(
            None, n_neighbors=k)
        duration = time() - t0
        if verbose:
            print("[t-SNE] Computed neighbors for {} samples in {:.3f}s..."
                  .format(n_samples, duration))

        # Free the memory used by the ball_tree
        del knn

        if metric == "euclidean":
            # knn returns the euclidean distance but we need it squared
            # to be consistent with the 'exact' method. Note that the
            # method was derived using the euclidean metric in the input
            # space; not sure of the implications of using a different
            # metric.
            distances_nn **= 2

        # compute the joint probability distribution for the input space
        P = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                    perplexity, verbose)
        P = P.toarray()

    # Convert to torch tensor
    P = torch.from_numpy(P).type(dtypeFloat)

    return P
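A usage sketch for compute_joint_probabilities (assumptions: dtypeFloat and the imports used by the function above are in scope, and the installed scikit-learn matches the private _joint_probabilities_nn signature used in the approximate branch):

import numpy as np

X_demo = np.random.RandomState(0).randn(500, 32).astype(np.float32)

P_exact = compute_joint_probabilities(X_demo, perplexity=30, method='exact')
P_approx = compute_joint_probabilities(X_demo, perplexity=30, method='approx')

# Both are (500, 500) torch tensors; the approximate branch only fills the
# ~3 * perplexity nearest-neighbour entries of each row.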
    def _fit(self, X, skip_num_points=0):
        """Fit the model using X as training data.

        Note that sparse arrays can only be handled by method='exact'.
        It is recommended that you convert your sparse array to dense
        (e.g. `X.toarray()`) if it fits in memory, or otherwise using a
        dimensionality reduction technique (e.g. TruncatedSVD).

        Parameters
        ----------
        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
            If the metric is 'precomputed' X must be a square distance
            matrix. Otherwise it contains a sample per row. Note that when
            method='barnes_hut', X cannot be a sparse array and, if need be,
            will be converted to a 32-bit float array. method='exact' allows
            sparse arrays and 64-bit floating point inputs.

        skip_num_points : int (optional, default:0)
            This does not compute the gradient for points with indices below
            `skip_num_points`. This is useful when computing transforms of new
            data where you'd like to keep the old data fixed.
        """
        if self.method not in ['barnes_hut', 'exact']:
            raise ValueError("'method' must be 'barnes_hut' or 'exact'")
        if self.angle < 0.0 or self.angle > 1.0:
            raise ValueError("'angle' must be between 0.0 - 1.0")
        if self.method == 'barnes_hut' and sp.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense '
                            'data is required for method="barnes_hut". Use '
                            'X.toarray() to convert to a dense numpy array if '
                            'the array is small enough for it to fit in '
                            'memory. Otherwise consider dimensionality '
                            'reduction techniques (e.g. TruncatedSVD)')
        else:
            X = check_array(X,
                            accept_sparse=['csr', 'csc', 'coo'],
                            dtype=np.float64)
        random_state = check_random_state(self.random_state)

        if self.early_exaggeration < 1.0:
            raise ValueError("early_exaggeration must be at least 1, but is "
                             "%f" % self.early_exaggeration)

        if self.n_iter < 200:
            raise ValueError("n_iter should be at least 200")

        if self.metric == "precomputed":
            if isinstance(self.init, string_types) and self.init == 'pca':
                raise ValueError("The parameter init=\"pca\" cannot be used "
                                 "with metric=\"precomputed\".")
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square distance matrix")
            distances = X
        else:
            if self.verbose:
                print("[t-SNE] Computing pairwise distances...")

            if self.metric == "euclidean":
                distances = pairwise_distances(X,
                                               metric=self.metric,
                                               squared=True)
            else:
                distances = pairwise_distances(X, metric=self.metric)

        if not np.all(distances >= 0):
            raise ValueError("All distances should be positive, either "
                             "the metric or precomputed distances given "
                             "as X are not correct")

        # Degrees of freedom of the Student's t-distribution. The suggestion
        # degrees_of_freedom = n_components - 1 comes from
        # "Learning a Parametric Embedding by Preserving Local Structure"
        # Laurens van der Maaten, 2009.
        degrees_of_freedom = max(self.n_components - 1.0, 1)
        n_samples = X.shape[0]
        # the number of nearest neighbors to find
        k = min(n_samples - 1, int(3. * self.perplexity + 1))

        neighbors_nn = None
        if self.method == 'barnes_hut':
            if self.verbose:
                print("[t-SNE] Computing %i nearest neighbors..." % k)
            if self.metric == 'precomputed':
                # Use the precomputed distances to find
                # the k nearest neighbors and their distances
                neighbors_nn = np.argsort(distances, axis=1)[:, :k]
            elif self.rho >= 1:
                # Find the nearest neighbors for every point
                bt = BallTree(X)
                # LvdM uses 3 * perplexity as the number of neighbors
                # And we add one to not count the data point itself
                # In the event that we have very small # of points
                # set the neighbors to n - 1
                distances_nn, neighbors_nn = bt.query(X, k=k + 1)
                neighbors_nn = neighbors_nn[:, 1:]
            elif self.rho < 1:
                # Use pyFLANN to find the nearest neighbors
                myflann = FLANN()
                testset = X
                params = myflann.build_index(testset,
                                             algorithm="autotuned",
                                             target_precision=self.rho,
                                             log_level='info')
                neighbors_nn, distances = myflann.nn_index(
                    testset, k + 1, checks=params["checks"])
                neighbors_nn = neighbors_nn[:, 1:]

            P = _joint_probabilities_nn(distances, neighbors_nn,
                                        self.perplexity, self.verbose)
        else:
            P = _joint_probabilities(distances, self.perplexity, self.verbose)
        assert np.all(np.isfinite(P)), "All probabilities should be finite"
        assert np.all(P >= 0), "All probabilities should be zero or positive"
        assert np.all(P <= 1), ("All probabilities should be less "
                                "than or equal to one")

        if isinstance(self.init, np.ndarray):
            X_embedded = self.init
        elif self.init == 'pca':
            pca = PCA(n_components=self.n_components,
                      svd_solver='randomized',
                      random_state=random_state)
            X_embedded = pca.fit_transform(X)
        elif self.init == 'random':
            X_embedded = None
        else:
            raise ValueError("Unsupported initialization scheme: %s" %
                             self.init)

        return self._tsne(P,
                          degrees_of_freedom,
                          n_samples,
                          random_state,
                          X_embedded=X_embedded,
                          neighbors=neighbors_nn,
                          skip_num_points=skip_num_points)
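The metric='precomputed' path described in the docstring expects a square distance matrix. A short sketch of the same idea through the public API (sklearn.manifold.TSNE rather than the private _fit above; note that init='pca' is rejected with precomputed distances, as the check above enforces):

import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances

X = np.random.RandomState(0).randn(100, 10)
D = pairwise_distances(X, squared=True)   # square (n_samples, n_samples) matrix

emb = TSNE(metric='precomputed', init='random', perplexity=10).fit_transform(D)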
Example #23
def plot_tsne_result(X, y, n_components):
    positions = []
    errors = []

    def _gradient_descent(objective,
                          p0,
                          it,
                          n_iter,
                          n_iter_check=1,
                          n_iter_without_progress=300,
                          momentum=0.8,
                          learning_rate=200.0,
                          min_gain=0.01,
                          min_grad_norm=1e-7,
                          verbose=0,
                          args=None,
                          kwargs=None):
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        p = p0.copy().ravel()
        update = np.zeros_like(p)
        gains = np.ones_like(p)
        error = np.finfo(float).max
        best_error = np.finfo(float).max
        best_iter = i = it

        tic = time()
        for i in range(it, n_iter):
            positions.append(p.copy())

            error, grad = objective(p, *args, **kwargs)
            errors.append(error)
            grad_norm = linalg.norm(grad)

            inc = update * grad < 0.0
            dec = np.invert(inc)
            gains[inc] += 0.2
            gains[dec] *= 0.8
            np.clip(gains, min_gain, np.inf, out=gains)
            grad *= gains
            update = momentum * update - learning_rate * grad
            p += update

            if (i + 1) % n_iter_check == 0:
                toc = time()
                duration = toc - tic
                tic = toc

            if verbose >= 2:
                print("[t-SNE] Iteration %d: error = %.7f,"
                      " gradient norm = %.7f"
                      " (%s iterations in %0.3fs)" %
                      (i + 1, error, grad_norm, n_iter_check, duration))

            if error < best_error:
                best_error = error
                best_iter = i
            elif i - best_iter > n_iter_without_progress:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: did not make any progress "
                          "during the last %d episodes. Finished." %
                          (i + 1, n_iter_without_progress))
                break
            if grad_norm <= min_grad_norm:
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: gradient norm %f. Finished." %
                          (i + 1, grad_norm))
                break

        return p, error, i

    D = pairwise_distances(X, squared=True)
    P_binary = _joint_probabilities(D, 30., False)
    P_binary_s = squareform(P_binary)

    positions.clear()
    errors.clear()
    manifold.t_sne._gradient_descent = _gradient_descent
    manifold.TSNE(n_components=n_components, random_state=100).fit_transform(X)
    if n_components == 3:
        X_iter = np.dstack([position.reshape(-1, 3) for position in positions])
    elif n_components == 2:
        X_iter = np.dstack([position.reshape(-1, 2) for position in positions])

    cmap = sns.light_palette("blue", as_cmap=True)

    fig = plt.figure(figsize=(12, 12))
    if X.shape[1] == 3:
        ax1 = fig.add_subplot(3, 4, 1, projection='3d')
        plot_data_3d_classification(X,
                                    y,
                                    ax=ax1,
                                    new_window=False,
                                    title="Original Data")
    elif X.shape[1] == 2:
        ax1 = fig.add_subplot(3, 4, 1)
        plot_data_2d_classification(X,
                                    y,
                                    ax=ax1,
                                    new_window=False,
                                    title="Original Data")

    ax2 = fig.add_subplot(3, 4, 2)
    plot_distance_matrix(P_binary_s, ax2, cmap, 'Pairwise Similarities')

    iter_size = int(len(positions) / 5)
    k = 2
    for i in range(5):
        iter_index = i * iter_size
        tmp = X_iter[..., iter_index]
        err = round(errors[iter_index], 2)
        title = "Iter: " + str(iter_index) + " Loss:" + str(err)

        k = k + 1
        if X_iter.shape[1] == 3:
            ax3 = fig.add_subplot(3, 4, k, projection='3d')
            plot_data_3d_classification(tmp,
                                        y,
                                        ax=ax3,
                                        new_window=False,
                                        title=title)
        elif X_iter.shape[1] == 2:
            ax3 = fig.add_subplot(3, 4, k)
            plot_data_2d_classification(tmp,
                                        y,
                                        ax=ax3,
                                        new_window=False,
                                        title=title)

        k = k + 1
        ax4 = fig.add_subplot(3, 4, k)
        n = 1. / (pdist(tmp, "sqeuclidean") + 1)
        Q = n / (2.0 * np.sum(n))
        Q = squareform(Q)
        plot_distance_matrix(Q, ax4, cmap, title=title)
    plt.subplots_adjust(wspace=0.1, hspace=0.5)
Example #24
def preprocess(x, metric='euclidean', perplexity=30):
    dist = pairwise_distances(x, metric=metric, squared=True)
    p = _joint_probabilities(dist, perplexity, 0)

    return p
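Usage sketch for preprocess(): the returned vector is the condensed joint-probability distribution, so it has length n*(n-1)/2 and carries half of the symmetric probability mass.

import numpy as np

x_demo = np.random.RandomState(0).randn(200, 16)
p = preprocess(x_demo, perplexity=30)

assert p.shape == (200 * 199 // 2,)   # condensed upper-triangular form
assert np.isclose(p.sum(), 0.5)       # squareform(p) would sum to 1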
Example #25
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,) * tsnedata.nobj)
    creator.create("Individual", list, fitness=creator.FitnessMin, pset=pset)
    toolbox = ParallelToolbox()
    toolbox.register("compile", gp.compile, pset=pset)
    toolbox.register("evaluate", evalTSNEMO, tsnedata.data_t, toolbox)
    tree1 = from_string_np_terms(tree_1_str, pset)
    tree2 = from_string_np_terms(tree_2_str, pset)
    print(str(tree1))
    print(str(tree2))
    ind = creator.Individual([tree1, tree2])
    print(ind)

    tsnedata.fitnessCache = cachetools.LRUCache(maxsize=1e6)
    tsnedata.outdir = args.outdir
    tsnedata.dataset = args.dataset
    tsnedata.degrees_of_freedom = max(num_trees - 1, 1)
    tsnedata._DOF = (tsnedata.degrees_of_freedom + 1.0) / -2.0
    dists = t_sne.pairwise_distances(data, metric="euclidean", squared=True)
    tsnedata.P_tsne = t_sne._joint_probabilities(dists, perplexity, verbose=True)
    tsnedata.max_P_tsne = np.maximum(tsnedata.P_tsne, MACHINE_EPSILON)

    best, reference = do_pso(ind, toolbox, tsnedata.data_t, args.gens)
    print(best)
    # We still want to output ones that have no constants (even though the vals didn't change!)
    if best is not None:
        ephemerals_indxs = collect_ephemeral_indices(ind)  # [(0,5),....(1,4),...]
        update_ercs(ephemerals_indxs, ind, best, reference)
        ind.fitness.setValues(evalTSNEMO(tsnedata.data_t, toolbox, ind))
    output_ind(ind, toolbox, tsnedata, suffix="-pso", compress=False)
Example #26
    def _fit(self, X, skip_num_points=0):
        if self.method not in ['barnes_hut', 'exact']:
            raise ValueError("'method' must be 'barnes_hut' or 'exact'")
        if self.angle < 0.0 or self.angle > 1.0:
            raise ValueError("'angle' must be between 0.0 - 1.0")
        if self.metric == "precomputed":
            if isinstance(self.init, string_types) and self.init == 'pca':
                raise ValueError("The parameter init=\"pca\" cannot be "
                                 "used with metric=\"precomputed\".")
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square distance matrix")
            if np.any(X < 0):
                raise ValueError("All distances should be positive, the "
                                 "precomputed distances given as X is not "
                                 "correct")
        if self.method == 'barnes_hut' and sp.issparse(X):
            raise TypeError('A sparse matrix was passed, but dense '
                            'data is required for method="barnes_hut". Use '
                            'X.toarray() to convert to a dense numpy array if '
                            'the array is small enough for it to fit in '
                            'memory. Otherwise consider dimensionality '
                            'reduction techniques (e.g. TruncatedSVD)')
        else:
            X = check_array(X,
                            accept_sparse=['csr', 'csc', 'coo'],
                            dtype=[np.float32, np.float64])
        if self.method == 'barnes_hut' and self.n_components > 3:
            raise ValueError("'n_components' should be inferior to 4 for the "
                             "barnes_hut algorithm as it relies on "
                             "quad-tree or oct-tree.")
        random_state = check_random_state(self.random_state)

        if self.early_exaggeration < 1.0:
            raise ValueError(
                "early_exaggeration must be at least 1, but is {}".format(
                    self.early_exaggeration))

        if self.n_iter < 250:
            raise ValueError("n_iter should be at least 250")

        n_samples = X.shape[0]

        neighbors_nn = None
        if self.method == "exact":
            if self.metric == "precomputed":
                distances = X
            else:
                if self.verbose:
                    print("[t-SNE] Computing pairwise distances...")

                if self.metric == "euclidean":
                    distances = pairwise_distances(
                        X,
                        metric=self.metric,
                        squared=True,
                        **self.metric_params)  # <=ADDED
                else:
                    distances = pairwise_distances(
                        X, metric=self.metric, **self.metric_params)  # <=ADDED

                if np.any(distances < 0):
                    raise ValueError("All distances should be positive, the "
                                     "metric given is not correct")

            P = _joint_probabilities(distances, self.perplexity, self.verbose)
            assert np.all(np.isfinite(P)), "All probabilities should be finite"
            assert np.all(P >= 0), "All probabilities should be non-negative"
            assert np.all(P <= 1), ("All probabilities should be less "
                                    "than or equal to one")

        else:
            k = min(n_samples - 1, int(3. * self.perplexity + 1))

            if self.verbose:
                print("[t-SNE] Computing {} nearest neighbors...".format(k))

            knn = NearestNeighbors(algorithm='auto',
                                   n_neighbors=k,
                                   metric=self.metric,
                                   metric_params=self.metric_params)  # <=ADDED
            t0 = time()
            knn.fit(X)
            duration = time() - t0
            if self.verbose:
                print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
                    n_samples, duration))

            t0 = time()
            distances_nn, neighbors_nn = knn.kneighbors(None, n_neighbors=k)
            duration = time() - t0
            if self.verbose:
                print(
                    "[t-SNE] Computed neighbors for {} samples in {:.3f}s...".
                    format(n_samples, duration))

            del knn

            if self.metric == "euclidean":
                distances_nn **= 2

            P = _joint_probabilities_nn(distances_nn, neighbors_nn,
                                        self.perplexity, self.verbose)

        if isinstance(self.init, np.ndarray):
            X_embedded = self.init
        elif self.init == 'pca':
            pca = PCA(n_components=self.n_components,
                      svd_solver='randomized',
                      random_state=random_state)
            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
        elif self.init == 'random':
            X_embedded = 1e-4 * random_state.randn(
                n_samples, self.n_components).astype(np.float32)
        else:
            raise ValueError("'init' must be 'pca', 'random', or "
                             "a numpy array")

        degrees_of_freedom = max(self.n_components - 1.0, 1)

        return self._tsne(P,
                          degrees_of_freedom,
                          n_samples,
                          X_embedded=X_embedded,
                          neighbors=neighbors_nn,
                          skip_num_points=skip_num_points)
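The lines marked # <=ADDED exist so that extra metric parameters reach the distance computations. In isolation (a standalone sketch, not the wrapper class itself), the forwarding amounts to:

import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).randn(50, 4)
metric_params = {'p': 1.5}

# The exact path expands metric_params as keyword arguments ...
D = pairwise_distances(X, metric='minkowski', **metric_params)
# ... while the Barnes-Hut path hands them to NearestNeighbors directly.
nn = NearestNeighbors(n_neighbors=10, metric='minkowski',
                      metric_params=metric_params).fit(X)
distances_nn, neighbors_nn = nn.kneighbors(X)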
Example #27
    # plt.savefig('digits_tsne-generated.png', dpi=120)

    # This algorithm is implemented in the _joint_probabilities private function in scikit-learn's code.
    # The following function computes the similarity with a constant sigma.
    def _joint_probabilities_constant_sigma(D, sigma):
        P = np.exp(-D**2 / 2 * sigma**2)
        P /= np.sum(P, axis=1)
        return P

    #  Pairwise distances between all data points.
    D = pairwise_distances(X, squared=True)
    # Similarity with constant sigma.
    P_constant = _joint_probabilities_constant_sigma(D, .002)
    # Similarity with variable sigma.
    P_binary = _joint_probabilities(D, 30., False)
    # The output of this function needs to be reshaped to a square matrix.
    P_binary_s = squareform(P_binary)

    # We can now display the distance matrix of the data points, and the similarity matrix with
    #  both a constant and variable sigma.

    plt.figure(figsize=(12, 4))
    pal = sns.light_palette("blue", as_cmap=True)

    plt.subplot(131)
    plt.imshow(D[::10, ::10], interpolation='none', cmap=pal)
    plt.axis('off')
    plt.title("Distance matrix", fontdict={'fontsize': 16})

    plt.subplot(132)