Example #1
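(Note: these snippets are excerpted from scikit-learn's t-SNE test suite and omit the module-level imports. A minimal preamble that should make them runnable, assuming a pre-1.0 scikit-learn API where ``TSNE`` still accepts an ``n_iter`` constructor parameter, and taking the assertion helpers from ``numpy.testing``:

import sys
from io import StringIO

import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import (assert_allclose, assert_almost_equal,
                           assert_array_almost_equal, assert_array_equal)
from scipy.spatial.distance import pdist, squareform

from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE, trustworthiness
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import KNeighborsTransformer, kneighbors_graph
from sklearn.pipeline import make_pipeline
from sklearn.utils import check_random_state

Several examples additionally rely on module-level fixtures (``X_2d_grid``, ``assert_uniform_grid``) or receive arguments such as ``method`` and ``dt`` from stripped ``pytest.mark.parametrize`` decorators; a sketch of both follows Example #4.)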
def test_n_iter_without_progress():
    # Use a dummy negative n_iter_without_progress and check output on stdout
    random_state = check_random_state(0)
    X = random_state.randn(100, 10)
    for method in ["barnes_hut", "exact"]:
        tsne = TSNE(n_iter_without_progress=-1,
                    verbose=2,
                    learning_rate=1e8,
                    random_state=0,
                    method=method,
                    n_iter=351,
                    init="random")
        tsne._N_ITER_CHECK = 1
        tsne._EXPLORATION_N_ITER = 0

        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            tsne.fit_transform(X)
        finally:
            out = sys.stdout.getvalue()
            sys.stdout.close()
            sys.stdout = old_stdout

        # The output needs to contain the value of n_iter_without_progress
        assert ("did not make any progress during the "
                "last -1 episodes. Finished." in out)
Example #2
def test_early_exaggeration_used():
    # check that the ``early_exaggeration`` parameter has an effect
    random_state = check_random_state(0)
    n_components = 2
    methods = ['exact', 'barnes_hut']
    X = random_state.randn(25, n_components).astype(np.float32)
    for method in methods:
        tsne = TSNE(n_components=n_components,
                    perplexity=1,
                    learning_rate=100.0,
                    init="pca",
                    random_state=0,
                    method=method,
                    early_exaggeration=1.0,
                    n_iter=250)
        X_embedded1 = tsne.fit_transform(X)
        tsne = TSNE(n_components=n_components,
                    perplexity=1,
                    learning_rate=100.0,
                    init="pca",
                    random_state=0,
                    method=method,
                    early_exaggeration=10.0,
                    n_iter=250)
        X_embedded2 = tsne.fit_transform(X)

        assert not np.allclose(X_embedded1, X_embedded2)
Example #3
def test_accessible_kl_divergence():
    # Ensures that the accessible kl_divergence matches the computed value
    random_state = check_random_state(0)
    X = random_state.randn(50, 2)
    tsne = TSNE(n_iter_without_progress=2,
                verbose=2,
                random_state=0,
                method='exact',
                n_iter=500)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    # The output needs to contain the accessible kl_divergence as the error at
    # the last iteration
    for line in out.split('\n')[::-1]:
        if 'Iteration' in line:
            _, _, error = line.partition('error = ')
            if error:
                error, _, _ = error.partition(',')
                break
    assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)
Example #4
def test_uniform_grid(method):
    """Make sure that TSNE can approximately recover a uniform 2D grid

    Due to ties in the distances between points in X_2d_grid, this test is
    platform dependent for ``method='barnes_hut'`` because of numerical
    imprecision.

    Also, t-SNE is not guaranteed to converge to the right solution, because
    a bad initialization can lead to a poor local minimum (the optimization
    problem is non-convex). To avoid breaking the test too often, we re-run
    t-SNE from the final point when the convergence is not good enough.
    """
    seeds = [0, 1, 2]
    n_iter = 500
    for seed in seeds:
        tsne = TSNE(n_components=2,
                    init='random',
                    random_state=seed,
                    perplexity=20,
                    n_iter=n_iter,
                    method=method)
        Y = tsne.fit_transform(X_2d_grid)

        try_name = "{}_{}".format(method, seed)
        try:
            assert_uniform_grid(Y, try_name)
        except AssertionError:
            # If the test fails a first time, re-run with init=Y to see if
            # this was caused by a bad initialization. Note that this will
            # also run an early_exaggeration step.
            try_name += ":rerun"
            tsne.init = Y
            Y = tsne.fit_transform(X_2d_grid)
            assert_uniform_grid(Y, try_name)
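The ``method`` argument above (like ``dt``, ``init``, ``D``, ``retype`` and ``message_regex`` in later examples) is injected by a ``pytest.mark.parametrize`` decorator that the excerpt drops, and the test references two module-level helpers that are likewise omitted. A rough reconstruction, modeled on scikit-learn's own test module; the grid size, thresholds and parameter list are assumptions, not verbatim source:

from sklearn.neighbors import NearestNeighbors

# Assumed fixture: a regular 10x10 grid of points in the unit square.
x = np.linspace(0, 1, 10)
xx, yy = np.meshgrid(x, x)
X_2d_grid = np.hstack([xx.ravel().reshape(-1, 1), yy.ravel().reshape(-1, 1)])


def assert_uniform_grid(Y, try_name=None):
    # The embedding should be roughly uniformly spaced: the distance from
    # each point to its nearest neighbor must be non-zero and close to the
    # mean nearest-neighbor distance.
    nn = NearestNeighbors(n_neighbors=1).fit(Y)
    dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
    assert dist_to_nn.min() > 0.1
    smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn)
    largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn)
    assert smallest_to_mean > .5, try_name
    assert largest_to_mean < 2, try_name


# The dropped parametrization would look something like:
# @pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
# def test_uniform_grid(method): ...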
Example #5
def test_pca_initialization_not_compatible_with_precomputed_kernel():
    # PCA initialization cannot be used with a precomputed distance matrix.
    tsne = TSNE(metric="precomputed", init="pca")
    with pytest.raises(ValueError,
                       match="The parameter init=\"pca\" cannot"
                       " be used with"
                       " metric=\"precomputed\"."):
        tsne.fit_transform(np.array([[0.0], [1.0]]))
Example #6
def test_high_perplexity_precomputed_sparse_distances():
    # Perplexity should be less than 50
    dist = np.array([[1., 0., 0.], [0., 1., 0.], [1., 0., 0.]])
    bad_dist = sp.csr_matrix(dist)
    tsne = TSNE(metric="precomputed")
    msg = "3 neighbors per samples are required, but some samples have only 1"
    with pytest.raises(ValueError, match=msg):
        tsne.fit_transform(bad_dist)
Example #7
def test_angle_out_of_range_checks():
    # check the angle parameter range
    for angle in [-1, -1e-6, 1 + 1e-6, 2]:
        tsne = TSNE(angle=angle)
        with pytest.raises(ValueError,
                           match="'angle' must be between "
                           "0.0 - 1.0"):
            tsne.fit_transform(np.array([[0.0], [1.0]]))
Example #8
def test_distance_not_available():
    # 'metric' must be valid.
    tsne = TSNE(metric="not available", method='exact')
    with pytest.raises(ValueError, match="Unknown metric not available.*"):
        tsne.fit_transform(np.array([[0.0], [1.0]]))

    tsne = TSNE(metric="not available", method='barnes_hut')
    with pytest.raises(ValueError, match="Metric 'not available' not valid.*"):
        tsne.fit_transform(np.array([[0.0], [1.0]]))
Example #9
def test_non_positive_computed_distances():
    # Computed distance matrices must be positive.
    def metric(x, y):
        return -1

    tsne = TSNE(metric=metric, method='exact')
    X = np.array([[0.0, 0.0], [1.0, 1.0]])
    with pytest.raises(ValueError, match="All distances .*metric given.*"):
        tsne.fit_transform(X)
Example #10
def test_optimization_minimizes_kl_divergence():
    """t-SNE should give a lower KL divergence with more iterations."""
    random_state = check_random_state(0)
    X, _ = make_blobs(n_features=3, random_state=random_state)
    kl_divergences = []
    for n_iter in [250, 300, 350]:
        tsne = TSNE(n_components=2,
                    perplexity=10,
                    learning_rate=100.0,
                    n_iter=n_iter,
                    random_state=0)
        tsne.fit_transform(X)
        kl_divergences.append(tsne.kl_divergence_)
    assert kl_divergences[1] <= kl_divergences[0]
    assert kl_divergences[2] <= kl_divergences[1]
Example #11
def test_kl_divergence_not_nan(method):
    # Ensure kl_divergence_ is computed at the last iteration
    # even though n_iter % n_iter_check != 0, i.e. 503 % 50 != 0
    random_state = check_random_state(0)

    X = random_state.randn(50, 2)
    tsne = TSNE(n_components=2,
                perplexity=2,
                learning_rate=100.0,
                random_state=0,
                method=method,
                verbose=0,
                n_iter=503)
    tsne.fit_transform(X)

    assert not np.isnan(tsne.kl_divergence_)
Example #12
def test_n_iter_used():
    # check that the ``n_iter`` parameter has an effect
    random_state = check_random_state(0)
    n_components = 2
    methods = ['exact', 'barnes_hut']
    X = random_state.randn(25, n_components).astype(np.float32)
    for method in methods:
        for n_iter in [251, 500]:
            tsne = TSNE(n_components=n_components,
                        perplexity=1,
                        learning_rate=0.5,
                        init="random",
                        random_state=0,
                        method=method,
                        early_exaggeration=1.0,
                        n_iter=n_iter)
            tsne.fit_transform(X)

            assert tsne.n_iter_ == n_iter - 1
Example #13
def test_sparse_precomputed_distance():
    """Make sure that TSNE works identically for sparse and dense matrix"""
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)

    D_sparse = kneighbors_graph(X,
                                n_neighbors=100,
                                mode='distance',
                                include_self=True)
    D = pairwise_distances(X)
    assert sp.issparse(D_sparse)
    assert_almost_equal(D_sparse.A, D)

    tsne = TSNE(metric="precomputed", random_state=0)
    Xt_dense = tsne.fit_transform(D)

    for fmt in ['csr', 'lil']:
        Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
        assert_almost_equal(Xt_dense, Xt_sparse)
Example #14
def test_min_grad_norm():
    # Make sure that the parameter min_grad_norm is used correctly
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    min_grad_norm = 0.002
    tsne = TSNE(min_grad_norm=min_grad_norm,
                verbose=2,
                random_state=0,
                method='exact')

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    lines_out = out.split('\n')

    # extract the gradient norm from the verbose output
    gradient_norm_values = []
    for line in lines_out:
        # Once the computation is finished, an old gradient norm value is
        # merely repeated; we do not need to store it
        if 'Finished' in line:
            break

        start_grad_norm = line.find('gradient norm')
        if start_grad_norm >= 0:
            line = line[start_grad_norm:]
            line = line.replace('gradient norm = ', '').split(' ')[0]
            gradient_norm_values.append(float(line))

    # Compute how often the gradient norm is smaller than min_grad_norm
    gradient_norm_values = np.array(gradient_norm_values)
    n_smaller_gradient_norms = (gradient_norm_values <= min_grad_norm).sum()

    # The gradient norm can be smaller than min_grad_norm at most once,
    # because the moment it becomes smaller, the optimization stops
    assert n_smaller_gradient_norms <= 1
Example #15
def test_verbose():
    # Verbose options write to stdout.
    random_state = check_random_state(0)
    tsne = TSNE(verbose=2)
    X = random_state.randn(5, 2)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert ("[t-SNE]" in out)
    assert ("nearest neighbors..." in out)
    assert ("Computed conditional probabilities" in out)
    assert ("Mean sigma" in out)
    assert ("early exaggeration" in out)
Example #16
def test_preserve_trustworthiness_approximately(method, init):
    # Nearest neighbors should be preserved approximately.
    random_state = check_random_state(0)
    n_components = 2
    X = random_state.randn(50, n_components).astype(np.float32)
    tsne = TSNE(n_components=n_components,
                init=init,
                random_state=0,
                method=method,
                n_iter=700)
    X_embedded = tsne.fit_transform(X)
    t = trustworthiness(X, X_embedded, n_neighbors=1)
    assert t > 0.85
Example #17
def test_fit_csr_matrix(method):
    # X can be a sparse matrix.
    rng = check_random_state(0)
    X = rng.randn(50, 2)
    X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
    X_csr = sp.csr_matrix(X)
    tsne = TSNE(n_components=2,
                perplexity=10,
                learning_rate=100.0,
                random_state=0,
                method=method,
                n_iter=750)
    X_embedded = tsne.fit_transform(X_csr)
    assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1),
                    1.0,
                    rtol=1.1e-1)
Example #18
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # Nearest neighbors should be preserved approximately.
    random_state = check_random_state(0)
    for i in range(3):
        X = random_state.randn(80, 2)
        D = squareform(pdist(X), "sqeuclidean")
        tsne = TSNE(n_components=2,
                    perplexity=2,
                    learning_rate=100.0,
                    early_exaggeration=2.0,
                    metric="precomputed",
                    random_state=i,
                    verbose=0,
                    n_iter=500)
        X_embedded = tsne.fit_transform(D)
        t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
        assert t > .95
Example #19
def test_64bit(method, dt):
    # Ensure 64bit arrays are handled correctly.
    random_state = check_random_state(0)

    X = random_state.randn(10, 2).astype(dt, copy=False)
    tsne = TSNE(n_components=2,
                perplexity=2,
                learning_rate=100.0,
                random_state=0,
                method=method,
                verbose=0,
                n_iter=300)
    X_embedded = tsne.fit_transform(X)
    effective_type = X_embedded.dtype

    # The t-SNE Cython code is single precision only, so the output will
    # always be single precision, irrespective of the input dtype
    assert effective_type == np.float32
Example #20
def test_tsne():
    # Test chaining KNeighborsTransformer and TSNE
    n_iter = 250
    perplexity = 5
    n_neighbors = int(3. * perplexity + 1)

    rng = np.random.RandomState(0)
    X = rng.randn(20, 2)

    for metric in ['minkowski', 'sqeuclidean']:

        # compare the chained version and the compact version
        est_chain = make_pipeline(
            KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance',
                                  metric=metric),
            TSNE(metric='precomputed', perplexity=perplexity,
                 method="barnes_hut", random_state=42, n_iter=n_iter))
        est_compact = TSNE(metric=metric, perplexity=perplexity, n_iter=n_iter,
                           method="barnes_hut", random_state=42)

        Xt_chain = est_chain.fit_transform(X)
        Xt_compact = est_compact.fit_transform(X)
        assert_array_almost_equal(Xt_chain, Xt_compact)
Example #21
def test_bh_match_exact():
    # check that the ``barnes_hut`` method matches the exact one when
    # ``angle = 0`` and ``perplexity > n_samples / 3``
    random_state = check_random_state(0)
    n_features = 10
    X = random_state.randn(30, n_features).astype(np.float32)
    X_embeddeds = {}
    n_iter = {}
    for method in ['exact', 'barnes_hut']:
        tsne = TSNE(n_components=2,
                    method=method,
                    learning_rate=1.0,
                    init="random",
                    random_state=0,
                    n_iter=251,
                    perplexity=30.0,
                    angle=0)
        # Kill the early_exaggeration
        tsne._EXPLORATION_N_ITER = 0
        X_embeddeds[method] = tsne.fit_transform(X)
        n_iter[method] = tsne.n_iter_

    assert n_iter['exact'] == n_iter['barnes_hut']
    assert_allclose(X_embeddeds['exact'], X_embeddeds['barnes_hut'], rtol=1e-4)
Example #22
def test_early_exaggeration_too_small():
    # Early exaggeration factor must be >= 1.
    tsne = TSNE(early_exaggeration=0.99)
    with pytest.raises(ValueError, match="early_exaggeration .*"):
        tsne.fit_transform(np.array([[0.0], [0.0]]))
Example #23
def test_too_few_iterations():
    # Number of gradient descent iterations must be at least 200.
    tsne = TSNE(n_iter=199)
    with pytest.raises(ValueError, match="n_iter .*"):
        tsne.fit_transform(np.array([[0.0], [0.0]]))
Example #24
def test_bad_precomputed_distances(method, D, retype, message_regex):
    tsne = TSNE(metric="precomputed", method=method)
    with pytest.raises(ValueError, match=message_regex):
        tsne.fit_transform(retype(D))
Example #25
def test_chebyshev_metric():
    # t-SNE should allow metrics that cannot be squared (issue #3526).
    random_state = check_random_state(0)
    tsne = TSNE(metric="chebyshev")
    X = random_state.randn(5, 2)
    tsne.fit_transform(X)
Example #26
def test_exact_no_precomputed_sparse():
    tsne = TSNE(metric='precomputed', method='exact')
    with pytest.raises(TypeError, match='sparse'):
        tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
Example #27
def test_init_not_available():
    # 'init' must be 'pca', 'random', or a numpy array.
    tsne = TSNE(init="not available")
    m = "'init' must be 'pca', 'random', or a numpy array"
    with pytest.raises(ValueError, match=m):
        tsne.fit_transform(np.array([[0.0], [1.0]]))
Example #28
def test_init_ndarray():
    # Initialize TSNE with an ndarray and test fit
    tsne = TSNE(init=np.zeros((100, 2)))
    X_embedded = tsne.fit_transform(np.ones((100, 5)))
    assert_array_equal(np.zeros((100, 2)), X_embedded)
Example #29
def test_n_components_range():
    # barnes_hut method should only be used with n_components <= 3
    tsne = TSNE(n_components=4, method="barnes_hut")
    with pytest.raises(ValueError, match="'n_components' should be .*"):
        tsne.fit_transform(np.array([[0.0], [1.0]]))
Example #30
def test_method_not_available():
    # 'method' must be 'barnes_hut' or 'exact'
    tsne = TSNE(method='not available')
    with pytest.raises(ValueError, match="'method' must be 'barnes_hut' or "):
        tsne.fit_transform(np.array([[0.0], [1.0]]))