def check_uniform_grid(method, seeds=(0, 1, 2), n_iter=1000):
    """Make sure that TSNE can approximately recover a uniform 2D grid.

    Due to ties in distances between points in X_2d_grid, this test is
    platform dependent for ``method='barnes_hut'`` due to numerical
    imprecision.

    Also, t-SNE is not assured to converge to the right solution because bad
    initialization can lead to convergence to a bad local minimum (the
    optimization problem is non-convex). To avoid breaking the test too
    often, we re-run t-SNE from the final point when the convergence is not
    good enough.

    Parameters
    ----------
    method : forwarded unchanged to ``TSNE(method=...)``.
    seeds : iterable of values used as ``random_state``; one fit is run per
        seed. The default is an immutable tuple so that it cannot be
        modified between calls.
    n_iter : forwarded unchanged to ``TSNE(n_iter=...)``.
    """
    for seed in seeds:
        tsne = TSNE(n_components=2, init='random', random_state=seed,
                    perplexity=20, n_iter=n_iter, method=method)
        Y = tsne.fit_transform(X_2d_grid)

        try_name = "{}_{}".format(method, seed)
        try:
            assert_uniform_grid(Y, try_name)
        except AssertionError:
            # If the check fails a first time, re-run with init=Y to see if
            # this was caused by a bad initialization. Note that this will
            # also run an early_exaggeration step.
            try_name += ":rerun"
            tsne.init = Y
            Y = tsne.fit_transform(X_2d_grid)
            assert_uniform_grid(Y, try_name)
def test_accessible_kl_divergence():
    # The public kl_divergence_ attribute must agree with the error value
    # printed on the last verbose "Iteration" line.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    tsne = TSNE(n_iter_without_progress=2, verbose=2,
                random_state=0, method='exact')

    # Temporarily swap stdout for an in-memory buffer to capture the log.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        tsne.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    # Walk the log backwards and take the error of the last iteration line
    # that actually reports one.
    for line in reversed(out.split('\n')):
        if 'Iteration' not in line:
            continue
        error = line.partition('error = ')[2]
        if error:
            error = error.partition(',')[0]
            break
    assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)
def test_64bit():
    # Smoke test: fitting must succeed for float32 and float64 inputs with
    # both optimisation methods. Only the absence of an exception is checked.
    rng = check_random_state(0)
    for method in ("barnes_hut", "exact"):
        for dtype in (np.float32, np.float64):
            data = rng.randn(100, 2).astype(dtype)
            model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                         random_state=0, method=method)
            model.fit_transform(data)
def test_optimization_minimizes_kl_divergence():
    """t-SNE should give a lower KL divergence with more iterations."""
    rng = check_random_state(0)
    X, _ = make_blobs(n_features=3, random_state=rng)

    def final_kl(n_iter):
        # Fit a fresh model and report the KL divergence it ends with.
        model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                     n_iter=n_iter, random_state=0)
        model.fit_transform(X)
        return model.kl_divergence_

    kl_200, kl_250, kl_300 = [final_kl(n) for n in (200, 250, 300)]
    assert_less_equal(kl_250, kl_200)
    assert_less_equal(kl_300, kl_250)
Beispiel #5
0
def test_kl_divergence_not_nan(method):
    # kl_divergence_ has to be computed at the last iteration even when
    # n_iter % n_iter_check != 0 (here 1003 % 50 != 0).
    rng = check_random_state(0)
    data = rng.randn(50, 2)

    model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                 random_state=0, method=method, verbose=0, n_iter=1003)
    model.fit_transform(data)

    kl_is_nan = np.isnan(model.kl_divergence_)
    assert not kl_is_nan
def test_n_iter_used():
    # The ``n_iter`` parameter must influence how many iterations are run:
    # the fitted n_iter_ is expected to equal n_iter - 1.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(25, n_components).astype(np.float32)

    cases = [(method, n_iter)
             for method in ('exact', 'barnes_hut')
             for n_iter in (251, 500)]
    for method, n_iter in cases:
        model = TSNE(n_components=n_components, perplexity=1,
                     learning_rate=0.5, init="random", random_state=0,
                     method=method, early_exaggeration=1.0, n_iter=n_iter)
        model.fit_transform(X)

        expected_last_iter = n_iter - 1
        assert model.n_iter_ == expected_last_iter
Beispiel #7
0
def test_reduction_to_one_component():
    """t-SNE should allow reduction to one component (issue #4154)."""
    rng = check_random_state(0)
    model = TSNE(n_components=1)
    data = rng.randn(5, 2)
    embedding = model.fit_transform(data)
    # Every coordinate of the 1-component embedding must be finite.
    finite_mask = np.isfinite(embedding)
    assert np.all(finite_mask)
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # With a precomputed distance matrix as input, nearest neighbors should
    # still be preserved approximately.
    rng = check_random_state(0)
    points = rng.randn(100, 2)
    D = squareform(pdist(points), "sqeuclidean")

    model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                 metric="precomputed", random_state=0, verbose=0)
    embedding = model.fit_transform(D)

    score = trustworthiness(D, embedding, n_neighbors=1, precomputed=True)
    assert_almost_equal(score, 1.0, decimal=1)
def test_early_exaggeration_used():
    # The ``early_exaggeration`` parameter must have an effect: two fits that
    # differ only in that value must not give (almost) equal embeddings.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(25, n_components).astype(np.float32)

    for method in ('exact', 'barnes_hut'):
        embeddings = []
        for exaggeration in (1.0, 10.0):
            model = TSNE(n_components=n_components, perplexity=1,
                         learning_rate=100.0, init="pca", random_state=0,
                         method=method, early_exaggeration=exaggeration)
            embeddings.append(model.fit_transform(X))

        without_exaggeration, with_exaggeration = embeddings
        assert not np.allclose(without_exaggeration, with_exaggeration)
def test_fit_csr_matrix():
    # X can be a sparse matrix.
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    # Zero out entries at randomly drawn (row, column) positions. The
    # positions come from the seeded generator (not the global ``np.random``
    # state) so that the test data is reproducible from run to run.
    rows = random_state.randint(0, 100, 50)
    cols = random_state.randint(0, 2, 50)
    X[(rows, cols)] = 0.0
    X_csr = sp.csr_matrix(X)
    tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                random_state=0, method="exact")
    X_embedded = tsne.fit_transform(X_csr)
    # Nearest neighbors should be preserved approximately.
    assert_almost_equal(trustworthiness(X_csr, X_embedded, n_neighbors=1),
                        1.0, decimal=1)
def test_n_iter_without_progress():
    # Fit with a dummy negative n_iter_without_progress and verify that the
    # value is echoed in the verbose output written to stdout.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    model = TSNE(n_iter_without_progress=-1, verbose=2,
                 random_state=1, method='exact')

    # Capture everything printed during the fit.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        model.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    expected = ("did not make any progress during the "
                "last -1 episodes. Finished.")
    assert_in(expected, out)
Beispiel #12
0
def test_n_iter_without_progress():
    # The n_iter_without_progress value (2 here) must show up in the verbose
    # message printed when the optimisation stops.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    model = TSNE(n_iter_without_progress=2, verbose=2,
                 random_state=0, method='exact')

    # Capture everything printed during the fit.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        model.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    expected = ("did not make any progress during the "
                "last 2 episodes. Finished.")
    assert_in(expected, out)
Beispiel #13
0
def test_preserve_trustworthiness_approximately():
    """Nearest neighbors should be preserved approximately."""
    rng = check_random_state(0)
    X = rng.randn(100, 2)

    def embed(init):
        # Fit a fresh model with the given initialisation scheme.
        model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                     init=init, random_state=0)
        return model.fit_transform(X)

    for init in ('random', 'pca'):
        score = trustworthiness(X, embed(init), n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)
def test_verbose():
    # With verbose=2 the fit must write its progress messages to stdout.
    rng = check_random_state(0)
    model = TSNE(verbose=2)
    X = rng.randn(5, 2)

    # Capture everything printed during the fit.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        model.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    expected_fragments = (
        "[t-SNE]",
        "nearest neighbors...",
        "Computed conditional probabilities",
        "Mean sigma",
        "early exaggeration",
    )
    for fragment in expected_fragments:
        assert fragment in out
Beispiel #15
0
def test_verbose():
    # With verbose=2 the fit must write its progress messages to stdout.
    rng = check_random_state(0)
    model = TSNE(verbose=2)
    X = rng.randn(5, 2)

    # Redirect stdout into a buffer for the duration of the fit.
    original_stream = sys.stdout
    buffer = StringIO()
    sys.stdout = buffer
    try:
        model.fit_transform(X)
    finally:
        out = buffer.getvalue()
        buffer.close()
        sys.stdout = original_stream

    for fragment in ("[t-SNE]",
                     "nearest neighbors...",
                     "Computed conditional probabilities",
                     "Mean sigma",
                     "early exaggeration"):
        assert fragment in out
Beispiel #16
0
def test_fit_csr_matrix():
    # X can be a sparse matrix.
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    # Draw the (row, column) positions to zero out from the seeded generator
    # instead of the global ``np.random`` state, so the test input is the
    # same on every run.
    rows = random_state.randint(0, 100, 50)
    cols = random_state.randint(0, 2, 50)
    X[(rows, cols)] = 0.0
    X_csr = sp.csr_matrix(X)
    tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                random_state=0, method='exact')
    X_embedded = tsne.fit_transform(X_csr)
    # Nearest neighbors should be preserved approximately.
    assert_almost_equal(trustworthiness(X_csr, X_embedded, n_neighbors=1), 1.0,
                        decimal=1)
Beispiel #17
0
def test_fit_csr_matrix(method):
    # A sparse CSR matrix must be accepted as input, and the embedding must
    # approximately preserve nearest neighbors.
    rng = check_random_state(0)
    dense = rng.randn(50, 2)
    # Row indices are drawn before column indices (order matters for the
    # seeded generator).
    zero_rows = rng.randint(0, 50, 25)
    zero_cols = rng.randint(0, 2, 25)
    dense[(zero_rows, zero_cols)] = 0.0
    X_csr = sp.csr_matrix(dense)

    model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                 random_state=0, method=method, n_iter=750)
    embedding = model.fit_transform(X_csr)

    score = trustworthiness(X_csr, embedding, n_neighbors=1)
    assert_allclose(score, 1.0, rtol=1.1e-1)
Beispiel #18
0
def test_min_grad_norm():
    # The min_grad_norm parameter must act as a stopping criterion: in the
    # verbose log, at most one reported gradient norm may be at or below it.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    min_grad_norm = 0.002
    model = TSNE(min_grad_norm=min_grad_norm, verbose=2,
                 random_state=0, method='exact')

    # Capture everything printed during the fit.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        model.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    # Pull the gradient norm out of each log line that reports one.
    marker = 'gradient norm'
    norms = []
    for line in out.split('\n'):
        # Once the computation is Finished only an old gradient norm value
        # is repeated, which does not need to be stored.
        if 'Finished' in line:
            break

        position = line.find(marker)
        if position < 0:
            continue
        tail = line[position:].replace('gradient norm = ', '')
        norms.append(float(tail.split(' ')[0]))

    # Count the reported norms that are not larger than the threshold.
    norms = np.array(norms)
    n_smaller_gradient_norms = len(norms[norms <= min_grad_norm])

    # The gradient norm can be smaller than min_grad_norm at most once,
    # because the optimization stops the moment it becomes smaller.
    assert_less_equal(n_smaller_gradient_norms, 1)
Beispiel #19
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # With precomputed distances, nearest neighbors should be preserved
    # approximately. Repeated on three data sets drawn from one generator,
    # each fitted with a different TSNE random_state.
    rng = check_random_state(0)
    for seed in range(3):
        points = rng.randn(100, 2)
        D = squareform(pdist(points), "sqeuclidean")
        model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                     early_exaggeration=2.0, metric="precomputed",
                     random_state=seed, verbose=0)
        embedding = model.fit_transform(D)
        score = trustworthiness(D, embedding, n_neighbors=1,
                                metric="precomputed")
        assert score > .95
Beispiel #20
0
def test_verbose():
    # With verbose=2 the fit must write its progress messages to stdout.
    rng = check_random_state(0)
    model = TSNE(verbose=2)
    X = rng.randn(5, 2)

    # Capture everything printed during the fit.
    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        model.fit_transform(X)
    finally:
        out = captured.getvalue()
        captured.close()
        sys.stdout = saved_stdout

    # "Finished" was asserted twice in the earlier form of this test; a
    # single membership check is equivalent.
    expected_fragments = (
        "[t-SNE]",
        "Computing pairwise distances",
        "Computed conditional probabilities",
        "Mean sigma",
        "Finished",
        "early exaggeration",
    )
    for fragment in expected_fragments:
        assert fragment in out
def TSNE_Gist(name, csvfilename):
    """Embed the rows of a CSV file in 2D with t-SNE and store the result.

    Parameters
    ----------
    name : passed through to ``tsneHandler.storeTsneValsWIds`` as its first
        argument.
    csvfilename : passed to ``util.loadCSV``; the loaded values are converted
        to a NumPy array and used as the t-SNE input.

    Returns
    -------
    (tsne_vals, ids) : the 2-component embedding returned by
        ``TSNE.fit_transform`` and the list of image ids as strings.
    """
    # Each entry returned by the handler is indexed at position 0 to obtain
    # the id (presumably these are DB rows -- confirm against imagesHandler).
    ids = [str(row[0]) for row in imagesHandler.get_all_img_ids()]
    # Parenthesised call form: valid on Python 3 and prints the same text
    # as the former ``print ids`` statement on Python 2.
    print(ids)
    gistVals = util.loadCSV(csvfilename)
    X = np.array(gistVals)
    model = TSNE(n_components=2, random_state=0)
    tsne_vals = model.fit_transform(X)
    tsneHandler.storeTsneValsWIds(name, tsne_vals, ids)
    return tsne_vals, ids
Beispiel #22
0
def test_verbose():
    # With verbose=2 the fit must write its progress messages to stdout.
    rng = check_random_state(0)
    model = TSNE(verbose=2)
    X = rng.randn(5, 2)

    # Redirect stdout into a buffer for the duration of the fit.
    original_stream = sys.stdout
    buffer = StringIO()
    sys.stdout = buffer
    try:
        model.fit_transform(X)
    finally:
        out = buffer.getvalue()
        buffer.close()
        sys.stdout = original_stream

    # "Finished" was asserted twice in the earlier form of this test; a
    # single membership check is equivalent.
    for fragment in ("[t-SNE]",
                     "Computing pairwise distances",
                     "Computed conditional probabilities",
                     "Mean sigma",
                     "Finished",
                     "early exaggeration"):
        assert fragment in out
def TSNE_Gist(name, csvfilename):
    """Run a 2-component t-SNE on CSV-loaded values and persist the result.

    Parameters
    ----------
    name : forwarded as the first argument of
        ``tsneHandler.storeTsneValsWIds``.
    csvfilename : forwarded to ``util.loadCSV``; its result is wrapped in a
        NumPy array and embedded.

    Returns
    -------
    (tsne_vals, ids) : the embedding produced by ``TSNE.fit_transform`` and
        the image ids converted to strings.
    """
    # Element 0 of every entry is taken as the id (presumably the entries
    # are DB rows -- confirm against imagesHandler). A comprehension also
    # avoids shadowing the builtin ``id``.
    ids = [str(entry[0]) for entry in imagesHandler.get_all_img_ids()]
    # Call syntax works on Python 3 and prints identically on Python 2.
    print(ids)
    gistVals = util.loadCSV(csvfilename)
    X = np.array(gistVals)
    model = TSNE(n_components=2, random_state=0)
    tsne_vals = model.fit_transform(X)
    tsneHandler.storeTsneValsWIds(name, tsne_vals, ids)
    return tsne_vals, ids
Beispiel #24
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # Nearest neighbors should be preserved approximately when the input is
    # a precomputed distance matrix. Three rounds, each with newly drawn
    # data and a different TSNE random_state.
    rng = check_random_state(0)

    def embed(distances, seed):
        # Fit a fresh model on the given distance matrix.
        model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                     early_exaggeration=2.0, metric="precomputed",
                     random_state=seed, verbose=0)
        return model.fit_transform(distances)

    for round_idx in range(3):
        D = squareform(pdist(rng.randn(100, 2)), "sqeuclidean")
        score = trustworthiness(D, embed(D, round_idx), n_neighbors=1,
                                metric="precomputed")
        assert score > .95
def test_preserve_trustworthiness_approximately():
    # Nearest neighbors should be preserved approximately, for every
    # combination of initialisation scheme and optimisation method.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(50, n_components).astype(np.float32)

    combos = [(init, method)
              for init in ('random', 'pca')
              for method in ('exact', 'barnes_hut')]
    for init, method in combos:
        model = TSNE(n_components=n_components, init=init, random_state=0,
                     method=method)
        embedding = model.fit_transform(X)
        score = trustworthiness(X, embedding, n_neighbors=1)
        assert_greater(score, 0.9)
Beispiel #26
0
def test_preserve_trustworthiness_approximately(method, init):
    # Nearest neighbors should be preserved approximately for the given
    # optimisation method and initialisation scheme.
    rng = check_random_state(0)
    n_components = 2
    data = rng.randn(50, n_components).astype(np.float32)

    settings = dict(n_components=n_components, init=init, random_state=0,
                    method=method, n_iter=700)
    embedding = TSNE(**settings).fit_transform(data)

    score = trustworthiness(data, embedding, n_neighbors=1)
    assert score > 0.85
Beispiel #27
0
def test_n_iter_without_progress():
    # The n_iter_without_progress value (2 here) must be reported in the
    # verbose message printed when the optimisation stops.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    model = TSNE(n_iter_without_progress=2, verbose=2, random_state=0,
                 method='exact')

    # Redirect stdout into a buffer for the duration of the fit.
    original_stream = sys.stdout
    buffer = StringIO()
    sys.stdout = buffer
    try:
        model.fit_transform(X)
    finally:
        out = buffer.getvalue()
        buffer.close()
        sys.stdout = original_stream

    expected = ("did not make any progress during the "
                "last 2 episodes. Finished.")
    assert_in(expected, out)
Beispiel #28
0
def test_n_iter_without_progress():
    # Fit with a dummy negative n_iter_without_progress and verify that the
    # value is echoed in the verbose output on stdout.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    model = TSNE(n_iter_without_progress=-1, verbose=2, random_state=1,
                 method='exact')

    # Redirect stdout into a buffer for the duration of the fit.
    original_stream = sys.stdout
    buffer = StringIO()
    sys.stdout = buffer
    try:
        model.fit_transform(X)
    finally:
        out = buffer.getvalue()
        buffer.close()
        sys.stdout = original_stream

    expected = ("did not make any progress during the "
                "last -1 episodes. Finished.")
    assert_in(expected, out)
Beispiel #29
0
def test_n_iter_without_progress():
    # Fit with a dummy negative n_iter_without_progress, for both methods,
    # and verify that the value is echoed in the verbose output on stdout.
    rng = check_random_state(0)
    X = rng.randn(100, 10)
    expected = ("did not make any progress during the "
                "last -1 episodes. Finished.")

    for method in ("barnes_hut", "exact"):
        model = TSNE(n_iter_without_progress=-1, verbose=2,
                     learning_rate=1e8, random_state=0, method=method,
                     n_iter=351, init="random")
        # Override two private attributes on the instance before fitting.
        model._N_ITER_CHECK = 1
        model._EXPLORATION_N_ITER = 0

        # Capture everything printed during the fit.
        saved_stdout = sys.stdout
        captured = StringIO()
        sys.stdout = captured
        try:
            model.fit_transform(X)
        finally:
            out = captured.getvalue()
            captured.close()
            sys.stdout = saved_stdout

        assert_in(expected, out)
Beispiel #30
0
def test_64bit(method, dt):
    # 64bit arrays must be handled correctly: whatever dtype goes in, the
    # embedding is expected to come out as float32.
    rng = check_random_state(0)
    data = rng.randn(50, 2).astype(dt)

    model = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                 random_state=0, method=method, verbose=0)
    embedding = model.fit_transform(data)

    # The tsne cython code is only single precision, so the output will
    # always be single precision, irrespective of the input dtype.
    assert embedding.dtype == np.float32
Beispiel #31
0
def show_tsne(X, labels):
    # Embed X in 2D with t-SNE, rescale the embedding with its global
    # min/max, and show a scatter plot coloured via plt.cm.Set1(labels[i]).
    from sklearn.manifold.t_sne import TSNE
    embedding = TSNE(n_components=2).fit_transform(X)

    # One global min and max over all coordinates (not per axis).
    lowest = embedding.min()
    highest = embedding.max()
    embedding = (embedding - lowest) / (highest - lowest)

    # One scatter call per point so that each gets its own colour.
    for i, point in enumerate(embedding):
        plt.scatter(point[0], point[1], color=plt.cm.Set1(labels[i]))
    plt.show()
Beispiel #32
0
def test_preserve_trustworthiness_approximately():
    # Nearest neighbors should be preserved approximately, with both the
    # 'random' and the 'pca' initialisation.
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    shared = dict(n_components=2, perplexity=10, learning_rate=100.0,
                  random_state=0)
    for init in ('random', 'pca'):
        embedding = TSNE(init=init, **shared).fit_transform(X)
        score = trustworthiness(X, embedding, n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)
Beispiel #33
0
def test_preserve_trustworthiness_approximately():
    # Nearest neighbors should be preserved approximately for every
    # combination of initialisation scheme and optimisation method; the
    # failure message names the offending combination.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(50, n_components).astype(np.float32)
    failure_template = ('Trustworthiness={:0.3f} < 0.85 '
                        'for method={} and '
                        'init={}')

    combos = [(init, method)
              for init in ('random', 'pca')
              for method in ('exact', 'barnes_hut')]
    for init, method in combos:
        model = TSNE(n_components=n_components, init=init, random_state=0,
                     method=method)
        embedding = model.fit_transform(X)
        score = trustworthiness(X, embedding, n_neighbors=1)
        assert_greater(score, 0.85,
                       msg=failure_template.format(score, method, init))
def TSNE_sift(name):
    """Embed the SIFT distributions in 2D with t-SNE and store the result.

    Each entry returned by ``sift_cb_handler.get_distributions()`` is split
    into its first element (kept as the id) and the remaining elements (used
    as the feature vector).

    Parameters
    ----------
    name : forwarded as the first argument of
        ``tsneHandler.storeTsneValsWIds``.

    Returns
    -------
    (tsne_x, X_Ids) : the embedding returned by ``TSNE.fit_transform`` and
        the list of ids in matching order.
    """
    # The connection is not used directly below; it is still opened, as
    # before, for the duration of the call, but is now always closed
    # instead of being leaked until garbage collection.
    conn = sqlite3.connect(dirm.sqlite_file)
    try:
        X_Ids = []
        X_data = []
        for d in sift_cb_handler.get_distributions():
            X_Ids.append(d[0])
            X_data.append(d[1:])
        X_data = np.array(X_data)
        model = TSNE(n_components=2)
        tsne_x = model.fit_transform(X_data)
        tsneHandler.storeTsneValsWIds(name, tsne_x, X_Ids)
        return tsne_x, X_Ids
    finally:
        conn.close()
def TSNE_sift(name):
    """Run a 2-component t-SNE over the SIFT distributions and persist it.

    For every entry of ``sift_cb_handler.get_distributions()`` the first
    element is kept as the id and the rest as the feature vector.

    Parameters
    ----------
    name : forwarded as the first argument of
        ``tsneHandler.storeTsneValsWIds``.

    Returns
    -------
    (tsne_x, X_Ids) : the embedding returned by ``TSNE.fit_transform`` and
        the ids in the same order as the embedded rows.
    """
    # Nothing below uses the connection directly. It is kept open for the
    # whole call, as before, and is now closed on every exit path so that
    # the handle is not leaked.
    conn = sqlite3.connect(dirm.sqlite_file)
    try:
        dist = sift_cb_handler.get_distributions()
        X_Ids = [d[0] for d in dist]
        X_data = np.array([d[1:] for d in dist])
        model = TSNE(n_components=2)
        tsne_x = model.fit_transform(X_data)
        tsneHandler.storeTsneValsWIds(name, tsne_x, X_Ids)
        return tsne_x, X_Ids
    finally:
        conn.close()
def TSNE_General(tablename):
    """Embed all rows of a SQLite table in 2D with t-SNE and store the result.

    The first column of every row is used as the id (converted to ``str``);
    the remaining columns form the feature vector.

    Parameters
    ----------
    tablename : name of the table to read. It is also forwarded as the first
        argument of ``tsneHandler.storeTsneValsWIds``.

    Returns
    -------
    (tsne_vals, ids) : the embedding returned by ``TSNE.fit_transform`` and
        the list of string ids in row order.
    """
    conn = sqlite3.connect(dirm.sqlite_file)
    try:
        c = conn.cursor()
        # NOTE(review): the table name is interpolated into the SQL text
        # (identifiers cannot be bound as parameters). Callers must only
        # pass trusted table names.
        cmd = 'SELECT * FROM {tn}'.format(tn=tablename)
        c.execute(cmd)
        all_rows = c.fetchall()
    finally:
        # Close the connection once the rows are in memory so the handle
        # is not leaked.
        conn.close()

    ids = []
    data = []
    for row in all_rows:
        ids.append(str(row[0]))
        data.append(row[1:])
    X = np.array(data)
    model = TSNE(n_components=2, random_state=0)
    tsne_vals = model.fit_transform(X)
    tsneHandler.storeTsneValsWIds(tablename, tsne_vals, ids)
    return tsne_vals, ids
def save_tsne_plot(latent_space, path):
    # Reduce latent_space to 3 t-SNE components and save a 3D scatter plot
    # of the result as 'TSNE.png' inside the directory `path`.
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    from sklearn.manifold.t_sne import TSNE

    embedding = TSNE(3).fit_transform(latent_space)
    xs, ys, zs = embedding[:, 0], embedding[:, 1], embedding[:, 2]

    figure = plt.figure(figsize=(5, 5))
    # NOTE(review): how Axes3D(figure) attaches itself to the figure may
    # depend on the installed matplotlib version -- confirm.
    axes = Axes3D(figure)
    axes.scatter(xs, ys, zs)
    axes.view_init(30, 45)

    target = os.path.join(path, 'TSNE.png')
    plt.savefig(target, bbox_inches='tight')
    plt.close()
def TSNE_General(tablename):
    """Run a 2-component t-SNE over every row of a SQLite table and persist it.

    Column 0 of each row becomes the id (as ``str``); the remaining columns
    are the features.

    Parameters
    ----------
    tablename : table to read from; also passed as the first argument of
        ``tsneHandler.storeTsneValsWIds``.

    Returns
    -------
    (tsne_vals, ids) : the embedding returned by ``TSNE.fit_transform`` and
        the string ids in row order.
    """
    conn = sqlite3.connect(dirm.sqlite_file)
    try:
        c = conn.cursor()
        # NOTE(review): SQL identifiers cannot be bound as parameters, so
        # the table name is formatted into the statement. Only trusted
        # names may be passed in.
        cmd = "SELECT * FROM {tn}".format(tn=tablename)
        c.execute(cmd)
        all_rows = c.fetchall()
    finally:
        # All rows are already fetched; release the connection here so the
        # handle is not leaked.
        conn.close()

    ids = [str(row[0]) for row in all_rows]
    data = [row[1:] for row in all_rows]
    X = np.array(data)
    model = TSNE(n_components=2, random_state=0)
    tsne_vals = model.fit_transform(X)
    tsneHandler.storeTsneValsWIds(tablename, tsne_vals, ids)
    return tsne_vals, ids
Beispiel #39
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    # With a precomputed distance matrix as input, nearest neighbors should
    # be preserved approximately.
    rng = check_random_state(0)
    points = rng.randn(100, 2)
    D = squareform(pdist(points), "sqeuclidean")

    model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                 metric="precomputed", random_state=0)
    embedding = model.fit_transform(D)

    score = trustworthiness(D, embedding, n_neighbors=1, precomputed=True)
    assert_almost_equal(score, 1.0, decimal=1)
Beispiel #40
0
def save_embed():
    # Encode the first 6000 entries of x_test in batches of 100, embed the
    # codes in 2D with t-SNE (default settings) and save a scatter plot,
    # coloured by y_test, to 'vae-pic/vae_embed.png'.
    n_samples, batch_size = 6000, 100
    codes = np.zeros((n_samples, PARAMS["M"]*PARAMS["N"]))
    for start in range(0, n_samples, batch_size):
        stop = start + batch_size
        logits_y, _ = model(x_test[start:stop], tau=0.5, hard=HARD)
        # Reshape to (-1, 30, 10) first, then flatten to (-1, 300).
        grouped = tf.reshape(logits_y, [-1, 30, 10])
        codes[start:stop] = tf.reshape(grouped, [-1, 300])

    from sklearn.manifold.t_sne import TSNE
    viz = TSNE().fit_transform(codes)

    # One colour per value of y_test; the palette has ten entries.
    palette = ['aliceblue', 'cyan', 'darkorange', 'fuchsia', 'lightpink',
               'pink', 'springgreen', 'yellow', 'orange', 'mediumturquoise']
    for idx in range(0, n_samples):
        plt.scatter(viz[idx, 0], viz[idx, 1], c=palette[y_test[idx]])
    plt.savefig('vae-pic/vae_embed.png')
def test_preserve_trustworthiness_approximately():
    # Nearest neighbors should be preserved approximately.
    #
    # The Barnes-Hut approximation estimates P_ij from a number of nearest
    # neighbors only (k = 3 * perplexity) instead of from all points.
    # NOTE(review): the earlier comment here said perplexity=5 was chosen
    # for that reason, but the code passes perplexity=50 -- confirm which
    # one is intended.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(100, n_components).astype(np.float32)

    combos = [(init, method)
              for init in ('random', 'pca')
              for method in ('exact', 'barnes_hut')]
    for init, method in combos:
        model = TSNE(n_components=n_components, perplexity=50,
                     learning_rate=100.0, init=init, random_state=0,
                     method=method)
        embedding = model.fit_transform(X)
        score = trustworthiness(X, embedding, n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)
Beispiel #42
0
def test_preserve_trustworthiness_approximately():
    # Nearest neighbors should be preserved approximately.
    #
    # The Barnes-Hut approximation estimates P_ij using only a number of
    # nearest neighbors (k = 3 * perplexity) rather than all points.
    # NOTE(review): the earlier comment claimed perplexity=5 is used for
    # that reason, while the code below passes perplexity=50 -- confirm.
    rng = check_random_state(0)
    n_components = 2
    X = rng.randn(100, n_components).astype(np.float32)

    def embed(init, method):
        # Fit a fresh model for one (init, method) combination.
        model = TSNE(n_components=n_components, perplexity=50,
                     learning_rate=100.0, init=init, random_state=0,
                     method=method)
        return model.fit_transform(X)

    for init in ('random', 'pca'):
        for method in ('exact', 'barnes_hut'):
            score = trustworthiness(X, embed(init, method), n_neighbors=1)
            assert_almost_equal(score, 1.0, decimal=1)
Beispiel #43
0
def test_bh_match_exact():
    # The ``barnes_hut`` method should match the exact one when
    # ``angle = 0`` and ``perplexity > n_samples / 3``.
    rng = check_random_state(0)
    n_features = 10
    X = rng.randn(30, n_features).astype(np.float32)

    embeddings = {}
    iterations = {}
    for method in ('exact', 'barnes_hut'):
        model = TSNE(n_components=2, method=method, learning_rate=1.0,
                     init="random", random_state=0, n_iter=251,
                     perplexity=30.0, angle=0)
        # Kill the early_exaggeration
        model._EXPLORATION_N_ITER = 0
        embeddings[method] = model.fit_transform(X)
        iterations[method] = model.n_iter_

    # Both methods must run the same number of iterations and agree on the
    # embedding to 3 decimals.
    assert iterations['exact'] == iterations['barnes_hut']
    assert_array_almost_equal(embeddings['exact'], embeddings['barnes_hut'],
                              decimal=3)
Beispiel #44
0
def tsne_view(trainset, volume_manager):
    # Sample coordinates from one batch of `trainset`, evaluate
    # `volume_manager` at them, embed the values in 2D with t-SNE and show
    # a scatter plot with one marker/colour per subject index.
    scheduler = TractographyBatchScheduler(trainset,
                                           batch_size=20000,
                                           noisy_streamlines_sigma=False,
                                           seed=1234,
                                           normalize_target=True)
    rng = np.random.RandomState(42)
    rng.shuffle(scheduler.indices)

    # Only the inputs and the mask of the batch are used.
    inputs, _, mask = scheduler._next_batch(3)
    mask = mask.astype(bool)
    order = np.arange(mask.sum())
    rng.shuffle(order)

    # Compile a function evaluating the volume manager at given coordinates.
    coords_var = T.matrix('coords')
    eval_at_coords = theano.function(
        [coords_var], volume_manager.eval_at_coords(coords_var))

    # Keep at most 2000 masked points per subject, in shuffled order.
    n_points = 2000 * len(trainset.subjects)
    coords = inputs[mask][order[:n_points]]
    X = eval_at_coords(coords)

    from sklearn.manifold.t_sne import TSNE
    Y = TSNE(n_components=2, verbose=2, random_state=42).fit_transform(X)

    import matplotlib.pyplot as plt
    plt.figure()
    subject_ids = range(len(trainset.subjects))
    markers = ['s', 'o', '^', 'v', '<', '>', 'h']
    colors = ['cyan', 'darkorange', 'darkgreen', 'magenta', 'pink', 'k']
    # zip() stops at the shortest sequence.
    for i, marker, color in zip(subject_ids, markers, colors):
        # The last coordinate column is compared against the subject index.
        selected = coords[:, -1] == i
        print("Subject #{}: ".format(i), selected.sum())
        plt.scatter(Y[selected, 0], Y[selected, 1], 20, color=color,
                    marker=marker, label="Subject #{}".format(i))

    plt.legend()
    plt.show()
Beispiel #45
0
def load_and_transform(con, cur, tag):
    # Recompute the 2D PCA and t-SNE projections for `tag`: delete the rows
    # previously stored for it, then save and commit each projection.

    # Drop whatever was stored for this tag before.
    cur.execute("delete from computed_viz where id = ?",(tag,))
    con.commit()

    # Load the data; only the matrix and the topics are used below.
    _, mtx, topics = load_data_structures(cur, tag)
    dense = mtx.toarray()

    # PCA projection (fit, then transform the same matrix).
    pca = PCA(n_components=2)
    pca.fit(dense)
    projected = pca.transform(dense)
    save_transformation(cur, tag, "pca", projected, topics)
    con.commit()

    # t-SNE projection.
    embedder = TSNE(n_components=2, random_state=0, verbose=1)
    projected = embedder.fit_transform(dense)
    save_transformation(cur, tag, "tsne", projected, topics)
    con.commit()
Beispiel #46
0
def plot_tsne(model_fp, output_fp, limit, mode):
    # Extract one feature vector per test video with the model stored at
    # `model_fp`, embed the features in 2D with t-SNE and save a per-label
    # scatter plot into `output_fp`. With mode == "rnn" the features come
    # from model.get_feature, otherwise from calling the model directly.
    # `limit` is accepted but not used in this function.
    videos = np.load(VIDEOS_TEST_FP)
    labels = np.load(LABELS_TEST_FP)

    model = tor.load(model_fp)
    model.cuda()

    videos = select_data(normalize(videos / 255.), VIDEOS_MAX_BATCH)

    total = len(labels)
    use_rnn_features = mode == "rnn"

    features = []
    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, total))
        # Move axis 3 to position 1 before handing the tensor to the model.
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        output = model.get_feature(x) if use_rnn_features else model(x)
        # Keep only the first row of the model output.
        features.append(output.cpu().data.numpy()[0])
    features = np.array(features)

    # t-SNE embedding of the collected features.
    f_tsne = TSNE(n_components=2, random_state=0).fit_transform(features)

    # One scatter series and one legend entry per label value 0..10.
    legend_entries = []
    for label_value in range(11):
        selected = labels == label_value
        plt.scatter(f_tsne[selected, 0], f_tsne[selected, 1])
        legend_entries.append("Label {}".format(label_value))
    plt.legend(legend_entries)

    if use_rnn_features:
        fn = "tSNE_RNN.jpg"
    else:
        fn = "tSNE_CNN.jpg"
    plt.savefig(os.path.join(output_fp, fn))
Beispiel #47
0
def load_and_transform(con, cur, tag):
    # Rebuild the stored 2D projections (PCA and t-SNE) for `tag`. The old
    # rows are deleted first; each step is committed on `con`.

    # Remove stale rows for this tag.
    cur.execute("delete from computed_viz where id = ?", (tag, ))
    con.commit()

    # Load the data; the first returned value (words) is not needed here.
    _words, mtx, topics = load_data_structures(cur, tag)
    matrix = mtx.toarray()

    def store(kind, projection):
        # Persist one projection and commit right away.
        save_transformation(cur, tag, kind, projection, topics)
        con.commit()

    # PCA: fit, then transform the same matrix.
    pca = PCA(n_components=2)
    store("pca", pca.fit(matrix).transform(matrix))

    # T-SNE
    t_sne = TSNE(n_components=2, random_state=0, verbose=1)
    store("tsne", t_sne.fit_transform(matrix))
Beispiel #48
0
def check_uniform_grid(method, seeds=(0, 1, 2), n_iter=1000):
    """Make sure that TSNE can approximately recover a uniform 2D grid.

    For every seed a randomly initialised t-SNE is fitted on
    ``X_2d_grid`` and the embedding is checked for evenly spaced points.

    Parameters
    ----------
    method : value forwarded to ``TSNE(method=...)``.
    seeds : iterable of random seeds, one t-SNE run per seed. The default
        is an immutable tuple so it cannot be altered between calls.
    n_iter : iteration budget; the run must stop before exhausting it.

    Raises
    ------
    AssertionError
        If a run uses the full iteration budget or the nearest-neighbour
        distances in the embedding are not approximately constant.
    """
    for seed in seeds:
        tsne = TSNE(n_components=2, init='random', random_state=seed,
                    perplexity=10, n_iter=n_iter, method=method)
        Y = tsne.fit_transform(X_2d_grid)

        # Ensure that the convergence criterion has been triggered
        assert tsne.n_iter_ < n_iter

        # Ensure that the resulting embedding leads to approximately
        # uniformly spaced points: the distance to the closest neighbors
        # should be non-zero and approximately constant.
        nn = NearestNeighbors(n_neighbors=1).fit(Y)
        dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
        assert dist_to_nn.min() > 0.1

        # Compute the mean once; both ratios are relative to it.
        mean_dist = np.mean(dist_to_nn)
        smallest_to_mean = dist_to_nn.min() / mean_dist
        largest_to_mean = dist_to_nn.max() / mean_dist

        try_name = "{}_{}".format(method, seed)
        assert_greater(smallest_to_mean, .5, msg=try_name)
        assert_less(largest_to_mean, 2, msg=try_name)
def check_uniform_grid(method, seeds=(0, 1, 2), n_iter=1000):
    """Make sure that TSNE can approximately recover a uniform 2D grid.

    One randomly initialised t-SNE is fitted on ``X_2d_grid`` per seed;
    each embedding must converge early and be evenly spaced.

    Parameters
    ----------
    method : value forwarded to ``TSNE(method=...)``.
    seeds : iterable of random seeds (immutable tuple by default, so the
        default cannot be mutated across calls).
    n_iter : maximum number of iterations allowed for each run.

    Raises
    ------
    AssertionError
        When a run does not stop before ``n_iter`` iterations or the
        nearest-neighbour distances are too uneven.
    """
    for seed in seeds:
        tsne = TSNE(n_components=2, init='random', random_state=seed,
                    perplexity=10, n_iter=n_iter, method=method)
        Y = tsne.fit_transform(X_2d_grid)

        # Ensure that the convergence criterion has been triggered
        assert tsne.n_iter_ < n_iter

        # Ensure that the resulting embedding leads to approximately
        # uniformly spaced points: the distance to the closest neighbors
        # should be non-zero and approximately constant.
        nn = NearestNeighbors(n_neighbors=1).fit(Y)
        dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
        closest, farthest = dist_to_nn.min(), dist_to_nn.max()
        assert closest > 0.1

        # Both ratios share the same denominator, so evaluate it once.
        average = np.mean(dist_to_nn)
        smallest_to_mean = closest / average
        largest_to_mean = farthest / average

        try_name = "{}_{}".format(method, seed)
        assert_greater(smallest_to_mean, .5, msg=try_name)
        assert_less(largest_to_mean, 2, msg=try_name)
Beispiel #50
0
def tSNE(self, analysis_name: str, dataset_name: str, **kwargs):
    """Run t-SNE on a dataset's spectra and store every artefact of the run.

    Inside the directory yielded by ``open_analysis`` the following are
    written, in this order: the configuration (``options``), the fitted
    model (``model.pkl``), the embedding (``result.pkl`` and
    ``result.csv``) and the normalized dataset (``data.txt``). Progress is
    reported through the callable yielded by ``status_notifier(self)``.

    Parameters
    ----------
    analysis_name : name identifying this analysis run.
    dataset_name : name handed to ``load_dataset``.
    **kwargs : forwarded to ``TSNE`` and saved as the run configuration.
        ``verbose=True`` is always added, so passing ``verbose`` here
        raises ``TypeError``.
    """
    # preprocessing of our current strange format
    details = (dataset_name, tSNE.__name__, analysis_name)
    manifold = TSNE(**kwargs, verbose=True)

    with status_notifier(self) as notify, open_analysis(*details) as tmp_path:

        def artifact(name):
            # Location of a named artefact inside the analysis directory.
            return os.path.join(tmp_path, name)

        notify('PRESERVING CONFIGURATION')
        dump_configuration(artifact('options'), kwargs)

        notify('LOADING DATA')
        data = load_dataset(dataset_name)

        notify('RUNNING T-SNE')
        embedding = manifold.fit_transform(data.spectra)

        notify('PRESERVING RESULTS')
        joblib.dump(manifold, artifact('model') + '.pkl')
        result_stem = artifact('result')
        joblib.dump(embedding, result_stem + '.pkl')
        np.savetxt(result_stem + '.csv', embedding)
        normalized = data_utils.as_normalized(
            embedding, data.coordinates, data.labels)
        data_utils.dumps_txt(artifact('data.txt'), normalized)
Beispiel #51
0
def test_chebyshev_metric():
    # Regression check for issue #3526: t-SNE has to work with metrics
    # that cannot be squared, such as Chebyshev.
    rng = check_random_state(0)
    samples = rng.randn(5, 2)
    # Smoke test only: the fit must finish without raising.
    TSNE(metric="chebyshev").fit_transform(samples)
                   sep=',',
                   header=0,
                   index_col=0)
# Load the compound annotation sheet and index it by drug id.
anno = pd.read_excel('../data/GDSC/Screened_Compounds.xlsx')
anno.index = anno['DRUG_ID']
# Keep only the drugs present in both tables. The ids pass through a set,
# so their resulting order is arbitrary.
drug_ids = list(set(data.index) & set(anno.index))
data = data.loc[drug_ids, :]
anno = anno.loc[drug_ids, :]
# Sorted list of the distinct target pathways.
pathways = list(set(anno['TARGET_PATHWAY']))
pathways.sort()
# Exact (non Barnes-Hut) t-SNE with a fixed seed for a reproducible layout.
model = TSNE(n_components=2,
             random_state=19890904,
             method='exact',
             learning_rate=80,
             perplexity=20)
data_tsne = model.fit_transform(data)
# Re-attach the drug ids; the two embedding columns are named 0 and 1.
data_tsne = pd.DataFrame(data_tsne, index=data.index, columns=range(2))
# Per-drug silhouette score of the 2-D embedding, using the target pathway
# as the cluster label.
clustering = pd.DataFrame(index=data.index)
clustering['Silhouette score'] = silhouette_samples(
    data_tsne, anno.loc[data.index, 'TARGET_PATHWAY'])
clustering['TARGET_PATHWAY'] = anno.loc[data.index, 'TARGET_PATHWAY']
# Sorting by descending score and then dropping duplicate pathways keeps
# the best-scoring drug of every pathway.
clustering = clustering.sort_values('Silhouette score', ascending=False)
clustering = clustering.drop_duplicates('TARGET_PATHWAY')
f, ax = plt.subplots(1, 1, dpi=600, figsize=(2, 2))
# NOTE(review): 21 palette colours presumably matches the number of
# pathways in the data -- confirm.
sns.set_palette('GnBu', 21)
# One marker series per pathway.
for j in range(len(pathways)):
    pw = pathways[j]
    fil = anno.loc[:, 'TARGET_PATHWAY'] == pw
    indexes = anno.index[fil]
    plt.plot(data_tsne.loc[indexes, 0], data_tsne.loc[indexes, 1], 'o')
    # Mask of the per-pathway representatives that belong to this pathway.
    # NOTE(review): the mask is not used in the visible code; the snippet
    # appears to be cut off here.
    fil = np.in1d(clustering.index, indexes)
filename = dirm.outputDirectory + 'surfsubresized400extended30distrwh.csv'

# Parse the comma-separated feature matrix. The context manager closes the
# file handle as soon as loading finishes (it was previously left open).
with open(filename) as file:
    data = np.loadtxt(file, delimiter=',')
#X = data[:, 1:]
X = data

# Rescale every row so that its entries sum to 1.
XAvr = [row / sum(row) for row in X]

# Alternatives tried earlier and left disabled: sklearn ``Normalizer`` on X
# or on XAvr, and a 50-component PCA ahead of t-SNE.

# Seeded 2-D t-SNE embedding of the row-normalised data.
model = TSNE(n_components=2, random_state=0)
X_Tsne = model.fit_transform(XAvr)
writetoCSV(X_Tsne, "surfsubresized400extended30tsneAvr")
Beispiel #54
0
def test_init_ndarray():
    # An ndarray must be accepted as ``init``. Fitting constant input from
    # an all-zero start is expected to return the all-zero embedding.
    n_samples, n_components = 100, 2
    start = np.zeros((n_samples, n_components))
    embedding = TSNE(init=start).fit_transform(np.ones((n_samples, 5)))
    assert_array_equal(np.zeros((n_samples, n_components)), embedding)