Esempio n. 1
0
def test_tsne_pickle(tmpdir, datatype, nrows, ncols):
    """Check that a TSNE model survives a pickle round trip.

    Two things are verified:

    * an unfitted model keeps its attributes across the round trip;
    * a fitted model keeps an embedding whose trustworthiness is unchanged.

    ``tmpdir`` is forwarded to ``pickle_save_load``. ``datatype``, ``nrows``
    and ``ncols`` are accepted but not used by the body.
    """
    iris = load_iris()
    # Fixed-seed boolean mask over the 150 iris rows (True with p=0.75).
    iris_selection = np.random.RandomState(42).choice([True, False],
                                                      150,
                                                      replace=True,
                                                      p=[0.75, 0.25])
    X = iris.data[iris_selection]

    model = cuml.manifold.TSNE(n_components=2, random_state=199)

    # Pickle the (still unfitted) model
    model_pickle = pickle_save_load(tmpdir, model)

    # Compare the restored attributes with those of the ORIGINAL model.
    # Comparing the restored dict with itself would always succeed.
    # "handle" is excluded on both sides -- presumably a runtime resource
    # that is not expected to compare equal; TODO confirm.
    original_params = {key: value
                       for key, value in model.__dict__.items()
                       if key != "handle"}
    restored_params = {key: value
                       for key, value in model_pickle.__dict__.items()
                       if key != "handle"}

    # Same attribute names on both sides, and the same value for each one.
    assert restored_params.keys() == original_params.keys()
    for key, value in original_params.items():
        assert restored_params[key] == value

    # Transform data
    model.fit(X)
    trust_before = trustworthiness(X, model.Y, 10)

    # Save model + embeddings
    model = pickle_save_load(tmpdir, model)
    trust_after = trustworthiness(X, model.Y.to_pandas(), 10)

    assert trust_before == trust_after
Esempio n. 2
0
def test_trustworthiness_not_euclidean_metric():
    """Trustworthiness with a metric other than 'euclidean'/'precomputed'.

    Passing ``metric='cosine'`` must give the same score as passing a
    cosine distance matrix together with ``metric='precomputed'``.
    """
    rng = check_random_state(0)
    X = rng.randn(100, 2)

    direct_score = trustworthiness(X, X, metric='cosine')

    cosine_distances = pairwise_distances(X, metric='cosine')
    precomputed_score = trustworthiness(cosine_distances, X,
                                        metric='precomputed')

    assert direct_score == precomputed_score
Esempio n. 3
0
def test_umap_pickle(tmpdir, datatype, model, nrows, ncols):
    """Check that a fitted UMAP model survives a pickle round trip.

    The stored embedding (``arr_embed``) of the unpickled model must equal
    the one held by the original model, and the trustworthiness of the
    unpickled model's ``transform`` output may be at most 0.2 lower than
    that of the original ``fit_transform`` output.

    ``tmpdir`` is forwarded to ``pickle_save_load``; ``model`` is the
    estimator under test. ``datatype``, ``nrows`` and ``ncols`` are accepted
    but not used by the body.
    """
    iris = load_iris()
    # Fixed-seed boolean mask over the 150 iris rows (True with p=0.75).
    iris_selection = np.random.RandomState(42).choice([True, False],
                                                      150,
                                                      replace=True,
                                                      p=[0.75, 0.25])
    X_train = iris.data[iris_selection]

    cu_before_pickle_transform = model.fit_transform(X_train)

    cu_before_embed = model.arr_embed

    cu_trust_before = trustworthiness(X_train, cu_before_pickle_transform, 10)

    cu_after_pickle_model = pickle_save_load(tmpdir, model)

    cu_after_pickle_transform = cu_after_pickle_model.transform(X_train)

    # Read the embedding from the UNPICKLED model. Reading it from `model`
    # again would compare the original embedding with itself.
    cu_after_embed = cu_after_pickle_model.arr_embed

    cu_trust_after = trustworthiness(X_train, cu_after_pickle_transform, 10)

    assert array_equal(cu_before_embed, cu_after_embed)
    assert cu_trust_after >= cu_trust_before - 0.2
Esempio n. 4
0
def test_umap_fit_transform_trust(name):
    """Compare the trustworthiness of umap-learn and cuML UMAP embeddings.

    ``name`` selects the dataset: 'iris', 'digits' (loaded with
    ``n_class=5``) or 'wine'; any other value falls back to synthetic blobs
    (5000 samples, 10 features, 10 centers, seed 42). Both scores are
    compared through the ``array_equal`` helper with a 1e-1 argument.
    """
    if name == 'iris':
        data = datasets.load_iris().data
    elif name == 'digits':
        data = datasets.load_digits(n_class=5).data
    elif name == 'wine':
        data = datasets.load_wine().data
    else:
        # Only the samples are needed; the generated labels are discarded.
        data, _ = make_blobs(n_samples=5000,
                             n_features=10,
                             centers=10,
                             random_state=42)

    # Same hyper-parameters for the reference and the cuML implementation.
    reference_model = umap.UMAP(n_neighbors=10, min_dist=0.01)
    cuml_model = cuUMAP(n_neighbors=10, min_dist=0.01, verbose=False)

    reference_embedding = reference_model.fit_transform(data)
    cuml_embedding = cuml_model.fit_transform(data, convert_dtype=True)

    reference_trust = trustworthiness(data, reference_embedding, 10)
    cuml_trust = trustworthiness(data, cuml_embedding, 10)

    assert array_equal(reference_trust, cuml_trust, 1e-1, with_sign=True)
Esempio n. 5
0
def test_tsne_knn_graph_used(name, type_knn_graph):
    """Check that TSNE actually uses a caller-supplied KNN graph.

    An embedding built from the real KNN graph of the data is compared with
    embeddings built from a "garbage" graph (neighbors of an all-ones
    matrix). The garbage graph is passed as returned and after ``tocoo()``
    and ``tocsc()`` conversion; each time trustworthiness must drop by more
    than 0.15.

    ``name`` is the suffix of a ``datasets.load_<name>`` loader.
    ``type_knn_graph`` selects the neighbors implementation: 'sklearn' uses
    ``skKNN``, anything else ``cuKNN``.
    """
    # Look the loader up by attribute name instead of eval()-ing a string.
    X = getattr(datasets, "load_{}".format(name))().data

    if type_knn_graph == 'sklearn':
        neigh = skKNN(n_neighbors=90)
    else:
        neigh = cuKNN(n_neighbors=90)

    neigh.fit(X)
    knn_graph = neigh.kneighbors_graph(X, mode="distance")
    tsne = TSNE()

    # Perform tsne with normal knn_graph
    # NOTE(review): the positional True is presumably convert_dtype -- confirm.
    Y = tsne.fit_transform(X, True, knn_graph)
    trust_normal = trustworthiness(X, Y)
    print("Trust = ", trust_normal)

    X_garbage = np.ones(X.shape)
    knn_graph_garbage = neigh.kneighbors_graph(X_garbage, mode="distance")

    # Perform tsne with the garbage knn_graph: as returned, then as COO,
    # then as CSC. Each conversion happens right before its own fit.
    conversions = (
        lambda graph: graph,
        lambda graph: graph.tocoo(),
        lambda graph: graph.tocsc(),
    )
    for convert in conversions:
        Y = tsne.fit_transform(X, True, convert(knn_graph_garbage))
        trust_garbage = trustworthiness(X, Y)
        print("Trust = ", trust_garbage)
        assert (trust_normal - trust_garbage) > 0.15
Esempio n. 6
0
def test_trustworthiness_not_euclidean_metric():
    """Trustworthiness with a metric other than 'euclidean'/'precomputed'.

    The score for ``metric='cosine'`` must equal the score obtained from a
    cosine distance matrix passed with ``metric='precomputed'``.
    """
    rng = check_random_state(0)
    X = rng.randn(100, 2)

    cosine_score = trustworthiness(X, X, metric='cosine')

    distance_matrix = pairwise_distances(X, metric='cosine')
    precomputed_score = trustworthiness(distance_matrix, X,
                                        metric='precomputed')

    assert_equal(cosine_score, precomputed_score)
Esempio n. 7
0
def test_tsne(name):
    """
    This tests how TSNE handles a lot of input data across time.
    (1) cuDF DataFrames are passed input
    (2) Numpy arrays are passed in
    (3) Params are changed in the TSNE class
    (4) The class gets re-used across time
    (5) Trustworthiness is checked
    (6) Tests NAN in TSNE output for learning rate explosions
    (7) Tests verbosity

    ``name`` is the suffix of a ``datasets.load_<name>`` loader.
    """
    # Look the loader up by attribute name instead of eval()-ing a string.
    X = getattr(datasets, "load_{}".format(name))().data
    X_cudf = cudf.DataFrame.from_pandas(pd.DataFrame(X))

    def check(Y):
        # An embedding passes when it has no NaNs and trust above 0.76.
        nans = np.sum(np.isnan(Y))
        trust = trustworthiness(X, Y)
        print("Trust = ", trust)
        assert trust > 0.76
        assert nans == 0

    for i in range(3):
        print("iteration = ", i)

        # Two configurations per iteration: (seed/learning-rate offset,
        # verbosity). The second one shifts both values by 2 and turns
        # verbose output on.
        for offset, verbose in ((0, 0), (2, 1)):
            tsne = TSNE(2,
                        random_state=i + offset,
                        verbose=verbose,
                        learning_rate=2 + i + offset)

            # cuDF input first ...
            check(tsne.fit_transform(X_cudf).to_pandas().values)

            # ... then reuse the same estimator on the raw array.
            check(tsne.fit_transform(X))
Esempio n. 8
0
def run_training(corruption_chance, perplexity, batch_size):
  """Train PTSNE and VPTSNE on corrupted data and score both embeddings.

  Each element of the module-level ``train_data`` / ``test_data`` is zeroed
  with probability ``corruption_chance``. Both models are then fitted on the
  corrupted training data and scored with a 1-nearest-neighbor classifier
  and with trustworthiness (12 neighbors) on the corrupted test data.

  The globals are always reset to copies of ``non_corrupted_train_data`` /
  ``non_corrupted_test_data`` before returning -- also when training fails,
  so a failed run cannot leak corrupted data into the next one.

  Returns:
    ``(knn_score, tw, knn_score_vptsne, tw_vptsne)``; the first two belong
    to PTSNE, the last two to VPTSNE.

  Raises:
    Exception: raised by the training hook when ``args[2]`` is NaN, or when
      it is <= 0 while ``args[0]`` is a PTSNE instance.
  """
  global train_data, test_data

  def corrupt(x):
    # Replace the value by 0 with probability `corruption_chance`.
    return 0 if np.random.uniform() <= corruption_chance else x

  def hook(args):
    # NOTE(review): args[0] looks like the model being trained and args[2]
    # like the current loss value -- confirm against the fit() hook contract.
    print(args)
    if np.isnan(args[2]):
      raise Exception("training aborted: hook received NaN in args[2]")
    if isinstance(args[0], PTSNE) and args[2] <= 0.0:
      raise Exception(
        "training aborted: hook received a non-positive args[2] for PTSNE")

  try:
    train_data = np.vectorize(corrupt)(train_data)
    test_data = np.vectorize(corrupt)(test_data)

    vae = VAE(
      [n_input_dimensions],
      get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
      gaussian_prior_supplier,
      gaussian_supplier,
      get_bernoulli_network_builder(vae_decoder_layers, n_input_dimensions),
      bernoulli_supplier)

    ptsne = PTSNE(
      [n_input_dimensions],
      get_feed_forward_network_builder(
        vptsne_layers, batch_normalization=False),
      perplexity=perplexity)

    vptsne = VPTSNE(
      vae,
      get_feed_forward_network_builder(
        vptsne_layers, batch_normalization=False),
      perplexity=perplexity)

    ptsne.fit(train_data, n_iters=1500, batch_size=batch_size, hook_fn=hook)
    vptsne.fit(
      train_data, n_iters=1500, n_vae_iters=10000, batch_size=batch_size,
      vae_batch_size=1000, hook_fn=hook)

    # 1-NN accuracy of each embedding: fit on train, score on test.
    knn_score = KNC(n_neighbors=1).fit(
      ptsne.transform(train_data), train_labels).score(
      ptsne.transform(test_data), test_labels)
    knn_score_vptsne = KNC(n_neighbors=1).fit(
      vptsne.transform(train_data), train_labels).score(
      vptsne.transform(test_data), test_labels)

    tw = trustworthiness(
      test_data,
      ptsne.transform(test_data),
      n_neighbors=12)
    tw_vptsne = trustworthiness(
      test_data,
      vptsne.transform(test_data),
      n_neighbors=12)

    return knn_score, tw, knn_score_vptsne, tw_vptsne
  finally:
    # Restore the pristine data on success and on failure alike.
    train_data = np.copy(non_corrupted_train_data)
    test_data = np.copy(non_corrupted_test_data)
Esempio n. 9
0
def test_supervised_umap_trustworthiness_against_umap_learn():
    """Supervised cuML UMAP must match umap-learn's trustworthiness on iris.

    Both implementations are fitted with the iris targets and the same
    hyper-parameters; the cuML score has to lie within +/- 0.009 of the
    umap-learn score.
    """
    iris = datasets.load_iris()
    data = iris.data

    cuml_model = cuUMAP(n_neighbors=10, min_dist=0.01, verbose=False)
    embedding = cuml_model.fit_transform(data, iris.target,
                                         convert_dtype=True)

    reference_model = umap.UMAP(n_neighbors=10, min_dist=0.01, verbose=False)
    skl_embedding = reference_model.fit_transform(data, iris.target)

    trust = trustworthiness(data, embedding, 10)
    skl_trust = trustworthiness(data, skl_embedding, 10)

    lower_bound = skl_trust - 0.009
    upper_bound = skl_trust + 0.009
    assert lower_bound <= trust <= upper_bound
Esempio n. 10
0
def test_tsne_knn_graph_used(dataset, type_knn_graph, method):
    """Check that TSNE actually uses a caller-supplied KNN graph.

    The embedding built from the real KNN graph of ``dataset.data`` must be
    more trustworthy (by more than 0.15) than embeddings built from a
    "garbage" graph computed on an all-ones matrix. The garbage fit is
    repeated three times on the same estimator.

    When ``type_knn_graph`` is 'cuml' the graphs are wrapped in a
    ``cupyx.scipy.sparse.csr_matrix``; ``method`` is forwarded to TSNE.
    """

    def make_tsne():
        # Both the normal and the garbage run use identical settings.
        return TSNE(random_state=1,
                    n_neighbors=DEFAULT_N_NEIGHBORS,
                    method=method,
                    perplexity=DEFAULT_PERPLEXITY,
                    learning_rate_method='none',
                    min_grad_norm=1e-12)

    def to_requested_type(graph):
        # Wrap the graph in a cupyx CSR matrix only for the 'cuml' variant.
        if type_knn_graph == 'cuml':
            return cupyx.scipy.sparse.csr_matrix(graph)
        return graph

    X = dataset.data

    neigh = cuKNN(n_neighbors=DEFAULT_N_NEIGHBORS, metric="euclidean").fit(X)
    knn_graph = to_requested_type(
        neigh.kneighbors_graph(X, mode="distance").astype('float32'))

    # Reference run with the real neighbor graph.
    # NOTE(review): the positional True is presumably convert_dtype -- confirm.
    Y = make_tsne().fit_transform(X, True, knn_graph)
    trust_normal = trustworthiness(X, Y, n_neighbors=DEFAULT_N_NEIGHBORS)

    X_garbage = np.ones(X.shape)
    knn_graph_garbage = to_requested_type(
        neigh.kneighbors_graph(X_garbage,
                               mode="distance").astype('float32'))

    # Fresh estimator for the garbage runs, reused across all three fits.
    tsne = make_tsne()
    for _ in range(3):
        Y = tsne.fit_transform(X, True, knn_graph_garbage)
        trust_garbage = trustworthiness(X, Y,
                                        n_neighbors=DEFAULT_N_NEIGHBORS)
        assert (trust_normal - trust_garbage) > 0.15
Esempio n. 11
0
def test_umap_trustworthiness_on_iris():
    """A seeded UMAP embedding of iris must reach trustworthiness >= 0.97.

    Reads the iris data from the module-level ``iris`` object.
    """
    data = iris.data
    reducer = UMAP(n_neighbors=10, min_dist=0.01, random_state=42)
    embedding = reducer.fit_transform(data)
    trust = trustworthiness(data, embedding, 10)
    assert trust >= 0.97, (
        "Insufficiently trustworthy embedding for iris dataset: {}".format(
            trust)
    )
Esempio n. 12
0
def check_embedding(X, Y, score=0.76):
    """Assert that embedding ``Y`` of ``X`` is trustworthy and NaN-free.

    Prints the trustworthiness, then asserts that it is strictly greater
    than ``score`` and that ``Y`` contains no NaN entries.
    """
    nan_count = np.sum(np.isnan(Y))
    trust_value = trustworthiness(X, Y)
    print("Trust = ", trust_value)
    # The trust check comes first, so a low score is reported before NaNs.
    assert trust_value > score
    assert nan_count == 0
Esempio n. 13
0
def test_tsne_transform_on_digits_sparse(input_type):
    """TSNE on a sparse subset of digits must reach trustworthiness >= 0.85.

    ``input_type`` 'cupy' feeds a ``cupyx.scipy.sparse`` CSR matrix and
    pulls the embedding back with ``.get()``; any other value feeds a
    ``scipy.sparse`` CSR matrix.
    """
    digits = datasets.load_digits()

    # Seeded mask over the 1797 digit samples (True with p=0.60). The rows
    # where the mask is False are the ones used below.
    digits_selection = np.random.RandomState(42).choice(
        [True, False], 1797, replace=True, p=[0.60, 0.40])
    subset = digits.data[~digits_selection]

    on_gpu = input_type == 'cupy'
    sp_prefix = cupyx.scipy.sparse if on_gpu else scipy.sparse

    fitter = TSNE(2, n_neighbors=15,
                  random_state=1,
                  learning_rate=500,
                  angle=0.8)

    # Build a scipy CSR matrix first, then convert to the requested flavor.
    new_data = sp_prefix.csr_matrix(scipy.sparse.csr_matrix(subset))

    embedding = fitter.fit_transform(new_data, convert_dtype=True)
    if on_gpu:
        embedding = embedding.get()

    trust = trustworthiness(subset, embedding, 15)
    assert trust >= 0.85
Esempio n. 14
0
def test_umap_trustworthiness_on_sphere_iris():
    """A haversine-output UMAP embedding of iris must be trustworthy.

    Reads the iris data from the module-level ``iris`` object and requires
    a trustworthiness of at least 0.80 after projecting onto a sphere.
    """
    data = iris.data
    reducer = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        n_epochs=200,
        random_state=42,
        output_metric="haversine",
    )
    embedding = reducer.fit_transform(data)

    # Since trustworthiness doesn't support haversine, project onto
    # a 3D embedding of the sphere and use cosine distance.
    # Column 0 is used as the polar angle, column 1 as the azimuth.
    r = 3
    polar = embedding[:, 0]
    azimuth = embedding[:, 1]
    projected_embedding = np.vstack(
        [
            r * np.sin(polar) * np.cos(azimuth),
            r * np.sin(polar) * np.sin(azimuth),
            r * np.cos(polar),
        ]
    ).T

    trust = trustworthiness(data, projected_embedding, 10, metric="cosine")
    assert_greater_equal(
        trust,
        0.80,
        "Insufficiently trustworthy spherical embedding for iris dataset: {}".format(
            trust
        ),
    )
Esempio n. 15
0
def test_supervised_umap_trustworthiness_on_iris():
    """Supervised UMAP on iris must reach a trustworthiness of >= 0.97."""
    iris = datasets.load_iris()
    data = iris.data
    # The iris targets are passed as the second fit_transform argument.
    reducer = UMAP(n_neighbors=10, min_dist=0.01, verbose=True)
    embedding = reducer.fit_transform(data, iris.target)
    trust = trustworthiness(data, embedding, 10)
    assert trust >= 0.97
Esempio n. 16
0
def test_tsne_fit_transform_on_digits_sparse(input_type, method):
    """TSNE on sparse float32 digits must reach trustworthiness >= 0.85.

    ``input_type`` 'cupy' feeds a ``cupyx.scipy.sparse`` CSR matrix and
    pulls the embedding back with ``.get()``; any other value feeds a
    ``scipy.sparse`` CSR matrix. ``method`` is forwarded to TSNE.
    """
    digits = test_datasets['digits'].data

    on_gpu = input_type == 'cupy'
    sp_prefix = cupyx.scipy.sparse if on_gpu else scipy.sparse

    fitter = TSNE(n_components=2,
                  random_state=1,
                  method=method,
                  min_grad_norm=1e-12,
                  n_neighbors=DEFAULT_N_NEIGHBORS,
                  learning_rate_method="none",
                  perplexity=DEFAULT_PERPLEXITY)

    # Build a scipy CSR matrix first, convert to the requested flavor, and
    # only then cast to float32.
    host_csr = scipy.sparse.csr_matrix(digits)
    new_data = sp_prefix.csr_matrix(host_csr).astype('float32')

    embedding = fitter.fit_transform(new_data, convert_dtype=True)
    if on_gpu:
        embedding = embedding.get()

    trust = trustworthiness(digits, embedding,
                            n_neighbors=DEFAULT_N_NEIGHBORS)
    assert trust >= 0.85
Esempio n. 17
0
def test_umap_trustworthiness_on_iris():
    """cuML UMAP on iris must reach a trustworthiness of at least 0.97."""
    iris = datasets.load_iris()
    data = iris.data
    reducer = cuUMAP(n_neighbors=10, min_dist=0.01)
    embedding = reducer.fit_transform(data, convert_dtype=True)
    trust = trustworthiness(data, embedding, 10)
    assert trust >= 0.97
Esempio n. 18
0
def compute_metrics(original_data, embedding_dict, labels):
    """Compute quality metrics for every embedding in ``embedding_dict``.

    For each entry the result holds, under ``key[0]``:

    * '1NNgeneralization_error' -- from ``predict_labels`` and
      ``compute_generalization_error``;
    * 'trustworthiness' -- with 12 neighbors on embedding columns 0 and 1,
      only when there are fewer than 20000 labels;
    * 'cost_function_value' -- the sum of embedding column 2.

    Returns a dict with those three keys, each mapping ``key[0]`` to a value.
    """
    generalization_errors = {}
    trust_scores = {}
    cost_values = {}

    for key, embedding in embedding_dict.items():
        # Results are stored under the first element of the key only.
        short_key = key[0]

        # 1NN generalization error
        predicted_labels = predict_labels(embedding, labels)
        generalization_errors[short_key] = compute_generalization_error(
            labels, predicted_labels)

        # trustworthiness (12), skipped for 20000 labels or more
        if len(labels) < 20000:
            trust_scores[short_key] = trustworthiness(original_data,
                                                      embedding[:, 0:2],
                                                      n_neighbors=12)

        # cost function value
        cost_values[short_key] = sum(embedding[:, 2])

    return {
        '1NNgeneralization_error': generalization_errors,
        'trustworthiness': trust_scores,
        'cost_function_value': cost_values,
    }
Esempio n. 19
0
def test_umap_fit_transform_trustworthiness_with_consistency_enabled():
    """Seeded cuML UMAP on iris must reach a trustworthiness of >= 0.97."""
    data = datasets.load_iris().data
    # A fixed random_state is passed to the estimator.
    algo = cuUMAP(n_neighbors=10, min_dist=0.01, random_state=42)
    embedding = algo.fit_transform(data, convert_dtype=True)
    score = trustworthiness(data, embedding, 10)
    assert score >= 0.97
Esempio n. 20
0
def test_umap_trustworthiness_on_iris_random_init():
    """UMAP with ``init="random"`` on iris must reach trust >= 0.95."""
    data = datasets.load_iris().data
    reducer = UMAP(n_neighbors=10, min_dist=0.01, init="random")
    embedding = reducer.fit_transform(data)
    score = trustworthiness(data, embedding, 10)
    assert score >= 0.95
Esempio n. 21
0
def test_umap_trustworthiness_on_iris():
    """The pre-fitted iris model's embedding must reach trust >= 0.97.

    Reads the embedding from the module-level ``iris_model`` and the data
    from the module-level ``iris`` object.
    """
    embedding = iris_model.embedding_
    trust = trustworthiness(iris.data, embedding, 10)
    # The message is a single literal: the previous implicit concatenation
    # dropped the space and produced "... embedding foriris dataset".
    assert_greater_equal(
        trust,
        0.97,
        "Insufficiently trustworthy embedding for iris dataset: {}".format(
            trust),
    )
Esempio n. 22
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    """Nearest neighbors should be preserved approximately.

    TSNE is fitted on a precomputed distance matrix; trustworthiness at
    ``n_neighbors=1`` must equal 1.0 to one decimal place.
    """
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    # D holds plain (not squared) Euclidean distances. A "sqeuclidean"
    # string used to be passed to squareform here, but it landed in
    # squareform's `force` parameter, where it had no effect. It is dropped
    # so the code says what it computes; the values of D are unchanged.
    D = squareform(pdist(X))
    tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                metric="precomputed", random_state=0, verbose=0)
    X_embedded = tsne.fit_transform(D)
    trust = trustworthiness(D, X_embedded, n_neighbors=1, precomputed=True)
    assert_almost_equal(trust, 1.0, decimal=1)
Esempio n. 23
0
def validate_embedding(X, Y, score=0.74, n_neighbors=DEFAULT_N_NEIGHBORS):
    """Assert that embedding ``Y`` of ``X`` is trustworthy and NaN-free.

    Prints the trustworthiness computed with ``n_neighbors`` neighbors,
    then asserts that it is strictly greater than ``score`` and that ``Y``
    contains no NaN entries.
    """
    nan_count = np.sum(np.isnan(Y))
    trust_value = trustworthiness(X, Y, n_neighbors=n_neighbors)

    print("Trust=%s" % trust_value)
    # The trust check comes first, so a low score is reported before NaNs.
    assert trust_value > score
    assert nan_count == 0
Esempio n. 24
0
def test_supervised_umap_trustworthiness_on_iris():
    """Supervised cuML UMAP on iris must be sufficiently trustworthy.

    The required score is 0.97 lowered by ``TRUST_TOLERANCE_THRESH``.
    """
    iris = datasets.load_iris()
    data = iris.data
    reducer = cuUMAP(n_neighbors=10, min_dist=0.01, verbose=False)
    # The iris targets are passed as the second fit_transform argument.
    embedding = reducer.fit_transform(data, iris.target, convert_dtype=True)
    trust = trustworthiness(data, embedding, 10)
    minimum_trust = 0.97 - TRUST_TOLERANCE_THRESH
    assert trust >= minimum_trust
Esempio n. 25
0
def test_fit_csr_matrix():
    """TSNE must accept a sparse CSR matrix as input.

    Entries of a random 100x2 matrix are zeroed at 50 randomly drawn
    positions, the matrix is converted to CSR and embedded with the exact
    method; trustworthiness at ``n_neighbors=1`` must equal 1.0 to one
    decimal place.
    """
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    # Draw the zeroed positions from the seeded generator so that every run
    # sees the same data. The unseeded global np.random made this test
    # non-reproducible.
    zero_rows = random_state.randint(0, 100, 50)
    zero_cols = random_state.randint(0, 2, 50)
    X[zero_rows, zero_cols] = 0.0
    X_csr = sp.csr_matrix(X)
    tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                random_state=0, method="exact")
    X_embedded = tsne.fit_transform(X_csr)
    trust = trustworthiness(X_csr, X_embedded, n_neighbors=1)
    assert_almost_equal(trust, 1.0, decimal=1)
Esempio n. 26
0
def test_trustworthiness():
    """Test the trustworthiness score on three hand-built cases."""
    rng = check_random_state(0)

    # Affine transformation: expected to score exactly 1.0.
    points = rng.randn(100, 2)
    affine_image = 5.0 + points / 10.0
    assert_equal(trustworthiness(points, affine_image), 1.0)

    # Randomly shuffled: expected to score below 0.6.
    ordered = np.arange(100).reshape(-1, 1)
    shuffled = ordered.copy()
    rng.shuffle(shuffled)
    assert_less(trustworthiness(ordered, shuffled), 0.6)

    # Completely different: expected to score about 0.2 with one neighbor.
    line = np.arange(5).reshape(-1, 1)
    scrambled = np.array([[0], [2], [4], [1], [3]])
    score = trustworthiness(line, scrambled, n_neighbors=1)
    assert_almost_equal(score, 0.2)
Esempio n. 27
0
def test_umap_sparse_trustworthiness():
    """UMAP on the first 100 sparse test rows must reach trust >= 0.91.

    Reads the data from the module-level ``sparse_test_data``. The embedding
    is computed on the sparse rows; trustworthiness is evaluated on their
    dense form.
    """
    subset = sparse_test_data[:100]
    embedding = UMAP(n_neighbors=10).fit_transform(subset)
    trust = trustworthiness(subset.toarray(), embedding, 10)
    # The literals now carry the separating space: the previous implicit
    # concatenation produced "... embedding forsparse test dataset".
    assert_greater_equal(
        trust,
        0.91,
        "Insufficiently trustworthy embedding for "
        "sparse test dataset: {}".format(trust),
    )
Esempio n. 28
0
def test_umap_trustworthiness_on_iris():
    """UMAP on iris must reach a trustworthiness of at least 0.964."""
    data = datasets.load_iris().data
    reducer = UMAP(n_neighbors=10, min_dist=0.01)
    embedding = reducer.fit_transform(data)
    score = trustworthiness(data, embedding, 10)

    # We are doing a spectral embedding but not a
    # multi-component layout (which is marked experimental).
    # As a result, our score drops by 0.006.
    assert score >= 0.964
Esempio n. 29
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    """Nearest neighbors should be preserved approximately.

    TSNE is fitted on a precomputed distance matrix; trustworthiness at
    ``n_neighbors=1`` must equal 1.0 to one decimal place.
    """
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    # D holds plain (not squared) Euclidean distances. A "sqeuclidean"
    # string used to be passed to squareform here, but it landed in
    # squareform's `force` parameter, where it had no effect. It is dropped
    # so the code says what it computes; the values of D are unchanged.
    D = squareform(pdist(X))
    tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                metric="precomputed", random_state=0, verbose=0)
    X_embedded = tsne.fit_transform(D)
    trust = trustworthiness(D, X_embedded, n_neighbors=1, precomputed=True)
    assert_almost_equal(trust, 1.0, decimal=1)
Esempio n. 30
0
def test_preserve_trustworthiness_approximately():
    """Nearest neighbors should be preserved approximately.

    Runs TSNE once per initialisation ('random', then 'pca') and expects a
    trustworthiness of 1.0 to one decimal place at ``n_neighbors=1``.
    """
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    for init_mode in ('random', 'pca'):
        model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                     init=init_mode, random_state=0)
        embedded = model.fit_transform(X)
        score = trustworthiness(X, embedded, n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)
Esempio n. 31
0
def test_preserve_trustworthiness_approximately():
    """Nearest neighbors should be preserved approximately.

    Runs TSNE once per initialisation ('random', then 'pca') and expects a
    trustworthiness of 1.0 to one decimal place at ``n_neighbors=1``.
    """
    rng = check_random_state(0)
    X = rng.randn(100, 2)
    for init_mode in ('random', 'pca'):
        model = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                     init=init_mode, random_state=0)
        embedded = model.fit_transform(X)
        score = trustworthiness(X, embedded, n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)
def test_preserve_trustworthiness_approximately(method, init):
    """Nearest neighbors should be preserved approximately.

    Fits TSNE with the given ``method`` and ``init`` on 50 float32 samples
    and expects trustworthiness above 0.85 at ``n_neighbors=1``.
    """
    n_components = 2
    rng = check_random_state(0)
    X = rng.randn(50, n_components).astype(np.float32)
    model = TSNE(n_components=n_components, init=init, random_state=0,
                 method=method, n_iter=700)
    embedded = model.fit_transform(X)
    score = trustworthiness(X, embedded, n_neighbors=1)
    assert score > 0.85
Esempio n. 33
0
def test_semisupervised_umap_trustworthiness_on_iris():
    """cuML UMAP with partially masked targets must stay trustworthy.

    The required score is 0.97 lowered by ``TRUST_TOLERANCE_THRESH``.
    """
    iris = datasets.load_iris()
    data = iris.data

    # Work on a copy of the targets and overwrite entries 25..74 with -1
    # (presumably treated as "unlabeled" by the estimator -- confirm).
    target = iris.target.copy()
    target[25:75] = -1

    reducer = cuUMAP(n_neighbors=10, min_dist=0.01, verbose=False)
    embedding = reducer.fit_transform(data, target)

    trust = trustworthiness(data, embedding, 10)
    minimum_trust = 0.97 - TRUST_TOLERANCE_THRESH
    assert trust >= minimum_trust
Esempio n. 34
0
    def assert_model(pickled_model, X_train):
        """Validate an unpickled UMAP model against the recorded results.

        Uses ``result`` from the enclosing scope: the stored embedding must
        equal ``result["umap_embedding"]`` and the trustworthiness of the
        transformed ``X_train`` may be at most 0.2 below ``result["umap"]``.
        """
        restored_embedding = pickled_model.embedding_.to_output('numpy')

        neighbor_count = pickled_model.n_neighbors
        assert array_equal(result["umap_embedding"], restored_embedding)

        transformed = pickled_model.transform(X_train)
        trust_after = trustworthiness(X_train, transformed, neighbor_count)
        assert trust_after >= result["umap"] - 0.2
Esempio n. 35
0
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
    """Nearest neighbors should be preserved approximately.

    Three random data sets are embedded from their precomputed distance
    matrices (TSNE seed 0, 1, 2); each embedding must reach trustworthiness
    above 0.95 at ``n_neighbors=1``.
    """
    random_state = check_random_state(0)
    for i in range(3):
        X = random_state.randn(100, 2)
        # D holds plain (not squared) Euclidean distances. A "sqeuclidean"
        # string used to be passed to squareform here, but it landed in
        # squareform's `force` parameter, where it had no effect. It is
        # dropped so the code says what it computes; D is unchanged.
        D = squareform(pdist(X))
        tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                    early_exaggeration=2.0, metric="precomputed",
                    random_state=i, verbose=0)
        X_embedded = tsne.fit_transform(D)
        t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
        assert t > .95
def test_preserve_trustworthiness_approximately():
    """Nearest neighbors should be preserved approximately.

    Every (init, method) combination is fitted on the same 50 float32
    samples and must reach trustworthiness above 0.9 at ``n_neighbors=1``.
    """
    n_components = 2
    rng = check_random_state(0)
    X = rng.randn(50, n_components).astype(np.float32)

    # Same visiting order as a nested loop: init outermost, method innermost.
    combinations = [(init, method)
                    for init in ('random', 'pca')
                    for method in ['exact', 'barnes_hut']]
    for init, method in combinations:
        model = TSNE(n_components=n_components, init=init, random_state=0,
                     method=method)
        embedded = model.fit_transform(X)
        score = trustworthiness(X, embedded, n_neighbors=1)
        assert_greater(score, 0.9)
Esempio n. 37
0
def tsne(D, medoids_df, dest_dir, fn):
    """Embed a precomputed distance matrix in 2D with TSNE and plot it.

    ``D`` is used both as TSNE input and, through ``D.values``, for the
    trustworthiness score that is printed. The embedding is indexed like
    ``medoids_df`` and drawn with ``cluster_scatter_plot``; the figure is
    saved as ``<fn>.singletons.tsne.png`` inside ``dest_dir``.
    """
    # Reproducing braincode/calculate_cluster_medoids_tSNE
    print('2D TSNE embedding plotting')
    embedder = TSNE(n_components=2, perplexity=5,
                    early_exaggeration=1.0, learning_rate=10.0,
                    metric='precomputed', verbose=True, random_state=0)
    coordinates = embedder.fit_transform(D)
    medoids2D = pd.DataFrame(coordinates, index=medoids_df.index)

    trust = trustworthiness(D.values,
                            medoids2D.values,
                            n_neighbors=5,
                            precomputed=True)
    print('Trusty TSNE: %.2f' % trust)

    _, ax = plt.subplots(nrows=1, ncols=1)
    # NOTE(review): on Python 3 `map` yields a one-shot iterator -- confirm
    # that cluster_scatter_plot only iterates over the labels once.
    cluster_scatter_plot(medoids2D[0], medoids2D[1],
                         labels=map(str, medoids2D.index),
                         ax=ax)
    output_path = op.join(dest_dir, fn + '.singletons.tsne.png')
    plt.savefig(output_path)
Esempio n. 38
0
def test_preserve_trustworthiness_approximately():
    """Nearest neighbors should be preserved approximately.

    Every (init, method) combination is fitted on the same 100 float32
    samples and must reach a trustworthiness of 1.0 to one decimal place at
    ``n_neighbors=1``.
    """
    rng = check_random_state(0)
    # The Barnes-Hut approximation uses a different method to estimate
    # P_ij using only a number of nearest neighbors instead of all
    # points (so that k = 3 * perplexity).
    # NOTE(review): this comment used to continue "we set the perplexity=5,
    # so that the number of neighbors is 5%", but the value passed below is
    # perplexity=50 -- confirm which one is intended.
    n_components = 2
    X = rng.randn(100, n_components).astype(np.float32)

    # Same visiting order as a nested loop: init outermost, method innermost.
    combinations = [(init, method)
                    for init in ('random', 'pca')
                    for method in ['exact', 'barnes_hut']]
    for init, method in combinations:
        model = TSNE(n_components=n_components, perplexity=50,
                     learning_rate=100.0, init=init, random_state=0,
                     method=method)
        embedded = model.fit_transform(X)
        score = trustworthiness(X, embedded, n_neighbors=1)
        assert_almost_equal(score, 1.0, decimal=1)