def test_cosine_distance(self):
        """Check BallTree cosine k-NN output against a brute-force reference.

        Builds a ``nearest_neighbors.BallTree("cosine")`` index on ``self.x1``
        with ``k=15`` and compares the returned neighbor indices and distances
        with values derived from the full pairwise cosine distance matrix
        (``pdist`` + ``squareform``).
        """
        k = 15
        # Compute cosine distance nearest neighbors using ball tree
        knn_index = nearest_neighbors.BallTree("cosine")
        indices, distances = knn_index.build(self.x1, k=k)

        # Compute the exact nearest neighbors as a reference
        true_distances = squareform(pdist(self.x1, metric="cosine"))
        # The first sorted column is dropped: the diagonal of the square
        # distance matrix is zero, so that column is presumably each point
        # itself (assumes no duplicate rows in self.x1 -- TODO confirm).
        true_indices_ = np.argsort(true_distances, axis=1)[:, 1:k + 1]
        # Gather, row by row, the reference distances of the selected neighbors
        true_distances_ = np.vstack([d[i] for d, i in zip(true_distances, true_indices_)])

        # Indices are integers, so an exact comparison is appropriate
        np.testing.assert_array_equal(
            indices, true_indices_, err_msg="Nearest neighbors do not match"
        )
        # The index and `pdist` compute distances through different code paths,
        # so bitwise float equality is not guaranteed; compare with a tolerance.
        np.testing.assert_allclose(
            distances,
            true_distances_,
            rtol=1e-6,
            atol=1e-8,
            err_msg="Distances do not match",
        )
Beispiel #2
0
        data = pickle.load(f)

# Pull the feature matrix and the two label columns out of the loaded data.
# NOTE(review): "pca_50" is presumably a 50-component PCA embedding -- confirm.
x = data["pca_50"]
y = data["CellType1"]
cluster_ids = data["CellType2"]

# Number of timed repetitions per (method, sample size) combination.
n_reps = 5
# Collected timings as (label, sample_size, elapsed_seconds) tuples.
results = []

# Benchmarked configurations, in the order they are timed:
# (label stored with the timing, index class, n_jobs).
benchmarks = [
    ("Ball Tree (1 core)", nearest_neighbors.BallTree, 1),
    ("Annoy (1 core)", nearest_neighbors.Annoy, 1),
    ("Ball Tree (4 cores)", nearest_neighbors.BallTree, 4),
]

for sample_size in range(1_000, 8_001, 1_000):
    print("Sample size:", sample_size)
    # NOTE: np.random.choice draws with replacement by default, so the
    # subsample may contain repeated rows.
    indices = np.random.choice(x.shape[0], size=sample_size)
    sample = x[indices]

    for label, index_cls, n_jobs in benchmarks:
        for _ in range(n_reps):
            # The timed region covers both index construction and `build`.
            start = time()
            nn = index_cls(metric="euclidean", n_jobs=n_jobs)
            nn.build(sample, k=15)
            results.append((label, sample_size, time() - start))