def test_cosine_distance(self):
    """Check the cosine-metric BallTree k-NN against a brute-force reference.

    Builds a ``BallTree`` index with the ``"cosine"`` metric over ``self.x1``
    and compares the returned neighbor indices and distances with the ones
    derived from the full pairwise distance matrix computed by ``pdist``.
    """
    k = 15
    # Compute cosine distance nearest neighbors using ball tree
    knn_index = nearest_neighbors.BallTree("cosine")
    indices, distances = knn_index.build(self.x1, k=k)

    # Compute the exact nearest neighbors as a reference
    true_distances = squareform(pdist(self.x1, metric="cosine"))
    # Column 0 of the sorted order is dropped: the diagonal of the square
    # matrix is zero, so each point is expected to rank itself first.
    true_indices_ = np.argsort(true_distances, axis=1)[:, 1:k + 1]
    # Gather, for every row, the distances at that row's neighbor indices.
    true_distances_ = np.take_along_axis(true_distances, true_indices_, axis=1)

    # Indices are integers, so an exact comparison is appropriate.
    np.testing.assert_array_equal(
        indices, true_indices_, err_msg="Nearest neighbors do not match"
    )
    # Distances are floats produced by two different code paths; compare with
    # a tolerance rather than demanding bit-for-bit equality.
    np.testing.assert_allclose(
        distances,
        true_distances_,
        rtol=1e-6,
        atol=1e-6,
        err_msg="Distances do not match",
    )
# Benchmark: time nearest-neighbor index construction for several index
# configurations over increasing sample sizes.
#
# NOTE(review): `f` is not defined in this part of the script; presumably it is
# a file handle opened earlier -- confirm. Unpickling can execute arbitrary
# code, so only load files from a trusted source.
data = pickle.load(f)

x = data["pca_50"]
y, cluster_ids = data["CellType1"], data["CellType2"]

# (label, index class, n_jobs) for every timed configuration, in the order in
# which they are run for each sample size.
benchmarks = [
    ("Ball Tree (1 core)", nearest_neighbors.BallTree, 1),
    ("Annoy (1 core)", nearest_neighbors.Annoy, 1),
    ("Ball Tree (4 cores)", nearest_neighbors.BallTree, 4),
]

# Each entry is a (label, sample_size, elapsed_seconds) tuple.
results = []
n_reps = 5
for sample_size in range(1000, 8_001, 1000):
    print("Sample size:", sample_size)
    # Draw random row indices. `np.random.choice` samples with replacement by
    # default, so the sample may contain repeated rows.
    indices = np.random.choice(x.shape[0], size=sample_size)
    sample = x[indices]

    for label, knn_class, n_jobs in benchmarks:
        for _ in range(n_reps):
            # Both index construction and the k-NN build are inside the
            # timed region.
            start = time()
            nn = knn_class(metric="euclidean", n_jobs=n_jobs)
            nn.build(sample, k=15)
            results.append((label, sample_size, time() - start))