def test_query_same_result_with_fixed_random_state(self):
    """Two indices built with the same ``random_state`` must agree exactly.

    Builds a 30-neighbor graph on ``self.x1`` twice, each time with a fresh
    ``NNDescent("euclidean", random_state=1)``, and asserts that both the
    neighbor indices and the distances are equal between the two runs.
    """
    graphs = []
    for _ in range(2):
        index = nearest_neighbors.NNDescent("euclidean", random_state=1)
        indices, distances = index.build(self.x1, k=30)
        graphs.append((indices, distances))

    first_run, second_run = graphs
    np.testing.assert_equal(first_run[0], second_run[0])
    np.testing.assert_equal(first_run[1], second_run[1])
def test_random_cluster_when_invalid_indices(self):
    """Rows reported as invalid (-1) by the backend must be filled in.

    ``pynndescent.NNDescent`` is patched with a stand-in whose neighbor graph
    marks the first ten rows as invalid. After ``build()``, those rows must
    hold no -1 index, only indices below 10, and strictly positive distances.
    """

    class MockIndex:
        # Stand-in for the pynndescent index: exposes only ``neighbor_graph``.
        def __init__(self, data, n_neighbors, **_):
            n_points = data.shape[0]
            graph_shape = (n_points, n_neighbors)

            # Fixed seed keeps the fake graph identical on every run.
            rng = check_random_state(0)
            neighbor_ids = rng.randint(0, n_points, size=graph_shape)
            neighbor_dists = rng.exponential(5, graph_shape)

            # Flag the first ten points as having invalid neighbors.
            neighbor_ids[:10] = -1
            neighbor_dists[:10] = -1

            self.neighbor_graph = neighbor_ids, neighbor_dists

    with patch("pynndescent.NNDescent", wraps=MockIndex):
        knn_index = nearest_neighbors.NNDescent(self.x1, 5, "euclidean", n_jobs=2)
        indices, distances = knn_index.build()

        failed = slice(None, 10)
        # The invalid markers must have been replaced by something...
        self.assertTrue(np.all(indices[failed] != -1))
        # ...and that "something" must be indices of the failed points only...
        self.assertTrue(np.all(indices[failed] < 10))
        # ...with the matching distances set to something positive.
        self.assertTrue(np.all(distances[failed] > 0))
def test_runs_with_correct_njobs_if_sparse_input(self):
    """``n_jobs=2`` must be forwarded to pynndescent for CSR input.

    ``pynndescent.NNDescent`` is wrapped in a mock so the real index is still
    built while the keyword arguments it receives are recorded.
    """
    sparse_data = sp.csr_matrix(self.x1)
    expected_kwargs = {"n_jobs": 2}

    with patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent) as nndescent:
        index = nearest_neighbors.NNDescent("euclidean", n_jobs=2)
        index.build(sparse_data, k=5)
        check_mock_called_with_kwargs(nndescent, expected_kwargs)
def test_random_state_being_passed_through(self, nndescent):
    """The ``random_state`` given to the wrapper must reach the backend.

    ``nndescent`` is a mock injected from outside this method (presumably by
    a patch decorator on ``pynndescent.NNDescent`` -- confirm at the
    decoration site). It must be called exactly once, with the same
    ``random_state`` that was handed to ``nearest_neighbors.NNDescent``.
    """
    seed = 1
    index = nearest_neighbors.NNDescent("euclidean", random_state=seed)
    index.build(self.x1, k=30)

    nndescent.assert_called_once()
    check_mock_called_with_kwargs(nndescent, dict(random_state=seed))
def test_runs_with_correct_njobs_if_dense_input(self):
    """``n_jobs=2`` must be forwarded to pynndescent for dense input.

    ``pynndescent.NNDescent`` is wrapped in a mock so the real index is still
    built while the keyword arguments it receives are recorded.
    """
    expected_kwargs = {"n_jobs": 2}

    with patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent) as nndescent:
        index = nearest_neighbors.NNDescent(self.x1, 5, "euclidean", n_jobs=2)
        index.build()
        check_mock_called_with_kwargs(nndescent, expected_kwargs)
def test_random_state_being_passed_through(self):
    """The ``random_state`` given to the wrapper must reach pynndescent.

    ``pynndescent.NNDescent`` is wrapped in a mock; after building a
    30-neighbor index on ``self.x1`` the mock must have been called exactly
    once, with the same ``random_state`` the wrapper was constructed with.
    """
    seed = 1

    with patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent) as nndescent:
        index = nearest_neighbors.NNDescent(
            self.x1, 30, "euclidean", random_state=seed
        )
        index.build()

        nndescent.assert_called_once()
        check_mock_called_with_kwargs(nndescent, dict(random_state=seed))
def test_building_with_lt15_builds_proper_graph(self, nndescent):
    """Building with ``k=10`` must return ``(n_samples, 10)`` arrays.

    ``nndescent`` is a mock injected from outside this method (presumably by
    a patch decorator on ``pynndescent.NNDescent`` -- confirm at the
    decoration site). The first returned column must not simply be every
    point's own index.
    """
    n_samples = self.x1.shape[0]
    expected_shape = (n_samples, 10)

    index = nearest_neighbors.NNDescent("euclidean")
    indices, distances = index.build(self.x1, k=10)

    self.assertEqual(indices.shape, expected_shape)
    self.assertEqual(distances.shape, expected_shape)

    own_indices = np.arange(n_samples)
    self.assertFalse(np.all(indices[:, 0] == own_indices))

    # One extra neighbor is requested from pynndescent because the nearest
    # neighbor it reports for each point is the point itself.
    check_mock_called_with_kwargs(nndescent, {"n_neighbors": 11})
def test_uncompiled_callable_is_compiled(self):
    """A plain Python metric passed to ``check_metric`` must come back compiled.

    Defines an ordinary (uncompiled) Manhattan-distance function, hands it to
    ``NNDescent.check_metric`` and asserts that the returned object is a
    ``CPUDispatcher``.
    """
    knn_index = nearest_neighbors.NNDescent("manhattan")

    def manhattan(x, y):
        # Deliberately written as an explicit index loop over plain scalars:
        # this function is the input that ``check_metric`` is expected to
        # compile, so it is kept as simple as possible.
        result = 0.0
        for i in range(x.shape[0]):
            result += np.abs(x[i] - y[i])
        return result

    compiled_metric = knn_index.check_metric(manhattan)

    # assertIsInstance reports the offending object and the expected type on
    # failure, instead of the opaque "False is not true".
    self.assertIsInstance(compiled_metric, CPUDispatcher)
def test_building_with_gt15_calls_query(self, nndescent):
    """Building with ``k=30`` must build a 15-neighbor index, then query it.

    ``nndescent`` is a mock injected from outside this method (presumably by
    a patch decorator on ``pynndescent.NNDescent`` -- confirm at the
    decoration site). Its ``query`` attribute is wrapped in a ``MagicMock``
    so the arguments of the follow-up query can be inspected.
    """
    nndescent.query = MagicMock(wraps=nndescent.query)

    n_samples = self.x1.shape[0]
    expected_shape = (n_samples, 30)

    index = nearest_neighbors.NNDescent("euclidean")
    indices, distances = index.build(self.x1, k=30)

    self.assertEqual(indices.shape, expected_shape)
    self.assertEqual(distances.shape, expected_shape)

    own_indices = np.arange(n_samples)
    self.assertFalse(np.all(indices[:, 0] == own_indices))

    # The index itself should be built with 15 neighbors...
    check_mock_called_with_kwargs(nndescent, {"n_neighbors": 15})
    # ...and then queried for the requested number of neighbors. The query is
    # checked for 31 rather than 30 because it also returns the original
    # point, which we don't consider.
    check_mock_called_with_kwargs(nndescent.query, {"k": 31})
def test_runs_with_njobs1_if_sparse_input(self, nndescent):
    """For CSR input the backend must be called with ``n_jobs=1``.

    The wrapper is constructed with ``n_jobs=4``, yet the injected
    ``nndescent`` mock (presumably a patch of ``pynndescent.NNDescent`` --
    confirm at the decoration site) is expected to receive ``n_jobs=1``.
    """
    sparse_data = sp.csr_matrix(self.x1)

    index = nearest_neighbors.NNDescent("euclidean", n_jobs=4)
    index.build(sparse_data, k=5)

    check_mock_called_with_kwargs(nndescent, {"n_jobs": 1})
def test_runs_with_correct_njobs_if_dense_input(self, nndescent):
    """For dense input the requested ``n_jobs`` must reach the backend as-is.

    The wrapper is constructed with ``n_jobs=4`` and the injected
    ``nndescent`` mock (presumably a patch of ``pynndescent.NNDescent`` --
    confirm at the decoration site) is expected to receive ``n_jobs=4``.
    """
    requested_jobs = 4

    index = nearest_neighbors.NNDescent("euclidean", n_jobs=requested_jobs)
    index.build(self.x1, k=5)

    check_mock_called_with_kwargs(nndescent, {"n_jobs": requested_jobs})