def test_query_same_result_with_fixed_random_state(self):
        """Two builds seeded with the same ``random_state`` must agree exactly.

        A fresh ``NNDescent`` index is created and built twice on ``self.x1``
        with ``random_state=1``; the returned indices and distances of both
        runs are then compared for equality.
        """
        results = []
        for _ in range(2):
            index = nearest_neighbors.NNDescent("euclidean", random_state=1)
            results.append(index.build(self.x1, k=30))

        (first_idx, first_dist), (second_idx, second_dist) = results
        np.testing.assert_equal(first_idx, second_idx)
        np.testing.assert_equal(first_dist, second_dist)
    def test_random_cluster_when_invalid_indices(self):
        """Rows reported as ``-1`` by the index must be replaced by ``build``.

        ``pynndescent.NNDescent`` is patched with a mock that wraps
        ``MockIndex``, whose neighbor graph marks the first ten points as
        invalid. After building, those ten rows are expected to contain
        indices in ``[0, 10)`` and strictly positive distances.
        """
        class MockIndex:
            # Stand-in index: a seeded random neighbor graph whose first ten
            # rows are overwritten with -1.
            def __init__(self, data, n_neighbors, **_):
                n_points = data.shape[0]
                graph_shape = (n_points, n_neighbors)

                # Draw indices first, then distances, so the seeded stream
                # produces the same values on every run.
                rng = check_random_state(0)
                neighbor_ids = rng.randint(0, n_points, size=graph_shape)
                neighbor_dists = rng.exponential(5, graph_shape)

                # Flag the first ten points as having invalid neighbors
                neighbor_ids[:10] = -1
                neighbor_dists[:10] = -1

                self.neighbor_graph = neighbor_ids, neighbor_dists

        with patch("pynndescent.NNDescent", wraps=MockIndex):
            index = nearest_neighbors.NNDescent(self.x1, 5, "euclidean", n_jobs=2)
            indices, distances = index.build()

            repaired_ids = indices[:10]
            repaired_dists = distances[:10]

            # The -1 placeholders must have been replaced by something ...
            self.assertTrue(np.all(repaired_ids != -1))
            # ... and that "something" must point at other failed points only
            self.assertTrue(np.all(repaired_ids < 10))
            # The matching distances must have been set to positive values
            self.assertTrue(np.all(repaired_dists > 0))
예제 #3
0
 def test_runs_with_correct_njobs_if_sparse_input(self):
     """Building on a CSR matrix must forward ``n_jobs=2`` to pynndescent.

     ``pynndescent.NNDescent`` is patched with a mock that wraps the real
     class so the keyword arguments of the call can be inspected.
     """
     sparse_x = sp.csr_matrix(self.x1)
     spy = patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent)
     with spy as nndescent:
         index = nearest_neighbors.NNDescent("euclidean", n_jobs=2)
         index.build(sparse_x, k=5)
         check_mock_called_with_kwargs(nndescent, {"n_jobs": 2})
예제 #4
0
    def test_random_state_being_passed_through(self, nndescent):
        """The ``random_state`` given to the wrapper must reach pynndescent.

        ``nndescent`` is a mock supplied from outside this method
        (presumably by a ``patch`` decorator -- confirm). It must be called
        exactly once, with the same ``random_state`` keyword.
        """
        seed = 1
        index = nearest_neighbors.NNDescent("euclidean", random_state=seed)
        index.build(self.x1, k=30)

        nndescent.assert_called_once()
        check_mock_called_with_kwargs(nndescent, dict(random_state=seed))
예제 #5
0
 def test_runs_with_correct_njobs_if_dense_input(self):
     """Building on the dense ``self.x1`` must forward ``n_jobs=2``.

     ``pynndescent.NNDescent`` is patched with a mock that wraps the real
     class so the keyword arguments of the call can be inspected.
     """
     spy = patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent)
     with spy as nndescent:
         index = nearest_neighbors.NNDescent(self.x1, 5, "euclidean", n_jobs=2)
         index.build()
         check_mock_called_with_kwargs(nndescent, {"n_jobs": 2})
    def test_random_state_being_passed_through(self):
        """The ``random_state`` given to the wrapper must reach pynndescent.

        ``pynndescent.NNDescent`` is patched with a mock wrapping the real
        class; the mock must be called exactly once, with the same
        ``random_state`` keyword that was handed to the wrapper.
        """
        seed = 1
        spy = patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent)
        with spy as nndescent:
            index = nearest_neighbors.NNDescent(
                self.x1, 30, "euclidean", random_state=seed
            )
            index.build()

            nndescent.assert_called_once()
            check_mock_called_with_kwargs(nndescent, dict(random_state=seed))
예제 #7
0
    def test_building_with_lt15_builds_proper_graph(self, nndescent):
        """Building with ``k=10`` must return a 10-column neighbor graph.

        ``nndescent`` is a mock supplied from outside this method
        (presumably by a ``patch`` decorator -- confirm).
        """
        n_samples = self.x1.shape[0]
        expected_shape = (n_samples, 10)

        index = nearest_neighbors.NNDescent("euclidean")
        indices, distances = index.build(self.x1, k=10)

        self.assertEqual(indices.shape, expected_shape)
        self.assertEqual(distances.shape, expected_shape)
        # The first column must not simply list every point as its own neighbor
        self.assertFalse(np.all(indices[:, 0] == np.arange(n_samples)))

        # One more neighbor than requested is expected in the call, because
        # pynndescent reports each point as its own nearest neighbor.
        check_mock_called_with_kwargs(nndescent, {"n_neighbors": 11})
    def test_uncompiled_callable_is_compiled(self):
        """A plain Python metric given to ``check_metric`` must come back compiled.

        The value returned by ``check_metric`` for an ordinary Python function
        is expected to be a ``CPUDispatcher`` instance.
        """
        knn_index = nearest_neighbors.NNDescent("manhattan")

        # Plain, uncompiled Python implementation of the Manhattan distance
        def manhattan(x, y):
            result = 0.0
            for i in range(x.shape[0]):
                result += np.abs(x[i] - y[i])

            return result

        compiled_metric = knn_index.check_metric(manhattan)
        # assertIsInstance reports the offending object and expected type on
        # failure, unlike assertTrue(isinstance(...)) which only says
        # "False is not true".
        self.assertIsInstance(compiled_metric, CPUDispatcher)
예제 #9
0
    def test_building_with_gt15_calls_query(self, nndescent):
        """Building with ``k=30`` must build with 15 neighbors, then query.

        ``nndescent`` is a mock supplied from outside this method
        (presumably by a ``patch`` decorator -- confirm). Its ``query``
        attribute is replaced with a wrapping ``MagicMock`` so the query call
        can be inspected.
        """
        nndescent.query = MagicMock(wraps=nndescent.query)

        n_samples = self.x1.shape[0]
        expected_shape = (n_samples, 30)

        index = nearest_neighbors.NNDescent("euclidean")
        indices, distances = index.build(self.x1, k=30)

        self.assertEqual(indices.shape, expected_shape)
        self.assertEqual(distances.shape, expected_shape)
        # The first column must not simply list every point as its own neighbor
        self.assertFalse(np.all(indices[:, 0] == np.arange(n_samples)))

        # The index itself is expected to be constructed with 15 neighbors
        check_mock_called_with_kwargs(nndescent, {"n_neighbors": 15})
        # The follow-up query asks for 31 neighbors: query also returns the
        # original point, which is not counted as a neighbor.
        check_mock_called_with_kwargs(nndescent.query, {"k": 31})
예제 #10
0
 def test_runs_with_njobs1_if_sparse_input(self, nndescent):
     """With CSR input, pynndescent must be called with ``n_jobs=1``.

     The wrapper is created with ``n_jobs=4``, yet the mocked
     ``nndescent`` (supplied from outside this method, presumably by a
     ``patch`` decorator -- confirm) is expected to receive ``n_jobs=1``.
     """
     sparse_x = sp.csr_matrix(self.x1)
     index = nearest_neighbors.NNDescent("euclidean", n_jobs=4)
     index.build(sparse_x, k=5)
     check_mock_called_with_kwargs(nndescent, {"n_jobs": 1})
예제 #11
0
 def test_runs_with_correct_njobs_if_dense_input(self, nndescent):
     """With dense input, the requested ``n_jobs=4`` must reach pynndescent.

     ``nndescent`` is a mock supplied from outside this method
     (presumably by a ``patch`` decorator -- confirm).
     """
     requested_jobs = 4
     index = nearest_neighbors.NNDescent("euclidean", n_jobs=requested_jobs)
     index.build(self.x1, k=5)
     check_mock_called_with_kwargs(nndescent, {"n_jobs": requested_jobs})