Example #1
0
 def create(
     cls,
     index_vectors: np.ndarray,
     metric: Metric = DEFAULT_METRIC,
     leaf_size: int = DEFAULT_LEAF_SIZE,
 ) -> "BallTree":
     """Build a ``BallTree`` backed by an ``SkBallTree`` over ``index_vectors``.

     Args:
         index_vectors: Data handed unchanged to ``SkBallTree`` as the
             points to index.
         metric: Key used to look up the backend metric in ``_METRICS``;
             also stored on the returned ``BallTree`` as-is.
         leaf_size: Forwarded to ``SkBallTree`` as ``leaf_size``.

     Returns:
         A new ``BallTree`` holding the constructed index and ``metric``.
     """
     # Translate the project-level metric into whatever the backend expects
     # (presumably ``_METRICS`` is a mapping -- an unknown key would raise).
     backend_metric = _METRICS[metric]
     sk_index = SkBallTree(index_vectors,
                           leaf_size=leaf_size,
                           metric=backend_metric)
     return BallTree(index=sk_index, metric=metric)
Example #2
0
    def testNearestNeighbors(self):
        """Check ``NearestNeighbors`` argument validation and fit-method choice.

        Covers, in order: constructor rejection of bad algorithm / metric /
        ``p`` combinations, the ``effective_metric_`` reported for several
        Minkowski ``p`` values, fitting from another estimator or from a
        prebuilt scikit-learn tree, empty and sparse inputs, and
        ``n_neighbors`` validation in both ``fit`` and ``kneighbors``.
        """
        # One seeded generator feeds the dense and the sparse fixtures, so the
        # order of the draws below is kept fixed.
        rs = np.random.RandomState(0)
        dense_x = rs.rand(10, 5)
        dense_y = rs.rand(8, 5)

        X = mt.tensor(dense_x)
        Y = mt.tensor(dense_y)

        sparse_x = sps.random(10, 5, density=0.5, format='csr',
                              random_state=rs)
        sparse_y = sps.random(8, 5, density=0.4, format='csr',
                              random_state=rs)

        X_sparse = mt.tensor(sparse_x)
        Y_sparse = mt.tensor(sparse_y)

        def metric_func(u, v):
            # User-supplied callable metric: plain Euclidean distance.
            return np.sqrt(((u - v)**2).sum())

        # A precomputed metric with empty metric_params must be accepted.
        NearestNeighbors(algorithm='auto',
                         metric='precomputed',
                         metric_params={})

        # Constructor arguments expected to be rejected with ValueError.
        rejected_kwargs = (
            dict(algorithm='unknown'),
            dict(algorithm='kd_tree', metric=metric_func),
            dict(algorithm='auto', metric='unknown'),
        )
        for kwargs in rejected_kwargs:
            with self.assertRaises(ValueError):
                NearestNeighbors(**kwargs)

        # Passing ``p`` through metric_params is expected to warn.
        assert_warns(SyntaxWarning, NearestNeighbors, metric_params={'p': 1})

        # p=0 is expected to be rejected for both Minkowski flavours.
        for kwargs in (
                dict(metric='wminkowski', p=0),
                dict(algorithm='auto', metric='minkowski', p=0),
        ):
            with self.assertRaises(ValueError):
                NearestNeighbors(**kwargs)

        # Minkowski with specific p values resolves to a named metric.
        minkowski_cases = (
            (1, 'manhattan'),
            (2, 'euclidean'),
            (np.inf, 'chebyshev'),
        )
        for p, expected_metric in minkowski_cases:
            nn = NearestNeighbors(algorithm='auto', metric='minkowski', p=p)
            nn.fit(X)
            self.assertEqual(nn.effective_metric_, expected_metric)

        # Fitting from an already fitted estimator (the last one built in
        # the loop above) carries over its fit method.
        fitted = nn
        nn2 = NearestNeighbors(algorithm='auto', metric='minkowski')
        nn2.fit(fitted)
        self.assertEqual(nn2._fit_method, fitted._fit_method)

        # Fitting from a prebuilt scikit-learn tree selects the matching
        # fit method.
        for tree_cls, expected_method in (
                (SkBallTree, 'ball_tree'),
                (SkKDTree, 'kd_tree'),
        ):
            nn = NearestNeighbors(algorithm='auto', metric='minkowski')
            prebuilt_tree = tree_cls(dense_x)
            nn.fit(prebuilt_tree)
            self.assertEqual(nn._fit_method, expected_method)

        # Input with zero rows is rejected.
        with self.assertRaises(ValueError):
            NearestNeighbors().fit(np.random.rand(0, 10))

        # Fitting a ball tree on sparse input is expected to warn.
        nn = NearestNeighbors(algorithm='ball_tree')
        assert_warns(UserWarning, nn.fit, X_sparse)

        # Haversine with sparse input is expected to fail at fit time.
        nn = NearestNeighbors(metric='haversine')
        with self.assertRaises(ValueError):
            nn.fit(X_sparse)

        # Fit method chosen automatically for a given metric.
        for metric, expected_method in (
                (metric_func, 'ball_tree'),
                ('sqeuclidean', 'brute'),
        ):
            nn = NearestNeighbors(metric=metric, n_neighbors=1)
            nn.fit(X)
            self.assertEqual(nn._fit_method, expected_method)

        # Invalid n_neighbors supplied at construction; construction and fit
        # both stay inside the guarded block.
        for bad_k, expected_error in ((-1, ValueError), (1.3, TypeError)):
            with self.assertRaises(expected_error):
                nn = NearestNeighbors(n_neighbors=bad_k)
                nn.fit(X)

        # Invalid n_neighbors supplied at query time (X has 10 rows, so 11
        # is presumably rejected for exceeding the sample count).
        nn = NearestNeighbors()
        nn.fit(X)
        for bad_k, expected_error in (
                (-1, ValueError),
                (1.3, TypeError),
                (11, ValueError),
        ):
            with self.assertRaises(expected_error):
                nn.kneighbors(Y, n_neighbors=bad_k)

        # A ball-tree model is expected to refuse sparse queries.
        nn = NearestNeighbors(algorithm='ball_tree')
        nn.fit(X)
        with self.assertRaises(ValueError):
            nn.kneighbors(Y_sparse)
Example #3
0
def test_nearest_neighbors(setup):
    """Check ``NearestNeighbors`` argument validation and fit-method choice.

    Covers, in order: constructor rejection of bad algorithm / metric /
    ``p`` combinations, the ``effective_metric_`` reported for several
    Minkowski ``p`` values, fitting from another estimator or from a
    prebuilt scikit-learn tree, empty and sparse inputs, and ``n_neighbors``
    validation in both ``fit`` and ``kneighbors``.

    ``setup`` is not referenced in the body; presumably it is a pytest
    fixture requested only for its side effects.
    """
    # One seeded generator feeds the dense and the sparse fixtures, so the
    # order of the draws below is kept fixed.
    rs = np.random.RandomState(0)
    dense_x = rs.rand(10, 5)
    dense_y = rs.rand(8, 5)

    X = mt.tensor(dense_x)
    Y = mt.tensor(dense_y)

    sparse_x = sps.random(10, 5, density=0.5, format="csr", random_state=rs)
    sparse_y = sps.random(8, 5, density=0.4, format="csr", random_state=rs)

    X_sparse = mt.tensor(sparse_x)
    Y_sparse = mt.tensor(sparse_y)

    def metric_func(u, v):
        # User-supplied callable metric: plain Euclidean distance.
        return np.sqrt(((u - v)**2).sum())

    # A precomputed metric with empty metric_params must be accepted.
    NearestNeighbors(algorithm="auto", metric="precomputed", metric_params={})

    # Constructor arguments expected to be rejected with ValueError.
    rejected_kwargs = (
        dict(algorithm="unknown"),
        dict(algorithm="kd_tree", metric=metric_func),
        dict(algorithm="auto", metric="unknown"),
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(ValueError):
            NearestNeighbors(**kwargs)

    # Passing ``p`` through metric_params is expected to warn.
    assert_warns(SyntaxWarning, NearestNeighbors, metric_params={"p": 1})

    # p=0 is expected to be rejected for both Minkowski flavours.
    for kwargs in (
            dict(metric="wminkowski", p=0),
            dict(algorithm="auto", metric="minkowski", p=0),
    ):
        with pytest.raises(ValueError):
            NearestNeighbors(**kwargs)

    # Minkowski with specific p values resolves to a named metric.
    minkowski_cases = (
        (1, "manhattan"),
        (2, "euclidean"),
        (np.inf, "chebyshev"),
    )
    for p, expected_metric in minkowski_cases:
        nn = NearestNeighbors(algorithm="auto", metric="minkowski", p=p)
        nn.fit(X)
        assert nn.effective_metric_ == expected_metric

    # Fitting from an already fitted estimator (the last one built in the
    # loop above) carries over its fit method.
    fitted = nn
    nn2 = NearestNeighbors(algorithm="auto", metric="minkowski")
    nn2.fit(fitted)
    assert nn2._fit_method == fitted._fit_method

    # Fitting from a prebuilt scikit-learn tree selects the matching method.
    for tree_cls, expected_method in (
            (SkBallTree, "ball_tree"),
            (SkKDTree, "kd_tree"),
    ):
        nn = NearestNeighbors(algorithm="auto", metric="minkowski")
        prebuilt_tree = tree_cls(dense_x)
        nn.fit(prebuilt_tree)
        assert nn._fit_method == expected_method

    # Input with zero rows is rejected.
    with pytest.raises(ValueError):
        NearestNeighbors().fit(np.random.rand(0, 10))

    # Fitting a ball tree on sparse input is expected to warn.
    nn = NearestNeighbors(algorithm="ball_tree")
    assert_warns(UserWarning, nn.fit, X_sparse)

    # Haversine with sparse input is expected to fail at fit time.
    nn = NearestNeighbors(metric="haversine")
    with pytest.raises(ValueError):
        nn.fit(X_sparse)

    # Fit method chosen automatically for a given metric.
    for metric, expected_method in (
            (metric_func, "ball_tree"),
            ("sqeuclidean", "brute"),
    ):
        nn = NearestNeighbors(metric=metric, n_neighbors=1)
        nn.fit(X)
        assert nn._fit_method == expected_method

    # Invalid n_neighbors supplied at construction; construction and fit
    # both stay inside the guarded block.
    for bad_k, expected_error in ((-1, ValueError), (1.3, TypeError)):
        with pytest.raises(expected_error):
            nn = NearestNeighbors(n_neighbors=bad_k)
            nn.fit(X)

    # Invalid n_neighbors supplied at query time (X has 10 rows, so 11 is
    # presumably rejected for exceeding the sample count).
    nn = NearestNeighbors()
    nn.fit(X)
    for bad_k, expected_error in (
            (-1, ValueError),
            (1.3, TypeError),
            (11, ValueError),
    ):
        with pytest.raises(expected_error):
            nn.kneighbors(Y, n_neighbors=bad_k)

    # A ball-tree model is expected to refuse sparse queries.
    nn = NearestNeighbors(algorithm="ball_tree")
    nn.fit(X)
    with pytest.raises(ValueError):
        nn.kneighbors(Y_sparse)