Python NearestNeighbors.kneighbors 예제들, mars.learn.neighbors.NearestNeighbors.kneighbors Python 예제들

예제 #1

0

파일 보기

파일: test_cluster.py 프로젝트: wenyuanyu/mars

    def testLearnInLocalCluster(self, *_):
        """Compare Mars ``kneighbors`` with scikit-learn inside a local cluster.

        A cluster is started through ``new_cluster``, a model with
        ``n_neighbors=3`` is fitted on a chunked tensor, and the fetched
        distances and indices are checked against scikit-learn's result for
        the same random data.
        """
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

        cluster_kwargs = dict(scheduler_n_process=2,
                              worker_n_process=3,
                              shared_memory='20M')
        with new_cluster(**cluster_kwargs) as cluster:
            # Both sample sets come from one seeded generator; the training
            # data is drawn first, then the query data.
            rng = np.random.RandomState(0)
            train_data = rng.rand(10, 5)
            query_data = rng.rand(8, 5)

            train_tensor = mt.tensor(train_data, chunk_size=7)
            query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

            mars_model = NearestNeighbors(n_neighbors=3)
            mars_model.fit(train_tensor)
            mars_outputs = mars_model.kneighbors(query_tensor,
                                                 session=cluster.session)

            # Reference result computed by scikit-learn on the raw arrays.
            sk_model = SkNearestNeighbors(n_neighbors=3)
            sk_model.fit(train_data)
            expected_distances, expected_indices = sk_model.kneighbors(query_data)

            distances, indices = [output.fetch() for output in mars_outputs]
            np.testing.assert_almost_equal(distances, expected_distances)
            np.testing.assert_almost_equal(indices, expected_indices)

예제 #2

0

파일 보기

파일: test_faiss.py 프로젝트: haijohn/mars

def test_faiss_query(setup, X, Y, metric):
    """Check ``faiss_query`` on a 'Flat' index against ``NearestNeighbors``.

    The index is built from ``X`` with the given ``metric``, queried with
    ``Y`` for 5 neighbours, and the fetched distances/indices are compared
    with the ``NearestNeighbors`` result. A second, compound index type is
    then built and queried, where only execution is exercised.

    NOTE(review): ``setup``, ``X``, ``Y`` and ``metric`` are presumably
    supplied by pytest fixtures/parametrization defined elsewhere - confirm.
    """
    top_k = 5

    flat_index = build_faiss_index(X, 'Flat', None,
                                   metric=metric, random_state=0)
    dist_tensor, idx_tensor = faiss_query(flat_index, Y, top_k, nprobe=10)
    distance, indices = fetch(*execute(dist_tensor, idx_tensor))

    # NOTE(review): lowercase ``x`` / ``y`` are not defined in this function;
    # presumably they are module-level raw arrays matching X / Y - confirm.
    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, top_k)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(
        distance, expected_distance.fetch(), decimal=4)

    # test other index
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    compound_index = build_faiss_index(X2, 'PCAR6,IVF8_HNSW32,SQ8', 10,
                                       random_state=0,
                                       return_index_type='object')
    dist_tensor, idx_tensor = faiss_query(compound_index, Y2, top_k, nprobe=10)
    # test execute only
    execute(dist_tensor, idx_tensor)

예제 #3

0

파일 보기

파일: test_faiss.py 프로젝트: ueshin/mars

    def testFaissQuery(self):
        """Check ``faiss_query`` results on 'Flat' indexes for 'l2' and 'cosine'.

        For each chunking of the inputs and each metric, a 'Flat' index is
        built, queried for 5 neighbours, executed through ``self.executor``
        and compared with what ``NearestNeighbors`` returns for the raw data.
        """
        dim = 8
        n_samples = 50
        n_queries = 10
        x = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
        y = np.random.RandomState(1).rand(n_queries, dim).astype(np.float32)

        tensor_pairs = (
            # multi chunks
            (mt.tensor(x, chunk_size=(20, 5)), mt.tensor(y, chunk_size=5)),
            # one chunk
            (mt.tensor(x, chunk_size=50), mt.tensor(y, chunk_size=10)),
        )

        for X, Y in tensor_pairs:
            for metric in ('l2', 'cosine'):
                faiss_index = build_faiss_index(
                    X, 'Flat', None, metric=metric,
                    random_state=0, return_index_type='object')
                dist_tensor, idx_tensor = faiss_query(
                    faiss_index, Y, 5, nprobe=10)
                distance, indices = self.executor.execute_tensors(
                    [dist_tensor, idx_tensor])

                # Reference neighbours computed on the raw numpy arrays.
                reference = NearestNeighbors(metric=metric)
                reference.fit(x)
                expected_distance, expected_indices = reference.kneighbors(y, 5)

                np.testing.assert_array_equal(
                    indices, expected_indices.fetch())
                np.testing.assert_almost_equal(
                    distance, expected_distance.fetch())

예제 #4

0

파일 보기

파일: test_mars_cluster.py 프로젝트: wjsi/aliyun-odps-python-sdk

    def testMarsKNN(self):
        """Run a nearest-neighbour query after creating a Mars cluster via ODPS.

        The cluster client returned by ``self.odps.create_mars_cluster`` is
        stopped in a ``finally`` clause, so it is shut down whether or not
        the comparison with scikit-learn succeeds.
        """
        cluster_name = str(uuid.uuid4())
        client = self.odps.create_mars_cluster(1, 4, 8,
                                               name=cluster_name,
                                               scheduler_mem=12,
                                               scheduler_cpu=4)

        try:
            import numpy as np
            import mars.tensor as mt
            from mars.learn.neighbors import NearestNeighbors
            from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

            # Training data is drawn before query data from the same
            # seeded generator.
            rng = np.random.RandomState(0)
            train_data = rng.rand(10, 5)
            query_data = rng.rand(8, 5)

            train_tensor = mt.tensor(train_data, chunk_size=7)
            query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

            mars_model = NearestNeighbors(n_neighbors=3)
            mars_model.fit(train_tensor)
            mars_outputs = mars_model.kneighbors(query_tensor)

            sk_model = SkNearestNeighbors(n_neighbors=3)
            sk_model.fit(train_data)
            expected_distances, expected_indices = sk_model.kneighbors(query_data)

            distances, indices = [output.fetch() for output in mars_outputs]
            np.testing.assert_almost_equal(distances, expected_distances)
            np.testing.assert_almost_equal(indices, expected_indices)
        finally:
            client.stop_server()

예제 #5

0

파일 보기

파일: test_cluster.py 프로젝트: zymITsky/mars

    def testLearnInLocalCluster(self, *_):
        """Compare Mars nearest neighbours and KMeans with scikit-learn.

        Inside a cluster started through ``new_cluster``, the method first
        checks ``kneighbors`` distances/indices against scikit-learn, then
        checks that the KMeans cluster centres match for a small 2-D data set.
        """
        from mars.learn.cluster import KMeans
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.cluster import KMeans as SK_KMEANS
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors

        cluster_kwargs = dict(scheduler_n_process=2,
                              worker_n_process=3,
                              shared_memory='20M')
        with new_cluster(**cluster_kwargs) as cluster:
            # --- nearest neighbours ---
            rng = np.random.RandomState(0)
            train_data = rng.rand(10, 5)
            query_data = rng.rand(8, 5)

            train_tensor = mt.tensor(train_data, chunk_size=7)
            query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

            mars_model = NearestNeighbors(n_neighbors=3)
            mars_model.fit(train_tensor)
            mars_outputs = mars_model.kneighbors(query_tensor,
                                                 session=cluster.session)

            sk_model = SkNearestNeighbors(n_neighbors=3)
            sk_model.fit(train_data)
            expected_distances, expected_indices = sk_model.kneighbors(query_data)

            distances, indices = [output.fetch() for output in mars_outputs]
            np.testing.assert_almost_equal(distances, expected_distances)
            np.testing.assert_almost_equal(indices, expected_indices)

            # --- k-means ---
            points = np.array([[1, 2], [1, 4], [1, 0],
                               [10, 2], [10, 4], [10, 0]])
            kmeans_kwargs = dict(n_clusters=2, random_state=0, init='k-means++')

            mars_kmeans = KMeans(**kmeans_kwargs).fit(mt.array(points))
            sk_kmeans = SK_KMEANS(**kmeans_kwargs).fit(points)
            np.testing.assert_allclose(mars_kmeans.cluster_centers_,
                                       sk_kmeans.cluster_centers_)

예제 #6

0

파일 보기

    def testFaissNearestNeighborsExecution(self):
        """Check ``NearestNeighbors(algorithm='faiss')`` against scikit-learn.

        Three query styles are covered: an explicit query tensor, the same
        query with ``return_distance=False``, and a call without arguments.
        """
        rng = np.random.RandomState(0)
        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        # test faiss execution
        train_tensor = mt.tensor(train_data, chunk_size=7)
        query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

        mars_model = NearestNeighbors(n_neighbors=3, algorithm='faiss',
                                      metric='l2')
        mars_model.fit(train_tensor)
        mars_outputs = mars_model.kneighbors(query_tensor)

        sk_model = SkNearestNeighbors(n_neighbors=3, algorithm='auto',
                                      metric='l2')
        sk_model.fit(train_data)
        expected_distances, expected_indices = sk_model.kneighbors(query_data)

        distances, indices = [output.fetch() for output in mars_outputs]
        np.testing.assert_almost_equal(distances, expected_distances, decimal=6)
        np.testing.assert_almost_equal(indices, expected_indices)

        # test return_distance=False
        indices_only = mars_model.kneighbors(query_tensor, return_distance=False)
        np.testing.assert_almost_equal(indices_only.fetch(), expected_indices)

        # test y is x
        mars_outputs = mars_model.kneighbors()
        expected_distances, expected_indices = sk_model.kneighbors()

        distances, indices = [output.fetch() for output in mars_outputs]
        np.testing.assert_almost_equal(distances, expected_distances, decimal=5)
        np.testing.assert_almost_equal(indices, expected_indices)

예제 #7

0

파일 보기

    def testLearnInLocalCluster(self, *_):
        """Compare Mars nearest neighbours and ROC/AUC metrics with scikit-learn.

        Inside a cluster started through ``new_cluster``, the method first
        checks ``kneighbors`` output, then computes ``roc_curve``/``auc`` on
        tensors taken from a Mars DataFrame and compares the AUC value with
        scikit-learn's.
        """
        from mars.learn.neighbors import NearestNeighbors
        from sklearn.neighbors import NearestNeighbors as SkNearestNeighbors
        from mars.learn.metrics import roc_curve, auc
        from sklearn.metrics import roc_curve as sklearn_roc_curve, auc as sklearn_auc

        cluster_kwargs = dict(scheduler_n_process=2,
                              worker_n_process=3,
                              shared_memory='20M')
        with new_cluster(**cluster_kwargs) as cluster:
            # --- nearest neighbours ---
            rng = np.random.RandomState(0)
            train_data = rng.rand(10, 5)
            query_data = rng.rand(8, 5)

            train_tensor = mt.tensor(train_data, chunk_size=7)
            query_tensor = mt.tensor(query_data, chunk_size=(5, 3))

            mars_model = NearestNeighbors(n_neighbors=3)
            mars_model.fit(train_tensor)
            mars_outputs = mars_model.kneighbors(query_tensor,
                                                 session=cluster.session)

            sk_model = SkNearestNeighbors(n_neighbors=3)
            sk_model.fit(train_data)
            expected_distances, expected_indices = sk_model.kneighbors(query_data)

            distances, indices = [output.fetch() for output in mars_outputs]
            np.testing.assert_almost_equal(distances, expected_distances)
            np.testing.assert_almost_equal(indices, expected_indices)

            # --- ROC curve / AUC ---
            # A fresh seeded generator; column 'a' is drawn before 'b'.
            rng = np.random.RandomState(0)
            labels = rng.randint(0, 10, (10, ))
            scores = rng.rand(10)
            raw = pd.DataFrame({'a': labels, 'b': scores})

            df = md.DataFrame(raw)
            y_true = df['a'].to_tensor().astype('int')
            y_score = df['b'].to_tensor().astype('float')
            fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=2)
            mars_auc = auc(fpr, tpr)

            sk_fpr, sk_tpr, _ = sklearn_roc_curve(
                raw['a'].to_numpy().astype('int'),
                raw['b'].to_numpy().astype('float'),
                pos_label=2)
            expected_auc = sklearn_auc(sk_fpr, sk_tpr)
            self.assertAlmostEqual(mars_auc.fetch(), expected_auc)

예제 #8

0

파일 보기

파일: test_faiss.py 프로젝트: ueshin/mars

    def testAutoIndex(self):
        """Check indices from a default-configured faiss index.

        ``build_faiss_index`` is called without an explicit index name for
        two chunk sizes; the queried indices must equal those returned by
        ``NearestNeighbors`` on the raw data.
        """
        dim = 8
        n_samples = 50
        n_queries = 10
        x = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
        y = np.random.RandomState(1).rand(n_queries, dim).astype(np.float32)

        for chunk_size in (50, 20):
            data_tensor = mt.tensor(x, chunk_size=chunk_size)

            faiss_index = build_faiss_index(
                data_tensor, random_state=0, return_index_type='object')
            # Only the index tensor is executed; the distance tensor is unused.
            _, idx_tensor = faiss_query(faiss_index, y, 5, nprobe=10)
            indices = self.executor.execute_tensor(idx_tensor, concat=True)[0]

            reference = NearestNeighbors()
            reference.fit(x)
            expected_indices = reference.kneighbors(
                y, 5, return_distance=False)

            np.testing.assert_array_equal(indices, expected_indices)

예제 #9

0

파일 보기

    def testGPUFaissNearestNeighborsExecution(self):
        """Check ``NearestNeighbors(algorithm='faiss')`` on tensors moved with ``to_gpu()``.

        The fetched results are converted with ``.get()`` before being
        compared with scikit-learn's output on the raw arrays.
        """
        rng = np.random.RandomState(0)

        train_data = rng.rand(10, 5)
        query_data = rng.rand(8, 5)

        # test faiss execution
        gpu_train = mt.tensor(train_data, chunk_size=7).to_gpu()
        gpu_query = mt.tensor(query_data, chunk_size=8).to_gpu()

        mars_model = NearestNeighbors(n_neighbors=3, algorithm='faiss',
                                      metric='l2')
        mars_model.fit(gpu_train)
        mars_outputs = mars_model.kneighbors(gpu_query)

        sk_model = SkNearestNeighbors(n_neighbors=3, algorithm='auto',
                                      metric='l2')
        sk_model.fit(train_data)
        expected_distances, expected_indices = sk_model.kneighbors(query_data)

        distances, indices = [output.fetch() for output in mars_outputs]
        np.testing.assert_almost_equal(distances.get(), expected_distances,
                                       decimal=6)
        np.testing.assert_almost_equal(indices.get(), expected_indices)

예제 #10

0

파일 보기

파일: test_faiss.py 프로젝트: haijohn/mars

def test_manual_build_faiss_index(setup):
    """Build faiss indexes from a Mars tensor in several configurations.

    Covers brute-force ('Flat') indexes over multiple chunks and a single
    chunk, trained 'IVF' indexes with ``same_distribution`` set to True and
    False, a compound index description, and the ``ValueError`` raised for
    an unknown index name or metric.

    NOTE(review): ``setup`` is presumably a pytest fixture defined elsewhere
    - confirm.
    """
    d = 8
    n = 50
    n_test = 10
    # Data and queries are both generated with seed 0.
    x = np.random.RandomState(0).rand(n, d).astype(np.float32)
    y = np.random.RandomState(0).rand(n_test, d).astype(np.float32)

    # Reference indices that the brute-force faiss searches below must match.
    nn = NearestNeighbors(algorithm='kd_tree')
    nn.fit(x)
    _, expected_indices = nn.kneighbors(y, 5)

    # test brute-force search
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(X,
                              'Flat',
                              None,
                              random_state=0,
                              same_distribution=True)
    faiss_index = index.execute().fetch()

    # Combine the fetched per-item indexes into one IndexShards object so a
    # single search call covers all of them.
    index_shards = faiss.IndexShards(d)
    for ind in faiss_index:
        shard = _load_index(ind, -1)
        index_shards.add_shard(shard)
    faiss_index = index_shards

    # NOTE(review): the attribute is spelled ``nprob`` here while the query
    # helpers elsewhere take ``nprobe`` - confirm this is intentional.
    faiss_index.nprob = 10
    _, indices = faiss_index.search(y, k=5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test one chunk, brute force
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(X,
                              'Flat',
                              None,
                              random_state=0,
                              same_distribution=True)
    faiss_index = _load_index(index.execute().fetch(), -1)

    faiss_index.nprob = 10
    _, indices = faiss_index.search(y, k=5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # test train, same distribution
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(X,
                              'IVF30,Flat',
                              30,
                              random_state=0,
                              same_distribution=True)
    faiss_index = _load_index(index.execute().fetch(), -1)

    # A single index holding all n vectors is expected, backed by one chunk.
    assert isinstance(faiss_index, faiss.IndexIVFFlat)
    assert faiss_index.ntotal == n
    assert len(tile(index).chunks) == 1

    # test train, distributions are variant
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(X,
                              'IVF10,Flat',
                              None,
                              random_state=0,
                              same_distribution=False)
    faiss_index = index.execute().fetch()

    # Five indexes of 10 vectors each are expected (presumably one per
    # 10-row chunk of the 50-row input - confirm).
    assert len(faiss_index) == 5
    for ind in faiss_index:
        ind = _load_index(ind, -1)
        assert isinstance(ind, faiss.IndexIVFFlat)
        assert ind.ntotal == 10

    # test more index type
    index = build_faiss_index(X, 'PCAR6,IVF8_HNSW32,SQ8', 10, random_state=0)
    faiss_index = index.execute().fetch()

    assert len(faiss_index) == 5
    for ind in faiss_index:
        ind = _load_index(ind, -1)
        assert isinstance(ind, faiss.IndexPreTransform)
        assert ind.ntotal == 10

    # test one chunk, train
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(X,
                              'IVF30,Flat',
                              30,
                              random_state=0,
                              same_distribution=True)
    faiss_index = _load_index(index.execute().fetch(), -1)

    assert isinstance(faiss_index, faiss.IndexIVFFlat)
    assert faiss_index.ntotal == n

    # test wrong index
    with pytest.raises(ValueError):
        build_faiss_index(X, 'unknown_index', None)

    # test unknown metric
    with pytest.raises(ValueError):
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')

예제 #11

0

파일 보기

    def testNearestNeighbors(self):
        """Exercise argument validation and fit-method selection of ``NearestNeighbors``.

        Nothing is compared numerically here: the checks cover which
        constructor, ``fit`` and ``kneighbors`` arguments raise or warn, and
        which ``effective_metric_`` / ``_fit_method`` values result from a
        given configuration.
        """
        # Dense samples are drawn first, then the sparse matrices, all from
        # the same seeded generator.
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        X = mt.tensor(raw_X)
        Y = mt.tensor(raw_Y)

        raw_sparse_x = sps.random(10, 5, density=0.5, format='csr',
                                  random_state=rs)
        raw_sparse_y = sps.random(8, 5, density=0.4, format='csr',
                                  random_state=rs)

        X_sparse = mt.tensor(raw_sparse_x)
        Y_sparse = mt.tensor(raw_sparse_y)

        metric_func = lambda u, v: np.sqrt(((u - v)**2).sum())

        # --- constructor arguments ---
        # This combination must be accepted without raising.
        NearestNeighbors(algorithm='auto', metric='precomputed',
                         metric_params={})

        rejected_kwargs = (
            dict(algorithm='unknown'),
            dict(algorithm='kd_tree', metric=metric_func),
            dict(algorithm='auto', metric='unknown'),
        )
        for kwargs in rejected_kwargs:
            with self.assertRaises(ValueError):
                NearestNeighbors(**kwargs)

        assert_warns(SyntaxWarning, NearestNeighbors, metric_params={'p': 1})

        rejected_p_kwargs = (
            dict(metric='wminkowski', p=0),
            dict(algorithm='auto', metric='minkowski', p=0),
        )
        for kwargs in rejected_p_kwargs:
            with self.assertRaises(ValueError):
                NearestNeighbors(**kwargs)

        # --- effective metric chosen for minkowski with a given p ---
        p_to_metric = (
            (1, 'manhattan'),
            (2, 'euclidean'),
            (np.inf, 'chebyshev'),
        )
        for p, effective_metric in p_to_metric:
            nn = NearestNeighbors(algorithm='auto', metric='minkowski', p=p)
            nn.fit(X)
            self.assertEqual(nn.effective_metric_, effective_metric)

        # --- fitting from an already fitted estimator (the p=inf one) ---
        nn2 = NearestNeighbors(algorithm='auto', metric='minkowski')
        nn2.fit(nn)
        self.assertEqual(nn2._fit_method, nn._fit_method)

        # --- fitting from scikit-learn tree objects ---
        tree_to_method = (
            (SkBallTree, 'ball_tree'),
            (SkKDTree, 'kd_tree'),
        )
        for tree_cls, fit_method in tree_to_method:
            nn = NearestNeighbors(algorithm='auto', metric='minkowski')
            sk_tree = tree_cls(raw_X)
            nn.fit(sk_tree)
            self.assertEqual(nn._fit_method, fit_method)

        # --- invalid or unusual fit inputs ---
        # Input with zero rows.
        with self.assertRaises(ValueError):
            NearestNeighbors().fit(np.random.rand(0, 10))

        nn = NearestNeighbors(algorithm='ball_tree')
        assert_warns(UserWarning, nn.fit, X_sparse)

        nn = NearestNeighbors(metric='haversine')
        self.assertRaises(ValueError, nn.fit, X_sparse)

        # --- fit method selected for particular metrics ---
        metric_to_method = (
            (metric_func, 'ball_tree'),
            ('sqeuclidean', 'brute'),
        )
        for metric, fit_method in metric_to_method:
            nn = NearestNeighbors(metric=metric, n_neighbors=1)
            nn.fit(X)
            self.assertEqual(nn._fit_method, fit_method)

        # --- invalid n_neighbors given to the constructor ---
        for error_type, n_neighbors in ((ValueError, -1), (TypeError, 1.3)):
            with self.assertRaises(error_type):
                NearestNeighbors(n_neighbors=n_neighbors).fit(X)

        # --- invalid n_neighbors given to kneighbors ---
        nn = NearestNeighbors()
        nn.fit(X)
        # 11 is larger than the 10 rows X was built from.
        bad_queries = ((ValueError, -1), (TypeError, 1.3), (ValueError, 11))
        for error_type, n_neighbors in bad_queries:
            with self.assertRaises(error_type):
                nn.kneighbors(Y, n_neighbors=n_neighbors)

        # --- sparse query against a ball_tree model ---
        nn = NearestNeighbors(algorithm='ball_tree')
        nn.fit(X)
        with self.assertRaises(ValueError):
            nn.kneighbors(Y_sparse)

예제 #12

0

파일 보기

    def testNearestNeighborsExecution(self):
        """Execute ``NearestNeighbors.kneighbors`` and compare with scikit-learn.

        Sections, in order: every algorithm/metric combination, a callable
        metric, sparse inputs, inputs filtered by a boolean mask, graph
        serialization round trips, and fitting from a scikit-learn tree.
        Later sections reuse ``snn``, ``expected``, ``Y`` and ``ret`` from
        earlier ones, so the statement order matters.
        """
        # Training data is drawn before query data from one seeded generator.
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        for algo in ['brute', 'ball_tree', 'kd_tree', 'auto']:
            for metric in ['minkowski', 'manhattan']:
                nn = NearestNeighbors(n_neighbors=3,
                                      algorithm=algo,
                                      metric=metric)
                nn.fit(X)

                ret = nn.kneighbors(Y)

                # Same configuration in scikit-learn, on the raw arrays.
                snn = SkNearestNeighbors(n_neighbors=3,
                                         algorithm=algo,
                                         metric=metric)
                snn.fit(raw_X)
                expected = snn.kneighbors(raw_Y)

                # result[0] holds distances, result[1] holds indices.
                result = [r.fetch() for r in ret]
                np.testing.assert_almost_equal(result[0], expected[0])
                np.testing.assert_almost_equal(result[1], expected[1])

                # When a tree was built, it must have the same type as the
                # one scikit-learn built.
                if nn._tree is not None:
                    self.assertIsInstance(nn._tree.fetch(), type(snn._tree))

                # test return_distance=False
                ret = nn.kneighbors(Y, return_distance=False)

                result = ret.fetch()
                np.testing.assert_almost_equal(result, expected[1])

                # test y is x
                ret = nn.kneighbors()

                expected = snn.kneighbors()

                result = [r.fetch() for r in ret]
                np.testing.assert_almost_equal(result[0], expected[0])
                np.testing.assert_almost_equal(result[1], expected[1])

                # test y is x, and return_distance=False
                ret = nn.kneighbors(return_distance=False)

                result = ret.fetch()
                np.testing.assert_almost_equal(result, expected[1])

        # test callable metric
        metric = lambda u, v: np.sqrt(((u - v)**2).sum())
        for algo in ['brute', 'ball_tree']:
            nn = NearestNeighbors(n_neighbors=3, algorithm=algo, metric=metric)
            nn.fit(X)

            ret = nn.kneighbors(Y)

            snn = SkNearestNeighbors(n_neighbors=3,
                                     algorithm=algo,
                                     metric=metric)
            snn.fit(raw_X)
            expected = snn.kneighbors(raw_Y)

            result = [r.fetch() for r in ret]
            np.testing.assert_almost_equal(result[0], expected[0])
            np.testing.assert_almost_equal(result[1], expected[1])

        # test sparse
        # The sparse matrices continue to draw from the generator ``rs``.
        raw_sparse_x = sps.random(10,
                                  5,
                                  density=0.5,
                                  format='csr',
                                  random_state=rs)
        raw_sparse_y = sps.random(8,
                                  5,
                                  density=0.4,
                                  format='csr',
                                  random_state=rs)

        X = mt.tensor(raw_sparse_x, chunk_size=7)
        Y = mt.tensor(raw_sparse_y, chunk_size=5)

        nn = NearestNeighbors(n_neighbors=3)
        nn.fit(X)

        ret = nn.kneighbors(Y)

        snn = SkNearestNeighbors(n_neighbors=3)
        snn.fit(raw_sparse_x)
        expected = snn.kneighbors(raw_sparse_y)

        result = [r.fetch() for r in ret]
        np.testing.assert_almost_equal(result[0], expected[0])
        np.testing.assert_almost_equal(result[1], expected[1])

        # test input with unknown shape
        # Boolean-mask filtering is applied to both tensors before fitting
        # and querying.
        X = mt.tensor(raw_X, chunk_size=7)
        X = X[X[:, 0] > 0.1]
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))
        Y = Y[Y[:, 0] > 0.1]

        nn = NearestNeighbors(n_neighbors=3)
        nn.fit(X)

        ret = nn.kneighbors(Y)

        # The same filter applied to the raw arrays gives the reference input.
        x2 = raw_X[raw_X[:, 0] > 0.1]
        y2 = raw_Y[raw_Y[:, 0] > 0.1]
        snn = SkNearestNeighbors(n_neighbors=3)
        snn.fit(x2)
        expected = snn.kneighbors(y2)

        # Here the whole return value is fetched in one call rather than
        # item by item.
        result = ret.fetch()
        self.assertEqual(nn._fit_method, snn._fit_method)
        np.testing.assert_almost_equal(result[0], expected[0])
        np.testing.assert_almost_equal(result[1], expected[1])

        # test serialization
        # Round trips through pb and json must keep the graph length.
        graph = ret[0].build_graph()
        self.assertEqual(len(graph.from_pb(graph.to_pb())), len(graph))
        self.assertEqual(len(graph.from_json(graph.to_json())), len(graph))

        # test fit a sklearn tree
        # Reuses ``snn``, the filtered ``Y`` and ``expected`` from the
        # unknown-shape section above.
        nn = NearestNeighbors(n_neighbors=3)
        nn.fit(snn._tree)

        ret = nn.kneighbors(Y)
        result = ret.fetch()
        self.assertEqual(nn._fit_method, snn._fit_method)
        np.testing.assert_almost_equal(result[0], expected[0])
        np.testing.assert_almost_equal(result[1], expected[1])

        # test serialization
        graph = ret[0].build_graph()
        self.assertEqual(len(graph.from_pb(graph.to_pb())), len(graph))
        self.assertEqual(len(graph.from_json(graph.to_json())), len(graph))

예제 #13

0

파일 보기

파일: test_faiss.py 프로젝트: ueshin/mars

    def testManualBuildFaissIndex(self):
        """Build faiss indexes through ``self.executor`` in several configurations.

        Covers brute-force ('Flat') indexes for each ``return_index_type``,
        a single-chunk brute-force index, trained 'IVF' indexes with
        ``same_distribution`` set to True and False, and the ``ValueError``
        raised for an unknown index name or metric.
        """
        dim = 8
        n_samples = 50
        n_queries = 10
        top_k = 5
        # Data and queries are both generated with seed 0.
        x = np.random.RandomState(0).rand(n_samples, dim).astype(np.float32)
        y = np.random.RandomState(0).rand(n_queries, dim).astype(np.float32)

        # Reference indices that the brute-force searches must reproduce.
        reference = NearestNeighbors(algorithm='kd_tree')
        reference.fit(x)
        _, expected_indices = reference.kneighbors(y, top_k)

        for index_type in ('object', 'filename', 'bytes'):
            # test brute-force search
            X = mt.tensor(x, chunk_size=10)
            index = build_faiss_index(
                X, 'Flat', None, random_state=0,
                same_distribution=True, return_index_type=index_type)
            executed = self.executor.execute_tileable(index)

            # Load every returned item and gather them in one IndexShards
            # object so a single search call covers all of them.
            sharded = faiss.IndexShards(dim)
            for item in executed:
                shard = _load_index(None, index.op, item, -1)
                sharded.add_shard(shard)

            sharded.nprob = 10
            _, indices = sharded.search(y, k=top_k)

            np.testing.assert_array_equal(indices, expected_indices.fetch())

        # Keyword arguments shared by the remaining same-distribution builds.
        same_dist_kwargs = dict(random_state=0, same_distribution=True,
                                return_index_type='object')

        # test one chunk, brute force
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(X, 'Flat', None, **same_dist_kwargs)
        flat_index = self.executor.execute_tileable(index)[0]

        flat_index.nprob = 10
        _, indices = flat_index.search(y, k=top_k)

        np.testing.assert_array_equal(indices, expected_indices.fetch())

        # test train, same distribution
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(X, 'IVF30,Flat', 30, **same_dist_kwargs)
        ivf_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(ivf_index, faiss.IndexIVFFlat)
        self.assertEqual(ivf_index.ntotal, n_samples)
        self.assertEqual(len(get_tiled(index).chunks), 1)

        # test train, distributions are variant
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(
            X, 'IVF10,Flat', None, random_state=0,
            same_distribution=False, return_index_type='object')
        per_item_indexes = self.executor.execute_tileable(index)

        self.assertEqual(len(per_item_indexes), 5)
        for item in per_item_indexes:
            self.assertIsInstance(item, faiss.IndexIVFFlat)
            self.assertEqual(item.ntotal, 10)

        # test one chunk, train
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(X, 'IVF30,Flat', 30, **same_dist_kwargs)
        ivf_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(ivf_index, faiss.IndexIVFFlat)
        self.assertEqual(ivf_index.ntotal, n_samples)

        # test wrong index
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'unknown_index', None)

        # test unknown metric
        with self.assertRaises(ValueError):
            build_faiss_index(X, 'Flat', None, metric='unknown_metric')

예제 #14

0

파일 보기

파일: test_nearest_neighbors.py 프로젝트: melodylail/mars

    def testNearestNeighborsExecution(self):
        """Execute ``NearestNeighbors.kneighbors`` and compare with scikit-learn.

        Sections, in order: every algorithm/metric combination (with and
        without a query tensor, with and without distances), a callable
        metric, and sparse inputs.
        """
        # Training data is drawn before query data from one seeded generator.
        rs = np.random.RandomState(0)
        raw_X = rs.rand(10, 5)
        raw_Y = rs.rand(8, 5)

        X = mt.tensor(raw_X, chunk_size=7)
        Y = mt.tensor(raw_Y, chunk_size=(5, 3))

        for algo in ('brute', 'ball_tree', 'kd_tree', 'auto'):
            for metric in ('minkowski', 'manhattan'):
                model_kwargs = dict(n_neighbors=3, algorithm=algo,
                                    metric=metric)

                mars_model = NearestNeighbors(**model_kwargs)
                mars_model.fit(X)
                mars_outputs = mars_model.kneighbors(Y)

                sk_model = SkNearestNeighbors(**model_kwargs)
                sk_model.fit(raw_X)
                expected = sk_model.kneighbors(raw_Y)

                distances, indices = [out.fetch() for out in mars_outputs]
                np.testing.assert_almost_equal(distances, expected[0])
                np.testing.assert_almost_equal(indices, expected[1])

                # test return_distance=False
                indices_only = mars_model.kneighbors(Y, return_distance=False)
                np.testing.assert_almost_equal(indices_only.fetch(),
                                               expected[1])

                # test y is x
                mars_outputs = mars_model.kneighbors()
                expected = sk_model.kneighbors()

                distances, indices = [out.fetch() for out in mars_outputs]
                np.testing.assert_almost_equal(distances, expected[0])
                np.testing.assert_almost_equal(indices, expected[1])

                # test y is x, and return_distance=False
                indices_only = mars_model.kneighbors(return_distance=False)
                np.testing.assert_almost_equal(indices_only.fetch(),
                                               expected[1])

        # test callable metric
        metric = lambda u, v: np.sqrt(((u - v)**2).sum())
        for algo in ('brute', 'ball_tree'):
            mars_model = NearestNeighbors(n_neighbors=3, algorithm=algo,
                                          metric=metric)
            mars_model.fit(X)
            mars_outputs = mars_model.kneighbors(Y)

            sk_model = SkNearestNeighbors(n_neighbors=3, algorithm=algo,
                                          metric=metric)
            sk_model.fit(raw_X)
            expected = sk_model.kneighbors(raw_Y)

            distances, indices = [out.fetch() for out in mars_outputs]
            np.testing.assert_almost_equal(distances, expected[0])
            np.testing.assert_almost_equal(indices, expected[1])

        # test sparse
        # The sparse matrices continue to draw from the generator ``rs``,
        # x first and then y.
        raw_sparse_x = sps.random(10, 5, density=0.5, format='csr',
                                  random_state=rs)
        raw_sparse_y = sps.random(8, 5, density=0.4, format='csr',
                                  random_state=rs)

        sparse_X = mt.tensor(raw_sparse_x, chunk_size=7)
        sparse_Y = mt.tensor(raw_sparse_y, chunk_size=5)

        mars_model = NearestNeighbors(n_neighbors=3)
        mars_model.fit(sparse_X)
        mars_outputs = mars_model.kneighbors(sparse_Y)

        sk_model = SkNearestNeighbors(n_neighbors=3)
        sk_model.fit(raw_sparse_x)
        expected = sk_model.kneighbors(raw_sparse_y)

        distances, indices = [out.fetch() for out in mars_outputs]
        np.testing.assert_almost_equal(distances, expected[0])
        np.testing.assert_almost_equal(indices, expected[1])