Ejemplo n.º 1
0
def test_faiss_query(setup, X, Y, metric):
    """Compare ``faiss_query`` on a 'Flat' index with ``NearestNeighbors``.

    The 5 nearest neighbours returned for ``Y`` must have the same indices
    as the reference and distances equal to 4 decimal places. Afterwards a
    compound index built from float64 copies of the data is queried and
    executed without checking the result.

    NOTE(review): ``x`` and ``y`` are not defined in this function;
    presumably they are module-level arrays backing ``X`` and ``Y`` --
    confirm against the rest of the file.
    """
    top_k = 5

    flat_index = build_faiss_index(
        X, 'Flat', None, metric=metric, random_state=0)
    dist_tensor, ind_tensor = faiss_query(flat_index, Y, top_k, nprobe=10)
    distance, indices = fetch(*execute(dist_tensor, ind_tensor))

    # Reference answer for the same metric.
    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, top_k)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(
        distance, expected_distance.fetch(), decimal=4)

    # Second scenario: a compound index type fed with float64 inputs.
    X_f64 = X.astype(np.float64)
    Y_f64 = y.astype(np.float64)
    compound_index = build_faiss_index(
        X_f64,
        'PCAR6,IVF8_HNSW32,SQ8',
        10,
        random_state=0,
        return_index_type='object')
    dist_tensor, ind_tensor = faiss_query(
        compound_index, Y_f64, top_k, nprobe=10)
    # Only execution is exercised here; the results are not asserted.
    execute(dist_tensor, ind_tensor)
Ejemplo n.º 2
0
    def testFaissQuery(self):
        """Check ``faiss_query`` on 'Flat' indexes against ``NearestNeighbors``.

        Runs every combination of two chunk layouts (multi-chunk and
        single-chunk) and two metrics ('l2', 'cosine'), asserting that the
        5-nearest-neighbour indices and distances match the reference.
        """
        dim, n_train, n_query = 8, 50, 10
        top_k = 5
        x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        y = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        # First pair is split into several chunks, second pair is one chunk.
        tensor_pairs = [
            (mt.tensor(x, chunk_size=(20, 5)), mt.tensor(y, chunk_size=5)),
            (mt.tensor(x, chunk_size=50), mt.tensor(y, chunk_size=10)),
        ]
        metrics = ['l2', 'cosine']

        for X, Y in tensor_pairs:
            for metric in metrics:
                flat_index = build_faiss_index(
                    X, 'Flat', None, metric=metric,
                    random_state=0, return_index_type='object')
                dist_tensor, ind_tensor = faiss_query(
                    flat_index, Y, top_k, nprobe=10)
                distance, indices = self.executor.execute_tensors(
                    [dist_tensor, ind_tensor])

                # Reference answer computed from the raw arrays.
                reference = NearestNeighbors(metric=metric)
                reference.fit(x)
                expected_distance, expected_indices = \
                    reference.kneighbors(y, top_k)

                np.testing.assert_array_equal(
                    indices, expected_indices.fetch())
                np.testing.assert_almost_equal(
                    distance, expected_distance.fetch())
Ejemplo n.º 3
0
    def testAutoIndex(self):
        """Check a faiss index built without an explicit index type.

        For a single-chunk (50) and a multi-chunk (20) tensor the index is
        built with only ``random_state`` and ``return_index_type`` given,
        and the queried neighbour indices must equal those returned by a
        default ``NearestNeighbors``.
        """
        dim, n_train, n_query = 8, 50, 10
        top_k = 5
        x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        y = np.random.RandomState(1).rand(n_query, dim).astype(np.float32)

        for chunk_size in (50, 20):
            X = mt.tensor(x, chunk_size=chunk_size)

            auto_index = build_faiss_index(
                X, random_state=0, return_index_type='object')
            # Only the index tensor is executed; distances are not checked.
            _dist_tensor, ind_tensor = faiss_query(
                auto_index, y, top_k, nprobe=10)
            indices = self.executor.execute_tensor(
                ind_tensor, concat=True)[0]

            reference = NearestNeighbors()
            reference.fit(x)
            expected_indices = reference.kneighbors(
                y, top_k, return_distance=False)

            np.testing.assert_array_equal(indices, expected_indices)
Ejemplo n.º 4
0
def test_manual_build_faiss_index(setup):
    """Exercise ``build_faiss_index`` across index types and chunk layouts.

    Scenarios, in order: brute-force search over several chunks (loaded
    into a ``faiss.IndexShards``), brute-force over one chunk, a trained
    IVF index with ``same_distribution=True``, trained IVF indexes with
    ``same_distribution=False``, a compound index type, a trained
    single-chunk index, and finally the ``ValueError`` raised for an
    unknown index type or metric.
    """
    dim, n_train, n_query = 8, 50, 10
    top_k = 5
    x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
    # Drawn with the same seed as x.
    y = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

    reference = NearestNeighbors(algorithm='kd_tree')
    reference.fit(x)
    _, expected_indices = reference.kneighbors(y, top_k)

    # --- brute force, several chunks ---
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'Flat', None, random_state=0, same_distribution=True)
    serialized_chunks = index.execute().fetch()

    # Every fetched item is loaded and added as one shard.
    sharded_index = faiss.IndexShards(dim)
    for serialized in serialized_chunks:
        shard = _load_index(serialized, -1)
        sharded_index.add_shard(shard)

    # NOTE(review): the attribute set here is `nprob`; faiss IVF indexes
    # call the parameter `nprobe`, so this may be a typo -- confirm.
    sharded_index.nprob = 10
    _, indices = sharded_index.search(y, k=top_k)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- brute force, one chunk ---
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'Flat', None, random_state=0, same_distribution=True)
    flat_index = _load_index(index.execute().fetch(), -1)

    flat_index.nprob = 10
    _, indices = flat_index.search(y, k=top_k)

    np.testing.assert_array_equal(indices, expected_indices.fetch())

    # --- trained index, chunks share one distribution ---
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0, same_distribution=True)
    ivf_index = _load_index(index.execute().fetch(), -1)

    assert isinstance(ivf_index, faiss.IndexIVFFlat)
    assert ivf_index.ntotal == n_train
    assert len(tile(index).chunks) == 1

    # --- trained index, chunks have differing distributions ---
    X = mt.tensor(x, chunk_size=10)
    index = build_faiss_index(
        X, 'IVF10,Flat', None, random_state=0, same_distribution=False)
    serialized_chunks = index.execute().fetch()

    # One index per input chunk, each holding that chunk's 10 rows.
    assert len(serialized_chunks) == 5
    for serialized in serialized_chunks:
        loaded = _load_index(serialized, -1)
        assert isinstance(loaded, faiss.IndexIVFFlat)
        assert loaded.ntotal == 10

    # --- compound index type (reuses the chunk_size=10 tensor) ---
    index = build_faiss_index(X, 'PCAR6,IVF8_HNSW32,SQ8', 10, random_state=0)
    serialized_chunks = index.execute().fetch()

    assert len(serialized_chunks) == 5
    for serialized in serialized_chunks:
        loaded = _load_index(serialized, -1)
        assert isinstance(loaded, faiss.IndexPreTransform)
        assert loaded.ntotal == 10

    # --- trained index, one chunk ---
    X = mt.tensor(x, chunk_size=50)
    index = build_faiss_index(
        X, 'IVF30,Flat', 30, random_state=0, same_distribution=True)
    ivf_index = _load_index(index.execute().fetch(), -1)

    assert isinstance(ivf_index, faiss.IndexIVFFlat)
    assert ivf_index.ntotal == n_train

    # --- invalid arguments ---
    with pytest.raises(ValueError):
        # unknown index type
        build_faiss_index(X, 'unknown_index', None)

    with pytest.raises(ValueError):
        # unknown metric
        build_faiss_index(X, 'Flat', None, metric='unknown_metric')
Ejemplo n.º 5
0
    def testManualBuildFaissIndex(self):
        """Exercise ``build_faiss_index`` across index types and chunk layouts.

        Scenarios, in order: brute-force search over several chunks for each
        ``return_index_type`` ('object', 'filename', 'bytes'), brute-force
        over one chunk, a trained IVF index with ``same_distribution=True``,
        trained IVF indexes with ``same_distribution=False``, a trained
        single-chunk index, and the ``ValueError`` raised for an unknown
        index type or metric.
        """
        dim, n_train, n_query = 8, 50, 10
        top_k = 5
        x = np.random.RandomState(0).rand(n_train, dim).astype(np.float32)
        # Drawn with the same seed as x.
        y = np.random.RandomState(0).rand(n_query, dim).astype(np.float32)

        reference = NearestNeighbors(algorithm='kd_tree')
        reference.fit(x)
        _, expected_indices = reference.kneighbors(y, top_k)

        # --- brute force, several chunks, every return type ---
        for index_type in ['object', 'filename', 'bytes']:
            X = mt.tensor(x, chunk_size=10)
            index = build_faiss_index(
                X, 'Flat', None, random_state=0,
                same_distribution=True, return_index_type=index_type)
            chunk_results = self.executor.execute_tileable(index)

            # Every executed result is loaded and added as one shard.
            sharded_index = faiss.IndexShards(dim)
            for chunk_result in chunk_results:
                shard = _load_index(None, index.op, chunk_result, -1)
                sharded_index.add_shard(shard)

            # NOTE(review): the attribute set here is `nprob`; faiss IVF
            # indexes call the parameter `nprobe`, so this may be a typo
            # -- confirm.
            sharded_index.nprob = 10
            _, indices = sharded_index.search(y, k=top_k)

            np.testing.assert_array_equal(indices, expected_indices.fetch())

        # --- brute force, one chunk ---
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(
            X, 'Flat', None, random_state=0,
            same_distribution=True, return_index_type='object')
        flat_index = self.executor.execute_tileable(index)[0]

        flat_index.nprob = 10
        _, indices = flat_index.search(y, k=top_k)

        np.testing.assert_array_equal(indices, expected_indices.fetch())

        # --- trained index, chunks share one distribution ---
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(
            X, 'IVF30,Flat', 30, random_state=0,
            same_distribution=True, return_index_type='object')
        ivf_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(ivf_index, faiss.IndexIVFFlat)
        self.assertEqual(ivf_index.ntotal, n_train)
        self.assertEqual(len(get_tiled(index).chunks), 1)

        # --- trained index, chunks have differing distributions ---
        X = mt.tensor(x, chunk_size=10)
        index = build_faiss_index(
            X, 'IVF10,Flat', None, random_state=0,
            same_distribution=False, return_index_type='object')
        chunk_results = self.executor.execute_tileable(index)

        # One index per input chunk, each holding that chunk's 10 rows.
        self.assertEqual(len(chunk_results), 5)
        for chunk_index in chunk_results:
            self.assertIsInstance(chunk_index, faiss.IndexIVFFlat)
            self.assertEqual(chunk_index.ntotal, 10)

        # --- trained index, one chunk ---
        X = mt.tensor(x, chunk_size=50)
        index = build_faiss_index(
            X, 'IVF30,Flat', 30, random_state=0,
            same_distribution=True, return_index_type='object')
        ivf_index = self.executor.execute_tileable(index)[0]

        self.assertIsInstance(ivf_index, faiss.IndexIVFFlat)
        self.assertEqual(ivf_index.ntotal, n_train)

        # --- invalid arguments ---
        with self.assertRaises(ValueError):
            # unknown index type
            build_faiss_index(X, 'unknown_index', None)

        with self.assertRaises(ValueError):
            # unknown metric
            build_faiss_index(X, 'Flat', None, metric='unknown_metric')