Exemple #1
0
def test_numpy_indexer_with_ref_indexer(compress_level, test_metas):
    """Query an index through a second indexer built with ``ref_indexer``.

    A first indexer stores four vectors and is saved. A second indexer is then
    created with ``ref_indexer`` pointing at the first one; the test asserts
    that it reports the same ``compress_level`` and ``index_filename`` and that
    vector and key queries return the stored data.
    """
    vectors = np.array([[1, 1, 1],
                        [10, 10, 10],
                        [100, 100, 100],
                        [1000, 1000, 1000]])
    keys = np.array(['4', '5', '6', '7'], dtype=(np.str_, 16))
    with NumpyIndexer(metric='euclidean', index_filename='np.test.gz', compress_level=compress_level,
                      metas=test_metas) as indexer:
        indexer.add(keys, vectors)
        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        index_filename = indexer.index_filename

    # Each query equals one stored vector, so its own key is expected first.
    queries = np.array([[1, 1, 1],
                        [10, 10, 10],
                        [100, 100, 100],
                        [1000, 1000, 1000]])
    with NumpyIndexer(metric='euclidean', ref_indexer=indexer, metas=test_metas) as new_indexer:
        assert new_indexer.compress_level == compress_level
        assert new_indexer.index_filename == index_filename
        # Check the indexer under test rather than the reference it was built from.
        assert isinstance(new_indexer, NumpyIndexer)
        if compress_level == 0:
            assert isinstance(new_indexer.query_handler, np.memmap)
        idx, dist = new_indexer.query(queries, top_k=2)
        np.testing.assert_equal(idx, np.array([['4', '5'], ['5', '4'], ['6', '5'], ['7', '6']]))
        assert idx.shape == dist.shape
        assert idx.shape == (4, 2)
        np.testing.assert_equal(new_indexer.query_by_key(['7', '4']), vectors[[3, 0]])
Exemple #2
0
def test_numpy_indexer_known(batch_size, compress_level, test_metas):
    """Index four vectors under string keys and verify exact query results.

    The index is saved, reloaded through ``BaseIndexer.load`` and queried both
    by vector (top-2 neighbours) and by key.
    """
    data = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                     [1000, 1000, 1000]])
    doc_keys = np.array(['4', '5', '6', '7'], dtype=(np.str_, 16))

    with NumpyIndexer(metric='euclidean', index_filename='np.test.gz',
                      compress_level=compress_level, metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.add(doc_keys, data)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    # Each query equals one stored vector, so its own key is expected first.
    query_vectors = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                              [1000, 1000, 1000]])
    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        if compress_level == 0:
            assert isinstance(reader.query_handler, np.memmap)
        top_keys, top_dist = reader.query(query_vectors, top_k=2)
        expected_keys = np.array([['4', '5'], ['5', '4'], ['6', '5'], ['7', '6']])
        np.testing.assert_equal(top_keys, expected_keys)
        assert top_keys.shape == top_dist.shape
        assert top_keys.shape == (4, 2)
        fetched = reader.query_by_id(np.array(['7', '4']))
        np.testing.assert_equal(fetched, data[[3, 0]])
Exemple #3
0
def test_standard(test_metas):
    """Add the module-level vectors to an uncompressed index.

    Sets the ``name`` meta to ``'a'`` before constructing the indexer.
    """
    test_metas['name'] = 'a'
    indexer_kwargs = dict(index_filename=filename, compress_level=0, metas=test_metas)
    with NumpyIndexer(**indexer_kwargs) as np_indexer:
        np_indexer.batch_size = 512
        np_indexer.add(vec_idx, vec)
Exemple #4
0
def test_scipy_indexer_known_big(compress_level, test_metas):
    """Query a 10k-vector index (scipy backend) for ten planted vectors.

    The index holds 10000 random vectors with values between 5 and 10. The
    rows at positions 0, 1000, ..., 9000 are overwritten with constant vectors
    that are also used as the queries, so each query has a known best match.
    Keys are shifted by 10000 relative to the row position to exercise the
    mapping between external keys and internal positions.
    """
    vectors = np.random.uniform(low=5.0, high=10.0, size=(10000, 1024))

    queries = np.empty((10, 1024))
    for row, position in enumerate(range(0, 10000, 1000)):
        marker = position * np.ones((1, 1024))
        queries[row] = marker
        vectors[position] = marker

    key_column = np.arange(10000, 20000).reshape(-1, 1)
    keys = np.squeeze(np.array(key_column, dtype=(np.str_, 16)))

    with NumpyIndexer(metric='euclidean',
                      index_filename='np.test.gz',
                      backend='scipy',
                      compress_level=compress_level,
                      metas=test_metas) as writer:
        writer.add(keys, vectors)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        if compress_level == 0:
            assert isinstance(reader.query_handler, np.memmap)
        top_keys, top_dist = reader.query(queries, top_k=1)
        # One expected key per planted row: '10000', '11000', ..., '19000'.
        expected_keys = np.array([[str(key)] for key in range(10000, 20000, 1000)])
        np.testing.assert_equal(top_keys, expected_keys)
        assert top_keys.shape == top_dist.shape
        assert top_keys.shape == (10, 1)
        fetched = reader.query_by_key(['10000', '15000'])
        np.testing.assert_equal(fetched, vectors[[0, 5000]])
def test_numpy_indexer_known(batch_size, compress_level, test_metas):
    """Index four vectors under integer keys and verify exact query results.

    The index is saved, reloaded through ``BaseIndexer.load`` and queried both
    by vector (top-2 neighbours) and by id.
    """
    data = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                     [1000, 1000, 1000]])
    doc_ids = np.array([4, 5, 6, 7]).reshape(-1, 1)

    with NumpyIndexer(metric='euclidean', index_filename='np.test.gz',
                      compress_level=compress_level, metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.add(doc_ids, data)
        writer.save()
        assert Path(writer.index_abspath).exists()
        saved_to = writer.save_abspath

    # Each query equals one stored vector, so its own id is expected first.
    query_vectors = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                              [1000, 1000, 1000]])
    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        if compress_level == 0:
            assert isinstance(reader.raw_ndarray, np.memmap)
        top_ids, top_dist = reader.query(query_vectors, top_k=2)
        expected_ids = np.array([[4, 5], [5, 4], [6, 5], [7, 6]])
        np.testing.assert_equal(top_ids, expected_ids)
        assert top_ids.shape == top_dist.shape
        assert top_ids.shape == (4, 2)
        fetched = reader.query_by_id([7, 4])
        np.testing.assert_equal(fetched, data[[3, 0]])
def test_indexer_zeros(metric, dimension, test_metas):
    """Query an index with an all-zero vector and check the distances for NaN.

    For the ``'cosine'`` metric every returned distance is expected to be NaN;
    for any other metric none of them may be NaN.
    """
    import math

    zero_query = np.zeros([1, dimension], dtype=np.float32)
    random_ids = np.random.randint(0, high=num_data, size=[num_data])
    add_vec_idx = np.array(random_ids, dtype=(np.str_, 16))
    add_vec = np.random.random([num_data, dimension])

    with NumpyIndexer(metric=metric, index_filename='np.test.gz',
                      metas=test_metas) as writer:
        writer.add(add_vec_idx, add_vec)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        assert isinstance(reader.query_handler, np.memmap)
        top_keys, top_dist = reader.query(zero_query, top_k=4)

        assert top_keys.shape == top_dist.shape
        assert top_keys.shape == (1, 4)
        nan_flags = [math.isnan(value) for value in top_dist[0]]
        if metric == 'cosine':
            assert all(nan_flags)
        else:
            assert not any(nan_flags)
Exemple #7
0
def test_numpy_update_delete(compress_level, test_metas):
    """Exercise add, update and delete on a persisted ``NumpyIndexer``.

    Each stage reloads the indexer from ``save_abspath``, applies one change,
    saves it, and then a fresh reload verifies the change is visible.
    """
    # Fixed seed so the random vectors below are reproducible.
    np.random.seed(500)
    num_dim = 3
    vec_idx = np.array(['12', '112', '903'], dtype=(np.str_, 16))
    vec = np.random.random([len(vec_idx), num_dim])

    # Stage 1: create the index and persist it.
    with NumpyIndexer(metric='euclidean', index_filename='np.test.gz', compress_level=compress_level,
                      metas=test_metas) as indexer:
        indexer.add(vec_idx, vec)
        indexer.save()
        assert indexer.num_dim == num_dim
        assert indexer.size == len(vec_idx)
        assert os.path.exists(indexer.index_abspath)
        save_abspath = indexer.save_abspath

    # Reload and confirm every stored vector can be fetched by key unchanged.
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        query_results = indexer.query_by_key(vec_idx)
        assert np.array_equal(vec, query_results)

    # update
    key_to_update = vec_idx[0]
    data_to_update = np.random.random([1, num_dim])
    # nonexistent key
    random_keys = np.array(['999'], dtype=(np.str_, 16))
    random_data = np.random.random([1, num_dim])

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        # NON-EXISTENT KEYS: this will log warning but not fail
        indexer.update(random_keys, random_data)
        indexer.update([key_to_update], data_to_update)
        indexer.save()

    # Reload and confirm the updated key now returns the new vector.
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        query_results = indexer.query_by_key([key_to_update])
        assert np.array_equal(data_to_update, query_results)

    # delete
    keys_to_delete = 1
    vec_idx_to_delete = vec_idx[:keys_to_delete]

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        indexer.delete(vec_idx_to_delete)
        indexer.save()
        assert indexer.size == len(vec_idx) - keys_to_delete

    # NOTE(review): this reads `size` after the `with` block has exited;
    # presumably it checks the closed object still reports the reduced size. Confirm intent.
    assert indexer.size == len(vec_idx) - keys_to_delete

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        assert indexer.size == len(vec_idx) - keys_to_delete
        # random non-existent key
        assert indexer.query_by_key(['123861942']) is None
        # The keys that were not deleted must still map to their original vectors.
        query_results = indexer.query_by_key(vec_idx[keys_to_delete:])
        expected = vec[keys_to_delete:]
        np.testing.assert_allclose(query_results, expected, equal_nan=True)
Exemple #8
0
def test_numpy_indexer_defaults(test_metas):
    """Check the attribute values of a ``NumpyIndexer`` built with no arguments."""
    default_indexer = NumpyIndexer()
    # Regression guard: these values come from v0.8.12, before JAML was introduced.
    assert default_indexer.pea_id == 0
    assert default_indexer.workspace is None
    assert default_indexer._workspace == './'
    assert default_indexer.py_modules is None
    indexer_name = default_indexer.name
    assert indexer_name.startswith('jina.executors.indexers')
Exemple #9
0
def test_separated_workspace(test_metas):
    """Check workspace-related attributes of a ``NumpyIndexer`` built with no arguments."""
    indexer = NumpyIndexer()
    # Regression guard: these values come from v0.8.12, before JAML was introduced.
    assert not indexer.separated_workspace
    assert indexer.pea_id == 0
    assert indexer.workspace == './'
    # Identity comparison: `== None` can be satisfied by objects overriding `__eq__`.
    assert indexer.py_modules is None
    assert indexer.pea_workspace == './/None-0'
    assert indexer.name.startswith('jina.executors.indexers')
Exemple #10
0
def test_numpy_indexer_known_and_delete(batch_size, compress_level,
                                        test_metas):
    """Verify exact query results before and after an update and a delete.

    Three vectors are indexed under integer keys. The index is reloaded from
    ``save_abspath`` for every stage: initial query, update of key 4, query
    again, delete of key 4, and a final query on the two remaining entries.
    """
    vectors = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100]])
    keys = np.array([4, 5, 6])
    with NumpyIndexer(metric='euclidean',
                      index_filename='np.test.gz',
                      compress_level=compress_level,
                      metas=test_metas) as indexer:
        indexer.batch_size = batch_size
        indexer.add(keys, vectors)
        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        save_abspath = indexer.save_abspath

    # top_k equals the number of stored vectors, so every key is returned, ordered by distance.
    top_k = 3
    queries = np.array([[1, 1, 1], [10, 10, 10]])
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=top_k)
        np.testing.assert_equal(idx, np.array([[4, 5, 6], [5, 4, 6]]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)
        np.testing.assert_equal(indexer.query_by_id([5, 4, 6]),
                                vectors[[1, 0, 2]])

    # update and query again
    key_to_update = np.array([4])
    data_to_update = np.array([[1000, 1000, 1000]])

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        indexer.update(key_to_update, data_to_update)
        indexer.save()

    # Key 4 now holds the farthest vector, so it is expected last for both queries.
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=top_k)
        np.testing.assert_equal(idx, np.array([[5, 6, 4], [5, 6, 4]]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)

    # delete and query again
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        indexer.delete([4])
        indexer.save()

    # Only keys 5 and 6 remain, so at most two results can come back.
    top_k = 2
    queries = np.array([[100, 100, 100], [10, 10, 10]])
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=2)
        np.testing.assert_equal(idx, np.array([[6, 5], [5, 6]]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)
        np.testing.assert_equal(indexer.query_by_id([6, 5]), vectors[[2, 1]])
Exemple #11
0
def test_numpy_indexer_assert_shape_mismatch(test_metas):
    """Adding three keys with only two vectors must raise ``ValueError``."""
    two_vectors = np.array([[1, 1, 1], [2, 2, 2]])
    three_keys = np.array([1, 2, 3])
    with NumpyIndexer(metric='euclidean', index_filename='np.test.gz',
                      compress_level=0, metas=test_metas) as np_indexer:
        np_indexer.batch_size = 4
        with pytest.raises(ValueError):
            np_indexer.add(three_keys, two_vectors)
Exemple #12
0
def test__get_sorted_top_k(batch_size, num_docs, top_k):
    """Compare ``_get_sorted_top_k`` against a full row-wise sort of random distances."""
    distances = np.random.uniform(size=(batch_size, num_docs))

    # Reference result: sort every row completely, then keep the first top_k columns.
    expected_idx = np.argsort(distances)[:, :top_k]
    expected_dist = np.sort(distances)[:, :top_k]

    with NumpyIndexer(metric='euclidean') as np_indexer:
        top_idx, top_dist = np_indexer._get_sorted_top_k(distances, top_k=top_k)

        np.testing.assert_equal(top_idx, expected_idx)
        np.testing.assert_equal(top_dist, expected_dist)
Exemple #13
0
def test_annoy_wrap_indexer(metas):
    """Fill a ``NumpyIndexer`` named ``'wrap-npidx'``, then query the Annoy indexer loaded from YAML."""
    with NumpyIndexer(index_filename='wrap-npidx.gz', metas=metas) as np_indexer:
        np_indexer.name = 'wrap-npidx'
        np_indexer.add(vec_idx, vec)

    config_path = os.path.join(cur_dir, 'yaml/annoy-wrap.yml')
    with BaseIndexer.load_config(config_path) as annoy_indexer:
        assert isinstance(annoy_indexer, AnnoyIndexer)
        top_idx, top_dist = annoy_indexer.query(query, top_k=4)
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (10, 4)
Exemple #14
0
def test_sptag_wrap_indexer(metas):
    """Fill and save a ``NumpyIndexer`` named ``'wrap-npidx'``, then query the SPTAG indexer loaded from YAML."""
    with NumpyIndexer(index_filename='wrap-npidx.gz', metas=metas) as np_indexer:
        np_indexer.name = 'wrap-npidx'
        np_indexer.add(vec_idx, vec)
        np_indexer.save()

    config_path = os.path.join(cur_dir, 'yaml/sptag-wrap.yml')
    with BaseIndexer.load_config(config_path) as sptag_indexer:
        assert isinstance(sptag_indexer, SptagIndexer)
        top_idx, top_dist = sptag_indexer.query(query, top_k=top_k)
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (num_queries, top_k)
Exemple #15
0
def test_numpy_indexer_empty_data(batch_size, compress_level, test_metas):
    """Querying an index that was only touched, never filled, must return empty results."""
    workspace = test_metas['workspace']
    idx_file_path = os.path.join(workspace, 'np.test.gz')
    with NumpyIndexer(index_filename=str(idx_file_path),
                      compress_level=compress_level,
                      metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.touch()
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        top_idx, top_dist = reader.query(query, top_k=4)
        assert len(top_idx) == 0
        assert len(top_dist) == 0
Exemple #16
0
def test_numpy_indexer(batch_size, compress_level, test_metas):
    """Save the module-level vectors, reload the index and check the query result shapes."""
    with NumpyIndexer(index_filename='np.test.gz', compress_level=compress_level,
                      metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.add(vec_idx, vec)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        top_idx, top_dist = reader.query(query, top_k=4)
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (num_query, 4)
def test_numpy_indexer_empty_data(batch_size, compress_level, test_metas):
    """Querying an index that was only touched, never filled, must return ``None`` for both outputs."""
    workspace = Path(test_metas['workspace'])
    idx_file_path = workspace / 'np.test.gz'
    with NumpyIndexer(index_filename=str(idx_file_path), compress_level=compress_level,
                      metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.touch()
        writer.save()
        assert Path(writer.index_abspath).exists()
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        top_idx, top_dist = reader.query(query, top_k=4)
        assert top_idx is None
        assert top_dist is None
Exemple #18
0
def test_scipy_indexer(batch_size, compress_level, test_metas):
    """Save the module-level vectors with the scipy backend, reload and check query result shapes."""
    with NumpyIndexer(metric='euclidean',
                      index_filename='np.test.gz',
                      backend='scipy',
                      compress_level=compress_level,
                      metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.add(vec_idx, vec)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        if compress_level == 0:
            assert isinstance(reader.query_handler, np.memmap)
        top_idx, top_dist = reader.query(query, top_k=4)
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (num_query, 4)
Exemple #19
0
def test_numpy_indexer_long_ids(test_metas):
    """Index keys stored as 20-character strings, one of them 19 digits long, and query the reloaded index."""
    with NumpyIndexer(metric='euclidean',
                      index_filename='np.test.gz',
                      compress_level=0,
                      metas=test_metas) as writer:
        writer.batch_size = 4
        # Widen the key dtype so the 19-digit key below fits.
        wide_keys = np.array(vec_idx, dtype=(np.str_, 20))
        wide_keys[0] = '1234512345123451234'
        writer.add(wide_keys, vec)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        top_idx, top_dist = reader.query(query, top_k=4)
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (num_query, 4)
Exemple #20
0
def test_annoy_wrap_indexer():
    """Fill and save a ``NumpyIndexer``, then query the Annoy indexer loaded from YAML.

    The first indices returned in a test session are stored in the module-level
    ``retr_idx``; later calls compare their result against that stored value.
    The files written by the numpy indexer are removed even when an assertion
    fails.
    """
    global retr_idx
    with NumpyIndexer(index_filename='wrap-npidx.gz') as indexer:
        indexer.name = 'wrap-npidx'
        indexer.add(vec_idx, vec)
        indexer.save()
        save_abspath = indexer.save_abspath
        index_abspath = indexer.index_abspath

    try:
        with BaseIndexer.load_config(os.path.join(cur_dir, 'yaml/annoy-wrap.yml')) as indexer:
            assert isinstance(indexer, AnnoyIndexer)
            idx, dist = indexer.query(query, top_k=4)
            if retr_idx is None:
                retr_idx = idx
            else:
                np.testing.assert_almost_equal(retr_idx, idx)
            assert idx.shape == dist.shape
            assert idx.shape == (10, 4)
    finally:
        # Clean up in `finally` so a failing assertion does not leave index files behind.
        rm_files([save_abspath, index_abspath])
Exemple #21
0
def test_indexer_one_dimensional(metric, test_metas):
    """Index a single one-dimensional vector and check that the query distance is not NaN.

    ``top_k=4`` is requested although only one vector is stored; the result is
    asserted to have shape ``(1, 1)``.
    """
    import math
    add_vec_idx = np.array(['0'], dtype=(np.str_, 16))
    add_vec = np.asarray([[1]])
    query_vec = np.asarray([[2]])
    with NumpyIndexer(metric=metric, index_filename='np.test.gz',
                      metas=test_metas) as indexer:
        indexer.add(add_vec_idx, add_vec)

        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        save_abspath = indexer.save_abspath

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        assert isinstance(indexer.query_handler, np.memmap)
        idx, dist = indexer.query(query_vec, top_k=4)
        assert idx.shape == dist.shape
        assert idx.shape == (1, 1)
        # Index the scalar explicitly: passing the 1-element row `dist[0]` to
        # `math.isnan` relies on array-to-scalar conversion, deprecated in NumPy 1.25.
        assert not math.isnan(dist[0, 0])
Exemple #22
0
def test_scipy_indexer():
    """Save the module-level vectors with the scipy backend, reload and query them.

    The first indices returned in a test session are stored in the module-level
    ``retr_idx``; later calls compare their result against that stored value.
    The written files are removed even when an assertion fails.
    """
    global retr_idx
    with NumpyIndexer(index_filename='np.test.gz', backend='scipy') as indexer:
        indexer.add(vec_idx, vec)
        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        index_abspath = indexer.index_abspath
        save_abspath = indexer.save_abspath

    try:
        with BaseIndexer.load(save_abspath) as indexer:
            assert isinstance(indexer, NumpyIndexer)
            idx, dist = indexer.query(query, top_k=4)
            if retr_idx is None:
                retr_idx = idx
            else:
                np.testing.assert_almost_equal(retr_idx, idx)
            assert idx.shape == dist.shape
            assert idx.shape == (10, 4)
    finally:
        # Clean up in `finally` so a failing assertion does not leave index files behind.
        rm_files([index_abspath, save_abspath])
Exemple #23
0
def test_scipy_indexer(batch_size, compress_level, test_metas):
    """Save the module-level vectors with the scipy backend, reload and query them.

    The first indices returned in a test session are stored in the module-level
    ``retr_idx``; later calls compare their result against that stored value.
    """
    global retr_idx

    with NumpyIndexer(index_filename='np.test.gz', backend='scipy',
                      compress_level=compress_level, metas=test_metas) as writer:
        writer.batch_size = batch_size
        writer.add(vec_idx, vec)
        writer.save()
        assert os.path.exists(writer.index_abspath)
        saved_to = writer.save_abspath

    with BaseIndexer.load(saved_to) as reader:
        assert isinstance(reader, NumpyIndexer)
        top_idx, top_dist = reader.query(query, top_k=4)
        if retr_idx is not None:
            np.testing.assert_almost_equal(retr_idx, top_idx)
        else:
            retr_idx = top_idx
        assert top_idx.shape == top_dist.shape
        assert top_idx.shape == (num_query, 4)
Exemple #24
0
def test_indexer_one_dimensional(metric, test_metas):
    """Index a single one-dimensional vector and check that the query distance is not NaN.

    ``top_k=4`` is requested although only one vector is stored; the result is
    asserted to have shape ``(1, 1)``.
    """
    import math
    add_vec_idx = np.asarray([0])
    add_vec = np.asarray([[1]])
    query_vec = np.asarray([[2]])
    with NumpyIndexer(metric=metric,
                      index_filename='np.test.gz',
                      metas=test_metas) as indexer:
        indexer.add(add_vec_idx, add_vec)

        indexer.save()
        assert Path(indexer.index_abspath).exists()
        save_abspath = indexer.save_abspath

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(query_vec, top_k=4)
        print(f'metric {metric} => dist {dist}')
        assert idx.shape == dist.shape
        assert idx.shape == (1, 1)
        # Index the scalar explicitly: passing the 1-element row `dist[0]` to
        # `math.isnan` relies on array-to-scalar conversion, deprecated in NumPy 1.25.
        assert not math.isnan(dist[0, 0])
Exemple #25
0
def test_numpy_indexer_empty_data(batch_size, compress_level, test_metas):
    """Querying an index that was only touched, never filled, must return ``None`` for both outputs."""
    # Fixed seed so the random query vectors are reproducible.
    np.random.seed(500)
    num_dim = 64
    num_query = 10
    query = np.array(np.random.random([num_query, num_dim]), dtype=np.float32)

    idx_file_path = Path(test_metas['workspace']) / 'np.test.gz'
    with NumpyIndexer(index_filename=str(idx_file_path),
                      compress_level=compress_level,
                      metas=test_metas) as indexer:
        indexer.batch_size = batch_size
        indexer.touch()
        indexer.save()
        assert Path(indexer.index_abspath).exists()
        save_abspath = indexer.save_abspath

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(query, top_k=4)
        # Identity check: `== None` on an ndarray compares element-wise and
        # would not fail cleanly if an array were returned by mistake.
        assert idx is None
        assert dist is None
Exemple #26
0
def test_standard_query(tmpdir, test_standard):
    """Time a top-10 query on the index stored at ``a.bin`` and append memory/time figures to the summary file.

    One JSON object per run is appended to ``summary_file``, followed by a newline.
    """
    mem_before = used_memory(1)
    print(used_memory_readable())
    index_path = os.path.join(tmpdir, 'a.bin')
    with NumpyIndexer.load(index_path) as ni:
        ni.batch_size = 256
        print(used_memory_readable())
        print(ni.raw_ndarray.shape)
        print(used_memory_readable())
        with TimeContext('query topk') as timer:
            result = ni.query(queries, top_k=10)
            mem_after = used_memory(1)
            print(used_memory_readable())
            print(result[0].shape)
        with open(summary_file, 'a') as fp:
            mem_delta = mem_after - mem_before
            record = {
                'name': 'naive',
                'memory': mem_delta,
                'readable': get_readable_size(mem_delta),
                'time': timer.duration,
            }
            json.dump(record, fp)
            fp.write('\n')
Exemple #27
0
def test_numpy_indexer_known():
    """Index four vectors under integer keys and verify exact query results.

    The index is saved, reloaded through ``BaseIndexer.load`` and queried both
    by vector (top-2 neighbours) and by id. The written files are removed even
    when an assertion fails.
    """
    vectors = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                        [1000, 1000, 1000]])
    keys = np.array([4, 5, 6, 7]).reshape(-1, 1)
    with NumpyIndexer(index_filename='np.test.gz') as indexer:
        indexer.add(keys, vectors)
        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        index_abspath = indexer.index_abspath
        save_abspath = indexer.save_abspath

    # Each query equals one stored vector, so its own id is expected first.
    queries = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100],
                        [1000, 1000, 1000]])
    try:
        with BaseIndexer.load(save_abspath) as indexer:
            assert isinstance(indexer, NumpyIndexer)
            idx, dist = indexer.query(queries, top_k=2)
            np.testing.assert_equal(
                idx, np.array([[4, 5], [5, 4], [6, 5], [7, 6]]))
            assert idx.shape == dist.shape
            assert idx.shape == (4, 2)
            np.testing.assert_equal(indexer.query_by_id([7, 4]), vectors[[3, 0]])
    finally:
        # Clean up in `finally` so a failing assertion does not leave index files behind.
        rm_files([index_abspath, save_abspath])
Exemple #28
0
def test_standard():
    """Add the module-level vectors to an uncompressed index and save it as ``a.bin``."""
    indexer_kwargs = dict(index_filename=filename, compress_level=0)
    with NumpyIndexer(**indexer_kwargs) as np_indexer:
        np_indexer.batch_size = 512
        np_indexer.add(vec_idx, vec)
        np_indexer.save('a.bin')
Exemple #29
0
def test_set_batch_size():
    """A ``batch_size`` placed in the metas must be reported by the constructed indexer."""
    batch_size = 325
    metas = get_default_metas()
    metas['batch_size'] = batch_size
    # Plain string literal: no interpolation is needed for the file name.
    indexer = NumpyIndexer(index_filename='test.gz', metas=metas)
    assert indexer.batch_size == batch_size
Exemple #30
0
def test_numpy_indexer_known_and_delete_delete_dump(batch_size, compress_level,
                                                    test_metas):
    """Verify query results across update and delete with ``delete_on_dump=True``.

    Three vectors are indexed under string keys. The index is reloaded from
    ``save_abspath`` for every stage: initial query, update of key ``'4'``,
    query again, delete of key ``'4'``, a final query on the remaining
    entries, and a lookup of a key that was never added.
    """
    vectors = np.array([[1, 1, 1], [10, 10, 10], [100, 100, 100]])
    keys = np.array(['4', '5', '6'], dtype=(np.str_, 16))
    with NumpyIndexer(
            metric='euclidean',
            index_filename='np.test.gz',
            compress_level=compress_level,
            metas=test_metas,
            delete_on_dump=True,
    ) as indexer:
        indexer.batch_size = batch_size
        indexer.add(keys, vectors)
        indexer.save()
        assert os.path.exists(indexer.index_abspath)
        save_abspath = indexer.save_abspath

    # top_k equals the number of stored vectors, so every key is returned, ordered by distance.
    top_k = 3
    queries = np.array([[1, 1, 1], [10, 10, 10]])
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=top_k)
        np.testing.assert_equal(idx,
                                np.array([['4', '5', '6'], ['5', '4', '6']]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)
        np.testing.assert_equal(indexer.query_by_key(['5', '4', '6']),
                                vectors[[1, 0, 2]])

    # update and query again
    key_to_update = np.array(['4'])
    data_to_update = np.array([[1000, 1000, 1000]])

    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        indexer.update(key_to_update, data_to_update)
        indexer.save()

    with BaseIndexer.load(save_abspath) as indexer:
        # this tests the real delete
        assert len(indexer.valid_indices) == indexer.size
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=top_k)
        # Key '4' now holds the farthest vector, so it is expected last for both queries.
        np.testing.assert_equal(idx,
                                np.array([['5', '6', '4'], ['5', '6', '4']]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)

    # delete and query again
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        # Keys were added as strings, so the key to delete must be the string
        # '4'; the integer 4 does not match any stored key.
        indexer.delete(['4'])
        indexer.save()

    top_k = 2
    queries = np.array([[100, 100, 100], [10, 10, 10]])
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        idx, dist = indexer.query(queries, top_k=2)
        np.testing.assert_equal(idx, np.array([['6', '5'], ['5', '6']]))
        assert idx.shape == dist.shape
        assert idx.shape == (len(queries), top_k)
        np.testing.assert_equal(indexer.query_by_key(['6', '5']),
                                vectors[[2, 1]])

    # test query by nonexistent key
    with BaseIndexer.load(save_abspath) as indexer:
        assert isinstance(indexer, NumpyIndexer)
        assert indexer.query_by_key(['91237124']) is None