Example #1
0
    def test_binary_flat(self):
        """Compare GpuIndexBinaryFlat search results with IndexBinaryFlat.

        Both indexes are filled with ``self.xb_bin`` and queried with
        ``self.xq_bin``; for every query the (distance, id) pairs below the
        largest returned distance must match.
        """
        top_k = 10

        # Reference results from the CPU binary flat index.
        cpu_index = faiss.IndexBinaryFlat(self.d_bin)
        cpu_index.add(self.xb_bin)
        cpu_dists, cpu_ids = cpu_index.search(self.xq_bin, top_k)

        # Same data and queries through the GPU binary flat index.
        gpu_res = faiss.StandardGpuResources()
        gpu_index = faiss.GpuIndexBinaryFlat(gpu_res, self.d_bin)
        gpu_index.add(self.xb_bin)
        gpu_dists, gpu_ids = gpu_index.search(self.xq_bin, top_k)

        def pairs_below(dists, ids, limit):
            # Sorted (distance, id) pairs make the comparison reproducible.
            return sorted((d, i) for d, i in zip(dists, ids) if d < limit)

        for row in zip(cpu_dists, cpu_ids, gpu_dists, gpu_ids):
            row_cpu_d, row_cpu_i, row_gpu_d, row_gpu_i = row

            # Both result lists must end at the same maximum distance ...
            worst = row_cpu_d.max()
            assert worst == row_gpu_d.max()

            # ... and entries at that maximum distance are left out of the
            # comparison.
            expected = pairs_below(row_cpu_d, row_cpu_i, worst)
            actual = pairs_below(row_gpu_d, row_gpu_i, worst)
            assert expected == actual
Example #2
0
 def __init__(self,
              dataset,
              vector=False,
              binary=False,
              dimension=768,
              build=False,
              gpu=-1):
     """Set up the search backend: a faiss index or an Elasticsearch client.

     Args:
         dataset: Name stored on the instance; in Elasticsearch mode it is
             also used as the index name.
         vector: When true, build a faiss index; otherwise create an
             Elasticsearch client. Stored as ``self.mode``.
         binary: In vector mode, select the binary flat index instead of
             the flat L2 index.
         dimension: Dimension passed to the faiss index constructor.
         build: In Elasticsearch mode, delete the index named ``dataset``
             (best effort), recreate it and install the ``utterance``
             text mapping.
         gpu: Any value >= 0 selects the GPU faiss index classes; a
             negative value selects the CPU ones.
     """
     self.dataset = dataset
     self.dimension = dimension
     self.mode = vector
     self.binary = binary

     if vector:
         # faiss: build only the index that is actually kept, instead of
         # constructing a CPU index first and discarding it on the GPU path.
         if gpu >= 0:
             # GpuIndexBinaryFlat: https://github.com/facebookresearch/faiss/blob/master/faiss/gpu/test/test_gpu_index.py#L176
             # NOTE(review): `gpu` only acts as an on/off switch here; it is
             # never handed to faiss as a device id -- confirm this is intended.
             res = faiss.StandardGpuResources()  # use a single GPU
             if binary:
                 self.searcher = faiss.GpuIndexBinaryFlat(res, dimension)
             else:
                 self.searcher = faiss.GpuIndexFlatL2(res, dimension)
             print('[!] gpu is used for faiss to speed up')
         else:
             if binary:
                 self.searcher = faiss.IndexBinaryFlat(dimension)
             else:
                 self.searcher = faiss.IndexFlatL2(dimension)
             print('[!] cpu is used for faiss')
         self.corpus = []
     else:
         # elasticsearch
         self.searcher = Elasticsearch()
         if build:
             try:
                 self.searcher.indices.delete(index=dataset)
             except Exception:
                 # Best-effort delete: a failure here (e.g. the index does
                 # not exist yet) must not stop the rebuild. Catching
                 # Exception rather than everything lets KeyboardInterrupt
                 # and SystemExit propagate.
                 print(f'[!] index {dataset} is empty, donot delete')
             # Mapping for the `utterance` text field using the ik analyzers.
             mapping = {
                 'properties': {
                     'utterance': {
                         'type': 'text',
                         'analyzer': 'ik_max_word',
                         'search_analyzer': 'ik_smart',
                     }
                 }
             }
             self.searcher.indices.create(index=dataset)
             self.searcher.indices.put_mapping(body=mapping, index=dataset)