def test_binary_flat(self):
    """Check that the GPU binary flat index agrees with the CPU reference.

    Both indexes are filled with ``self.xb_bin`` and queried with
    ``self.xq_bin`` for the 10 nearest neighbours. For every query the
    largest returned distance must match, and the (distance, id) pairs
    strictly below that distance must be identical once sorted.
    """
    top_k = 10

    # CPU reference results.
    cpu_index = faiss.IndexBinaryFlat(self.d_bin)
    cpu_index.add(self.xb_bin)
    ref_dists, ref_ids = cpu_index.search(self.xq_bin, top_k)

    # GPU results under test.
    gpu_index = faiss.GpuIndexBinaryFlat(
        faiss.StandardGpuResources(), self.d_bin
    )
    gpu_index.add(self.xb_bin)
    gpu_dists, gpu_ids = gpu_index.search(self.xq_bin, top_k)

    def pairs_below(dists, ids, limit):
        # Sorted (distance, id) pairs so the comparison does not depend on
        # the order in which results were returned.
        return sorted(
            (dist, idx) for dist, idx in zip(dists, ids) if dist < limit
        )

    rows = zip(ref_dists, ref_ids, gpu_dists, gpu_ids)
    for row_ref_d, row_ref_i, row_gpu_d, row_gpu_i in rows:
        # Entries sitting at the maximum distance are left out of the
        # pairwise comparison; only the maximum itself has to agree.
        cutoff = row_ref_d.max()
        assert cutoff == row_gpu_d.max()

        expected = pairs_below(row_ref_d, row_ref_i, cutoff)
        actual = pairs_below(row_gpu_d, row_gpu_i, cutoff)
        assert expected == actual
def __init__(self, dataset, vector=False, binary=False, dimension=768, build=False, gpu=-1):
    """Set up the search backend: a faiss flat index or an Elasticsearch client.

    Args:
        dataset: Stored as ``self.dataset``; in Elasticsearch mode it is
            also used as the index name.
        vector: When truthy a faiss index is created, otherwise an
            ``Elasticsearch`` client. Stored as ``self.mode``.
        binary: faiss mode only. Selects a binary flat index instead of
            a flat L2 index.
        dimension: Passed to the faiss index constructor.
        build: Elasticsearch mode only. When truthy the index named
            ``dataset`` is deleted (best effort) and recreated with the
            ``utterance`` text mapping.
        gpu: faiss mode only. Any value ``>= 0`` selects the GPU index
            classes. NOTE(review): the value is only used as an on/off
            flag here and is not passed to faiss as a device id; confirm
            whether that is intended.
    """
    self.dataset, self.dimension, self.mode, self.binary = dataset, dimension, vector, binary

    if vector:
        # faiss: build only the index that will actually be used instead of
        # creating a CPU index first and discarding it in the GPU case.
        if gpu >= 0:
            # GpuIndexBinaryFlat: https://github.com/facebookresearch/faiss/blob/master/faiss/gpu/test/test_gpu_index.py#L176
            res = faiss.StandardGpuResources()  # use a single GPU
            gpu_index_cls = faiss.GpuIndexBinaryFlat if binary else faiss.GpuIndexFlatL2
            self.searcher = gpu_index_cls(res, dimension)
            print('[!] gpu is used for faiss to speed up')
        else:
            cpu_index_cls = faiss.IndexBinaryFlat if binary else faiss.IndexFlatL2
            self.searcher = cpu_index_cls(dimension)
            print('[!] cpu is used for faiss')
        self.corpus = []
        return

    # elasticsearch
    self.searcher = Elasticsearch()
    if not build:
        return

    # NOTE(review): the original indentation was lost; index creation is
    # assumed to belong to the ``build`` branch together with the delete.
    try:
        self.searcher.indices.delete(index=dataset)
    except Exception:
        # Best effort: a failed delete (e.g. the index does not exist yet)
        # must not abort the build. ``Exception`` rather than a bare
        # ``except`` so that KeyboardInterrupt/SystemExit still propagate.
        print(f'[!] index {dataset} is empty, donot delete')

    mapping = {
        'properties': {
            'utterance': {
                'type': 'text',
                'analyzer': 'ik_max_word',
                'search_analyzer': 'ik_smart',
            }
        }
    }
    self.searcher.indices.create(index=dataset)
    self.searcher.indices.put_mapping(body=mapping, index=dataset)