def setUpClass(cls) -> None:
    """Create the class-level fixtures shared by all tests.

    Sets up two service configs (with and without query normalization),
    the Faiss config, a reference index, and one in-process gRPC test
    server per service config.
    """
    probe_count = 10
    cls.CONFIG = FaissServiceConfig(nprobe=probe_count, normalize_query=False)
    cls.CONFIG_NORM = FaissServiceConfig(nprobe=probe_count, normalize_query=True)
    cls.FAISS_CONFIG = FaissConfig(dim=64, db_size=100000, nlist=100)
    cls.INDEX = cls.create_index()
    cls.SERVICE = faiss_pb2.DESCRIPTOR.services_by_name['FaissService']

    def _make_server(service_config):
        # Every servicer receives its own clone of the reference index,
        # because the FaissServiceServicer constructor changes index
        # attributes (e.g. nprobe).
        servicer = FaissServiceServicer(
            faiss.clone_index(cls.INDEX), service_config)
        return grpc_testing.server_from_dictionary(
            {cls.SERVICE: servicer},
            grpc_testing.strict_real_time(),
        )

    cls.SERVER = _make_server(cls.CONFIG)
    # Second server: same index, but with normalize_query=True.
    cls.SERVER_NORM = _make_server(cls.CONFIG_NORM)

    # nprobe is set on the reference index only after both clones were taken.
    cls.INDEX.nprobe = probe_count
def test_clone(self):
    """A clone of a trained, populated 'IVF10,PQ4np' index keeps ``ntotal``."""
    source = faiss.index_factory(16, 'IVF10,PQ4np')
    vectors = faiss.rand((1000, 16))
    # The same random vectors are used for training and for population.
    source.train(vectors)
    source.add(vectors)

    duplicate = faiss.clone_index(source)

    assert duplicate.ntotal == source.ntotal
def test_clone_size(self):
    """A clone of a 'PCA10,Flat' index holding 100 vectors reports 100."""
    source = faiss.index_factory(20, 'PCA10,Flat')
    vectors = faiss.rand((100, 20))
    # The same random vectors are used for training and for population.
    source.train(vectors)
    source.add(vectors)

    duplicate = faiss.clone_index(source)

    assert duplicate.ntotal == 100
def test_failed_different_nprobe_Search(self) -> None:
    """Search via the server must differ from a local search with nprobe=1."""
    # k has to be large: if both nprobe and k are small, the results would
    # be identical even when the server failed to apply its nprobe, and
    # that error case would go unnoticed.
    top_k = 1000
    query_values = np.ones(self.FAISS_CONFIG.dim, dtype=np.float32)
    request = SearchRequest(query=Vector(val=query_values), k=top_k)
    descriptor = self.method_descriptor_by_name(ServiceMethodDescriptor.search)
    rpc = self.SERVER.invoke_unary_unary(descriptor, (), request, None)

    # Run the same query locally on a clone that uses a different nprobe.
    local_index = faiss.clone_index(self.INDEX)
    local_index.nprobe = 1
    distances, ids = local_index.search(np.atleast_2d(query_values), top_k)
    unexpected = SearchResponse(neighbors=self.to_neighbors(distances, ids))

    response, _, code, _ = rpc.termination()

    self.assertNotEqual(response, unexpected)
    self.assertIs(code, grpc.StatusCode.OK)
def make_mmap_index(self, base_index: BASE_INDEX, ids: np.array, embs: np.array):
    """Write an index whose inverted lists live in an on-disk ``.ivfdata`` file.

    A clone of ``base_index`` is filled with ``embs`` under ``ids``; its
    inverted lists are merged into ``<name>.ivfdata``; a second clone of
    ``base_index`` is linked to that file and saved as ``<name>.index``.
    ``<name>`` is the absolute path of ``self.sub_dir / self.seed_name``.
    """
    # Step 1: fill a throw-away clone and keep a handle on its inverted lists.
    filled = faiss.clone_index(base_index)
    filled.add_with_ids(embs, ids)
    lists_vector = faiss.InvertedListsPtrVector()
    lists_vector.push_back(filled.invlists)
    # NOTE(review): presumably ownership is released so that dropping the
    # clone does not free the lists still referenced by lists_vector.
    filled.own_invlists = False
    del filled
    gc.collect()

    # Step 2: merge those lists into the on-disk ivfdata file.
    index_name = p.abspath(self.sub_dir / f'{self.seed_name}')
    disk_lists = faiss.OnDiskInvertedLists(
        base_index.nlist, base_index.code_size, f'{index_name}.ivfdata')
    ntotal = disk_lists.merge_from(lists_vector.data(), lists_vector.size())

    # Step 3: link a fresh clone to the ivfdata file and save it.
    linked = faiss.clone_index(base_index)
    linked.ntotal = ntotal
    linked.replace_invlists(disk_lists)
    faiss.write_index(linked, f'{index_name}.index')
def _execute_map(cls, ctx, op):
    """Add this operand's data chunk to a Faiss index and store the result.

    When ``op`` has a second input, that input is loaded as an already
    trained index. Otherwise a new index is built with
    ``faiss.index_factory`` and trained on this chunk unless
    ``op.same_distribution`` is set. The populated index is written to
    ``ctx`` under the key of ``op.outputs[0]``.
    """
    (data, ), device_id, xp = as_same_device(
        [ctx[op.inputs[0].key]], device=op.device, ret_extra=True)
    pretrained = ctx[op.inputs[1].key] if len(op.inputs) == 2 else None

    with device(device_id):
        data = xp.ascontiguousarray(data)

        if pretrained is None:
            target_index = faiss.index_factory(
                data.shape[1], op.faiss_index, op.faiss_metric_type)
            # With the same distribution everywhere no training is needed;
            # otherwise each chunk trains its own index.
            if not op.same_distribution:
                target_index.train(data)
            if device_id >= 0:  # pragma: no cover
                target_index = _index_to_gpu(target_index, device_id)
        else:
            # Fetch the index that was trained beforehand.
            target_index = _load_index(ctx, op, pretrained, device_id)
            if _get_index_type(op.return_index_type, ctx) == 'object':
                # Work on a private copy: faiss does not guarantee
                # thread-safety for operations that change an index, see
                # https://github.com/facebookresearch/faiss/wiki/Threads-and-asynchronous-calls#thread-safety
                target_index = faiss.clone_index(target_index)

        if op.metric == 'cosine':
            # faiss has no direct cosine distance; the data must be
            # L2-normalized before it is added to the index, see
            # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
            faiss.normalize_L2(data)

        # Add the chunk to the index.
        if device_id >= 0:  # pragma: no cover
            # GPU path
            target_index.add_c(
                data.shape[0], _swig_ptr_from_cupy_float32_array(data))
        else:
            target_index.add(data)

        ctx[op.outputs[0].key] = _store_index(ctx, op, target_index, device_id)
def subtest_io_and_clone(self, index, Dnsg, Insg):
    """Check that a write/read round trip and a clone both reproduce a search.

    ``Dnsg`` and ``Insg`` are the reference distances and ids; each copy of
    ``index`` must return exactly these for a 1-nearest-neighbour search of
    ``self.xq``.
    """
    def _assert_same_results(candidate):
        distances, labels = candidate.search(self.xq, 1)
        np.testing.assert_array_equal(distances, Dnsg)
        np.testing.assert_array_equal(labels, Insg)

    # mkstemp returns an open descriptor; only the path is needed here.
    handle, tmp_path = tempfile.mkstemp()
    os.close(handle)
    try:
        faiss.write_index(index, tmp_path)
        reloaded = faiss.read_index(tmp_path)
    finally:
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)

    _assert_same_results(reloaded)

    # also test clone
    _assert_same_results(faiss.clone_index(index))
def io_and_retest(self, index, Dhnsw, Ihnsw):
    """Check that a write/read round trip and a clone both reproduce a search.

    ``Dhnsw`` and ``Ihnsw`` are the reference distances and ids; each copy
    of ``index`` must return exactly these for a 1-nearest-neighbour search
    of ``self.xq``.
    """
    # mkstemp returns an OPEN file descriptor together with the path. Only
    # the path is used, so close the descriptor right away: otherwise it
    # leaks on every call and can prevent os.unlink on Windows.
    fd, tmpfile = tempfile.mkstemp()
    os.close(fd)
    try:
        faiss.write_index(index, tmpfile)
        index2 = faiss.read_index(tmpfile)
    finally:
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)

    Dhnsw2, Ihnsw2 = index2.search(self.xq, 1)
    self.assertTrue(np.all(Dhnsw2 == Dhnsw))
    self.assertTrue(np.all(Ihnsw2 == Ihnsw))

    # also test clone
    index3 = faiss.clone_index(index)
    Dhnsw3, Ihnsw3 = index3.search(self.xq, 1)
    self.assertTrue(np.all(Dhnsw3 == Dhnsw))
    self.assertTrue(np.all(Ihnsw3 == Ihnsw))
def indexing(feats, pos, imgID):
    """Train an IVF-PQ index on ``feats`` and pack positions and image ids.

    Args:
        feats: indexable sequence of 1-D arrays; ``feats[0].shape[0]`` is
            taken as the feature dimension.
        pos: per-feature values, each stored in a 2-column row.
        imgID: per-feature values, each stored in a 1-column row.

    Returns:
        ``[index, pos_np, imgID_np]`` where ``index`` is a clone of the
        trained and populated ``faiss.IndexIVFPQ`` (``nprobe`` set to 5)
        and the two arrays have one row per feature.
    """
    n_feats = len(feats)
    d = feats[0].shape[0]

    feats_np = np.zeros(shape=(n_feats, d))
    pos_np = np.zeros(shape=(n_feats, 2))
    imgID_np = np.zeros(shape=(n_feats, 1))
    for i, feat in enumerate(feats):
        feats_np[i, :] = feat
        pos_np[i, :] = pos[i]
        imgID_np[i, :] = imgID[i]

    # Convert to float32 once and reuse the result for both train and add,
    # instead of making two full copies.
    feats_f32 = feats_np.astype('float32')

    # construct the visual vocabulary
    voc_size = const_params.__voc_size__
    code_size = 8
    quantizer = faiss.IndexFlatL2(d)
    # The trailing 8 specifies that each sub-vector is encoded as 8 bits.
    index_ = faiss.IndexIVFPQ(quantizer, d, voc_size, code_size, 8)
    index_.train(feats_f32)
    index_.add(feats_f32)
    index_.nprobe = 5

    # NOTE(review): a clone is returned rather than index_ itself, presumably
    # so the result does not depend on the local quantizer object - confirm.
    return [faiss.clone_index(index_), pos_np, imgID_np]
def approximate_batch(self, vectors, output_path):
    """Add ``vectors`` to a clone of the index and write it to ``output_path``.

    ``self.load()`` is called first when ``self.faiss_index`` is still
    ``None``. The additions go to the clone, not to ``self.faiss_index``.
    """
    index_missing = self.faiss_index is None
    if index_missing:
        self.load()

    batch_index = faiss.clone_index(self.faiss_index)
    batch_index.add(vectors)
    faiss.write_index(batch_index, str(output_path))
# Load the feature database. Pickle streams are binary, so the file must be
# opened in 'rb' mode (text mode fails on Python 3 and can corrupt data on
# Windows). The context manager closes the file even if loading fails.
# NOTE: pickle.load can execute arbitrary code; only load a database.pkl
# that comes from a trusted source.
with open('../database.pkl', 'rb') as fp:
    feats, pos, imgID, filenames, featNum = pickle.load(fp)

index_, pos_np, imgID_np = indexing(feats, pos, imgID)

# Save the position / image-id tables next to the Faiss index
# (binary mode again).
with open('../index.pkl', 'wb') as fp:
    pickle.dump([pos_np, imgID_np], fp)

faiss.write_index(faiss.clone_index(index_), '../index.faiss')

print('testing...............')
print('=======================')
print('the number of points {0}'.format(imgID_np.shape[0]))
print('the pos is {0}'.format(pos_np[0, :]))
print('=======================')