def test_roundoff(self):
    """Check that centering the data repairs a round-off failure of IndexFlat.

    The database vectors differ only in their first coordinate, which
    carries a large constant offset (12345).  Queries are the first ``nq``
    database vectors shifted by 0.3, so the expected nearest neighbour of
    query ``i`` is database vector ``i``.
    """
    # params that force use of BLAS implementation
    nb, nq, d = 100, 25, 4

    xb = np.zeros((nb, d), dtype='float32')
    xb[:, 0] = np.arange(nb) + 12345
    xq = xb[:nq] + 0.3
    expected_ids = np.arange(nq)

    plain = faiss.IndexFlat(d)
    plain.add(xb)
    _, labels = plain.search(xq, 1)

    # this does not work
    assert not (labels.ravel() == expected_ids).all()

    centered = faiss.IndexPreTransform(
        faiss.CenteringTransform(d),
        faiss.IndexFlat(d),
    )
    centered.train(xb)
    centered.add(xb)
    _, labels = centered.search(xq, 1)

    # this works
    assert (labels.ravel() == expected_ids).all()
def test_OPQ(self):
    """Compare plain PQ against OPQ + PQ on the Randu10kUnbalanced fixture.

    Both indexes use the same PQ settings (M sub-quantizers, 8 bits); the
    test asserts that the OPQ variant scores strictly higher at ranks 1,
    10 and 100.
    """
    M = 4
    ev = Randu10kUnbalanced()
    d = ev.d

    # baseline: product quantizer on the raw vectors
    plain_pq = faiss.IndexPQ(d, M, 8)
    e_pq = ev.evalres(ev.launch('PQ', plain_pq))

    # same product quantizer behind a trained OPQ transform
    index_pq = faiss.IndexPQ(d, M, 8)
    opq_matrix = faiss.OPQMatrix(d, M)
    # opq_matrix.verbose = true
    opq_matrix.niter = 10
    opq_matrix.niter_pq = 4
    opq_index = faiss.IndexPreTransform(opq_matrix, index_pq)
    e_opq = ev.evalres(ev.launch('OPQ', opq_index))

    print('e_pq=%s' % e_pq)
    print('e_opq=%s' % e_opq)

    # verify that OPQ better than PQ
    for rank in (1, 10, 100):
        assert e_opq[rank] > e_pq[rank]
def test_OIVFPQ(self):
    """Compare IVFPQ against OPQ + IVFPQ on the Randu10kUnbalanced fixture.

    Both indexes share the same inverted-file settings; the test asserts
    that the OPQ variant scores at least as high at ranks 1, 10 and 100.
    """
    # Parameters inverted indexes
    ncentroids = 50
    M = 4

    ev = Randu10kUnbalanced()
    d = ev.d

    # baseline IVFPQ
    coarse = faiss.IndexFlatL2(d)
    baseline = faiss.IndexIVFPQ(coarse, d, ncentroids, M, 8)
    baseline.nprobe = 5
    e_ivfpq = ev.evalres(ev.launch('IVFPQ', baseline))

    # identical IVFPQ placed behind an OPQ transform
    quantizer = faiss.IndexFlatL2(d)
    index_ivfpq = faiss.IndexIVFPQ(quantizer, d, ncentroids, M, 8)
    index_ivfpq.nprobe = 5
    opq_matrix = faiss.OPQMatrix(d, M)
    opq_matrix.niter = 10
    opq_index = faiss.IndexPreTransform(opq_matrix, index_ivfpq)
    e_oivfpq = ev.evalres(ev.launch('O+IVFPQ', opq_index))

    # verify same on OIVFPQ
    for rank in (1, 10, 100):
        print(e_oivfpq[rank], e_ivfpq[rank])
        assert e_oivfpq[rank] >= e_ivfpq[rank]
def index_patches(patches, index_file, pca_dims=64):
    """Build a PCA + IVFPQ faiss index over ``patches`` and write it to disk.

    Args:
        patches: 2-D array whose second axis is the input dimension
            (read through ``patches.shape[1]``); used both for training and
            as the indexed data.
        index_file: destination path handed to ``faiss.write_index``.
        pca_dims: requested PCA output dimension.  Must be a positive
            ``int``; when it exceeds the input dimension it is lowered to
            the largest multiple of ``M`` not exceeding the input dimension.

    Raises:
        AssertionError: if ``pca_dims`` is not a positive ``int``, or if
            the final PCA dimension is not a multiple of ``M``.
    """
    # settings for faiss:
    num_lists = 200
    M = 16
    num_bits = 8

    # assertions:
    assert type(pca_dims) is int and pca_dims > 0
    input_dims = patches.shape[1]
    if pca_dims > input_dims:
        print('WARNING: Input dimension < %d. Using fewer PCA dimensions.' % pca_dims)
        pca_dims = input_dims - (input_dims % M)

    # construct faiss index:
    coarse_quantizer = faiss.IndexFlatL2(pca_dims)
    assert pca_dims % M == 0
    ivfpq = faiss.IndexIVFPQ(coarse_quantizer, pca_dims, num_lists, M, num_bits)
    pca = faiss.PCAMatrix(input_dims, pca_dims, 0, True)
    full_index = faiss.IndexPreTransform(pca, ivfpq)

    # train faiss index:
    full_index.train(patches)
    full_index.add(patches)

    # save faiss index:
    print('| writing faiss index to %s' % index_file)
    faiss.write_index(full_index, index_file)
def do_mmappedIO(self, sparse, in_pretransform=False):
    """Round-trip an IVFFlat index through a file, with and without mmap.

    The index is written to a temporary file, read back normally and with
    ``faiss.IO_FLAG_MMAP``, and each reloaded index is compared against the
    original via ``self.compare_results``.

    Args:
        sparse: when true, the training set is shifted by a large constant
            so that the inverted lists end up sparse.
        in_pretransform: when true, the IVF index is wrapped in an
            ``IndexPreTransform`` before training.
    """
    d = 10
    nb = 1000
    nq = 200
    nt = 200
    xt, xb, xq = get_dataset_2(d, nt, nb, nq)

    quantizer = faiss.IndexFlatL2(d)
    index1 = faiss.IndexIVFFlat(quantizer, d, 20)

    if sparse:
        # makes the inverted lists sparse because all elements get
        # assigned to the same invlist
        xt += (np.ones(d) * 1000).astype('float32')

    if in_pretransform:
        # make sure it still works when wrapped in an IndexPreTransform
        index1 = faiss.IndexPreTransform(index1)

    index1.train(xt)
    index1.add(xb)

    # mkstemp() hands back an *open* descriptor together with the path.
    # Only the path is needed here, so close the descriptor right away
    # instead of leaking it for the lifetime of the process.
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        faiss.write_index(index1, fname)

        index2 = faiss.read_index(fname)
        self.compare_results(index1, index2, xq)

        index3 = faiss.read_index(fname, faiss.IO_FLAG_MMAP)
        self.compare_results(index1, index3, xq)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
def index_patches(patches, pca_dims=64):
    """Build and populate a PCA + IVFPQ faiss index over a tensor of patches.

    Args:
        patches: 2-D torch tensor; its second dimension is the input
            dimension.  It is converted with ``.numpy()`` before being
            passed to faiss for training and adding.
        pca_dims: requested PCA output dimension.  Must be a positive
            ``int``; when it exceeds the input dimension it is lowered to
            the largest multiple of ``M`` not exceeding the input dimension.

    Returns:
        Tuple ``(faiss_index, sub_index)``: the ``IndexPreTransform`` and
        the ``IndexIVFPQ`` it wraps.

    Raises:
        AssertionError: if ``patches`` is not a 2-D tensor, ``pca_dims`` is
            not a positive ``int``, or the final PCA dimension is not a
            multiple of ``M``.
    """
    # settings for faiss:
    num_lists = 200
    M = 16
    num_bits = 8

    # assertions:
    assert torch.is_tensor(patches) and patches.dim() == 2
    assert type(pca_dims) is int and pca_dims > 0
    input_dims = patches.size(1)
    if pca_dims > input_dims:
        print('WARNING: Input dimension < %d. Using fewer PCA dimensions.' % pca_dims)
        pca_dims = input_dims - (input_dims % M)

    # construct faiss index:
    coarse_quantizer = faiss.IndexFlatL2(pca_dims)
    assert pca_dims % M == 0
    sub_index = faiss.IndexIVFPQ(coarse_quantizer, pca_dims, num_lists, M, num_bits)
    pca = faiss.PCAMatrix(input_dims, pca_dims, 0, True)
    faiss_index = faiss.IndexPreTransform(pca, sub_index)

    # train faiss index:
    vectors = patches.numpy()
    faiss_index.train(vectors)
    faiss_index.add(vectors)
    return faiss_index, sub_index
def make_index():
    """Create an inner-product IVFFlat index, optionally behind a PCA.

    Reads ``dim``, ``nlist``, ``pca`` and ``in_dim`` from the enclosing
    scope.  When ``pca`` is truthy the IVF index is wrapped in an
    ``IndexPreTransform`` that reduces ``in_dim`` inputs to ``dim``.
    """
    coarse = faiss.IndexFlatIP(dim)
    ivf = faiss.IndexIVFFlat(coarse, dim, nlist)
    if not pca:
        return ivf

    # PCAMatrix's optional eigen_power / random_rotation arguments are not
    # passed, so the library defaults apply.
    # NOTE(review): the faiss headers describe them as a whitening exponent
    # and a post-PCA random rotation -- confirm the defaults are wanted.
    reducer = faiss.PCAMatrix(in_dim, dim)
    return faiss.IndexPreTransform(reducer, ivf)
def train_index(start_data, quantizer_path, trained_index_path, num_clusters,
                fine_quant='SQ4', cuda=False, hnsw=False):
    """Create, train and save an inner-product faiss index.

    Args:
        start_data: 2-D training matrix; ``start_data.shape[1]`` is the
            vector dimension.
        quantizer_path: accepted but not used by this function.
        trained_index_path: where the trained index is written.
        num_clusters: number of inverted lists for the IVF variants.
        fine_quant: selects the index type -- ``'SQ4'`` (4-bit scalar
            quantizer), any string containing ``'OPQ'`` followed by the
            integer code size, or any string containing ``'none'`` (flat).
        cuda: train on GPU 0 (float16 cloner option) and copy back to CPU.
        hnsw: with an OPQ ``fine_quant``, build an HNSW+PQ index instead of
            OPQ + IVFPQ.

    Raises:
        ValueError: if ``fine_quant`` matches none of the supported forms.
    """
    dim = start_data.shape[1]
    quantizer = faiss.IndexFlatIP(dim)
    is_flat = False

    if fine_quant == 'SQ4':
        # Used only for reimplementation
        start_index = faiss.IndexIVFScalarQuantizer(
            quantizer, dim, num_clusters,
            faiss.ScalarQuantizer.QT_4bit, faiss.METRIC_INNER_PRODUCT,
        )
    elif 'OPQ' in fine_quant:
        # Default index type; the code size is whatever follows the first 'OPQ'
        code_size = int(fine_quant.partition('OPQ')[2])
        if hnsw:
            # NOTE(review): a factory-style string is passed where the other
            # constructors in this function take integers -- confirm that
            # this call is valid for the faiss version in use.
            start_index = faiss.IndexHNSWPQ(dim, "HNSW32,PQ96", faiss.METRIC_INNER_PRODUCT)
        else:
            opq_matrix = faiss.OPQMatrix(dim, code_size)
            opq_matrix.niter = 10
            ivfpq = faiss.IndexIVFPQ(
                quantizer, dim, num_clusters, code_size, 8,
                faiss.METRIC_INNER_PRODUCT,
            )
            start_index = faiss.IndexPreTransform(opq_matrix, ivfpq)
    elif 'none' in fine_quant:
        start_index = faiss.IndexFlatIP(dim)
        is_flat = True
    else:
        raise ValueError(fine_quant)

    start_index.verbose = False

    if not cuda:
        start_index.train(start_data)
    else:
        # Convert to GPU index
        gpu_resources = faiss.StandardGpuResources()
        cloner_options = faiss.GpuClonerOptions()
        cloner_options.useFloat16 = True
        gpu_index = faiss.index_cpu_to_gpu(gpu_resources, 0, start_index, cloner_options)
        gpu_index.verbose = False

        # Train on GPU and back to CPU
        gpu_index.train(start_data)
        start_index = faiss.index_gpu_to_cpu(gpu_index)

    # Make sure to set direct map again
    if not is_flat:
        ivf = faiss.extract_index_ivf(start_index)
        ivf.make_direct_map()
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

    faiss.write_index(start_index, trained_index_path)
def test_IndexPreTransform(self):
    """IndexPreTransform must stay usable once its parts are dropped in Python.

    The transform and the sub-index are passed to the constructor, then
    their Python names are deleted one after the other (each followed by a
    garbage collection) while vectors keep being added to the wrapper.
    """
    normalizer = faiss.NormalizationTransform(d)
    flat = faiss.IndexFlatL2(d)
    wrapper = faiss.IndexPreTransform(normalizer, flat)
    wrapper.add(xb)

    # drop the transform's Python reference
    del normalizer
    gc.collect()
    wrapper.add(xb)

    # drop the sub-index's Python reference
    del flat
    gc.collect()
    wrapper.add(xb)
def test_IndexPreTransform_2(self):
    """Same lifetime check as above, with the transform added via prepend_transform.

    The wrapper is built from the sub-index alone, the normalization is
    prepended afterwards, and the Python names of both parts are deleted
    (each followed by a garbage collection) while vectors keep being added.
    """
    flat = faiss.IndexFlatL2(d)
    wrapper = faiss.IndexPreTransform(flat)
    normalizer = faiss.NormalizationTransform(d)
    wrapper.prepend_transform(normalizer)
    wrapper.add(xb)

    # drop the transform's Python reference
    del normalizer
    gc.collect()
    wrapper.add(xb)

    # drop the sub-index's Python reference
    del flat
    gc.collect()
    wrapper.add(xb)
def fit(self, X):
    """Train and populate a dimension-padded IVFPQ index on ``X``.

    Reads ``nlist``, ``nprobe``, ``m`` and ``b`` from ``self.params``.  The
    input dimension is rounded up to the next multiple of ``m`` with a
    ``RemapDimensionsTransform`` so that it is divisible by the number of
    PQ sub-quantizers.

    Args:
        X: 2-D array of vectors; converted to float32 before use.

    Sets ``self.remapper``, ``self.quantizer``, ``self.index_pq`` and
    ``self.index``.
    """
    nlist = self.params['nlist']
    nprobe = self.params['nprobe']
    m = self.params['m']
    b = self.params['b']

    _, w = X.shape
    # smallest multiple of m that is >= w (integer arithmetic, no float
    # round trip)
    d = ((w + m - 1) // m) * m

    self.remapper = faiss.RemapDimensionsTransform(w, d, True)
    self.quantizer = faiss.IndexFlatL2(d)
    self.index_pq = faiss.IndexIVFPQ(self.quantizer, d, nlist, m, b)
    self.index = faiss.IndexPreTransform(self.remapper, self.index_pq)

    # convert once and reuse for both training and adding
    data = X.astype('float32')
    self.index.train(data)
    self.index.add(data)

    # nprobe is a field of the IVF index.  Assigning it on the
    # IndexPreTransform wrapper only creates a Python attribute that the
    # search never reads, so set it on the IVF index itself.
    self.index_pq.nprobe = nprobe
    # Kept so that code reading ``self.index.nprobe`` still sees the value.
    self.index.nprobe = nprobe
def __init__(self, d):
    """Build an untrained PCA(d -> 256) + IVFPQ index.

    Args:
        d: dimension of the input vectors.

    Sets ``self.index2`` (the ``IndexPreTransform``), ``self.sub_index``
    (the wrapped ``IndexIVFPQ``) and ``self.pca_matrix``.
    """
    d2 = 256
    nlist = 100  # numCentroids
    # Value actually passed to IndexIVFPQ.  The previous code also declared
    # an unused ``m = 8  # numQuantizers``; that local has been removed.
    num_subquantizers = 16

    coarse_quantizer = faiss.IndexFlatL2(d2)
    sub_index = faiss.IndexIVFPQ(coarse_quantizer, d2, nlist, num_subquantizers, 8)
    pca_matrix = faiss.PCAMatrix(d, d2, 0, True)
    self.index2 = faiss.IndexPreTransform(pca_matrix, sub_index)

    # Hand the coarse quantizer over to sub_index: mark sub_index as owning
    # its fields and make the Python wrapper give up ownership.
    sub_index.own_fields = True
    coarse_quantizer.this.disown()

    self.sub_index = sub_index
    self.pca_matrix = pca_matrix

    # nprobe is a field of the IVF index.  Assigning it on the
    # IndexPreTransform wrapper only creates a Python attribute that the
    # search never reads, so set it on the IVF index itself.
    sub_index.nprobe = 10
    # Kept so that code reading ``self.index2.nprobe`` still sees the value.
    self.index2.nprobe = 10
def index_factory(d: int, index_key: str, metric_type: int, ef_construction: Optional[int] = None):
    """
    custom index_factory that fix some issues of
    faiss.index_factory with inner product metrics.

    For any metric other than inner product the call is forwarded to
    ``faiss.index_factory`` unchanged.  For inner product, these key shapes
    are built by hand so that the metric reaches every sub-index:

    * ``OPQ<M>_<out_d>,IVF<n>,PQ<cs>[x<nbits>]``
    * ``OPQ<M>_<out_d>,IVF<n>_HNSW<M_hnsw>,PQ<cs>[x<nbits>]``
    * ``Pad<out_d>,IVF<n>_HNSW<M_hnsw>,PQ<cs>[x<nbits>]``
    * ``HNSW<M_hnsw>``
    * ``Flat``

    Any other key raises ``ValueError`` under the inner product metric.

    ``ef_construction`` is applied to HNSW coarse quantizers when it is not
    None and at least 1.

    NOTE(review): the patterns are searched anywhere inside ``index_key``
    and the numeric parameters are taken positionally from *all* integers
    in the key, so keys carrying extra numeric components may be parsed in
    unexpected ways -- confirm against the keys callers actually pass.
    """
    # Anything but inner product is handled well enough by faiss itself.
    if metric_type != faiss.METRIC_INNER_PRODUCT:
        return faiss.index_factory(d, index_key, metric_type)

    def key_has(pattern):
        return re.search(pattern, index_key) is not None

    def hnsw_coarse_quantizer(dim, hnsw_m):
        hnsw = faiss.IndexHNSWFlat(dim, hnsw_m, metric_type)
        if ef_construction is not None and ef_construction >= 1:
            hnsw.hnsw.efConstruction = ef_construction
        return hnsw

    # every integer that appears in the key, in order of appearance
    numbers = [int(token) for token in re.findall(r"\d+", index_key)]

    # make the index described by the key
    if key_has(r"OPQ\d+_\d+,IVF\d+,PQ\d+"):
        opq_m, out_d, ncentroids, code_size = numbers[:4]  # code size in Bytes if nbits=8
        nbits = numbers[4] if len(numbers) == 5 else 8  # default value

        quantizer = faiss.index_factory(out_d, "Flat", metric_type)
        assert quantizer.metric_type == metric_type
        ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, code_size, nbits, metric_type)
        assert ivfpq.metric_type == metric_type
        ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=opq_m, d2=out_d)
        # opq_matrix.niter = 50 # Same as default value
        return faiss.IndexPreTransform(opq_matrix, ivfpq)

    if key_has(r"OPQ\d+_\d+,IVF\d+_HNSW\d+,PQ\d+"):
        opq_m, out_d, ncentroids, hnsw_m, code_size = numbers[:5]
        nbits = numbers[5] if len(numbers) == 6 else 8  # default value

        quantizer = hnsw_coarse_quantizer(out_d, hnsw_m)
        assert quantizer.metric_type == metric_type
        ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, code_size, nbits, metric_type)
        assert ivfpq.metric_type == metric_type
        ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=opq_m, d2=out_d)
        # opq_matrix.niter = 50 # Same as default value
        return faiss.IndexPreTransform(opq_matrix, ivfpq)

    if key_has(r"Pad\d+,IVF\d+_HNSW\d+,PQ\d+"):
        out_d, ncentroids, hnsw_m, code_size = numbers[:4]
        nbits = numbers[4] if len(numbers) == 5 else 8  # default value

        remapper = faiss.RemapDimensionsTransform(d, out_d, True)
        quantizer = hnsw_coarse_quantizer(out_d, hnsw_m)
        ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, code_size, nbits, metric_type)
        ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        return faiss.IndexPreTransform(remapper, ivfpq)

    if key_has(r"HNSW\d+"):
        hnsw_index = faiss.IndexHNSWFlat(d, numbers[0], metric_type)
        assert hnsw_index.metric_type == metric_type
        return hnsw_index

    if index_key == "Flat":
        return faiss.index_factory(d, index_key, metric_type)

    # Unsupported key.  The factory is still invoked first, so a key that
    # faiss itself rejects surfaces faiss's own error before the ValueError.
    faiss.index_factory(d, index_key, metric_type)
    raise ValueError((
        "Be careful, faiss might not create what you expect when using the "
        "inner product similarity metric, remove this line to try it anyway."
        "Happened with index_key: " + str(index_key)))
# The centroid table is 2-D: one row per centroid, d columns.
ncent, d = centroids.shape

# Rotate the centroids with a seeded random rotation; the same transform
# object is placed in front of the final index below.
print('apply random rotation')
rrot = faiss.RandomRotationMatrix(d, d)
rrot.init(1234)
centroids = rrot.apply_py(centroids)

# The rotated centroids are stored in an HNSW graph that serves as the
# coarse quantizer of the IVF index.
print('make HNSW index as quantizer')
quantizer = faiss.IndexHNSWFlat(d, 32)
quantizer.hnsw.efSearch = 1024
quantizer.hnsw.efConstruction = 200
quantizer.add(centroids)

# Rotation followed by an IVF index with 6-bit scalar-quantized codes.
print('build index')
index = faiss.IndexPreTransform(
    rrot,
    faiss.IndexIVFScalarQuantizer(
        quantizer, d, ncent, faiss.ScalarQuantizer.QT_6bit
    ),
)


def ivecs_mmap(fname):
    """Memory-map ``fname`` as int32 records and return their payload.

    The first int32 of the file is taken as the per-record length ``dim``;
    the file is viewed as rows of ``dim + 1`` values and the leading column
    is dropped.  This assumes every record has the same length as the
    first one.
    """
    raw = np.memmap(fname, dtype='int32', mode='r')
    dim = raw[0]
    records = raw.reshape(-1, dim + 1)
    return records[:, 1:]


def fvecs_mmap(fname):
    """Same layout as ``ivecs_mmap``, with the payload reinterpreted as float32."""
    payload = ivecs_mmap(fname)
    return payload.view('float32')


print('finish training index')
xt = fvecs_mmap(deep1bdir + 'learn.fvecs')