def __init__(self,
             target,
             nprobe=128,
             num_gpu=None,
             index_factory_str=None,
             verbose=False,
             mode='proxy',
             using_gpu=True):
    """Build, train and populate a replicated faiss index over ``target``.

    The finished index is stored on ``self.index``; the GPU resource
    objects created along the way are kept alive in ``self._res_list``.

    Args:
        target: 2-D array of vectors to index. ``target.shape`` is unpacked
            as ``(size, dim)`` and ``size`` must be positive.
        nprobe: value assigned to ``nprobe`` on the CPU index before it is
            replicated.
        num_gpu: number of replicas to create. ``None`` or a value larger
            than the number of entries in ``CUDA_VISIBLE_DEVICES`` selects
            every listed device.
        index_factory_str: faiss index-factory description. Defaults to
            ``"IVF<nlist>,PQ32"`` with
            ``nlist = min(8192, 16 * round(sqrt(size)))``.
        verbose: assigned to the ``verbose`` attribute of the final index.
        mode: only ``'proxy'`` is implemented.
        using_gpu: when false, the CPU index itself is registered for each
            replica slot instead of a GPU clone.

    Raises:
        RuntimeError: ``CUDA_VISIBLE_DEVICES`` is unset or lists no device.
        NotImplementedError: ``mode == 'shard'``.
        KeyError: ``mode`` is neither ``'proxy'`` nor ``'shard'``.
    """
    import re  # local import: only needed to parse the factory string

    self._res_list = []

    # Count only non-empty entries: "".split(",") yields [''], which would
    # otherwise be mistaken for one visible device. An unset variable is
    # treated the same as an empty one so the intended error is raised.
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    found_gpu = len(
        [entry for entry in visible_devices.split(",") if entry.strip()])
    if found_gpu == 0:
        raise RuntimeError(
            "No GPU found. Please export CUDA_VISIBLE_DEVICES")
    if num_gpu is None or num_gpu > found_gpu:
        num_gpu = found_gpu
    print('[faiss gpu] #GPU: {}'.format(num_gpu))

    size, dim = target.shape
    assert size > 0, "size: {}".format(size)
    if index_factory_str is None:
        # int() keeps the factory string integral ("IVF160", never
        # "IVF160.0") regardless of what round() returns for a numpy float.
        default_nlist = min(8192, 16 * int(round(np.sqrt(size))))
        index_factory_str = "IVF{},PQ{}".format(default_nlist, 32)
    cpu_index = faiss.index_factory(dim, index_factory_str)
    cpu_index.nprobe = nprobe

    if mode == 'proxy':
        co = faiss.GpuClonerOptions()
        co.useFloat16 = True
        co.usePrecomputed = False

        index = faiss.IndexProxy()
        for i in range(num_gpu):
            res = faiss.StandardGpuResources()
            self._res_list.append(res)
            # NOTE(review): with using_gpu=False every slot receives the
            # very same cpu_index object -- confirm this is intended.
            sub_index = faiss.index_cpu_to_gpu(
                res, i, cpu_index, co) if using_gpu else cpu_index
            index.addIndex(sub_index)
    elif mode == 'shard':
        raise NotImplementedError
    else:
        raise KeyError("Unknown index mode")

    index = faiss.IndexIDMap(index)
    index.verbose = verbose

    # training
    if not index.is_trained:
        # nlist (taken from the "IVF<nlist>" component) decides how many
        # samples are used for training.
        ivf_component = re.search(r'IVF(\d+)', index_factory_str)
        if ivf_component is not None:
            nlist = int(ivf_component.group(1))
            indexes_sample_for_train = np.random.randint(
                0, size, nlist * 256)
            index.train(target[indexes_sample_for_train])
        else:
            # No IVF component to size the sample from: train on all data.
            index.train(target)

    # add with ids
    target_ids = np.arange(0, size)
    index.add_with_ids(target, target_ids)
    self.index = index
def train_kmeans(x, k, ngpu):
    """Run k-means on one or several GPUs and return the centroids.

    Args:
        x: 2-D array of training vectors; ``x.shape[1]`` is used as the
            vector dimension.
        k: number of centroids.
        ngpu: number of GPUs; devices ``0 .. ngpu - 1`` are used.

    Returns:
        The centroids reshaped to ``(k, d)``.
    """
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for _ in range(ngpu)]

    useFloat16 = False

    if ngpu == 1:
        index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16)
    else:
        # One flat L2 index per device, replicated behind a proxy.
        indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16)
                   for i in range(ngpu)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    obj = faiss.vector_float_to_array(clus.obj)
    # Single-argument call form: prints the same text on Python 2 and 3.
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
def train_kmeans(x, k, ngpu):
    """Cluster ``x`` into ``k`` centroids using flat inner-product GPU indexes.

    One ``GpuIndexFlatIP`` is created per device ``0 .. ngpu - 1``. A single
    device is used directly; otherwise the per-device indexes are grouped
    behind an ``IndexProxy``. Returns the centroids reshaped to ``(k, d)``
    where ``d = x.shape[1]``.
    """
    dim = x.shape[1]

    clustering = faiss.Clustering(dim, k)
    clustering.verbose = True
    clustering.niter = 20
    # A huge per-centroid cap keeps faiss from sub-sampling the training set.
    clustering.max_points_per_centroid = 10000000

    resources = [faiss.StandardGpuResources() for _ in range(ngpu)]

    configs = []
    for device in range(ngpu):
        config = faiss.GpuIndexFlatConfig()
        config.useFloat16 = False
        config.device = device
        configs.append(config)

    gpu_indexes = [
        faiss.GpuIndexFlatIP(resource, dim, config)
        for resource, config in zip(resources, configs)
    ]
    if ngpu == 1:
        index = gpu_indexes[0]
    else:
        index = faiss.IndexProxy()
        for gpu_index in gpu_indexes:
            index.addIndex(gpu_index)

    # Run the clustering against the (possibly replicated) index.
    clustering.train(x, index)

    flat_centroids = faiss.vector_float_to_array(clustering.centroids)
    objective_history = faiss.vector_float_to_array(clustering.obj)
    print("final objective: %.4g" % objective_history[-1])

    return flat_centroids.reshape(k, dim)
def test_proxy(self):
    """Search through an IndexProxy that wraps three populated flat indexes."""
    proxy = faiss.IndexProxy()

    replica_count = 3
    for _ in range(replica_count):
        # Each replica is an independent flat L2 index over the same data.
        replica = faiss.IndexFlatL2(self.d)
        replica.add(self.xb)
        proxy.addIndex(replica)

    # The proxy must report the dimension of its sub-indexes.
    assert proxy.d == self.d

    proxy.search(self.xb, 10)
def get_populated_index(preproc):
    """Return a GPU index holding the populated index, using a cache file.

    Relies on names from the enclosing scope: ``index_cachefile``,
    ``altadd``, ``compute_populated_index``, ``compute_populated_index_2``,
    ``use_float16``, ``use_precomputed_tables``, ``replicas``, ``ngpu`` and
    ``make_vres_vdev``.

    When ``index_cachefile`` is unset or does not exist, the index is built
    with one of the ``compute_populated_index*`` helpers and, if a cache
    path is configured, written to it. Otherwise the CPU index is read back
    from the cache file.

    Args:
        preproc: passed through to the ``compute_populated_index*`` helper.

    Returns:
        With ``replicas == 1``: the GPU index produced while building, or a
        multi-GPU copy of the CPU index when it was loaded from cache.
        Otherwise: an ``IndexProxy`` over ``replicas`` multi-GPU copies.
    """
    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)

        if index_cachefile:
            print("store %s" % (index_cachefile,))
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load %s" % (index_cachefile,))
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True    # the replicas will be made "manually"

    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:
        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index
    else:
        del gpu_index  # We override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)

        index = faiss.IndexProxy()

        for i in range(replicas):
            # Floor division keeps the GPU range bounds integral on
            # Python 3 as well (plain "/" would produce floats there).
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)

            print(" dispatch to GPUs %d:%d" % (gpu0, gpu1))

            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            # NOTE(review): presumably hands ownership of the wrapped
            # object over to the proxy (see own_fields below) -- confirm.
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """Cluster ``x`` with faiss k-means on the CPU or on the given GPUs.

    ``x`` must contain only finite values. ``gpu_ids`` may be ``None``
    (CPU flat L2 index), a single int, or a non-empty sequence of device
    ids; with several ids the per-device flat L2 indexes are grouped behind
    an ``IndexProxy``. Returns the centroids reshaped to
    ``(num_clusters, x.shape[1])``.
    """
    assert not np.isnan(x).any(), 'x contains NaN'
    assert np.isfinite(x).all(), 'x contains Inf'

    # Accept a bare device id as shorthand for a one-element list.
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    dim = x.shape[1]

    clustering = faiss.Clustering(dim, num_clusters)
    clustering.verbose = bool(verbose)
    clustering.niter = niter
    clustering.nredo = nredo
    # A huge per-centroid cap keeps faiss from sub-sampling the training set.
    clustering.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(dim)
    else:
        resources = [faiss.StandardGpuResources() for _ in gpu_ids]

        configs = []
        for device in gpu_ids:
            config = faiss.GpuIndexFlatConfig()
            config.useFloat16 = False
            config.device = device
            configs.append(config)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(resources[0], dim, configs[0])
        else:
            per_device = [
                faiss.GpuIndexFlatL2(resource, dim, config)
                for resource, config in zip(resources, configs)
            ]
            index = faiss.IndexProxy()
            for device_index in per_device:
                index.addIndex(device_index)

    # Run the clustering against the chosen index.
    clustering.train(x, index)

    flat_centroids = faiss.vector_float_to_array(clustering.centroids)
    # The objective history is fetched but not reported (debug logging of
    # its last value is disabled).
    objective_history = faiss.vector_float_to_array(clustering.obj)

    return flat_centroids.reshape(num_clusters, dim)
def _init_faiss(self, ngpu, feat_len):
    """Create one flat inner-product GPU index per device behind a proxy.

    Populates ``self.flat_config``, ``self.res``, ``self.indexes`` and
    ``self.index``. ``self.cfg`` is left pointing at the config of the
    last device (it is not set at all when ``ngpu`` is 0).
    """
    self.flat_config = []
    for device in range(ngpu):
        device_config = faiss.GpuIndexFlatConfig()
        # The config is also exposed on the instance while being filled in.
        self.cfg = device_config
        device_config.useFloat16 = False
        device_config.device = device
        self.flat_config.append(device_config)

    self.res = [faiss.StandardGpuResources() for _ in range(ngpu)]
    self.indexes = [
        faiss.GpuIndexFlatIP(resource, feat_len, config)
        for resource, config in zip(self.res, self.flat_config)
    ]

    self.index = faiss.IndexProxy()
    for gpu_index in self.indexes:
        self.index.addIndex(gpu_index)
def _init_faiss(self, ngpu, feat_len, tempIVF, tempPQ, tempNP):
    """Create one factory-built index per GPU and group them behind a proxy.

    For each device ``0 .. ngpu - 1`` a CPU index is built from the factory
    string ``tempIVF + "," + tempPQ``, its ``nprobe`` is set to ``tempNP``
    and it is copied to that device. Populates ``self.res``,
    ``self.indexes`` and ``self.index``.
    """
    cloner_options = faiss.GpuClonerOptions()
    cloner_options.useFloat16 = True
    cloner_options.usePrecomputed = False

    # One resources object per device; they stay referenced on the instance.
    self.res = [faiss.StandardGpuResources() for _ in range(ngpu)]

    self.indexes = []
    for device in range(ngpu):
        cpu_index = faiss.index_factory(feat_len, tempIVF + "," + tempPQ)
        cpu_index.nprobe = tempNP
        gpu_index = faiss.index_cpu_to_gpu(
            self.res[device], device, cpu_index, cloner_options)
        self.indexes.append(gpu_index)

    self.index = faiss.IndexProxy()
    for gpu_index in self.indexes:
        self.index.addIndex(gpu_index)
def test_stress(self):
    """Add vectors with ids through an IndexIDMap over proxied GPU indexes.

    A mixture of the other cases, taken from issue #631. Each sub-index is
    given a freshly created ``StandardGpuResources`` that is not stored
    anywhere else by this test.
    """
    vectors = np.random.rand(50, 16).astype('float32')

    stacked = faiss.IndexProxy()
    n_vectors, n_dims = vectors.shape

    replica_count = 4
    for _ in range(replica_count):
        flat_config = faiss.GpuIndexFlatConfig()
        flat_config.device = 0  # simulate on a single GPU
        replica = faiss.GpuIndexFlatIP(
            faiss.StandardGpuResources(), n_dims, flat_config)
        stacked.addIndex(replica)

    # Rebinding the name drops this function's direct reference to the proxy.
    stacked = faiss.IndexIDMap(stacked)

    labels = np.arange(n_vectors)
    stacked.add_with_ids(vectors, labels)
def train_kmeans(x, num_clusters=1000, num_gpus=1):
    """
    Runs k-means clustering on one or several GPUs.

    Args:
        x: 2-D array of training vectors; ``x.shape[1]`` is used as the
            vector dimension.
        num_clusters: number of centroids.
        num_gpus: number of GPUs; devices ``0 .. num_gpus - 1`` are used.

    Returns:
        The centroids reshaped to ``(num_clusters, d)``.
    """
    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = True
    kmeans.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for _ in range(num_gpus)]

    flat_config = []
    for i in range(num_gpus):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if num_gpus == 1:
        index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
    else:
        # One flat L2 index per device, replicated behind a proxy.
        indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                   for i in range(num_gpus)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    # Single pre-formatted argument per call so the output is the same on
    # Python 2 and Python 3 (the values are wrapped in 1-tuples so an array
    # is never mistaken for a sequence of format arguments).
    print('Total number of indexed vectors (after kmeans.train()): %s'
          % (index.ntotal,))

    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    print('Objective values per iter: %s' % (objective,))
    print("Final objective: %.4g" % objective[-1])

    # TODO: return cluster assignment

    return centroids.reshape(num_clusters, d)
def kmeans(features, nclusters, num_iters, ngpu, njobs, seed):
    """
    Run k-means on features, generating nclusters clusters. It will use, in order of preference, Faiss, pomegranate, or
    scikit-learn.

    The backend is selected by the module-level flags ``USE_FAISS``, ``USE_GPU`` and ``USE_POMEGRANATE``.

    :param features: Features to cluster.
    :param nclusters: Number of clusters to generate.
    :param num_iters: Maximum number of iterations to perform.
    :param ngpu: Number of GPUs to use (if GPUs are available). Only read by the Faiss GPU path.
    :param njobs: Number of threads to use. Only read by the pomegranate path.
    :param seed: Seed for reproducibility.
    :return: centroids: The centroids found with k-means.
    """
    print('Running k-means...')

    if USE_FAISS:
        d = features.shape[1]
        # Faiss is handed a contiguous float32 copy of the features.
        faiss_features = np.ascontiguousarray(features).astype('float32')

        clus = faiss.Clustering(d, nclusters)
        clus.verbose = True
        clus.niter = num_iters
        if seed is not None:
            clus.seed = seed

        # otherwise the kmeans implementation sub-samples the training set
        clus.max_points_per_centroid = 10000000

        if USE_GPU:
            res = [faiss.StandardGpuResources() for _ in range(ngpu)]

            flat_config = []
            for i in range(ngpu):
                cfg = faiss.GpuIndexFlatConfig()
                cfg.useFloat16 = False
                cfg.device = i
                flat_config.append(cfg)

            if ngpu == 1:
                index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
            else:
                # One flat L2 index per device, replicated behind a proxy.
                indexes = [
                    faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                    for i in range(ngpu)
                ]
                index = faiss.IndexProxy()
                for sub_index in indexes:
                    index.addIndex(sub_index)
        else:
            index = faiss.IndexFlatL2(d)

        clus.train(faiss_features, index)
        centroids = faiss.vector_float_to_array(clus.centroids)
        centroids = centroids.reshape(nclusters, d)
    elif USE_POMEGRANATE and seed is None:
        kmeans = pomegranate.kmeans.Kmeans(nclusters, init='kmeans++', n_init=10)
        kmeans.fit(features, max_iterations=num_iters, n_jobs=njobs)
        centroids = kmeans.centroids
    else:
        if USE_POMEGRANATE and seed is not None:
            print(
                'Pomegranate does not currently support k-means with a seed. Switching to scikit-learn instead.'
            )
        print('Using scikit-learn. This may be slow!')
        # Honour the documented iteration limit in this backend as well.
        kmeans = sklearn.cluster.KMeans(n_clusters=nclusters,
                                        max_iter=num_iters,
                                        random_state=seed).fit(features)
        centroids = kmeans.cluster_centers_

    return centroids
cfg = faiss.GpuIndexIVFFlatConfig( ) #faiss.GpuIndexFlatConfig() faiss.GpuIndexIVFPQConfig() cfg.useFloat16 = False cfg.device = i flat_config.append(cfg) #indexes = [faiss.GpuIndexFlatL2(res[i],d,flat_config[i]) for i in range(ngpus)] #可行,速度快,不需要train,直接计算L2距离 #indexes = [faiss.GpuIndexIVFPQ(res[i],d,nlist, m,4,faiss.METRIC_L2,flat_config[i]) for i in range(ngpus)] indexes = [ faiss.GpuIndexIVFFlat(res[i], d, nlist, faiss.METRIC_L2, flat_config[i]) for i in range(ngpus) ] # then we make an Index array # useFloat16 is a boolean value index = faiss.IndexProxy() for sub_index in indexes: index.addIndex(sub_index) index.train(pin_data_drop_new) #影响PQ的时间的因素??? print(index.is_trained) index.add(pin_data_drop_new) index.nprobe = 30 #参数需要调,适当增加nprobe可以得到和brute-force相同的结果,nprobe控制了速度和精度的平衡 print(index.ntotal) #index中向量的个数 beg = time.time() query_self = pin_data_drop_new[:200000] # 查询本身 dis, ind = index.search(query_self, k) #查询自身 print(time.time() - beg)