def train_kmeans(x, k, ngpu):
    """Run k-means on one or several GPUs and return the centroids.

    Args:
        x: training vectors; ``x.shape[1]`` is taken as the dimensionality.
        k: number of centroids.
        ngpu: number of GPUs to use (devices ``0 .. ngpu - 1``).

    Returns:
        The centroids reshaped to ``(k, d)``.
    """
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    # One resources object per device; the list keeps them alive while the
    # indexes are in use.
    res = [faiss.StandardGpuResources() for _ in range(ngpu)]

    useFloat16 = False

    if ngpu == 1:
        index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16)
    else:
        # Keep the sub-indexes in a list so they stay referenced while the
        # proxy dispatches to them.
        indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16)
                   for i in range(ngpu)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    obj = faiss.vector_float_to_array(clus.obj)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
def train_kmeans(x, num_clusters=1000, gpu_ids=None, niter=100, nredo=1, verbose=0):
    """Run k-means clustering on the CPU or on one or several GPUs.

    Args:
        x: 2-D array of training vectors; must not contain NaN or Inf.
        num_clusters: number of centroids to fit.
        gpu_ids: ``None`` to cluster with a CPU ``IndexFlatL2``; an int for a
            single device; or a non-empty sequence of device ids.
        niter: number of k-means iterations.
        nredo: number of times the clustering is redone.
        verbose: truthy to enable faiss' verbose output.

    Returns:
        The centroids reshaped to ``(num_clusters, d)``.

    Raises:
        AssertionError: if ``x`` contains NaN/Inf or ``gpu_ids`` is an empty
            sequence.
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(d)
    else:
        # One resources object and one config per requested device.
        res = [faiss.StandardGpuResources() for _ in gpu_ids]

        flat_config = []
        for device_id in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = device_id
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            # Keep the sub-indexes in a list so they stay referenced while
            # the proxy dispatches to them.
            indexes = [
                faiss.GpuIndexFlatL2(gpu_res, d, cfg)
                for gpu_res, cfg in zip(res, flat_config)
            ]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    return centroids.reshape(num_clusters, d)
def test_IndexIVFPQ(self):
    """Check that IVFPQ search recovers enough exact nearest neighbours.

    Ground truth comes from a flat L2 GPU index; the test passes when the
    number of matches between the IVFPQ results and the ground-truth
    nearest neighbour exceeds 20% of the number of queries.
    """
    (xt, xb, xq) = self.get_dataset()
    d = xt.shape[1]

    dev_no = 0
    usePrecomputed = True

    res = faiss.StandardGpuResources()

    # Exact nearest neighbour of every query, used as reference.
    gt_index = faiss.GpuIndexFlatL2(res, dev_no, d, False)
    gt_index.add(xb)
    D, gt_nns = gt_index.search(xq, 1)

    coarse_quantizer = faiss.IndexFlatL2(d)
    ncentroids = int(np.sqrt(xb.shape[0])) * 4

    index = faiss.IndexIVFPQ(coarse_quantizer, d, ncentroids, 32, 8)
    # add implemented on GPU but not train
    index.train(xt)

    gpuIndex = faiss.GpuIndexIVFPQ(res, dev_no, faiss.INDICES_64_BIT,
                                   False, index)
    gpuIndex.setPrecomputedCodes(usePrecomputed)
    gpuIndex.setNumProbes(64)

    # NOTE(review): vectors are added to, and searched in, the CPU `index`;
    # `gpuIndex` is configured but never queried - confirm this is intended.
    index.add(xb)
    D, nns = index.search(xq, 10)

    # (nq, 10) == (nq, 1) broadcasts: counts queries whose true nearest
    # neighbour appears among the 10 results.
    n_ok = (nns == gt_nns).sum()
    nq = xq.shape[0]
    # Formatted single-argument print gives the same output on Python 2 and 3.
    print("%s %s %s" % (ncentroids, n_ok, nq))

    self.assertGreater(n_ok, nq * 0.2)
def run_kmeans(x, nmb_clusters, verbose=False):
    """Cluster ``x`` with faiss k-means on GPU device 0.

    Args:
        x: 2-D data array (rows are samples).
        nmb_clusters (int): number of clusters.
        verbose (bool): print the per-iteration loss when true.

    Returns:
        tuple: (list with the cluster id of every row of ``x``,
        objective value of the last iteration).
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)

    # Draw a fresh faiss seed on every call so that the randomly picked
    # initial centroids do not map to the same feature ids from one epoch
    # to the next.
    kmeans.seed = np.random.randint(1234)

    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    gpu_res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = 0
    index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    _, assignments = index.search(x, 1)

    iteration_stats = kmeans.iteration_stats
    losses = np.array([
        iteration_stats.at(step).obj
        for step in range(iteration_stats.size())
    ])
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    cluster_ids = [int(row[0]) for row in assignments]
    return cluster_ids, losses[-1]
def run_kmeans(vecs, ncentroids=10, niter=20, device=0, verbose=True):
    """Cluster ``vecs`` with faiss k-means and return the assignments.

    Args:
        vecs: 2-D array of vectors (rows are samples).
        ncentroids: number of centroids.
        niter: number of k-means iterations.
        device: GPU device id, or ``-1`` to run on the CPU.
        verbose: passed through to faiss.

    Returns:
        The label array from ``index.search(vecs, 1)``: the nearest index
        entry for every row of ``vecs``.
    """
    dim = vecs.shape[1]

    if device == -1:
        print(" On CPU")
        cpu_kmeans = faiss.Kmeans(dim, ncentroids, niter=niter, verbose=verbose)
        cpu_kmeans.train(vecs)
        _, groups = cpu_kmeans.index.search(vecs, 1)
        return groups

    print(" On GPU")
    if vecs.sum() == 0:
        # Only a warning: clustering still proceeds on the all-zero input.
        print("All Image has no value. "
              "Please retry with the other weight.")

    clustering = faiss.Clustering(dim, ncentroids)
    clustering.verbose = verbose
    clustering.niter = niter
    clustering.max_points_per_centroid = 10000000

    gpu_res = faiss.StandardGpuResources()
    gpu_cfg = faiss.GpuIndexFlatConfig()
    gpu_cfg.useFloat16 = False
    gpu_cfg.device = device

    gpu_index = faiss.GpuIndexFlatL2(gpu_res, dim, gpu_cfg)
    clustering.train(vecs, gpu_index)
    _, groups = gpu_index.search(vecs, 1)
    return groups
def build(self, data: np.ndarray):
    """Create a flat L2 index sized for ``data`` and add all its rows.

    A GPU index (backed by ``self.res``) is used only when ``self.use_gpu``
    is exactly ``True``; otherwise a CPU index is built. The index is
    stored on ``self.index``.
    """
    dim = data.shape[1]
    self.index = (
        faiss.GpuIndexFlatL2(self.res, dim)
        if self.use_gpu is True
        else faiss.IndexFlatL2(dim)
    )
    self.index.add(data)
def run_kmeans(x, nmb_clusters, verbose=False):
    """Cluster ``x`` with faiss k-means on GPU device 0.

    Args:
        x: 2-D data array (rows are samples).
        nmb_clusters (int): number of clusters.
        verbose (bool): print the per-iteration loss when true.

    Returns:
        tuple: (list with the cluster id of every row of ``x``,
        last loss value,
        array with the distance reported by the search for every row).
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)

    # Draw a fresh faiss seed on every call so that the randomly picked
    # initial centroids do not map to the same feature ids from one epoch
    # to the next.
    kmeans.seed = np.random.randint(1234)

    kmeans.niter = 20
    kmeans.max_points_per_centroid = 100000000

    gpu_res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = 0
    index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    distances, assignments = index.search(x, 1)

    losses = faiss.vector_to_array(kmeans.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    cluster_ids = [int(row[0]) for row in assignments]
    nearest_dist = np.array([dist_row[0] for dist_row in distances])
    return cluster_ids, losses[-1], nearest_dist
def search_index_pytorch(database, x, k):
    """k-NN search of ``x`` against ``database`` through raw faiss pointers.

    A flat L2 index is built on the fly: on the GPU (using ``GPU_RES``) when
    either tensor is a CUDA tensor, on the CPU otherwise.

    :param database: tensor whose first dimension is the number of vectors
        and whose last dimension is the feature size; its storage pointer is
        handed to faiss directly (presumably float32 and contiguous - TODO
        confirm against callers)
    :param x: contiguous 2-D query tensor of shape (n, d), with d equal to
        the index dimension
    :param k: number of neighbours per query
    :return: (D, I) - float32 distances and int64 indices, both of shape
        (n, k) and allocated on ``x.device``
    """
    db_ptr = database.storage().data_ptr()

    uses_gpu = x.is_cuda or database.is_cuda
    if uses_gpu:
        index = faiss.GpuIndexFlatL2(GPU_RES, database.size(-1))
    else:
        index = faiss.IndexFlatL2(database.size(-1))

    index.add_c(database.size(0), faiss.cast_integer_to_float_ptr(db_ptr))

    assert x.is_contiguous()
    n, d = x.size()
    assert d == index.d

    # Output buffers that faiss writes into in place.
    D = torch.empty((n, k), dtype=torch.float32, device=x.device)
    I = torch.empty((n, k), dtype=torch.int64, device=x.device)

    # NOTE(review): synchronize() is called on both the CPU and GPU paths.
    torch.cuda.synchronize()
    query_ptr = __swig_ptr_from_FloatTensor(x)
    labels_ptr = __swig_ptr_from_LongTensor(I)
    dists_ptr = __swig_ptr_from_FloatTensor(D)
    index.search_c(n, query_ptr, k, dists_ptr, labels_ptr)
    torch.cuda.synchronize()

    index.reset()
    return D, I
def run_kmeans(x, nmb_clusters, verbose=False, seed=DEFAULT_KMEANS_SEED, gpu_device=0):
    """
    Cluster ``x`` with faiss k-means on a single GPU.

    Args:
    -----
    x: 2-D data array (rows are samples)
    nmb_clusters (int): number of clusters
    verbose (bool): print the per-iteration loss when true
    seed: seed given to the faiss clustering object
    gpu_device (int): id of the GPU to run on

    Returns:
    --------
    tuple: (list with the cluster id of every row of ``x``, last loss value)
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)
    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000
    kmeans.seed = seed

    gpu_res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = gpu_device
    index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    _, assignments = index.search(x, 1)

    losses = faiss.vector_to_array(kmeans.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    cluster_ids = [int(row[0]) for row in assignments]
    return cluster_ids, losses[-1]
def searchAllDB(self, xq, k=2):
    """Search every database in ``self.dbs`` and merge the best ``k`` hits.

    Each database is loaded in turn into one reusable 512-dimensional GPU
    flat L2 index and queried through ``self.search_index_pytorch``. The
    per-database results are concatenated column-wise, every row is sorted
    by ascending distance, and the first ``k`` columns are returned.

    Args:
        xq: query batch forwarded to ``self.search_index_pytorch``.
        k: number of neighbours fetched per database and kept overall.

    Returns:
        tuple: (distances, names), each with ``k`` columns per query row.
    """
    feature_dim = 512

    LOG.logI('Load index start...')
    gpu_res = faiss.StandardGpuResources()
    gpu_index = faiss.GpuIndexFlatL2(gpu_res, feature_dim)
    LOG.logI('Load index finished...')

    merged_d = []
    merged_n = []
    for db_pos, db in enumerate(self.dbs):
        db_names = self.names[db_pos]

        # Reuse the same index: drop the previous database, load this one.
        gpu_index.reset()
        gpu_index.add(db.to('cpu').numpy().astype('float32'))

        hit_d, hit_i = self.search_index_pytorch(gpu_index, xq, k)
        hit_d = hit_d.to('cpu').numpy()
        hit_i = hit_i.to('cpu').numpy()
        hit_n = db_names[hit_i]

        if len(merged_d) == 0:
            merged_d, merged_n = hit_d, hit_n
        else:
            merged_d = np.hstack((merged_d, hit_d))
            merged_n = np.hstack((merged_n, hit_n))

    # Sort every row by distance, applying the same order to the names.
    for row in range(len(merged_d)):
        order = np.argsort(merged_d[row])
        merged_d[row] = merged_d[row][order]
        merged_n[row] = merged_n[row][order]

    return merged_d[:, :k], merged_n[:, :k]
def forward(self, inputs, targets, class_emb):
    """Distance-based cross-entropy over the target and nearby classes,
    plus a class-embedding separation regulariser.

    For every pixel the ``self.num_neighbours`` closest class embeddings are
    retrieved with a faiss GPU flat L2 index. The logits are the negative,
    temperature-scaled distances between the pixel feature and the
    embeddings of [target class, retrieved classes]. The regulariser
    penalises class embeddings whose nearest other embedding is closer than
    0.2.

    Args:
        inputs: feature map indexed as (batch, channel, height, width); the
            channel size must equal ``class_emb.shape[1]``.
        targets: per-pixel class ids; for the coco/cityscapes/voc datasets
            the value 255 is rewritten to -1 IN PLACE.
        class_emb: class embedding matrix (n_classes, emb_dim).

    Returns:
        Scalar tensor: cross-entropy loss + regularisation loss.
    """
    n_classes = class_emb.shape[0]
    emb_dim = class_emb.shape[1]
    neg_samples = self.num_neighbours

    if self.dataset.lower() in ['coco', 'cityscapes', 'voc']:
        targets[targets == 255] = -1

    in_size = inputs.size()
    batch, height, width = in_size[0], in_size[2], in_size[3]

    # NOTE(review): the extent of the no_grad scope is inferred; only the
    # index-retrieval steps (non-differentiable) are placed inside it.
    with torch.no_grad():
        gpu_index = faiss.GpuIndexFlatL2(self.res, emb_dim)
        gpu_index.add(class_emb)

        # (B, C, H, W) -> (B, H, W, C) -> (B*H*W, C): one row per pixel.
        pixel_feats = inputs.permute(0, 2, 3, 1).reshape(
            batch * height * width, emb_dim)
        D, I = gpu_index.search(pixel_feats, neg_samples)

        # (B*H*W, K) -> (B, H, W, K) -> (B, K, H, W)
        ret_index = I.reshape(batch, height, width, neg_samples).permute(0, 3, 1, 2)

        # Where a retrieved class equals the target, shift it down by one
        # (clamping -1 to 0).
        mask_tar = (targets.unsqueeze(1) == ret_index)
        ret_index[mask_tar] = ret_index[mask_tar] - 1
        ret_index[ret_index == -1] = 0

        # The target class is placed first, i.e. at logit position 0.
        input_index = torch.cat([targets.unsqueeze(1), ret_index], dim=1)

    # (B, 1+K, H, W, C) -> (B, 1+K, C, H, W)
    embmat = class_emb[input_index].permute(0, 1, 4, 2, 3)

    # Distance from each class embedding to its nearest other embedding
    # (the smallest entry is the zero distance to itself).
    dist_mat = torch.cdist(class_emb, class_emb, p=2)
    min_dist = dist_mat.topk(2, largest=False)[0][:, 1]
    reg_loss = torch.max(
        0.2 - min_dist, torch.zeros(min_dist.shape).cuda()).sum() / n_classes

    norm_loss = torch.norm(inputs.unsqueeze(1) - embmat, dim=2)

    # Label is 0 (the target slot) everywhere except ignored pixels (-1).
    target_mod = (targets == -1)
    new_targets = -1 * target_mod

    cross_entropy_loss = F.cross_entropy(
        -self.temp * norm_loss, new_targets, size_average=True,
        ignore_index=-1, reduce=True, reduction='mean')

    return cross_entropy_loss + reg_loss
def run_kmeans(x, nmb_clusters, verbose=False, use_gpu=True):
    """Cluster ``x`` with faiss k-means on GPU 0 or on the CPU.

    Args:
        x: 2-D data array (rows are samples).
        nmb_clusters (int): number of clusters.
        verbose (bool): print the per-iteration loss when true.
        use_gpu (bool): use a GPU flat index on device 0 when true,
            a CPU flat index otherwise.

    Returns:
        tuple: (list with the cluster id of every row of ``x``,
        last loss value,
        centroids reshaped to ``(nmb_clusters, d)``,
        the index used for training).
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)
    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    if not use_gpu:
        index = faiss.IndexFlatL2(dim)
    else:
        gpu_res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = 0
        index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    _, assignments = index.search(x, 1)

    # The centroids are returned as well.
    centroids = faiss.vector_to_array(kmeans.centroids).reshape(
        (nmb_clusters, dim))

    losses = faiss.vector_to_array(kmeans.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    cluster_ids = [int(row[0]) for row in assignments]
    return cluster_ids, losses[-1], centroids, index
def nearest_neighbor(labeled_features, unlabeled_features, labels, k):
    """
    k-NN classification under l2 distance.

    Every unlabeled feature receives the mode of the labels of its k nearest
    labeled features. The search runs on GPU 0 when ``defaults.device`` is a
    CUDA device and on the CPU otherwise.

    :param labeled_features: Features whose labels are known
    :param unlabeled_features: Features to classify
    :param labels: Labels of ``labeled_features``; indexed with the integer
        neighbour array returned by the search
    :param k: Number of nearest neighbors to use
    :return: Tensor on ``defaults.device`` with the estimated label of each
        unlabeled feature
    """
    # faiss needs contiguous float32 input.
    reference = np.ascontiguousarray(labeled_features).astype('float32')
    queries = np.ascontiguousarray(unlabeled_features).astype('float32')

    _, dim = reference.shape

    if defaults.device.type != 'cuda':
        index = faiss.IndexFlatL2(dim)
    else:
        gpu_res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.device = 0
        index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    index.add(reference)
    _, neighbour_ids = index.search(queries, k)

    # Majority vote along the neighbour axis.
    neighbour_labels = labels[neighbour_ids]
    votes = scipy.stats.mode(neighbour_labels, axis=1)[0].flatten()

    return torch.Tensor(votes).to(defaults.device)
def run_kmeans(x, nmb_clusters):
    """Cluster ``x`` with faiss k-means on GPU device 0.

    The per-iteration loss is always printed.

    Args:
        x: 2-D data array (rows are samples).
        nmb_clusters (int): number of clusters.

    Returns:
        tuple: (list with the cluster id of every row of ``x``,
        last loss value).
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)

    # Draw a fresh faiss seed on every call so that the randomly picked
    # initial centroids do not map to the same feature ids from one epoch
    # to the next.
    kmeans.seed = np.random.randint(1234)

    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    gpu_res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = 0
    index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    _, assignments = index.search(x, 1)

    losses = faiss.vector_to_array(kmeans.obj)
    print('k-means loss evolution: {0}'.format(losses))

    cluster_ids = [int(row[0]) for row in assignments]
    return cluster_ids, losses[-1]
def build_graph(self, X, k):
    """Build a symmetric sparse kNN graph over the rows of ``X``.

    The neighbours come from a faiss flat L2 index on GPU 0. For each row
    the first search result is dropped and the remaining ``k`` neighbours
    become edges whose weight is the reported distance cubed. The matrix is
    symmetrised by adding its transpose.

    Args:
        X: 2-D array of samples (rows are points).
        k: number of neighbours kept per point.

    Returns:
        scipy sparse matrix of shape ``(N, N)`` with ``N = X.shape[0]``.
    """
    # kNN search for the graph
    d = X.shape[1]
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)  # build the index

    index.add(X)
    N = X.shape[0]

    c = time.time()
    # k + 1 because the first hit is discarded below (presumably the query
    # point itself, since X is searched against itself).
    D, I = index.search(X, k + 1)
    elapsed = time.time() - c
    # %-interpolation: the previous str.format call left a literal '%d' in
    # the logged message.
    LOG.debug('kNN Search done in %d seconds' % elapsed, LOG.ll.CLASSIFIER)

    # Create the graph
    D = D[:, 1:] ** 3
    I = I[:, 1:]
    row_idx = np.arange(N)
    row_idx_rep = np.tile(row_idx, (k, 1)).T
    W = scipy.sparse.csr_matrix(
        (D.flatten('F'), (row_idx_rep.flatten('F'), I.flatten('F'))),
        shape=(N, N))
    W = W + W.T

    return W
def run_kmeans_faiss(x: Union[np.ndarray, Tensor], nmb_clusters: int, n_iter: int, cuda: bool, verbose: bool = False) -> Tensor:
    """Cluster ``x`` with faiss k-means and return the cluster assignments.

    Args:
        x: data to cluster; a torch tensor is converted with ``.numpy()``
            and every sample is flattened to one row.
        nmb_clusters: number of clusters.
        n_iter: number of k-means iterations (honoured on both the GPU and
            the CPU path).
        cuda: run on GPU device 0 when true, on the CPU otherwise.
        verbose: passed through to faiss.

    Returns:
        Long tensor holding the cluster id of every sample.
        NOTE(review): ``squeeze()`` also collapses the sample axis when
        there is exactly one sample.
    """
    if isinstance(x, torch.Tensor):
        x = x.numpy()
    x = np.reshape(x, (x.shape[0], -1))
    _, d = x.shape

    if cuda:
        # faiss implementation of k-means
        clus = faiss.Clustering(d, nmb_clusters)
        clus.niter = n_iter
        clus.max_points_per_centroid = 10000000
        clus.verbose = verbose

        res = faiss.StandardGpuResources()
        flat_config = faiss.GpuIndexFlatConfig()
        flat_config.useFloat16 = False
        # Select the device before the index is constructed from the config.
        flat_config.device = 0
        index = faiss.GpuIndexFlatL2(res, d, flat_config)

        # perform the training
        clus.train(x, index)
        _, I = index.search(x, 1)
    else:
        kmeans = faiss.Kmeans(d=d, k=nmb_clusters, verbose=verbose, niter=n_iter)
        kmeans.train(x)
        _, I = kmeans.index.search(x, 1)

    I = torch.as_tensor(I, dtype=torch.long).squeeze()
    return I
def test_assign(self):
    """``assign`` on a GPU flat index must agree with its CPU copy.

    Covered input/output combinations: GPU torch input, numpy input,
    caller-provided numpy output, caller-provided CPU torch output.
    """
    d = 32
    k = 5
    cuda0 = torch.device('cuda', 0)

    res = faiss.StandardGpuResources()
    res.noTempMemory()

    gpu_index = faiss.GpuIndexFlatL2(res, d)
    xb = torch.rand(10000, d, device=cuda0, dtype=torch.float32)
    gpu_index.add(xb)

    # CPU reference holding the same vectors.
    cpu_index = faiss.IndexFlatL2(d)
    gpu_index.copyTo(cpu_index)

    xq = torch.rand(10, d, device=cuda0, dtype=torch.float32)
    xq_cpu = xq.cpu()
    xq_np = xq_cpu.numpy()

    # GPU torch input with native GPU output vs. CPU torch input.
    got = gpu_index.assign(xq, k)
    expected = cpu_index.assign(xq_cpu, k)
    self.assertTrue(torch.equal(got.cpu(), expected))

    # numpy input
    got = gpu_index.assign(xq_np, k)
    expected = cpu_index.assign(xq_np, k)
    self.assertTrue(np.array_equal(got, expected))

    # numpy output buffer supplied by the caller
    out_np = np.empty((xq.shape[0], k), dtype='int64')
    gpu_index.assign(xq_np, k, out_np)
    self.assertTrue(np.array_equal(out_np, expected))

    # CPU torch output buffer supplied by the caller
    out_torch = torch.empty(xq.shape[0], k, dtype=torch.int64)
    gpu_index.assign(xq_cpu, k, out_torch)
    expected = cpu_index.assign(xq_cpu, k)
    self.assertTrue(torch.equal(out_torch, expected))
def run_kmeans(x, nmb_clusters, verbose=False):
    """Cluster ``x`` with faiss k-means on GPU device 0.

    Args:
        x: 2-D data array (rows are samples).
        nmb_clusters (int): number of clusters.
        verbose (bool): accepted but not used by this implementation.

    Returns:
        tuple: (list with the cluster id of every row of ``x``, 0).
        The second element is a constant placeholder, not a loss.
    """
    _, dim = x.shape

    kmeans = faiss.Clustering(dim, nmb_clusters)
    kmeans.niter = 20
    kmeans.max_points_per_centroid = 10000000

    gpu_res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = 0
    index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

    # Train, then look up the nearest entry of the trained index per sample.
    kmeans.train(x, index)
    _, assignments = index.search(x, 1)

    cluster_ids = [int(row[0]) for row in assignments]
    return cluster_ids, 0
def find_closest_centroid(centroids, features):
    """
    Return, for every feature, the index of its nearest centroid.

    With ``USE_FAISS`` set, a flat L2 faiss index is used (on GPU 0 when
    ``USE_GPU`` is set). Otherwise the full distance matrix is computed with
    ``scipy.spatial.distance.cdist`` and reduced with ``argmin``.

    :param centroids: Centroids from codebook generation.
    :param features: Features whose nearest centroid is wanted.
    :return: idxs: Index of the nearest centroid for each row of features.
    """
    if not USE_FAISS:
        # Rows are centroids, columns are features: argmin over axis 0.
        pairwise = scipy.spatial.distance.cdist(centroids, features)
        return np.argmin(pairwise, axis=0)

    # faiss needs contiguous float32 input.
    centroids = np.ascontiguousarray(centroids).astype('float32')
    features = np.ascontiguousarray(features).astype('float32')

    _, dim = centroids.shape

    if USE_GPU:
        gpu_res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.device = 0
        index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)
    else:
        index = faiss.IndexFlatL2(dim)

    # Index the centroids, then fetch the single nearest one per feature.
    index.add(centroids)
    _, nearest = index.search(features, 1)

    return nearest.flatten()
def _retrieve_knn_faiss_gpu_euclidean(query_embeddings, db_embeddings, k, gpu_id=0):
    """
    Retrieve nearest neighbours with a faiss GPU flat L2 index.

    Args:
        query_embeddings: numpy array of size [NUM_QUERY_IMAGES x EMBED_SIZE]
        db_embeddings: numpy array of size [NUM_DB_IMAGES x EMBED_SIZE]
        k: number of nn results to retrieve excluding query
        gpu_id: gpu device id used for the search

    Returns:
        dists: numpy array of size [NUM_QUERY_IMAGES x (k + 1)], distances
            of the retrieved neighbours for each query
        retrieved_db_indices: numpy array of size
            [NUM_QUERY_IMAGES x (k + 1)], indices of the retrieved
            neighbours for each query

        One extra neighbour is retrieved per query in case the query images
        are also in the db; removing it is left to the caller.
    """
    import faiss

    gpu_resources = faiss.StandardGpuResources()
    config = faiss.GpuIndexFlatConfig()
    config.device = gpu_id

    embed_size = db_embeddings.shape[1]
    index = faiss.GpuIndexFlatL2(gpu_resources, embed_size, config)
    index.add(db_embeddings)

    # k + 1 results, see the docstring.
    return index.search(query_embeddings, k + 1)
def run_kmeans(x, num_clusters, temperature):
    """
    Run faiss k-means once per entry of ``num_clusters`` and collect, for
    each run, the assignments, normalised centroids and a per-cluster
    concentration estimate.

    Args:
        x: data to be clustered (2-D array, rows are samples)
        num_clusters: iterable of cluster counts; the position of each
            entry is used as the faiss seed of that run
        temperature: value the mean concentration is rescaled to

    Returns:
        dict with keys 'im2cluster', 'centroids' and 'density'; each maps
        to a list holding one CUDA tensor per run.
    """
    print('performing kmeans clustering')
    results = {'im2cluster': [], 'centroids': [], 'density': []}

    for seed, num_cluster in enumerate(num_clusters):
        # faiss clustering parameters
        dim = x.shape[1]
        k = int(num_cluster)
        clus = faiss.Clustering(dim, k)
        clus.verbose = False
        clus.niter = 20
        clus.nredo = 5
        clus.seed = seed
        clus.max_points_per_centroid = 1000
        clus.min_points_per_centroid = 5

        gpu_res = faiss.StandardGpuResources()
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = 0
        index = faiss.GpuIndexFlatL2(gpu_res, dim, cfg)

        clus.train(x, index)

        # distance and assignment of every sample
        dists, assign = index.search(x, 1)
        im2cluster = [int(row[0]) for row in assign]

        centroids = faiss.vector_to_array(clus.centroids).reshape(k, dim)

        # group the sample-to-centroid distances by cluster
        per_cluster = [[] for _ in range(k)]
        for sample_idx, cluster_idx in enumerate(im2cluster):
            per_cluster[cluster_idx].append(dists[sample_idx][0])

        # concentration estimation (phi): mean sqrt-distance divided by
        # log(cluster size + 10)
        density = np.zeros(k)
        for cluster_idx, members in enumerate(per_cluster):
            if len(members) > 1:
                phi = (np.asarray(members) ** 0.5).mean() / np.log(len(members) + 10)
                density[cluster_idx] = phi

        # clusters with at most one point take the maximum concentration
        dmax = density.max()
        for cluster_idx, members in enumerate(per_cluster):
            if len(members) <= 1:
                density[cluster_idx] = dmax

        # clamp extreme values for stability
        low = np.percentile(density, 10)
        high = np.percentile(density, 90)
        density = density.clip(low, high)
        # scale the mean to temperature
        density = temperature * density / density.mean()

        # convert to cuda Tensors for broadcast
        centroids = torch.Tensor(centroids).cuda()
        centroids = nn.functional.normalize(centroids, p=2, dim=1)

        im2cluster = torch.LongTensor(im2cluster).cuda()
        density = torch.Tensor(density).cuda()

        results['centroids'].append(centroids)
        results['density'].append(density)
        results['im2cluster'].append(im2cluster)

    return results
def build_nn_index(self, database):
    '''
    Build a flat L2 faiss GPU index over ``database``.

    The index is created from ``self.res``, ``self.dimension`` and
    ``self.flat_config``.

    :param database: numpy array of Nx3
    :return: populated ``faiss.GpuIndexFlatL2`` index
    '''
    # dimension is 3
    nn_index = faiss.GpuIndexFlatL2(self.res, self.dimension, self.flat_config)
    nn_index.add(database)
    return nn_index
def get_gpu_index(X, gpu_device_num=0):
    """Return a faiss GPU flat L2 index populated with the rows of ``X``.

    Args:
        X: 2-D array; ``X.shape[1]`` is the index dimensionality.
        gpu_device_num: id of the GPU the index is placed on.
    """
    config = faiss.GpuIndexFlatConfig()
    config.device = gpu_device_num

    gpu_resources = faiss.StandardGpuResources()
    gpu_index = faiss.GpuIndexFlatL2(gpu_resources, X.shape[1], config)
    gpu_index.add(X)
    return gpu_index
def nearest_neighbors_gpu(X, n_neighbors):
    """Compute the ``n_neighbors`` nearest points for each data point in
    ``X`` with a faiss GPU flat L2 index.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        The input data to compute the k-neighbor graph of.

    n_neighbors: int
        The number of nearest neighbors to compute for each sample in ``X``.

    Returns
    -------
    knn_indices: array of shape (n_samples, n_neighbors)
        The indices on the ``n_neighbors`` closest points in the dataset.

    knn_dists: array of shape (n_samples, n_neighbors)
        The distances to the ``n_neighbors`` closest points in the dataset.

    An empty list is returned as a third element.
    """
    n_samples, n_dims = X.shape[0], X.shape[1]

    # FAISS can only process contiguous float32 data.
    X = np.ascontiguousarray(X.astype(np.float32))

    gpu_resources = faiss.StandardGpuResources()
    index = faiss.GpuIndexFlatL2(gpu_resources, n_dims)
    index.train(X)
    index.add(X)

    # Below the limit a single query is issued.
    # TODO determine limit, requires to known device memory size, possible input
    if n_samples < 500000:
        knn_dists, knn_indices = index.search(X, n_neighbors)
        return knn_indices, knn_dists, []

    # Above the limit the query is split in 5, since FAISS reserves
    # approximately 18% of memory as temporary memory and thus querying
    # 5 times always allows each query to fit in memory.
    n_queries = 5
    slice_size = ceil(n_samples / n_queries)
    knn_dists = np.zeros((n_samples, n_neighbors), dtype=np.float32)
    knn_indices = np.zeros((n_samples, n_neighbors), dtype=np.int64)
    for batch in range(n_queries):
        lo = batch * slice_size
        hi = min(lo + slice_size, n_samples)
        batch_dists, batch_indices = index.search(X[lo:hi], n_neighbors)
        knn_dists[lo:hi] = batch_dists
        knn_indices[lo:hi] = batch_indices

    return knn_indices, knn_dists, []
def compute_nnf(ref_img, prop_img, patchsize, K=10, gpuid=0, pxform=None):
    """
    Compute the Nearest Neighbor Field for Optical Flow.

    Patches tiled from ``ref_img`` are matched against patches tiled from
    ``prop_img`` with a faiss GPU flat L2 index.

    Args:
        ref_img: image unpacked as (C, H, W); source of the query patches.
        prop_img: image whose patches form the database.
        patchsize: forwarded to ``tile_patches``.
        K: number of nearest patches retrieved per query patch.
        gpuid: faiss GPU device id.
        pxform: forwarded to ``tile_patches``.

    Returns:
        (vals, locs): the matched distances arranged as (b, t, h, w, k) and
        the matched locations arranged as (b, t, h, w, k, 2), with the last
        axis reversed from (row, col) to (col, row).
    """
    _, H, W = ref_img.shape
    B, T = 1, 1

    # -- tile patches --
    ref_batch = repeat(ref_img, 'c h w -> 1 1 c h w')
    q_patches = tile_patches(ref_batch, patchsize, pxform).pix.cpu().numpy()
    # NOTE: B is overwritten with the leading patch dimension here.
    B, _, _, ND = q_patches.shape
    query = rearrange(q_patches, 'b t r nd -> (b t r) nd')

    prop_batch = repeat(prop_img, 'c h w -> 1 1 c h w')
    db_patches = tile_patches(prop_batch, patchsize, pxform).pix.cpu().numpy()
    _, _, _, _ = db_patches.shape  # kept: requires exactly four dimensions
    database = rearrange(db_patches, 'b t r nd -> (b t r) nd')

    # -- faiss setup --
    gpu_res = faiss.StandardGpuResources()
    faiss_cfg = faiss.GpuIndexFlatConfig()
    faiss_cfg.useFloat16 = False
    faiss_cfg.device = gpuid
    faiss.cvar.distance_compute_blas_threshold = 40

    # -- create database --
    gpu_index = faiss.GpuIndexFlatL2(gpu_res, ND, faiss_cfg)
    gpu_index.add(database)

    # -- execute search --
    dists, inds = gpu_index.search(query, K)
    dists = rearrange(dists, '(b t r) k -> b t r k', b=B, t=T)
    inds = rearrange(inds, '(b t r) k -> b t r k', b=B, t=T)

    # -- get nnf (x,y) from the flat indices --
    vals, locs = [], []
    for b in range(B):
        for t in range(T):
            vals_bt = rearrange(dists[b][t], '(h w) k -> h w k', h=H)

            # flat index -> (row, col); only works with B,T == 1
            rows_cols = np.unravel_index(inds[b][t], (H, W))
            locs_bt = np.stack(rows_cols, axis=-1)
            locs_bt = rearrange(locs_bt, '(h w) k two -> h w k two', h=H)

            vals.append(vals_bt)
            locs.append(locs_bt)

    vals = rearrange(vals, '(b t) h w k -> b t h w k', b=B)
    locs = rearrange(locs, '(b t) h w k two -> b t h w k two', b=B)

    # swap: (rows,cols) -> (cols,rows) aka (y,x) -> (x,y)
    locs[..., :] = locs[..., ::-1]

    return vals, locs
def BuildKNNGraphByFAISS_GPU(descriptor, k):
    """Self-search ``descriptor`` on GPU 0 and return k neighbours per row.

    ``k + 1`` neighbours are retrieved and the first column is dropped
    (presumably the query point itself).

    Args:
        descriptor: 2-D array of shape (dbsize, dim).
        k: number of neighbours kept per row.

    Returns:
        tuple: (neighbour indices, neighbour distances), each with
        ``k`` columns.
    """
    _, dim = descriptor.shape

    config = faiss.GpuIndexFlatConfig()
    config.device = 0
    gpu_resources = faiss.StandardGpuResources()

    knn_index = faiss.GpuIndexFlatL2(gpu_resources, dim, config)
    knn_index.add(descriptor)

    distances, neighbours = knn_index.search(descriptor, k + 1)
    return neighbours[:, 1:], distances[:, 1:]
def train_kmeans(x, num_clusters=1000, num_gpus=1):
    """
    Runs k-means clustering on one or several GPUs

    Args:
        x: training vectors; ``x.shape[1]`` is taken as the dimensionality.
        num_clusters: number of centroids to fit.
        num_gpus: number of GPUs to use (devices ``0 .. num_gpus - 1``).

    Returns:
        The centroids reshaped to ``(num_clusters, d)``.
    """
    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = True
    kmeans.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    # One resources object and one config per device.
    res = [faiss.StandardGpuResources() for _ in range(num_gpus)]

    flat_config = []
    for i in range(num_gpus):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if num_gpus == 1:
        index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
    else:
        # Keep the sub-indexes in a list so they stay referenced while the
        # proxy dispatches to them.
        indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                   for i in range(num_gpus)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    # Formatted single-argument prints give the same output on Python 2
    # and Python 3.
    print('Total number of indexed vectors (after kmeans.train()): %s'
          % (index.ntotal,))

    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    print('Objective values per iter: %s' % (objective,))
    print("Final objective: %.4g" % objective[-1])

    # TODO: return cluster assignment

    return centroids.reshape(num_clusters, d)
def train_kmeans(x, k, ngpu, max_points_per_centroid=256):
    """Run k-means on the CPU or on one or several GPUs.

    Args:
        x: training vectors; ``x.shape[1]`` is taken as the dimensionality.
        k: number of centroids.
        ngpu: 0 for a CPU flat index, 1 for a single GPU, more to replicate
            the index over devices ``0 .. ngpu - 1``.
        max_points_per_centroid: value set on the faiss clustering object.

    Returns:
        The centroids reshaped to ``(k, d)``.
    """
    dim = x.shape[1]

    clustering = faiss.Clustering(dim, k)
    clustering.verbose = True
    clustering.niter = 20
    clustering.max_points_per_centroid = max_points_per_centroid

    if ngpu == 0:
        index = faiss.IndexFlatL2(dim)
    else:
        gpu_resources = [faiss.StandardGpuResources() for _ in range(ngpu)]

        gpu_configs = []
        for device_id in range(ngpu):
            config = faiss.GpuIndexFlatConfig()
            config.useFloat16 = False
            config.device = device_id
            gpu_configs.append(config)

        if ngpu == 1:
            index = faiss.GpuIndexFlatL2(gpu_resources[0], dim, gpu_configs[0])
        else:
            # The list keeps the per-device indexes referenced while the
            # replica set uses them.
            replicas = [
                faiss.GpuIndexFlatL2(resource, dim, config)
                for resource, config in zip(gpu_resources, gpu_configs)
            ]
            index = faiss.IndexReplicas()
            for replica in replicas:
                index.addIndex(replica)

    # perform the training
    clustering.train(x, index)
    centroids = faiss.vector_float_to_array(clustering.centroids)

    # Objective value of every iteration.
    iteration_stats = clustering.iteration_stats
    per_iteration = [iteration_stats.at(step)
                     for step in range(iteration_stats.size())]
    obj = np.array([entry.obj for entry in per_iteration])
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, dim)
def _init_faiss(self, ngpu, feat_len):
    """Create one flat L2 GPU index per device and group them in a proxy.

    Sets ``self.flat_config``, ``self.res``, ``self.indexes`` (one entry per
    device each) and ``self.index`` (the ``IndexProxy`` over all
    sub-indexes). ``self.cfg`` is left pointing at the config of the last
    device.

    Args:
        ngpu: number of GPUs (devices ``0 .. ngpu - 1``).
        feat_len: dimensionality of the indexed features.
    """
    self.flat_config = []
    for device_id in range(ngpu):
        self.cfg = faiss.GpuIndexFlatConfig()
        self.cfg.useFloat16 = False
        self.cfg.device = device_id
        self.flat_config.append(self.cfg)

    self.res = [faiss.StandardGpuResources() for _ in range(ngpu)]

    # Kept on self so the sub-indexes stay referenced alongside the proxy.
    self.indexes = [
        faiss.GpuIndexFlatL2(resource, feat_len, config)
        for resource, config in zip(self.res, self.flat_config)
    ]

    self.index = faiss.IndexProxy()
    for gpu_sub_index in self.indexes:
        self.index.addIndex(gpu_sub_index)
def _init_faiss(self, dimension):
    """Create a faiss GPU flat L2 index and store it on ``self._faiss_index``.

    faiss is imported lazily inside the method.

    Args:
        dimension: dimensionality of the vectors the index will hold.
    """
    import faiss

    gpu_resources = faiss.StandardGpuResources()
    self._faiss_index = faiss.GpuIndexFlatL2(gpu_resources, dimension)