def resume(self, inc, resume_full_path):
    """
    Load previous REMIND model to continue training.

    The classifier and optimizer are restored in place from the saved checkpoint, and a
    product quantizer is rebuilt from the centroids stored in the saved buffer file.

    :param inc: which increment number was saved
    :param resume_full_path: path where weights are saved
    :return: (loaded classifier checkpoint dict, latent dict, rehearsal ixs list,
        class id to item ix dict, product quantizer holding the saved centroids)
    """
    print(f'\nResuming REMIND model from {resume_full_path}')

    # restore classifier weights and optimizer state from the checkpoint
    classifier_path = os.path.join(resume_full_path, 'remind_classifier_F_%d.pth' % inc)
    state = torch.load(classifier_path)
    self.classifier_F.load_state_dict(state['model_state_dict'])
    self.optimizer.load_state_dict(state['optimizer_state_dict'])

    # load the replay buffer and PQ parameters
    # NOTE: pickle can execute arbitrary code while loading; only resume from trusted files.
    buffer_path = os.path.join(resume_full_path, 'remind_buffer_%d.pkl' % inc)
    with open(buffer_path, 'rb') as f:
        d = pickle.load(f)

    # rebuild the quantizer with the configured geometry, then overwrite its centroids
    nbits = int(np.log2(self.codebook_size))
    pq = faiss.ProductQuantizer(self.num_channels, self.num_codebooks, nbits)
    faiss.copy_array_to_vector(d['pq_centroids'].ravel(), pq.centroids)

    return state, d['latent_dict'], d['rehearsal_ixs'], d['class_id_to_item_ix_dict'], pq
def do_test_codec(self, nbit):
    """
    Check PQ encoding/decoding for a given number of bits per sub-quantizer.

    Centroids are written directly into the quantizer instead of being trained. Vectors
    assembled from those centroids must decode back to themselves, and encoding through an
    external assignment index must yield the same codes as the built-in encoder.

    :param nbit: number of bits per sub-quantizer code
    """
    ksub = 1 << nbit
    n_vectors = 100
    quantizer = faiss.ProductQuantizer(16, 2, nbit)

    # simulate training by installing random centroids
    rng = np.random.RandomState(123)
    centroid_table = rng.rand(2, ksub, 8).astype('float32')
    faiss.copy_array_to_vector(centroid_table.ravel(), quantizer.centroids)

    # build vectors out of the centroids themselves, so they can be encoded exactly
    picks = rng.randint(ksub, size=(n_vectors, 2))
    left_half = centroid_table[0, picks[:, 0]]
    right_half = centroid_table[1, picks[:, 1]]
    vectors = np.hstack((left_half, right_half))

    # encode / decode round trip
    builtin_codes = quantizer.compute_codes(vectors)
    decoded = quantizer.decode(builtin_codes)
    assert np.all(decoded == vectors)

    # encode again, this time through an external assignment index
    assign_index = faiss.IndexFlatL2(8)
    quantizer.assign_index = assign_index
    external_codes = np.empty((n_vectors, quantizer.code_size), dtype='uint8')
    quantizer.compute_codes_with_assign_index(
        faiss.swig_ptr(vectors), faiss.swig_ptr(external_codes), n_vectors)
    assert np.all(builtin_codes == external_codes)
# Database, query and training sets; d ends up as the dimension of the training set.
nb, d = xb.shape
nq, d = xq.shape
nt, d = xt.shape

# fastest to slowest

if 'lsq-gpu' in todo:
    # local search quantizer with its ICM encoder running on all available GPUs
    lsq = faiss.LocalSearchQuantizer(d, M, nbits)
    ngpus = faiss.get_num_gpus()
    lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
    lsq.verbose = True
    print("===== LSQ-GPU")
    # NOTE(review): eval_quantizer is defined outside this view. This call previously
    # passed (lsq, xb, xt, 'lsq-gpu'); it is aligned here with the (q, xq, xb, gt, xt)
    # form used by the other call sites below — confirm against the definition.
    eval_quantizer(lsq, xq, xb, gt, xt)

if 'pq' in todo:
    pq = faiss.ProductQuantizer(d, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq, xb, gt, xt)

if 'opq' in todo:
    # round the dimension up to the next multiple of M for the OPQ output
    d2 = ((d + M - 1) // M) * M
    print("OPQ d2=", d2)
    opq = faiss.OPQMatrix(d, M, d2)
    opq.train(xt)
    # evaluate a plain PQ on the OPQ-transformed queries, database and training set
    xq2 = opq.apply(xq)
    xb2 = opq.apply(xb)
    xt2 = opq.apply(xt)
    pq = faiss.ProductQuantizer(d2, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq2, xb2, gt, xt2)
try: xt2 = ds.get_train(maxtrain=maxtrain) except NotImplementedError: print("No training set: training on database") xt2 = ds.get_database()[:maxtrain] print("train, size", xt2.shape) assert np.all(np.isfinite(xt2)) if (isinstance(vec_transform, faiss.OPQMatrix) and isinstance(index_ivf, faiss.IndexIVFPQFastScan)): print(" Forcing OPQ training PQ to PQ4") ref_pq = index_ivf.pq training_pq = faiss.ProductQuantizer( ref_pq.d, ref_pq.M, ref_pq.nbits ) vec_transform.pq vec_transform.pq = training_pq if args.get_centroids_from == '': if args.clustering_niter >= 0: print(("setting nb of clustering iterations to %d" % args.clustering_niter)) index_ivf.cp.niter = args.clustering_niter if args.train_on_gpu: print("add a training index on GPU") train_index = faiss.index_cpu_to_all_gpus(
import numpy
import faiss

numpy.random.seed(13)

# ============= test data =============
d = 32  # data dimension
cs = 4  # code size (bytes)

# train set
nt = 10000
xt = numpy.random.rand(nt, d).astype('float32')

# dataset to encode (could be same as train)
n = 20000
x = numpy.random.rand(n, d).astype('float32')

pq = faiss.ProductQuantizer(d, cs, 8)
pq.train(xt)

# encode with PQ
codes = pq.compute_codes(x)

# decode with PQ
x2 = pq.decode(codes)

# ============= show results =============
# Relative reconstruction error: total squared error divided by the total squared norm of
# the data. Both terms are summed so the result is a single scalar, not an (n, d) array.
avg_relative_error = ((x - x2)**2).sum() / (x**2).sum()
print(avg_relative_error)
# Collect the per-image features as rows of length d (channels moved to the last axis).
for idx, key in enumerate(tqdm(imgids)):
    data = train_data_pkl[key][()]
    print(data.shape[-2:])
    data_tr = np.transpose(data, (0, 2, 3, 1)).reshape(-1, d).astype("float32")
    train_data_base_init.append(data_tr)

# Stack the per-image row blocks into one 2-D (rows, d) matrix. Converting the list
# directly would produce a 3-D (or ragged) array, which pq.train() cannot consume.
train_data_base_init = np.ascontiguousarray(
    np.concatenate(train_data_base_init), dtype=np.float32)
print('Data loading done ..........')

#%%
print('Training Product Quantizer..........')
nbits = 8  # int(np.log2(d))
print("nbits:", nbits)
pq = faiss.ProductQuantizer(d, cs, nbits)
pq.train(train_data_base_init)

print('Encoding, Decoding and saving Reconstructed Features..........')
#%%
for fname in ['backbone.7.0_test', 'backbone.7.0_trainval']:
    in_fname = 'resnet_imagenet_features/' + fname + '.h5'
    data_h5 = h5py.File(in_fname, 'r')
    h5_file = fname + '_reconstructed'
    reconstructed_h5 = h5py.File(f'resnet_imagenet_features/{h5_file}.h5', 'w')
    keys = list(data_h5.keys())
    for idx, key in enumerate(tqdm(keys)):
        data = data_h5[key][()]
        _, dim, r, c = data.shape
        # NOTE(review): the visible source ends here; the rest of this loop body is
        # outside this view.
# NOTE(review): this statement looks like the tail of a writer helper defined above this
# view — confirm its indentation against the preceding definition when merging.
x.tofile(fname)

x = fvecs_read("sift/sift_base.fvecs")
# x = x[:10000, :]
n, d = x.shape
m = 8  # number of sub-quantizers, shared by the OPQ rotation and the PQ

# learn the OPQ rotation on the raw vectors
opq = faiss.OPQMatrix(d, m)
# help(opq)
opq.train(x)
A = faiss.vector_to_array(opq.A).reshape(d, d)
print(A.shape)
# print(A)

# rotated vectors
xt = opq.apply_py(x)
# print(((np.dot(x[0], A.T) - xt[0])**2).sum())
# print(x[0, :10])
# print(xt[0, :10])
print(xt.shape)

# The PQ is trained in the rotated space, so the codes must be computed from the rotated
# vectors as well; encoding the raw vectors would not match the learned centroids.
pq = faiss.ProductQuantizer(d, m, 8)
pq.train(xt)
codes = pq.compute_codes(xt)

# centroids laid out as (sub-quantizer, centroid id, sub-vector component)
cen = faiss.vector_to_array(pq.centroids)
cen = cen.reshape(pq.M, pq.ksub, pq.dsub)
print(cen.shape)
print(codes.shape)
# print(cen[0][codes[0, 0]][:10])

# write the rotation matrix, one centroid table per sub-quantizer, and the codes
fvecs_write(A, 'opq/R.fvecs')
for i in range(m):
    fvecs_write(cen[i], 'opq/c' + str(i) + '.fvecs')
cvecs_write(codes, 'opq/code.cvecs')
def fit_pq(feats_base_init, labels_base_init, item_ix_base_init, num_channels, spatial_feat_dim, num_codebooks,
           codebook_size, batch_size=128, counter=None):
    """
    Fit the PQ model and then quantize and store the latent codes of the data used to train the PQ in a
    dictionary to be used later as a replay buffer.

    :param feats_base_init: numpy array of base init features that will be used to train the PQ
    :param labels_base_init: numpy array of the base init labels used to train the PQ
    :param item_ix_base_init: numpy array of the item_ixs used to train the PQ
    :param num_channels: number of channels in desired features
    :param spatial_feat_dim: spatial dimension of desired features
    :param num_codebooks: number of codebooks for PQ
    :param codebook_size: size of each codebook for PQ
    :param batch_size: batch size used to extract PQ features
    :param counter: object to count how many latent codes are in the replay buffer/dict; when omitted, a
        fresh utils.Counter is created for this call instead of sharing one instance across calls
    :return: (trained PQ object, dictionary of latent codes, list of item_ixs for latent codes, dict of visited
        classes and associated item_ixs)
    """
    if counter is None:
        counter = utils.Counter()

    # flatten to one row of num_channels values per spatial location (channels moved last)
    train_data_base_init = np.transpose(feats_base_init, (0, 2, 3, 1))
    train_data_base_init = np.reshape(train_data_base_init, (-1, num_channels))

    print('\nTraining Product Quantizer')
    start = time.time()
    nbits = int(np.log2(codebook_size))
    pq = faiss.ProductQuantizer(num_channels, num_codebooks, nbits)
    pq.train(train_data_base_init)
    print("Completed in {} secs".format(time.time() - start))
    # the flattened training copy is no longer needed once the PQ is trained
    del train_data_base_init

    print('\nEncoding and Storing Base Init Codes')
    start_time = time.time()
    latent_dict = {}
    class_id_to_item_ix_dict = defaultdict(list)
    rehearsal_ixs = []

    # Batches are slices over images, so the loop is bounded by the number of images rather
    # than the number of flattened spatial vectors (which only added empty trailing batches).
    num_images = len(feats_base_init)
    mb = min(batch_size, num_images)
    for start in range(0, num_images, mb):
        end = min(start + mb, num_images)
        data_batch = feats_base_init[start:end]
        batch_labels = labels_base_init[start:end]
        batch_item_ixs = item_ix_base_init[start:end]

        data_batch = np.transpose(data_batch, (0, 2, 3, 1))
        data_batch = np.reshape(data_batch, (-1, num_channels))
        codes = pq.compute_codes(data_batch)
        # NOTE(review): this reshape presumes one code byte per codebook (i.e. nbits == 8) — confirm
        # before using other codebook sizes
        codes = np.reshape(codes, (-1, spatial_feat_dim, spatial_feat_dim, num_codebooks))

        # put codes and labels into buffer (dictionary)
        for j, label in enumerate(batch_labels):
            ix = int(batch_item_ixs[j])
            latent_dict[ix] = [codes[j], label]
            rehearsal_ixs.append(ix)
            class_id_to_item_ix_dict[int(label)].append(ix)
            counter.update()

    print("Completed in {} secs".format(time.time() - start_time))
    return pq, latent_dict, rehearsal_ixs, class_id_to_item_ix_dict
import faiss
import numpy as np

# Random float32 vectors, used both to train the quantizer and as the data to encode.
x = np.random.rand(10000, 32).astype('float32')

# Product quantizer built with arguments (32, 4, 8), trained on the same vectors.
pq = faiss.ProductQuantizer(32, 4, 8)
pq.train(x)

# Encode, then reconstruct from the codes.
codes = pq.compute_codes(x)
x2 = pq.decode(codes)

# Relative reconstruction error: total squared error over total squared norm of the data.
residual = x - x2
squared_error = (residual**2).sum()
squared_norm = (x**2).sum()
avg_relative_error = squared_error / squared_norm
print(avg_relative_error)