Example #1
    def resume(self, inc, resume_full_path):
        """
        Load previous REMIND model to continue training.
        :param inc: which increment number was saved
        :param resume_full_path: path where weights are saved
        :return: (classifier state dict, latent dict, rehearsal ixs list, class id to item ix dict, rebuilt PQ object)
        """

        print(f'\nResuming REMIND model from {resume_full_path}')
        state = torch.load(
            os.path.join(resume_full_path, 'remind_classifier_F_%d.pth' % inc))
        self.classifier_F.load_state_dict(state['model_state_dict'])
        self.optimizer.load_state_dict(state['optimizer_state_dict'])

        # load the replay buffer and rebuild the product quantizer
        with open(os.path.join(resume_full_path, 'remind_buffer_%d.pkl' % inc),
                  'rb') as f:
            d = pickle.load(f)
        nbits = int(np.log2(self.codebook_size))
        pq = faiss.ProductQuantizer(self.num_channels, self.num_codebooks,
                                    nbits)
        faiss.copy_array_to_vector(d['pq_centroids'].ravel(), pq.centroids)
        return (state, d['latent_dict'], d['rehearsal_ixs'],
                d['class_id_to_item_ix_dict'], pq)
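For illustration (not part of the original): once resume() returns, the rebuilt pq can decode any latent code stored in the buffer. A minimal sketch, assuming codes were stored with shape (spatial, spatial, num_codebooks) as in fit_pq below; model, inc and path are hypothetical placeholders.

state, latent_dict, rehearsal_ixs, class_dict, pq = model.resume(inc, path)

ix = rehearsal_ixs[0]
codes, label = latent_dict[ix]                    # codes: (h, w, M) uint8
flat = np.ascontiguousarray(codes.reshape(-1, pq.M), dtype='uint8')
recon = pq.decode(flat)                           # (h*w, num_channels) float32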
Example #2
    def do_test_codec(self, nbit):
        pq = faiss.ProductQuantizer(16, 2, nbit)

        # simulate training
        rs = np.random.RandomState(123)
        centroids = rs.rand(2, 1 << nbit, 8).astype('float32')
        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)

        idx = rs.randint(1 << nbit, size=(100, 2))
        # can be encoded exactly
        x = np.hstack((centroids[0, idx[:, 0]], centroids[1, idx[:, 1]]))

        # encode / decode
        codes = pq.compute_codes(x)
        xr = pq.decode(codes)
        assert np.all(xr == x)

        # encode w/ external index
        assign_index = faiss.IndexFlatL2(8)
        pq.assign_index = assign_index
        codes2 = np.empty((100, pq.code_size), dtype='uint8')
        pq.compute_codes_with_assign_index(faiss.swig_ptr(x),
                                           faiss.swig_ptr(codes2), 100)
        assert np.all(codes == codes2)
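A side note on the centroid table used above: pq.centroids is a flat C++ vector whose logical layout is (M, ksub, dsub), so copy_array_to_vector and vector_to_array round-trip it exactly. A small self-contained sketch:

import numpy as np
import faiss

nbit = 4
pq = faiss.ProductQuantizer(16, 2, nbit)
c = np.random.rand(pq.M, pq.ksub, pq.dsub).astype('float32')  # (2, 16, 8)
faiss.copy_array_to_vector(c.ravel(), pq.centroids)
c2 = faiss.vector_to_array(pq.centroids).reshape(pq.M, pq.ksub, pq.dsub)
assert np.all(c == c2)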
Example #3
nb, d = xb.shape
nq, d = xq.shape
nt, d = xt.shape

# fastest to slowest

if 'lsq-gpu' in todo:
    lsq = faiss.LocalSearchQuantizer(d, M, nbits)
    ngpus = faiss.get_num_gpus()
    lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
    lsq.verbose = True
    eval_quantizer(lsq, xb, xt, 'lsq-gpu')

if 'pq' in todo:
    pq = faiss.ProductQuantizer(d, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq, xb, gt, xt)

if 'opq' in todo:
    d2 = ((d + M - 1) // M) * M
    print("OPQ d2=", d2)
    opq = faiss.OPQMatrix(d, M, d2)
    opq.train(xt)
    xq2 = opq.apply(xq)
    xb2 = opq.apply(xb)
    xt2 = opq.apply(xt)
    pq = faiss.ProductQuantizer(d2, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq2, xb2, gt, xt2)
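The d2 computation above rounds d up to the next multiple of M, because OPQ must output a dimension that the PQ can split into M equal sub-vectors. A worked example:

d, M = 100, 8
d2 = ((d + M - 1) // M) * M
print(d2)  # 104: each of the 8 sub-quantizers then sees 104 // 8 = 13 dims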
Example #4
    try:
        xt2 = ds.get_train(maxtrain=maxtrain)
    except NotImplementedError:
        print("No training set: training on database")
        xt2 = ds.get_database()[:maxtrain]

    print("train, size", xt2.shape)
    assert np.all(np.isfinite(xt2))

    if (isinstance(vec_transform, faiss.OPQMatrix) and
        isinstance(index_ivf, faiss.IndexIVFPQFastScan)):
        print("  Forcing OPQ training PQ to PQ4")
        ref_pq = index_ivf.pq
        training_pq = faiss.ProductQuantizer(
            ref_pq.d, ref_pq.M, ref_pq.nbits
        )
        vec_transform.pq = training_pq


    if args.get_centroids_from == '':

        if args.clustering_niter >= 0:
            print(("setting nb of clustering iterations to %d" %
                   args.clustering_niter))
            index_ivf.cp.niter = args.clustering_niter

        if args.train_on_gpu:
            print("add a training index on GPU")
            train_index = faiss.index_cpu_to_all_gpus(
                faiss.IndexFlatL2(index_ivf.d))
            index_ivf.clustering_index = train_index
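For context (an assumption, not shown in this excerpt): the vec_transform / index_ivf pair handled above typically comes from an IndexPreTransform built by the index factory. A minimal sketch with a hypothetical factory string:

import faiss

d = 64
index = faiss.index_factory(d, "OPQ8_32,IVF256,PQ8x4fs")
index_ivf = faiss.extract_index_ivf(index)          # the IndexIVFPQFastScan
vec_transform = faiss.downcast_VectorTransform(index.chain.at(0))  # OPQMatrix
print(index_ivf.pq.M, index_ivf.pq.nbits)           # 8 codebooks, 4 bits each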
Example #5
import numpy
import faiss

numpy.random.seed(13)

# ============= test data =============
d = 32  # data dimension
cs = 4  # code size (bytes)

# train set
nt = 10000
xt = numpy.random.rand(nt, d).astype('float32')

# dataset to encode (could be same as train)
n = 20000
x = numpy.random.rand(n, d).astype('float32')

pq = faiss.ProductQuantizer(d, cs, 8)
pq.train(xt)

# encode with the trained PQ
codes = pq.compute_codes(x)

# decode back to approximate vectors
x2 = pq.decode(codes)

# ============= results =============
# compute the average relative reconstruction error
avg_relative_error = ((x - x2)**2).sum() / (x**2).sum()
print(avg_relative_error)
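A quick check on what the codes look like (the sizes follow from cs = 4 sub-quantizers at 8 bits each):

print(codes.shape, codes.dtype)  # (20000, 4) uint8: cs bytes per vector
print(pq.code_size)              # 4, vs d * 4 = 128 bytes for the raw float32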
Example #6
for idx, key in enumerate(tqdm(imgids)):
    data = train_data_pkl[key][()]
    print(data.shape[-2:])
    data_tr = np.transpose(data, (0, 2, 3, 1)).reshape(-1, d).astype("float32")
    train_data_base_init.append(data_tr)
#
train_data_base_init = np.concatenate(train_data_base_init)
train_data_base_init = np.ascontiguousarray(train_data_base_init,
                                            dtype=np.float32)
print('Data loading done ..........')
#%%

print('Training Product Quantizer..........')
nbits = 8  # int(np.log2(d))
print("nbits:", nbits)
pq = faiss.ProductQuantizer(d, cs, nbits)
pq.train(train_data_base_init)

print('Encoding, Decoding and saving Reconstructed Features..........')

#%%

for fname in ['backbone.7.0_test', 'backbone.7.0_trainval']:
    in_fname = 'resnet_imagenet_features/' + fname + '.h5'
    data_h5 = h5py.File(in_fname, 'r')
    h5_file = fname + '_reconstructed'
    reconstructed_h5 = h5py.File(f'resnet_imagenet_features/{h5_file}.h5', 'w')
    keys = list(data_h5.keys())
    for idx, key in enumerate(tqdm(keys)):
        data = data_h5[key][()]
        _, dim, r, c = data.shape
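The excerpt is cut off here. A sketch of what the rest of the loop presumably does, mirroring the training-data preparation above (an assumption, not from the original): quantize each feature map, decode it, restore the (N, dim, r, c) layout, and write it to the output file.

        data_tr = np.transpose(data, (0, 2, 3, 1)).reshape(-1, dim).astype('float32')
        codes = pq.compute_codes(np.ascontiguousarray(data_tr))
        recon = pq.decode(codes).reshape(-1, r, c, dim)
        recon = np.transpose(recon, (0, 3, 1, 2))   # back to (N, dim, r, c)
        reconstructed_h5.create_dataset(key, data=recon)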
Example #7
    x.tofile(fname)


x = fvecs_read("sift/sift_base.fvecs")
# x = x[:10000, :]
n, d = x.shape
m = 8
opq = faiss.OPQMatrix(d, m)
# help(opq)
opq.train(x)
A = faiss.vector_to_array(opq.A).reshape(d, d)
print(A.shape)
# print(A)
xt = opq.apply_py(x)
# print(((np.dot(x[0], A.T) - xt[0])**2).sum())
# print(x[0, :10])
# print(xt[0, :10])
print(xt.shape)
pq = faiss.ProductQuantizer(d, m, 8)
pq.train(xt)
codes = pq.compute_codes(xt)  # encode the OPQ-rotated vectors, not the raw x
cen = faiss.vector_to_array(pq.centroids)
cen = cen.reshape(pq.M, pq.ksub, pq.dsub)
print(cen.shape)
print(codes.shape)
# print(cen[0][codes[0, 0]][:10])
fvecs_write(A, 'opq/R.fvecs')
for i in range(m):
    fvecs_write(cen[i], 'opq/c' + str(i) + '.fvecs')
cvecs_write(codes, 'opq/code.cvecs')
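A sanity check that could follow (not in the original): decode the codes and undo the OPQ rotation with reverse_transform to measure the reconstruction error in the original space.

xr = opq.reverse_transform(pq.decode(codes))  # undo the orthonormal rotation
err = ((x - xr) ** 2).sum() / (x ** 2).sum()
print(err)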
Example #8
def fit_pq(feats_base_init,
           labels_base_init,
           item_ix_base_init,
           num_channels,
           spatial_feat_dim,
           num_codebooks,
           codebook_size,
           batch_size=128,
           counter=utils.Counter()):
    """
    Fit the PQ model and then quantize and store the latent codes of the data used to train the PQ in a dictionary to 
    be used later as a replay buffer.
    :param feats_base_init: numpy array of base init features that will be used to train the PQ
    :param labels_base_init: numpy array of the base init labels used to train the PQ
    :param item_ix_base_init: numpy array of the item_ixs used to train the PQ
    :param num_channels: number of channels in desired features
    :param spatial_feat_dim: spatial dimension of desired features
    :param num_codebooks: number of codebooks for PQ
    :param codebook_size: size of each codebook for PQ
    :param batch_size: batch size used to extract PQ features
    :param counter: object to count how many latent codes are in the replay buffer/dict
    :return: (trained PQ object, dictionary of latent codes, list of item_ixs for latent codes, dict of visited classes
     and associated item_ixs)
    """

    train_data_base_init = np.transpose(feats_base_init, (0, 2, 3, 1))
    train_data_base_init = np.reshape(train_data_base_init, (-1, num_channels))
    num_samples = len(train_data_base_init)

    print('\nTraining Product Quantizer')
    start = time.time()
    nbits = int(np.log2(codebook_size))
    pq = faiss.ProductQuantizer(num_channels, num_codebooks, nbits)
    pq.train(train_data_base_init)
    print("Completed in {} secs".format(time.time() - start))
    del train_data_base_init

    print('\nEncoding and Storing Base Init Codes')
    start_time = time.time()
    latent_dict = {}
    class_id_to_item_ix_dict = defaultdict(list)
    rehearsal_ixs = []
    mb = min(batch_size, num_samples)
    for i in range(0, num_samples, mb):
        start = i
        end = min(start + mb, num_samples)
        data_batch = feats_base_init[start:end]
        batch_labels = labels_base_init[start:end]
        batch_item_ixs = item_ix_base_init[start:end]

        data_batch = np.transpose(data_batch, (0, 2, 3, 1))
        data_batch = np.reshape(data_batch, (-1, num_channels))
        codes = pq.compute_codes(data_batch)
        codes = np.reshape(
            codes, (-1, spatial_feat_dim, spatial_feat_dim, num_codebooks))

        # put codes and labels into buffer (dictionary)
        for j in range(len(batch_labels)):
            ix = int(batch_item_ixs[j])
            latent_dict[ix] = [codes[j], batch_labels[j]]
            rehearsal_ixs.append(ix)
            class_id_to_item_ix_dict[int(batch_labels[j])].append(ix)
            counter.update()

    print("Completed in {} secs".format(time.time() - start_time))
    return pq, latent_dict, rehearsal_ixs, class_id_to_item_ix_dict
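A minimal usage sketch for fit_pq with random data (the shapes are assumptions; REMIND feeds CNN feature maps of shape (N, channels, spatial, spatial), and the default counter argument needs the repo's utils module to be importable):

N, channels, spatial = 1000, 512, 7
feats = np.random.rand(N, channels, spatial, spatial).astype('float32')
labels = np.random.randint(0, 10, size=N)
item_ixs = np.arange(N)

pq, latent_dict, rehearsal_ixs, class_dict = fit_pq(
    feats, labels, item_ixs, num_channels=channels,
    spatial_feat_dim=spatial, num_codebooks=32, codebook_size=256)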
Example #9
import faiss
import numpy as np

x = np.random.rand(10000, 32).astype('float32')

pq = faiss.ProductQuantizer(32, 4, 8)
pq.train(x)

codes = pq.compute_codes(x)
x2 = pq.decode(codes)

avg_relative_error = ((x - x2)**2).sum() / (x**2).sum()
print(avg_relative_error)
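For a sense of the compression this achieves: each vector shrinks from 32 float32 values to pq.code_size bytes of codes.

raw_bytes = 32 * 4                # 32 float32 components per vector
print(pq.code_size)               # 4 bytes of codes per vector
print(raw_bytes // pq.code_size)  # 32x compression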