Code example #1
    def __init__(self,
                 xt_path="/home/wenqingfu/sift1b/bigann_learn.bvecs",
                 xb_path="/home/wenqingfu/sift1b/bigann_base.bvecs",
                 ngpu=3):
        self.xt = self.mmap_bvecs(xt_path)
        self.xb = self.mmap_bvecs(xb_path)
        self.xt = self.sanitize(self.xt[:1000000])
        # db_start/db_end (in millions of vectors) are assumed to be set elsewhere
        self.xb = self.sanitize(self.xb[self.db_start * 1000 *
                                        1000:self.db_end * 1000 * 1000])
        self.gpu_resources = []
        for i in range(0, ngpu):
            res = faiss.StandardGpuResources()
            if tempmem >= 0:  # tempmem: scratch-memory budget, assumed defined at module level
                res.setTempMemory(tempmem)
                print("set tempmem to %d" % tempmem)
            self.gpu_resources.append(res)
        self.vres = faiss.GpuResourcesVector()
        self.vdev = faiss.IntVector()

        for i in range(0, ngpu):
            self.vdev.push_back(i)
            self.vres.push_back(self.gpu_resources[i])

        self.co = faiss.GpuMultipleClonerOptions()
        self.co.useFloat16 = True
        self.co.useFloat16CoarseQuantizer = False
        self.co.usePrecomputed = False
        self.co.indicesOptions = 0
        self.co.verbose = True
        self.co.shard = True

        self.ps = faiss.GpuParameterSpace()
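
The excerpt stops right after the cloner options and parameter space are created. In faiss's 1B-scale benchmarks such objects typically feed index_cpu_to_gpu_multiple; the following is a minimal sketch of that assumed continuation, not part of the original code (the factory string is illustrative):

    # Assumed continuation (sketch): clone a trained CPU index across the
    # GPUs declared in vres/vdev, honoring the cloner options above.
    cpu_index = faiss.index_factory(self.xb.shape[1], "IVF4096,PQ64")
    cpu_index.train(self.xt)
    gpu_index = faiss.index_cpu_to_gpu_multiple(
        self.vres, self.vdev, cpu_index, self.co)
    self.ps.initialize(gpu_index)
    self.ps.set_index_parameter(gpu_index, 'nprobe', 64)
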
Code example #2
    def queryFeatures(self, featureResource, numberOfResultsToRetrieve):
        numberOfResultsToRetrieve = int(numberOfResultsToRetrieve)
        ps = None
        if self.ngpu > 0:
            ps = faiss.GpuParameterSpace()
            ps.initialize(self.index)
            #ps.set_index_parameter(self.index, 'nprobe', self.nprobe)
        features = self.deserializeFeatures(featureResource)
        pfeatures = self.preproc.apply_py(indexfunctions.sanitize(features))
        D, I = self.index.search(pfeatures, numberOfResultsToRetrieve)
        sortedIDs, sortedVotes, maxvoteval = indexfunctions.tallyVotes(
            D, I, numcores=1)
        #print('number of ids: ',len(self.IDToImage))
        # voteScores = 1.0 * sortedVotes / (1.0 * np.max(sortedVotes))
        voteScores = 1.0 * sortedVotes / maxvoteval
        resultScores = filteringResults()
        resultScores.D = D
        resultScores.I = I
        #print(list(self.IDToImage.keys())[0]+'\n')
        for i in range(0, min(len(sortedIDs), numberOfResultsToRetrieve)):
            id = sortedIDs[i]
            id_str = str(id)
            #print(id_str)
            if id_str in self.IDToImage:
                imname = self.IDToImage[id_str]
                score = voteScores[i]
                resultScores.addScore(imname, score, ID=id)
        resultScores.pairDownResults(numberOfResultsToRetrieve)
        return resultScores
Code example #3
File: faiss.py, Project: Purg/SMQTK-Indexing
    def _set_index_nprobe(self) -> bool:
        """
        Try to set the currently configured nprobe value to the current faiss
        index.

        :returns: True if nprobe was actually set and False if it wasn't (not
            an appropriate index type).
        """
        with self._model_lock:
            idx = self._faiss_index
            idx_name = idx.__class__.__name__
            try:
                # Attempting to use GpuParameterSpace doesn't error and seems
                # to function even when there is no GPU available, so the usual
                # pythonic EAFP doesn't cause an exception to catch when doing
                # the "improper" thing first.
                if self._use_gpu and isinstance(idx, faiss.GpuIndex):
                    ps = faiss.GpuParameterSpace()
                else:
                    ps = faiss.ParameterSpace()
                ps.set_index_parameter(idx, 'nprobe', self._ivf_nprobe)
                LOG.debug(f"Set nprobe={self._ivf_nprobe} to index, instance "
                          f"of {idx_name}")
                return True
            except RuntimeError as sip_ex:
                s_ex = str(sip_ex)
                if "could not set parameter nprobe" in s_ex:
                    # OK, index does not support nprobe parameter
                    LOG.debug(f"Current index ({idx_name}) does not "
                              f"support the nprobe parameter.")
                    return False
                # Otherwise re-raise
                raise
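
Distilled from the method above into a standalone helper, a sketch assuming an already-built index: the up-front type check replaces EAFP because, per the comment in the snippet, GpuParameterSpace neither errors nor visibly misbehaves on a CPU index, so an exception-based fallback would never trigger.

    import faiss

    def set_nprobe(index, nprobe, use_gpu=False):
        # Select the parameter space by index type up front; trying
        # GpuParameterSpace first and catching an exception cannot work,
        # since no exception is raised for CPU indexes.
        if use_gpu and isinstance(index, faiss.GpuIndex):
            ps = faiss.GpuParameterSpace()
        else:
            ps = faiss.ParameterSpace()
        try:
            ps.set_index_parameter(index, 'nprobe', nprobe)
            return True
        except RuntimeError as ex:
            if "could not set parameter nprobe" in str(ex):
                return False  # index type has no nprobe (e.g. a flat index)
            raise
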
Code example #4
File: indexing.py, Project: yuanjungod/LASER
def IndexLoad(idx_name, nprobe):
    print(' - loading FAISS index', idx_name)
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nprobe to {:d}'.format(nprobe))
    faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
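
Hypothetical usage of the loader above (the index path and query array are illustrative):

    import numpy as np

    index = IndexLoad('sentences.ivf.index', nprobe=16)    # illustrative path
    queries = np.random.rand(8, index.d).astype('float32')
    D, I = index.search(queries, 10)   # distances and ids of the top-10 neighbors
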
Code example #5
File: bench_gpu_1bn.py, Project: h7lost/H7
def eval_dataset(index, preproc):

    ps = faiss.GpuParameterSpace()
    ps.initialize(index)

    nq_gt = gt_I.shape[0]
    print("search...")
    sl = query_batch_size
    nq = xq.shape[0]
    for nprobe in nprobes:
        ps.set_index_parameter(index, 'nprobe', nprobe)
        t0 = time.time()

        if sl == 0:
            D, I = index.search(preproc.apply_py(sanitize(xq)), nnn)
        else:
            I = np.empty((nq, nnn), dtype='int32')
            D = np.empty((nq, nnn), dtype='float32')

            inter_res = ''

            for i0, xs in dataset_iterator(xq, preproc, sl):
                print('\r%d/%d (%.3f s%s)   ' % (
                    i0, nq, time.time() - t0, inter_res), end='')
                sys.stdout.flush()

                i1 = i0 + xs.shape[0]
                Di, Ii = index.search(xs, nnn)

                I[i0:i1] = Ii
                D[i0:i1] = Di

                if knngraph and not inter_res and i1 >= nq_gt:
                    ires = eval_intersection_measure(
                        gt_I[:, :nnn], I[:nq_gt])
                    inter_res = ', %.4f' % ires

        t1 = time.time()
        if knngraph:
            ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
            print("  probe=%-3d: %.3f s rank-%d intersection results: %.4f" % (
                nprobe, t1 - t0, nnn, ires))
        else:
            print("  probe=%-3d: %.3f s" % (nprobe, t1 - t0), end=' ')
            gtc = gt_I[:, :1]
            nq = xq.shape[0]
            for rank in 1, 10, 100:
                if rank > nnn: continue
                nok = (I[:, :rank] == gtc).sum()
                print("1-R@%d: %.4f" % (rank, nok / float(nq)), end=' ')
            print()
        if I_fname:
            I_fname_i = I_fname % nprobe
            print("storing", I_fname_i)
            np.save(I_fname_i, I)
        if D_fname:
            D_fname_i = D_fname % nprobe
            print("storing", D_fname_i)
            np.save(D_fname_i, D)
Code example #6
def set_search_hyperparameters(index: faiss.Index,
                               param_str: str,
                               use_gpu: bool = False) -> None:
    """ set hyperparameters to an index """
    # depends on installed faiss version # pylint: disable=no-member
    params = (faiss.GpuParameterSpace() if use_gpu
              else faiss.ParameterSpace())
    params.set_index_parameters(index, param_str)
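
An illustrative call, assuming an IVF index built elsewhere; set_index_parameters parses a comma-separated string of name=value pairs, so several parameters can be set at once:

    # Illustrative: raise nprobe on an IVF index before searching.
    set_search_hyperparameters(index, "nprobe=32", use_gpu=False)
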
Code example #7
File: test_gpu_index.py, Project: bitsun/faiss
    def do_cpu_to_gpu(self, index_key):
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case
        index.add_with_ids(xb, np.arange(nb).astype(np.int64) * 3 + 12345)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
Code example #8
File: indexing.py, Project: zmwebdev/LASER
def IndexLoad(idx_name, nprobe, gpu=False):
    print('Reading FAISS index')
    print(' - index: {:s}'.format(idx_name))
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nprobe to {:d}'.format(nprobe))
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus())
        #co = faiss.GpuMultipleClonerOptions()
        #co.shard = True
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
Code example #9
def IndexLoad(idx_path, nprobe=0, gpu=False):
    print('Reading FAISS index', file=sys.stderr)
    print(' - index: {:s}'.format(idx_path), file=sys.stderr)
    index = faiss.read_index(idx_path)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d),
          file=sys.stderr)
    print(' - setting nprobe to {:d}'.format(nprobe), file=sys.stderr)
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus(),
              file=sys.stderr)
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
Code example #10
    def compute_index(self, data_points: torch.Tensor, samples_for_pole: int = 10000):
        data_points = data_points.cpu()
        if samples_for_pole == 0:
            samples_for_pole = data_points.size(0)
        perm = torch.randperm(data_points.size(0))
        idx = perm[:min(samples_for_pole, perm.size(0))]
        self.pole = compute_pole(data_points[idx], self.manifold)

        tqdm.write("Creating nns index")
        ivf_size = 2 ** (ceil(4 * sqrt(data_points.size(0)) - 1)).bit_length()
        index_flat = faiss.index_factory(
            data_points.size(-1), "Flat")  # alternative: f"PCAR16,IVF{ivf_size},SQ4"

        general_config = get_config().general
        _use_gpu: bool = general_config.gpu >= 0
        # make it into a gpu index
        if _use_gpu:
            res = faiss.StandardGpuResources()
            # make it into a gpu index
            self.index = faiss.index_cpu_to_gpu(res, 0, index_flat)

            params = faiss.GpuParameterSpace()
        else:
            self.index = index_flat
            params = faiss.ParameterSpace()

        # params.set_index_parameter(self.index, 'nprobe', 100)
        params.initialize(self.index)

        num_blocks = 200
        block_size = ceil(data_points.size(0) / num_blocks)
        num_blocks = ceil(data_points.size(0) / block_size)
        self.data_embedding = data_points
        pole_batch = self.pole.unsqueeze(0).expand_as(data_points[:block_size])

        for i in tqdm(range(num_blocks), desc="Euclidean Project",
                      dynamic_ncols=True):
            start_index = i * block_size
            end_index = min((i + 1) * block_size, data_points.size(0))
            self.data_embedding[start_index:end_index] = self.manifold.log(
                pole_batch[0: end_index - start_index], data_points[start_index:end_index])

        tqdm.write("Training Index")
        train_size = int(20 * sqrt(data_points.size(0)))
        perm = torch.randperm(data_points.size(0))
        train_points = \
            self.data_embedding.cpu().detach()[perm[:train_size]].numpy()

        self.index.train(train_points)
        tqdm.write("Adding Vectors to Index")
        self.index.add(self.data_embedding.cpu().detach().numpy())
Code example #11
    def compute_index(self,
                      data_points: torch.Tensor,
                      samples_for_pole: int = 10000):
        data_points = data_points.cpu()
        if samples_for_pole == 0:
            samples_for_pole = data_points.size(0)
        perm = torch.randperm(data_points.size(0))
        idx = perm[:min(samples_for_pole, perm.size(0))]
        self.pole = compute_pole(data_points[idx], self.manifold)

        print("Creating nns index")
        res = faiss.StandardGpuResources()
        ivf_size = 2**(ceil(4 * sqrt(data_points.size(0)) - 1)).bit_length()
        index_flat = faiss.index_factory(data_points.size(-1),
                                         f"PCAR64,IVF{ivf_size},SQ8")
        # make it into a gpu index
        self.index = faiss.index_cpu_to_gpu(res, 0, index_flat)

        params = faiss.GpuParameterSpace()
        params.set_index_parameter(self.index, 'nprobe', 100)
        params.initialize(self.index)

        num_blocks = 50
        block_size = ceil(data_points.size(0) / num_blocks)
        num_blocks = ceil(data_points.size(0) / block_size)
        self.data_embedding = data_points
        pole_batch = self.pole.unsqueeze(0).expand_as(data_points[:block_size])

        print("Projecting to Euclidean space for nns:")
        for i in tqdm(range(num_blocks)):
            start_index = i * block_size
            end_index = min((i + 1) * block_size, data_points.size(0))
            self.data_embedding[start_index:end_index] = self.manifold.log(
                pole_batch[0:end_index - start_index],
                data_points[start_index:end_index])

        print("Training Index")
        self.index.train(self.data_embedding.cpu().detach().numpy())
        print("Adding Vectors to Index")
        self.index.add(self.data_embedding.cpu().detach().numpy())
Code example #12
File: test_gpu_index.py, Project: wangxiaobo007/faiss
    def test_set_gpu_param(self):
        index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 3)
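
For contrast, the CPU-side counterpart of the same one-liner (a sketch; ParameterSpace targets the original, non-cloned index):

    # Same parameter, set on the CPU index instead of the GPU clone.
    faiss.ParameterSpace().set_index_parameter(index, "nprobe", 3)
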
Code example #13
op_per_key = []

# keep track of optimal operating points seen so far
op = faiss.OperatingPoints()

for index_key in keys_to_test:

    print("============ key", index_key)

    # make the index described by the key
    index = faiss.index_factory(d, index_key)

    if use_gpu:
        # transfer to GPU (may be partial)
        index = faiss.index_cpu_to_gpu(res, dev_no, index)
        params = faiss.GpuParameterSpace()
    else:
        params = faiss.ParameterSpace()

    params.initialize(index)

    print("[%.3f s] train & add" % (time.time() - t0))

    index.train(xt)
    index.add(xb)

    print("[%.3f s] explore op points" % (time.time() - t0))

    # find operating points for this index
    opi = params.explore(index, xq, crit)
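
The excerpt declares op_per_key and op but stops after explore; in faiss's auto-tune demo the loop typically continues by folding the per-index operating points into the global set, roughly as below (an assumed continuation, not shown in the excerpt):

    # still inside the for-loop over keys_to_test:
    # fold this index's operating points into the global optimal set,
    # and remember them per key as well
    op.merge_with(opi, index_key + " ")
    op_per_key.append((index_key, opi))
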
Code example #14
def eval_dataset(index, preproc):

    ps = faiss.GpuParameterSpace()
    ps.initialize(index)

    nq_gt = gt_I.shape[0]
    print("search...")
    sl = query_batch_size

    nq = xq.shape[0]
    print(nq)

    for nprobe in nprobes:
        ps.set_index_parameter(index, 'nprobe', nprobe)
        t0 = time.time()

        if sl == 0:
            D, I = index.search(preproc.apply_py(sanitize(xq)), nnn)
        else:
            I = np.empty((nq, nnn), dtype='int32')
            D = np.empty((nq, nnn), dtype='float32')

            inter_res = ''

            for i0, xs in dataset_iterator(xq, preproc, sl):
                # print('\r%d/%d (%.3f s%s)   ' % (
                #     i0, nq, time.time() - t0, inter_res), end=' ')
                # sys.stdout.flush()

                i1 = i0 + xs.shape[0]
                # Wenqi: debugging memory overflow
                # print(xs.shape)
                Di, Ii = index.search(xs, nnn)

                I[i0:i1] = Ii
                D[i0:i1] = Di

                if knngraph and not inter_res and i1 >= nq_gt:
                    ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
                    inter_res = ', %.4f' % ires

        t1 = time.time()
        if knngraph:
            ires = eval_intersection_measure(gt_I[:, :nnn], I[:nq_gt])
            print("  probe=%-3d: %.3f s rank-%d intersection results: %.4f" %
                  (nprobe, t1 - t0, nnn, ires))
        else:
            print("  probe=%-3d: %.3f s" % (nprobe, t1 - t0), end=' ')
            gtc = gt_I[:, :1]
            nq = xq.shape[0]
            # Wenqi: when using only 1000 queries, comment out the block below,
            # because ground-truth verification has shape problems in that case
            for rank in 1, 10, 100:
                if rank > nnn: continue
                nok = (I[:, :rank] == gtc).sum()
                print("1-R@%d: %.4f" % (rank, nok / float(nq)), end=' ')
            print()
        if I_fname:
            I_fname_i = I_fname % nprobe
            print("storing", I_fname_i)
            np.save(I_fname_i, I)
        if D_fname:
            D_fname_i = D_fname % nprobe
            print("storing", D_fname_i)
            np.save(D_fname_i, D)
Code example #15
    result = result.transpose(0, 1)
    result = pad(result)
    #result = result.to('cpu')
    testfeatures.append(result)
    testlabels.append(key)
torch.cuda.empty_cache()

print("building test mfcc took:", time.time() - start, "seconds")

#########################################################################################################

# If GPU memory is a problem for the test/use files, combine this with the
# read above and make liberal use of empty_cache().

start = time.time()
accuracy = []
faiss.GpuParameterSpace().set_index_parameter(gpu_index, "nprobe", 2)

for l, m in tqdm(zip(testlabels, testfeatures), total=len(testlabels)):

    D, I = search_index_pytorch(gpu_index, m, 5)
    res.syncDefaultStreamCurrentDevice()
    r = torch.flatten(I).cpu().numpy()
    commons = Counter(r).most_common()
    most_likely = commons[0][0]
    accuracy.append(int(most_likely == l))

print("Running", len(testlabels), "queries took",
      time.time() - start, "seconds")
print("Accuracy: ", np.mean(accuracy))
Code example #16
    def do_cpu_to_gpu(self, index_key):
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case;
        # those need to be cast to idx_t(= int64_t), because
        # on windows the numpy int default is int32
        ids = (np.arange(nb) * 3 + 12345).astype('int64')
        index.add_with_ids(xb, ids)
        ts.append(time.time())

        index.nprobe = 4
        Dref, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        # Validate the layout of the memory info
        mem_info = res.getMemoryInfo()

        assert type(mem_info) == dict
        assert type(mem_info[0]['FlatData']) == tuple
        assert type(mem_info[0]['FlatData'][0]) == int
        assert type(mem_info[0]['FlatData'][1]) == int

        gpu_index.setNumProbes(4)

        Dnew, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        # Give us some margin of error
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size - 50)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            Dnew, Inew = gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
Code example #17
File: runfaiss.py, Project: SNU-ARC/google-research
def build_faiss(args,
                log2kstar,
                cacheroot,
                coarse_dir,
                split,
                N_,
                D,
                index_key,
                is_cached,
                query_,
                train=None,
                base=None):

    # set global variables
    name1_to_metric = {
        "dot_product": faiss.METRIC_INNER_PRODUCT,
        "squared_l2": faiss.METRIC_L2
    }
    global fmetric
    fmetric = name1_to_metric[args.metric]
    global xt
    if not is_cached:
        xt = sanitize(train)
    global xb
    if not is_cached:
        xb = base
    global dbname
    dbname = args.dataset
    global dim
    dim = D
    global gpu_resources
    global ngpu
    global usePrecomputed
    global useFloat16
    global query
    query = sanitize(query_)
    global N
    N = N_

    usePrecomputed = False
    useFloat16 = True
    print("useFloat16?", useFloat16)
    replicas = 1
    addBatchSize = 32768
    ngpu = faiss.get_num_gpus()
    tempmem = -1

    if ngpu == 0 and args.is_gpu:
        raise RuntimeError("Cannot detect a GPU on this machine")

    # process index_key
    preproc_str, ivf_str, pqflat_str = process_index_key(index_key)
    ncentroid = int(ivf_str[3:])

    # check cache files
    if not os.path.isdir(cacheroot):
        print("%s does not exist, creating it" % cacheroot)
        os.makedirs(cacheroot, exist_ok=True)

    print("cachefiles:")
    if preproc_str:
        preproc_cachefile = '%s%s_preproc_%s_%s.vectrans' % (
            cacheroot, args.metric, dbname, preproc_str[:-1])
        print(preproc_cachefile)
    else:
        preproc_str = ''
        preproc_cachefile = None

    cent_cachefile = '%s%s_cent_%s_%s%s_%s.npy' % (
        coarse_dir, args.metric, dbname, preproc_str, ivf_str, D)

    index_cachefile = '%s%s_%s_%s_%s_%s%s,%s.index' % (
        cacheroot, args.metric, dbname, split, args.num_split, preproc_str,
        ivf_str, pqflat_str)

    first_index_cachefile = '%s%s_%s_0_%s_%s%s,%s.index' % (
        cacheroot, args.metric, dbname, args.num_split, preproc_str, ivf_str,
        pqflat_str)

    if log2kstar == 4 and not args.is_gpu:
        if preproc_str:
            preproc_cachefile = preproc_cachefile + "fs"
        cent_cachefile = cent_cachefile + "fs.npy"
        index_cachefile = index_cachefile + "fs"
        first_index_cachefile = first_index_cachefile + "fs"
    print(preproc_cachefile)
    print(cent_cachefile)
    print(index_cachefile)
    print(first_index_cachefile)

    # GPU resources
    if args.is_gpu:
        gpu_resources = []
        for i in range(ngpu):
            res = faiss.StandardGpuResources()
            if tempmem >= 0:
                res.setTempMemory(tempmem)
            gpu_resources.append(res)

    # pre-processing
    preproc = get_preprocessor(preproc_str, preproc_cachefile)

    # build index
    if not index_cachefile or not os.path.exists(index_cachefile):
        # train index
        coarse_quantizer = prepare_coarse_quantizer(preproc, cent_cachefile,
                                                    ncentroid, args.is_gpu)
        if split == 0:
            index_trained = prepare_trained_index(preproc, coarse_quantizer,
                                                  ncentroid, pqflat_str,
                                                  args.is_gpu)
        else:
            index_trained = faiss.read_index(first_index_cachefile)
            index_trained.ntotal = 0
            index_trained.invlists.reset()

        # centroids = faiss.vector_to_array(index_trained.pq.centroids).reshape(index_trained.pq.M, index_trained.pq.ksub, index_trained.pq.dsub)
        # print("index_load: ", centroids.shape)
        # print("index_load: ", centroids)

        index_all, index_gpu = add_vectors(index_trained, preproc, args.is_gpu,
                                           addBatchSize)

        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(index_all, index_cachefile)

        if args.is_gpu:
            index = index_gpu
        else:
            index = index_all
    else:
        print("load", index_cachefile)
        index_load = faiss.read_index(index_cachefile)

        # move to GPU
        if args.is_gpu:
            index = copyToGpu(index_load)
            del index_load
        else:
            index = index_load

    global ps
    index.use_precomputed_table = usePrecomputed
    if args.is_gpu:
        ps = faiss.GpuParameterSpace()
        ps.initialize(index)
        # ps.set_index_parameter(index, 'nprobe', w)
    else:
        # faiss.omp_set_num_threads(faiss.omp_get_max_threads())
        faiss.omp_set_num_threads(args.batch)
        # index.nprobe = w

    return index, preproc
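
build_faiss returns with ps initialized but no nprobe chosen (the call is commented out), so a caller presumably sweeps it at search time. A sketch under that assumption, using the index/preproc returned above and the globals (ps, query) the function sets up; the nprobe values and k=100 are illustrative:

    # Sketch: sweep nprobe at search time on the prepared index.
    for w in (1, 4, 16, 64):
        if args.is_gpu:
            ps.set_index_parameter(index, 'nprobe', w)
        else:
            index.nprobe = w
        D_res, I_res = index.search(preproc.apply_py(query), 100)
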