Beispiel #1
0
    def __init__(self,
                 target,
                 nprobe=128,
                 num_gpu=None,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        """Build, train and populate a faiss index over ``target``.

        Args:
            target: 2-D array; ``target.shape`` is unpacked as
                ``(size, dim)``. Every row is added under the id equal to
                its row position.
            nprobe: value assigned to ``nprobe`` on the CPU index before it
                is cloned.
            num_gpu: number of sub-indexes to create. ``None`` (or a value
                larger than the number of visible devices) means "one per
                entry of ``CUDA_VISIBLE_DEVICES``".
            index_factory_str: faiss factory string. Defaults to
                ``"IVF{n},PQ32"`` with
                ``n = min(8192, 16 * round(sqrt(size)))``.
            verbose: assigned to ``verbose`` on the final index.
            mode: only ``'proxy'`` is implemented.
            using_gpu: when false, the CPU index itself is registered as
                each sub-index instead of a GPU clone.

        Raises:
            RuntimeError: ``CUDA_VISIBLE_DEVICES`` is unset or lists no
                device.
            NotImplementedError: ``mode == 'shard'``.
            KeyError: any other unknown ``mode``.
            AssertionError: ``target`` has no rows.
        """
        self._res_list = []

        # An unset or empty variable must count as "no GPU": "".split(",")
        # returns [''], so only non-empty entries are counted.
        visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        found_gpu = len(
            [dev for dev in visible_devices.split(",") if dev.strip()])
        if found_gpu == 0:
            raise RuntimeError(
                "No GPU found. Please export CUDA_VISIBLE_DEVICES")
        if num_gpu is None or num_gpu > found_gpu:
            num_gpu = found_gpu
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        if index_factory_str is None:
            # int() keeps the factory string well-formed ("IVF48", never
            # "IVF48.0") when round() hands back a float.
            index_factory_str = "IVF{},PQ{}".format(
                min(8192, 16 * int(round(np.sqrt(size)))), 32)
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                # Keep the resources on the instance so they outlive this
                # constructor.
                res = faiss.StandardGpuResources()
                self._res_list.append(res)
                # NOTE(review): with using_gpu=False the same cpu_index
                # object is registered num_gpu times -- confirm intended.
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            raise NotImplementedError
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # training
        if not index.is_trained:
            # nlist decides how many samples are used for training; it is
            # only parsed when training is actually required.
            ivf_items = [
                item for item in index_factory_str.split(",") if 'IVF' in item
            ]
            if ivf_items:
                nlist = int(ivf_items[0].replace("IVF", ""))
                indexes_sample_for_train = np.random.randint(
                    0, size, nlist * 256)
                index.train(target[indexes_sample_for_train])
            else:
                # No IVF component to size the sample from: train on all
                # rows instead of failing on the missing token.
                index.train(target)

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index
def train_kmeans(x, k, ngpu):
    """Run k-means on one or several GPUs and return the centroids.

    Args:
        x: 2-D training matrix; ``x.shape[1]`` is used as the dimension.
        k: number of centroids to compute.
        ngpu: number of GPUs to use; devices ``0 .. ngpu - 1`` are used.

    Returns:
        The centroids reshaped to ``(k, d)``.
    """
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for _ in range(ngpu)]

    useFloat16 = False

    if ngpu == 1:
        index = faiss.GpuIndexFlatL2(res[0], 0, d, useFloat16)
    else:
        indexes = [faiss.GpuIndexFlatL2(res[i], i, d, useFloat16)
                   for i in range(ngpu)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    obj = faiss.vector_float_to_array(clus.obj)
    # Single-argument call form: valid on both Python 2 and Python 3.
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
def train_kmeans(x, k, ngpu):
    """Cluster the rows of ``x`` into ``k`` centroids on ``ngpu`` GPUs.

    One ``GpuIndexFlatIP`` is created per device ``0 .. ngpu - 1``. With a
    single GPU that index is used directly; otherwise the per-device
    indexes are registered in a ``faiss.IndexProxy``. The final objective
    value is printed and the centroids are returned as a ``(k, d)`` array.
    """
    dim = x.shape[1]

    clustering = faiss.Clustering(dim, k)
    clustering.verbose = True
    clustering.niter = 20
    # otherwise the kmeans implementation sub-samples the training set
    clustering.max_points_per_centroid = 10000000

    resources = [faiss.StandardGpuResources() for _ in range(ngpu)]

    configs = []
    for device in range(ngpu):
        config = faiss.GpuIndexFlatConfig()
        config.useFloat16 = False
        config.device = device
        configs.append(config)

    if ngpu == 1:
        index = faiss.GpuIndexFlatIP(resources[-1], dim, configs[0])
    else:
        gpu_indexes = [
            faiss.GpuIndexFlatIP(resource, dim, config)
            for resource, config in zip(resources, configs)
        ]
        index = faiss.IndexProxy()
        for gpu_index in gpu_indexes:
            index.addIndex(gpu_index)

    # perform the training
    clustering.train(x, index)

    objective = faiss.vector_float_to_array(clustering.obj)
    print("final objective: %.4g" % objective[-1])

    flat_centroids = faiss.vector_float_to_array(clustering.centroids)
    return flat_centroids.reshape(k, dim)
Beispiel #4
0
 def test_proxy(self):
     """Check that an IndexProxy over three flat L2 indexes can be searched.

     Each sub-index is filled with ``self.xb``; the proxy must report the
     dimension ``self.d`` and accept a 10-nearest-neighbour search.
     """
     replica_count = 3
     proxy = faiss.IndexProxy()
     for _ in range(replica_count):
         flat = faiss.IndexFlatL2(self.d)
         flat.add(self.xb)
         proxy.addIndex(flat)
     assert proxy.d == self.d
     proxy.search(self.xb, 10)
Beispiel #5
0
def get_populated_index(preproc):
    """Return a GPU index that holds the populated database.

    The populated CPU index is either computed (and written to
    ``index_cachefile`` when that is set) or read back from the cache
    file. It is then moved to the GPUs, either as one multi-GPU index
    (``replicas == 1``) or as ``replicas`` multi-GPU indexes registered in
    a ``faiss.IndexProxy``.

    Reads the module-level settings ``index_cachefile``, ``altadd``,
    ``use_float16``, ``use_precomputed_tables``, ``replicas`` and ``ngpu``.

    Args:
        preproc: forwarded unchanged to ``compute_populated_index`` /
            ``compute_populated_index_2``.

    Returns:
        The GPU index.
    """
    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print("store %s" % (index_cachefile,))
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load %s" % (index_cachefile,))
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True    # the replicas will be made "manually"
    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:

        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index

    else:
        del gpu_index  # We override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)

        index = faiss.IndexProxy()

        for i in range(replicas):
            # Floor division keeps the device bounds integral on Python 3
            # as well as Python 2.
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)

            print("   dispatch to GPUs %d:%d" % (gpu0, gpu1))

            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            # The Python wrapper gives up ownership; presumably the proxy
            # (own_fields = True below) is then responsible for freeing it.
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index
Beispiel #6
0
def train_kmeans(x,
                 num_clusters=1000,
                 gpu_ids=None,
                 niter=100,
                 nredo=1,
                 verbose=0):
    """
    Runs k-means clustering on one or several GPUs, or on the CPU.

    Args:
        x: 2-D training matrix; must contain neither NaN nor Inf.
        num_clusters: number of centroids to compute.
        gpu_ids: ``None`` to cluster with a CPU ``IndexFlatL2``, a single
            int device id, or a non-empty sequence of device ids.
        niter: assigned to the ``niter`` field of ``faiss.Clustering``.
        nredo: assigned to the ``nredo`` field of ``faiss.Clustering``.
        verbose: truthy to enable faiss' verbose output.

    Returns:
        The centroids reshaped to ``(num_clusters, d)``.

    Raises:
        AssertionError: ``x`` contains NaN/Inf, or ``gpu_ids`` is empty.
    """
    assert np.all(~np.isnan(x)), 'x contains NaN'
    assert np.all(np.isfinite(x)), 'x contains Inf'
    if isinstance(gpu_ids, int):
        gpu_ids = [gpu_ids]
    assert gpu_ids is None or len(gpu_ids)

    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = bool(verbose)
    kmeans.niter = niter
    kmeans.nredo = nredo

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    if gpu_ids is None:
        index = faiss.IndexFlatL2(d)
    else:
        # One resource object and one config per requested device.
        res = [faiss.StandardGpuResources() for _ in gpu_ids]

        flat_config = []
        for device in gpu_ids:
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = device
            flat_config.append(cfg)

        if len(gpu_ids) == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [
                faiss.GpuIndexFlatL2(resource, d, cfg)
                for resource, cfg in zip(res, flat_config)
            ]
            index = faiss.IndexProxy()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    return centroids.reshape(num_clusters, d)
Beispiel #7
0
 def _init_faiss(self, ngpu, feat_len):
     """Create one flat inner-product GPU index per device plus a proxy.

     Populates ``self.flat_config``, ``self.res`` and ``self.indexes`` with
     one entry per device ``0 .. ngpu - 1`` and stores a
     ``faiss.IndexProxy`` holding all sub-indexes in ``self.index``.
     ``self.cfg`` ends up referring to the last config created.
     """
     device_ids = range(ngpu)

     self.flat_config = []
     for device_id in device_ids:
         # The config is kept on the instance as well as in the list.
         config = self.cfg = faiss.GpuIndexFlatConfig()
         config.useFloat16 = False
         config.device = device_id
         self.flat_config.append(config)

     self.res = [faiss.StandardGpuResources() for _ in device_ids]
     self.indexes = [
         faiss.GpuIndexFlatIP(resource, feat_len, config)
         for resource, config in zip(self.res, self.flat_config)
     ]

     proxy = self.index = faiss.IndexProxy()
     for gpu_index in self.indexes:
         proxy.addIndex(gpu_index)
Beispiel #8
0
    def _init_faiss(self, ngpu, feat_len, tempIVF, tempPQ, tempNP):
        """Build one factory-made index per GPU and group them in a proxy.

        For each device ``0 .. ngpu - 1`` a CPU index is created from the
        factory string ``tempIVF + "," + tempPQ``, its ``nprobe`` is set to
        ``tempNP`` and it is cloned to that device with float16 enabled and
        precomputed tables disabled. Results are stored in ``self.res``,
        ``self.indexes`` and ``self.index``.
        """
        cloner_options = faiss.GpuClonerOptions()
        cloner_options.useFloat16 = True
        cloner_options.usePrecomputed = False

        # Setting up GPU resources
        self.res = [faiss.StandardGpuResources() for _ in range(ngpu)]

        self.indexes = []
        for device, resource in enumerate(self.res):
            cpu_index = faiss.index_factory(feat_len, tempIVF + "," + tempPQ)
            cpu_index.nprobe = tempNP
            gpu_index = faiss.index_cpu_to_gpu(
                resource, device, cpu_index, cloner_options)
            self.indexes.append(gpu_index)

        proxy = self.index = faiss.IndexProxy()
        for gpu_index in self.indexes:
            proxy.addIndex(gpu_index)
Beispiel #9
0
    def test_stress(self):
        """Add vectors with ids through IndexIDMap -> IndexProxy -> GPU.

        Four flat inner-product GPU indexes, all placed on device 0, are
        registered in a proxy; each one receives a temporary
        ``StandardGpuResources`` that is never bound to a name.
        """
        # a mixture of the above, from issue #631
        data = np.random.rand(50, 16).astype('float32')
        n_rows, n_cols = data.shape
        replica_count = 4

        idx = faiss.IndexProxy()
        for _ in range(replica_count):
            gpu_config = faiss.GpuIndexFlatConfig()
            gpu_config.device = 0  # simulate on a single GPU
            replica = faiss.GpuIndexFlatIP(
                faiss.StandardGpuResources(), n_cols, gpu_config)
            idx.addIndex(replica)

        # Deliberately rebind the same name: the proxy keeps no named
        # Python reference of its own after this line.
        idx = faiss.IndexIDMap(idx)
        row_ids = np.arange(n_rows)
        idx.add_with_ids(data, row_ids)
Beispiel #10
0
def train_kmeans(x, num_clusters=1000, num_gpus=1):
    """
    Runs k-means clustering on one or several GPUs

    Args:
        x: 2-D training matrix; ``x.shape[1]`` is used as the dimension.
        num_clusters: number of centroids to compute.
        num_gpus: number of GPUs; devices ``0 .. num_gpus - 1`` are used.

    Returns:
        The centroids reshaped to ``(num_clusters, d)``.
    """
    d = x.shape[1]
    kmeans = faiss.Clustering(d, num_clusters)
    kmeans.verbose = True
    kmeans.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    kmeans.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for _ in range(num_gpus)]

    flat_config = []
    for i in range(num_gpus):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if num_gpus == 1:
        index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
    else:
        indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                   for i in range(num_gpus)]
        index = faiss.IndexProxy()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    kmeans.train(x, index)
    # Each message is formatted into one string so the output is the same
    # whether print is a statement (Python 2) or a function (Python 3).
    print('Total number of indexed vectors (after kmeans.train()): %s'
          % (index.ntotal,))
    centroids = faiss.vector_float_to_array(kmeans.centroids)

    objective = faiss.vector_float_to_array(kmeans.obj)
    print('Objective values per iter: %s' % (objective,))
    print("Final objective: %.4g" % objective[-1])

    # TODO: return cluster assignment

    return centroids.reshape(num_clusters, d)
def kmeans(features, nclusters, num_iters, ngpu, njobs, seed):
    """
    Run k-means on features, generating nclusters clusters. It will use, in order of preference, Faiss, pomegranate, or
    scikit-learn.

    The backend is picked from the module-level flags ``USE_FAISS``, ``USE_GPU`` and ``USE_POMEGRANATE``. Pomegranate
    is skipped when a seed is given, in which case scikit-learn is used instead.

    :param features: Features to cluster.
    :param nclusters: Number of clusters to generate.
    :param num_iters: Maximum number of iterations to perform.
    :param ngpu: Number of GPUs to use (if GPUs are available).
    :param njobs: Number of threads to use (only passed to pomegranate).
    :param seed: Seed for reproducibility.
    :return: centroids: The centroids found with k-means.
    """
    print('Running k-means...')
    if USE_FAISS:
        d = features.shape[1]
        pca_features = np.ascontiguousarray(features).astype('float32')

        clus = faiss.Clustering(d, nclusters)
        clus.verbose = True
        clus.niter = num_iters
        if seed is not None:
            clus.seed = seed

        # otherwise the kmeans implementation sub-samples the training set
        clus.max_points_per_centroid = 10000000

        if USE_GPU:
            res = [faiss.StandardGpuResources() for _ in range(ngpu)]

            flat_config = []
            for device in range(ngpu):
                cfg = faiss.GpuIndexFlatConfig()
                cfg.useFloat16 = False
                cfg.device = device
                flat_config.append(cfg)

            if ngpu == 1:
                index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
            else:
                indexes = [
                    faiss.GpuIndexFlatL2(resource, d, cfg)
                    for resource, cfg in zip(res, flat_config)
                ]
                index = faiss.IndexProxy()
                for sub_index in indexes:
                    index.addIndex(sub_index)
        else:
            index = faiss.IndexFlatL2(d)

        clus.train(pca_features, index)
        centroids = faiss.vector_float_to_array(clus.centroids)
        centroids = centroids.reshape(nclusters, d)

    elif USE_POMEGRANATE and seed is None:
        kmeans = pomegranate.kmeans.Kmeans(nclusters,
                                           init='kmeans++',
                                           n_init=10)
        kmeans.fit(features, max_iterations=num_iters, n_jobs=njobs)
        centroids = kmeans.centroids
    else:
        if USE_POMEGRANATE and seed is not None:
            print(
                'Pomegranate does not currently support k-means with a seed. Switching to scikit-learn instead.'
            )
        print('Using scikit-learn. This may be slow!')
        # Honour num_iters here too, as the other two backends do.
        kmeans = sklearn.cluster.KMeans(n_clusters=nclusters,
                                        max_iter=num_iters,
                                        random_state=seed).fit(features)
        centroids = kmeans.cluster_centers_

    return centroids
Beispiel #12
0
    cfg = faiss.GpuIndexIVFFlatConfig(
    )  #faiss.GpuIndexFlatConfig()  faiss.GpuIndexIVFPQConfig()
    cfg.useFloat16 = False
    cfg.device = i
    flat_config.append(cfg)

# Alternative per-GPU index types, kept for reference. `res`, `d`, `nlist`,
# `flat_config` and `ngpus` are defined earlier in the script (outside this view).
#indexes = [faiss.GpuIndexFlatL2(res[i],d,flat_config[i]) for i in range(ngpus)]    # works, fast, needs no training, computes L2 distances directly
#indexes = [faiss.GpuIndexIVFPQ(res[i],d,nlist, m,4,faiss.METRIC_L2,flat_config[i]) for i in range(ngpus)]
# One GpuIndexIVFFlat (L2 metric) per GPU, each with its own resources and config.
indexes = [
    faiss.GpuIndexIVFFlat(res[i], d, nlist, faiss.METRIC_L2, flat_config[i])
    for i in range(ngpus)
]
# then we make an Index array
# useFloat16 is a boolean value

# Register every per-GPU index in a single IndexProxy.
index = faiss.IndexProxy()

for sub_index in indexes:
    index.addIndex(sub_index)

index.train(pin_data_drop_new)  # which factors affect the PQ time???
print(index.is_trained)

index.add(pin_data_drop_new)
# NOTE(review): nprobe is assigned on the proxy object, not on the sub-indexes --
# confirm that the setting actually reaches the GpuIndexIVFFlat instances.
index.nprobe = 30  # needs tuning: raising nprobe enough gives the same results as brute-force; nprobe controls the speed/accuracy trade-off
print(index.ntotal)  # number of vectors in the index

# Time a k-nearest-neighbour search of the first 200000 database vectors against the index.
beg = time.time()
query_self = pin_data_drop_new[:200000]  # the queries are database vectors themselves
dis, ind = index.search(query_self, k)  # search the data against itself
print(time.time() - beg)