Beispiel #1
0
    def test_replica_flag_propagation(self):
        d = 64  # dimension
        nb = 1000
        rs = np.random.RandomState(1234)
        xb = rs.rand(nb, d).astype('float32')
        nlist = 10
        quantizer1 = faiss.IndexFlatL2(d)
        quantizer2 = faiss.IndexFlatL2(d)
        index1 = faiss.IndexIVFFlat(quantizer1, d, nlist)
        index2 = faiss.IndexIVFFlat(quantizer2, d, nlist)

        index = faiss.IndexReplicas(d, True)
        index.add_replica(index1)
        index.add_replica(index2)

        self.assertFalse(index.is_trained)
        index.train(xb)
        self.assertTrue(index.is_trained)

        self.assertEqual(index.ntotal, 0)
        index.add(xb)
        self.assertEqual(index.ntotal, nb)

        index.remove_replica(index2)
        self.assertEqual(index.ntotal, nb)
        index.remove_replica(index1)
        self.assertEqual(index.ntotal, 0)
Beispiel #2
0
 def test_proxy(self):
     index = faiss.IndexReplicas()
     for _i in range(3):
         sub_index = faiss.IndexFlatL2(self.d)
         sub_index.add(self.xb)
         index.addIndex(sub_index)
     assert index.d == self.d
     index.search(self.xb, 10)
Beispiel #3
0
def get_populated_index(preproc):

    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print("store", index_cachefile)
            faiss.write_index(indexall, index_cachefile)
    else:
        print("load", index_cachefile)
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True  # the replicas will be made "manually"
    t0 = time.time()
    print("CPU index contains %d vectors, move to GPU" % indexall.ntotal)
    if replicas == 1:

        if not gpu_index:
            print("copying loaded index to GPUs")
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)
        else:
            index = gpu_index

    else:
        del gpu_index  # We override the GPU index

        print("Copy CPU index to %d sharded GPU indexes" % replicas)

        index = faiss.IndexReplicas()

        for i in range(replicas):
            gpu0 = ngpu * i / replicas
            gpu1 = ngpu * (i + 1) / replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)

            print("   dispatch to GPUs %d:%d" % (gpu0, gpu1))

            index1 = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print("move to GPU done in %.3f s" % (time.time() - t0))
    return index
def run_kmeans_multi_gpu(x,
                         nmb_clusters,
                         verbose=False,
                         seed=DEFAULT_KMEANS_SEED,
                         gpu_device=0):
    """
    Runs kmeans on multi GPUs.

    Args:
    -----
    x: data
    nmb_clusters (int): number of clusters

    Returns:
    --------
    list: ids of data in each cluster
    """
    n_data, d = x.shape
    ngpus = len(gpu_device)
    assert ngpus > 1

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    clus.seed = seed
    res = [faiss.StandardGpuResources() for i in range(ngpus)]
    flat_config = []
    for i in gpu_device:
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    indexes = [
        faiss.GpuIndexFlatL2(res[i], d, flat_config[i]) for i in range(ngpus)
    ]
    index = faiss.IndexReplicas()
    for sub_index in indexes:
        index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1]
Beispiel #5
0
    def test_stress(self):
        # a mixture of the above, from issue #631
        target = np.random.rand(50, 16).astype('float32')

        index = faiss.IndexReplicas()
        size, dim = target.shape
        num_gpu = 4
        for _i in range(num_gpu):
            config = faiss.GpuIndexFlatConfig()
            config.device = 0   # simulate on a single GPU
            sub_index = faiss.GpuIndexFlatIP(faiss.StandardGpuResources(), dim, config)
            index.addIndex(sub_index)

        index = faiss.IndexIDMap(index)
        ids = np.arange(size)
        index.add_with_ids(target, ids)
Beispiel #6
0
def train_kmeans(x, k, ngpu, max_points_per_centroid=256):
    "Runs kmeans on one or several GPUs"
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20
    clus.max_points_per_centroid = max_points_per_centroid

    if ngpu == 0:
        index = faiss.IndexFlatL2(d)
    else:
        res = [faiss.StandardGpuResources() for i in range(ngpu)]

        flat_config = []
        for i in range(ngpu):
            cfg = faiss.GpuIndexFlatConfig()
            cfg.useFloat16 = False
            cfg.device = i
            flat_config.append(cfg)

        if ngpu == 1:
            index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
        else:
            indexes = [
                faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
                for i in range(ngpu)
            ]
            index = faiss.IndexReplicas()
            for sub_index in indexes:
                index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    stats = clus.iteration_stats
    stats = [stats.at(i) for i in range(stats.size())]
    obj = np.array([st.obj for st in stats])
    print("final objective: %.4g" % obj[-1])

    return centroids.reshape(k, d)
def train_kmeans(x, k, ngpu):
    "Runs kmeans on one or several GPUs"
    d = x.shape[1]
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 20

    # otherwise the kmeans implementation sub-samples the training set
    clus.max_points_per_centroid = 10000000

    res = [faiss.StandardGpuResources() for i in range(ngpu)]

    flat_config = []
    for i in range(ngpu):
        cfg = faiss.GpuIndexFlatConfig()
        cfg.useFloat16 = False
        cfg.device = i
        flat_config.append(cfg)

    if ngpu == 1:
        index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0])
    else:
        indexes = [
            faiss.GpuIndexFlatL2(res[i], d, flat_config[i])
            for i in range(ngpu)
        ]
        index = faiss.IndexReplicas()
        for sub_index in indexes:
            index.addIndex(sub_index)

    # perform the training
    clus.train(x, index)
    centroids = faiss.vector_float_to_array(clus.centroids)

    obj = faiss.vector_float_to_array(clus.obj)
    print "final objective: %.4g" % obj[-1]

    return centroids.reshape(k, d)