Example #1
def knnGPU(x, y, k, mem=512*1024*1024):
    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)
    dim = x.shape[1]
    batch_size = mem // (dim*4)
    sim = np.zeros((x.shape[0], k), dtype=np.float32)
    ind = np.zeros((x.shape[0], k), dtype=np.int64)
    for xfrom in range(0, x.shape[0], batch_size):
        xto = min(xfrom + batch_size, x.shape[0])
        bsims, binds = [], []
        for yfrom in range(0, y.shape[0], batch_size):
            yto = min(yfrom + batch_size, y.shape[0])
            # print('{}-{}  ->  {}-{}'.format(xfrom, xto, yfrom, yto))
            idx = faiss.IndexFlatIP(dim)
            # idx = faiss.GpuIndexIVFFlat(dim)
            idx = faiss.index_cpu_to_all_gpus(idx)
            idx.add(y[yfrom:yto])
            bsim, bind = idx.search(x[xfrom:xto], min(k, yto-yfrom))
            bsims.append(bsim)
            binds.append(bind + yfrom)
            del idx
        bsims = np.concatenate(bsims, axis=1)
        binds = np.concatenate(binds, axis=1)
        aux = np.argsort(-bsims, axis=1)
        for i in range(xfrom, xto):
            for j in range(k):
                sim[i, j] = bsims[i-xfrom, aux[i-xfrom, j]]
                ind[i, j] = binds[i-xfrom, aux[i-xfrom, j]]
    return sim, ind
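
A minimal usage sketch for knnGPU above, on synthetic data; it assumes faiss-gpu and numpy are installed and at least one GPU is visible. The vectors are L2-normalized first so the inner-product scores behave like cosine similarities.

import faiss
import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(1000, 128).astype('float32')   # queries
y = rng.rand(20000, 128).astype('float32')  # database

faiss.normalize_L2(x)  # in-place; makes IndexFlatIP scores cosine-like
faiss.normalize_L2(y)

sim, ind = knnGPU(x, y, k=4)
print(sim.shape, ind.shape)  # (1000, 4) (1000, 4)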
Example #2
def global_level_semantic_sim(embs,
                              k=50,
                              search_batch_sz=50000,
                              index_batch_sz=500000,
                              split=False,
                              norm=True,
                              gpu=True):
    print('FAISS number of GPUs=', faiss.get_num_gpus())
    size = [embs[0].size(0), embs[1].size(0)]
    emb_size = embs[0].size(1)
    if norm:
        embs = apply(norm_process, *embs)
    emb_q, emb_id = apply(lambda x: x.cpu().numpy(), *embs)
    del embs
    gc.collect()
    vals, inds = [], []
    total_size = emb_id.shape[0]
    for i_batch in range(0, total_size, index_batch_sz):
        i_end = min(total_size, i_batch + index_batch_sz)
        val, ind = faiss_search_impl(emb_q, emb_id[i_batch:i_end], emb_size,
                                     i_batch, k, search_batch_sz, gpu)
        vals.append(val)
        inds.append(ind)

    vals, inds = torch.cat(vals, dim=1), torch.cat(inds, dim=1)
    print(vals.size(), inds.size())

    return topk2spmat(vals, inds, size, 0, torch.device('cpu'), split)
Example #3
def get_knn(reference_embeddings,
            test_embeddings,
            k,
            embeddings_come_from_same_source=False):
    """
    Finds the k elements in reference_embeddings that are closest to each
    element of test_embeddings.
    Args:
        reference_embeddings: numpy array of size (num_samples, dimensionality).
        test_embeddings: numpy array of size (num_samples2, dimensionality).
        k: int, number of nearest neighbors to find
        embeddings_come_from_same_source: if True, then the nearest neighbor of
                                         each element (which is actually itself)
                                         will be ignored.
    """
    d = reference_embeddings.shape[1]
    logging.info("running k-nn with k=%d" % k)
    logging.info("embedding dimensionality is %d" % d)
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    index.add(reference_embeddings)
    _, indices = index.search(test_embeddings, k + 1)
    if embeddings_come_from_same_source:
        return indices[:, 1:]
    return indices[:, :k]
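
A short sketch of calling get_knn on synthetic float32 embeddings, assuming the faiss, numpy and logging imports the snippet's module relies on:

import numpy as np

refs = np.random.rand(5000, 256).astype('float32')
queries = np.random.rand(100, 256).astype('float32')

neighbors = get_knn(refs, queries, k=10)
print(neighbors.shape)  # (100, 10)

# Querying a set against itself: drop the trivial self-match.
self_nn = get_knn(refs, refs, k=10, embeddings_come_from_same_source=True)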
Example #4
def search_gpu(query_path, refer_path, output, topk=100):
    queryfeas, queryconts = loadFeaFromPickle(query_path)
    referfeas, referconts = loadFeaFromPickle(refer_path)
    assert(queryfeas.shape[1] == referfeas.shape[1])
    dim = int(queryfeas.shape[1])
    print("=> query feature shape: {}".format(queryfeas.shape), file=sys.stderr)
    print("=> refer feature shape: {}".format(referfeas.shape), file=sys.stderr)
    
    start = time.time()
    ngpus = faiss.get_num_gpus()
    print("=> search use gpu number of GPUs: {}".format(ngpus), file=sys.stderr)
    cpu_index = faiss.IndexFlat(dim, faiss.METRIC_INNER_PRODUCT)   # build the index
    gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
            cpu_index
            )
    gpu_index.add(referfeas)                  # add vectors to the index
    print("=> building gpu index success, "
          "total index number: {}".format(gpu_index.ntotal), file=sys.stderr)
    distance, ind = gpu_index.search(queryfeas, int(topk))
    assert(distance.shape == ind.shape)
    end = time.time()
    print("=> searching total use time {}".format(end - start), file=sys.stderr)
    outdic = {}
    for key_id in range(queryfeas.shape[0]):
        querycont = queryconts[key_id]
        searchresult = [(referconts[ind[key_id][i]], distance[key_id][i]) \
                         for i in range(len(distance[key_id]))]
        outdic[querycont] = searchresult
    print("=> convert search gpu result to output format success")
    pickle.dump(outdic, open(output,"wb"), protocol=2)
Example #5
def run_kmeans(x, nmb_clusters):
    """
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    x = c_f.to_numpy(x).astype(np.float32)
    n_data, d = x.shape
    logging.info("running k-means clustering with k=%d" % nmb_clusters)
    logging.info("embedding dimensionality is %d" % d)

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    index = faiss.IndexFlatL2(d)
    if faiss.get_num_gpus() > 0:
        index = faiss.index_cpu_to_all_gpus(index)
    # perform the training
    clus.train(x, index)
    _, idxs = index.search(x, 1)

    return [int(n[0]) for n in idxs]
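
A usage sketch for run_kmeans, assuming c_f is pytorch-metric-learning's common_functions module (c_f.to_numpy is a conversion helper there) and that the function falls back to CPU when no GPU is present:

import numpy as np

data = np.random.rand(10000, 64).astype(np.float32)
cluster_ids = run_kmeans(data, nmb_clusters=50)
print(len(cluster_ids))       # 10000 (one cluster id per input row)
print(len(set(cluster_ids)))  # at most 50 distinct ids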
Example #6
def get_gpu_index(cpu_index):
    gpu_resources = []
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
        gpu_resources.append(res)

    def make_vres_vdev(i0=0, i1=-1):
        " return vectors of device ids and resources useful for gpu_multiple"
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        if i1 == -1:
            i1 = ngpu
        for i in range(i0, i1):
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        return vres, vdev

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_vector_resources, gpu_devices_vector = make_vres_vdev(0, ngpu)
    gpu_index = faiss.index_cpu_to_gpu_multiple(gpu_vector_resources,
                                                gpu_devices_vector, cpu_index,
                                                co)
    return gpu_index
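
A sketch wrapping get_gpu_index around a flat CPU index; with co.shard = True as set above, each GPU holds a slice of the database rather than a full replica:

import faiss
import numpy as np

cpu_index = faiss.IndexFlatL2(128)
gpu_index = get_gpu_index(cpu_index)  # shards across all visible GPUs

xb = np.random.rand(50000, 128).astype('float32')
gpu_index.add(xb)
D, I = gpu_index.search(xb[:5], 10)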
Example #7
def clean_faiss_gpu():
    ngpu = faiss.get_num_gpus()
    tempmem = 0
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
Example #8
def load_index(path_index, mode="cpu"):
    index = faiss.read_index(path_index)
    if mode == "gpu":
        ngpus = faiss.get_num_gpus()
        if ngpus > 0:
            index = faiss.index_cpu_to_all_gpus(index)
    return index
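
A round-trip sketch: build a small index, persist it with faiss.write_index, then reload it through load_index; the file name is illustrative.

import faiss
import numpy as np

xb = np.random.rand(1000, 32).astype('float32')
index = faiss.IndexFlatL2(32)
index.add(xb)
faiss.write_index(index, "demo.index")

index2 = load_index("demo.index", mode="gpu")  # quietly stays on CPU if no GPU
print(index2.ntotal)  # 1000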
Example #9
def build_faiss_index(nd_feats_array, mode):
    """
    build index on multi GPUs
    :param nd_feats_array:
    :param mode: 0: CPU; 1: GPU; 2: Multi-GPU
    :return:
    """
    d = nd_feats_array.shape[1]

    cpu_index = faiss.IndexFlatL2(d)  # build the index on CPU
    if mode == 0:
        print("[INFO] Is trained? >> {}".format(cpu_index.is_trained))
        cpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(cpu_index.ntotal))

        return cpu_index
    elif mode == 1:
        ngpus = faiss.get_num_gpus()
        print("[INFO] number of GPUs:", ngpus)
        res = faiss.StandardGpuResources()  # use a single GPU
        gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
        gpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(gpu_index.ntotal))

        return gpu_index
    elif mode == 2:
        multi_gpu_index = faiss.index_cpu_to_all_gpus(
            cpu_index)  # build the index on multi GPUs
        multi_gpu_index.add(nd_feats_array)  # add vectors to the index
        print("[INFO] Capacity of gallery: {}".format(multi_gpu_index.ntotal))

        return multi_gpu_index
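
A usage sketch for the three modes documented above (0: CPU, 1: single GPU, 2: multi-GPU), on synthetic features:

import numpy as np

feats = np.random.rand(2000, 256).astype(np.float32)
index = build_faiss_index(feats, mode=0)  # pass 1 or 2 on GPU machines
D, I = index.search(feats[:5], 10)
print(I.shape)  # (5, 10)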
Example #10
def get_k(bases, xb, params, k_data, dates):
    if params['kLineFirst']:
        dim = len(params['pickedStockKLine']['values'])
    else:
        dim = len(params['pickedStockTicks']['values'])
    #query
    kLine = params['pickedStockKLine']['values']
    open = list(map(lambda x: x[0], kLine))
    close = list(map(lambda x: x[1], kLine))
    low = list(map(lambda x: x[2], kLine))
    high = list(map(lambda x: x[3], kLine))
    volume = params['pickedStockKLine']['volumes']
    query = list(map(lambda x: x / open[0], open)) + list(
        map(lambda x: x / open[0], close)) + list(
            map(lambda x: x / open[0], low)) + list(
                map(lambda x: x / open[0], high)) + list(
                    map(lambda x: x[1] / volume[0][1], volume))
    xq = np.array([np.array(query)]).astype('float32')
    ngpus = faiss.get_num_gpus()
    #build index
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    cpu_index = faiss.IndexFlatL2(dim * 5)
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(xb)

    D, I = gpu_index.search(xq, 10)
    #have not done
    end = time.perf_counter()
    print(end - start)
    results = list(map(lambda x: bases[x], I[0]))
    print(results)
    return jsonify(back_and_front(results, k_data, dates))  #not yet
Example #11
File: faiss_gpu.py Project: yuk12/dgl
    def __init__(self,
                 target,
                 nprobe=128,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        self._res_list = []

        num_gpu = faiss.get_num_gpus()
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        index_factory_str = "IVF{},PQ{}".format(
            min(8192, 16 * round(np.sqrt(size))),
            32) if index_factory_str is None else index_factory_str
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                res = faiss.StandardGpuResources()
                self._res_list.append(res)
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False
            co.shard = True
            index = faiss.index_cpu_to_all_gpus(cpu_index, co, ngpu=num_gpu)
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # get nlist to decide how many samples used for training
        nlist = int(
            float([
                item for item in index_factory_str.split(",") if 'IVF' in item
            ][0].replace("IVF", "")))

        # training
        if not index.is_trained:
            indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
            index.train(target[indexes_sample_for_train])

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index
Example #12
    def init_index(self):
        d = 128
        ngpus = faiss.get_num_gpus()

        print("number of GPUs:", ngpus)

        self.cpu_index = faiss.IndexFlatL2(d)
        self.cpu_index.add(self.known_encoding_faces2)
Example #13
def range_ground_truth(xq,
                       db_iterator,
                       threshold,
                       metric_type=faiss.METRIC_L2,
                       shard=False,
                       ngpu=-1):
    """Computes the range-search search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.
    """
    nq, d = xq.shape
    t0 = time.time()
    xq = np.ascontiguousarray(xq, dtype='float32')

    index = faiss.IndexFlat(d, metric_type)
    if ngpu == -1:
        ngpu = faiss.get_num_gpus()
    if ngpu:
        LOG.info('running on %d GPUs' % ngpu)
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard
        index_gpu = faiss.index_cpu_to_all_gpus(index, co=co, ngpu=ngpu)

    # compute ground-truth by blocks
    i0 = 0
    D = [[] for _i in range(nq)]
    I = [[] for _i in range(nq)]
    all_lims = []
    for xbi in db_iterator:
        ni = xbi.shape[0]
        if ngpu > 0:
            index_gpu.add(xbi)
            lims_i, Di, Ii = range_search_gpu(xq, threshold, index_gpu, xbi)
            index_gpu.reset()
        else:
            index.add(xbi)
            lims_i, Di, Ii = index.range_search(xq, threshold)
            index.reset()
        Ii += i0
        for j in range(nq):
            l0, l1 = lims_i[j], lims_i[j + 1]
            if l1 > l0:
                D[j].append(Di[l0:l1])
                I[j].append(Ii[l0:l1])
        i0 += ni
        LOG.info("%d db elements, %.3f s" % (i0, time.time() - t0))

    empty_I = np.zeros(0, dtype='int64')
    empty_D = np.zeros(0, dtype='float32')
    # import pdb; pdb.set_trace()
    D = [(np.hstack(i) if i != [] else empty_D) for i in D]
    I = [(np.hstack(i) if i != [] else empty_I) for i in I]
    sizes = [len(i) for i in I]
    assert len(sizes) == nq
    lims = np.zeros(nq + 1, dtype="uint64")
    lims[1:] = np.cumsum(sizes)
    return lims, np.hstack(D), np.hstack(I)
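
range_ground_truth consumes the database through an iterator of blocks. Below is a minimal sketch of such an iterator over an in-memory array, forcing the CPU path with ngpu=0 so the range_search_gpu helper is not needed; it assumes the module-level LOG logger used above is configured.

import numpy as np

def block_iterator(xb, bs=10000):
    # yield the database in contiguous blocks, as db_iterator is expected to
    for i0 in range(0, xb.shape[0], bs):
        yield xb[i0:i0 + bs]

xb = np.random.rand(30000, 32).astype('float32')
xq = np.random.rand(10, 32).astype('float32')
lims, D, I = range_ground_truth(xq, block_iterator(xb), threshold=3.0, ngpu=0)
print(lims.shape)  # (11,): prefix sums delimiting each query's result range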
Example #14
    def do_cpu_to_gpu(self, index_key):
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case
        index.add_with_ids(xb, np.arange(nb).astype(np.int64) * 3 + 12345)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
Example #15
File: index.py Project: studio-ousia/bpr
    def to_gpu(self):
        if faiss.get_num_gpus() == 1:
            res = faiss.StandardGpuResources()
            self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
        else:
            cloner_options = faiss.GpuMultipleClonerOptions()
            cloner_options.shard = True
            self.index = faiss.index_cpu_to_all_gpus(self.index,
                                                     co=cloner_options)

        return self.index
Example #16
    def do_cpu_to_gpu(self, index_key):
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case
        index.add_with_ids(xb, np.arange(nb) * 3 + 12345)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)
Example #17
    def load(self, path: str, device: Optional[str] = None) -> None:
        r"""Load the index and meta data from ``path`` directory.

        Args:
            path (str): A path to the directory to load the index from.
            device (optional str): Device to load the index into. If None,
                value will be picked from hyperparameters.

        """

        if not os.path.exists(path):
            raise ValueError(
                f"Failed to load the index. {path} " f"does not exist."
            )

        cpu_index = faiss.read_index(f"{path}/index.faiss")

        if device is None:
            device = self._config.device

        if device.lower().startswith("gpu"):
            gpu_resource = faiss.StandardGpuResources()
            gpu_id = int(device[3:])
            if faiss.get_num_gpus() <= gpu_id:  # gpu ids are zero-based
                gpu_id = 0
                logging.warning(
                    "Cannot create the index on device %s. "
                    "Total number of GPUs on this machine is "
                    "%s. Using gpu0 for the index.",
                    device,
                    faiss.get_num_gpus(),
                )
            self._index = faiss.index_cpu_to_gpu(
                gpu_resource, gpu_id, cpu_index
            )

        else:
            self._index = cpu_index

        with open(f"{path}/index.meta_data", "rb") as f:
            self._meta_data = pickle.load(f)
Example #18
    def __init__(self):
        self.ngpu = faiss.get_num_gpus()

        if self.ngpu == 0:
            return

        self.tempmem = 1 << 33
        self.max_add_per_gpu = 1 << 25
        self.max_add = self.max_add_per_gpu * self.ngpu
        self.add_batch_size = 65536

        self.gpu_resources = self._prepare_gpu_resources()
Example #19
    def __loadIndex(self):
        assert self.dbs != [], "You should load db before load index, use self.loadDB() ..."
        d = self.dbs[0].shape[-1]
        ngpu = faiss.get_num_gpus()
        index = faiss.IndexFlatL2(d)

        res = faiss.StandardGpuResources()

        for i, db in enumerate(self.dbs):
            gpu_index = faiss.index_cpu_to_gpu(res, i, index)
            gpu_index.add(db)
            self.gpu_index.append(gpu_index)
Example #20
    def __init__(self, config: Optional[Union[Dict, Config]] = None):
        super().__init__()
        self._config = Config(
            hparams=config, default_hparams=self.default_configs()
        )
        self._meta_data: Dict[int, str] = {}

        index_type = self._config.index_type
        device = self._config.device
        dim = self._config.dim

        if device.lower().startswith("gpu"):
            if isinstance(index_type, str) and not index_type.startswith("Gpu"):
                index_type = "Gpu" + index_type

            index_class = utils.get_class(index_type, module_paths=["faiss"])
            gpu_resource = faiss.StandardGpuResources()
            gpu_id = int(device[3:])
            if faiss.get_num_gpus() <= gpu_id:  # gpu ids are zero-based
                gpu_id = 0
                logging.warning(
                    "Cannot create the index on device %s. "
                    "Total number of GPUs on this machine is "
                    "%s. Using gpu0 for the index.",
                    self._config.device,
                    faiss.get_num_gpus(),
                )
            config_class_name = self.INDEX_TYPE_TO_CONFIG.get(
                index_class.__name__
            )
            config = utils.get_class(
                config_class_name, module_paths=["faiss"]
            )()
            config.device = gpu_id
            self._index = index_class(gpu_resource, dim, config)

        else:
            index_class = utils.get_class(index_type, module_paths=["faiss"])
            self._index = index_class(dim)
Example #21
def to_all_gpus(
        cpu_index: faiss.Index,
        co: Optional['faiss.GpuMultipleClonerOptions'] = None) -> faiss.Index:
    """
    TODO: docstring

    """

    n_gpus = faiss.get_num_gpus()
    assert n_gpus != 0, 'Attempting to move index to GPU without any GPUs'

    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co=co)
    return gpu_index
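
A sketch of cloner options commonly passed to to_all_gpus, assuming at least one GPU is present (the assert above enforces this):

import faiss
import numpy as np

cpu_index = faiss.IndexFlatIP(64)
cpu_index.add(np.random.rand(10000, 64).astype('float32'))

co = faiss.GpuMultipleClonerOptions()
co.useFloat16 = True  # store vectors in float16 to halve GPU memory
co.shard = True       # split the database across GPUs instead of replicating it

gpu_index = to_all_gpus(cpu_index, co=co)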
Example #22
File: db.py Project: JCBrouwer/mmss
    def upgrade_indices(self, new_index_type="IDMap,IVF100,PQ8"):
        for column_name, index in self.indices.items():
            if faiss.get_num_gpus() > 0:
                index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), self.rank, index)

            vectors = index.reconstruct_n(0, index.ntotal)
            ids = np.array([index.id_map.at(i) for i in range(index.id_map.size())])
            assert len(vectors) == len(ids)

            new_index = faiss.index_factory(vectors.shape[1], new_index_type)
            if faiss.get_num_gpus() > 0:
                new_index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), self.rank, new_index)

            if not new_index.is_trained:
                new_index.train(vectors)

            new_index.add_with_ids(vectors, ids)

            if faiss.get_num_gpus() > 0:
                new_index = faiss.index_gpu_to_cpu(new_index)

            faiss.write_index(new_index, f"{self.directory}_new/{column_name}.index")
Example #23
def IndexLoad(idx_path, nprobe=0, gpu=False):
    print('Reading FAISS index', file=sys.stderr)
    print(' - index: {:s}'.format(idx_path), file=sys.stderr)
    index = faiss.read_index(idx_path)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d),
          file=sys.stderr)
    print(' - setting nbprobe to {:d}'.format(nprobe), file=sys.stderr)
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus(),
              file=sys.stderr)
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
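
A usage sketch for IndexLoad; the file name is illustrative and must point to an existing IVF index written with faiss.write_index, since nprobe is only meaningful for IVF indexes.

import numpy as np

index = IndexLoad('corpus.ivf.index', nprobe=16, gpu=True)
queries = np.random.rand(4, index.d).astype('float32')
D, I = index.search(queries, 8)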
Example #24
File: indexing.py Project: zmwebdev/LASER
def IndexLoad(idx_name, nprobe, gpu=False):
    print('Reading FAISS index')
    print(' - index: {:s}'.format(idx_name))
    index = faiss.read_index(idx_name)
    print(' - found {:d} sentences of dim {:d}'.format(index.ntotal, index.d))
    print(' - setting nbprobe to {:d}'.format(nprobe))
    if gpu:
        print(' - transfer index to %d GPUs ' % faiss.get_num_gpus())
        #co = faiss.GpuMultipleClonerOptions()
        #co.shard = True
        index = faiss.index_cpu_to_all_gpus(index)  # co=co
        faiss.GpuParameterSpace().set_index_parameter(index, 'nprobe', nprobe)
    return index
Example #25
    def read_faiss_index_gpu(self, index_filepath):
        """
        Load a FAISS index. If we're on GPU, then convert it to a GPU index.
        :param index_filepath:
        :return:
        """
        print("read_faiss_index start.")
        index = faiss.read_index(index_filepath)
        if faiss.get_num_gpus():
            print("read_faiss_index: Converting FAISS index from CPU to GPU.")
            index = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0,
                                           index)
        return index
Example #26
def make_index(sx, preproc=ident):
    N, p = sx.shape
    ngpu = faiss.get_num_gpus()

    # pick the index type by database size; thresholds must ascend,
    # otherwise the GPUIVFFlat branch is unreachable
    if N < 1000:
        indextype = 'Flat'
    elif N < 100000:
        indextype = 'GPUFlat'
    elif N < 10**6:
        indextype = 'GPUIVFFlat'
    else:
        indextype = 'GPUIVFFlatShards'

    if (indextype == 'IVFFlat' or indextype == 'GPUIVFFlat'
            or indextype == 'GPUIVFFlatShards'):
        ncentroids = int(4 * np.floor(np.sqrt(N)))
        nprobe = 256
        print("using IndexIVFFlat with %d/%d centroids" % (nprobe, ncentroids))
        q = faiss.IndexFlatL2(p)
        index = faiss.IndexIVFFlat(q, p, ncentroids,
                                   faiss.METRIC_INNER_PRODUCT)
        if nprobe >= ncentroids * 3 / 4:
            nprobe = int(ncentroids * 3 / 4)
            print("  forcing nprobe to %d" % nprobe)
        index.nprobe = nprobe
        index.quantizer_no_dealloc = q
        if indextype.startswith('GPU') and ngpu > 0:
            index = move_index_to_gpu(index, indextype == 'GPUIVFFlatShards')
        ntrain = min(ncentroids * 100, N)
        print("prepare train set, size=%d" % ntrain)
        trainset = sx[:ntrain]
        trainset.max()  # force move to RAM
        print("train")
        index.train(trainset)

    elif indextype == 'GPUFlat' or indextype == 'Flat':
        index = faiss.IndexFlatIP(p)
        if indextype.startswith('GPU') and ngpu > 0:
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            index = faiss.index_cpu_to_all_gpus(index, co)
    else:
        assert False

    bs = 16384
    for i0, i1, block in dataset_iterator(sx, preproc, bs):
        print("   add %d:%d / %d\r" % (i0, i1, N), end=' ')
        sys.stdout.flush()
        index.add(block)

    return index
Example #27
def compute_GT_GPU(xb, xq, gt_sl):
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5
    # Please change this based on your GPU memory size.
    tempmem = 3500*1024*1024

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])
 
    ngpu = faiss.get_num_gpus()
    gpu_resources = []

    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)

    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(0, ngpu):
        vdev.push_back(i)
        vres.push_back(gpu_resources[i])

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
Example #28
    def init_index(self):
        d = 128
        ngpus = faiss.get_num_gpus()

        print("number of GPUs:", ngpus)

        cpu_index = faiss.IndexFlatL2(d)

        self.gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
            cpu_index
        )

        self.gpu_index.add(self.known_encoding_faces2)              # add vectors to the index
        print('index', self.gpu_index.ntotal)
Example #29
    def build(self, use_gpu=False):
        self.vectors = np.array(self.vectors)

        faiss.normalize_L2(self.vectors)

        logging.info('Indexing {} vectors'.format(self.vectors.shape[0]))

        if self.vectors.shape[0] > 50000:
            num_centroids = 8 * int(
                math.sqrt(math.pow(2, int(math.log(self.vectors.shape[0],
                                                   2)))))

            logging.info('Using {} centroids'.format(num_centroids))

            self.index = faiss.index_factory(
                self.d, "IVF{}_HNSW32,Flat".format(num_centroids))

            ngpu = faiss.get_num_gpus()
            if ngpu > 0 and use_gpu:
                logging.info('Using {} GPUs'.format(ngpu))

                index_ivf = faiss.extract_index_ivf(self.index)
                clustering_index = faiss.index_cpu_to_all_gpus(
                    faiss.IndexFlatL2(self.d))
                index_ivf.clustering_index = clustering_index

            logging.info('Training index...')

            self.index.train(self.vectors)
        else:
            self.index = faiss.IndexFlatL2(self.d)
            if faiss.get_num_gpus() > 0 and use_gpu:
                self.index = faiss.index_cpu_to_all_gpus(self.index)

        logging.info('Adding vectors to index...')

        self.index.add(self.vectors)
Example #30
def move_index_to_gpu(index, shard=False):
    ngpu = faiss.get_num_gpus()
    gpu_resources = [faiss.StandardGpuResources() for i in range(ngpu)]

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = shard
    co.shard_type = 1

    print("   moving to %d GPUs" % ngpu)
    t0 = time.time()
    index = faiss.index_cpu_to_gpu_multiple_py(gpu_resources, index, co)
    index.dont_dealloc_me = gpu_resources
    print("      done in %.3f s" % (time.time() - t0))
    return index
Example #31
def init_index(known_encoding_faces2):
    known_encoding_faces2 = known_encoding_faces2.astype(np.float32)
    d = 512
    ngpus = faiss.get_num_gpus()

    print("number of GPUs:", ngpus)

    cpu_index = faiss.IndexFlatL2(d)

    gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
        cpu_index)

    gpu_index.add(known_encoding_faces2)  # add vectors to the index
    print('index', gpu_index.ntotal)
    return gpu_index
Example #32
File: 4-GPU.py Project: wuhh/faiss
from __future__ import print_function
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss                   # make faiss available

print("number of GPUs:", faiss.get_num_gpus())

index = faiss.IndexFlatL2(d)   # build the index

res = faiss.StandardGpuResources()

index = faiss.index_cpu_to_gpu(res, 0, index)

index.add(xb)                  # add vectors to the index
print(index.ntotal)

k = 4                          # we want to see 4 nearest neighbors
D, I = index.search(xq, k)     # actual search
print(I[:5])                   # neighbors of the 5 first queries
print(I[-5:])                  # neighbors of the 5 last queries
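
The same tutorial data can also be pushed to every visible GPU at once; a short sketch using index_cpu_to_all_gpus with the arrays defined above:

cpu_index = faiss.IndexFlatL2(d)
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)  # replicate across all GPUs
gpu_index.add(xb)
D, I = gpu_index.search(xq, k)
print(I[:5])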
Example #33
-knngraph          instead of the standard setup for the dataset,
                   compute a k-nn graph with nnn neighbors per element
-oI xx%d.npy       output the search result indices to this numpy file,
                   %d will be replaced with the nprobe
-oD xx%d.npy       output the search result distances to this file

"""
    sys.exit(1)


# default values

dbname = None
index_key = None

ngpu = faiss.get_num_gpus()

replicas = 1  # nb of replicas of sharded dataset
add_batch_size = 32768
query_batch_size = 16384
nprobes = [1 << l for l in range(9)]
knngraph = False
use_precomputed_tables = True
tempmem = -1  # if -1, use system default
max_add = -1
use_float16 = False
use_cache = True
nnn = 10
altadd = False
I_fname = None
D_fname = None