コード例 #1
0
 def setUpClass(cls) -> None:
     """Create the shared index and two in-process gRPC test servers.

     One server is configured with ``normalize_query=False`` (``SERVER``)
     and one with ``normalize_query=True`` (``SERVER_NORM``). Both use the
     same ``nprobe`` value.
     """
     probes = 10

     def make_server(service_config):
         # Every servicer receives its own clone of the index: per the
         # original note, the FaissServiceServicer constructor changes
         # attributes of the index it is given (e.g. nprobe).
         servicer = FaissServiceServicer(faiss.clone_index(cls.INDEX),
                                         service_config)
         return grpc_testing.server_from_dictionary(
             {cls.SERVICE: servicer},
             grpc_testing.strict_real_time(),
         )

     cls.CONFIG = FaissServiceConfig(nprobe=probes, normalize_query=False)
     cls.CONFIG_NORM = FaissServiceConfig(nprobe=probes,
                                          normalize_query=True)
     cls.FAISS_CONFIG = FaissConfig(dim=64, db_size=100000, nlist=100)
     cls.INDEX = cls.create_index()
     cls.SERVICE = faiss_pb2.DESCRIPTOR.services_by_name['FaissService']

     cls.SERVER = make_server(cls.CONFIG)
     # Server whose config has normalize_query=True.
     cls.SERVER_NORM = make_server(cls.CONFIG_NORM)

     # Only set nprobe on the reference index once all clones were taken.
     cls.INDEX.nprobe = probes
コード例 #2
0
ファイル: test_factory.py プロジェクト: ifeherva/faiss
 def test_clone(self):
     """A clone of a trained, populated index reports the same ntotal."""
     dim, n_vectors = 16, 1000
     source = faiss.index_factory(dim, 'IVF10,PQ4np')
     vectors = faiss.rand((n_vectors, dim))
     source.train(vectors)
     source.add(vectors)

     duplicate = faiss.clone_index(source)

     assert duplicate.ntotal == source.ntotal
コード例 #3
0
ファイル: test_factory.py プロジェクト: ifeherva/faiss
 def test_clone_size(self):
     """A clone of a 'PCA10,Flat' index keeps all 100 added vectors."""
     dim = 20
     source = faiss.index_factory(dim, 'PCA10,Flat')
     vectors = faiss.rand((100, dim))
     source.train(vectors)
     source.add(vectors)

     duplicate = faiss.clone_index(source)

     assert duplicate.ntotal == 100
コード例 #4
0
    def test_failed_different_nprobe_Search(self) -> None:
        """The server's result must differ from a local search with nprobe=1."""
        # k is deliberately large: with both a small nprobe and a small k the
        # results could coincide even if the server failed to apply its
        # configured nprobe, and the error case would go unnoticed.
        top_k = 1000
        query_values = np.ones(self.FAISS_CONFIG.dim, dtype=np.float32)
        search_request = SearchRequest(query=Vector(val=query_values), k=top_k)
        search_method = self.method_descriptor_by_name(
            ServiceMethodDescriptor.search)
        rpc = self.SERVER.invoke_unary_unary(
            search_method,
            (),
            search_request,
            None,
        )

        # Run the same query locally on a clone that uses a different nprobe.
        local_index = faiss.clone_index(self.INDEX)
        local_index.nprobe = 1
        local_distances, local_ids = local_index.search(
            np.atleast_2d(query_values), top_k)
        unexpected = SearchResponse(
            neighbors=self.to_neighbors(local_distances, local_ids))

        response, _metadata, status, _details = rpc.termination()

        self.assertNotEqual(response, unexpected)
        self.assertIs(status, grpc.StatusCode.OK)
コード例 #5
0
ファイル: index_builder.py プロジェクト: Ljferrer/SimSent
    def make_mmap_index(self, base_index: BASE_INDEX, ids: np.array,
                        embs: np.array):
        """Write ``embs`` to an on-disk inverted-list file plus a linked index.

        Two files are produced under ``self.sub_dir``, both named after
        ``self.seed_name``: ``<name>.ivfdata`` (the on-disk inverted lists)
        and ``<name>.index`` (a clone of ``base_index`` linked to them).

        Args:
            base_index: Index that is cloned twice; its ``nlist`` and
                ``code_size`` size the on-disk lists.
                NOTE(review): presumably an already-trained IVF index --
                confirm against callers.
            ids: Ids passed to ``add_with_ids`` together with ``embs``.
            embs: Vectors added to the temporary clone.
        """
        # Get invlists
        # Populate a throwaway clone so base_index itself is not modified.
        index = faiss.clone_index(base_index)
        index.add_with_ids(embs, ids)
        ivf_vector = faiss.InvertedListsPtrVector()
        ivf_vector.push_back(index.invlists)
        # NOTE(review): presumably this stops the clone from freeing its
        # inverted lists on deletion, keeping the pointer stored in
        # ivf_vector valid for merge_from below -- confirm with faiss docs.
        index.own_invlists = False
        del index
        gc.collect()

        # Make MMAP ivfdata
        index_name = p.abspath(self.sub_dir / f'{self.seed_name}')
        invlists = faiss.OnDiskInvertedLists(base_index.nlist,
                                             base_index.code_size,
                                             f'{index_name}.ivfdata')
        # Merge the in-memory lists into the on-disk lists; the return value
        # is used as the vector count of the final index.
        ntotal = invlists.merge_from(ivf_vector.data(), ivf_vector.size())

        # Link index to ivfdata and save
        index = faiss.clone_index(base_index)
        index.ntotal = ntotal
        index.replace_invlists(invlists)
        faiss.write_index(index, f'{index_name}.index')
コード例 #6
0
    def _execute_map(cls, ctx, op):
        """Build (or reuse) a faiss index for one chunk and add the chunk to it.

        The first input of ``op`` is the chunk of vectors. An optional second
        input is an already-trained index; without it a new index is created
        through ``faiss.index_factory``. The resulting index is stored under
        the key of ``op.outputs[0]`` in ``ctx``.
        """
        (chunk, ), device_id, xp = as_same_device(
            [ctx[op.inputs[0].key]], device=op.device, ret_extra=True)
        shared_index = ctx[op.inputs[1].key] if len(op.inputs) == 2 else None

        with device(device_id):
            chunk = xp.ascontiguousarray(chunk)
            if shared_index is None:
                chunk_index = faiss.index_factory(chunk.shape[1],
                                                  op.faiss_index,
                                                  op.faiss_metric_type)
                if not op.same_distribution:
                    # chunks do not share a distribution, so each chunk
                    # trains its own index; otherwise creating it is enough
                    chunk_index.train(chunk)

                if device_id >= 0:  # pragma: no cover
                    chunk_index = _index_to_gpu(chunk_index, device_id)
            else:
                # fetch the index that was trained beforehand
                chunk_index = _load_index(ctx, op, shared_index, device_id)
                if _get_index_type(op.return_index_type, ctx) == 'object':
                    # work on a private copy, since faiss does not guarantee
                    # thread-safety for operations that modify an index
                    # https://github.com/facebookresearch/faiss/wiki/Threads-and-asynchronous-calls#thread-safety
                    chunk_index = faiss.clone_index(chunk_index)

            if op.metric == 'cosine':
                # faiss has no direct cosine distance; vectors have to be
                # L2-normalized before they are added to the index, see:
                # https://github.com/facebookresearch/faiss/wiki/FAQ#how-can-i-index-vectors-for-cosine-distance
                faiss.normalize_L2(chunk)

            # add the chunk's vectors to the index
            if device_id >= 0:  # pragma: no cover
                # gpu
                row_count = chunk.shape[0]
                chunk_index.add_c(row_count,
                                  _swig_ptr_from_cupy_float32_array(chunk))
            else:
                chunk_index.add(chunk)

            stored = _store_index(ctx, op, chunk_index, device_id)
            ctx[op.outputs[0].key] = stored
コード例 #7
0
ファイル: test_index.py プロジェクト: ifeherva/faiss
    def subtest_io_and_clone(self, index, Dnsg, Insg):
        """Check that a reloaded index and a clone reproduce the given results.

        ``index`` is written to a temporary file and read back, then cloned.
        Each copy is searched with ``self.xq`` (k=1) and its distances and
        ids must equal ``Dnsg`` and ``Insg`` exactly.
        """

        def assert_same_results(candidate):
            distances, ids = candidate.search(self.xq, 1)
            np.testing.assert_array_equal(distances, Dnsg)
            np.testing.assert_array_equal(ids, Insg)

        handle, path = tempfile.mkstemp()
        # Only the path is needed; release the descriptor right away.
        os.close(handle)
        try:
            faiss.write_index(index, path)
            reloaded = faiss.read_index(path)
        finally:
            if os.path.exists(path):
                os.unlink(path)

        assert_same_results(reloaded)

        # the clone has to behave the same way
        assert_same_results(faiss.clone_index(index))
コード例 #8
0
ファイル: test_index.py プロジェクト: zhyq/faiss
    def io_and_retest(self, index, Dhnsw, Ihnsw):
        """Check that a reloaded index and a clone reproduce the given results.

        ``index`` is written to a temporary file and read back, then cloned.
        Each copy is searched with ``self.xq`` (k=1) and must return exactly
        the distances ``Dhnsw`` and ids ``Ihnsw``.
        """
        fd, tmpfile = tempfile.mkstemp()
        # mkstemp() returns an already-open OS-level descriptor; only the
        # path is used here, so close it to avoid leaking one fd per call.
        os.close(fd)
        try:
            faiss.write_index(index, tmpfile)
            index2 = faiss.read_index(tmpfile)
        finally:
            if os.path.exists(tmpfile):
                os.unlink(tmpfile)

        Dhnsw2, Ihnsw2 = index2.search(self.xq, 1)

        self.assertTrue(np.all(Dhnsw2 == Dhnsw))
        self.assertTrue(np.all(Ihnsw2 == Ihnsw))

        # also test clone
        index3 = faiss.clone_index(index)
        Dhnsw3, Ihnsw3 = index3.search(self.xq, 1)

        self.assertTrue(np.all(Dhnsw3 == Dhnsw))
        self.assertTrue(np.all(Ihnsw3 == Ihnsw))
コード例 #9
0
ファイル: index.py プロジェクト: Bug88/decoration_remove
def indexing(feats, pos, imgID):
    """Build an IVF-PQ faiss index over ``feats`` and gather its metadata.

    Args:
        feats: Non-empty sequence of 1-D feature vectors; the length of
            ``feats[0]`` is used as the index dimension.
        pos: ``pos[i]`` is stored in row ``i`` of a two-column array.
        imgID: ``imgID[i]`` is stored in row ``i`` of a one-column array.

    Returns:
        A list ``[index, pos_np, imgID_np]`` where ``index`` is a clone of
        the trained and populated index (``nprobe`` set to 5).
    """
    count = len(feats)
    d = feats[0].shape[0]

    feats_np = np.zeros(shape=(count, d))
    pos_np = np.zeros(shape=(count, 2))
    imgID_np = np.zeros(shape=(count, 1))

    for i, feat in enumerate(feats):
        feats_np[i, :] = feat
        pos_np[i, :] = pos[i]
        imgID_np[i, :] = imgID[i]

    # faiss works on float32; convert once and reuse the same array for
    # training and adding instead of allocating two separate copies.
    feats_f32 = feats_np.astype('float32')

    # construct the visual vocabulary
    voc_size = const_params.__voc_size__

    code_size = 8
    quantizer = faiss.IndexFlatL2(d)  # this remains the same
    # the trailing 8 specifies that each sub-vector is encoded as 8 bits
    index_ = faiss.IndexIVFPQ(quantizer, d, voc_size, code_size, 8)
    index_.train(feats_f32)
    index_.add(feats_f32)
    index_.nprobe = 5

    return [faiss.clone_index(index_), pos_np, imgID_np]
コード例 #10
0
 def approximate_batch(self, vectors, output_path):
     """Add ``vectors`` to a copy of the index and write it to ``output_path``.

     ``self.load()`` is called first when ``self.faiss_index`` is still
     ``None``. The vectors go into a clone, so ``self.faiss_index`` itself
     is not the object they are added to.
     """
     if self.faiss_index is None:
         self.load()

     batch_index = faiss.clone_index(self.faiss_index)
     batch_index.add(vectors)

     destination = str(output_path)
     faiss.write_index(batch_index, destination)
コード例 #11
0
ファイル: index.py プロジェクト: Bug88/decoration_remove
    # Load the feature database. Pickle files must be opened in binary mode,
    # and the context manager closes the handle even if loading fails.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on a trusted '../database.pkl'.
    with open('../database.pkl', 'rb') as fp:
        feats, pos, imgID, filenames, featNum = pickle.load(fp)

    index_, pos_np, imgID_np = indexing(feats, pos, imgID)

    # Persist the per-feature metadata next to the faiss index.
    with open('../index.pkl', 'wb') as fp:
        pickle.dump([pos_np, imgID_np], fp)

    faiss.write_index(faiss.clone_index(index_), '../index.faiss')

    print('testing...............')

    print('=======================')

    print('the number of points {0}'.format(imgID_np.shape[0]))
    print('the pos is {0}'.format(pos_np[0, :]))

    print('=======================')