Beispiel #1
0
    def test_wrapped_quantizer_HNSW(self):
        """Check the recall of a binary IVF index whose coarse quantizer is
        a float HNSW index wrapped in ``IndexBinaryFromFloat``.

        Centroids come from k-means run on the training codes expanded to
        -1/+1 floats. They are added to an ``IndexHNSWFlat`` which, once
        wrapped, serves as the quantizer of an ``IndexBinaryIVF``. Search
        results are compared against an exhaustive ``IndexBinaryFlat``.
        """
        # NOTE(review): this setting is not restored at the end of the test.
        faiss.omp_set_num_threads(1)

        def bin2float2d(codes):
            """Expand an (n, nbytes) array of byte codes into an
            (n, nbytes * 8) float32 array holding -1.0 for a cleared bit
            and +1.0 for a set bit, least-significant bit first.
            """
            n, nbytes = codes.shape
            # One row per byte, one column per bit position 0..7.
            bits = ((codes.reshape(-1, 1) >> np.arange(8)) & 1)
            vf = bits.astype('float32')
            vf *= 2
            vf -= 1
            return vf.reshape(n, nbytes * 8)

        d = 256
        nt = 12800
        nb = 10000
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nb, nt, nq)

        index_ref = faiss.IndexBinaryFlat(d)

        index_ref.add(xb)

        nlist = 256
        clus = faiss.Clustering(d, nlist)
        clus_index = faiss.IndexFlatL2(d)

        # Vectorized expansion instead of a Python loop over every byte.
        xt_f = bin2float2d(xt)
        clus.train(xt_f, clus_index)

        # Copy the centroid vector in one call rather than element by element.
        centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)
        hnsw_quantizer = faiss.IndexHNSWFlat(d, 32)
        hnsw_quantizer.add(centroids)
        hnsw_quantizer.is_trained = True
        wrapped_quantizer = faiss.IndexBinaryFromFloat(hnsw_quantizer)

        assert nlist == hnsw_quantizer.ntotal
        assert nlist == wrapped_quantizer.ntotal
        assert wrapped_quantizer.is_trained

        index = faiss.IndexBinaryIVF(wrapped_quantizer, d,
                                     hnsw_quantizer.ntotal)
        index.nprobe = 128

        # No train() call: the quantizer was populated by hand above.
        assert index.is_trained

        index.add(xb)

        D_ref, _ = index_ref.search(xq, 10)
        D, _ = index.search(xq, 10)

        # Recall is measured on distances: a query counts as a hit when the
        # best exhaustive distance appears among the 10 returned distances.
        # NOTE(review): presumably distances are used instead of ids because
        # ties make the returned ids ambiguous -- confirm.
        recall = sum(gti[0] in Di[:10] for gti, Di in zip(D_ref, D)) \
                 / float(D_ref.shape[0])

        assert recall > 0.77, "recall = %g" % recall
Beispiel #2
0
    def test_ivf_flat2(self):
        """Search a binary IVF index (8 lists, 4 probed) and check that most
        returned distances agree with ``self.Dref``.

        NOTE(review): ``self.Dref`` is presumably the exhaustive-search
        distance matrix built in the fixture's setUp -- confirm.
        """
        d = self.xq.shape[1] * 8

        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 8)
        index.cp.min_points_per_centroid = 5  # quiet warning
        index.nprobe = 4
        index.train(self.xt)
        index.add(self.xb)
        Divfflat, _ = index.search(self.xq, 10)

        # Some centroids are equidistant from the query points, so the exact
        # number of matching entries depends on how ties are broken inside
        # the library. Assert a lower bound instead of one exact count.
        self.assertGreater((self.Dref == Divfflat).sum(), 4100)
Beispiel #3
0
    def test_ivf_flat_empty(self):
        """Search a trained binary IVF index to which nothing was added.

        For both values of ``use_heap`` every returned label must be -1 and
        every returned distance must be the int32 maximum.
        """
        n_bits = self.xq.shape[1] * 8
        int32_max = np.iinfo(np.int32).max  # 2147483647

        empty_index = faiss.IndexBinaryIVF(
            faiss.IndexBinaryFlat(n_bits), n_bits, 8)
        empty_index.train(self.xt)

        for heap_flag in (True, False):
            empty_index.use_heap = heap_flag
            distances, labels = empty_index.search(self.xq, 10)

            assert np.all(labels == -1)
            assert np.all(distances == int32_max)
Beispiel #4
0
    def test_ivf_flat_exhaustive(self):
        """Probe every inverted list and require distances identical to
        ``self.Dref``.

        With ``nprobe`` equal to the number of lists the search visits all
        lists, so the distance matrix is compared for exact equality.
        """
        n_lists = 8
        n_bits = self.xq.shape[1] * 8

        coarse = faiss.IndexBinaryFlat(n_bits)
        ivf = faiss.IndexBinaryIVF(coarse, n_bits, n_lists)
        ivf.cp.min_points_per_centroid = 5  # quiet warning
        ivf.train(self.xt)
        ivf.add(self.xb)

        # Visit all lists at query time.
        ivf.nprobe = n_lists
        distances, _ = ivf.search(self.xq, 10)

        np.testing.assert_array_equal(self.Dref, distances)
def train():
    """Train a binary IVF index on all stored codes and write it to disk.

    The codes returned by ``get_all_data()`` are treated as 32-byte
    (256-bit) vectors. The number of inverted lists is the rounded square
    root of the number of codes. The trained (still empty) index is written
    to ``./trained_import.index``.

    Exits the process when there is no data to train on.
    """
    # Local import keeps this block self-contained. ``sys.exit`` is used
    # because the bare ``exit`` helper is injected by the ``site`` module
    # for interactive use and is not guaranteed to exist in every runtime.
    import sys

    all_data = np.array(get_all_data())
    if len(all_data) == 0:
        print("No images. exit()")
        sys.exit()
    d = 32 * 8  # code size in bits
    centroids = round(sqrt(all_data.shape[0]))
    print(f'centroids: {centroids}')
    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_data)
    faiss.write_index_binary(index, "./trained_import.index")
    def test_wrapped_quantizer_HNSW(self):
        """Recall check for a binary IVF index that uses a float HNSW index,
        wrapped by ``IndexBinaryFromFloat``, as its coarse quantizer.

        The HNSW index is filled with k-means centroids computed on the
        training codes after expanding each bit to -1/+1. At least 77% of
        the queries must find the best exhaustive distance among their 10
        returned distances.
        """

        def bin2float2d(codes):
            """Turn an (n, nbytes) array of byte codes into an
            (n, nbytes * 8) float32 array of -1/+1, lowest bit first."""
            n_rows, n_bytes = codes.shape
            # A trailing axis of 8 shifts gives bit b of every byte.
            bits = (codes[:, :, None] >> np.arange(8)) & 1
            signed = bits.astype("float32") * 2 - 1
            return signed.reshape(n_rows, n_bytes * 8)

        dim = 256
        n_train, n_base, n_query = 12800, 10000, 500
        xt, xb, xq = make_binary_dataset(dim, n_base, n_train, n_query)

        # Exhaustive reference index.
        flat_ref = faiss.IndexBinaryFlat(dim)
        flat_ref.add(xb)

        # k-means in float space on the expanded training codes.
        n_lists = 256
        kmeans = faiss.Clustering(dim, n_lists)
        assign_index = faiss.IndexFlatL2(dim)
        train_floats = bin2float2d(xt)
        kmeans.train(train_floats, assign_index)

        centers = faiss.vector_to_array(kmeans.centroids)
        centers = centers.reshape(-1, kmeans.d)

        # The HNSW index is populated by hand, then flagged as trained.
        hnsw = faiss.IndexHNSWFlat(dim, 32)
        hnsw.add(centers)
        hnsw.is_trained = True
        wrapped = faiss.IndexBinaryFromFloat(hnsw)

        assert nlist_matches(n_lists, hnsw.ntotal) if False else \
            n_lists == hnsw.ntotal
        assert n_lists == wrapped.ntotal
        assert wrapped.is_trained

        ivf = faiss.IndexBinaryIVF(wrapped, dim, hnsw.ntotal)
        ivf.nprobe = 128

        assert ivf.is_trained

        ivf.add(xb)

        ref_dist, _ = flat_ref.search(xq, 10)
        got_dist, _ = ivf.search(xq, 10)

        # A query is a hit when its best reference distance shows up among
        # the distances returned by the IVF index.
        hits = 0
        for ref_row, got_row in zip(ref_dist, got_dist):
            if ref_row[0] in got_row[:10]:
                hits += 1
        recall = hits / float(ref_dist.shape[0])

        assert recall >= 0.77, "recall = %g" % recall
Beispiel #7
0
    def test_ivf_flat2(self):
        """Search a binary IVF index with 4 of 8 lists probed and require
        that more than 4100 distance entries equal ``self.Dref``."""
        n_bits = self.xq.shape[1] * 8

        coarse = faiss.IndexBinaryFlat(n_bits)
        ivf = faiss.IndexBinaryIVF(coarse, n_bits, 8)
        ivf.cp.min_points_per_centroid = 5    # quiet warning
        ivf.train(self.xt)
        ivf.add(self.xb)

        ivf.nprobe = 4
        distances, _ = ivf.search(self.xq, 10)

        # Some centroids are equidistant from the query points, so the
        # answer depends on the implementation of the heap. Only a lower
        # bound on the number of agreeing entries is asserted.
        agreeing = self.Dref == distances
        self.assertGreater(agreeing.sum(), 4100)
Beispiel #8
0
    def test_ivf_reconstruction(self):
        """Reconstruct stored vectors through both direct-map types.

        First with sequential ids and an array direct map, then with random
        unique ids and a hashtable direct map. Every 13th database vector
        is reconstructed and compared with the original.
        """
        n_bits = self.xq.shape[1] * 8
        coarse = faiss.IndexBinaryFlat(n_bits)
        n_base = len(self.xb)

        # Array direct map, implicit sequential ids.
        ivf = faiss.IndexBinaryIVF(coarse, n_bits, 8)
        ivf.cp.min_points_per_centroid = 5  # quiet warning
        ivf.nprobe = 4
        ivf.train(self.xt)

        ivf.add(self.xb)
        ivf.set_direct_map_type(faiss.DirectMap.Array)

        for row in range(0, n_base, 13):
            np.testing.assert_array_equal(ivf.reconstruct(row), self.xb[row])

        # Hashtable direct map, explicit ids. There is no train() call on
        # this second index; it is built on the same quantizer object.
        ivf = faiss.IndexBinaryIVF(coarse, n_bits, 8)
        rng = np.random.RandomState(123)
        custom_ids = rng.choice(10000, size=n_base, replace=False)
        ivf.add_with_ids(self.xb, custom_ids)
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)

        for row in range(0, n_base, 13):
            rebuilt = ivf.reconstruct(int(custom_ids[row]))
            np.testing.assert_array_equal(rebuilt, self.xb[row])
Beispiel #9
0
def init_index():
    """Load (or create) the global binary index and fill it with all
    stored hashes.

    Tries to read ``trained.index``. If that fails, a temporary
    single-list index for 32-byte (256-bit) codes is built and trained on
    one all-zero vector. Every record returned by ``get_all_data()`` is
    then added, using element 0 as the int64 id and element 1 as the code.
    """
    global index
    try:
        index = faiss.read_index_binary("trained.index")
    except Exception:
        # Best-effort fallback when the trained file cannot be read.
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit are not swallowed.
        d = 32 * 8
        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 1)
        index.nprobe = 1
        index.train(np.array([np.zeros(32)], dtype=np.uint8))
    all_data = get_all_data()
    if len(all_data) != 0:
        # Build the arrays only when there is something to add.
        image_ids = np.array([np.int64(x[0]) for x in all_data])
        phashes = np.array([x[1] for x in all_data])
        index.add_with_ids(phashes, image_ids)
    print("Index is ready")
Beispiel #10
0
def train():
    """Train a binary IVF index on all imported descriptors and save it.

    Element 1 of every record returned by ``import_get_all_data()`` is
    taken as an array of descriptors; all of them are concatenated along
    axis 0. Descriptors are treated as 61-byte (488-bit) codes. The number
    of inverted lists is the rounded square root of the descriptor count.
    The trained index is written to ``./trained_import.index``.

    Exits the process when there is no data to train on.
    """
    # Local import keeps this block self-contained. ``sys.exit`` is used
    # because the bare ``exit`` helper is injected by the ``site`` module
    # for interactive use and is not guaranteed to exist in every runtime.
    import sys

    all_data = import_get_all_data()
    if len(all_data) == 0:
        print("No images. exit()")
        sys.exit()
    all_descriptors = np.concatenate([x[1] for x in all_data], axis=0)

    d = 61 * 8  # descriptor size in bits
    centroids = round(sqrt(all_descriptors.shape[0]))
    print(f'centroids: {centroids}')
    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_descriptors)
    faiss.write_index_binary(index, "./trained_import.index")
Beispiel #11
0
def train():
    """Train a binary IVF index on the descriptors of every known image
    and save it.

    For each id from ``get_all_ids()`` the stored features are fetched
    with ``get_akaze_features_by_id`` and decoded with ``convert_array``;
    all results are concatenated along axis 0. Descriptors are treated as
    61-byte (488-bit) codes. The number of inverted lists is the rounded
    square root of the descriptor count. The trained index is written to
    ``./trained.index``.

    Exits the process when there are no images.
    """
    # Local import keeps this block self-contained. ``sys.exit`` is used
    # because the bare ``exit`` helper is injected by the ``site`` module
    # for interactive use and is not guaranteed to exist in every runtime.
    import sys

    all_ids = get_all_ids()
    if len(all_ids) == 0:
        print("No images. exit()")
        sys.exit()
    # ``image_id`` rather than ``id`` so the builtin is not shadowed.
    all_descriptors = np.concatenate(
        [convert_array(get_akaze_features_by_id(image_id))
         for image_id in all_ids],
        axis=0)

    d = 61 * 8  # descriptor size in bits
    centroids = round(sqrt(all_descriptors.shape[0]))
    print(f'centroids: {centroids}')
    quantizer = faiss.IndexBinaryFlat(d)
    index = faiss.IndexBinaryIVF(quantizer, d, centroids)
    index.nprobe = 8
    index.train(all_descriptors)
    faiss.write_index_binary(index, "./trained.index")
Beispiel #12
0
    def test_ivf_range(self):
        """Cross-check ``range_search`` against a 10-nearest-neighbour
        search on the same binary IVF index.

        The radius is the median 10th-neighbour distance plus one. For each
        query the range result is compared with the k-NN result: if all 10
        neighbours lie below the radius they must be contained in the range
        result, otherwise the neighbours below the radius must equal it.
        """
        n_bits = self.xq.shape[1] * 8

        coarse = faiss.IndexBinaryFlat(n_bits)
        ivf = faiss.IndexBinaryIVF(coarse, n_bits, 8)
        ivf.cp.min_points_per_centroid = 5    # quiet warning
        ivf.nprobe = 4
        ivf.train(self.xt)
        ivf.add(self.xb)
        knn_dist, knn_ids = ivf.search(self.xq, 10)

        radius = int(np.median(knn_dist[:, -1]) + 1)
        lims, _, range_ids = ivf.range_search(self.xq, radius)

        for q in range(len(self.xq)):
            # Results of query q occupy range_ids[lims[q]:lims[q + 1]].
            in_range = set(range_ids[lims[q]:lims[q + 1]])
            dists, ids = knn_dist[q], knn_ids[q]
            if dists[-1] < radius:
                # The k-NN list may be truncated at 10: subset only.
                self.assertTrue(set(ids) <= in_range)
            else:
                below = ids[dists < radius]
                self.assertTrue(set(below) == in_range)
def init_index():
    """Load (or create) the global binary index and add the features of
    every known image.

    Tries to read ``trained.index``. If that fails, a temporary
    single-list index for 61-byte (488-bit) descriptors is built and
    trained on one all-zero vector. Each image's descriptors get
    consecutive int64 point ids starting at the global ``POINT_ID``
    counter, and both lookup maps (point id -> image id, image id -> point
    ids) are updated.
    """
    global index, POINT_ID
    try:
        index = faiss.read_index_binary("trained.index")
    except Exception:
        # Temporary index, used when the trained file cannot be read.
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # SystemExit are not swallowed.
        d = 61 * 8
        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 1)
        index.nprobe = 1
        index.train(np.array([np.zeros(61)], dtype=np.uint8))
    all_ids = get_all_ids()
    for image_id in tqdm(all_ids):
        features = convert_array(get_akaze_features_by_id(image_id))
        n_points = len(features)
        # One fresh id per descriptor of this image.
        point_ids = np.arange(start=POINT_ID,
                              stop=POINT_ID + n_points,
                              dtype=np.int64)
        for point_id in point_ids:
            point_id_to_image_id_map[point_id] = image_id
        image_id_to_point_ids_map[image_id] = point_ids
        POINT_ID += n_points
        index.add_with_ids(features, point_ids)
    print("Index is ready")
Beispiel #14
0
    def test_ivf_flat(self):
        """Write a populated binary IVF index to disk, read it back, and
        check that the reloaded index returns identical labels and
        distances for a 3-nearest-neighbour search."""
        d = self.xq.shape[1] * 8

        quantizer = faiss.IndexBinaryFlat(d)
        index = faiss.IndexBinaryIVF(quantizer, d, 8)
        index.cp.min_points_per_centroid = 5  # quiet warning
        index.nprobe = 4
        index.train(self.xt)
        index.add(self.xb)
        D, I = index.search(self.xq, 3)

        # mkstemp creates the file and leaves it in place, so the path
        # cannot be claimed by someone else before it is written, and the
        # cleanup below cannot fail with FileNotFoundError and hide a
        # write error. The descriptor is closed at once because the file is
        # reopened by path.
        fd, tmpnam = tempfile.mkstemp()
        os.close(fd)
        try:
            faiss.write_index_binary(index, tmpnam)

            index2 = faiss.read_index_binary(tmpnam)

            D2, I2 = index2.search(self.xq, 3)

            assert (I2 == I).all()
            assert (D2 == D).all()

        finally:
            os.remove(tmpnam)
Beispiel #15
0
 def test_binary_ivf(self):
     """Train a binary IVF index whose quantizer is passed as a temporary.

     NOTE(review): presumably this guards against the quantizer being
     freed while the index still uses it -- confirm against the enclosing
     test class. ``dbin`` and ``xtbin`` are defined outside this snippet.
     """
     # The quantizer is never bound to a name in this function.
     index = faiss.IndexBinaryIVF(faiss.IndexBinaryFlat(dbin), dbin, 10)
     # Force a collection pass between construction and first use.
     gc.collect()
     # No assertion follows: the method only needs train() to complete.
     index.train(xtbin)
Beispiel #16
0
import faiss
import numpy as np

# Three binary codes of 4 bytes (32 bits) each, one per row.
objects = np.array(
    [
        [1, 1, 2, 1],
        [5, 4, 6, 5],
        [1, 2, 1, 2],
    ],
    dtype=np.uint8,
)

n_bits = 8 * objects.shape[1]  # 32
n_lists = 2

# Binary IVF index with a flat binary coarse quantizer.
quantizer = faiss.IndexBinaryFlat(n_bits)
index = faiss.IndexBinaryIVF(quantizer, n_bits, n_lists)
index.train(objects)
index.add(objects)

# Query with the stored codes themselves, asking for as many neighbours
# as there are codes.
distances, ids = index.search(objects, len(objects))

for result in (distances, ids):
    print(result)
Beispiel #17
0
 def create_cpu(dim):
     """Return an untrained binary IVF index of dimension ``dim`` built on
     a fresh flat binary quantizer; the list count comes from ``centroids``
     in the enclosing scope."""
     return faiss.IndexBinaryIVF(faiss.IndexBinaryFlat(dim), dim, centroids)