예제 #1
0
    def test_roundoff(self):
        """Check that centering the data repairs a round-off failure in search.

        The database vectors differ only in their first coordinate, which
        carries a large offset (12345); every query is a database vector
        shifted by 0.3, so query i is expected to match database row i.
        """
        # sizes chosen to force use of the BLAS implementation
        n_base, n_query, dim = 100, 25, 4

        base = np.zeros((n_base, dim), dtype='float32')
        base[:, 0] = np.arange(n_base) + 12345
        queries = base[:n_query] + 0.3
        expected_ids = np.arange(n_query)

        # plain flat index: the nearest neighbours do NOT come out right
        flat = faiss.IndexFlat(dim)
        flat.add(base)
        _, found = flat.search(queries, 1)
        assert not np.array_equal(found.ravel(), expected_ids)

        # same data behind a CenteringTransform: the neighbours are right
        centered = faiss.IndexPreTransform(
            faiss.CenteringTransform(dim),
            faiss.IndexFlat(dim))
        centered.train(base)
        centered.add(base)
        _, found = centered.search(queries, 1)
        assert np.array_equal(found.ravel(), expected_ids)
예제 #2
0
    def test_OPQ(self):
        """Check that PQ behind an OPQ transform evaluates better than plain PQ."""
        n_sub = 4
        bench = Randu10kUnbalanced()
        dim = bench.d

        # baseline: plain product quantizer
        plain_pq = faiss.IndexPQ(dim, n_sub, 8)
        e_pq = bench.evalres(bench.launch('PQ', plain_pq))

        # candidate: an identical PQ preceded by an OPQ transform
        inner_pq = faiss.IndexPQ(dim, n_sub, 8)
        opq = faiss.OPQMatrix(dim, n_sub)
        opq.niter = 10
        opq.niter_pq = 4
        with_opq = faiss.IndexPreTransform(opq, inner_pq)
        e_opq = bench.evalres(bench.launch('OPQ', with_opq))

        print('e_pq=%s' % e_pq)
        print('e_opq=%s' % e_opq)

        # OPQ must be strictly better than PQ at every checked key
        for rank in (1, 10, 100):
            assert e_opq[rank] > e_pq[rank]
예제 #3
0
    def test_OIVFPQ(self):
        """Check that IVFPQ behind an OPQ transform is at least as good as IVFPQ."""
        # inverted-index parameters shared by both configurations
        ncentroids, n_sub, nprobe = 50, 4, 5

        bench = Randu10kUnbalanced()
        dim = bench.d

        # baseline: plain IVFPQ
        coarse_a = faiss.IndexFlatL2(dim)
        plain = faiss.IndexIVFPQ(coarse_a, dim, ncentroids, n_sub, 8)
        plain.nprobe = nprobe
        e_ivfpq = bench.evalres(bench.launch('IVFPQ', plain))

        # candidate: an identical IVFPQ preceded by an OPQ transform
        coarse_b = faiss.IndexFlatL2(dim)
        inner = faiss.IndexIVFPQ(coarse_b, dim, ncentroids, n_sub, 8)
        inner.nprobe = nprobe
        opq = faiss.OPQMatrix(dim, n_sub)
        opq.niter = 10
        with_opq = faiss.IndexPreTransform(opq, inner)
        e_oivfpq = bench.evalres(bench.launch('O+IVFPQ', with_opq))

        # OPQ variant must not be worse at any checked key
        for rank in (1, 10, 100):
            print(e_oivfpq[rank], e_ivfpq[rank])
            assert e_oivfpq[rank] >= e_ivfpq[rank]
def index_patches(patches, index_file, pca_dims=64):
    """Build a PCA + IVFPQ faiss index over `patches` and write it to disk.

    Args:
        patches: 2-D array; its second dimension is the input feature size.
            It is passed unchanged to faiss for training and adding.
        index_file: path handed to `faiss.write_index`.
        pca_dims: requested PCA output size. Must be a positive `int`; when
            it exceeds the input size it is reduced to the largest multiple
            of M (16) that fits, after printing a warning.

    Raises:
        AssertionError: if `pca_dims` is not a positive int, or the final
            `pca_dims` is not a multiple of M.
    """
    # faiss settings passed to IndexIVFPQ
    num_lists, M, num_bits = 200, 16, 8

    assert type(pca_dims) is int and pca_dims > 0
    input_dims = patches.shape[1]
    if pca_dims > input_dims:
        print('WARNING: Input dimension < %d. Using fewer PCA dimensions.' % pca_dims)
        pca_dims = input_dims - input_dims % M

    # assemble: PCA transform -> IVFPQ with a flat L2 coarse quantizer
    coarse = faiss.IndexFlatL2(pca_dims)
    assert pca_dims % M == 0
    ivfpq = faiss.IndexIVFPQ(coarse, pca_dims, num_lists, M, num_bits)
    pca = faiss.PCAMatrix(input_dims, pca_dims, 0, True)
    full_index = faiss.IndexPreTransform(pca, ivfpq)

    # train on the patches, then store the same patches
    full_index.train(patches)
    full_index.add(patches)

    print('| writing faiss index to %s' % index_file)
    faiss.write_index(full_index, index_file)
예제 #5
0
    def do_mmappedIO(self, sparse, in_pretransform=False):
        """Write an IVFFlat index to a file and compare reloaded copies with it.

        The index is read back twice, once normally and once with
        `faiss.IO_FLAG_MMAP`, and each copy is compared against the original
        through `self.compare_results` on the query set.

        Args:
            sparse: when true, shift the training set by 1000 in every
                dimension before training.
            in_pretransform: when true, wrap the IVF index in an
                `IndexPreTransform` before training.
        """
        d = 10
        nb = 1000
        nq = 200
        nt = 200
        xt, xb, xq = get_dataset_2(d, nt, nb, nq)

        quantizer = faiss.IndexFlatL2(d)
        index1 = faiss.IndexIVFFlat(quantizer, d, 20)
        if sparse:
            # makes the inverted lists sparse because all elements get
            # assigned to the same invlist
            xt += (np.ones(d) * 1000).astype('float32')

        if in_pretransform:
            # make sure it still works when wrapped in an IndexPreTransform
            index1 = faiss.IndexPreTransform(index1)

        index1.train(xt)
        index1.add(xb)

        fd, fname = tempfile.mkstemp()
        # mkstemp hands back an already-open descriptor; only the path is
        # needed here, so close it right away instead of leaking it (an open
        # handle can also prevent the unlink below on some platforms).
        os.close(fd)
        try:
            faiss.write_index(index1, fname)

            index2 = faiss.read_index(fname)
            self.compare_results(index1, index2, xq)

            index3 = faiss.read_index(fname, faiss.IO_FLAG_MMAP)
            self.compare_results(index1, index3, xq)
        finally:
            if os.path.exists(fname):
                os.unlink(fname)
def index_patches(patches, pca_dims=64):
    """Build and populate a PCA + IVFPQ faiss index from a 2-D torch tensor.

    Args:
        patches: 2-D torch tensor; converted with `.numpy()` before being
            given to faiss.
        pca_dims: requested PCA output size. Must be a positive `int`; when
            it exceeds the input size it is reduced to the largest multiple
            of M (16) that fits, after printing a warning.

    Returns:
        Tuple `(faiss_index, sub_index)`: the `IndexPreTransform` wrapper and
        the `IndexIVFPQ` it wraps.

    Raises:
        AssertionError: if `patches` is not a 2-D tensor, `pca_dims` is not a
            positive int, or the final `pca_dims` is not a multiple of M.
    """
    # faiss settings passed to IndexIVFPQ
    num_lists, M, num_bits = 200, 16, 8

    assert torch.is_tensor(patches) and patches.dim() == 2
    assert type(pca_dims) is int and pca_dims > 0
    input_dims = patches.size(1)
    if pca_dims > input_dims:
        print('WARNING: Input dimension < %d. Using fewer PCA dimensions.' %
              pca_dims)
        pca_dims = input_dims - input_dims % M

    # assemble: PCA transform -> IVFPQ with a flat L2 coarse quantizer
    coarse = faiss.IndexFlatL2(pca_dims)
    assert pca_dims % M == 0
    sub_index = faiss.IndexIVFPQ(coarse, pca_dims, num_lists, M, num_bits)
    pca = faiss.PCAMatrix(input_dims, pca_dims, 0, True)
    faiss_index = faiss.IndexPreTransform(pca, sub_index)

    # train on the patches, then store the same patches
    as_array = patches.numpy()
    faiss_index.train(as_array)
    faiss_index.add(as_array)
    return faiss_index, sub_index
예제 #7
0
 def make_index():
     """Return an inner-product IVFFlat index, behind a PCA transform if `pca` is set.

     Reads `dim`, `nlist`, `pca` and `in_dim` from the enclosing scope.
     """
     coarse = faiss.IndexFlatIP(dim)
     ivf = faiss.IndexIVFFlat(coarse, dim, nlist)
     if not pca:
         return ivf
     # Only the input and output sizes are given; PCAMatrix's optional
     # eigen_power / random_rotation arguments are left at faiss defaults.
     reducer = faiss.PCAMatrix(in_dim, dim)
     return faiss.IndexPreTransform(reducer, ivf)
def train_index(start_data,
                quantizer_path,
                trained_index_path,
                num_clusters,
                fine_quant='SQ4',
                cuda=False,
                hnsw=False):
    """Train an inner-product faiss index on `start_data` and write it to disk.

    Args:
        start_data: 2-D training array; its second dimension is the vector size.
        quantizer_path: not used by this function.
        trained_index_path: path handed to `faiss.write_index`.
        num_clusters: number of inverted lists for the IVF variants.
        fine_quant: selects the index type. 'SQ4' gives an IVF 4-bit scalar
            quantizer, a value containing 'OPQ' followed by digits gives
            OPQ + IVFPQ (or HNSWPQ when `hnsw` is set), and a value
            containing 'none' gives a flat index.
        cuda: train on GPU 0 with float16 enabled, then copy back to CPU.
        hnsw: only consulted on the 'OPQ' path.

    Raises:
        ValueError: if `fine_quant` matches none of the cases above.
    """
    dim = start_data.shape[1]
    metric = faiss.METRIC_INNER_PRODUCT
    coarse = faiss.IndexFlatIP(dim)

    if fine_quant == 'SQ4':
        # Used only for reimplementation
        untrained = faiss.IndexIVFScalarQuantizer(
            coarse, dim, num_clusters, faiss.ScalarQuantizer.QT_4bit, metric)
    elif 'OPQ' in fine_quant:
        # Default index type; the digits after the first 'OPQ' are the code size
        code_size = int(fine_quant.partition('OPQ')[2])
        if hnsw:
            # NOTE(review): a factory-style string is passed as the second
            # argument and `code_size` is ignored here -- confirm this is
            # the intended IndexHNSWPQ call.
            untrained = faiss.IndexHNSWPQ(dim, "HNSW32,PQ96", metric)
        else:
            rotation = faiss.OPQMatrix(dim, code_size)
            rotation.niter = 10
            ivfpq = faiss.IndexIVFPQ(coarse, dim, num_clusters,
                                     code_size, 8, metric)
            untrained = faiss.IndexPreTransform(rotation, ivfpq)
    elif 'none' in fine_quant:
        untrained = faiss.IndexFlatIP(dim)
    else:
        raise ValueError(fine_quant)
    untrained.verbose = False

    if not cuda:
        untrained.train(start_data)
        trained = untrained
    else:
        # Clone to GPU 0, train there, then bring the result back to CPU
        resources = faiss.StandardGpuResources()
        options = faiss.GpuClonerOptions()
        options.useFloat16 = True
        on_gpu = faiss.index_cpu_to_gpu(resources, 0, untrained, options)
        on_gpu.verbose = False
        on_gpu.train(start_data)
        trained = faiss.index_gpu_to_cpu(on_gpu)

    # Make sure to set direct map again
    # NOTE(review): this also runs on the hnsw path, where the index is not
    # built around an IVF index -- confirm extract_index_ivf is valid there.
    if 'none' not in fine_quant:
        ivf = faiss.extract_index_ivf(trained)
        ivf.make_direct_map()
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)
    faiss.write_index(trained, trained_index_path)
예제 #9
0
 def test_IndexPreTransform(self):
     """Check the index stays usable after its Python-side parts are deleted.

     An IndexPreTransform is built from a transform and a sub-index passed to
     the constructor. The local references to each are then dropped and
     garbage collection is forced; `index.add` must still work afterwards.
     `d` and `xb` come from outside this method.
     """
     ltrans = faiss.NormalizationTransform(d)
     sub_index = faiss.IndexFlatL2(d)
     index = faiss.IndexPreTransform(ltrans, sub_index)
     # baseline: add works while every reference is still alive
     index.add(xb)
     # drop the transform reference and force a collection
     del ltrans
     gc.collect()
     index.add(xb)
     # drop the sub-index reference as well
     del sub_index
     gc.collect()
     index.add(xb)
예제 #10
0
 def test_IndexPreTransform_2(self):
     """Same lifetime check, with the transform attached via prepend_transform.

     The IndexPreTransform is built from the sub-index alone and the transform
     is prepended afterwards. The local references to each are then dropped
     and garbage collection is forced; `index.add` must still work afterwards.
     `d` and `xb` come from outside this method.
     """
     sub_index = faiss.IndexFlatL2(d)
     index = faiss.IndexPreTransform(sub_index)
     ltrans = faiss.NormalizationTransform(d)
     index.prepend_transform(ltrans)
     # baseline: add works while every reference is still alive
     index.add(xb)
     # drop the transform reference and force a collection
     del ltrans
     gc.collect()
     index.add(xb)
     # drop the sub-index reference as well
     del sub_index
     gc.collect()
     index.add(xb)
예제 #11
0
    def fit(self, X):
        """Build, train and fill a dimension-padded IVFPQ index from X.

        Reads 'nlist', 'nprobe', 'm' and 'b' from `self.params`. The input
        width is rounded up to the next multiple of `m` and a
        `RemapDimensionsTransform` maps the original width to that size in
        front of the IVFPQ index.

        Args:
            X: 2-D array of vectors; converted to float32 before use.

        Sets `self.remapper`, `self.quantizer`, `self.index_pq` and
        `self.index`.
        """
        nlist = self.params['nlist']
        nprobe = self.params['nprobe']
        m = self.params['m']
        b = self.params['b']
        _, w = X.shape
        # smallest multiple of m that is >= w, in pure integer arithmetic
        d = (w + m - 1) // m * m

        self.remapper = faiss.RemapDimensionsTransform(w, d, True)
        self.quantizer = faiss.IndexFlatL2(d)
        self.index_pq = faiss.IndexIVFPQ(self.quantizer, d, nlist, m, b)
        self.index = faiss.IndexPreTransform(self.remapper, self.index_pq)

        # convert once and reuse for both training and adding
        data = X.astype('float32')
        self.index.train(data)
        self.index.add(data)

        # nprobe is a field of the IVF index; assigning it on the
        # IndexPreTransform wrapper would not reach the IVF search.
        self.index_pq.nprobe = nprobe
예제 #12
0
    def __init__(self, d):
        """Set up an untrained PCA + IVFPQ index for `d`-dimensional input.

        Args:
            d: dimension of the input vectors; PCA reduces it to 256.

        Sets `self.index2` (the IndexPreTransform wrapper), `self.sub_index`
        (the IVFPQ index) and `self.pca_matrix`.
        """
        d2 = 256     # dimension after PCA
        nlist = 100  # numCentroids
        m = 16       # numQuantizers (the value the index has always been built with)

        coarse_quantizer = faiss.IndexFlatL2(d2)
        sub_index = faiss.IndexIVFPQ(coarse_quantizer, d2, nlist, m, 8)
        pca_matrix = faiss.PCAMatrix(d, d2, 0, True)
        self.index2 = faiss.IndexPreTransform(pca_matrix, sub_index)

        # hand ownership of the coarse quantizer over to the IVFPQ index so
        # that it is not freed when the local Python wrapper goes away
        sub_index.own_fields = True
        coarse_quantizer.this.disown()

        self.sub_index = sub_index
        self.pca_matrix = pca_matrix

        # nprobe is a field of the IVF index; assigning it on the
        # IndexPreTransform wrapper would not reach the IVF search.
        sub_index.nprobe = 10
예제 #13
0
def index_factory(d: int,
                  index_key: str,
                  metric_type: int,
                  ef_construction: Optional[int] = None):
    """
    Replacement for faiss.index_factory that builds selected index types by
    hand when the metric is inner product.

    For any other metric the call is forwarded to faiss.index_factory as is.
    For inner product, the key is matched (in this order) against
    "OPQ<m>_<d2>,IVF<n>,PQ<cs>", "OPQ<m>_<d2>,IVF<n>_HNSW<M>,PQ<cs>",
    "Pad<d2>,IVF<n>_HNSW<M>,PQ<cs>", "HNSW<M>" and the exact key "Flat".
    Every number in the key is read in order of appearance; one extra
    trailing number, when present, is used as the PQ bit count instead of
    the default 8. `ef_construction`, when given and >= 1, is applied to the
    HNSW coarse quantizer.

    Raises ValueError for any other key with the inner product metric.
    """
    if metric_type != faiss.METRIC_INNER_PRODUCT:
        return faiss.index_factory(d, index_key, metric_type)

    # every integer that appears in the key, in order
    numbers = [int(tok) for tok in re.findall(r"\d+", index_key)]
    apply_ef = ef_construction is not None and ef_construction >= 1

    if re.search(r"OPQ\d+_\d+,IVF\d+,PQ\d+", index_key):
        M_OPQ, out_d, ncentroids, cs = numbers[:4]  # cs: code size (bytes if nbits=8)
        nbits = numbers[4] if len(numbers) == 5 else 8  # default value

        quantizer = faiss.index_factory(out_d, "Flat", metric_type)
        assert quantizer.metric_type == metric_type
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs,
                                       nbits, metric_type)
        assert index_ivfpq.metric_type == metric_type
        # the IVFPQ index takes ownership of the coarse quantizer
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
        index = faiss.IndexPreTransform(opq_matrix, index_ivfpq)

    elif re.search(r"OPQ\d+_\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
        M_OPQ, out_d, ncentroids, M_HNSW, cs = numbers[:5]
        nbits = numbers[5] if len(numbers) == 6 else 8  # default value

        quantizer = faiss.IndexHNSWFlat(out_d, M_HNSW, metric_type)
        if apply_ef:
            quantizer.hnsw.efConstruction = ef_construction
        assert quantizer.metric_type == metric_type
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs,
                                       nbits, metric_type)
        assert index_ivfpq.metric_type == metric_type
        # the IVFPQ index takes ownership of the coarse quantizer
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member
        opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
        index = faiss.IndexPreTransform(opq_matrix, index_ivfpq)

    elif re.search(r"Pad\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
        out_d, ncentroids, M_HNSW, cs = numbers[:4]
        nbits = numbers[4] if len(numbers) == 5 else 8  # default value

        remapper = faiss.RemapDimensionsTransform(d, out_d, True)

        quantizer = faiss.IndexHNSWFlat(out_d, M_HNSW, metric_type)
        if apply_ef:
            quantizer.hnsw.efConstruction = ef_construction
        index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs,
                                       nbits, metric_type)
        # the IVFPQ index takes ownership of the coarse quantizer
        index_ivfpq.own_fields = True
        quantizer.this.disown()  # pylint: disable = no-member

        index = faiss.IndexPreTransform(remapper, index_ivfpq)

    elif re.search(r"HNSW\d+", index_key):
        # the first number in the key is taken as the HNSW M parameter
        index = faiss.IndexHNSWFlat(d, numbers[0], metric_type)
        assert index.metric_type == metric_type

    elif index_key == "Flat":
        index = faiss.index_factory(d, index_key, metric_type)

    else:
        # The index is still built first so that deleting the raise below
        # lets the caller try the stock factory result.
        index = faiss.index_factory(d, index_key, metric_type)
        raise ValueError((
            "Be careful, faiss might not create what you expect when using the "
            "inner product similarity metric, remove this line to try it anyway."
            "Happened with index_key: " + str(index_key)))

    return index
예제 #14
0
# Build an IVF scalar-quantizer index whose coarse quantizer is an HNSW graph
# over pre-computed centroids. `centroids` is defined outside the lines
# visible here.
ncent, d = centroids.shape

# Rotate the centroids; the same rotation object is later installed as the
# pre-transform of the index, so vectors passed to the index go through it too.
print('apply random rotation')
rrot = faiss.RandomRotationMatrix(d, d)
# NOTE(review): 1234 is presumably the random seed -- confirm against faiss docs
rrot.init(1234)
centroids = rrot.apply_py(centroids)

# The (rotated) centroids are added to an HNSW index that serves as the
# coarse quantizer.
print('make HNSW index as quantizer')
quantizer = faiss.IndexHNSWFlat(d, 32)
quantizer.hnsw.efSearch = 1024
quantizer.hnsw.efConstruction = 200
quantizer.add(centroids)

# Final index: rotation followed by an IVF index with 6-bit scalar quantization
print('build index')
index = faiss.IndexPreTransform(
    rrot,
    faiss.IndexIVFScalarQuantizer(quantizer, d, ncent,
                                  faiss.ScalarQuantizer.QT_6bit))


def ivecs_mmap(fname):
    """Memory-map `fname` as int32 rows and return them without their first column.

    The first int32 in the file is read as the vector length `d`; the file is
    then viewed as rows of `d + 1` values and column 0 of every row is
    dropped. The file is opened read-only.
    """
    raw = np.memmap(fname, dtype='int32', mode='r')
    row_len = raw[0] + 1
    table = raw.reshape(-1, row_len)
    return table[:, 1:]


def fvecs_mmap(fname):
    """Memory-map `fname` like `ivecs_mmap`, reinterpreting the values as float32."""
    as_int32 = ivecs_mmap(fname)
    return as_int32.view('float32')


# Memory-map the learn set from the dataset directory (`deep1bdir` is defined
# outside the lines visible here).
# NOTE(review): the message reads "finish training"; presumably the actual
# training call follows below this point -- confirm.
print('finish training index')
xt = fvecs_mmap(deep1bdir + 'learn.fvecs')