Ejemplo n.º 1
0
    def test_OIVFPQ(self):
        """Compare an IVFPQ index with and without an OPQ pre-transform.

        Both indexes go through the same ``Randu10kUnbalanced`` evaluation.
        For each of the result keys 1, 10 and 100 the OPQ variant must score
        at least as high as the plain one.
        """
        n_lists = 50   # number of inverted lists
        n_sub = 4      # number of PQ sub-quantizers

        ev = Randu10kUnbalanced()
        dim = ev.d

        # Baseline: IVFPQ over a flat L2 coarse quantizer.
        coarse_plain = faiss.IndexFlatL2(dim)
        plain = faiss.IndexIVFPQ(coarse_plain, dim, n_lists, n_sub, 8)
        plain.nprobe = 5
        e_ivfpq = ev.evalres(ev.launch('IVFPQ', plain))

        # Identical IVFPQ configuration placed behind an OPQ transform.
        coarse_opq = faiss.IndexFlatL2(dim)
        inner = faiss.IndexIVFPQ(coarse_opq, dim, n_lists, n_sub, 8)
        inner.nprobe = 5
        rotation = faiss.OPQMatrix(dim, n_sub)
        rotation.niter = 10
        rotated = faiss.IndexPreTransform(rotation, inner)
        e_oivfpq = ev.evalres(ev.launch('O+IVFPQ', rotated))

        # The OPQ variant must never score below the baseline.
        for rank in (1, 10, 100):
            print(e_oivfpq[rank], e_ivfpq[rank])
            assert e_oivfpq[rank] >= e_ivfpq[rank]
Ejemplo n.º 2
0
    def test_OPQ(self):
        """Check that a PQ index behind an OPQ transform beats plain PQ.

        Both indexes go through the same ``Randu10kUnbalanced`` evaluation.
        For each of the result keys 1, 10 and 100 the OPQ score must be
        strictly greater than the PQ score.
        """
        n_sub = 4  # number of PQ sub-quantizers

        ev = Randu10kUnbalanced()
        dim = ev.d

        # Reference: product quantizer applied to the vectors as-is.
        plain = faiss.IndexPQ(dim, n_sub, 8)
        e_pq = ev.evalres(ev.launch('PQ', plain))

        # Same product quantizer, placed behind an OPQ transform.
        inner = faiss.IndexPQ(dim, n_sub, 8)
        rotation = faiss.OPQMatrix(dim, n_sub)
        rotation.niter = 10
        rotation.niter_pq = 4
        rotated = faiss.IndexPreTransform(rotation, inner)
        e_opq = ev.evalres(ev.launch('OPQ', rotated))

        print('e_pq=%s' % e_pq)
        print('e_opq=%s' % e_opq)

        # OPQ has to be strictly better than PQ at every checked key.
        for rank in (1, 10, 100):
            assert e_opq[rank] > e_pq[rank]
def train_index(start_data,
                quantizer_path,
                trained_index_path,
                num_clusters,
                fine_quant='SQ4',
                cuda=False,
                hnsw=False):
    """Build, train and save a faiss inner-product index.

    Args:
        start_data: Training vectors; ``start_data.shape[1]`` is used as the
            vector dimension.
        quantizer_path: Not used by this function.
        trained_index_path: Destination handed to ``faiss.write_index``.
        num_clusters: Number of IVF lists for the ``'SQ4'`` and OPQ layouts.
        fine_quant: Index layout. Exactly ``'SQ4'`` selects an IVF scalar
            quantizer (4 bit); any string containing ``'OPQ'`` selects the
            OPQ layout, the integer following the first ``'OPQ'`` being the
            code size; any string containing ``'none'`` selects a flat index.
        cuda: When true, train on GPU 0 (float16 cloner option) and copy the
            trained index back to CPU.
        hnsw: With an OPQ layout, build ``faiss.IndexHNSWPQ`` instead of
            OPQ + IVFPQ.

    Raises:
        ValueError: If ``fine_quant`` matches none of the layouts, or if the
            text following ``'OPQ'`` is not an integer.
    """
    dim = start_data.shape[1]
    metric = faiss.METRIC_INNER_PRODUCT
    coarse = faiss.IndexFlatIP(dim)

    if fine_quant == 'SQ4':
        # Used only for reimplementation
        index = faiss.IndexIVFScalarQuantizer(
            coarse, dim, num_clusters, faiss.ScalarQuantizer.QT_4bit, metric)
    elif 'OPQ' in fine_quant:
        # Default index type; everything after the first 'OPQ' is the code size.
        code_size = int(fine_quant.partition('OPQ')[2])
        if not hnsw:
            rotation = faiss.OPQMatrix(dim, code_size)
            rotation.niter = 10
            ivfpq = faiss.IndexIVFPQ(coarse, dim, num_clusters, code_size, 8,
                                     metric)
            index = faiss.IndexPreTransform(rotation, ivfpq)
        else:
            # NOTE(review): IndexHNSWPQ is given a factory-style string here,
            # and this branch later goes through extract_index_ivf as well;
            # confirm this path actually works with the faiss version in use.
            index = faiss.IndexHNSWPQ(dim, "HNSW32,PQ96", metric)
    elif 'none' in fine_quant:
        index = faiss.IndexFlatIP(dim)
    else:
        raise ValueError(fine_quant)

    index.verbose = False
    if not cuda:
        index.train(start_data)
    else:
        # Clone to GPU 0, train there, then bring the trained index back.
        gpu_resources = faiss.StandardGpuResources()
        cloner_opts = faiss.GpuClonerOptions()
        cloner_opts.useFloat16 = True
        on_gpu = faiss.index_cpu_to_gpu(gpu_resources, 0, index, cloner_opts)
        on_gpu.verbose = False
        on_gpu.train(start_data)
        index = faiss.index_gpu_to_cpu(on_gpu)

    # Make sure to set direct map again (skipped for the flat layout).
    if 'none' not in fine_quant:
        ivf = faiss.extract_index_ivf(index)
        ivf.make_direct_map()
        ivf.set_direct_map_type(faiss.DirectMap.Hashtable)
    faiss.write_index(index, trained_index_path)
Ejemplo n.º 4
0
def train_preprocessor(preproc_str):
    """Create and train the vector transform described by ``preproc_str``.

    Two spec forms are recognised. In both, the last character of the string
    is dropped before the numbers are parsed (presumably a trailing
    separator such as ``,`` -- confirm against the caller):

    * ``OPQ<m>[_<dout>]`` builds ``faiss.OPQMatrix(dim, m, dout)``; ``dout``
      falls back to ``dim`` when it is not given.
    * ``PCAR<dout>`` builds ``faiss.PCAMatrix(dim, dout, 0, True)``.

    The function reads the module-level names ``dim``, ``xt`` and
    ``sanitize``; the transform is trained on ``sanitize(xt)``.

    Args:
        preproc_str: Transform spec, see above.

    Returns:
        The trained faiss transform.

    Raises:
        AssertionError: If ``preproc_str`` starts with neither ``OPQ`` nor
            ``PCAR``.
    """
    print("train preproc", preproc_str)
    t0 = time.time()
    if preproc_str.startswith('OPQ'):
        fields = preproc_str[3:-1].split('_')
        m = int(fields[0])
        dout = int(fields[1]) if len(fields) == 2 else dim
        preproc = faiss.OPQMatrix(dim, m, dout)
    elif preproc_str.startswith('PCAR'):
        dout = int(preproc_str[4:-1])
        preproc = faiss.PCAMatrix(dim, dout, 0, True)
    else:
        # Raised explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, which would let execution fall through to an
        # unrelated UnboundLocalError. The exception type is kept for callers.
        raise AssertionError(
            "unknown preprocessor spec: %r" % (preproc_str,))
    preproc.train(sanitize(xt))
    print("preproc train done in %.3f s" % (time.time() - t0))
    return preproc
 def train_preprocessor(self, preproc_str_local, xt_local):
     """Return the vector transform for ``preproc_str_local``, using the cache.

     If ``self.preproc_cachefile`` is set and the file exists, the transform
     is loaded from it. Otherwise a new transform is trained on
     ``xt_local`` (at most the first 100000000 rows) and, when a cache file
     is configured, written to it.

     Recognised specs, with or without a trailing ``,``:

     * ``OPQ<m>[_<dout>]`` builds ``faiss.OPQMatrix(d, m, dout)``; ``dout``
       falls back to the input dimension ``d``.
     * ``PCAR<dout>`` builds ``faiss.PCAMatrix(d, dout, 0, True)``.

     Args:
         preproc_str_local: Transform spec, see above.
         xt_local: Training vectors; ``xt_local.shape[1]`` is the input
             dimension.

     Returns:
         The trained or loaded faiss transform.

     Raises:
         AssertionError: If the spec starts with neither ``OPQ`` nor
             ``PCAR``.
     """
     cache_path = self.preproc_cachefile
     if cache_path and os.path.exists(cache_path):
         print("load preproc ", cache_path)
         return faiss.read_VectorTransform(cache_path)

     print("train preproc", preproc_str_local)
     d = xt_local.shape[1]
     t0 = time.time()
     # Both branches now parse the same way. Previously OPQ kept the last
     # character and PCAR dropped it, so a spec without a trailing comma
     # silently lost the last digit of the PCA dimension, and an OPQ spec
     # with a trailing comma failed to parse.
     spec = preproc_str_local.rstrip(',')
     if spec.startswith('OPQ'):
         fi = spec[3:].split('_')
         m = int(fi[0])
         dout = int(fi[1]) if len(fi) == 2 else d
         preproc = faiss.OPQMatrix(d, m, dout)
     elif spec.startswith('PCAR'):
         dout = int(spec[4:])
         preproc = faiss.PCAMatrix(d, dout, 0, True)
     else:
         # Explicit raise: `assert False` disappears under `python -O`.
         raise AssertionError(
             "unknown preprocessor spec: %r" % (preproc_str_local,))
     preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
     print("preproc train done in %.3f s" % (time.time() - t0))
     # Only persist when a cache file is configured; training is also
     # reached when `preproc_cachefile` is unset.
     if cache_path:
         faiss.write_VectorTransform(preproc, cache_path)
     return preproc
Ejemplo n.º 6
0
# Each section below runs one quantizer family through eval_quantizer when
# its key is present in `todo`.

if 'lsq-gpu' in todo:
    # Local-search quantizer with an ICM encoder factory built for
    # `faiss.get_num_gpus()` GPUs.
    lsq = faiss.LocalSearchQuantizer(d, M, nbits)
    ngpus = faiss.get_num_gpus()
    lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
    lsq.verbose = True
    print("===== LSQ-GPU")
    # Same (quantizer, xq, xb, gt, xt) argument order as every other
    # eval_quantizer call in this script; the previous call passed
    # (lsq, xb, xt, 'lsq-gpu'), which does not fit that form.
    eval_quantizer(lsq, xq, xb, gt, xt)

if 'pq' in todo:
    pq = faiss.ProductQuantizer(d, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq, xb, gt, xt)

if 'opq' in todo:
    # Round d up to the next multiple of M for the OPQ output dimension.
    d2 = ((d + M - 1) // M) * M
    print("OPQ d2=", d2)
    opq = faiss.OPQMatrix(d, M, d2)
    opq.train(xt)
    # Apply the trained transform to queries, database and training set,
    # then evaluate a plain PQ on the transformed vectors.
    xq2 = opq.apply(xq)
    xb2 = opq.apply(xb)
    xt2 = opq.apply(xt)
    pq = faiss.ProductQuantizer(d2, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq2, xb2, gt, xt2)

if 'prq' in todo:
    print(f"===== PRQ{nsplits}x{Msub}x{nbits}")
    prq = faiss.ProductResidualQuantizer(d, nsplits, Msub, nbits)
    # Evaluate once per max_beam_size setting.
    variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)]
    eval_quantizer(prq, xq, xb, gt, xt, variants=variants)

if 'plsq' in todo:
Ejemplo n.º 7
0
def _make_hnsw_quantizer(out_d, M_HNSW, metric_type, ef_construction):
    """Create an ``IndexHNSWFlat`` coarse quantizer, optionally setting efConstruction."""
    quantizer = faiss.IndexHNSWFlat(out_d, M_HNSW, metric_type)
    if ef_construction is not None and ef_construction >= 1:
        quantizer.hnsw.efConstruction = ef_construction
    return quantizer


def _build_pretransform_ivfpq(pre_transform, quantizer, out_d, ncentroids, cs,
                              nbits, metric_type):
    """Wrap ``IndexIVFPQ(quantizer, ...)`` behind ``pre_transform``."""
    assert quantizer.metric_type == metric_type
    index_ivfpq = faiss.IndexIVFPQ(quantizer, out_d, ncentroids, cs, nbits,
                                   metric_type)
    assert index_ivfpq.metric_type == metric_type
    # Hand the quantizer over to the IVFPQ index: the index is flagged as
    # owner of its fields and the Python wrapper gives up its ownership.
    index_ivfpq.own_fields = True
    quantizer.this.disown()  # pylint: disable = no-member
    return faiss.IndexPreTransform(pre_transform, index_ivfpq)


def index_factory(d: int,
                  index_key: str,
                  metric_type: int,
                  ef_construction: Optional[int] = None):
    """
    Custom index_factory that works around some issues of
    faiss.index_factory with the inner product metric.

    For any metric other than ``faiss.METRIC_INNER_PRODUCT`` the call is
    forwarded unchanged to ``faiss.index_factory``. For inner product the
    index is assembled by hand for the following key shapes, tested in this
    order (patterns are searched anywhere in ``index_key``):

    * ``OPQ<M>_<out_d>,IVF<n>,PQ<cs>[x<nbits>]``
    * ``OPQ<M>_<out_d>,IVF<n>_HNSW<M_HNSW>,PQ<cs>[x<nbits>]``
    * ``Pad<out_d>,IVF<n>_HNSW<M_HNSW>,PQ<cs>[x<nbits>]``
    * ``HNSW<M_HNSW>``
    * ``Flat``

    ``nbits`` defaults to 8 when the key carries no extra number.

    Args:
        d: Input vector dimension.
        index_key: Factory-style description of the index.
        metric_type: faiss metric constant.
        ef_construction: When not None and >= 1, value assigned to
            ``hnsw.efConstruction`` of an HNSW coarse quantizer.

    Returns:
        The constructed (untrained) faiss index.

    Raises:
        ValueError: For an inner-product key matching none of the shapes above.
    """

    if metric_type == faiss.METRIC_INNER_PRODUCT:
        # Every number appearing in the key, in order of appearance.
        params = [int(x) for x in re.findall(r"\d+", index_key)]

        # make the index described by the key
        if re.search(r"OPQ\d+_\d+,IVF\d+,PQ\d+", index_key):
            # cs: code size (in Bytes if nbits=8)
            M_OPQ, out_d, ncentroids, cs = params[:4]
            nbits = params[4] if len(params) == 5 else 8  # default value

            quantizer = faiss.index_factory(out_d, "Flat", metric_type)
            # opq_matrix.niter is left at its default value
            opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
            index = _build_pretransform_ivfpq(opq_matrix, quantizer, out_d,
                                              ncentroids, cs, nbits,
                                              metric_type)
        elif re.search(r"OPQ\d+_\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
            # cs: code size (in Bytes if nbits=8)
            M_OPQ, out_d, ncentroids, M_HNSW, cs = params[:5]
            nbits = params[5] if len(params) == 6 else 8  # default value

            quantizer = _make_hnsw_quantizer(out_d, M_HNSW, metric_type,
                                             ef_construction)
            # opq_matrix.niter is left at its default value
            opq_matrix = faiss.OPQMatrix(d, M=M_OPQ, d2=out_d)
            index = _build_pretransform_ivfpq(opq_matrix, quantizer, out_d,
                                              ncentroids, cs, nbits,
                                              metric_type)
        elif re.search(r"Pad\d+,IVF\d+_HNSW\d+,PQ\d+", index_key):
            # cs: code size (in Bytes if nbits=8)
            out_d, ncentroids, M_HNSW, cs = params[:4]
            nbits = params[4] if len(params) == 5 else 8  # default value

            remapper = faiss.RemapDimensionsTransform(d, out_d, True)
            quantizer = _make_hnsw_quantizer(out_d, M_HNSW, metric_type,
                                             ef_construction)
            index = _build_pretransform_ivfpq(remapper, quantizer, out_d,
                                              ncentroids, cs, nbits,
                                              metric_type)
        elif re.search(r"HNSW\d+", index_key):
            # NOTE(review): this pattern is unanchored, so any remaining key
            # that merely contains "HNSW<n>" lands here and its first number
            # is taken as M_HNSW -- confirm that is intended.
            M_HNSW = params[0]
            index = faiss.IndexHNSWFlat(d, M_HNSW, metric_type)
            assert index.metric_type == metric_type
        elif index_key == "Flat":
            index = faiss.index_factory(d, index_key, metric_type)
        else:
            index = faiss.index_factory(d, index_key, metric_type)
            raise ValueError((
                "Be careful, faiss might not create what you expect when using the "
                "inner product similarity metric, remove this line to try it anyway. "
                "Happened with index_key: " + str(index_key)))

    else:
        index = faiss.index_factory(d, index_key, metric_type)

    return index
Ejemplo n.º 8
0
    x = x.view('int32')
    y = np.ones((d, 1), dtype='int32') * w
    x = np.concatenate([y, x], -1).reshape(-1)
    x.tofile(fname)


def cvecs_write(x, fname):
    """Write array ``x`` to ``fname`` as raw bytes after casting to uint8.

    The data is written with ``ndarray.tofile``; no header is added.
    """
    as_bytes = x.astype('uint8')
    as_bytes.tofile(fname)


# Train an OPQ transform on the SIFT base vectors, then train a product
# quantizer on the transformed vectors and encode them.
x = fvecs_read("sift/sift_base.fvecs")
# For a quick run, subsample first, e.g. x = x[:10000, :]
n, d = x.shape
m = 8  # number of sub-quantizers, shared by the OPQ transform and the PQ

opq = faiss.OPQMatrix(d, m)
opq.train(x)
# The transform's matrix, reshaped to d x d.
A = faiss.vector_to_array(opq.A).reshape(d, d)
print(A.shape)

xt = opq.apply_py(x)
print(xt.shape)

pq = faiss.ProductQuantizer(d, m, 8)
pq.train(xt)
# Encode the transformed vectors: the PQ codebooks were trained on `xt`,
# so encoding the untransformed `x` would pair them with the wrong space.
codes = pq.compute_codes(xt)
cen = faiss.vector_to_array(pq.centroids)
cen = cen.reshape(pq.M, pq.ksub, pq.dsub)