Ejemplo n.º 1
0
        vec_transform = lambda x: x
    assert isinstance(index_ivf, faiss.IndexIVF)
    index_ivf.verbose = True
    index_ivf.quantizer.verbose = True
    index_ivf.cp.verbose = True

    maxtrain = args.maxtrain
    if maxtrain == 0:
        if 'IMI' in args.indexkey:
            maxtrain = int(256 * 2**(np.log2(index_ivf.nlist) / 2))
        else:
            maxtrain = 50 * index_ivf.nlist
        print "setting maxtrain to %d" % maxtrain
        args.maxtrain = maxtrain

    xt2 = sanitize(xt[:args.maxtrain])
    assert np.all(np.isfinite(xt2))

    print "train, size", xt2.shape

    if args.get_centroids_from == '':

        if args.clustering_niter >= 0:
            print("setting nb of clustering iterations to %d" %
                  args.clustering_niter)
            index_ivf.cp.niter = args.clustering_niter

        if args.train_on_gpu:
            print "add a training index on GPU"
            train_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(d))
            index_ivf.clustering_index = train_index
Ejemplo n.º 2
0
    else:
        index_hnsw = index
        vec_transform = lambda x: x

    hnsw = index_hnsw.hnsw
    hnsw.efConstruction = args.efConstruction
    hnsw_stats = faiss.cvar.hnsw_stats
    index.verbose = True
    index_hnsw.verbose = True
    index_hnsw.storage.verbose = True

    if args.M0 != -1:
        print "set level 0 nb of neighbors to", args.M0
        hnsw.set_nb_neighbors(0, args.M0)

    xt2 = sanitize(xt[:args.maxtrain])
    assert np.all(np.isfinite(xt2))

    print "train, size", xt.shape
    t0 = time.time()
    index.train(xt2)
    print "  train in %.3f s" % (time.time() - t0)

    print "adding"
    t0 = time.time()
    if args.add_bs == -1:
        index.add(sanitize(xb))
    else:
        for i0 in range(0, nb, args.add_bs):
            i1 = min(nb, i0 + args.add_bs)
            print "  adding %d:%d / %d" % (i0, i1, nb)
Ejemplo n.º 3
0
        if not index_ivf.quantizer.is_trained:
            print("  training quantizer")
            index_ivf.quantizer.train(centroids)

        print("  add centroids to quantizer")
        index_ivf.quantizer.add(centroids)
        del src_index

    t0 = time.time()
    index.train(xt2)
    print("  train in %.3f s" % (time.time() - t0))

    print("adding")
    t0 = time.time()
    if args.add_bs == -1:
        index.add(sanitize(ds.get_database()))
    else:
        i0 = 0
        for xblock in ds.database_iterator(bs=args.add_bs):
            i1 = i0 + len(xblock)
            print("  adding %d:%d / %d [%.3f s, RSS %d kiB] " % (
                i0, i1, ds.nb, time.time() - t0,
                faiss.get_mem_usage_kb()))
            index.add(xblock)
            i0 = i1

    print("  add in %.3f s" % (time.time() - t0))
    if args.indexfile:
        print("storing", args.indexfile)
        faiss.write_index(index, args.indexfile)
Ejemplo n.º 4
0
    xt_pca = xt[args.nt:args.nt + 10000]
    xt = xt[:args.nt]
else:
    xt_pca = xt[args.nt_sample:args.nt_sample + 10000]
    rs = np.random.RandomState(args.seed)
    idx = rs.choice(args.nt_sample, size=args.nt, replace=False)
    xt = xt[idx]

# Keep only the first args.nb database vectors.
xb = xb[:args.nb]

# Current vector dimensionality (may be reduced by PCA below).
d = xb.shape[1]

# Optionally train a PCA reduction to args.pcadim dimensions on the held-out
# xt_pca sample, then apply it to both the training and database vectors.
# NOTE(review): `sanitize` is defined elsewhere in this script — presumably it
# makes the arrays contiguous float32 for faiss; confirm against its definition.
if args.pcadim != -1:
    print "training PCA: %d -> %d" % (d, args.pcadim)
    pca = faiss.PCAMatrix(d, args.pcadim)
    pca.train(sanitize(xt_pca))
    xt = pca.apply_py(sanitize(xt))
    xb = pca.apply_py(sanitize(xb))
    # Update d to the post-PCA dimensionality so the index below matches.
    d = xb.shape[1]


######################################################
# Run clustering
######################################################


# Brute-force exact L2 index over the (possibly PCA-reduced) vectors.
index = faiss.IndexFlatL2(d)

# If GPUs were requested, replicate the flat index across all available GPUs.
if ngpu > 0:
    print "moving index to GPU"
    index = faiss.index_cpu_to_all_gpus(index)