Esempio n. 1
0
    def do_test_bounded(self,
                        nbin,
                        n,
                        shift=2,
                        minv=500,
                        rspan=None,
                        seed=None):
        if seed is None:
            for run in range(50):
                self.do_test_bounded(nbin,
                                     n,
                                     shift,
                                     minv,
                                     rspan,
                                     seed=123 + run)
            return

        if rspan is None:
            rmin, rmax = 0, nbin * 6
        else:
            rmin, rmax = rspan

        rs = np.random.RandomState(seed)
        tab = rs.randint(rmin, rmax, size=n).astype('uint16')
        bc = np.bincount(tab, minlength=65536)

        binsize = 1 << shift
        ref_histogram = bc[minv:minv + binsize * nbin]

        def pad_and_reshape(x, m, n):
            xout = np.zeros(m * n, dtype=x.dtype)
            xout[:x.size] = x
            return xout.reshape(m, n)

        ref_histogram = pad_and_reshape(ref_histogram, nbin, binsize)
        ref_histogram = ref_histogram.sum(1)

        tab_a = faiss.AlignedTableUint16()
        faiss.copy_array_to_AlignedTable(tab, tab_a)
        sp = faiss.swig_ptr

        hist = np.zeros(nbin, 'int32')
        if nbin == 8:
            faiss.simd_histogram_8(tab_a.get(), n, minv, shift, sp(hist))
        elif nbin == 16:
            faiss.simd_histogram_16(tab_a.get(), n, minv, shift, sp(hist))
        else:
            raise AssertionError()

        np.testing.assert_array_equal(hist, ref_histogram)
Esempio n. 2
0
def do_partition(n, qin, maxval=65536, seed=123, id_type='int64'):
    print(f"n={n} qin={qin} maxval={maxval} id_type={id_type}  ",
          end="\t",
          flush=True)

    # print("seed=", seed)
    rs = np.random.RandomState(seed)
    vals = rs.randint(maxval, size=n).astype('uint16')
    ids = (rs.permutation(n) + 12345).astype(id_type)

    sp = faiss.swig_ptr

    tab_a = faiss.AlignedTableUint16()
    faiss.copy_array_to_AlignedTable(vals, tab_a)

    nrun = 2000

    times = []
    nerr = 0
    stats = faiss.cvar.partition_stats
    stats.reset()
    for _run in range(nrun):
        faiss.copy_array_to_AlignedTable(vals, tab_a)
        t0 = time.time()
        # print("tab a type", tab_a.get())
        if type(qin) == int:
            q = qin
            faiss.CMax_uint16_partition_fuzzy(tab_a.get(), sp(ids), n, q, q,
                                              None)
        else:
            q_min, q_max = qin
            q = np.array([-1], dtype='uint64')
            thresh2 = faiss.CMax_uint16_partition_fuzzy(
                tab_a.get(), sp(ids), n, q_min, q_max, sp(q))
            q = q[0]

            if not (q_min <= q <= q_max):
                nerr += 1

        t1 = time.time()

        times.append(t1 - t0)

    times = np.array(times[100:]) * 1000000

    print(f"times {times.mean():.3f} µs (± {times.std():.4f} µs) nerr={nerr} "
          f"bissect {stats.bissect_cycles / 1e6:.3f} Mcy "
          f"compress {stats.compress_cycles / 1e6:.3f} Mcy")
Esempio n. 3
0
    def do_test(self, nbin, n):
        rs = np.random.RandomState(123)
        tab = rs.randint(nbin, size=n).astype('uint16')
        ref_histogram = np.bincount(tab, minlength=nbin)

        tab_a = faiss.AlignedTableUint16()
        faiss.copy_array_to_AlignedTable(tab, tab_a)

        sp = faiss.swig_ptr
        hist = np.zeros(nbin, 'int32')
        if nbin == 8:
            faiss.simd_histogram_8(tab_a.get(), n, 0, -1, sp(hist))
        elif nbin == 16:
            faiss.simd_histogram_16(tab_a.get(), n, 0, -1, sp(hist))
        else:
            raise AssertionError()
        np.testing.assert_array_equal(hist, ref_histogram)
Esempio n. 4
0
    def do_partition(self, n, q, maxval=65536, seed=None):
        #seed = 1235
        if seed is None:
            for i in range(50):
                self.do_partition(n, q, maxval, i + 1234)
        # print("seed=", seed)
        rs = np.random.RandomState(seed)
        vals = rs.randint(maxval, size=n).astype('uint16')
        ids = (rs.permutation(n) + 12345).astype('int64')
        dic = dict(zip(ids, vals))

        sp = faiss.swig_ptr
        vals_orig = vals.copy()

        tab_a = faiss.AlignedTableUint16()
        vals_inv = (65535 - vals).astype('uint16')
        faiss.copy_array_to_AlignedTable(vals_inv, tab_a)

        # print("tab a type", tab_a.get())
        if type(q) == int:
            faiss.CMin_uint16_partition_fuzzy(tab_a.get(), sp(ids), n, q, q,
                                              None)
        else:
            q_min, q_max = q
            q = np.array([-1], dtype='uint64')
            thresh2 = faiss.CMin_uint16_partition_fuzzy(
                tab_a.get(), sp(ids), n, q_min, q_max, sp(q))
            q = q[0]
            assert q_min <= q <= q_max

        vals_inv = faiss.AlignedTable_to_array(tab_a)
        vals = 65535 - vals_inv

        o = vals_orig.argsort()
        thresh = vals_orig[o[q]]
        n_eq = (vals_orig[o[:q]] == thresh).sum()

        for i in range(q):
            self.assertEqual(vals[i], dic[ids[i]])
            self.assertLessEqual(vals[i], thresh)
            if vals[i] == thresh:
                n_eq -= 1
        self.assertEqual(n_eq, 0)