def do_test_bounded(self, nbin, n, shift=2, minv=500, rspan=None, seed=None):
    """Check faiss.simd_histogram_{8,16} against a NumPy reference histogram.

    Random uint16 values are drawn, and the reference is built from
    ``np.bincount``: the counts in the window
    ``[minv, minv + (1 << shift) * nbin)`` are grouped into ``nbin``
    consecutive bins of ``1 << shift`` values each and summed per bin.

    Args:
        nbin: number of histogram bins; only 8 and 16 are supported.
        n: number of random input values.
        shift: log2 of the bin width passed to the faiss kernel.
        minv: lowest value covered by the histogram.
        rspan: optional ``(rmin, rmax)`` range handed to ``randint``;
            defaults to ``(0, nbin * 6)``.
        seed: RandomState seed. When None, the test is repeated for the
            50 seeds ``123 .. 172`` instead.

    Raises:
        AssertionError: if ``nbin`` is neither 8 nor 16, or if the faiss
            histogram differs from the reference.
    """
    if seed is None:
        # No explicit seed: sweep a fixed set of seeds, then stop.
        for run in range(50):
            self.do_test_bounded(nbin, n, shift, minv, rspan, seed=123 + run)
        return

    lo, hi = (0, nbin * 6) if rspan is None else rspan

    rng = np.random.RandomState(seed)
    tab = rng.randint(lo, hi, size=n).astype('uint16')

    # Reference: per-value counts over the full uint16 range, cut to the
    # histogram window and zero-padded in case the window runs past 65535.
    counts = np.bincount(tab, minlength=65536)
    binsize = 1 << shift
    window = counts[minv:minv + binsize * nbin]
    padded = np.zeros(nbin * binsize, dtype=window.dtype)
    padded[:window.size] = window
    ref_histogram = padded.reshape(nbin, binsize).sum(1)

    tab_a = faiss.AlignedTableUint16()
    faiss.copy_array_to_AlignedTable(tab, tab_a)

    sp = faiss.swig_ptr
    hist = np.zeros(nbin, 'int32')
    if nbin not in (8, 16):
        raise AssertionError()
    kernel = faiss.simd_histogram_8 if nbin == 8 else faiss.simd_histogram_16
    kernel(tab_a.get(), n, minv, shift, sp(hist))

    np.testing.assert_array_equal(hist, ref_histogram)
def do_partition(n, qin, maxval=65536, seed=123, id_type='int64'):
    """Benchmark faiss.CMax_uint16_partition_fuzzy on random uint16 values.

    The partition call is run 2000 times on a fresh copy of the same values;
    the first 100 timings are discarded and the mean / standard deviation of
    the rest are printed in microseconds, together with the cycle counters
    read from ``faiss.cvar.partition_stats``.

    Args:
        n: number of values to partition.
        qin: either an int ``q`` (passed as both q_min and q_max), or a
            ``(q_min, q_max)`` pair, in which case the q written back by
            faiss is checked against that interval.
        maxval: exclusive upper bound of the random values.
        seed: RandomState seed for values and ids.
        id_type: NumPy dtype of the id array handed to faiss.
    """
    print(f"n={n} qin={qin} maxval={maxval} id_type={id_type} ", end="\t", flush=True)

    rs = np.random.RandomState(seed)
    vals = rs.randint(maxval, size=n).astype('uint16')
    ids = (rs.permutation(n) + 12345).astype(id_type)
    sp = faiss.swig_ptr

    tab_a = faiss.AlignedTableUint16()
    faiss.copy_array_to_AlignedTable(vals, tab_a)

    nrun = 2000
    n_warmup = 100  # leading runs excluded from the statistics
    # All-ones sentinel for the q output slot. Spelled as the uint64 maximum
    # because converting -1 to an unsigned dtype raises OverflowError on
    # NumPy 2.x.
    q_sentinel = np.iinfo(np.uint64).max

    times = []
    nerr = 0
    stats = faiss.cvar.partition_stats
    stats.reset()
    for _run in range(nrun):
        # The values table is restored before every run (ids are not).
        faiss.copy_array_to_AlignedTable(vals, tab_a)
        # perf_counter is monotonic and has the resolution needed for
        # microsecond-scale measurements.
        t0 = time.perf_counter()
        if isinstance(qin, int):
            q = qin
            faiss.CMax_uint16_partition_fuzzy(tab_a.get(), sp(ids), n, q, q, None)
        else:
            q_min, q_max = qin
            q_out = np.array([q_sentinel], dtype='uint64')
            faiss.CMax_uint16_partition_fuzzy(
                tab_a.get(), sp(ids), n, q_min, q_max, sp(q_out))
            q = q_out[0]
            if not (q_min <= q <= q_max):
                nerr += 1
        t1 = time.perf_counter()
        times.append(t1 - t0)

    # Seconds -> microseconds, warm-up runs dropped.
    times = np.array(times[n_warmup:]) * 1000000

    print(f"times {times.mean():.3f} µs (± {times.std():.4f} µs) nerr={nerr} "
          f"bissect {stats.bissect_cycles / 1e6:.3f} Mcy "
          f"compress {stats.compress_cycles / 1e6:.3f} Mcy")
def do_test(self, nbin, n):
    """Check faiss.simd_histogram_{8,16} on values already in ``[0, nbin)``.

    The kernel is called with ``min=0`` and ``shift=-1`` and its output is
    compared with ``np.bincount`` of the same values.
    NOTE(review): shift=-1 presumably selects the kernel's unbounded mode;
    confirm against the faiss implementation.

    Args:
        nbin: number of histogram bins; only 8 and 16 are supported.
        n: number of random input values.

    Raises:
        AssertionError: if ``nbin`` is neither 8 nor 16, or if the faiss
            histogram differs from the reference.
    """
    rng = np.random.RandomState(123)
    tab = rng.randint(nbin, size=n).astype('uint16')
    expected = np.bincount(tab, minlength=nbin)

    tab_a = faiss.AlignedTableUint16()
    faiss.copy_array_to_AlignedTable(tab, tab_a)

    sp = faiss.swig_ptr
    hist = np.zeros(nbin, 'int32')
    if nbin not in (8, 16):
        raise AssertionError()
    kernel = faiss.simd_histogram_8 if nbin == 8 else faiss.simd_histogram_16
    kernel(tab_a.get(), n, 0, -1, sp(hist))

    np.testing.assert_array_equal(hist, expected)
def do_partition(self, n, q, maxval=65536, seed=None):
    """Check faiss.CMin_uint16_partition_fuzzy against a sort-based reference.

    Random uint16 values are inverted (``65535 - v``) before being handed to
    the CMin partition, and inverted back afterwards. The first ``q`` output
    entries are then required to

    * still carry the value originally associated with their id,
    * be no larger than ``thresh``, the element at index ``q`` of the sorted
      original values,
    * contain exactly as many entries equal to ``thresh`` as the ``q``
      smallest original values do.

    Args:
        n: number of values to partition.
        q: either an int (passed as both q_min and q_max), or a
            ``(q_min, q_max)`` pair, in which case the q written back by
            faiss is used and must lie inside that interval.
        maxval: exclusive upper bound of the random values.
        seed: RandomState seed. When None, the test is repeated for the
            50 seeds ``1234 .. 1283`` instead.
    """
    if seed is None:
        for i in range(50):
            self.do_partition(n, q, maxval, i + 1234)
        # Stop here so that no additional trial with an unseeded
        # (non-reproducible) RandomState runs after the sweep.
        return

    rs = np.random.RandomState(seed)
    vals = rs.randint(maxval, size=n).astype('uint16')
    ids = (rs.permutation(n) + 12345).astype('int64')
    # id -> original value, used to verify ids and values moved together.
    dic = dict(zip(ids, vals))

    sp = faiss.swig_ptr
    vals_orig = vals.copy()

    tab_a = faiss.AlignedTableUint16()
    vals_inv = (65535 - vals).astype('uint16')
    faiss.copy_array_to_AlignedTable(vals_inv, tab_a)

    if isinstance(q, int):
        faiss.CMin_uint16_partition_fuzzy(tab_a.get(), sp(ids), n, q, q, None)
    else:
        q_min, q_max = q
        # All-ones sentinel for the q output slot. Spelled as the uint64
        # maximum because converting -1 to an unsigned dtype raises
        # OverflowError on NumPy 2.x.
        q_out = np.array([np.iinfo(np.uint64).max], dtype='uint64')
        faiss.CMin_uint16_partition_fuzzy(
            tab_a.get(), sp(ids), n, q_min, q_max, sp(q_out))
        q = int(q_out[0])
        # unittest assertions are not stripped under `python -O`.
        self.assertLessEqual(q_min, q)
        self.assertLessEqual(q, q_max)

    vals_inv = faiss.AlignedTable_to_array(tab_a)
    vals = 65535 - vals_inv

    order = vals_orig.argsort()
    thresh = vals_orig[order[q]]
    # Number of ties with the threshold expected among the q smallest values.
    n_eq = (vals_orig[order[:q]] == thresh).sum()

    for i in range(q):
        self.assertEqual(vals[i], dic[ids[i]])
        self.assertLessEqual(vals[i], thresh)
        if vals[i] == thresh:
            n_eq -= 1
    self.assertEqual(n_eq, 0)