def timeit_verbose(
    stmt,
    setup="pass",
    total_duration=2,
    globals=None,
    norm=None,
    name=None,
    print_time=False,
    max_length_name=33,
):
    """Time ``stmt`` with ``timeit`` and print the result relative to ``norm``.

    Parameters
    ----------
    stmt : str
        Statement to time. The text before its first ``(`` is used as the
        label when ``name`` is not given.
    setup, total_duration, globals
        Forwarded unchanged to ``timeit``.
    norm : float, optional
        Reference value the measurement is divided by. When ``None``, the
        measurement itself becomes the reference (printed ratio is 1).
    name : str, optional
        Label printed in front of the ratio.
    print_time : bool
        If true, also append the raw measurement to the printed line.
    max_length_name : int
        Minimum width of the label column.

    Returns
    -------
    The value returned by ``timeit`` (not normalized).
    """
    duration = timeit(
        stmt, setup=setup, total_duration=total_duration, globals=globals
    )

    # Without an explicit reference, this measurement is the reference.
    is_reference = norm is None
    if is_reference:
        norm = duration

    label = stmt.split("(")[0] if name is None else name
    label = f"{label:{max_length_name}s}"

    suffix = f" = {duration:7.3g} s" if print_time else ""
    print(f"{label}: {duration/norm:5.3g} * norm{suffix}")

    # The absolute reference is only shown when it was not already printed
    # as the raw time on the line above.
    if is_reference and not print_time:
        print(f"norm = {norm:5.3g} s")

    return duration
def bench(call, norm=None):
    """Time the statement ``call`` and print it as a ratio of ``norm``.

    The statement is evaluated in the namespace ``loc`` defined outside this
    function. When ``norm`` is ``None`` the measurement is its own reference,
    so the printed ratio is 1.00. The label is the text of ``call`` before
    its first ``(``.

    Returns the raw (non-normalized) value returned by ``timeit``.
    """
    duration = timeit(call, globals=loc)
    reference = duration if norm is None else norm
    label = call.split("(")[0]
    print(f"{label:33s}: {duration / reference:.2f}")
    return duration
# Benchmark script for Stiffness.compute / Stiffness.compute_3d.
# NOTE: x, y, m, s and stiffness are referenced by name inside the statement
# strings passed to timeit below, so they must not be renamed.
stiffness = Stiffness()

x = np.array([0.0, 1.0, 0.0])
y = np.array([0.0, 0.0, 1.0])
m = np.zeros(21)
stiffness.compute(x, y, m)

# Sanity check: both methods must fill their output array with the same values.
m_3d = np.zeros(21)
stiffness.compute_3d(x, y, m_3d)
assert np.allclose(m, m_3d)

# Pre-bind the namespace and the total duration; from here on `timeit` refers
# to this partial, so later calls only pass the statement.
# NOTE(review): `s` is bound after this line yet used in a statement below;
# that only works if locals() here is the live module namespace - presumably
# this code runs at module level; confirm.
timeit = partial(timeit, globals=locals(), total_duration=8)

# time = timeit("stiffness.compute(x, y, m)", globals=locals())
# print(f"{time * 1e6:.2f} µs")

# Timing through the method; the result is scaled by 1e6 and printed as µs.
time = timeit("stiffness.compute_3d(x, y, m)")
print(f"{time * 1e6:.2f} µs")

# Import the compiled function directly from the __pythran__ package. Two
# hash-suffixed module names are tried; the second is the fallback when the
# first cannot be imported.
try:
    from __pythran__.bench_e8c958181e03f6fe586ca36dbe63fe01 import __for_method__Stiffness__compute_3d
except ImportError:
    from __pythran__.bench_16ca22d6c4f7271d975a6bac0b0cfb62 import __for_method__Stiffness__compute_3d

# Timing of the direct call, with the attributes gq3d and grads3d passed
# explicitly as the first two arguments.
s = stiffness
time = timeit(
    "__for_method__Stiffness__compute_3d(s.gq3d, s.grads3d, x, y, m)")
print(f"{time * 1e6:.2f} µs")

# Module-level aliases of the two attributes.
grads = stiffness.grads3d
gq = stiffness.gq3d
# Right-hand side handed to odeint below: unpacks the state u into (x, y, z)
# and returns the three derivatives as a tuple. `t` is accepted but not used
# (presumably required by the odeint callback signature - confirm).
def lorenz(u, t):
    x, y, z = u
    return 10.0 * (y - x), x * (28.0 - z) - y, x * y - 2.66 * z


# Accelerated variants of the same function.
lorenz_pythran = jit(native=True, xsimd=True)(lorenz)
lorenz_numba = numba.jit(lorenz)

# Call each variant once before timing, then wait for the extensions
# (presumably this triggers and waits for compilation - confirm).
u0 = (1.0, 0.0, 0.0)
lorenz_pythran(u0, 0)
lorenz_numba(u0, 0)
wait_for_all_extensions()

# tspan = (0.0, 100.0)
t = np.linspace(0, 100, 1001)
sol = odeint(lorenz, u0, t)

# The plain-Python timing is the reference (`norm`) for the two others.
# NOTE: lorenz, lorenz_pythran, lorenz_numba, u0 and t are looked up by name
# from the statement strings through locals(); do not rename them.
norm = timeit("odeint(lorenz, u0, t, rtol = 1e-8, atol=1e-8)", globals=locals())
timeit(
    "odeint(lorenz_pythran, u0, t, rtol = 1e-8, atol=1e-8)",
    globals=locals(),
    norm=norm,
)
timeit(
    "odeint(lorenz_numba, u0, t, rtol = 1e-8, atol=1e-8)",
    globals=locals(),
    norm=norm,
)
x_init = np.zeros(n_sleds) y_init = np.random.rand(n_sleds) v_init = np.zeros(n_sleds) u_init = np.zeros(n_sleds) + 3.5 solver(board, x_init, y_init, u_init, v_init, 0.01, n_time) # end code functions (don't remove this line) bench_pythran = jit(bench) # Numba does not support this code... # bench_numba = jit(backend="numba")(bench) from transonic import wait_for_all_extensions # warmup (compilation of the Pythran extension) bench_pythran(1, 1) wait_for_all_extensions() if __name__ == "__main__": from transonic.util import timeit_verbose as timeit n_sleds = 10 n_time = 200 g = locals() norm = timeit("bench(n_sleds, n_time)", globals=g) timeit("bench_pythran(n_sleds, n_time)", globals=g, norm=norm) # timeit("bench_numba(n_sleds, n_time)", globals=g, norm=norm)
velocities = np.zeros_like(positions) accelerations = np.zeros_like(positions) time_step = 1.0 advance_positions_nosimd(positions, velocities, accelerations, time_step) advance_positions_simd(positions, velocities, accelerations, time_step) advance_positions_loops(positions, velocities, accelerations, time_step) advance_positions_nosimd_ra(positions, velocities, accelerations, time_step) advance_positions_simd_ra(positions, velocities, accelerations, time_step) advance_positions_loops_ra(positions, velocities, accelerations, time_step) wait_for_all_extensions() norm = timeit( "advance_positions(positions, velocities, accelerations, time_step)", globals=locals(), ) timeit( "advance_positions_simd(positions, velocities, accelerations, time_step)", globals=locals(), norm=norm, ) timeit( "advance_positions_nosimd(positions, velocities, accelerations, time_step)", globals=locals(), norm=norm, ) timeit( "advance_positions_loops(positions, velocities, accelerations, time_step)", globals=locals(), norm=norm,
# Benchmark set-up: 1024 rows, `dim` columns (`dim` is defined elsewhere).
shape = 1024, dim
print("shape=", shape)

masses = np.zeros(shape[0])
positions = np.zeros(shape)
velocities = np.zeros_like(positions)
accelerations = np.zeros_like(positions)

# Fill the first column of positions with 0.0, 1.0, 2.0, ...
x = 0.0
for ip in range(shape[0]):
    positions[ip, 0] = x
    x += 1.0

# The statement strings below resolve compute*, accelerations, masses and
# positions by name in the module globals; do not rename them.
glo = globals()
# `compute` is the reference; the other variants are reported relative to it.
norm = timeit("compute(accelerations, masses, positions)", globals=glo)
timeit("compute_opt(accelerations, masses, positions)", globals=glo, norm=norm)
timeit("compute_opt1(accelerations, masses, positions)", globals=glo, norm=norm)
timeit("compute_opt2(accelerations, masses, positions)", globals=glo, norm=norm)

# The bare string below is the author's note about `pythran -P` output; it is
# an expression statement with no effect at run time.
""" Oh, `pythran -P` gives: ``` coef = (1.0 / (np.square(math.sqrt(builtins.sum(np.square(vector)))) * math.sqrt(builtins.sum(np.square(vector))))) ``` I guess the C++ compiler is smart enough to simplify that, but it's a bit strange... """
def bench(functions, arr, columns):
    """Time ``func(arr, columns)`` for every ``func`` in ``functions``.

    Prints the capitalized ``backend_default`` as a header, then one line per
    function with its ``__name__`` and the value returned by ``timeit``
    (formatted as seconds), then an empty line.
    """
    print(backend_default.capitalize())
    for func in functions:
        # The statement string refers to `func`, `arr` and `columns` by name
        # and resolves them through locals(): these names must not change.
        result = timeit("func(arr, columns)", globals=locals())
        print(f"{func.__name__:20s} {result:.3e} s")
    print()
# Benchmark set-up: zero-filled arrays of shape (256, 4).
shape = 256, 4

positions = np.zeros(shape)
velocities = np.zeros_like(positions)
accelerations = np.zeros_like(positions)

time_step = 1.0

# Call each accelerated variant once before timing, then wait for the
# extensions (presumably this triggers and waits for compilation - confirm).
advance_positions_nosimd(positions, velocities, accelerations, time_step)
advance_positions_simd(positions, velocities, accelerations, time_step)
advance_positions_loops(positions, velocities, accelerations, time_step)
wait_for_all_extensions()

# The statement strings resolve the functions and arrays by name in the module
# globals; do not rename them.
glo = globals()

# `advance_positions` is the reference; the variants are reported relative to it.
norm = timeit(
    "advance_positions(positions, velocities, accelerations, time_step)",
    globals=glo)
timeit(
    "advance_positions_nosimd(positions, velocities, accelerations, time_step)",
    norm=norm, globals=glo)
timeit(
    "advance_positions_simd(positions, velocities, accelerations, time_step)",
    norm=norm, globals=glo)
timeit(
    "advance_positions_loops(positions, velocities, accelerations, time_step)",
    norm=norm, globals=glo)
def bench_one(name_module="cmorph", func=None, total_duration=2):
    """Benchmark the first registered function of ``name_module``.

    The statement comes from ``statements`` and the setup code from
    ``setup_codes/<module>_<function>.py``. The ``pyx`` import variant is
    timed in-process and used as the norm; the cython, pythran and numba
    backends are timed in a subprocess running a generated ``tmp.py`` with
    ``TRANSONIC_BACKEND`` set. If the function has an entry in
    ``import_from_skimage``, that variant is timed once before the norm and
    once after the backends (the second time with its ratio to the norm).

    Parameters
    ----------
    name_module : str
        Module name used to select the entry in ``statements``.
    func
        Must be ``None``; any other value raises ``NotImplementedError``.
    total_duration
        Forwarded to ``timeit`` (also inside the generated script).

    Raises
    ------
    NotImplementedError
        If ``func`` is given.
    ValueError
        If no key of ``statements`` belongs to ``name_module``.
    RuntimeError
        If the setup file does not exist.
    """
    if func is not None:
        raise NotImplementedError

    matching_names = [
        func_name for (mod, func_name) in statements if mod == name_module
    ]
    if not matching_names:
        raise ValueError(f"bad name_module: {name_module}")

    # Only the first function registered for the module is benchmarked.
    name_function = matching_names[0]
    key = (name_module, name_function)

    print(f"module: {name_module}")
    stmt = statements[key]
    print(stmt)

    path_setup = Path("setup_codes") / f"{name_module}_{name_function}.py"
    if not path_setup.exists():
        raise RuntimeError
    setup = path_setup.read_text()

    has_skimage_variant = key in import_from_skimage

    def time_skimage():
        # Swap the `future` import for the registered skimage import and time it.
        setup_from_skimage = setup.replace(
            f"from future.{name_module} import {name_function}",
            import_from_skimage[key],
        )
        return timeit(stmt, setup_from_skimage, total_duration=total_duration)

    if has_skimage_variant:
        duration = time_skimage()
        print(f"{'from skimage':18s} {duration:.2e} s")

    # Reference measurement: same setup, importing from `pyx` instead of `future`.
    setup_pyx = setup.replace(
        f"from future.{name_module} import", f"from pyx.{name_module} import"
    )
    time_old = timeit(stmt, setup_pyx, total_duration=total_duration)
    print(f"cython pyx skimage {time_old:.2e} s (= norm)")

    # The generated script embeds the unmodified setup (the `future` import)
    # and prints a single number that is parsed back with float().
    code = f"""
from transonic.util import timeit
setup = '''{setup}'''
stmt = '''{stmt}'''
print(timeit(stmt, setup, total_duration={total_duration}))
"""
    with open("tmp.py", "w") as file:
        file.write(code)

    for backend in ("cython", "pythran", "numba"):
        output = getoutput(f"TRANSONIC_BACKEND='{backend}' python tmp.py")
        duration = float(output)
        print(f"{backend:18s} {duration:.2e} s (= {duration/time_old:.2f} * norm)")

    # print(getoutput("TRANSONIC_NO_REPLACE=1 python tmp.py"))

    if not has_skimage_variant:
        return

    # Second skimage measurement, now reported relative to the norm.
    duration = time_skimage()
    print(f"{'from skimage':18s} {duration:.2e} s (= {duration/time_old:.2f} * norm)")
# Coefficients built from differences of the y and x entries; they weight the
# two components of gq in the loop below.
a11 = -y[0] + y[2]
a12 = y[0] - y[1]
a21 = x[0] - x[2]
a22 = -x[0] + x[1]

# Fill grads[f, p, 0:2] as linear combinations of gq[f, p, 0:2] for the
# 6 x 3 index pairs (f, p).
for f in range(6):
    for p in range(3):
        grads[f, p, 0] = a11 * gq[f, p, 0] + a12 * gq[f, p, 1]
        grads[f, p, 1] = a21 * gq[f, p, 0] + a22 * gq[f, p, 1]

result = np.zeros(21)
compute(x, y, grads, result)

# Pre-bind the namespace and the total duration; from here on `timeit` refers
# to this partial. compute, x, y, grads and result are looked up by name from
# the statement string, so they must not be renamed.
timeit = partial(timeit, globals=locals(), total_duration=8)
# The measurement is scaled by 1e6 and reported as µs.
time = timeit("compute(x, y, grads, result)") * 1e6
print(f"Pythran: {time:.2f} µs")

from pathlib import Path
import sys

# Optional comparison with a Julia result stored in a text file; the script
# exits here when that file does not exist.
path_tmp_julia = Path("tmp_result_julia.txt")
if not path_tmp_julia.exists():
    sys.exit()

with open(path_tmp_julia) as file:
    txt = file.read()

# The Julia time is the second-to-last space-separated token of the second
# line of the file (presumably already expressed in µs - confirm).
time_julia = float(txt.split("\n")[1].split(" ")[-2])
print(f"Julia: {time_julia:.2f} µs\nratio Pythran/Julia: {time/time_julia:.2f}")
n_sleds = 10 n_time = 1000 x_init = np.zeros(n_sleds) y_init = np.random.rand(n_sleds) v_init = np.zeros(n_sleds) u_init = np.zeros(n_sleds) + 3.5 solver(board, x_init, y_init, v_init, u_init, 0.01, n_time, n_sleds) bench_pythran = jit(bench) bench_numba = jit(backend="numba")(bench) if __name__ == "__main__": from transonic.util import timeit_verbose as timeit g = locals() norm = timeit("bench()", globals=g) timeit("bench_pythran()", globals=g, norm=norm) timeit("bench_numba()", globals=g, norm=norm) """ bench : 1.000 * norm norm = 8.35e-01 s bench_pythran : 0.007 * norm bench_numba : 0.009 * norm (~140 speedup!) """
def test_timeit():
    """Smoke test: run ``timeit`` on a trivial statement with a tiny duration."""
    # `a` and `b` look unused, but the statement string refers to them by
    # name and resolves them through locals().
    a = 1
    b = 2
    timeit("a + b", total_duration=0.001, globals=locals())
# Returns the value of omp.get_num_threads(); -1 is only the initial value.
# NOTE(review): the `# omp parallel` / `# omp single` comments are presumably
# OpenMP directives interpreted by the compiler behind @boost, and `if 1:`
# presumably exists only to give them a block to attach to - keep them verbatim.
@boost
def get_num_threads():
    nthreads = -1
    # omp parallel
    if 1:
        # omp single
        nthreads = omp.get_num_threads()
    return nthreads


# Benchmark set-up: 1024 rows, `dim` columns (`dim` is defined elsewhere).
shape = 1024, dim
print("shape=", shape)

nthreads = get_num_threads()
print(nthreads)

masses = np.zeros(shape[0])
positions = np.zeros(shape)
velocities = np.zeros_like(positions)
# Unlike positions, accelerations has an extra leading axis of length nthreads.
accelerations = np.zeros([nthreads, shape[0], dim])

# Fill the first column of positions with 0.0, 1.0, 2.0, ...
x = 0.0
for ip in range(shape[0]):
    positions[ip, 0] = x
    x += 1.0

# The statement string resolves compute, accelerations, masses and positions
# by name in the module globals; do not rename them.
glo = globals()
norm = timeit("compute(accelerations, masses, positions)", globals=glo)
# timeit("compute_opt(accelerations, masses, positions)", globals=glo, norm=norm)
# @boost variant of the integrand: exp(10 * x * cos(x)).
@boost
def integrand_transonic_boost(x: float):
    return np.exp(10.0 * x * np.cos(x))


# Other accelerated wrappers around `integrand` / `integrand_capsule`, both
# defined elsewhere.
integrand_numba = numba.njit(integrand)
integrand_numba_cfunc = numba.cfunc("float64(float64)")(integrand)
ll_callable = LowLevelCallable(integrand_capsule, signature="double (double)")

# Maps a printed label to the expression (as source text) that is inserted as
# the first argument of quad(...) in the timed statement.
methods = {
    "no acceleration": "integrand",
    "numba": "integrand_numba",
    "numba cfunc": "integrand_numba_cfunc.ctypes",
    "transonic_jit": "integrand_transonic_jit",
    "transonic_boost": "integrand_transonic_boost",
    "pythran capsule": "ll_callable",
}

# The first entry is timed with norm=None and its result becomes the
# reference passed as `norm` for all later entries.
norm = None
for name, key in methods.items():
    print(name)
    # The statement is evaluated in locals(), so every expression listed in
    # `methods` must be resolvable by name there.
    result = timeit(
        f"quad({key}, 0, 10, epsabs=1.49e-08, epsrel=1.49e-08)",
        globals=locals(),
        total_duration=8,
        norm=norm,
    )
    print(f"{result * 1e6:.2f} μs")
    if norm is None:
        norm = result
np.int32, ) images = images.flatten() # Erosion goes smallest to largest; dilation goes largest to smallest. index_sorted = np.argsort(images).astype(np.int32) index_sorted = index_sorted[::-1] # Make a linked list of pixels sorted by value. -1 is the list terminator. prev = np.full(len(images), -1, np.int32) next_ = np.full(len(images), -1, np.int32) prev[index_sorted[1:]] = index_sorted[:-1] next_[index_sorted[:-1]] = index_sorted[1:] # Cython inner-loop compares the rank of pixel values. value_rank, value_map = rank_order(images) start = index_sorted[0] ranks = np.array(value_rank) strides = nb_strides current_idx = np.int64(start) time = timeit( "reconstruction_loop(ranks, prev, next_, strides, current_idx, image_stride)", globals=locals(), total_duration=10, ) print(f"# {time:.2e} s")