def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Time the STREAM-style kernels (copy/scale/add/triad) on numpy arrays.

    Args:
        arrayLength: element count for each working array.
        dtype: numpy dtype bound to ``real_t`` for arrays and the scalar.
        innerRepeat: inner repetition count forwarded to the benchmark state.
        outerRepeat: outer repetition count forwarded to the benchmark state.

    Returns:
        List of ``(label, timing)`` pairs in canonical STREAM order.
    """
    import numpy as np

    # Executed once before timing: allocate the three arrays and the scalar.
    setup = """
    a = np.full(arrayLength, 1, dtype=real_t);
    b = np.full(arrayLength, 2, dtype=real_t);
    c = np.full(arrayLength, 3, dtype=real_t);
    alpha = real_t(42)
    """

    # One timed statement per kernel; this variant's kernels take the
    # input array(s) plus scalar and presumably return fresh arrays.
    timed_statements = {
        "copy": 'copy(a)',
        "scale": 'scale(a, alpha)',
        "add": 'add(a, b)',
        "triad": 'triad(a, b, alpha)',
    }

    # Names the setup / timed statements may reference.
    exec_env = {
        "copy": copy,
        "scale": scale,
        "add": add,
        "triad": triad,
        "np": np,
        "arrayLength": arrayLength,
        "real_t": dtype,
    }

    bench_state = state_o(exec_env, innerRepeat, outerRepeat)
    results = benchmark(timed_statements, inspect.cleandoc(setup), bench_state)

    # Report in canonical STREAM order.
    ordering = (("COPY", "copy"), ("SCALE", "scale"), ("ADD", "add"), ("TRIAD", "triad"))
    return [(label, results[key]) for label, key in ordering]
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Time the STREAM-style kernels using the project's ``init`` helper.

    Args:
        arrayLength: element count for each working array.
        dtype: type bound to ``real_t`` for element construction.
        innerRepeat: inner repetition count forwarded to the benchmark state.
        outerRepeat: outer repetition count forwarded to the benchmark state.

    Returns:
        List of ``(label, timing)`` pairs in canonical STREAM order.
    """
    # Executed once before timing: build the three arrays and the scalar
    # via the project-supplied init helper.
    setup = """
    a = init(real_t(1), arrayLength);
    b = init(real_t(2), arrayLength);
    c = init(real_t(3), arrayLength);
    alpha = real_t(42);
    """

    # One timed statement per kernel; destination array comes first.
    timed_statements = {
        "copy": 'copy(a, b)',
        "scale": 'scale(a, b, alpha)',
        "add": 'add(a, b, c)',
        "triad": 'triad(a, b, c, alpha)',
    }

    # Names the setup / timed statements may reference.
    exec_env = {
        "copy": copy,
        "scale": scale,
        "add": add,
        "triad": triad,
        "init": init,
        "arrayLength": arrayLength,
        "real_t": dtype,
    }

    bench_state = state_o(exec_env, innerRepeat, outerRepeat)
    results = benchmark(timed_statements, inspect.cleandoc(setup), bench_state)

    # Report in canonical STREAM order.
    ordering = (("COPY", "copy"), ("SCALE", "scale"), ("ADD", "add"), ("TRIAD", "triad"))
    return [(label, results[key]) for label, key in ordering]
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Time the STREAM-style kernels (numba variant) and log threading info.

    Args:
        arrayLength: element count for each working array.
        dtype: numpy dtype bound to ``real_t`` for arrays and the scalar.
        innerRepeat: inner repetition count forwarded to the benchmark state.
        outerRepeat: outer repetition count forwarded to the benchmark state.

    Returns:
        List of ``(label, timing)`` pairs in canonical STREAM order.
    """
    import numpy as np

    # Executed once before timing: allocate the three arrays and the scalar.
    setup = """
    a = np.full(arrayLength, 1, dtype=real_t);
    b = np.full(arrayLength, 2, dtype=real_t);
    c = np.full(arrayLength, 3, dtype=real_t);
    alpha = real_t(42)
    """

    # One timed statement per kernel; destination array comes first.
    timed_statements = {
        "copy": 'copy(a, b)',
        "scale": 'scale(a, b, alpha)',
        "add": 'add(a, b, c)',
        "triad": 'triad(a, b, c, alpha)',
    }

    # Names the setup / timed statements may reference.
    exec_env = {
        "copy": copy,
        "scale": scale,
        "add": add,
        "triad": triad,
        "np": np,
        "arrayLength": arrayLength,
        "real_t": dtype,
    }

    bench_state = state_o(exec_env, innerRepeat, outerRepeat)
    results = benchmark(timed_statements, inspect.cleandoc(setup), bench_state)

    # Record which numba threading layer/thread count the run actually used
    # (only known after the kernels have executed at least once).
    logger.debug("NUMBA Threading Layer: {}".format(npyufunc.parallel.threading_layer()))
    logger.debug("NUMBA N Threads: {}".format(npyufunc.parallel.get_thread_count()))

    # Report in canonical STREAM order.
    ordering = (("COPY", "copy"), ("SCALE", "scale"), ("ADD", "add"), ("TRIAD", "triad"))
    return [(label, results[key]) for label, key in ordering]
def bench(arrayLength, dtype, innerRepeat, outerRepeat):
    """Time the STREAM-style kernels (CUDA variant) on device arrays.

    Fix: previously only the ``copy`` statement launched a device kernel;
    ``scale``/``add``/``triad`` still called the kernels on the HOST arrays
    ``a, b, c`` with no launch configuration, so the ``d_a/d_b/d_c``
    transfers and ``n_blocks/n_threads`` computed in the setup went unused
    for three of the four kernels (and a numba ``@cuda.jit`` kernel cannot
    be invoked without a launch configuration). All four statements now
    launch on the device arrays, mirroring the ``copy`` entry and the
    argument order used by the other ``bench`` variants in this file.

    Args:
        arrayLength: element count for each working array.
        dtype: numpy dtype bound to ``real_t`` for arrays and the scalar.
        innerRepeat: inner repetition count forwarded to the benchmark state.
        outerRepeat: outer repetition count forwarded to the benchmark state.

    Returns:
        List of ``(label, timing)`` pairs in canonical STREAM order.
    """
    import numpy as np

    # Executed once before timing: host arrays, per-array streams for the
    # initial transfers, device copies, and the 1-D launch configuration.
    setup = """
    a = np.full(arrayLength, 1, dtype=real_t);
    b = np.full(arrayLength, 2, dtype=real_t);
    c = np.full(arrayLength, 3, dtype=real_t);
    alpha = real_t(42)
    stream_a = cuda.stream()
    stream_b = cuda.stream()
    stream_c = cuda.stream()
    d_a = cuda.to_device(a, stream_a)
    d_b = cuda.to_device(b, stream_b)
    d_c = cuda.to_device(c, stream_c)
    n_threads = 256
    n_blocks = np.ceil(arrayLength / n_threads).astype("int32")
    """

    # One timed statement per kernel — every kernel is launched with the
    # same configuration on the device-resident arrays.
    times = {
        "copy": 'copy[n_blocks, n_threads](d_a, d_b)',
        "scale": 'scale[n_blocks, n_threads](d_a, d_b, alpha)',
        "add": 'add[n_blocks, n_threads](d_a, d_b, d_c)',
        "triad": 'triad[n_blocks, n_threads](d_a, d_b, d_c, alpha)',
    }

    # Names the setup / timed statements may reference.
    namespace = {
        "np": np,
        "cuda": cuda,
        "copy": copy,
        "scale": scale,
        "add": add,
        "triad": triad,
        "arrayLength": arrayLength,
        "real_t": dtype,
    }

    state = state_o(namespace, innerRepeat, outerRepeat)
    results = benchmark(times, inspect.cleandoc(setup), state)

    # Report in canonical STREAM order.
    return [
        ("COPY", results["copy"]),
        ("SCALE", results["scale"]),
        ("ADD", results["add"]),
        ("TRIAD", results["triad"]),
    ]