def run() -> None:
    """Initialize y, x and a flattened A, run the ``yAx`` reduction once and report.

    Problem sizes come from ``parse_args()`` (element 0 is N, element 1 is M).
    The views are initialized by the ``y_init`` / ``matrix_init`` kernels, the
    reduction is timed with ``pk.Timer`` and the reduced value is compared
    against ``N * M``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = args[0], args[1]
    nrepeat = 1
    print(f"Total size S = {N * M} N = {N} M = {M}")

    # A is allocated as a single 1D view holding N * M entries.
    y = pk.View([N], pk.double)
    x = pk.View([M], pk.double)
    A = pk.View([N * M], pk.double)

    # One range over [0, N) is shared by the y init, the matrix init and the
    # reduction; x needs its own range over [0, M).
    row_policy = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(row_policy, y_init, y=y)
    col_policy = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(col_policy, y_init, y=x)
    pk.parallel_for(row_policy, matrix_init, M=M, A=A)

    # Only the reduction loop is timed.
    timer = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(row_policy, yAx, M=M, y=y, x=x, A=A)
    timer_result = timer.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    solution = N * M
    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
def __init__(self, N: int, M: int, fill: bool):
    """Allocate LayoutRight views y, x and A and set every entry to 1.

    :param N: length of ``y`` and first extent of ``A``
    :param M: length of ``x`` and second extent of ``A``
    :param fill: when true, use ``View.fill``; otherwise assign the entries
        one at a time from Python
    """
    self.N: int = N
    self.M: int = M

    self.y: pk.View1D[pk.double] = pk.View(
        [N], pk.double, layout=pk.Layout.LayoutRight)
    self.x: pk.View1D[pk.double] = pk.View(
        [M], pk.double, layout=pk.Layout.LayoutRight)
    self.A: pk.View2D[pk.double] = pk.View(
        [N, M], pk.double, layout=pk.Layout.LayoutRight)

    if not fill:
        # Element-wise path: each view entry is written individually.
        for col in range(M):
            self.x[col] = 1
        for row in range(N):
            self.y[row] = 1
            for col in range(M):
                self.A[row][col] = 1
    else:
        for view in (self.y, self.x, self.A):
            view.fill(1)
def __init__(self, N: int, K: int, D: int, R: int, F: int):
    """Store the benchmark parameters and allocate the connectivity and data views.

    :param N: length of ``A``/``B``/``C`` and first extent of ``connectivity``
    :param K: second extent of ``connectivity``
    :param D: stored as ``self.D`` (only referenced by the disabled
        initialization below)
    :param R: stored as ``self.R``
    :param F: stored as ``self.F``
    """
    # Scalar parameters and fixed constants.
    self.N: int = N
    self.K: int = K
    self.D: int = D
    self.R: int = R
    self.F: int = F
    self.UNROLL: int = 8
    self.scalar_size: int = 8
    self.seconds: float = 0

    self.connectivity: pk.View2D[int] = pk.View([N, K], int)
    self.A: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.B: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.C: pk.View1D[pk.double] = pk.View([N], pk.double)

    # C keeps whatever pk.View allocates; only A and B are filled here.
    self.A.fill(1.5)
    self.B.fill(2.0)

    # Replace the view's backing data with an (N, K) array of uniform samples
    # scaled into [0, N).
    # NOTE(review): these are floats while the view was declared with `int` --
    # confirm this is what the kernel expects.
    self.connectivity.data = N * np.random.rand(N, K)

    # TODO use kokkos to init in parallel
    # Disabled seeded, windowed initialization kept for reference:
    # random.seed(12313)
    # for i in range(N):
    #     for jj in range(K):
    #         self.connectivity[i][jj] = (random.randrange(D)+i-D/2+N) % N
def __init__(self, N: int, M: int, nrepeat: int, fill: bool):
    """Allocate y, x and A, set every entry to 1 and reset the result fields.

    :param N: length of ``y`` and first extent of ``A``
    :param M: length of ``x`` and second extent of ``A``
    :param nrepeat: stored as ``self.nrepeat``
    :param fill: when true, use ``View.fill``; otherwise assign entries one
        at a time from Python
    """
    self.N: int = N
    self.M: int = M
    self.nrepeat: int = nrepeat

    # Outputs start at zero.
    self.result: float = 0
    self.timer_result: float = 0

    self.y: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.x: pk.View1D[pk.double] = pk.View([M], pk.double)
    self.A: pk.View2D[pk.double] = pk.View([N, M], pk.double)

    if fill:
        for view in (self.y, self.x, self.A):
            view.fill(1)
    else:
        for row in range(N):
            self.y[row] = 1
        for col in range(M):
            self.x[col] = 1
        for row in range(N):
            for col in range(M):
                self.A[row][col] = 1
def __init__(self, N: int, M: int, E: int, nrepeat: int, fill: bool):
    """Allocate batched LayoutRight views y, x, A and set every entry to 1.

    :param N: second extent of ``y`` and of ``A``
    :param M: second extent of ``x`` and last extent of ``A``
    :param E: leading extent shared by all three views
    :param nrepeat: stored as ``self.nrepeat``
    :param fill: when true, use ``View.fill``; otherwise assign entries one
        at a time from Python
    """
    self.N: int = N
    self.M: int = M
    self.E: int = E
    self.nrepeat: int = nrepeat

    # Outputs start at zero.
    self.result: float = 0
    self.timer_result: float = 0

    self.y: pk.View2D[pk.double] = pk.View(
        [E, N], pk.double, layout=pk.Layout.LayoutRight)
    self.x: pk.View2D[pk.double] = pk.View(
        [E, M], pk.double, layout=pk.Layout.LayoutRight)
    self.A: pk.View3D[pk.double] = pk.View(
        [E, N, M], pk.double, layout=pk.Layout.LayoutRight)

    if fill:
        for view in (self.y, self.x, self.A):
            view.fill(1)
    else:
        for e in range(E):
            for col in range(M):
                self.x[e][col] = 1
            for row in range(N):
                self.y[e][row] = 1
                for col in range(M):
                    self.A[e][row][col] = 1
def __init__(self, args: List[str], system: System, half_neigh: bool):
    """Set up the force-parameter views, counters and aliases of the system views.

    :param args: forwarded unchanged to the base-class constructor
    :param system: source of ``ntypes`` and of the ``x``/``f``/``id``/``type`` views
    :param half_neigh: forwarded to the base class; ``self.half_neigh`` itself
        is then set to ``False`` regardless of this argument
    """
    super().__init__(args, system, half_neigh)

    self.ntypes: int = system.ntypes
    self.half_neigh: bool = False
    # Stack parameters are switched off; the size-based choice is disabled:
    # self.use_stackparams: bool = self.ntypes <= MAX_TYPES_STACKPARAMS
    self.use_stackparams: bool = False

    # Start from argument-less parameter views ...
    self.lj1: pk.View2D[pk.double] = self.t_fparams()
    self.lj2: pk.View2D[pk.double] = self.t_fparams()
    self.cutsq: pk.View2D[pk.double] = self.t_fparams()

    # ... and replace them with ntypes x ntypes views when stack parameters
    # are not in use.
    if not self.use_stackparams:
        self.lj1 = self.t_fparams(self.ntypes, self.ntypes)
        self.lj2 = self.t_fparams(self.ntypes, self.ntypes)
        self.cutsq = self.t_fparams(self.ntypes, self.ntypes)

    self.rnd_lj1: pk.View2D[pk.double] = self.t_fparams()
    self.rnd_lj2: pk.View2D[pk.double] = self.t_fparams()
    self.rnd_cutsq: pk.View2D[pk.double] = self.t_fparams()

    self.nbinx: int = 0
    self.nbiny: int = 0
    self.nbinz: int = 0

    self.N_local: int = 0
    self.nhalo: int = 0
    self.step: int = 0

    # Zero-initialized square tables; every row is a separate list.
    side = MAX_TYPES_STACKPARAMS + 1
    self.stack_lj1: List[List[float]] = [[0] * side for _ in range(side)]
    self.stack_lj2: List[List[float]] = [[0] * side for _ in range(side)]
    self.stack_cutsq: List[List[float]] = [[0] * side for _ in range(side)]

    self.energy: float = 0.0

    # parallel_for and parallel_reduce are called separately, so run uses
    # this flag to decide which of the two to call.
    self.parallel_for: bool = True

    # Copied from NeighList2D: empty placeholder views.
    self.num_neighs_view: pk.View1D[pk.int32] = pk.View([0], pk.int32)
    self.neighs_view: pk.View2D[pk.int32] = pk.View([0, 0], pk.int32)

    # Copied from system; f and f_a both refer to system.f.
    self.x: pk.View2D[pk.double] = system.x
    self.f: pk.View2D[pk.double] = system.f
    self.f_a: pk.View2D[pk.double] = system.f
    self.id: pk.View1D[pk.int32] = system.id
    self.type: pk.View1D[pk.int32] = system.type
def __init__(self, N: int, M: int, nrepeat: int):
    """Record the problem sizes and allocate y (N), x (M) and A (N x M).

    The views are only allocated here; this constructor writes no values
    into them.

    :param N: length of ``y`` and first extent of ``A``
    :param M: length of ``x`` and second extent of ``A``
    :param nrepeat: stored as ``self.nrepeat``
    """
    self.N: int = N
    self.M: int = M
    self.nrepeat: int = nrepeat

    # Outputs start at zero.
    self.result: float = 0
    self.timer_result: float = 0

    self.y: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.x: pk.View1D[pk.double] = pk.View([M], pk.double)
    self.A: pk.View2D[pk.double] = pk.View([N, M], pk.double)
def __init__(self, total_threads: int):
    """Allocate the per-thread views and the two 6-entry bin views.

    :param total_threads: length of ``view`` and ``permute_vector``
    """
    self.total_threads: int = total_threads
    self.x_0: int = 4

    # `view` is built through the MyView helper; the rest are plain pk.View.
    self.view: pk.View1D[pk.int32] = MyView(total_threads, data_type=pk.int32)
    self.permute_vector: pk.View1D[pk.int32] = pk.View(
        [total_threads], pk.int32)

    # Both bin views have a fixed length of 6.
    self.bin_offsets: pk.View1D[pk.int32] = pk.View([6], pk.int32)
    self.bin_count: pk.View1D[pk.int32] = pk.View([6], pk.int32)
def __init__(self, ARRAY_SIZE: int, initA: float, initB: float,
             initC: float, scalar: float):
    """Allocate the three arrays and remember their initial values and the scalar.

    The initial values are only stored; this constructor does not write them
    into the views.

    :param ARRAY_SIZE: length of ``a``, ``b`` and ``c``
    :param initA: stored as ``self.initA``
    :param initB: stored as ``self.initB``
    :param initC: stored as ``self.initC``
    :param scalar: stored as ``self.scalar``
    """
    shape = [ARRAY_SIZE]
    self.a: pk.View1D[pk.double] = pk.View(shape, pk.double)
    self.b: pk.View1D[pk.double] = pk.View(shape, pk.double)
    self.c: pk.View1D[pk.double] = pk.View(shape, pk.double)

    self.initA: float = initA
    self.initB: float = initB
    self.initC: float = initC
    self.scalar: float = scalar
def __init__(self, n):
    """Allocate the input, output and counter views and fill the input randomly.

    :param n: length of ``data`` and ``result``; also the inclusive upper
        bound of the random values written to ``data``
    """
    self.N: int = n

    self.data: pk.View1D[pk.int32] = pk.View([n], pk.int32)
    self.result: pk.View1D[pk.int32] = pk.View([n], pk.int32)
    # Single-element counter created with the Atomic trait.
    self.count: pk.View1D[pk.int32] = pk.View(
        [1], pk.int32, trait=pk.Trait.Atomic)

    # One random.randint(0, n) draw per slot, in index order.
    for slot in range(n):
        self.data[slot] = random.randint(0, n)
def __init__(self, N: int, K: int, R: int, D: int, F: int, T: int, S: int):
    """Allocate three N x K x D views and fill them with 1.5, 2.5 and 3.5.

    Only ``K``, ``R`` and ``F`` are kept as attributes; ``T`` and ``S`` are
    accepted but not used in this constructor.
    """
    self.K: int = K
    self.R: int = R
    self.F: int = F

    self.A: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)
    self.B: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)
    self.C: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)

    # A -> 1.5, B -> 2.5, C -> 3.5
    for view, value in ((self.A, 1.5), (self.B, 2.5), (self.C, 3.5)):
        view.fill(value)
def __init__(self, iterations, length, offset):
    """Store the run parameters and allocate three views of ``length`` doubles.

    :param iterations: stored as ``self.iterations``
    :param length: stored as ``self.length``; length of ``A``, ``B`` and ``C``
    :param offset: stored as ``self.offset``
    """
    self.iterations: int = iterations
    self.length: int = length
    self.offset: int = offset

    # Fixed scalar plus zeroed outputs.
    self.scalar: float = 3
    self.asum: float = 0
    self.nstream_time: float = 0

    self.A: pk.View1D[pk.double] = pk.View([length], pk.double)
    self.B: pk.View1D[pk.double] = pk.View([length], pk.double)
    self.C: pk.View1D[pk.double] = pk.View([length], pk.double)
def __init__(self, s: System, comm_depth: float):
    """Create zeroed domain bounds, empty particle views and the packing state.

    :param s: forwarded to the base-class constructor
    :param comm_depth: forwarded to the base class and stored as
        ``self.comm_depth``
    """
    super().__init__(s, comm_depth)

    # --- copied from System -------------------------------------------------
    # Domain and sub-domain bounds all start at 0.0.
    self.domain_x: float = 0.0
    self.domain_y: float = 0.0
    self.domain_z: float = 0.0
    self.sub_domain_x: float = 0.0
    self.sub_domain_y: float = 0.0
    self.sub_domain_z: float = 0.0
    self.sub_domain_hi_x: float = 0.0
    self.sub_domain_hi_y: float = 0.0
    self.sub_domain_hi_z: float = 0.0
    self.sub_domain_lo_x: float = 0.0
    self.sub_domain_lo_y: float = 0.0
    self.sub_domain_lo_z: float = 0.0

    # Particle views are requested with a leading extent of 0.
    self.x: pk.View2D[pk.double] = t_x(0, 3)
    self.v: pk.View2D[pk.double] = t_v(0, 3)
    self.f: pk.View2D[pk.double] = t_f(0, 3)
    self.id: pk.View1D[pk.int32] = t_id(0)
    self.type: pk.View1D[pk.int32] = t_type(0)
    self.q: pk.View1D[pk.double] = t_q(0)
    self.mass: pk.View1D[pk.double] = t_mass(0)

    # --- copied from Comm ---------------------------------------------------
    self.comm_depth: float = comm_depth

    print("CommSerial")

    self.pack_count: pk.View1D[pk.int32] = pk.View([1], pk.int32)
    self.pack_indicies_all: pk.View2D[pk.int32] = pk.View(
        [6, 0], pk.int32, layout=pk.Layout.LayoutRight)
    self.num_ghost: List[int] = [0 for _ in range(6)]
    # Temporary list value; replaced by a 6-entry view at the end of this
    # constructor.
    self.ghost_offsets: List[int] = [0 for _ in range(6)]

    self.phase: int = 0
    self.workunit_id: int = 0

    # Needed for translation to succeed.
    self.N_local: int = 0
    self.nparticles: int = 0
    self.update_threads: int = 0
    self.force_threads: int = 0
    self.N_ghost: int = 0

    # First row of the packing index table.
    self.pack_indicies: pk.View1D[pk.int32] = self.pack_indicies_all[0, :]
    self.ghost_offsets: pk.View1D[pk.int32] = pk.View([6], pk.int32)
def __init__(self, N: int, M: int, fill: bool):
    """Allocate y (N), x (M) and A (N x M) in ``pk.MemorySpace.CudaSpace``.

    The views are only allocated; ``fill`` is accepted but not used in this
    constructor.
    """
    self.N: int = N
    self.M: int = M

    self.y: pk.View1D[pk.double] = pk.View(
        [N], pk.double, space=pk.MemorySpace.CudaSpace)
    self.x: pk.View1D[pk.double] = pk.View(
        [M], pk.double, space=pk.MemorySpace.CudaSpace)
    self.A: pk.View2D[pk.double] = pk.View(
        [N, M], pk.double, space=pk.MemorySpace.CudaSpace)
def __init__(self, threads: int, i_1: int, i_2: int, b_1: bool, b_2: bool):
    """Store the scalar inputs and allocate 1D, 2D and 3D int32 views.

    Every view extent equals ``threads``.
    """
    self.threads: int = threads

    # Integer and boolean inputs are kept verbatim.
    self.i_1: int = i_1
    self.i_2: int = i_2
    self.b_1: bool = b_1
    self.b_2: bool = b_2

    self.view1D: pk.View1D[pk.int32] = pk.View([threads], pk.int32)
    self.view2D: pk.View2D[pk.int32] = pk.View([threads, threads], pk.int32)
    self.view3D: pk.View3D[pk.int32] = pk.View(
        [threads, threads, threads], pk.int32)
def run() -> None:
    """Run the ``yAx`` reduction 100 times with Cuda as the default space and report.

    Problem sizes come from ``parse_args()`` (element 0 is N, element 1 is M).
    The default execution space is set to ``pk.ExecutionSpace.Cuda`` before the
    views are allocated.  The views are initialized with the ``y_init`` /
    ``matrix_init`` kernels, the reduction loop is timed with ``pk.Timer`` and
    the last reduced value is compared against ``N * M``.
    """
    values: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = values[0]
    M: int = values[1]
    nrepeat: int = 100
    print(f"Total size S = {N * M} N = {N} M = {M}")

    pk.set_default_space(pk.ExecutionSpace.Cuda)

    y: pk.View1D = pk.View([N], pk.double)
    x: pk.View1D = pk.View([M], pk.double)
    A: pk.View2D = pk.View([N, M], pk.double)

    # The views are initialized by kernels launched on the default space;
    # no host-side fill() or element loops are used.
    p = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(p, y_init, y=y)
    pk.parallel_for(pk.RangePolicy(pk.get_default_space(), 0, M), y_init, y=x)
    pk.parallel_for(p, matrix_init, M=M, A=A)

    # Only the reduction loop is timed.
    timer = pk.Timer()

    for _ in range(nrepeat):
        result = pk.parallel_reduce(p, yAx, M=M, y=y, x=x, A=A)

    timer_result = timer.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    solution: float = N * M

    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    print(
        f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    )
def __init__(self, iterations, order, tile_size, permute):
    """Store the run parameters and allocate two ``order`` x ``order`` LayoutRight views.

    :param iterations: stored as ``self.iterations``; also used to
        precompute ``self.addit``
    :param order: extent of both dimensions of ``A`` and ``B``
    :param tile_size: stored as ``self.tile_size``
    :param permute: stored as ``self.permute``
    """
    self.iterations: int = iterations
    self.order: int = order
    self.tile_size: int = tile_size
    self.permute: int = permute

    # The layout uses the enum member `pk.Layout.LayoutRight`, the same
    # spelling as the other view allocations in this code base.
    self.A: pk.View2D[pk.double] = pk.View(
        [self.order, self.order], pk.double, layout=pk.Layout.LayoutRight)
    self.B: pk.View2D[pk.double] = pk.View(
        [self.order, self.order], pk.double, layout=pk.Layout.LayoutRight)

    self.abserr: float = 0
    self.transpose_time: float = 0
    # iterations * (iterations - 1) / 2, evaluated in floating point.
    self.addit: float = self.iterations * (0.5 * (self.iterations - 1))
def __init__(self, n):
    """Allocate an n x 3 int32 view and set entry (i, j) to ``i * n + j``.

    :param n: number of rows; also the multiplier used for the row offset
    """
    self.N: int = n
    self.sum: int = 0
    self.a: pk.View2D[pk.int32] = pk.View([n, 3], pk.int32)

    for row in range(n):
        row_base = row * n
        for col in range(3):
            self.a[row][col] = row_base + col
def __init__(self, iterations, n, tile_size, star, radius):
    """Store the stencil parameters and allocate the n x n input and output views.

    :param iterations: stored as ``self.iterations``
    :param n: extent of both dimensions of ``inp`` and ``out``
    :param tile_size: stored as ``self.tile_size``
    :param star: stored as ``self.star``
    :param radius: stored as ``self.radius``
    """
    self.iterations: int = iterations
    self.n: int = n
    self.tile_size: int = tile_size
    self.star: int = star
    self.radius: int = radius

    # Outputs start at zero.
    self.norm: float = 0
    self.stencil_time: float = 0

    self.inp: pk.View2D[pk.double] = pk.View(
        [self.n, self.n], pk.double, layout=pk.Layout.LayoutRight)
    self.out: pk.View2D[pk.double] = pk.View(
        [self.n, self.n], pk.double, layout=pk.Layout.LayoutRight)
def __init__(self, n):
    """Allocate an int32 view of length ``n``, set every entry to 2 and print a summary.

    :param n: length of the view
    """
    self.N: int = n
    self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32)

    for idx in range(n):
        self.a[idx] = 2

    # The summary reads element 0 of the view.
    print(f"Initialized view: [{self.a[0]}, ... repeats {n-1} times]")
def run() -> None:
    """Run the batched ``yAx`` team reduction 1000 times and report the result.

    Sizes come from ``parse_args()`` (element 0 is N, element 1 is M,
    element 3 is E; the last element selects how the views are initialized).
    All view entries are set to 1, the reduction loop is timed with
    ``pk.Timer`` and the last reduced value is compared against ``N * M * E``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E = args[0], args[1], args[3]
    fill = args[-1]
    nrepeat = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    y: pk.View2D = pk.View([E, N], pk.double, layout=pk.Layout.LayoutRight)
    x: pk.View2D = pk.View([E, M], pk.double, layout=pk.Layout.LayoutRight)
    A: pk.View3D = pk.View([E, N, M], pk.double, layout=pk.Layout.LayoutRight)

    if fill:
        for view in (y, x, A):
            view.fill(1)
    else:
        # Element-wise path: each entry is written individually from Python.
        for e in range(E):
            for col in range(M):
                x[e][col] = 1
            for row in range(N):
                y[e][row] = 1
                for col in range(M):
                    A[e][row][col] = 1

    team_policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    # Only the reduction loop is timed.
    timer = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(team_policy, yAx, N=N, M=M, y=y, x=x, A=A)
    timer_result = timer.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    solution: float = N * M * E
    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    print(f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
def __init__(self, n):
    """Allocate an int32 view of length ``n`` and assign sqrt(tau) to every entry.

    :param n: length of the view
    """
    self.N: int = n
    self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32)

    # The assigned value is the same for every index, so compute it once.
    # Note that it is a float being written to an int32 view.
    root_tau = math.sqrt(math.tau)
    for idx in range(n):
        self.a[idx] = root_tau

    print("Initialized view:", self.a)
def __init__(self, threads: int, i_1: int, i_2: int, f_1: float, f_2: float,
             b_1: bool):
    """Store the scalar inputs, allocate int32 views and take two subviews.

    Every view extent equals ``threads``.  The subviews start at index
    ``threads // 2`` along the first dimension.
    """
    self.threads: int = threads
    self.i_1: int = i_1
    self.i_2: int = i_2
    self.f_1: float = f_1
    self.f_2: float = f_2
    self.b_1: bool = b_1

    self.view1D: pk.View1D[pk.int32] = pk.View([threads], pk.int32)
    self.view2D: pk.View2D[pk.int32] = pk.View([threads, threads], pk.int32)
    self.view3D: pk.View3D[pk.int32] = pk.View(
        [threads, threads, threads], pk.int32)

    # Second half of view1D, and the second half of view2D's rows.
    self.subview_1: pk.View1D[pk.int32] = self.view1D[threads // 2:]
    self.subview_2: pk.View2D[pk.int32] = self.view2D[threads // 2:, :]
def __init__(self, ARRAY_SIZE: int, initA: float, initB: float,
             initC: float, scalar: float, num_times: int):
    """Allocate the three arrays and the 5 x ``num_times`` runtime table.

    The initial values are only stored; this constructor does not write them
    into the views.

    :param ARRAY_SIZE: stored as ``self.array_size``; length of ``a``, ``b``, ``c``
    :param initA: stored as ``self.initA``
    :param initB: stored as ``self.initB``
    :param initC: stored as ``self.initC``
    :param scalar: stored as ``self.scalar``
    :param num_times: stored as ``self.num_times``; second extent of ``runtimes``
    """
    self.array_size: int = ARRAY_SIZE
    self.num_times: int = num_times

    self.a: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double)
    self.b: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double)
    self.c: pk.View1D[pk.double] = pk.View([ARRAY_SIZE], pk.double)

    self.initA: pk.double = initA
    self.initB: pk.double = initB
    self.initC: pk.double = initC
    self.scalar: pk.double = scalar

    # Outputs start at zero.
    self.sum: pk.double = 0
    self.runtime: float = 0
    self.runtimes: pk.View2D[pk.double] = pk.View([5, num_times], pk.double)
def __init__(self, n):
    """Allocate an int32 view of length ``n`` and fill it with random values.

    :param n: length of the view
    """
    self.N: int = n
    self.total: int = 0
    self.a: pk.View1D[pk.int32] = pk.View([n], pk.int32)

    # One random.randint(0, 10) draw per slot, in index order.
    for idx in range(n):
        self.a[idx] = random.randint(0, 10)

    print("Initialized view:", self.a)
def __init__(self, threads: int, i_1: int, i_2: int, i_3: int, i_4: int):
    """Build the plain, custom, alternate, resized and sliced views, all set to ``i_4``.

    :param threads: stored as ``self.threads``
    :param i_1: first view extent
    :param i_2: second view extent
    :param i_3: third view extent
    :param i_4: value written to every initialized entry
    """
    self.threads: int = threads
    self.i_1: int = i_1
    self.i_2: int = i_2
    self.i_3: int = i_3
    self.i_4: int = i_4

    self.view1D: pk.View1D[pk.int32] = pk.View([i_1], pk.int32)
    self.view2D: pk.View2D[pk.int32] = pk.View([i_1, i_2], pk.int32)
    self.view3D: pk.View3D[pk.int32] = pk.View([i_1, i_2, i_3], pk.int32)

    self.myView1D: pk.View1D[int] = MyView1D(i_1, int)
    self.myView2D: pk.View2D[int] = MyView2D(i_1, i_2, int)
    self.myView3D: pk.View3D[int] = MyView3D(i_1, i_2, i_3, int)

    # Views needed for subviews
    self.altView1D: pk.View1D[pk.int32] = pk.View([i_1], pk.int32)
    self.altView2D: pk.View2D[pk.int32] = pk.View([i_1, i_2], pk.int32)
    self.altView3D: pk.View3D[pk.int32] = pk.View([i_1, i_2, i_3], pk.int32)

    # Write i_4 into every entry of the nine views above, grouped by rank.
    rank1 = (self.view1D, self.myView1D, self.altView1D)
    rank2 = (self.view2D, self.myView2D, self.altView2D)
    rank3 = (self.view3D, self.myView3D, self.altView3D)
    for a in range(i_1):
        for view in rank1:
            view[a] = i_4
        for b in range(i_2):
            for view in rank2:
                view[a][b] = i_4
            for c in range(i_3):
                for view in rank3:
                    view[a][b][c] = i_4

    # 1D view allocated with i_1 entries, then dimension 0 resized to i_2.
    self.dynamicView1D: pk.View1D[pk.int32] = pk.View([i_1], pk.int32)
    self.dynamicView1D.resize(0, i_2)
    for a in range(i_2):
        self.dynamicView1D[a] = i_4

    # 2D view allocated as i_1 x i_2, then resized to i_2 x i_1.
    self.dynamicView2D: pk.View2D[pk.int32] = pk.View([i_1, i_2], pk.int32)
    self.dynamicView2D.resize(0, i_2)
    self.dynamicView2D.resize(1, i_1)
    for a in range(i_2):
        for b in range(i_1):
            self.dynamicView2D[a, b] = i_4

    # Slices of the alternate views.
    self.subview1D: pk.View1D[pk.int32] = self.altView1D[:]
    self.subview2D: pk.View2D[pk.int32] = self.altView2D[:, :i_2 // 2]
    self.subview3D: pk.View3D[pk.int32] = self.altView3D[:, :i_2 // 2, i_3 // 2:i_3]
def __init__(self, indices: int, data: int, repeats: int, use_atomics: bool):
    """Print the benchmark configuration and allocate the index and data views.

    :param indices: number of entries in ``self.indices``
    :param data: number of entries in ``self.data``
    :param repeats: stored as ``self.repeats``
    :param use_atomics: stored as ``self.use_atomics``
    """
    self.dataCount: int = data
    self.indicesCount: int = indices
    self.repeats: int = repeats
    self.use_atomics: bool = use_atomics

    # The banner is printed before the views are allocated.  The sizes are
    # reported at 8 bytes per element.
    banner = (
        "Reports fastest timing per kernel",
        "Creating Views...",
        "Memory Sizes:",
        f"- Elements: {data} ({1e-6*data*8} MB)",
        f"- Indices: {indices} ({1e-6*indices*8} MB)",
        f"- Atomics: {'yes' if use_atomics else 'no'}",
        f"Benchmark kernels will be performed for {repeats} iterations",
    )
    for line in banner:
        print(line)

    self.indices: pk.View1D[pk.int64] = pk.View([indices], pk.int64)
    self.data: pk.View1D[pk.int64] = pk.View([data], pk.int64)

    self.datum: pk.int64 = -1
    self.gupsTime: float = 0
def __init__(self, N: int, K: int, R: int, D: int, F: int, T: int, S: int):
    """Store all benchmark parameters and allocate three filled N x K x D views.

    ``A``, ``B`` and ``C`` are filled with 1.5, 2.5 and 3.5 respectively.
    """
    self.N: int = N
    self.K: int = K
    self.R: int = R
    self.D: int = D
    self.F: int = F
    self.T: int = T
    self.S: int = S

    # Fixed constants and the zeroed timing result.
    self.UNROLL: int = 8
    self.scalar_size: int = 8
    self.seconds: float = 0

    self.A: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)
    self.B: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)
    self.C: pk.View3D[pk.double] = pk.View([N, K, D], pk.double)

    # A -> 1.5, B -> 2.5, C -> 3.5
    for view, value in ((self.A, 1.5), (self.B, 2.5), (self.C, 3.5)):
        view.fill(value)
def __init__(self, N: int, K: int, D: int, R: int, F: int):
    """Allocate the gather views and build a seeded, windowed connectivity table.

    Entry ``connectivity[i][jj]`` is a pseudo-random index within a window of
    width ``D`` around ``i``, wrapped into ``[0, N)``.

    :param N: length of ``A``/``B``/``C`` and first extent of ``connectivity``
    :param K: second extent of ``connectivity``; stored as ``self.K``
    :param D: width of the random window (upper bound passed to
        ``random.randrange``)
    :param R: accepted but not used in this constructor
    :param F: stored as ``self.F``
    """
    self.K: int = K
    self.F: int = F

    self.connectivity: pk.View2D[int] = pk.View([N, K], int)
    self.A: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.B: pk.View1D[pk.double] = pk.View([N], pk.double)
    self.C: pk.View1D[pk.double] = pk.View([N], pk.double)
    # Alternative allocations with the RandomAccess trait, kept for reference:
    # self.A: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess)
    # self.B: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess)
    # self.C: pk.View1D[pk.double] = pk.View([N], pk.double, trait=pk.Trait.RandomAccess)

    # C keeps whatever pk.View allocates; only A and B are filled here.
    self.A.fill(1.5)
    self.B.fill(2.0)

    # TODO use kokkos to init in parallel
    # The fixed seed makes the connectivity table reproducible.
    random.seed(12313)
    # Floor division keeps the index an integer; true division (`D / 2`)
    # yields a float that is not a whole number when D is odd, which is then
    # stored into an int view.
    half_window = D // 2
    for i in range(N):
        for jj in range(K):
            self.connectivity[i][jj] = (random.randrange(D) + i - half_window + N) % N
def __init__(self, r, c):
    """Allocate an r x c int32 view holding 0 .. r*c-1 in row order, then print it.

    :param r: number of rows
    :param c: number of columns
    """
    self.r: int = r
    self.c: int = c
    self.total: int = 0
    self.mat: pk.View2D[pk.int32] = pk.View([r, c], pk.int32)

    # Row i receives the c consecutive integers starting at c * i.
    for row_idx in range(r):
        first = c * row_idx
        self.mat[row_idx] = list(range(first, first + c))

    for row in self.mat:
        print(row)
    print(f"Initialized {r}x{c} array")