def run() -> None:
    """Time repeated ``Workload.yAx`` reductions under a team policy and report.

    Problem sizes come from ``parse_args()`` (positions 0, 1 and 3 for ``N``,
    ``M`` and ``E``; the last entry is ``fill``).  The reduction is launched
    ``nrepeat`` times under one timer; only the value returned by the final
    launch is kept and compared with ``N * M * E``.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = parsed[0]
    M: int = parsed[1]
    E: int = parsed[3]
    fill: bool = parsed[-1]
    nrepeat: int = 1000

    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    workload = Workload(N, M, E, fill)
    team_policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    # One timer spans every repetition, so timer_result is the total time.
    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(team_policy, workload.yAx)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    expected: float = N * M * E
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
def run() -> None:
    """Initialise a ``Workload`` with three kernels, then time ``yAx`` reductions.

    ``N`` and ``M`` are the first two entries returned by ``parse_args()``.
    The workload's ``y_init``, ``x_init`` and ``matrix_init`` work units are
    launched once each, after which ``yAx`` is reduced ``nrepeat`` times under
    a single timer.  The last reduction value is compared with ``N * M``.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = parsed[0]
    M: int = parsed[1]
    nrepeat: int = 100

    print(f"Total size S = {N * M} N = {N} M = {M}")

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    workload = Workload(N, M)

    # Initialisation kernels: two over [0, N), one over [0, M).
    pk.parallel_for(policy_n, workload.y_init)
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, workload.x_init)
    pk.parallel_for(policy_n, workload.matrix_init)

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, workload.yAx)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
def run() -> None:
    """Allocate the views, initialise them with kernels, and time one ``yAx`` reduction.

    ``N`` and ``M`` are the first two entries returned by ``parse_args()``.
    Three ``pk.double`` views are created with lengths ``N``, ``M`` and
    ``N * M``; ``y_init`` is launched for the first two and ``matrix_init``
    for the third.  The ``yAx`` reduction then runs ``nrepeat`` times under
    one timer and its last value is compared with ``N * M``.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = parsed[0], parsed[1]
    nrepeat: int = 1

    print(f"Total size S = {N * M} N = {N} M = {M}")

    y_view = pk.View([N], pk.double)
    x_view = pk.View([M], pk.double)
    a_view = pk.View([N * M], pk.double)

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(policy_n, y_init, y=y_view)
    # y_init is reused for the length-M view, passed through its `y` argument.
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, y_init, y=x_view)
    pk.parallel_for(policy_n, matrix_init, M=M, A=a_view)

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, yAx, M=M, y=y_view, x=x_view, A=a_view)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
def results(self):
    """Print the stored result, flag a mismatch, and print the run summary.

    Reads ``self.N``, ``self.M``, ``self.result``, ``self.nrepeat`` and
    ``self.timer_result``.  The expected value is ``self.N * self.M``; an
    error line is emitted through ``pk.printf`` only when ``self.result``
    differs from it.
    """
    print(f"Computed result for {self.N} x {self.M} is {self.result}")

    expected: float = self.N * self.M
    mismatch = self.result != expected
    if mismatch:
        pk.printf("Error: result (%lf) != solution (%lf)\n", self.result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({self.N}) M({self.M}) nrepeat({self.nrepeat}) problem(MB) time({self.timer_result}) bandwidth(GB/s)"
    print(summary)
def run(self):
    # Driver for the stencil run: initialise, apply the stencil `iterations`
    # times under one timer, then reduce a norm and compare it with a
    # reference value.
    #
    # `n`, `iterations`, `tile_size` and `radius` are not attributes or locals;
    # they are resolved from an enclosing scope that is not part of this block.
    # Stores `self.stencil_time` and `self.norm`; returns nothing.
    t: int = tile_size
    r: int = radius
    # Initialisation kernel over the full [0, n) x [0, n) range, tiled t x t.
    pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [t, t]), self.init)
    pk.fence()
    timer = pk.Timer()
    for i in range(iterations):
        # NOTE(review): this fence does not restart the timer, so iteration 0
        # is included in stencil_time -- confirm that is intended.
        if (i == 1):
            pk.fence()
        # The three branches differ only in the work unit; the policy always
        # skips a border of width r on every side.
        if r == 1:
            # star1 stencil
            pk.parallel_for(
                "stencil",
                pk.MDRangePolicy([r, r], [n - r, n - r], [t, t]),
                self.star1)
        elif r == 2:
            # star2 stencil
            pk.parallel_for(
                "stencil",
                pk.MDRangePolicy([r, r], [n - r, n - r], [t, t]),
                self.star2)
        else:
            # star3 stencil (also taken for any radius other than 1 or 2)
            pk.parallel_for(
                "stencil",
                pk.MDRangePolicy([r, r], [n - r, n - r], [t, t]),
                self.star3)
        # Runs over the full range, border included, after every stencil pass.
        pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [t, t]), self.increment)
    # Fence before reading the timer so the measurement covers the launches.
    pk.fence()
    self.stencil_time = timer.seconds()
    # Number of points inside the border-excluded range used below.
    active_points: int = (n - 2 * r) * (n - 2 * r)
    # verify correctness
    self.norm = pk.parallel_reduce(
        pk.MDRangePolicy([r, r], [n - r, n - r], [t, t]),
        self.norm_reduce)
    pk.fence()
    # Average the reduced value over the active points.
    # NOTE(review): active_points is 0 when n == 2 * r, which would divide by zero.
    self.norm /= active_points
    episilon: float = 1.0e-8
    reference_norm: float = 2 * (iterations)
    if (abs(self.norm - reference_norm) > episilon):
        pk.printf("ERROR: L1 norm != Reference norm err=%.2f\n",
                  abs(self.norm - reference_norm))
    else:
        pk.printf("Solution validates\n")
def run() -> None:
    """Run the kernel-initialised ``yAx`` benchmark with Cuda as the default space.

    ``N`` and ``M`` are the first two entries returned by ``parse_args()``.
    The default execution space is set to ``pk.ExecutionSpace.Cuda`` before
    any view is allocated.  ``y`` (length ``N``), ``x`` (length ``M``) and
    ``A`` (``N`` by ``M``) are initialised by the ``y_init`` / ``matrix_init``
    kernels, then ``yAx`` is reduced ``nrepeat`` times under one timer and the
    last value is compared with ``N * M``.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = parsed[0]
    M: int = parsed[1]
    # Read but not used below: the views are always initialised by kernels.
    fill: bool = parsed[-1]
    nrepeat: int = 100

    print(f"Total size S = {N * M} N = {N} M = {M}")

    pk.set_default_space(pk.ExecutionSpace.Cuda)

    y: pk.View1D = pk.View([N], pk.double)
    x: pk.View1D = pk.View([M], pk.double)
    A: pk.View2D = pk.View([N, M], pk.double)

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(policy_n, y_init, y=y)
    # y_init is reused for x, passed through its `y` argument.
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, y_init, y=x)
    pk.parallel_for(policy_n, matrix_init, M=M, A=A)

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, yAx, M=M, y=y, x=x, A=A)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected: float = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
def run() -> None:
    """Fill the views on the host, then time team-policy ``yAx`` reductions.

    ``N``, ``M`` and ``E`` are entries 0, 1 and 3 of ``parse_args()``; the
    last entry is ``fill``.  Three ``LayoutRight`` views of ``pk.double`` are
    allocated (``[E, N]``, ``[E, M]`` and ``[E, N, M]``) and every element is
    set to 1, either with ``View.fill`` or element by element.  ``yAx`` is
    then reduced ``nrepeat`` times under one timer and the last value is
    compared with ``N * M * E``.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = parsed[0]
    M: int = parsed[1]
    E: int = parsed[3]
    fill: bool = parsed[-1]
    nrepeat: int = 1000

    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    y: pk.View2D = pk.View([E, N], pk.double, layout=pk.Layout.LayoutRight)
    x: pk.View2D = pk.View([E, M], pk.double, layout=pk.Layout.LayoutRight)
    A: pk.View3D = pk.View([E, N, M], pk.double, layout=pk.Layout.LayoutRight)

    if not fill:
        # Element-wise path: write 1 into every entry through indexing.
        for batch in range(E):
            for row in range(N):
                y[batch][row] = 1
            for col in range(M):
                x[batch][col] = 1
            for row in range(N):
                for col in range(M):
                    A[batch][row][col] = 1
    else:
        y.fill(1)
        x.fill(1)
        A.fill(1)

    team_policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(team_policy, yAx, N=N, M=M, y=y, x=x, A=A)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    expected: float = N * M * E
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # NOTE: the "problem(MB)" and "bandwidth(GB/s)" labels carry no values.
    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
def run(self): pk.parallel_for(self.length, self.init) # pk.parallel_for(self.length, lambda i: 0, self.A) # pk.parallel_for(self.length, lambda i: 2, self.B) # pk.parallel_for(self.length, lambda i: 2, self.C) pk.fence() timer = pk.Timer() for i in range(self.iterations): pk.parallel_for("nstream", self.length, self.nstream) pk.fence() self.nstream_time = timer.seconds() # verify correctness ar: float = 0 br: float = 2 cr: float = 2 for i in range(self.iterations): ar += br + self.scalar * cr ar *= self.length self.asum = pk.parallel_reduce(self.length, lambda i, acc: acc + abs(self.A[i])) pk.fence() episilon: float = 1.0e-8 if (abs(ar - self.asum) / self.asum > episilon): pk.printf("ERROR: Failed Valication on output array\n") else: avgtime: float = self.nstream_time / self.iterations nbytes: float = 4.0 * self.length * 4 pk.printf("Solution validates\n") pk.printf("Rate (MB/s): %.2f\n", 1.e-6 * nbytes / avgtime) pk.printf("Avg time (ms): %f\n", avgtime / 1.e-3)
def run(self):
    # Driver for the transpose run: initialise, launch the transpose work unit
    # `self.iterations` times under one timer, then reduce an error value and
    # report whether it is within tolerance.
    # Stores `self.transpose_time` and `self.abserr`; returns nothing.
    pk.parallel_for(
        pk.MDRangePolicy([0, 0], [self.order, self.order],
                         [self.tile_size, self.tile_size]),
        self.init)
    pk.fence()
    timer = pk.Timer()
    for i in range(self.iterations):
        # `self.permute` only selects the pair of iteration orders passed to
        # pk.Rank; range, tiling and work unit are the same in both branches.
        if self.permute:
            pk.parallel_for(
                "transpose",
                pk.MDRangePolicy([0, 0], [self.order, self.order],
                                 [self.tile_size, self.tile_size],
                                 rank=pk.Rank(2, pk.Iterate.Left, pk.Iterate.Right)),
                self.tranpose)
        else:
            pk.parallel_for(
                "transpose",
                pk.MDRangePolicy([0, 0], [self.order, self.order],
                                 [self.tile_size, self.tile_size],
                                 rank=pk.Rank(2, pk.Iterate.Right, pk.Iterate.Left)),
                self.tranpose)
    # Wait for the launched kernels before reading the timer; without this the
    # measurement may only cover launch overhead on asynchronous back ends.
    pk.fence()
    self.transpose_time = timer.seconds()
    self.abserr = pk.parallel_reduce(
        pk.MDRangePolicy([0, 0], [self.order, self.order],
                         [self.tile_size, self.tile_size]),
        self.abserr_reduce)
    pk.printf("%f\n", self.abserr)
    episilon: float = 1.0e-8
    if (self.abserr > episilon):
        pk.printf(
            "ERROR: aggregated squared error exceeds threshold %.2f\n",
            self.abserr)
    else:
        # NOTE(review): "%2.f" is width 2 / precision 0; "%.2f" may have been
        # intended. Left unchanged because it is runtime output.
        pk.printf("Solution validates %2.f\n", self.abserr)
def my_calculation(self, i: int):
    # Per-index update of self.a: prints the index, then adds
    # cos(a[i]) + 2**i - pi / |a[next]| to a[i] in place.
    pk.printf("Running index %d\n", i)
    # `(i + 1) % self.N` wraps the neighbour index back to 0 once i + 1
    # reaches self.N.
    # NOTE(review): divides by |a[next]|, so a zero neighbour divides by zero;
    # the neighbour may also be written by another index if indices run
    # concurrently -- confirm against the caller.
    self.a[i] += (math.cos(self.a[i]) + 2**i -
                  math.pi / math.fabs(self.a[(i + 1) % self.N]))
def run(self):
    # Launch a parallel_for over self.N with an inline lambda that prints
    # "Hello from i = <i>" for the index it receives.
    pk.parallel_for(self.N, lambda i: pk.printf("Hello from i = %i\n", i))
def work(self, tid: int) -> None:
    # Print the received index `tid` followed by a newline; no state is
    # read or written.
    pk.printf("%d\n", tid)
def hello(self, i: int):
    # Print a greeting that includes the received index `i`; no state is
    # read or written.
    pk.printf("Hello from i = %d\n", i)
def call(self, tid: int, acc: pk.Acc[pk.double]) -> None:
    # Reduction body: prints self.i_1, then adds |-self.i_1| to the
    # accumulator `acc`.  The index `tid` is not used, so every invocation
    # contributes the same amount.
    pk.printf("Testing printf: %d\n", self.i_1)
    acc += abs(-self.i_1)
nrepeat: int = 100 space: str = values[-2] if space == "": space = pk.ExecutionSpace.OpenMP else: space = pk.ExecutionSpace(space) pk.set_default_space(space) print(f"Total size S = {N * M} N = {N} M = {M}") w = Workload(N, M, nrepeat, fill) p = pk.RangePolicy(pk.get_default_space(), 0, N) timer = pk.Timer() for i in range(nrepeat): result = pk.parallel_reduce(p, w.yAx) timer_result = timer.seconds() print(f"Computed result for {N} x {M} is {result}") solution: float = N * M if result != solution: pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution) print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")