Ejemplo n.º 1
0
def run() -> None:
    """Time repeated ``Workload.yAx`` reductions under a team policy.

    ``N``, ``M``, ``E`` and the ``fill`` flag are read from positions 0, 1, 3
    and -1 of the tuple returned by ``parse_args()``. The reduction is launched
    ``nrepeat`` times; only the result of the last launch is kept, printed and
    compared against ``N * M * E``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E, fill = args[0], args[1], args[3], args[-1]
    nrepeat = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    workload = Workload(N, M, E, fill)
    policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    # Each launch overwrites ``result``; the final value is the one checked.
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy, workload.yAx)
    elapsed = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    expected: float = N * M * E
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({elapsed}) bandwidth(GB/s)"
    print(summary)
Ejemplo n.º 2
0
def run() -> None:
    """Initialise a ``Workload`` and time repeated ``yAx`` reductions.

    ``N`` and ``M`` are read from positions 0 and 1 of the tuple returned by
    ``parse_args()``. The workload's ``y_init`` and ``matrix_init`` kernels run
    over ``[0, N)`` and ``x_init`` over ``[0, M)``. The reduction is then
    launched ``nrepeat`` times over ``[0, N)`` and the last result is compared
    against ``N * M``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = args[0], args[1]
    nrepeat = 100
    print(f"Total size S = {N * M} N = {N} M = {M}")

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    workload = Workload(N, M)

    # Initialisation kernels, in the same order as before: y, x, then the matrix.
    pk.parallel_for(policy_n, workload.y_init)
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, workload.x_init)
    pk.parallel_for(policy_n, workload.matrix_init)

    stopwatch = pk.Timer()
    # Each launch overwrites ``result``; the final value is the one checked.
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, workload.yAx)
    elapsed = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
    summary = f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({elapsed}) bandwidth(GB/s)"
    print(summary)
Ejemplo n.º 3
0
def run() -> None:
    """Allocate the views, initialise them and time the ``yAx`` reduction.

    ``N`` and ``M`` are read from positions 0 and 1 of the tuple returned by
    ``parse_args()``. ``y`` has ``N`` entries, ``x`` has ``M`` entries and
    ``A`` is a one-dimensional view of ``N * M`` entries. Both ``y`` and ``x``
    are initialised with the ``y_init`` kernel. The reduction is launched
    ``nrepeat`` times (currently once) and the last result is compared against
    ``N * M``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = args[0], args[1]
    nrepeat = 1
    print(f"Total size S = {N * M} N = {N} M = {M}")

    y = pk.View([N], pk.double)
    x = pk.View([M], pk.double)
    A = pk.View([N * M], pk.double)

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(policy_n, y_init, y=y)
    # ``x`` reuses the ``y_init`` kernel, bound to its ``y`` argument.
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, y_init, y=x)
    pk.parallel_for(policy_n, matrix_init, M=M, A=A)

    stopwatch = pk.Timer()
    # Each launch overwrites ``result``; the final value is the one checked.
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, yAx, M=M, y=y, x=x, A=A)
    elapsed = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
    summary = f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({elapsed}) bandwidth(GB/s)"
    print(summary)
Ejemplo n.º 4
0
    def results(self):
        # Report the stored reduction result, flag a mismatch against N * M,
        # and print the run summary line.
        n, m = self.N, self.M
        print(f"Computed result for {n} x {m} is {self.result}")

        expected: float = n * m
        if self.result != expected:
            pk.printf("Error: result (%lf) != solution (%lf)\n",
                      self.result, expected)

        # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
        summary = f"N({n}) M({m}) nrepeat({self.nrepeat}) problem(MB) time({self.timer_result}) bandwidth(GB/s)"
        print(summary)
Ejemplo n.º 5
0
    def run(self):
        # Initialise the grid, time `iterations` stencil + increment sweeps,
        # then validate the averaged norm of the interior points against
        # 2 * iterations.
        #
        # NOTE(review): `tile_size`, `radius`, `n` and `iterations` are used as
        # bare names here (not `self.<name>`) and are not defined inside this
        # method; presumably they are resolved elsewhere in the file — confirm.
        t: int = tile_size
        r: int = radius

        # Run the init kernel over the full [0, n) x [0, n) range in t x t tiles.
        pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [t, t]), self.init)
        pk.fence()

        timer = pk.Timer()

        for i in range(iterations):
            # An extra fence is issued at the start of the second iteration only.
            if (i == 1):
                pk.fence()

            # The stencil kernels cover only [r, n - r) in each dimension.
            # Any radius other than 1 or 2 falls through to star3.
            if r == 1:
                # star1 stencil
                pk.parallel_for(
                    "stencil", pk.MDRangePolicy([r, r], [n - r, n - r],
                                                [t, t]), self.star1)
            elif r == 2:
                # star2 stencil
                pk.parallel_for(
                    "stencil", pk.MDRangePolicy([r, r], [n - r, n - r],
                                                [t, t]), self.star2)
            else:
                # star3 stencil
                pk.parallel_for(
                    "stencil", pk.MDRangePolicy([r, r], [n - r, n - r],
                                                [t, t]), self.star3)

            # The increment kernel runs over the full range after every stencil.
            pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [t, t]),
                            self.increment)

        # Fence before reading the timer; the elapsed time covers the whole loop.
        pk.fence()
        self.stencil_time = timer.seconds()

        # Number of points in the [r, n - r) x [r, n - r) range reduced below.
        active_points: int = (n - 2 * r) * (n - 2 * r)

        # verify correctness
        self.norm = pk.parallel_reduce(
            pk.MDRangePolicy([r, r], [n - r, n - r], [t, t]), self.norm_reduce)
        pk.fence()
        # Average the reduced value over the points that were reduced.
        self.norm /= active_points

        # Absolute tolerance for the comparison against 2 * iterations.
        episilon: float = 1.0e-8
        reference_norm: float = 2 * (iterations)
        if (abs(self.norm - reference_norm) > episilon):
            pk.printf("ERROR: L1 norm != Reference norm err=%.2f\n",
                      abs(self.norm - reference_norm))
        else:
            pk.printf("Solution validates\n")
Ejemplo n.º 6
0
def run() -> None:
    """Time repeated ``yAx`` reductions with Cuda as the default space.

    ``N`` and ``M`` are read from positions 0 and 1 of the tuple returned by
    ``parse_args()``. The default execution space is set to
    ``pk.ExecutionSpace.Cuda`` before the views are allocated. ``y`` (``N``
    entries) and ``x`` (``M`` entries) are both initialised with the
    ``y_init`` kernel and ``A`` (``N`` x ``M``) with ``matrix_init``. The
    reduction is launched ``nrepeat`` times; the last result is printed and
    compared against ``N * M``.
    """
    values: Tuple[int, int, int, int, int, bool] = parse_args()
    N: int = values[0]
    M: int = values[1]
    nrepeat: int = 100
    print(f"Total size S = {N * M} N = {N} M = {M}")

    pk.set_default_space(pk.ExecutionSpace.Cuda)

    y: pk.View1D = pk.View([N], pk.double)
    x: pk.View1D = pk.View([M], pk.double)
    A: pk.View2D = pk.View([N, M], pk.double)

    # The views are initialised by kernels rather than on the host.
    p = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(p, y_init, y=y)
    # ``x`` reuses the ``y_init`` kernel, bound to its ``y`` argument.
    pk.parallel_for(pk.RangePolicy(pk.get_default_space(), 0, M), y_init, y=x)
    pk.parallel_for(p, matrix_init, M=M, A=A)

    timer = pk.Timer()

    # Each launch overwrites ``result``; the final value is the one checked.
    for _ in range(nrepeat):
        result = pk.parallel_reduce(p, yAx, M=M, y=y, x=x, A=A)

    timer_result = timer.seconds()

    print(f"Computed result for {N} x {M} is {result}")
    solution: float = N * M

    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
    print(
        f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    )
Ejemplo n.º 7
0
def run() -> None:
    """Fill the batched views on the host and time the team-policy reduction.

    ``N``, ``M``, ``E`` and the ``fill`` flag are read from positions 0, 1, 3
    and -1 of the tuple returned by ``parse_args()``. ``y`` is ``E`` x ``N``,
    ``x`` is ``E`` x ``M`` and ``A`` is ``E`` x ``N`` x ``M``, all with
    ``LayoutRight``. Every entry is set to 1, either through ``View.fill`` or
    element by element, depending on ``fill``. The reduction is launched
    ``nrepeat`` times and the last result is compared against ``N * M * E``.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E, fill = args[0], args[1], args[3], args[-1]
    nrepeat = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    y: pk.View2D = pk.View([E, N], pk.double, layout=pk.Layout.LayoutRight)
    x: pk.View2D = pk.View([E, M], pk.double, layout=pk.Layout.LayoutRight)
    A: pk.View3D = pk.View([E, N, M], pk.double, layout=pk.Layout.LayoutRight)

    if not fill:
        # Element-wise initialisation, one batch entry at a time.
        for batch in range(E):
            for k in range(N):
                y[batch][k] = 1

            for k in range(M):
                x[batch][k] = 1

            for row in range(N):
                for col in range(M):
                    A[batch][row][col] = 1
    else:
        y.fill(1)
        x.fill(1)
        A.fill(1)

    policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    # Each launch overwrites ``result``; the final value is the one checked.
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy, yAx, N=N, M=M, y=y, x=x, A=A)
    elapsed = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    expected: float = N * M * E
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    # The "problem(MB)" and "bandwidth(GB/s)" labels are emitted without values.
    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({elapsed}) bandwidth(GB/s)"
    print(summary)
Ejemplo n.º 8
0
    def run(self):
        # Initialise the arrays, time `self.iterations` launches of the
        # `self.nstream` kernel, then validate the sum of |A[i]| against a
        # value recomputed on the host.
        pk.parallel_for(self.length, self.init)
        pk.fence()

        timer = pk.Timer()

        for i in range(self.iterations):
            pk.parallel_for("nstream", self.length, self.nstream)

        # Fence before reading the timer so the elapsed time covers the kernels.
        pk.fence()
        self.nstream_time = timer.seconds()

        # verify correctness
        # Replay the per-element update on scalars starting from 0, 2 and 2.
        # NOTE(review): this assumes `self.init` sets A to 0 and B, C to 2 and
        # that `self.nstream` computes A += B + scalar * C — confirm.
        ar: float = 0
        br: float = 2
        cr: float = 2
        for i in range(self.iterations):
            ar += br + self.scalar * cr

        # Scale the per-element value to the expected sum over all elements.
        ar *= self.length

        self.asum = pk.parallel_reduce(self.length,
                                       lambda i, acc: acc + abs(self.A[i]))
        pk.fence()

        epsilon: float = 1.0e-8
        # Relative-error test written without the division so that a zero
        # `self.asum` cannot cause a division by zero; `self.asum` is a sum of
        # absolute values, so this is equivalent whenever it is positive.
        if (abs(ar - self.asum) > epsilon * self.asum):
            pk.printf("ERROR: Failed Validation on output array\n")
        else:
            avgtime: float = self.nstream_time / self.iterations
            nbytes: float = 4.0 * self.length * 4
            pk.printf("Solution validates\n")
            pk.printf("Rate (MB/s): %.2f\n", 1.e-6 * nbytes / avgtime)
            pk.printf("Avg time (ms): %f\n", avgtime / 1.e-3)
Ejemplo n.º 9
0
    def run(self):
        # Initialise the matrices, time `self.iterations` launches of the
        # transpose kernel, then reduce and report the accumulated error.
        pk.parallel_for(
            pk.MDRangePolicy([0, 0], [self.order, self.order],
                             [self.tile_size, self.tile_size]), self.init)
        pk.fence()

        timer = pk.Timer()

        # The two branches differ only in the iteration order handed to
        # pk.Rank: (Left, Right) when `self.permute` is set, (Right, Left)
        # otherwise.
        for i in range(self.iterations):
            if self.permute:
                pk.parallel_for(
                    "transpose",
                    pk.MDRangePolicy([0, 0], [self.order, self.order],
                                     [self.tile_size, self.tile_size],
                                     rank=pk.Rank(2, pk.Iterate.Left,
                                                  pk.Iterate.Right)),
                    self.tranpose)
            else:
                pk.parallel_for(
                    "transpose",
                    pk.MDRangePolicy([0, 0], [self.order, self.order],
                                     [self.tile_size, self.tile_size],
                                     rank=pk.Rank(2, pk.Iterate.Right,
                                                  pk.Iterate.Left)),
                    self.tranpose)

        # Wait for the launched kernels before reading the timer, so the
        # measured time includes their execution and not only the launches.
        pk.fence()
        self.transpose_time = timer.seconds()

        self.abserr = pk.parallel_reduce(
            pk.MDRangePolicy([0, 0], [self.order, self.order],
                             [self.tile_size, self.tile_size]),
            self.abserr_reduce)

        pk.printf("%f\n", self.abserr)
        epsilon: float = 1.0e-8
        if (self.abserr > epsilon):
            pk.printf(
                "ERROR: aggregated squared error exceeds threshold %.2f\n",
                self.abserr)
        else:
            # "%.2f" (two decimals) matches the error branch; the previous
            # "%2.f" meant width 2 with zero decimals.
            pk.printf("Solution validates %.2f\n", self.abserr)
Ejemplo n.º 10
0
 def my_calculation(self, i: int):
     # Print the index being processed, then update self.a[i] in place by
     # adding cos(a[i]) + 2**i - pi / |a[next]|, where `next` is (i + 1)
     # wrapped around at self.N.
     # NOTE(review): a neighbour value of 0 makes the division undefined, and
     # the neighbour is an element that another index updates — confirm that
     # both are acceptable for this example.
     pk.printf("Running index %d\n", i)
     self.a[i] += (math.cos(self.a[i]) + 2**i -
                   math.pi / math.fabs(self.a[(i + 1) % self.N]))
Ejemplo n.º 11
0
 def run(self):
     # Launch a parallel_for over self.N with an inline lambda that prints
     # the index it receives.
     pk.parallel_for(self.N, lambda i: pk.printf("Hello from i = %i\n", i))
Ejemplo n.º 12
0
 def work(self, tid: int) -> None:
     # Print the index passed in as `tid`, followed by a newline.
     pk.printf("%d\n", tid)
Ejemplo n.º 13
0
 def hello(self, i: int):
     # Print a greeting that includes the index `i`.
     pk.printf("Hello from i = %d\n", i)
Ejemplo n.º 14
0
 def call(self, tid: int, acc: pk.Acc[pk.double]) -> None:
     # Print self.i_1, then add its absolute value to the accumulator `acc`.
     # `tid` is not used in the body.
     pk.printf("Testing printf: %d\n", self.i_1)
     acc += abs(-self.i_1)
Ejemplo n.º 15
0
    nrepeat: int = 100

    space: str = values[-2]
    if space == "":
        space = pk.ExecutionSpace.OpenMP
    else:
        space = pk.ExecutionSpace(space)

    pk.set_default_space(space)

    print(f"Total size S = {N * M} N = {N} M = {M}")
    w = Workload(N, M, nrepeat, fill)
    p = pk.RangePolicy(pk.get_default_space(), 0, N)

    timer = pk.Timer()

    for i in range(nrepeat):
        result = pk.parallel_reduce(p, w.yAx)

    timer_result = timer.seconds()

    print(f"Computed result for {N} x {M} is {result}")
    solution: float = N * M

    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n",
                  result, solution)

    print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")