コード例 #1
0
def test_diff(ctx_factory):
    """Compare the kernel returned by ``diff_kernel`` with finite differences.

    The ``diff`` kernel computes ``z`` from ``x`` and ``y``. ``diff_kernel`` is
    called for ``"z"`` and ``"x"``; the array ``df`` produced by the resulting
    kernel is applied to a random direction ``dx`` scaled by two step sizes and
    compared with the corresponding differences of ``z``. The test passes
    when the relative error for the smaller step is below ``fac * 1.1`` times
    the relative error for the larger step.

    :arg ctx_factory: callable returning the context used to build the
        command queue.
    """
    n = 50

    # Step sizes: the second is smaller than the first by the factor *fac*.
    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    queue = cl.CommandQueue(ctx_factory())

    t_unit = lp.make_kernel(
         """{ [i,j]: 0<=i,j<n }""",
         """
         <> a = 1/(1+sinh(x[i] + y[j])**2)
         z[i] = sum(j, exp(a * x[j]))
         """, name="diff")
    t_unit = lp.fix_parameters(t_unit, n=n)

    from loopy.transform.diff import diff_kernel
    #FIXME Is this the correct interface. Does it make sense to take the entire
    #translation unit?
    diff_knl, _ = diff_kernel(t_unit["diff"], "z", "x")
    diff_t_unit = lp.remove_unused_arguments(t_unit.with_kernel(diff_knl))
    diff_t_unit = lp.add_inames_to_insn(
        diff_t_unit, "diff_i0", "writes:a_dx or writes:a")

    print(diff_t_unit)

    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)

    def run_forward(x_in):
        # Evaluate the undifferentiated kernel; only the output is needed.
        _, (z,) = t_unit(queue, x=x_in, y=y)
        return z

    z0 = run_forward(x)
    z1 = run_forward(x + h1*dx)
    z2 = run_forward(x + h2*dx)

    diff_t_unit = lp.set_options(diff_t_unit, write_cl=True)
    _, (df,) = diff_t_unit(queue, x=x, y=y)

    def rel_err(z_perturbed, h):
        # Relative mismatch between the observed change in z and df applied
        # to the perturbation h*dx.
        observed = z_perturbed - z0
        return la.norm(observed - df.dot(h*dx)) / la.norm(observed)

    err1 = rel_err(z1, h1)
    err2 = rel_err(z2, h2)
    print(err1, err2)

    assert (err2 < err1 * fac * 1.1).all()
コード例 #2
0
ファイル: test_diff.py プロジェクト: inducer/loopy
def test_diff(ctx_factory):
    """Compare the kernel returned by ``diff_kernel`` with finite differences.

    ``diff_kernel`` is called on the kernel for ``"z"`` and ``"x"``. The array
    ``df`` produced by the resulting kernel is applied to a random direction
    ``dx`` at two step sizes and compared with the observed differences of
    ``z``. The relative error at the smaller step must be below
    ``fac * 1.1`` times the relative error at the larger step.

    :arg ctx_factory: callable returning the context used to build the
        command queue.
    """
    n = 50

    # Two step sizes, the second scaled down by *fac*.
    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    queue = cl.CommandQueue(ctx_factory())

    base_knl = lp.make_kernel(
         """{ [i,j]: 0<=i,j<n }""",
         """
         <> a = 1/(1+sinh(x[i] + y[j])**2)
         z[i] = sum(j, exp(a * x[j]))
         """)
    base_knl = lp.fix_parameters(base_knl, n=n)

    from loopy.transform.diff import diff_kernel
    deriv_knl, _ = diff_kernel(base_knl, "z", "x")
    deriv_knl = lp.remove_unused_arguments(deriv_knl)
    deriv_knl = lp.add_inames_to_insn(
        deriv_knl, "diff_i0", "writes:a_dx or writes:a")

    print(deriv_knl)

    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)

    # Evaluate z at the base point and at both perturbed points, in that order.
    outputs = []
    for x_in in (x, x + h1*dx, x + h2*dx):
        _, (z,) = base_knl(queue, x=x_in, y=y)
        outputs.append(z)
    z0, z1, z2 = outputs

    deriv_knl = lp.set_options(deriv_knl, write_cl=True)
    _, (df,) = deriv_knl(queue, x=x, y=y)

    observed1 = z1 - z0
    observed2 = z2 - z0

    predicted1 = df.dot(h1*dx)
    predicted2 = df.dot(h2*dx)

    err1 = la.norm(observed1 - predicted1) / la.norm(observed1)
    err2 = la.norm(observed2 - predicted2) / la.norm(observed2)
    print(err1, err2)

    assert (err2 < err1 * fac * 1.1).all()
コード例 #3
0
ファイル: test_diff.py プロジェクト: dokempf/loopy
def test_diff(ctx_factory):
    """Compare the kernel returned by ``diff_kernel`` with finite differences.

    ``diff_kernel`` is called on the kernel for ``"z"`` and ``"x"``; the array
    ``df`` it yields is applied to a random perturbation direction and
    compared with the change in ``z`` observed at two step sizes. The
    assertion requires the relative error at the smaller step to be below
    ``fac * 1.1`` times the relative error at the larger step.

    :arg ctx_factory: callable returning the context used to build the
        command queue.
    """
    n = 50
    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    queue = cl.CommandQueue(ctx_factory())

    knl = lp.fix_parameters(
        lp.make_kernel(
         """{ [i,j]: 0<=i,j<n }""",
         """
         <> a = 1/(1+sinh(x[i] + y[j])**2)
         z[i] = sum(j, exp(a * x[j]))
         """),
        n=n)

    from loopy.transform.diff import diff_kernel
    dknl, _ = diff_kernel(knl, "z", "x")
    dknl = lp.remove_unused_arguments(dknl)

    print(dknl)

    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)

    def evaluate(x_in):
        # Run the undifferentiated kernel and return its single output.
        _, (result,) = knl(queue, x=x_in, y=y)
        return result

    z0 = evaluate(x)
    z1 = evaluate(x + h1*dx)
    z2 = evaluate(x + h2*dx)

    dknl = lp.set_options(dknl, write_cl=True)
    _, (df,) = dknl(queue, x=x, y=y)

    def relative_error(z_shifted, step):
        # Observed change in z versus the change predicted by df.
        delta = z_shifted - z0
        return la.norm(delta - df.dot(step*dx)) / la.norm(delta)

    err1 = relative_error(z1, h1)
    err2 = relative_error(z2, h2)
    print(err1, err2)

    assert (err2 < err1 * fac * 1.1).all()
コード例 #4
0
ファイル: decomp.py プロジェクト: zachjweiner/pystella
 def z_comm_knl(instructions):
     """Build a loopy kernel over ``i``, ``j``, ``k`` from *instructions*.

     The kernel is created with the ``arr`` global argument, has its unused
     arguments removed and its parameters fixed, and then has each of the
     inames ``k``, ``j`` and ``i`` split and tagged with group/local axes
     0, 1 and 2 respectively.

     NOTE(review): ``self``, ``pencil_shape_str``, ``assumptions`` and
     ``params_to_fix`` are taken from an enclosing scope not shown here.

     :arg instructions: the instructions passed on to ``lp.make_kernel``.
     :returns: the transformed kernel.
     """
     kernel_args = [
         lp.GlobalArg("arr", shape=pencil_shape_str,
                      offset=lp.auto),
         ...,
     ]
     knl = lp.make_kernel(
         "[Nx, Ny, Nz, hx, hy, hz] \
          -> { [i,j,k]: 0<=i<Nx+2*hx and 0<=j<Ny+2*hy and 0<=k<hz }",
         instructions,
         kernel_args,
         default_offset=lp.auto,
         lang_version=(2018, 2),
         assumptions=assumptions,
     )
     knl = lp.fix_parameters(lp.remove_unused_arguments(knl), **params_to_fix)

     # (iname, inner length, outer tag, inner tag), applied in this order.
     split_specs = (
         ("k", self.halo_shape[2], "g.0", "l.0"),
         ("j", 8, "g.1", "l.1"),
         ("i", 1, "g.2", "l.2"),
     )
     for iname, length, outer, inner in split_specs:
         knl = lp.split_iname(knl, iname, length,
                              outer_tag=outer, inner_tag=inner)
     return knl
コード例 #5
0
    def make_kernel(self, map_instructions, tmp_instructions, args, domains,
                    **kwargs):
        """Assemble a loopy translation unit from temporary and map instructions.

        Both instruction collections are passed through ``index_fields``. Each
        resulting entry is either a :class:`loopy.InstructionBase`, used as
        is, or an ``(assignee, expression)`` pair, converted with
        ``self._assignment``. All temporary statements precede all map
        statements in the generated kernel, and each group keeps the order
        in which it was supplied.

        :arg map_instructions: the output (map) instructions.
        :arg tmp_instructions: the instructions that assign temporaries.
        :arg args: kernel arguments; arguments obtained from
            ``get_field_args`` are merged in via ``append_new_args``.
        :arg domains: the loop domains passed to ``lp.make_kernel``.
        :arg kwargs: remaining keyword arguments are forwarded to
            ``lp.make_kernel``. An ``options`` entry, if present, is used as
            the kernel options (and is modified in place for
            single-instruction kernels, see below).
        :returns: the translation unit, with ``self.dtype`` filled in for
            arguments lacking a dtype, unused arguments removed, and the
            ``round`` callable registered.
        """
        from pystella.field import index_fields
        indexed_tmp_insns = index_fields(tmp_instructions)
        indexed_map_insns = index_fields(map_instructions)

        temp_statements = []
        declared_tmps = []
        for statement in indexed_tmp_insns:
            if isinstance(statement, lp.InstructionBase):
                temp_statements.append(statement)
                continue

            assignee, expression = statement
            if isinstance(assignee, pp.Variable):
                current_tmp = assignee
            elif isinstance(assignee, pp.Subscript):
                current_tmp = assignee.aggregate
            else:
                current_tmp = None

            # only declare temporary variables once
            if current_tmp is not None and current_tmp not in declared_tmps:
                declared_tmps.append(current_tmp)
                tvt = lp.Optional(None)
            else:
                tvt = lp.Optional()

            temp_statements.append(
                self._assignment(assignee, expression, temp_var_type=tvt))

        output_statements = []
        for statement in indexed_map_insns:
            if isinstance(statement, lp.InstructionBase):
                output_statements.append(statement)
            else:
                assignee, expression = statement
                # Append to output_statements (not temp_statements) so that
                # map instructions keep their supplied order even when
                # InstructionBase objects and pairs are mixed.
                output_statements.append(
                    self._assignment(assignee, expression))

        options = kwargs.pop("options", lp.Options())
        # ignore lack of supposed dependency for single-instruction kernels
        # (this sets the attribute on the options object the caller passed)
        if len(map_instructions) + len(tmp_instructions) == 1:
            options.check_dep_resolution = False

        from pystella import get_field_args
        inferred_args = get_field_args([map_instructions, tmp_instructions])
        all_args = append_new_args(args, inferred_args)

        t_unit = lp.make_kernel(
            domains,
            temp_statements + output_statements,
            all_args + [lp.ValueArg("Nx, Ny, Nz", dtype="int"), ...],
            options=options,
            **kwargs,
        )

        # Give every kernel argument without a dtype the default self.dtype.
        knl = t_unit.default_entrypoint
        new_args = [
            arg.copy(dtype=self.dtype)
            if isinstance(arg, lp.KernelArgument) and arg.dtype is None
            else arg
            for arg in knl.args
        ]
        t_unit = t_unit.with_kernel(knl.copy(args=new_args))
        t_unit = lp.remove_unused_arguments(t_unit)
        t_unit = lp.register_callable(t_unit, "round",
                                      UnaryOpenCLCallable("round"))

        return t_unit