def test_diff(ctx_factory):
    """Check loopy's forward-mode differentiation transform.

    Builds a small kernel, differentiates ``z`` with respect to ``x``,
    and validates the resulting Jacobian against finite differences at
    two step sizes ``h1`` and ``h2 = h1 * fac``: if ``df`` is correct,
    shrinking the step by ``fac`` shrinks the finite-difference error
    by roughly the same factor.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        """{ [i,j]: 0<=i,j<n }""",
        """
        <> a = 1/(1+sinh(x[i] + y[j])**2)
        z[i] = sum(j, exp(a * x[j]))
        """, name="diff")

    knl = lp.fix_parameters(knl, n=50)

    from loopy.transform.diff import diff_kernel
    # FIXME: Is this the correct interface. Does it make sense to take the
    # entire translation unit?
    dknl, diff_map = diff_kernel(knl["diff"], "z", "x")
    # diff_kernel operates on a single kernel; re-wrap it in the
    # original translation unit before applying further transforms.
    dknl = knl.with_kernel(dknl)
    dknl = lp.remove_unused_arguments(dknl)

    # Nest the writes to `a`/`a_dx` inside the differentiation iname so
    # the generated code is schedulable.
    dknl = lp.add_inames_to_insn(dknl, "diff_i0", "writes:a_dx or writes:a")

    print(dknl)

    n = 50
    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)  # direction for the directional derivative

    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    # Baseline and two perturbed evaluations of the original kernel.
    evt, (z0,) = knl(queue, x=x, y=y)
    evt, (z1,) = knl(queue, x=(x + h1*dx), y=y)
    evt, (z2,) = knl(queue, x=(x + h2*dx), y=y)

    dknl = lp.set_options(dknl, write_cl=True)
    evt, (df,) = dknl(queue, x=x, y=y)

    diff1 = (z1-z0)
    diff2 = (z2-z0)

    # First-order predictions: dz ~= df @ (h*dx).
    diff1_predicted = df.dot(h1*dx)
    diff2_predicted = df.dot(h2*dx)

    err1 = la.norm(diff1 - diff1_predicted) / la.norm(diff1)
    err2 = la.norm(diff2 - diff2_predicted) / la.norm(diff2)
    print(err1, err2)

    # Error must drop (roughly) linearly with the step size; the 1.1
    # slack absorbs higher-order terms and round-off.
    assert (err2 < err1 * fac * 1.1).all()
def test_diff(ctx_factory):
    """Validate loopy's symbolic derivative against finite differences."""
    context = ctx_factory()
    cqueue = cl.CommandQueue(context)

    kernel = lp.make_kernel(
        """{ [i,j]: 0<=i,j<n }""",
        """
        <> a = 1/(1+sinh(x[i] + y[j])**2)
        z[i] = sum(j, exp(a * x[j]))
        """)
    kernel = lp.fix_parameters(kernel, n=50)

    from loopy.transform.diff import diff_kernel
    grad_knl, diff_map = diff_kernel(kernel, "z", "x")
    grad_knl = lp.remove_unused_arguments(grad_knl)
    # Place the writes to `a`/`a_dx` inside the differentiation iname.
    grad_knl = lp.add_inames_to_insn(
        grad_knl, "diff_i0", "writes:a_dx or writes:a")
    print(grad_knl)

    n = 50
    x, y, dx = (np.random.randn(n) for _ in range(3))

    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    # Baseline evaluation plus two finite-difference perturbations.
    evt, (z0,) = kernel(cqueue, x=x, y=y)
    evt, (z1,) = kernel(cqueue, x=(x + h1*dx), y=y)
    evt, (z2,) = kernel(cqueue, x=(x + h2*dx), y=y)

    grad_knl = lp.set_options(grad_knl, write_cl=True)
    evt, (df,) = grad_knl(cqueue, x=x, y=y)

    def rel_err(actual, predicted):
        # Relative deviation of the first-order prediction from the
        # measured finite difference.
        return la.norm(actual - predicted) / la.norm(actual)

    err1 = rel_err(z1 - z0, df.dot(h1*dx))
    err2 = rel_err(z2 - z0, df.dot(h2*dx))
    print(err1, err2)

    # Error should scale linearly with step size (10% slack for
    # higher-order terms).
    assert (err2 < err1 * fac * 1.1).all()
def test_diff(ctx_factory):
    """Check loopy's differentiation transform via finite differences.

    Differentiates ``z`` with respect to ``x`` in a small kernel and
    compares the predicted directional derivative ``df.dot(h*dx)``
    against finite differences at two step sizes; the error should
    shrink roughly linearly with the step.

    NOTE(review): this file contains multiple definitions named
    ``test_diff``; only the last one defined survives at import time.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        """{ [i,j]: 0<=i,j<n }""",
        """
        <> a = 1/(1+sinh(x[i] + y[j])**2)
        z[i] = sum(j, exp(a * x[j]))
        """)

    knl = lp.fix_parameters(knl, n=50)

    from loopy.transform.diff import diff_kernel
    dknl, diff_map = diff_kernel(knl, "z", "x")
    dknl = lp.remove_unused_arguments(dknl)

    print(dknl)

    n = 50
    x = np.random.randn(n)
    y = np.random.randn(n)
    dx = np.random.randn(n)  # perturbation direction

    fac = 1e-1
    h1 = 1e-4
    h2 = h1 * fac

    # Baseline and two perturbed evaluations of the original kernel.
    evt, (z0,) = knl(queue, x=x, y=y)
    evt, (z1,) = knl(queue, x=(x + h1*dx), y=y)
    evt, (z2,) = knl(queue, x=(x + h2*dx), y=y)

    dknl = lp.set_options(dknl, write_cl=True)
    evt, (df,) = dknl(queue, x=x, y=y)

    diff1 = (z1-z0)
    diff2 = (z2-z0)

    # First-order predictions from the computed Jacobian.
    diff1_predicted = df.dot(h1*dx)
    diff2_predicted = df.dot(h2*dx)

    err1 = la.norm(diff1 - diff1_predicted) / la.norm(diff1)
    err2 = la.norm(diff2 - diff2_predicted) / la.norm(diff2)
    print(err1, err2)

    # 1.1 slack absorbs higher-order terms and round-off.
    assert (err2 < err1 * fac * 1.1).all()
def z_comm_knl(instructions):
    """Build a kernel over the halo-padded x/y extent and the z-halo depth.

    The iname domain covers ``0<=i<Nx+2*hx``, ``0<=j<Ny+2*hy`` but only
    ``0<=k<hz`` — presumably a z-direction halo-exchange kernel; TODO
    confirm against the caller.

    NOTE(review): relies on names from the enclosing scope that are not
    visible here (``self``, ``pencil_shape_str``, ``assumptions``,
    ``params_to_fix``) — this must be a nested function inside a method.
    """
    knl = lp.make_kernel(
        "[Nx, Ny, Nz, hx, hy, hz] \
        -> { [i,j,k]: 0<=i<Nx+2*hx and 0<=j<Ny+2*hy and 0<=k<hz }",
        instructions,
        [
            lp.GlobalArg("arr", shape=pencil_shape_str, offset=lp.auto),
            ...,  # remaining arguments inferred by loopy
        ],
        default_offset=lp.auto,
        lang_version=(2018, 2),
        assumptions=assumptions,
    )
    knl = lp.remove_unused_arguments(knl)
    knl = lp.fix_parameters(knl, **params_to_fix)
    # Map k (innermost, halo depth) to the fastest-varying work-group
    # axis; i gets a unit split so every axis carries a local tag.
    knl = lp.split_iname(knl, "k", self.halo_shape[2],
                         outer_tag="g.0", inner_tag="l.0")
    knl = lp.split_iname(knl, "j", 8, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "i", 1, outer_tag="g.2", inner_tag="l.2")
    return knl
def make_kernel(self, map_instructions, tmp_instructions, args, domains,
                **kwargs):
    """Assemble a loopy translation unit from statement specifications.

    :arg map_instructions: statements writing to output arrays, each
        either a :class:`loopy.InstructionBase` or an
        ``(assignee, expression)`` pair.
    :arg tmp_instructions: statements writing to temporaries, same forms.
    :arg args: explicit kernel arguments; merged with arguments inferred
        from the fields appearing in the statements.
    :arg domains: loop domains passed through to :func:`loopy.make_kernel`.
    :returns: the finished :class:`loopy.TranslationUnit`.
    """
    temp_statements = []
    temp_vars = []

    from pystella.field import index_fields
    indexed_tmp_insns = index_fields(tmp_instructions)
    indexed_map_insns = index_fields(map_instructions)

    for statement in indexed_tmp_insns:
        if isinstance(statement, lp.InstructionBase):
            # Pre-built instructions pass through untouched.
            temp_statements += [statement]
        else:
            assignee, expression = statement
            # only declare temporary variables once
            if isinstance(assignee, pp.Variable):
                current_tmp = assignee
            elif isinstance(assignee, pp.Subscript):
                current_tmp = assignee.aggregate
            else:
                current_tmp = None
            if current_tmp is not None and current_tmp not in temp_vars:
                temp_vars += [current_tmp]
                # Optional(None): let loopy infer the temporary's type
                # at its first (declaring) assignment.
                tvt = lp.Optional(None)
            else:
                # Already declared (or not a plain temp): no type slot.
                tvt = lp.Optional()

            temp_statements += [
                self._assignment(assignee, expression, temp_var_type=tvt)
            ]

    output_statements = []
    for statement in indexed_map_insns:
        if isinstance(statement, lp.InstructionBase):
            output_statements += [statement]
        else:
            assignee, expression = statement
            # NOTE(review): tuple-form map statements are appended to
            # temp_statements, not output_statements — possibly a bug
            # (it changes statement ordering in the final kernel);
            # confirm intent before changing.
            temp_statements += [self._assignment(assignee, expression)]

    options = kwargs.pop("options", lp.Options())
    # ignore lack of supposed dependency for single-instruction kernels
    if len(map_instructions) + len(tmp_instructions) == 1:
        options.check_dep_resolution = False

    from pystella import get_field_args
    inferred_args = get_field_args([map_instructions, tmp_instructions])
    all_args = append_new_args(args, inferred_args)

    t_unit = lp.make_kernel(
        domains,
        temp_statements + output_statements,
        all_args + [lp.ValueArg("Nx, Ny, Nz", dtype="int"), ...],
        options=options,
        **kwargs,
    )

    # Backfill this object's default dtype on any argument loopy could
    # not infer a type for.
    new_args = []
    knl = t_unit.default_entrypoint
    for arg in knl.args:
        if isinstance(arg, lp.KernelArgument) and arg.dtype is None:
            new_arg = arg.copy(dtype=self.dtype)
            new_args.append(new_arg)
        else:
            new_args.append(arg)
    t_unit = t_unit.with_kernel(knl.copy(args=new_args))
    t_unit = lp.remove_unused_arguments(t_unit)
    t_unit = lp.register_callable(t_unit, "round", UnaryOpenCLCallable("round"))
    return t_unit