def get_typed_and_scheduled_kernel(self, var_to_dtype_set):
    """Return ``self.kernel`` with dtypes applied and a schedule attached.

    :arg var_to_dtype_set: an iterable of ``(name, dtype)`` pairs. A *name*
        found in ``kernel.impl_arg_to_arg`` is replaced by the ``name`` of
        the argument stored there; any other name is used unchanged. If
        this is empty or *None*, no dtypes are added and type inference is
        not run.
    :returns: the resulting kernel. If its ``schedule`` attribute is *None*
        it is additionally preprocessed and scheduled before being returned.
    """
    kernel = self.kernel

    from loopy.kernel.tools import add_dtypes

    if var_to_dtype_set:
        var_to_dtype = {}
        for var, dtype in var_to_dtype_set:
            # Translate an implementation-level argument name to the name of
            # the argument it belongs to; fall back to the name as given.
            try:
                dest_name = kernel.impl_arg_to_arg[var].name
            except KeyError:
                dest_name = var

            # Storing into a plain dict cannot raise KeyError, so no error
            # handling is needed (or possible) at this point.
            # NOTE(review): names are not validated here; presumably
            # add_dtypes() rejects names it does not know -- confirm.
            var_to_dtype[dest_name] = dtype

        kernel = add_dtypes(kernel, var_to_dtype)

        from loopy.preprocess import infer_unknown_types
        kernel = infer_unknown_types(kernel, expect_completion=True)

    if kernel.schedule is None:
        from loopy.preprocess import preprocess_kernel
        kernel = preprocess_kernel(kernel)

        from loopy.schedule import get_one_scheduled_kernel
        kernel = get_one_scheduled_kernel(kernel)

    return kernel
def get_typed_and_scheduled_translation_unit_uncached(
        self, entrypoint, arg_to_dtype_set):
    """Build a typed and linearized version of ``self.program``.

    :arg entrypoint: key used to look up, in the resolved program, the
        kernel whose dtypes are set.
    :arg arg_to_dtype_set: an iterable of ``(name, dtype)`` pairs. Names
        present in that kernel's ``impl_arg_to_arg`` are replaced by the
        ``name`` of the argument stored there; other names are used as
        given. If empty or *None*, no dtypes are added and type inference
        is skipped.
    :returns: the resulting program. If its ``state`` is below
        ``KernelState.LINEARIZED`` it is preprocessed and the kernel of
        every entrypoint is replaced by a linearized one.
    """
    from loopy.kernel.tools import add_dtypes
    from loopy.kernel import KernelState
    from loopy.translation_unit import resolve_callables

    t_unit = resolve_callables(self.program)

    if arg_to_dtype_set:
        entry_kernel = t_unit[entrypoint]
        impl_to_arg = entry_kernel.impl_arg_to_arg

        # Later pairs overwrite earlier ones that resolve to the same name.
        dtype_by_name = {
            (impl_to_arg[arg].name if arg in impl_to_arg else arg): dtype
            for arg, dtype in arg_to_dtype_set}

        typed_entry = add_dtypes(entry_kernel, dtype_by_name)
        t_unit = t_unit.with_kernel(typed_entry)

        from loopy.type_inference import infer_unknown_types
        t_unit = infer_unknown_types(t_unit, expect_completion=True)

    # Nothing more to do once the program has reached the linearized state.
    if not (t_unit.state < KernelState.LINEARIZED):
        return t_unit

    from loopy.preprocess import preprocess_program
    t_unit = preprocess_program(t_unit)

    from loopy.schedule import get_one_linearized_kernel
    for entry_name in t_unit.entrypoints:
        # Each step sees the program as updated by the previous entrypoint.
        linearized = get_one_linearized_kernel(
            t_unit[entry_name], t_unit.callables_table)
        t_unit = t_unit.with_kernel(linearized)

    return t_unit
# Example script: build a small two-instruction kernel, attach dtypes,
# preprocess and schedule it, then print the scheduled kernel.

# The script refers to ``np`` and ``lp`` below, so both must be imported here.
import numpy as np
import loopy as lp
import pyopencl as cl
import pyopencl.array

knl = lp.make_kernel(
    "{ [i,k]: 0<=i<n and 0<=k<3 }",
    """
    c[k,i] = a[k, i + 1]
    out[k,i] = c[k,i]
    """)

# transform
knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

from loopy.kernel.tools import add_dtypes
knl = add_dtypes(knl, {
    "a": np.float32,
    "c": np.float32,
    "out": np.float32,
    "n": np.int32,
    })

# schedule
from loopy.preprocess import preprocess_kernel
knl = preprocess_kernel(knl)

from loopy.schedule import get_one_scheduled_kernel
knl = get_one_scheduled_kernel(knl)

# map schedule onto host or device
print(knl)
# Example script: build a small two-instruction kernel, attach dtypes,
# preprocess and schedule it, then print the scheduled kernel followed by
# the generated device and host code.

import numpy as np
import loopy as lp
import pyopencl as cl
import pyopencl.array
from loopy.kernel.tools import add_dtypes
from loopy.preprocess import preprocess_kernel
from loopy.schedule import get_one_scheduled_kernel

knl = lp.make_kernel(
    "{ [i,k]: 0<=i<n and 0<=k<3 }",
    """
    c[k,i] = a[k, i + 1]
    out[k,i] = c[k,i]
    """)

# transform
knl = lp.split_iname(knl, "i", 128, outer_tag="g.0", inner_tag="l.0")

# The three arrays share one dtype; the size parameter gets its own.
arg_dtypes = dict.fromkeys(("a", "c", "out"), np.float32)
arg_dtypes["n"] = np.int32
knl = add_dtypes(knl, arg_dtypes)

# schedule
knl = get_one_scheduled_kernel(preprocess_kernel(knl))

# map schedule onto host or device
print(knl)

cgr = lp.generate_code_v2(knl)
print(cgr.device_code())
print(cgr.host_code())