def test_generate_c_snippet():
    """Build a kernel with two sum reductions for the C target and print
    the result of code generation."""
    from pymbolic import var

    # Symbolic names used in the instructions below.
    space_idx = var("I")
    flux = var("f")
    dflux = var("df")
    quad_vals = var("q_v")
    unused_e_n = var("eN")  # noqa
    quad_idx = var("k")
    scale = var("u")

    def sum_over(iname, expr):
        # "sum" reduction that may be used simultaneously in several
        # instructions over the same iname.
        return lp.Reduction("sum", iname, expr, allow_simultaneous=True)

    instructions = [
        lp.Assignment(
            flux[space_idx],
            sum_over(quad_idx, quad_vals[quad_idx, space_idx] * scale)),
        lp.Assignment(
            dflux[space_idx],
            sum_over(quad_idx, quad_vals[quad_idx, space_idx])),
    ]

    kernel_data = [
        lp.GlobalArg("q_v", np.float64, shape="nQuad, nSpace"),
        lp.GlobalArg("f,df", np.float64, shape="nSpace"),
        lp.ValueArg("u", np.float64),
        "...",
    ]

    knl = lp.make_kernel(
        "{[I, k]: 0<=I<nSpace and 0<=k<nQuad}",
        instructions,
        kernel_data,
        target=CTarget(),
        assumptions="nQuad>=1")

    if False:  # enable to play with prefetching
        # (prefetch currently requires constant sizes)
        knl = lp.fix_parameters(knl, nQuad=5, nSpace=3)
        knl = lp.add_prefetch(knl, "q_v", "k,I", default_tag=None)

    knl = lp.split_iname(knl, "k", 4, inner_tag="unr", slabs=(0, 1))
    knl = lp.prioritize_loops(knl, "I,k_outer,k_inner")

    print(lp.generate_code_v2(knl))
def test_wrap_loop(self):
    """Take a kernel and place it in a larger loop, offsetting certain vars."""
    inner_knl = lp.make_kernel(
        "{[i,j]:0<=i,j<n}",
        "out[i] = sum(j, (i/j)*in[i, j])",
        target=CTarget())

    # 'in' will depend on the batch iname 't'
    batched_knl = lp.to_batched(inner_knl, "T", ["in"], "t")

    generated = self._dtype_and_code(batched_knl)
    print(generated)
def test_split_iname3(self):
    """Split one of two inames."""
    # This test generates code for the ISPC target.
    from loopy.target.ispc import ISPCTarget

    copy_knl = lp.make_kernel(
        "{[i,j]:0<=i,j<n}",
        "out[i, j] = in[i, j]",
        target=ISPCTarget())

    # Split 'i' in chunks of 8 and put 'j' between the two resulting inames.
    split_knl = lp.split_iname(copy_knl, "i", 8)
    ordered_knl = lp.prioritize_loops(split_knl, ["i_outer", "j", "i_inner"])

    print(self._dtype_and_code(ordered_knl))
def test_wrap_loop_with_param(self):
    """Batch a kernel that also reads a per-element temporary, using a
    sequential batch loop, and print the generated code."""
    instructions = """
                <> a = a_values[i]
                out[i] = a * sum(j, (i/j)*in[i, j])
                """
    inner_knl = lp.make_kernel(
        "{[i,j]:0<=i,j<n}", instructions, target=CTarget())

    # 'in' will depend on the batch iname 't'
    batched_knl = lp.to_batched(
        inner_knl, "T", ["in"], "t", sequential=True)

    generated = self._dtype_and_code(batched_knl, a_values=np.float32)
    print(generated)
def test_simple_kernel(self):
    """Compile a kernel that doubles its input and compare the output
    against the same computation done with NumPy."""
    n = 10

    doubling_knl = lp.make_kernel(
        "{ [i]: 0<=i<n }",
        "out[i] = 2*a[i]",
        target=CTarget())
    typed_knl = lp.add_dtypes(doubling_knl, {"a": np.float32})

    # Run code generation too; its two results are not used further.
    _code, _extra = lp.generate_code(typed_knl)
    compiled = CompiledKernel(typed_knl)  # noqa

    # Two float32 rows of length n: the input and the output buffer.
    buffers = np.zeros((2, n), np.float32)
    a, out = buffers[0], buffers[1]
    a[:] = np.arange(a.size)

    compiled(a, n, out)

    np.testing.assert_allclose(out, a * 2)
def __init__(self, target):
    """Remember *target* and look up the wrapped dtype registry of a C target."""
    self.target = target

    from loopy.target.c import CTarget

    # The registry is taken from a freshly constructed CTarget,
    # not from the *target* argument.
    c_dtype_registry = CTarget().get_dtype_registry()
    self.registry = c_dtype_registry.wrapped_registry
def main():
    """Command-line entry point of the stand-alone loopy frontend.

    Steps, in order:

    1. Parse the command line and install the chosen code generation target
       as loopy's default target.
    2. Read the input from ``INPUT_FILE`` (or from stdin if it is ``-``) and
       determine its language from ``--lang`` or the file extension.
    3. Build a list of kernels, either by executing loopy Python code (which
       must define ``lp_knl``) or by parsing transformed Fortran.
    4. Optionally print the kernels to stderr, prepend an ``occa_info``
       argument, and open the generated code in an editor.
    5. Write the generated code to ``OUTPUT_FILE`` (or stdout for ``-``).

    Raises:
        ValueError: if the target name is not recognized.
        RuntimeError: if the input language cannot be deduced or is unknown,
            if loopy-language input does not define ``lp_knl``, or if no
            kernel is left after filtering Fortran kernels by ``--name``.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
            help="Defaults to stdout ('-').", nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target", choices=(
        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
        default="opencl")
    parser.add_argument("--name")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--occa-add-dummy-arg", action="store_true")
    parser.add_argument("--print-ir", action="store_true")

    args = parser.parse_args()

    # {{{ target selection

    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget()
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget()
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget(occa_mode=True)
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget()
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = CTarget(fortran_abi=True)
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget()
    else:
        # 'target' is not bound in this branch; report the requested name.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read input, deduce language

    lang = None

    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
                ".py": "loopy",
                ".loopy": "loopy",
                ".floopy": "fortran",
                ".f90": "fortran",
                ".fpp": "fortran",
                ".f": "fortran",
                ".f77": "fortran",
                }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always wins over the file extension.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        # Let the executed input import modules that live next to it.
        sys.path.append(infile_dirname)

        # }}}

        # Namespace shared by the defines, the input and the transform code.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"), data_dic)

        # The input was already read above (possibly from stdin), so it must
        # not be opened again here: that would fail for INPUT_FILE == "-".
        # NOTE: this executes the input as Python code.
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                exec(compile(xform_fd.read(),
                    args.transform, "exec"), data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                    "to be defined on exit")

        if args.name is not None:
            kernel = kernel.copy(name=args.name)

        kernels = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines go in front of the user's transform code.
            with open(args.occa_defines) as defines_fd:
                pre_transform_code = (
                        defines_to_python_code(defines_fd.read())
                        + pre_transform_code)

        kernels = lp.parse_transformed_fortran(
                infile_content, pre_transform_code=pre_transform_code,
                filename=args.infile)

        if args.name is not None:
            kernels = [
                    kernel for kernel in kernels
                    if kernel.name == args.name
                    ]

        if not kernels:
            raise RuntimeError("no kernels found (name specified: %s)"
                    % args.name)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if args.print_ir:
        for kernel in kernels:
            print(kernel, file=sys.stderr)

    if args.occa_add_dummy_arg:
        new_kernels = []
        for kernel in kernels:
            new_args = [
                    lp.ArrayArg("occa_info", np.int32, shape=None)
                    ] + kernel.args
            new_kernels.append(kernel.copy(args=new_args))

        kernels = new_kernels
        del new_kernels

    # {{{ code generation

    codes = []
    from loopy.codegen import generate_code
    for kernel in kernels:
        kernel = lp.preprocess_kernel(kernel)
        code, _ = generate_code(kernel)
        codes.append(code)

    # }}}

    if args.outfile is not None:
        outfile = args.outfile
    else:
        outfile = "-"

    code = "\n\n".join(codes)

    # {{{ edit code if requested

    import os
    edit_kernel_env = os.environ.get("LOOPY_EDIT_KERNEL")
    need_edit = args.edit_code
    if not need_edit and edit_kernel_env is not None:
        # Do not replace with "any()"--Py2.6/2.7 bug doesn't like
        # comprehensions in functions with exec().

        for k in kernels:
            if edit_kernel_env.lower() in k.name.lower():
                need_edit = True

    if need_edit:
        from pytools import invoke_editor
        code = invoke_editor(code, filename="edit.cl")

    # }}}

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)
ecm(vec).expr, 1, ecm(result).expr, 1 ] return ( var(self.name_in_target)(*c_parameters), False # cblas_gemv does not return anything ) def generate_preambles(self, target): assert isinstance(target, CTarget) yield ("99_cblas", "#include <cblas.h>") return # }}} n = 10 knl = lp.make_kernel("{:}", """ y[:] = gemv(A[:, :], x[:]) """, [ lp.GlobalArg("A", dtype=np.float64, shape=(n, n)), lp.GlobalArg("x", dtype=np.float64, shape=(n, )), lp.GlobalArg("y", shape=(n, )), ... ], target=CTarget()) knl = lp.register_callable(knl, "gemv", CBLASGEMV(name="gemv")) print(lp.generate_code_v2(knl).device_code())
def main():
    """Command-line entry point of the stand-alone loopy frontend.

    Steps, in order:

    1. Parse the command line and install the chosen code generation target
       (passed on as a callable) as loopy's default target.
    2. Read the input from ``INPUT_FILE`` (or from stdin if it is ``-``) and
       determine its language from ``--lang`` or the file extension.
    3. Obtain a translation unit, either by executing loopy Python code
       (which must define ``lp_knl``) or by parsing transformed Fortran.
    4. Optionally print the translation unit to stderr, then preprocess it
       and generate code.
    5. Write the device code to ``OUTPUT_FILE`` (or stdout for ``-``).

    Raises:
        ValueError: if the target name is not recognized.
        RuntimeError: if the input language cannot be deduced or is unknown,
            or if loopy-language input does not define ``lp_knl``.
        NotImplementedError: if the input did not yield a
            ``lp.TranslationUnit`` (a plain list of kernels is not
            converted yet).
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile", default="-", metavar="OUTPUT_FILE",
            help="Defaults to stdout ('-').", nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target", choices=(
        "opencl", "ispc", "ispc-occa", "c", "c-fortran", "cuda"),
        default="opencl")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--print-ir", action="store_true")

    args = parser.parse_args()

    # {{{ target selection

    # 'target' is a callable producing the target, not a target instance.
    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = lambda: ISPCTarget(occa_mode=True)  # noqa: E731
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = lambda: CTarget(fortran_abi=True)  # noqa: E731
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget
    else:
        # 'target' is not bound in this branch; report the requested name.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read input, deduce language

    lang = None

    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
                ".py": "loopy",
                ".loopy": "loopy",
                ".floopy": "fortran",
                ".f90": "fortran",
                ".F90": "fortran",
                ".fpp": "fortran",
                ".f": "fortran",
                ".f77": "fortran",
                ".F77": "fortran",
                }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always wins over the file extension.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        # Let the executed input import modules that live next to it.
        sys.path.append(infile_dirname)

        # }}}

        # Namespace shared by the defines, the input and the transform code.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"), data_dic)

        # The input was already read above (possibly from stdin), so it must
        # not be opened again here: that would fail for INPUT_FILE == "-".
        # NOTE: this executes the input as Python code.
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                exec(compile(xform_fd.read(),
                    args.transform, "exec"), data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                    "to be defined on exit")

        # A translation unit can be used as-is. Anything else is wrapped in
        # a list and rejected by the check below, as before.
        if isinstance(kernel, lp.TranslationUnit):
            t_unit = kernel
        else:
            t_unit = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines go in front of the user's transform code.
            with open(args.occa_defines) as defines_fd:
                pre_transform_code = (
                        defines_to_python_code(defines_fd.read())
                        + pre_transform_code)

        t_unit = lp.parse_transformed_fortran(
                infile_content, pre_transform_code=pre_transform_code,
                filename=args.infile)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if not isinstance(t_unit, lp.TranslationUnit):
        # FIXME
        assert isinstance(t_unit, list)  # of kernels
        raise NotImplementedError("convert list of kernels to TranslationUnit")

    if args.print_ir:
        print(t_unit, file=sys.stderr)

    t_unit = lp.preprocess_kernel(t_unit)
    cgr = lp.generate_code_v2(t_unit)

    if args.outfile is not None:
        outfile = args.outfile
    else:
        outfile = "-"

    code = cgr.device_code()

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)