Esempio n. 1
0
def test_generate_c_snippet():
    """Build a two-instruction reduction kernel and print its generated code.

    The kernel iterates over inames ``I`` and ``k``; it sums
    ``q_v[k, I] * u`` into ``f[I]`` and ``q_v[k, I]`` into ``df[I]``.
    The ``k`` loop is then split by 4 (inner part tagged ``unr``), the loop
    priority is set, and the output of ``lp.generate_code_v2`` for a
    ``CTarget`` is printed.
    """
    from pymbolic import var

    # Symbolic names from which the instructions are assembled.
    I = var("I")  # noqa
    k = var("k")
    u = var("u")
    f = var("f")
    df = var("df")
    q_v = var("q_v")
    eN = var("eN")  # noqa

    def sum_over(inames, expr):
        # A "sum" reduction that may share its iname with other reductions.
        return lp.Reduction("sum", inames, expr, allow_simultaneous=True)

    knl = lp.make_kernel(
        "{[I, k]: 0<=I<nSpace and 0<=k<nQuad}",
        [
            lp.Assignment(f[I], sum_over(k, q_v[k, I] * u)),
            lp.Assignment(df[I], sum_over(k, q_v[k, I])),
        ],
        [
            lp.GlobalArg("q_v", np.float64, shape="nQuad, nSpace"),
            lp.GlobalArg("f,df", np.float64, shape="nSpace"),
            lp.ValueArg("u", np.float64),
            "...",
        ],
        target=CTarget(),
        assumptions="nQuad>=1")

    # Flip to True to play with prefetching
    # (prefetch currently requires constant sizes).
    try_prefetch = False
    if try_prefetch:
        knl = lp.fix_parameters(knl, nQuad=5, nSpace=3)
        knl = lp.add_prefetch(knl, "q_v", "k,I", default_tag=None)

    knl = lp.split_iname(knl, "k", 4, inner_tag="unr", slabs=(0, 1))
    knl = lp.prioritize_loops(knl, "I,k_outer,k_inner")

    generated = lp.generate_code_v2(knl)
    print(generated)
Esempio n. 2
0
 def test_wrap_loop(self):
     """Take a kernel and place it in a larger loop, offsetting certain vars."""
     inner_knl = lp.make_kernel(
         "{[i,j]:0<=i,j<n}",
         "out[i] = sum(j, (i/j)*in[i, j])",
         target=CTarget(),
     )

     # ``in`` is the only argument that will depend on ``t``.
     batched_knl = lp.to_batched(inner_knl, 'T', ['in'], 't')

     report = self._dtype_and_code(batched_knl)
     print(report)
Esempio n. 3
0
 def test_split_iname3(self):
     """Split one of two inames."""
     # This test targets ISPC; the class is used under its own name here
     # rather than being aliased to ``CTarget``.
     from loopy.target.ispc import ISPCTarget

     copy_knl = lp.make_kernel(
         "{[i,j]:0<=i,j<n}",
         "out[i, j] = in[i, j]",
         target=ISPCTarget(),
     )

     # Split ``i`` in chunks of 8 and place ``j`` between the two halves.
     split_knl = lp.split_iname(copy_knl, 'i', 8)
     ordered_knl = lp.prioritize_loops(split_knl, ['i_outer', 'j', 'i_inner'])

     print(self._dtype_and_code(ordered_knl))
Esempio n. 4
0
 def test_wrap_loop_with_param(self):
     """Batch a kernel that scales its sum by ``a_values[i]``, sequentially."""
     instructions = """
                          <> a = a_values[i]
                          out[i] = a * sum(j, (i/j)*in[i, j])
                          """
     inner_knl = lp.make_kernel("{[i,j]:0<=i,j<n}", instructions,
                                target=CTarget())

     # Only ``in`` will depend on ``t``; the batch loop is sequential.
     batched_knl = lp.to_batched(inner_knl, 'T', ['in'], 't', sequential=True)

     report = self._dtype_and_code(batched_knl, a_values=np.float32)
     print(report)
Esempio n. 5
0
 def test_simple_kernel(self):
     """Compile ``out[i] = 2*a[i]`` for the C target and check its result."""
     size = 10

     doubling_knl = lp.make_kernel(
         "{ [i]: 0<=i<n }",
         "out[i] = 2*a[i]",
         target=CTarget(),
     )
     typed_knl = lp.add_dtypes(doubling_knl, {'a': np.float32})

     # Code generation must succeed and yield a pair; the text is not used.
     _code, _ = lp.generate_code(typed_knl)

     compiled = CompiledKernel(typed_knl)  # noqa

     # One zero-initialised float32 buffer, viewed as input and output rows.
     buffers = np.zeros((2, size), np.float32)
     a, out = buffers[0], buffers[1]
     a[:] = np.arange(a.size)

     compiled(a, size, out)
     np.testing.assert_allclose(out, a * 2)
Esempio n. 6
0
 def __init__(self, target):
     """Remember *target* and cache the C target's wrapped dtype registry."""
     self.target = target

     # Imported locally, as in the rest of this file's target handling.
     from loopy.target.c import CTarget

     c_dtype_registry = CTarget().get_dtype_registry()
     self.registry = c_dtype_registry.wrapped_registry
Esempio n. 7
0
def main():
    """Run the stand-alone loopy frontend from the command line.

    Parses ``sys.argv``, reads a loopy (Python) or Fortran input file (or
    stdin when INPUT_FILE is ``-``), builds the kernels it describes,
    generates code for the selected target and writes the concatenated code
    to OUTPUT_FILE (stdout when ``-``).

    Raises:
        ValueError: if the target name is not one of the known targets.
        RuntimeError: if the input language cannot be deduced or is unknown,
            if a loopy input does not define ``lp_knl``, or if no kernel
            remains after filtering Fortran kernels by ``--name``.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile",
                        default="-",
                        metavar="OUTPUT_FILE",
                        help="Defaults to stdout ('-').",
                        nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target",
                        choices=("opencl", "ispc", "ispc-occa", "c",
                                 "c-fortran", "cuda"),
                        default="opencl")
    parser.add_argument("--name")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--occa-add-dummy-arg", action="store_true")
    parser.add_argument("--print-ir", action="store_true")
    args = parser.parse_args()

    # {{{ pick the target

    # Target modules are imported lazily so that only the selected one
    # has to be importable.
    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget()
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget()
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget(occa_mode=True)
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget()
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = CTarget(fortran_abi=True)
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget()
    else:
        # Report the requested name: the local 'target' is unbound here.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read the input and deduce its language

    lang = None
    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
            ".py": "loopy",
            ".loopy": "loopy",
            ".floopy": "fortran",
            ".f90": "fortran",
            ".fpp": "fortran",
            ".f": "fortran",
            ".f77": "fortran",
        }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always overrides the extension-based guess.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                           "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        # Lets the input script import modules that live next to it.
        sys.path.append(infile_dirname)

        # }}}

        # Namespace shared by all exec'd snippets below.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        # NOTE: the defines file, the input file and the transform file are
        # executed as Python code; only feed this tool trusted files.
        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"),
                 data_dic)

        # The content was already read above (possibly from stdin), so the
        # input file must not be opened a second time here.
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                exec(compile(xform_fd.read(), args.transform, "exec"),
                     data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                               "to be defined on exit")

        if args.name is not None:
            kernel = kernel.copy(name=args.name)

        kernels = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines are placed in front of the transform code.
            with open(args.occa_defines) as defines_fd:
                pre_transform_code = (
                    defines_to_python_code(defines_fd.read()) +
                    pre_transform_code)

        kernels = lp.parse_transformed_fortran(
            infile_content,
            pre_transform_code=pre_transform_code,
            filename=args.infile)

        # For Fortran input, --name selects kernels instead of renaming.
        if args.name is not None:
            kernels = [
                kernel for kernel in kernels if kernel.name == args.name
            ]

        if not kernels:
            raise RuntimeError("no kernels found (name specified: %s)" %
                               args.name)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if args.print_ir:
        for kernel in kernels:
            print(kernel, file=sys.stderr)

    if args.occa_add_dummy_arg:
        # Prepend an "occa_info" array argument to every kernel.
        kernels = [
            kernel.copy(
                args=[lp.ArrayArg("occa_info", np.int32, shape=None)
                      ] + kernel.args)
            for kernel in kernels
        ]

    # {{{ generate code

    codes = []
    from loopy.codegen import generate_code
    for kernel in kernels:
        kernel = lp.preprocess_kernel(kernel)
        # The second element of the result is not needed here.
        kernel_code, _ = generate_code(kernel)
        codes.append(kernel_code)

    code = "\n\n".join(codes)

    # }}}

    # {{{ edit code if requested

    import os
    edit_kernel_env = os.environ.get("LOOPY_EDIT_KERNEL")
    need_edit = args.edit_code
    if not need_edit and edit_kernel_env is not None:
        # Do not replace with "any()"--Py2.6/2.7 bug doesn't like
        # comprehensions in functions with exec().

        for k in kernels:
            if edit_kernel_env.lower() in k.name.lower():
                need_edit = True
                break

    if need_edit:
        from pytools import invoke_editor
        code = invoke_editor(code, filename="edit.cl")

    # }}}

    # "-" (also the argparse default) means stdout.
    outfile = args.outfile if args.outfile is not None else "-"

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)
Esempio n. 8
0
            ecm(vec).expr, 1,
            ecm(result).expr, 1
        ]
        return (
            var(self.name_in_target)(*c_parameters),
            False  # cblas_gemv does not return anything
        )

    def generate_preambles(self, target):
        """Yield the preamble that pulls in the CBLAS header.

        *target* must be a ``CTarget`` instance. Exactly one 2-tuple,
        ``("99_cblas", "#include <cblas.h>")``, is produced.
        """
        assert isinstance(target, CTarget)

        cblas_preamble = ("99_cblas", "#include <cblas.h>")
        yield cblas_preamble


# }}}

# Extent used for every axis of A, x and y below.
n = 10

# A kernel with an empty domain whose only instruction calls "gemv" on
# whole-array slices.
knl = lp.make_kernel(
    "{:}",
    """
        y[:] = gemv(A[:, :], x[:])
        """,
    [
        lp.GlobalArg("A", dtype=np.float64, shape=(n, n)),
        lp.GlobalArg("x", dtype=np.float64, shape=(n, )),
        lp.GlobalArg("y", shape=(n, )),
        ...,
    ],
    target=CTarget(),
)

# Associate the name "gemv" with the CBLAS-backed callable, then print
# the generated device code.
knl = lp.register_callable(knl, "gemv", CBLASGEMV(name="gemv"))
print(lp.generate_code_v2(knl).device_code())
Esempio n. 9
0
def main():
    """Run the stand-alone loopy frontend from the command line.

    Parses ``sys.argv``, reads a loopy (Python) or Fortran input file (or
    stdin when INPUT_FILE is ``-``), obtains a translation unit from it,
    generates device code for the selected target and writes it to
    OUTPUT_FILE (stdout when ``-``).

    ``--edit-code`` is accepted by the parser but not acted upon here.

    Raises:
        ValueError: if the target name is not one of the known targets.
        RuntimeError: if the input language cannot be deduced or is unknown,
            or if a loopy input does not define ``lp_knl``.
        NotImplementedError: if the input yields something other than an
            ``lp.TranslationUnit`` (converting a list of kernels is not
            implemented).
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Stand-alone loopy frontend")

    parser.add_argument("infile", metavar="INPUT_FILE")
    parser.add_argument("outfile",
                        default="-",
                        metavar="OUTPUT_FILE",
                        help="Defaults to stdout ('-').",
                        nargs="?")
    parser.add_argument("--lang", metavar="LANGUAGE", help="loopy|fortran")
    parser.add_argument("--target",
                        choices=("opencl", "ispc", "ispc-occa", "c",
                                 "c-fortran", "cuda"),
                        default="opencl")
    parser.add_argument("--transform")
    parser.add_argument("--edit-code", action="store_true")
    parser.add_argument("--occa-defines")
    parser.add_argument("--print-ir", action="store_true")
    args = parser.parse_args()

    # {{{ pick the target

    # Target modules are imported lazily so that only the selected one has
    # to be importable. Note that 'target' is a zero-argument callable (a
    # class or a lambda), not an instance.
    if args.target == "opencl":
        from loopy.target.opencl import OpenCLTarget
        target = OpenCLTarget
    elif args.target == "ispc":
        from loopy.target.ispc import ISPCTarget
        target = ISPCTarget
    elif args.target == "ispc-occa":
        from loopy.target.ispc import ISPCTarget
        target = lambda: ISPCTarget(occa_mode=True)  # noqa: E731
    elif args.target == "c":
        from loopy.target.c import CTarget
        target = CTarget
    elif args.target == "c-fortran":
        from loopy.target.c import CTarget
        target = lambda: CTarget(fortran_abi=True)  # noqa: E731
    elif args.target == "cuda":
        from loopy.target.cuda import CudaTarget
        target = CudaTarget
    else:
        # Report the requested name: the local 'target' is unbound here.
        raise ValueError("unknown target: %s" % args.target)

    lp.set_default_target(target)

    # }}}

    # {{{ read the input and deduce its language

    lang = None
    if args.infile == "-":
        infile_content = sys.stdin.read()
    else:
        from os.path import splitext
        _, ext = splitext(args.infile)

        lang = {
            ".py": "loopy",
            ".loopy": "loopy",
            ".floopy": "fortran",
            ".f90": "fortran",
            ".F90": "fortran",
            ".fpp": "fortran",
            ".f": "fortran",
            ".f77": "fortran",
            ".F77": "fortran",
        }.get(ext)
        with open(args.infile) as infile_fd:
            infile_content = infile_fd.read()

    # An explicit --lang always overrides the extension-based guess.
    if args.lang is not None:
        lang = args.lang

    if lang is None:
        raise RuntimeError("unable to deduce input language "
                           "(wrong input file extension? --lang flag?)")

    # }}}

    if lang == "loopy":
        # {{{ path wrangling

        from os.path import dirname, abspath
        from os import getcwd

        infile_dirname = dirname(args.infile)
        if infile_dirname:
            infile_dirname = abspath(infile_dirname)
        else:
            infile_dirname = getcwd()

        # Lets the input script import modules that live next to it.
        sys.path.append(infile_dirname)

        # }}}

        # Namespace shared by all exec'd snippets below.
        data_dic = {}
        data_dic["lp"] = lp
        data_dic["np"] = np

        # NOTE: the defines file, the input file and the transform file are
        # executed as Python code; only feed this tool trusted files.
        if args.occa_defines:
            with open(args.occa_defines) as defines_fd:
                occa_define_code = defines_to_python_code(defines_fd.read())
            exec(compile(occa_define_code, args.occa_defines, "exec"),
                 data_dic)

        # The content was already read above (possibly from stdin), so the
        # input file must not be opened a second time here.
        exec(compile(infile_content, args.infile, "exec"), data_dic)

        if args.transform:
            with open(args.transform) as xform_fd:
                exec(compile(xform_fd.read(), args.transform, "exec"),
                     data_dic)

        try:
            kernel = data_dic["lp_knl"]
        except KeyError:
            raise RuntimeError("loopy-lang requires 'lp_knl' "
                               "to be defined on exit")

        # If the script already produced a translation unit, use it as is.
        # Anything else is wrapped in a list and rejected below, because
        # converting kernels to a TranslationUnit is not implemented.
        if isinstance(kernel, lp.TranslationUnit):
            t_unit = kernel
        else:
            t_unit = [kernel]

    elif lang in ["fortran", "floopy", "fpp"]:
        pre_transform_code = None
        if args.transform:
            with open(args.transform) as xform_fd:
                pre_transform_code = xform_fd.read()

        if args.occa_defines:
            if pre_transform_code is None:
                pre_transform_code = ""

            # The defines are placed in front of the transform code.
            with open(args.occa_defines) as defines_fd:
                pre_transform_code = (
                    defines_to_python_code(defines_fd.read()) +
                    pre_transform_code)

        t_unit = lp.parse_transformed_fortran(
            infile_content,
            pre_transform_code=pre_transform_code,
            filename=args.infile)

    else:
        raise RuntimeError("unknown language: '%s'" % lang)

    if not isinstance(t_unit, lp.TranslationUnit):
        # FIXME
        assert isinstance(t_unit, list)  # of kernels
        raise NotImplementedError("convert list of kernels to TranslationUnit")

    if args.print_ir:
        print(t_unit, file=sys.stderr)

    t_unit = lp.preprocess_kernel(t_unit)
    cgr = lp.generate_code_v2(t_unit)
    code = cgr.device_code()

    # "-" (also the argparse default) means stdout.
    outfile = args.outfile if args.outfile is not None else "-"

    if outfile == "-":
        sys.stdout.write(code)
    else:
        with open(outfile, "w") as outfile_fd:
            outfile_fd.write(code)