예제 #1
0
파일: dummy.py 프로젝트: ringw/reikna
    def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
        """
        Build a plan made of one nested ``Dummy`` computation followed by one kernel.

        Both steps intentionally pass a single array in two argument positions:
        the nested computation receives ``C`` twice, and the kernel receives the
        array obtained from ``plan.temp_array_like(C)`` twice.

        The kernel stores ``C[i, j] + mul(D[i, j], coeff)`` (in terms of its own
        parameter names) into its first argument. ``device_params`` is not used.

        Returns the populated plan.
        """
        # Mako source of the only kernel used by this plan.
        kernel_src = """
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """

        plan = plan_factory()
        inner = Dummy(C, D, coeff1, same_A_B=True)

        tmp_C = plan.temp_array_like(C)
        # NOTE(review): this array is requested but never referenced afterwards.
        tmp_D = plan.temp_array_like(D)

        # Computation call where one argument (``C``) fills two parameters.
        plan.computation_call(inner, tmp_C, D, C, C, coeff1)

        multiply = functions.mul(C.dtype, coeff2.dtype)
        # NOTE(review): the division module is created but not passed to the kernel.
        divide = functions.div(C.dtype, coeff2.dtype)

        kernels = template_from(kernel_src)

        # Kernel call where one argument (``tmp_C``) fills two parameters.
        kernel_args = [C, tmp_C, tmp_C, coeff2]
        plan.kernel_call(
            kernels.get_def("dummy"),
            kernel_args,
            global_size=C.shape,
            render_kwds={"mul": multiply},
        )

        return plan
예제 #2
0
    def _build_plan(self, plan_factory, device_params, C, D, coeff1, coeff2):
        """
        Assemble a plan: a nested ``Dummy`` computation, then a single kernel.

        Each of the two steps is given the same array for two of its
        parameters (``C`` for the computation, the ``temp_array_like(C)``
        array for the kernel). ``device_params`` is not used.

        Returns the populated plan.
        """
        # Source of the kernel; it writes ``C + mul(D, coeff)`` element-wise
        # (using the template's own parameter names) into ``CC``.
        dummy_src = """
        <%def name="dummy(kernel_declaration, CC, C, D, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1,
                ${C.load_idx}(idx0, idx1) +
                ${mul}(${D.load_idx}(idx0, idx1), ${coeff}));
        }
        </%def>
        """

        plan = plan_factory()
        nested_comp = Dummy(C, D, coeff1, same_A_B=True)

        C_scratch = plan.temp_array_like(C)
        # NOTE(review): requested from the plan but not used by any call below.
        D_scratch = plan.temp_array_like(D)

        # Same argument (``C``) supplied for two parameters of the computation.
        plan.computation_call(nested_comp, C_scratch, D, C, C, coeff1)

        mul_module = functions.mul(C.dtype, coeff2.dtype)
        # NOTE(review): created but never handed to the kernel.
        div_module = functions.div(C.dtype, coeff2.dtype)

        template = template_from(dummy_src)

        # Same argument (``C_scratch``) supplied for two parameters of the kernel.
        plan.kernel_call(
            template.get_def('dummy'),
            [C, C_scratch, C_scratch, coeff2],
            global_size=C.shape,
            render_kwds={'mul': mul_module})

        return plan
예제 #3
0
    def _build_plan(self, plan_factory, device_params, output):
        """
        Build a single-kernel plan that fills ``output`` from two constant arrays.

        ``self._arr1`` and ``self._arr2`` are registered with the plan through
        ``plan.constant_array``. For every index ``i`` the kernel stores
        ``(arr2[0, i] + arr2[1, i]) * arr1[i]`` into ``output``.
        ``device_params`` is not used.

        Returns the populated plan.
        """
        kernel_src = """
        <%def name="dummy(kernel_declaration, output, arr1, arr2)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            const VSIZE_T i = virtual_global_id(0);
            ${arr1.ctype} x1 = ${arr1.load_idx}(i);
            ${arr2.ctype} x2 = ${arr2.load_idx}(0, i);
            ${arr2.ctype} x3 = ${arr2.load_idx}(1, i);
            ${output.store_idx}(i, (x2 + x3) * x1);
        }
        </%def>
        """

        plan = plan_factory()
        kernels = template_from(kernel_src)

        const_1 = plan.constant_array(self._arr1)
        const_2 = plan.constant_array(self._arr2)

        # Argument order follows the template signature: output, arr1, arr2.
        kernel_args = [output, const_1, const_2]
        plan.kernel_call(
            kernels.get_def('dummy'),
            kernel_args,
            global_size=output.shape)

        return plan
예제 #4
0
파일: dummy.py 프로젝트: ringw/reikna
    def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
        """
        Build a plan of two kernels that communicate through two intermediate arrays.

        * ``dummy`` loads ``A`` and ``B``, applies ``mul`` / ``div`` with ``coeff``
          and stores the results into arrays obtained from
          ``plan.temp_array_like``. The ``C`` result is stored with swapped indices.
        * ``dummy2`` copies the first intermediate array into ``C`` and stores
          ``mul(D_intermediate, const_coeff) + pers_arr`` into ``D``.

        ``self._same_A_B`` is forwarded to both templates, where it selects
        between two-index and one-index access to ``B`` / ``D``.
        ``self._test_kernel_adhoc_array`` decides whether the raw
        ``self._persistent_array`` or the plan-registered array is passed to the
        second kernel, and ``self._test_untyped_scalar`` decides whether the
        constant is a plain ``10`` or ``numpy.float32(10)``.
        ``device_params`` is not used.

        Returns the populated plan.
        """
        kernels_src = """
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """

        plan = plan_factory()

        multiply = functions.mul(C.dtype, coeff.dtype)
        divide = functions.div(C.dtype, coeff.dtype)

        kernels = template_from(kernels_src)

        # Both kernels are launched with the same 8x8 local size.
        work_group = (8, 8)

        tmp_C = plan.temp_array_like(C)
        tmp_D = plan.temp_array_like(D)
        registered = plan.persistent_array(self._persistent_array)

        same_A_B = self._same_A_B

        # First kernel: A, B -> intermediate arrays.
        plan.kernel_call(
            kernels.get_def("dummy"),
            [tmp_C, tmp_D, A, B, coeff],
            global_size=A.shape,
            local_size=work_group,
            render_kwds={"mul": multiply, "div": divide, "same_A_B": same_A_B},
        )

        second_def = kernels.get_def("dummy2")

        # Either hand the raw array straight to the kernel or use the one
        # registered with the plan above.
        if self._test_kernel_adhoc_array:
            pers_arg = self._persistent_array
        else:
            pers_arg = registered

        # Either a plain Python scalar or an explicitly typed one.
        if self._test_untyped_scalar:
            const_arg = 10
        else:
            const_arg = numpy.float32(10)

        # Second kernel: intermediate arrays -> C, D.
        plan.kernel_call(
            second_def,
            [C, D, tmp_C, tmp_D, pers_arg, const_arg],
            global_size=A.shape,
            local_size=work_group,
            render_kwds={"mul": multiply, "same_A_B": same_A_B},
        )

        return plan
예제 #5
0
파일: kernel.py 프로젝트: ringw/reikna
 def __init__(self, template_src, render_kwds=None):
     """
     Store the template built from ``template_src`` and a private copy of ``render_kwds``.

     ``render_kwds`` may be ``None`` (stored as an empty dict) or anything
     accepted by ``dict()``; the caller's object is never stored directly.
     """
     self.template = template_from(template_src)

     if render_kwds is None:
         kwds_copy = {}
     else:
         kwds_copy = dict(render_kwds)
     self.render_kwds = kwds_copy
예제 #6
0
 def __init__(self, template_src, render_kwds=None):
     """
     Keep the template created from ``template_src`` together with its render keywords.

     A missing (``None``) ``render_kwds`` becomes an empty dict; otherwise a
     new dict is made from it, so later changes to the caller's mapping do not
     affect this object.
     """
     self.template = template_from(template_src)

     if render_kwds is not None:
         self.render_kwds = dict(render_kwds)
     else:
         self.render_kwds = {}
예제 #7
0
    def _build_plan(self, plan_factory, device_params, C, D, A, B, coeff):
        """
        Assemble a plan consisting of the ``dummy`` and ``dummy2`` kernels.

        ``dummy`` reads ``A`` and ``B``, combines them with ``coeff`` through
        ``mul`` and ``div``, and stores the results into two arrays obtained
        from ``plan.temp_array_like`` (the ``C`` result with swapped indices).
        ``dummy2`` then copies the first of those arrays into ``C`` and stores
        ``mul(second_array, const_coeff) + pers_arr`` into ``D``.

        Instance flags read here:

        * ``self._same_A_B`` -- passed to both templates; selects two-index or
          one-index access to ``B`` / ``D``.
        * ``self._test_kernel_adhoc_array`` -- when true, ``self._persistent_array``
          itself is passed to ``dummy2`` instead of the array registered via
          ``plan.persistent_array``.
        * ``self._test_untyped_scalar`` -- when true, the constant is the plain
          integer ``10`` instead of ``numpy.float32(10)``.

        ``device_params`` is not used. Returns the populated plan.
        """
        template_src = """
        <%def name="dummy(kernel_declaration, C, D, A, B, coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${A.ctype} a = ${A.load_idx}(idx0, idx1);
            ${C.ctype} c = ${mul}(a, ${coeff});
            ${C.store_idx}(idx1, idx0, c);

            %if same_A_B:
                ${B.ctype} b = ${B.load_idx}(idx0, idx1);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, idx1, d);
            %else:
            if (idx1 == 0)
            {
                ${B.ctype} b = ${B.load_idx}(idx0);
                ${D.ctype} d = ${div}(b, ${coeff});
                ${D.store_idx}(idx0, d);
            }
            %endif
        }
        </%def>

        <%def name="dummy2(kernel_declaration, CC, DD, C, D, pers_arr, const_coeff)">
        ${kernel_declaration}
        {
            VIRTUAL_SKIP_THREADS;
            VSIZE_T idx0 = virtual_global_id(0);
            VSIZE_T idx1 = virtual_global_id(1);

            ${CC.store_idx}(idx0, idx1, ${C.load_idx}(idx0, idx1));

            %if same_A_B:
                ${DD.store_idx}(
                    idx0, idx1,
                    ${mul}(${D.load_idx}(idx0, idx1), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0, idx1));
            %else:
            if (idx1 == 0)
            {
                ${DD.store_idx}(
                    idx0,
                    ${mul}(${D.load_idx}(idx0), ${const_coeff}) +
                        ${pers_arr.load_idx}(idx0));
            }
            %endif
        }
        </%def>
        """

        plan = plan_factory()

        mul_module = functions.mul(C.dtype, coeff.dtype)
        div_module = functions.div(C.dtype, coeff.dtype)

        template = template_from(template_src)

        # Local size shared by both kernel launches.
        local_size = (8, 8)

        C_scratch = plan.temp_array_like(C)
        D_scratch = plan.temp_array_like(D)
        plan_pers_arr = plan.persistent_array(self._persistent_array)

        same_A_B = self._same_A_B

        # Stage 1: fill the scratch arrays from A and B.
        first_kwds = {'mul': mul_module, 'div': div_module, 'same_A_B': same_A_B}
        plan.kernel_call(
            template.get_def('dummy'),
            [C_scratch, D_scratch, A, B, coeff],
            global_size=A.shape,
            local_size=local_size,
            render_kwds=first_kwds)

        second_def = template.get_def('dummy2')

        # Choose between the raw array and the one registered with the plan.
        if self._test_kernel_adhoc_array:
            pers_arg = self._persistent_array
        else:
            pers_arg = plan_pers_arr

        # Choose between an untyped and an explicitly typed scalar.
        if self._test_untyped_scalar:
            scalar_arg = 10
        else:
            scalar_arg = numpy.float32(10)

        # Stage 2: produce C and D from the scratch arrays.
        second_kwds = {'mul': mul_module, 'same_A_B': same_A_B}
        plan.kernel_call(
            second_def,
            [C, D, C_scratch, D_scratch, pers_arg, scalar_arg],
            global_size=A.shape,
            local_size=local_size,
            render_kwds=second_kwds)

        return plan