Python Transformationの例、reikna.core.Transformation Pythonの例

コード例 #1

0

ファイルを表示

def fftshift(arr_t, axes=None):
    """
    Returns a frequency shift transformation (1 output, 1 input) that
    works as ``output = numpy.fft.fftshift(input, axes=axes)``.

    :param arr_t: array type descriptor used for both the output and the input;
        only its ``shape`` is inspected in this function.
    :param axes: iterable of axes to shift over, or ``None`` to shift over all axes.
        Negative values count from the last axis.
    :raises ValueError: if an axis lies outside ``[-ndim, ndim)``.

    .. warning::

        Involves repositioning of the elements, so cannot be used on inplace kernels.
    """

    ndim = len(arr_t.shape)
    if axes is None:
        axes = tuple(range(ndim))
    else:
        # The template tests ``dim in axes`` with non-negative ``dim``,
        # so negative axes have to be mapped to their non-negative equivalents here;
        # otherwise they would be silently ignored.
        normalized = set()
        for axis in axes:
            if not -ndim <= axis < ndim:
                raise ValueError(
                    "Axis " + str(axis) + " is out of bounds for an array with "
                    + str(ndim) + " dimension(s)")
            normalized.add(axis % ndim)
        axes = tuple(sorted(normalized))

    # The code taken from the FFTShift template for odd problem sizes
    # (at the moment of the writing).
    # Note the use of ``idxs`` template parameter to get access to element indices.
    return Transformation(
        [
            Parameter('output', Annotation(arr_t, 'o')),
            Parameter('input', Annotation(arr_t, 'i'))
        ],
        """
        <%
            dimensions = len(output.shape)
            new_idx_names = ['new_idx' + str(i) for i in range(dimensions)]
        %>
        %for dim in range(dimensions):
        VSIZE_T ${new_idx_names[dim]} =
            ${idxs[dim]}
            %if dim in axes:
                %if output.shape[dim] % 2 == 0:
                + (${idxs[dim]} < ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-output.shape[dim] // 2})
                %else:
                + (${idxs[dim]} <= ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-(output.shape[dim] // 2 + 1)})
                %endif
            %endif
            ;
        %endfor

        ${output.ctype} val = ${input.load_same};
        ${output.store_idx}(${', '.join(new_idx_names)}, val);
        """,
        connectors=['input'],
        render_kwds=dict(axes=axes))

コード例 #2

0

ファイルを表示

ファイル: transformations.py プロジェクト: drtpotter/reikna

def split_complex(input_arr_t):
    """
    Builds a transformation with one complex input and two real outputs
    (2 outputs, 1 input): ``real`` is written with the ``.x`` component of ``input``
    and ``imag`` with its ``.y`` component.

    Both outputs have the shape of ``input_arr_t`` and the data type returned by
    ``dtypes.real_for`` for its data type.
    """
    real_part_t = Type(dtypes.real_for(input_arr_t.dtype), shape=input_arr_t.shape)

    parameters = [
        Parameter(name, Annotation(real_part_t, 'o')) for name in ('real', 'imag')]
    parameters.append(Parameter('input', Annotation(input_arr_t, 'i')))

    code = """
            ${real.store_same}(${input.load_same}.x);
            ${imag.store_same}(${input.load_same}.y);
        """
    return Transformation(parameters, code)

コード例 #3

0

ファイルを表示

ファイル: transformations.py プロジェクト: xexo7C8/reikna

def copy(arr_t, out_arr_t=None):
    """
    Builds an identity transformation (1 output, 1 input): ``output = input``.

    ``out_arr_t`` describes the output array; when omitted, ``arr_t`` is used for both
    sides. It may differ from ``arr_t`` in strides, but a ``ValueError`` is raised
    if its shape or data type differs.
    """
    if out_arr_t is None:
        out_arr_t = arr_t
    elif out_arr_t.shape != arr_t.shape or out_arr_t.dtype != arr_t.dtype:
        raise ValueError("Input and output arrays must have the same shape and data type")

    parameters = [
        Parameter('output', Annotation(out_arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    return Transformation(parameters, "${output.store_same}(${input.load_same});")

コード例 #4

0

ファイルを表示

ファイル: transformations.py プロジェクト: drtpotter/reikna

def div_const(arr_t, param):
    """
    Builds a scaling transformation with the divisor baked into the kernel
    (1 output, 1 input): ``output = input / param``.

    The data type of ``param`` is detected with ``dtypes.detect_type``; the division
    function is requested with ``arr_t.dtype`` as its output data type.
    """
    param_dtype = dtypes.detect_type(param)

    parameters = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    div_func = functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype)
    param_literal = dtypes.c_constant(param, dtype=param_dtype)

    return Transformation(
        parameters,
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds={'div': div_func, 'param': param_literal})

コード例 #5

0

ファイルを表示

ファイル: transformations.py プロジェクト: xexo7C8/reikna

def broadcast_const(arr_t, val):
    """
    Builds a transformation that writes one constant to every element of the output
    (1 output): ``output = val``.

    ``val`` is first cast to ``arr_t.dtype``; a ``ValueError`` is raised if the cast
    value has a non-empty shape (that is, it is not a scalar).
    """
    val = dtypes.cast(arr_t.dtype)(val)
    if len(val.shape) > 0:
        raise ValueError("The constant must be a scalar")

    code = """
        const ${output.ctype} val = ${dtypes.c_constant(val)};
        ${output.store_same}(val);
        """
    return Transformation(
        [Parameter('output', Annotation(arr_t, 'o'))],
        code,
        render_kwds={'val': val})

コード例 #6

0

ファイルを表示

ファイル: demo_specialized_fft.py プロジェクト: xexo7C8/reikna

def prepare_irfft_output(arr):
    """
    Builds a transformation that unpacks each complex element of ``arr`` into two
    consecutive real elements of the output along the last axis:
    position ``2 * i`` receives ``.x`` and position ``2 * i + 1`` receives ``.y``.

    The output type has the real counterpart of ``arr.dtype`` and a last axis
    twice as long as that of ``arr``.
    """
    doubled_shape = arr.shape[:-1] + (arr.shape[-1] * 2, )
    res = Type(dtypes.real_for(arr.dtype), doubled_shape)

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """
    return Transformation(parameters, code, connectors=['output'])

コード例 #7

0

ファイルを表示

ファイル: transformations.py プロジェクト: xexo7C8/reikna

def combine_complex(output_arr_t):
    """
    Builds a transformation that assembles a complex output from two real inputs
    (1 output, 2 inputs): ``output = real + 1j * imag``.

    Both inputs have the shape of ``output_arr_t`` and the data type returned by
    ``dtypes.real_for`` for its data type.
    """
    real_part_t = Type(dtypes.real_for(output_arr_t.dtype), shape=output_arr_t.shape)

    parameters = [Parameter('output', Annotation(output_arr_t, 'o'))]
    for name in ('real', 'imag'):
        parameters.append(Parameter(name, Annotation(real_part_t, 'i')))

    code = """
        ${output.store_same}(
            COMPLEX_CTR(${output.ctype})(
                ${real.load_same},
                ${imag.load_same}));
        """
    return Transformation(parameters, code)

コード例 #8

0

ファイルを表示

def unimod_gen(size, single=True):
    """
    Builds a transformation that replaces every complex element by
    ``polar_unit(atan2(val.y, val.x))``, where ``val`` is the input element.

    ``single`` selects ``complex64``/``float32`` (default) or ``complex128``/``double``
    for the array and for the ``polar_unit`` function respectively.
    """
    dtype = np.complex64 if single else np.complex128

    parameters = [
        Parameter('output', Annotation(Type(dtype, size), 'o')),
        Parameter('input', Annotation(Type(dtype, size), 'i')),
    ]
    code = '''
        ${input.ctype} val = ${input.load_same};       
        ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        '''
    polar_unit = functions.polar_unit(dtype=np.float32 if single else np.double)
    return Transformation(parameters, code, render_kwds={'polar_unit': polar_unit})

コード例 #9

0

ファイルを表示

ファイル: demo_specialized_fft.py プロジェクト: xexo7C8/reikna

def prepare_rfft_input(arr):
    """
    Builds a transformation that packs pairs of consecutive real elements of ``arr``
    (along the last axis) into one complex output element:
    element ``2 * i`` becomes the real part and element ``2 * i + 1`` the imaginary part.

    The output type has the complex counterpart of ``arr.dtype`` and a last axis
    of length ``arr.shape[-1] // 2``.
    """
    halved_shape = arr.shape[:-1] + (arr.shape[-1] // 2, )
    res = Type(dtypes.complex_for(arr.dtype), halved_shape)

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} re = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2);
        ${input.ctype} im = ${input.load_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1);
        ${output.store_same}(COMPLEX_CTR(${output.ctype})(re, im));
        """
    return Transformation(parameters, code, connectors=['output'])

コード例 #10

0

ファイルを表示

def get_prepare_for_mul_trf(shape):
    """
    Builds a transformation that wraps each input element into the internal
    transformed type, passes it through the ``prepare_for_mul`` module and
    stores the ``val`` field of the result.

    Input and output both have the data type returned by ``transformed_dtype()``
    and the given ``shape``. ``ff_elem`` is taken from the enclosing scope.
    """
    dtype = transformed_dtype()

    parameters = [
        Parameter('output', Annotation(Type(dtype, shape), 'o')),
        Parameter('input', Annotation(Type(dtype, shape), 'i')),
    ]
    code = """
        ${dtypes.ctype(dtype)} x = ${input.load_same};
        ${ff_ctype} x_ff = { x };
        ${output.store_same}(${prepare_for_mul}(x_ff).val);
        """
    render_kwds = {
        'prepare_for_mul': prepare_for_mul(ff_elem=ff_elem).module,
        'dtype': dtype,
        'ff_ctype': transformed_internal_ctype(),
    }
    return Transformation(
        parameters, code, connectors=['input', 'output'], render_kwds=render_kwds)

コード例 #11

0

ファイルを表示

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Plans a sum over axis 0 of a ``(alpha.shape[0], self._max_click_order)`` array
        of ``float64`` values that are computed on the fly from ``alpha`` and ``beta``
        by a transformation attached to the reduction input.
        """
        plan = plan_factory()

        for_reduction = Type(numpy.float64, (alpha.shape[0], self._max_click_order))

        trf_parameters = [
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
        ]
        trf_code = """
                VSIZE_T sample_idx = ${idxs[0]};
                VSIZE_T order = ${idxs[1]} + 1;

                ${alpha.ctype} result = COMPLEX_CTR(${alpha.ctype})(1, 0);
                for (VSIZE_T i = 0; i < ${modes}; i++) {
                    ${alpha.ctype} alpha = ${alpha.load_idx}(sample_idx, i);
                    ${beta.ctype} beta = ${beta.load_idx}(sample_idx, i);
                    ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                    ${alpha.ctype} np = ${exp_c}(COMPLEX_CTR(${alpha.ctype})(-t.x, -t.y));

                    if (i >= order) {
                        result = ${mul_cc}(result, np);
                    }
                    else {
                        ${alpha.ctype} cp = COMPLEX_CTR(${alpha.ctype})(1 - np.x, -np.y);
                        result = ${mul_cc}(result, cp);
                    }
                }

                ${output.store_same}(result.x);
                """
        trf_render_kwds = {
            'mul_cc': functions.mul(alpha.dtype, alpha.dtype),
            'exp_c': functions.exp(alpha.dtype),
            'modes': self._system.modes,
        }
        meter_trf = Transformation(trf_parameters, trf_code, render_kwds=trf_render_kwds)

        # The reduction reads its input through the transformation,
        # so its signature becomes (output, alpha_p, beta_p).
        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output,
            alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)
        return plan

コード例 #12

0

ファイルを表示

def crop_frequencies(arr):
    """
    Builds a transformation that keeps only the first ``arr.shape[1] // 2 + 1`` columns
    of a 2D array whose columns represent frequencies, i.e. the frequencies with
    different absolute values.
    """
    kept_columns = arr.shape[1] // 2 + 1
    result_arr = Type(arr.dtype, (arr.shape[0], kept_columns))

    parameters = [
        Parameter('output', Annotation(result_arr, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        if (${idxs[1]} < ${input.shape[1] // 2 + 1})
            ${output.store_idx}(${idxs[0]}, ${idxs[1]}, ${input.load_same});
        """
    # Only the argument accessed through "load_same" can serve as a connector,
    # hence 'input' is the sole connector here.
    return Transformation(parameters, code, connectors=['input'])

コード例 #13

0

ファイルを表示

ファイル: transformations.py プロジェクト: drtpotter/reikna

def copy_broadcasted(arr_t, out_arr_t=None):
    """
    Builds an identity transformation (1 output, 1 input): ``output = input``,
    where ``input`` may be broadcasted (with the same semantics as ``numpy.broadcast_to()``).

    The output array type ``out_arr_t`` (defaults to ``arr_t``) may have different
    strides, but must have the same data type as ``arr_t`` and a shape that ``arr_t``
    is broadcastable to; a ``ValueError`` is raised otherwise.

    .. note::

        This is an input-only transformation.
    """
    if out_arr_t is None:
        out_arr_t = arr_t

    if out_arr_t.dtype != arr_t.dtype:
        raise ValueError(
            "Input and output arrays must have the same data type")

    if not Type.from_value(arr_t).broadcastable_to(Type.from_value(out_arr_t)):
        raise ValueError("Input is not broadcastable to output")

    parameters = [
        Parameter('output', Annotation(out_arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    # Input axes of length 1 are always read at index 0; the others use the
    # output index of the matching trailing axis.
    code = """
        ${output.store_same}(${input.load_idx}(
        %for i in range(len(input.shape)):
            %if input.shape[i] == 1:
            0
            %else:
            ${idxs[i + len(output.shape) - len(input.shape)]}
            %endif
            %if i != len(input.shape) - 1:
                ,
            %endif
        %endfor
        ));
        """
    return Transformation(parameters, code, connectors=['output'])

コード例 #14

0

ファイルを表示

ファイル: lwe_gpu.py プロジェクト: stjordanis/nufhe

    def _build_plan(self, plan_factory, device_params, output, matrix, vector):
        """
        Plans a sum over the last axis of ``matrix`` in which every element is first
        multiplied by the element of ``vector`` selected by the last index.
        The multiplication is a transformation attached to the reduction input.
        """
        plan = plan_factory()

        last_axis = len(matrix.shape)-1
        summation = Reduce(matrix, predicate_sum(matrix.dtype), axes=(last_axis,))

        trf_parameters = [
            Parameter('output', Annotation(matrix, 'o')),
            Parameter('matrix', Annotation(matrix, 'i')),
            Parameter('vector', Annotation(vector, 'i')),
        ]
        trf_code = """
            ${output.store_same}(${mul}(${matrix.load_same}, ${vector.load_idx}(${idxs[-1]})));
            """
        mul_vec = Transformation(
            trf_parameters,
            trf_code,
            render_kwds={'mul': functions.mul(matrix.dtype, vector.dtype)},
            connectors=['output', 'matrix'])

        summation.parameter.input.connect(
            mul_vec, mul_vec.output,
            matrix=mul_vec.matrix, vector=mul_vec.vector)

        plan.computation_call(summation, output, matrix, vector)
        return plan

コード例 #15

0

ファイルを表示

ファイル: lwe_gpu.py プロジェクト: stjordanis/nufhe

    def _build_plan(self, plan_factory, device_params, result, lwe_a, lwe_b, key):
        """
        Plans ``result = lwe_b - (lwe_a times key)``: a ``MatrixMulVector`` computation
        whose output is passed through a transformation that subtracts it from ``b``.
        """
        plan = plan_factory()

        mul_key = MatrixMulVector(lwe_a)

        trf_parameters = [
            Parameter('result', Annotation(result, 'o')),
            Parameter('b', Annotation(lwe_b, 'i')),
            Parameter('a_times_key', Annotation(lwe_b, 'i')),
        ]
        trf_code = """
            ${result.store_same}(${b.load_same} - ${a_times_key.load_same});
            """
        fill_res = Transformation(trf_parameters, trf_code, connectors=['a_times_key'])

        # After the connection the computation takes (result, b, <matrix>, <vector>).
        mul_key.parameter.output.connect(
            fill_res, fill_res.a_times_key,
            result=fill_res.result, b=fill_res.b)

        plan.computation_call(mul_key, result, lwe_b, lwe_a, key)
        return plan

コード例 #16

0

ファイルを表示

ファイル: demo_specialized_fft.py プロジェクト: xexo7C8/reikna

def get_prepare_iprfft_input(X):
    """
    Builds a transformation from ``X`` (last axis of length ``N // 4``) to ``Y``
    (last axis of length ``N // 4 + 1``, same data type).

    The first and the last output elements are built from the imaginary part of the
    first and the last input element respectively; every other element is built from
    the difference of two neighbouring input elements.
    """
    # Input: size N//4
    # Output: size N//4+1
    full_size = X.shape[-1] * 4
    out_type = Type(X.dtype, X.shape[:-1] + (full_size // 4 + 1, ))

    parameters = [
        Parameter('Y', Annotation(out_type, 'o')),
        Parameter('X', Annotation(X, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>

        ${Y.ctype} Y;
        if (${idxs[-1]} == 0)
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} 0);
            Y = COMPLEX_CTR(${Y.ctype})(-2 * X.y, 0);
        }
        else if (${idxs[-1]} == ${N//4})
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} ${N//4-1});
            Y = COMPLEX_CTR(${Y.ctype})(2 * X.y, 0);
        }
        else
        {
            ${X.ctype} X = ${X.load_idx}(${batch_idxs} ${idxs[-1]});
            ${X.ctype} X_prev = ${X.load_idx}(${batch_idxs} ${idxs[-1]} - 1);
            ${X.ctype} diff = X - X_prev;
            Y = COMPLEX_CTR(${Y.ctype})(-diff.y, diff.x);
        }

        ${Y.store_same}(Y);
        """
    return Transformation(
        parameters, code, connectors=['Y'], render_kwds={'N': full_size})

コード例 #17

0

ファイルを表示

ファイル: transformations.py プロジェクト: drtpotter/reikna

def norm_param(arr_t):
    """
    Builds a transformation with a scalar ``order`` parameter
    (1 output, 1 input, 1 param): ``output = abs(input) ** order``.

    The kernel applies the function from ``functions.norm`` to the input element
    and raises the result to the power ``order / 2``. For complex ``arr_t`` the
    output and ``order`` use the corresponding real data type; otherwise they use
    ``arr_t.dtype``.
    """
    in_dtype = arr_t.dtype
    out_dtype = dtypes.real_for(in_dtype) if dtypes.is_complex(in_dtype) else in_dtype

    parameters = [
        Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
        Parameter('order', Annotation(Type(out_dtype))),
    ]
    code = """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        norm = pow(norm, ${order} / 2);
        ${output.store_same}(norm);
        """
    return Transformation(
        parameters, code, render_kwds={'norm': functions.norm(arr_t.dtype)})

コード例 #18

0

ファイルを表示

ファイル: lwe_gpu.py プロジェクト: DucaturFw/nufhe

    def _build_plan(self, plan_factory, device_params, result_a, result_b,
                    result_cv, messages, key, noises_a, noises_b):
        """
        Plans two steps:

        * a ``MatrixMulVector`` computation on ``noises_a`` and ``key`` whose output
          goes through a transformation writing
          ``result_b = noises_b + messages + product`` and ``result_cv = noise ** 2``
          (``noise`` being ``self._noise``);
        * a plain copy of ``noises_a`` into ``result_a``.
        """
        plan = plan_factory()

        mul_key = MatrixMulVector(noises_a)

        trf_parameters = [
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('messages', Annotation(messages, 'i')),
            Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i')),
        ]
        trf_code = """
            ${result_b.store_same}(
                ${noises_b.load_same}
                + ${messages.load_same}
                + ${noises_a_times_key.load_same});
            ${result_cv.store_same}(${noise**2});
            """
        fill_b_cv = Transformation(
            trf_parameters,
            trf_code,
            connectors=['noises_a_times_key'],
            render_kwds={'noise': self._noise})

        mul_key.parameter.output.connect(
            fill_b_cv, fill_b_cv.noises_a_times_key,
            b=fill_b_cv.result_b,
            cv=fill_b_cv.result_cv,
            messages=fill_b_cv.messages,
            noises_b=fill_b_cv.noises_b)

        plan.computation_call(
            mul_key, result_b, result_cv, messages, noises_b, noises_a, key)

        copy_noises = PureParallel.from_trf(transformations.copy(noises_a))
        plan.computation_call(copy_noises, result_a, noises_a)

        return plan

コード例 #19

0

ファイルを表示

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Plans a sum over axis 0 of a ``(alpha.shape[0], self._max_moment)`` array
        of ``float64`` values. Element ``(sample, order)`` is the real part of the
        product over ``i = 0..order`` of ``alpha[sample, i] * beta[sample, i]``
        with the ``ordering`` value subtracted from the real part of each factor.
        """
        plan = plan_factory()

        for_reduction = Type(numpy.float64, (alpha.shape[0], self._max_moment))

        trf_parameters = [
            Parameter('output', Annotation(for_reduction, 'o')),
            Parameter('alpha', Annotation(alpha, 'i')),
            Parameter('beta', Annotation(beta, 'i')),
        ]
        trf_code = """
                VSIZE_T sample_idx = ${idxs[0]};
                VSIZE_T order = ${idxs[1]};

                ${alpha.ctype} result = COMPLEX_CTR(${alpha.ctype})(1, 0);
                for (VSIZE_T i = 0; i <= order; i++) {
                    ${alpha.ctype} alpha = ${alpha.load_idx}(sample_idx, i);
                    ${beta.ctype} beta = ${beta.load_idx}(sample_idx, i);
                    ${alpha.ctype} t = ${mul_cc}(alpha, beta);
                    t.x -= ${ordering};
                    result = ${mul_cc}(result, t);
                }
                ${output.store_same}(result.x);
                """
        trf_render_kwds = {
            'mul_cc': functions.mul(alpha.dtype, alpha.dtype),
            'ordering': ordering(self._representation),
        }
        meter_trf = Transformation(trf_parameters, trf_code, render_kwds=trf_render_kwds)

        # The reduction reads its input through the transformation,
        # so its signature becomes (output, alpha_p, beta_p).
        reduction = Reduce(for_reduction, predicate_sum(output.dtype), axes=(0,))
        reduction.parameter.input.connect(
            meter_trf, meter_trf.output,
            alpha_p=meter_trf.alpha, beta_p=meter_trf.beta)

        plan.computation_call(reduction, output, alpha, beta)
        return plan

コード例 #20

0

ファイルを表示

    def _build_plan(self, plan_factory, device_params, result, phase):
        """
        Plans an element-wise kernel that maps every ``phase`` value to
        ``((unsigned int)phase + half_interv) / interv``, where
        ``interv = 2**32 // self._mspace_size`` and ``half_interv = interv // 2``.
        """
        plan = plan_factory()

        trf_parameters = [
            Parameter('result', Annotation(result, 'o')),
            Parameter('phase', Annotation(phase, 'i')),
        ]
        trf_code = """
            <%
                interv = 2**32 // mspace_size
                half_interv = interv // 2
            %>
            ${phase.ctype} phase = ${phase.load_same};
            ${result.store_same}(((unsigned int)phase + ${half_interv}) / ${interv});
            """
        trf_render_kwds = {
            'mspace_size': self._mspace_size,
            'uint64': dtypes.ctype(numpy.uint64),
        }
        tr = Transformation(
            trf_parameters,
            trf_code,
            render_kwds=trf_render_kwds,
            connectors=['result', 'phase'])

        kernel = PureParallel.from_trf(tr, guiding_array='result')
        plan.computation_call(kernel, result, phase)

        return plan

コード例 #21

0

ファイルを表示

def get_tgsw_polynomial_decomp_trf(params: 'TGswParams', shape):
    """
    Builds a transformation that decomposes every element of ``sample`` into
    ``params.decomp_length`` digits of ``params.bs_log2_base`` bits each.

    :param params: parameter object; this function reads ``tlwe_params.mask_size``,
        ``tlwe_params.polynomial_degree``, ``decomp_length``, and the template reads
        ``bs_log2_base`` and ``offset``.
    :param shape: batch shape (a tuple) prepended to both array shapes.
    :returns: a transformation with the output ``result`` of shape
        ``shape + (mask_size + 1, decomp_length, polynomial_degree)`` (``Int32``)
        and the input ``sample`` of shape
        ``shape + (mask_size + 1, polynomial_degree)`` (``Torus32``).
    """
    tlwe_params = params.tlwe_params
    decomp_length = params.decomp_length
    mask_size = tlwe_params.mask_size
    polynomial_degree = tlwe_params.polynomial_degree

    result = Type(Int32, shape + (mask_size + 1, decomp_length, polynomial_degree))
    sample = Type(Torus32, shape + (mask_size + 1, polynomial_degree))
    return Transformation([
        Parameter('result', Annotation(result, 'o')),
        Parameter('sample', Annotation(sample, 'i'))],
        """
        <%
            mask = 2**params.bs_log2_base - 1
            half_base = 2**(params.bs_log2_base - 1)
        %>
        ${sample.ctype} sample = ${sample.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
        int decomp_shift = 32 - (${idxs[-2]} + 1) * ${params.bs_log2_base};
        ${result.store_same}(
            (((sample + (${params.offset})) >> decomp_shift) & ${mask}) - ${half_base}
        );
        """,
        # The connector must name an existing parameter: ``result`` is the one accessed
        # through ``store_same``, while ``sample`` has a different shape and is read
        # with explicit indices.
        connectors=['result'],
        render_kwds=dict(params=params))

コード例 #22

0

ファイルを表示

ファイル: test_transformation.py プロジェクト: ringw/reikna

def test_io_parameter_in_transformation():
    """Constructing a transformation with an 'io' parameter is expected to raise ``ValueError``."""
    with pytest.raises(ValueError):
        io_type = Type(numpy.float32, shape=100)
        io_param = Parameter('o1', Annotation(io_type, 'io'))
        Transformation([io_param], "${o1.store_same}(${o1.load_same});")

コード例 #23

0

ファイルを表示

ファイル: test_transformation.py プロジェクト: ringw/reikna

def tr_identity(arr):
    """Builds a transformation that stores input ``i1`` unchanged into output ``o1``; both use ``arr``."""
    parameters = [
        Parameter('o1', Annotation(arr, 'o')),
        Parameter('i1', Annotation(arr, 'i')),
    ]
    return Transformation(parameters, "${o1.store_same}(${i1.load_same});")

コード例 #24

0

ファイルを表示

            result.cur_min = ${v2}.cur_min;
        if (${v2}.cur_max > result.cur_max)
            result.cur_max = ${v2}.cur_max;
        return result;
        """,
                   render_kwds=dict(ctype=mmc_c_decl)), empty)

# Test array: 20000 random integers drawn with numpy.random.randint(0, 10**6, ...)
arr = numpy.random.randint(0, 10**6, 20000)

# A transformation that creates initial minmax structures for the given array of integers.
# The output has the same shape as ``arr`` and elements of ``mmc_dtype``
# (defined outside this fragment); both ``cur_min`` and ``cur_max`` of every
# structure are initialized with the corresponding input element.
to_mmc = Transformation([
    Parameter('output', Annotation(Type(mmc_dtype, arr.shape), 'o')),
    Parameter('input', Annotation(arr, 'i'))
], """
    ${output.ctype} res;
    res.cur_min = ${input.load_same};
    res.cur_max = ${input.load_same};
    ${output.store_same}(res);
    """)

# Create the reduction computation and attach the transformation above to its input.
# ``predicate`` and ``thr`` come from outside this fragment.
# After the connection the input of the computation is exposed as ``new_input``.
reduction = Reduce(to_mmc.output, predicate)
reduction.parameter.input.connect(to_mmc,
                                  to_mmc.output,
                                  new_input=to_mmc.input)
creduction = reduction.compile(thr)

# Run the computation
# Upload the test array and allocate a device array shaped like the reduction output.
arr_dev = thr.to_device(arr)
res_dev = thr.empty_like(reduction.parameter.output)

コード例 #25

0

ファイルを表示

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Builds the computation plan in four stages:

        1. the ``compound_click_probability_prepare`` kernel fills a temporary array
           shaped like ``alpha`` from ``alpha`` and ``beta``;
        2. the ``compound_click_probability_aggregate`` kernel fills a
           ``(samples, self._max_total_clicks + 1)`` temporary array from it;
        3. that array is summed over axis 0;
        4. an FFT is applied to the sums (the last call argument ``True`` is passed
           to the FFT computation) and the real part of its output is stored
           in ``output``.

        The aggregate kernel is requested with progressively smaller work group sizes
        until it fits the device resources.
        """

        plan = plan_factory()

        samples, modes = alpha.shape

        for_reduction = Type(alpha.dtype, (samples, self._max_total_clicks + 1))

        prepared_state = plan.temp_array_like(alpha)

        plan.kernel_call(
            TEMPLATE.get_def("compound_click_probability_prepare"),
            [prepared_state, alpha, beta],
            kernel_name="compound_click_probability_prepare",
            global_size=alpha.shape,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        # Block size is limited by the amount of available local memory.
        # In some OpenCL implementations the number reported cannot actually be fully used
        # (because it's used by kernel arguments), so we're padding it a little.
        local_mem_size = device_params.local_mem_size
        max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
        block_size = 2**helpers.log2(max_elems)

        # No reason to have block size larger than the number of modes
        block_size = min(block_size, helpers.bounding_power_of_2(modes))

        products_gsize = (samples, helpers.min_blocks(self._max_total_clicks + 1, block_size) * block_size)
        products = plan.temp_array_like(for_reduction)

        # These depend only on ``block_size``, which the retry loop below does not change.
        full_steps = modes // block_size
        remainder_size = modes % block_size

        read_size = min(block_size, device_params.max_work_group_size)

        # NOTE(review): if ``read_size`` starts at 1, or every attempt fails,
        # no aggregate kernel is added to the plan and ``products`` is never written.
        while read_size > 1:

            try:
                plan.kernel_call(
                    TEMPLATE.get_def("compound_click_probability_aggregate"),
                    [products, prepared_state],
                    kernel_name="compound_click_probability_aggregate",
                    global_size=products_gsize,
                    local_size=(1, read_size,),
                    render_kwds=dict(
                        block_size=block_size,
                        read_size=read_size,
                        full_steps=full_steps,
                        remainder_size=remainder_size,
                        output_size=self._max_total_clicks + 1,
                        mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                        add_cc=functions.add(alpha.dtype, alpha.dtype),
                        polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                        modes=self._system.modes,
                        max_total_clicks=self._max_total_clicks,
                        ))

            except OutOfResourcesError:
                # The kernel does not fit with this work group size:
                # halve it and try again instead of leaving the loop.
                read_size //= 2
                continue

            # The kernel was accepted, no further attempts are necessary.
            break

        reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

        temp = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, temp, products)

        fft = FFT(temp)
        # Keeps only the real part (``.x``) of every FFT output element.
        real_trf = Transformation([
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
            ],
            """
                ${input.ctype} val = ${input.load_same};
                ${output.store_same}(val.x);
                """)
        fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

        plan.computation_call(fft, output, temp, True)

        return plan

コード例 #26

0

ファイルを表示

ファイル: isaacudaimpl.py プロジェクト: kaizhongkaizhong/WaveSyn

def get_procs(thr, N):
    """
    Creates and compiles four computations on ``thr`` for arrays of length ``N``:

    * ``fft_unimod`` -- the FFT from ``FFTFactory`` whose output goes through a
      transformation that zeroes the elements with index above ``N/2`` and replaces
      the others by ``polar_unit(atan2(val.y, val.x))``;
    * ``mag_square`` -- writes ``x*x + y*y`` to the real part and 0 to the imaginary part;
    * ``apply_mask`` -- multiplies a complex array by a real ``mask`` element-wise;
    * ``combine_mag_phi`` -- builds ``polar(r, angle)`` where ``r`` is the square root of
      the real part of ``mag_square`` (clamped at 0) and ``angle`` is the argument of ``phase``.

    Returns them as a tuple in that order.
    """
    def complex_vec():
        return Type(np.complex128, N)

    fft = FFTFactory.create(thr, (N, ), compile_=False)

    unimod_code = """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """
    unimod_trans = Transformation(
        [
            Parameter('output', Annotation(complex_vec(), 'o')),
            Parameter('input', Annotation(complex_vec(), 'i')),
        ],
        unimod_code,
        render_kwds={
            'polar_unit': functions.polar_unit(dtype=np.float64),
            'N': N,
        })
    fft.parameter.output.connect(
        unimod_trans, unimod_trans.input, uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    mag_square_code = '''
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);  
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
        '''
    mag_square = PureParallel(
        [
            Parameter('output', Annotation(complex_vec(), 'o')),
            Parameter('input', Annotation(complex_vec(), 'i')),
        ],
        mag_square_code).compile(thr)

    apply_mask_code = '''
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));        
        '''
    apply_mask = PureParallel(
        [
            Parameter('output', Annotation(complex_vec(), 'o')),
            Parameter('origin', Annotation(complex_vec(), 'i')),
            Parameter('mask', Annotation(Type(np.double, N), 'i')),
        ],
        apply_mask_code,
        render_kwds={'mul': functions.mul(np.complex128, np.double)}).compile(thr)

    combine_code = '''
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;  
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
        '''
    combine_mag_phi = PureParallel(
        [
            Parameter('output', Annotation(complex_vec(), 'o')),
            Parameter('mag_square', Annotation(complex_vec(), 'i')),
            Parameter('phase', Annotation(complex_vec(), 'i')),
        ],
        combine_code,
        render_kwds={
            'pow': functions.pow(np.double),
            'polar': functions.polar(np.double),
        }).compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi

コード例 #27

0

ファイルを表示

def closing(img, numiter=1):
    """Applies ``dilate`` and then ``erode`` to ``img``, each with ``numiter`` iterations."""
    dilated = dilate(img, numiter)
    return erode(dilated, numiter)


def border(img):
    """Runs the ``prg.border`` kernel on ``img`` once through ``repeat_kernel``."""
    iterations = 1
    return repeat_kernel(img, prg.border, iterations)


# Create LUT and stringify into preamble of map kernel.
# LUT[v] is the number of set bits in the byte value v: for every bit position
# the entries whose index has that bit set are incremented.
LUT = np.zeros(256, np.int32)
for b in range(8):  # ``range`` instead of ``xrange`` so this also runs on Python 3
    LUT[(np.arange(256) & (1 << b)) != 0] += 1
strLUT = "constant int LUT[256] = {" + ",".join(map(str, LUT)) + "};\n"

# A transformation that maps every input byte to its bit count through the LUT.
# The LUT declaration is prepended to the transformation code.
byte_to_count = Transformation([
    Parameter('output', Annotation(Type(np.int32, (1, )), 'o')),
    Parameter('input', Annotation(Type(np.uint8, (1, )), 'i'))
], strLUT + """
        ${output.store_same}(LUT[${input.load_same}]);
    """)

# Summation predicate with 0 as the empty value.
# The snippet is rendered as a C statement, so it must end with a semicolon.
predicate = Predicate(
    Snippet.create(lambda v1, v2: """return ${v1} + ${v2};"""), np.int32(0))

# Sum of bit counts: the reduction reads its input through ``byte_to_count``.
sum_bits_reduction = Reduce(byte_to_count.output, predicate)
sum_bits_reduction.parameter.input.connect(byte_to_count,
                                           byte_to_count.output,
                                           new_input=byte_to_count.input)
sum_bits = sum_bits_reduction.compile(thr)
# Earlier ReductionKernel-based variant, kept for reference:
#sum_byte_count = ReductionKernel(cx, np.int32, neutral="0",
#                    reduce_expr="a+b", map_expr="LUT[bytes[i]]",
#                    arguments="__global unsigned char *bytes",
#                    preamble=strLUT)

コード例 #28

0

ファイルを表示

ファイル: tlwe_gpu.py プロジェクト: tko-heyjude/nufhe

    def _build_plan(self, plan_factory, device_params, result_a, result_cv,
                    key, noises1, noises2):
        """
        Builds the computation plan:

        1. forward-transform ``key`` and ``noises1`` into temporary arrays;
        2. run the inverse transform on the product of the two transformed arrays
           (the product is computed by a transformation attached to the inverse
           transform's input, using ``transform.transformed_mul``);
        3. call the ``tlwe_encrypt_zero_fill_result`` kernel with ``result_a``,
           ``result_cv``, ``noises1``, ``noises2`` and the inverse transform result.
        """

        plan = plan_factory()

        polynomial_degree = self._polynomial_degree
        # Everything except the last two axes of ``result_a`` is treated as batch.
        batch_shape = result_a.shape[:-2]
        batch_len = helpers.product(batch_shape)

        perf_params = self._perf_params

        transform = get_transform(self._transform_type)

        # Forward transform of the key and a temporary array for its output.
        ft_key = transform.ForwardTransform(key.shape[:-1], polynomial_degree,
                                            perf_params)
        key_tr = plan.temp_array_like(ft_key.parameter.output)

        # Forward transform of the first noise array and a temporary array for its output.
        ft_noises = transform.ForwardTransform(noises1.shape[:-1],
                                               polynomial_degree, perf_params)
        noises1_tr = plan.temp_array_like(ft_noises.parameter.output)

        # Inverse transform and a temporary array for its output.
        ift = transform.InverseTransform(noises1.shape[:-1], polynomial_degree,
                                         perf_params)
        ift_res = plan.temp_array_like(ift.parameter.output)

        # Multiplies the transformed key by the transformed noise.
        # ``key`` is read with the last two indices only, ``noises1`` with the full index.
        mul_tr = Transformation(
            [
                Parameter('output', Annotation(ift.parameter.input, 'o')),
                Parameter('key', Annotation(key_tr, 'i')),
                Parameter('noises1', Annotation(noises1_tr, 'i'))
            ],
            """
            ${output.store_same}(${tr_ctype}unpack(${mul}(
                ${tr_ctype}pack(${key.load_idx}(${idxs[-2]}, ${idxs[-1]})),
                ${tr_ctype}pack(${noises1.load_same})
                )));
            """,
            connectors=['output', 'noises1'],
            render_kwds=dict(mul=transform.transformed_mul(perf_params),
                             tr_ctype=transform.transformed_internal_ctype()))

        # After the connection the inverse transform takes (output, key, noises1).
        ift.parameter.input.connect(mul_tr,
                                    mul_tr.output,
                                    key=mul_tr.key,
                                    noises1=mul_tr.noises1)

        plan.computation_call(ft_key, key_tr, key)
        plan.computation_call(ft_noises, noises1_tr, noises1)
        plan.computation_call(ift, ift_res, key_tr, noises1_tr)
        # The kernel runs over (batch, mask_size + 1, polynomial_degree); the ``*_slices``
        # tuples start with the number of batch dimensions.
        plan.kernel_call(TEMPLATE.get_def("tlwe_encrypt_zero_fill_result"),
                         [result_a, result_cv, noises1, noises2, ift_res],
                         global_size=(batch_len, self._mask_size + 1,
                                      polynomial_degree),
                         render_kwds=dict(noise=self._noise,
                                          mask_size=self._mask_size,
                                          noises1_slices=(len(batch_shape), 1,
                                                          1),
                                          noises2_slices=(len(batch_shape), 1),
                                          cv_slices=(len(batch_shape), )))

        return plan