Python add Exemples, reikna.cluda.functions.add Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : generate_gpu.py Projet : fjarri/squeezed-sim

    def _build_plan(self, plan_factory, device_params, alpha, beta, seed):
        """
        Assembles a plan made of a single ``generate`` kernel call.

        The kernel is rendered from the ``generate_input_state`` definition of
        ``TEMPLATE`` and receives ``alpha``, ``beta``, the system's squeezing and
        decoherence data (registered as persistent arrays of the plan), and ``seed``.
        Its global size is ``alpha.shape``.

        ``device_params`` is accepted but not used here.
        Returns the plan obtained from ``plan_factory()``.
        """
        plan = plan_factory()

        rng_bijection = philox(64, 2)

        # The key generator always gets a zero seed, which keeps the rendered kernel
        # identical between calls so it can be cached.
        # The real seed is handed to the kernel as a computation parameter.
        rng_keygen = KeyGenerator.create(rng_bijection, seed=numpy.int32(0))

        normal_sampler = normal_bm(rng_bijection, numpy.float64)

        squeezing = plan.persistent_array(self._system.squeezing)
        decoherence = plan.persistent_array(self._system.decoherence)

        kernel_template = TEMPLATE.get_def("generate_input_state")
        kernel_args = [alpha, beta, squeezing, decoherence, seed]

        # Names made available inside the kernel template.
        render_kwds = {
            "system": self._system,
            "representation": self._representation,
            "Representation": Representation,
            "bijection": rng_bijection,
            "keygen": rng_keygen,
            "sampler": normal_sampler,
            "ordering": ordering,
            "exp": functions.exp(numpy.float64),
            "mul_cr": functions.mul(numpy.complex128, numpy.float64),
            "add_cc": functions.add(numpy.complex128, numpy.complex128),
        }

        plan.kernel_call(
            kernel_template,
            kernel_args,
            kernel_name="generate",
            global_size=alpha.shape,
            render_kwds=render_kwds,
        )

        return plan

Exemple #2

0

Afficher le fichier

Fichier : transformations.py Projet : fjarri/reikna

def add_param(arr_t, param_dtype):
    """
    Builds an addition transformation whose addend is supplied at call time
    (1 output, 1 input, 1 scalar): ``output = input + param``.

    The addition function is requested with ``out_dtype`` set to the dtype of ``arr_t``.
    """
    signature = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
        Parameter('param', Annotation(param_dtype)),
    ]
    snippet = "${output.store_same}(${add}(${input.load_same}, ${param}));"
    adder = functions.add(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype)
    return Transformation(signature, snippet, render_kwds=dict(add=adder))

Exemple #3

0

Afficher le fichier

Fichier : transformations.py Projet : xexo7C8/reikna

def add_param(arr_t, param_dtype):
    """
    Creates a transformation computing ``output = input + param``,
    where ``param`` is a scalar passed dynamically (1 output, 1 input, 1 scalar).

    ``output`` and ``input`` are annotated with ``arr_t``, ``param`` with ``param_dtype``;
    the result of the addition has the dtype of ``arr_t``.
    """
    out_param = Parameter('output', Annotation(arr_t, 'o'))
    in_param = Parameter('input', Annotation(arr_t, 'i'))
    scalar_param = Parameter('param', Annotation(param_dtype))

    code = "${output.store_same}(${add}(${input.load_same}, ${param}));"
    render_kwds = {
        'add': functions.add(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
    }

    return Transformation(
        [out_param, in_param, scalar_param], code, render_kwds=render_kwds)

Exemple #4

0

Afficher le fichier

Fichier : transformations.py Projet : fjarri/reikna

def add_const(arr_t, param):
    """
    Builds an addition transformation whose addend is baked into the code
    (1 output, 1 input): ``output = input + param``.

    The dtype of ``param`` is detected with ``dtypes.detect_type``, and the value
    is rendered into the snippet as a C constant of that dtype.
    """
    param_dtype = dtypes.detect_type(param)

    signature = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    snippet = "${output.store_same}(${add}(${input.load_same}, ${param}));"

    adder = functions.add(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype)
    constant = dtypes.c_constant(param, dtype=param_dtype)

    return Transformation(
        signature, snippet, render_kwds=dict(add=adder, param=constant))

Exemple #5

0

Afficher le fichier

Fichier : transformations.py Projet : xexo7C8/reikna

def add_const(arr_t, param):
    """
    Creates a transformation computing ``output = input + param``
    for a fixed value of ``param`` (1 output, 1 input).

    ``param`` does not become a transformation parameter: its dtype is detected
    and the value is substituted into the code as a C constant.
    The result of the addition has the dtype of ``arr_t``.
    """
    param_dtype = dtypes.detect_type(param)

    out_param = Parameter('output', Annotation(arr_t, 'o'))
    in_param = Parameter('input', Annotation(arr_t, 'i'))

    code = "${output.store_same}(${add}(${input.load_same}, ${param}));"
    render_kwds = {
        'add': functions.add(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
        'param': dtypes.c_constant(param, dtype=param_dtype),
    }

    return Transformation([out_param, in_param], code, render_kwds=render_kwds)

Exemple #6

0

Afficher le fichier

Fichier : test_functions.py Projet : xexo7C8/reikna

def test_multiarg_add(thr, out_code, in_codes):
    """
    Checks multi-argument add() with a variety of data types.

    ``out_code`` and ``in_codes`` are translated into dtypes by ``generate_dtypes``;
    the function built by ``functions.add`` is then compared against a reference
    that sums its arguments and casts the result to the output dtype.
    """

    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)

    def reference_add(*args):
        res = sum(args)
        # A complex sum written to a non-complex output keeps only its real part.
        if not dtypes.is_complex(out_dtype) and dtypes.is_complex(res.dtype):
            res = res.real
        return res.astype(out_dtype)

    # ``numpy.ComplexWarning`` was removed from the top-level namespace in NumPy 2.0;
    # newer versions expose it in ``numpy.exceptions``.
    # Fall back to the top-level name for versions without that submodule.
    complex_warning = getattr(numpy, "exceptions", numpy).ComplexWarning

    # Temporarily catching imaginary part truncation warnings
    with catch_warnings():
        filterwarnings("ignore", "", complex_warning)
        add = functions.add(*in_dtypes, out_dtype=out_dtype)

    check_func(thr, add, reference_add, out_dtype, in_dtypes)

Exemple #7

0

Afficher le fichier

Fichier : test_functions.py Projet : fjarri/reikna

def test_multiarg_add(thr, out_code, in_codes):
    """
    Checks multi-argument add() with a variety of data types.

    The dtypes are produced by ``generate_dtypes`` from ``out_code`` and ``in_codes``.
    The reference implementation sums the arguments, drops the imaginary part
    when the output dtype is not complex, and casts to the output dtype.
    """

    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)

    def reference_add(*args):
        res = sum(args)
        if not dtypes.is_complex(out_dtype) and dtypes.is_complex(res.dtype):
            res = res.real
        return res.astype(out_dtype)

    # NumPy 2.0 dropped ``numpy.ComplexWarning`` from the main namespace
    # (it is available as ``numpy.exceptions.ComplexWarning``),
    # so look it up in a way that works both before and after that change.
    if hasattr(numpy, "exceptions"):
        complex_warning = numpy.exceptions.ComplexWarning
    else:
        complex_warning = numpy.ComplexWarning

    # Temporarily catching imaginary part truncation warnings
    with catch_warnings():
        filterwarnings("ignore", "", complex_warning)
        add = functions.add(*in_dtypes, out_dtype=out_dtype)

    check_func(thr, add, reference_add, out_dtype, in_dtypes)

Exemple #8

0

Afficher le fichier

Fichier : generate_gpu.py Projet : fjarri/squeezed-sim

    def _build_plan(self, plan_factory, device_params, alpha, beta, alpha_i,
                    beta_i, seed):
        """
        Assembles the plan that produces ``alpha`` and ``beta`` from ``alpha_i``
        and ``beta_i`` using the system's unitary matrix.

        Two variants are built, depending on whether the representation is not
        ``Representation.POSITIVE_P`` and the system reports that it needs
        a noise matrix:

        * without the noise matrix, ``alpha`` and ``beta`` are each obtained from
          a separate ``MatrixMul`` call; the one for ``beta`` has the transformation
          returned by ``self._make_trf_conj()`` attached to its ``matrix_b`` parameter;
        * with the noise matrix, only the ``alpha_i`` product is computed
          (into a temporary array), a ``generate_apply_matrix_noise`` kernel fills
          a temporary array which is multiplied by the noise matrix, and
          an ``add_noise`` kernel writes ``alpha`` and ``beta``.

        ``device_params`` is accepted but not used here.
        Returns the plan obtained from ``plan_factory()``.
        """
        plan = plan_factory()

        system = self._system
        representation = self._representation

        unitary = plan.persistent_array(self._system.unitary)

        with_noise = (representation != Representation.POSITIVE_P
                      and system.needs_noise_matrix())

        mmul = MatrixMul(alpha, unitary, transposed_b=True)

        if with_noise:
            noise_matrix_dev = plan.persistent_array(system.noise_matrix())

            # Getting here implies the representation is not positive-P,
            # so alpha == conj(beta): it is enough to calculate alpha
            # and derive beta from it.

            noise_source = plan.temp_array_like(alpha)
            alpha_no_noise = plan.temp_array_like(alpha)

            plan.computation_call(mmul, alpha_no_noise, alpha_i, unitary)

            rng_bijection = philox(64, 2)

            # The key generator always gets a zero seed, which keeps the rendered
            # kernel identical between calls so it can be cached.
            # The real seed is handed to the kernel as a computation parameter.
            rng_keygen = KeyGenerator.create(rng_bijection, seed=numpy.int32(0))

            normal_sampler = normal_bm(rng_bijection, numpy.float64)

            noise_render_kwds = {
                "bijection": rng_bijection,
                "keygen": rng_keygen,
                "sampler": normal_sampler,
                "mul_cr": functions.mul(numpy.complex128, numpy.float64),
                "add_cc": functions.add(numpy.complex128, numpy.complex128),
            }
            plan.kernel_call(
                TEMPLATE.get_def("generate_apply_matrix_noise"),
                [noise_source, seed],
                kernel_name="generate_apply_matrix_noise",
                global_size=alpha.shape,
                render_kwds=noise_render_kwds,
            )

            noise = plan.temp_array_like(alpha)
            plan.computation_call(mmul, noise, noise_source, noise_matrix_dev)

            add_noise_render_kwds = {
                "add": functions.add(numpy.complex128, numpy.complex128),
                "conj": functions.conj(numpy.complex128),
            }
            plan.kernel_call(
                TEMPLATE.get_def("add_noise"),
                [alpha, beta, alpha_no_noise, noise],
                kernel_name="add_noise",
                global_size=alpha.shape,
                render_kwds=add_noise_render_kwds,
            )

            return plan

        # TODO: this could be sped up for repr != POSITIVE_P,
        # since in that case alpha == conj(beta), and we don't need to do two multiplications.

        mmul_beta = MatrixMul(beta, unitary, transposed_b=True)
        conj_trf = self._make_trf_conj()
        mmul_beta.parameter.matrix_b.connect(
            conj_trf, conj_trf.output, matrix_b_p=conj_trf.input)

        plan.computation_call(mmul, alpha, alpha_i, unitary)
        plan.computation_call(mmul_beta, beta, beta_i, unitary)

        return plan

Exemple #9

0

Afficher le fichier

Fichier : polynomial_transform_fft.py Projet : stjordanis/nufhe

def transformed_add(perf_params):
    """
    Returns the addition function for two operands of the dtype
    given by ``transformed_dtype()``.

    ``perf_params`` is accepted but not used.
    """
    lhs_dtype = transformed_dtype()
    rhs_dtype = transformed_dtype()
    return functions.add(lhs_dtype, rhs_dtype)

Exemple #10

0

Afficher le fichier

    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Assembles the plan: a preparation kernel over ``alpha`` and ``beta``,
        an aggregation kernel, a reduction over axis 0, and an FFT whose output
        is written to ``output`` through a transformation that keeps only
        the ``.x`` component of each value.

        :param plan_factory: callable invoked without arguments to create the plan.
        :param device_params: object providing ``local_mem_size`` and
            ``max_work_group_size``.
        :param output: array-like used to annotate the final output
            and passed to the FFT call.
        :param alpha: array-like whose ``shape`` unpacks into ``(samples, modes)``
            and whose ``dtype`` is used for all intermediate arrays.
        :param beta: array-like passed to the preparation kernel along with ``alpha``.
        :returns: the assembled plan.
        :raises OutOfResourcesError: if the aggregation kernel could not be added
            with any of the attempted work-group sizes.
        """

        plan = plan_factory()

        samples, modes = alpha.shape

        for_reduction = Type(alpha.dtype, (samples, self._max_total_clicks + 1))

        prepared_state = plan.temp_array_like(alpha)

        plan.kernel_call(
            TEMPLATE.get_def("compound_click_probability_prepare"),
            [prepared_state, alpha, beta],
            kernel_name="compound_click_probability_prepare",
            global_size=alpha.shape,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        # Block size is limited by the amount of available local memory.
        # In some OpenCL implementations the number reported cannot actually be fully used
        # (because it's used by kernel arguments), so we're padding it a little.
        local_mem_size = device_params.local_mem_size
        max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
        block_size = 2**helpers.log2(max_elems)

        # No reason to have block size larger than the number of modes
        block_size = min(block_size, helpers.bounding_power_of_2(modes))

        products_gsize = (samples, helpers.min_blocks(self._max_total_clicks + 1, block_size) * block_size)
        products = plan.temp_array_like(for_reduction)

        read_size = min(block_size, device_params.max_work_group_size)

        # These depend only on ``modes`` and ``block_size``, not on ``read_size``,
        # so they are calculated once instead of on every attempt.
        full_steps = modes // block_size
        remainder_size = modes % block_size

        # Try progressively smaller work-group sizes until the kernel fits.
        # The loop must stop only after a successful call; on failure it retries
        # with a halved ``read_size``.
        last_error = None
        while read_size > 1:

            try:
                plan.kernel_call(
                    TEMPLATE.get_def("compound_click_probability_aggregate"),
                    [products, prepared_state],
                    kernel_name="compound_click_probability_aggregate",
                    global_size=products_gsize,
                    local_size=(1, read_size,),
                    render_kwds=dict(
                        block_size=block_size,
                        read_size=read_size,
                        full_steps=full_steps,
                        remainder_size=remainder_size,
                        output_size=self._max_total_clicks + 1,
                        mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                        add_cc=functions.add(alpha.dtype, alpha.dtype),
                        polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                        modes=self._system.modes,
                        max_total_clicks=self._max_total_clicks,
                        ))

            except OutOfResourcesError as error:
                last_error = error
                read_size //= 2

            else:
                break

        else:
            # Every attempted size failed: report that instead of silently
            # continuing with a plan that lacks the aggregation kernel.
            # (If the loop body never ran, there is no error to report.)
            if last_error is not None:
                raise last_error

        reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

        temp = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, temp, products)

        fft = FFT(temp)
        # Output transformation storing only the ``.x`` component of the FFT result
        # (presumably the real part of a complex value).
        real_trf = Transformation([
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
            ],
            """
                ${input.ctype} val = ${input.load_same};
                ${output.store_same}(val.x);
                """)
        fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

        plan.computation_call(fft, output, temp, True)

        return plan