コード例 #1
0
def pow(dtype, power_dtype=None):
    """
    Returns a :py:class:`~reikna.cluda.Module` rendered from the ``pow`` template
    definition: a function of two arguments that raises a base of type ``dtype``
    (a real or complex data type) to a power of type ``power_dtype``
    (a corresponding real data type or an integer).

    If ``power_dtype`` is omitted, it becomes ``dtype`` for a real base
    and ``dtypes.real_for(dtype)`` otherwise; an integer base requires it
    to be given explicitly (``ValueError``).
    A complex ``power_dtype`` raises ``NotImplementedError``.
    """
    if dtypes.is_complex(power_dtype):
        raise NotImplementedError("pow() with a complex power is not supported")

    if power_dtype is None:
        if dtypes.is_integer(dtype):
            raise ValueError("Power dtype must be specified for an integer argument")
        power_dtype = dtype if dtypes.is_real(dtype) else dtypes.real_for(dtype)

    # Real data type handed to the ``polar`` helper module.
    if dtypes.is_complex(dtype):
        polar_dtype = dtypes.real_for(dtype)
    elif dtypes.is_real(dtype):
        polar_dtype = dtype
    else:
        polar_dtype = power_dtype if dtypes.is_real(power_dtype) else numpy.float32

    # An integer base combined with a real power is rendered with the power's dtype.
    base_dtype = dtype
    if dtypes.is_integer(dtype) and dtypes.is_real(power_dtype):
        base_dtype = power_dtype

    template = TEMPLATE.get_def('pow')
    render_kwds = {
        'dtype': base_dtype,
        'power_dtype': power_dtype,
        'mul_': mul(base_dtype, base_dtype),
        'div_': div(base_dtype, base_dtype),
        'polar_': polar(polar_dtype),
    }
    return Module(template, render_kwds=render_kwds)
コード例 #2
0
ファイル: fft.py プロジェクト: jakirkham/reikna
def get_common_kwds(dtype, device_params):
    """
    Assemble the dictionary of render keywords built from ``dtype`` and ``device_params``.

    ``device_params`` must provide ``min_mem_coalesce_width`` (indexed by ``dtype.itemsize``)
    and ``local_mem_banks``.
    """
    def wrap_const(x):
        # Renders ``x`` as a C constant of the real counterpart of ``dtype``.
        return dtypes.c_constant(x, dtypes.real_for(dtype))

    coalesce_width = device_params.min_mem_coalesce_width[dtype.itemsize]
    return {
        'dtype': dtype,
        'min_mem_coalesce_width': coalesce_width,
        'local_mem_banks': device_params.local_mem_banks,
        'get_padding': get_padding,
        'wrap_const': wrap_const,
        'min_blocks': helpers.min_blocks,
        'mul': functions.mul(dtype, dtype),
        'polar_unit': functions.polar_unit(dtypes.real_for(dtype)),
        'cdivs': functions.div(dtype, numpy.uint32, out_dtype=dtype),
    }
コード例 #3
0
ファイル: fft.py プロジェクト: mgolub2/reikna
def get_common_kwds(dtype, device_params):
    """
    Build the render keyword dictionary derived from ``dtype`` and ``device_params``.

    The entries are filled one by one: device limits first, then helper callables,
    then the arithmetic modules specialized for ``dtype``.
    """
    kwds = {}
    kwds['dtype'] = dtype

    # Device-dependent values.
    kwds['min_mem_coalesce_width'] = device_params.min_mem_coalesce_width[dtype.itemsize]
    kwds['local_mem_banks'] = device_params.local_mem_banks

    # Helper callables.
    kwds['get_padding'] = get_padding
    kwds['wrap_const'] = lambda x: dtypes.c_constant(x, dtypes.real_for(dtype))
    kwds['min_blocks'] = helpers.min_blocks

    # Arithmetic modules for this data type.
    kwds['mul'] = functions.mul(dtype, dtype)
    kwds['polar_unit'] = functions.polar_unit(dtypes.real_for(dtype))
    kwds['cdivs'] = functions.div(dtype, numpy.uint32, out_dtype=dtype)
    return kwds
コード例 #4
0
    def __init__(self, thr, psi_arr_t, box, tmax, steps, samples,
            kinetic_coeff=1, nonlinear_module=None, potentials=None):
        r"""
        Integrates the equation

        .. math::

            i \frac{d\psi_j}{dt} = - k \nabla^2 \psi_j + N(\psi_1, ... \psi_C, V(t), t),

        where :math:`C` is the number of components, :math:`V` is the dynamic potential.

        ``psi_arr_t`` is an array-like object with the shape ``(components, ensembles, *grid)``.
        ``box`` is a tuple of length ``grid``, containing sizes of the simulation box.
        ``tmax`` is the propagation time.
        ``steps`` is the number of time steps to take.
        ``samples`` is the number of samples to take (not counting the initial one);
            should be a factor of ``steps``.
        ``kinetic_coeff`` is the value of :math:`k`.
        ``nonlinear_module`` calculates :math:`N`.
        ``potentials``: ``None``, an array of shape ``grid``, or an array of shape ``(M, *grid)``,
            corresponding to the values of dynamic potential.
            The dynamic potential contains the snapshots at time points from 0 to ``tmax``,
            and ``M - 1`` should be a factor of ``steps``.

        The stored ``self.potentials`` always has the shape ``(M, *grid)`` with ``M >= 2``
        and the real data type corresponding to ``psi_arr_t.dtype``.
        """

        self.tmax = tmax

        assert steps % samples == 0
        self.steps = steps
        self.samples = samples

        self.dt = float(tmax) / steps
        self.dt_half = self.dt / 2

        real_dtype = dtypes.real_for(psi_arr_t.dtype)
        grid_shape = psi_arr_t.shape[2:]

        if potentials is None:
            # No potential: two zero snapshots (start and end of the time interval).
            # Uses the real dtype so that all three branches agree on the element type.
            self.potentials = numpy.zeros((2,) + grid_shape, real_dtype)
        elif len(potentials.shape) == len(psi_arr_t.shape) - 2:
            # Static potential of shape ``grid``: duplicate it into two identical snapshots.
            potentials = potentials.astype(real_dtype)
            potentials = potentials.reshape(1, *grid_shape)
            self.potentials = numpy.vstack([potentials, potentials])
        else:
            # Dynamic potential of shape ``(M, *grid)``.
            assert len(potentials.shape) == len(psi_arr_t.shape) - 1
            assert steps % (potentials.shape[0] - 1) == 0
            potentials = potentials.astype(real_dtype)
            self.potentials = potentials

        self.thr = thr
        # Two compiled steppers: one for the full time step and one for the half step.
        self.stepper = RK4IPStepper(psi_arr_t, self.dt,
            box=box, kinetic_coeff=kinetic_coeff, nonlinear_module=nonlinear_module).compile(thr)
        self.stepper_half = RK4IPStepper(psi_arr_t, self.dt_half,
            box=box, kinetic_coeff=kinetic_coeff, nonlinear_module=nonlinear_module).compile(thr)
コード例 #5
0
def get_nonlinear_wrapper(components, c_dtype, nonlinear_module, dt):
    """
    Wraps ``nonlinear_module`` into a module with one function per component
    (suffixes ``0 .. components - 1``).

    Each generated function forwards its arguments (``components`` values of
    the C type for ``c_dtype``, followed by ``V`` and ``t`` of the corresponding real type)
    to the wrapped function with the same suffix and returns the result
    multiplied by the complex constant ``(0, -dt)``.
    """
    s_dtype = dtypes.real_for(c_dtype)
    render_kwds = {
        'components': components,
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(s_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        'dt': dtypes.c_constant(dt, s_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(
        """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(
                %for pcomp in range(components):
                psi${pcomp},
                %endfor
                V, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """,
        render_kwds=render_kwds)
コード例 #6
0
def hanning_window(arr, NFFT):
    """
    Builds a transformation (1 output, 1 input, both typed like ``arr``)
    that multiplies the elements of a 2D array by the von Hann window coefficient
    ``0.5 * (1 - cos(2 * pi * idx / (NFFT - 1)))``.

    ``NFFT`` is passed separately to account for zero padding, which should not be windowed:
    elements at positions ``>= NFFT`` are multiplied by 1.
    """
    # The window coefficient is real even when the data is complex.
    arr_is_complex = dtypes.is_complex(arr.dtype)
    coeff_dtype = dtypes.real_for(arr.dtype) if arr_is_complex else arr.dtype

    parameters = [
        Parameter('output', Annotation(arr, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[0]:
        if (${idxs[1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """
    render_kwds = {
        'coeff_dtype': coeff_dtype,
        'NFFT': NFFT,
        'mul': functions.mul(arr.dtype, coeff_dtype),
    }
    return Transformation(parameters, code, render_kwds=render_kwds)
コード例 #7
0
ファイル: demo_specgram.py プロジェクト: fjarri/reikna
def hanning_window(arr, NFFT):
    """
    Returns a transformation applying the von Hann window to the rows of a 2D array
    (1 output, 1 input, both annotated with ``arr``).

    Zero padding must not be windowed, so the window length ``NFFT`` is given explicitly;
    positions at or beyond ``NFFT`` get a coefficient of 1.
    """
    if not dtypes.is_complex(arr.dtype):
        coeff_dtype = arr.dtype
    else:
        # Complex data is scaled by a real coefficient.
        coeff_dtype = dtypes.real_for(arr.dtype)

    output_param = Parameter('output', Annotation(arr, 'o'))
    input_param = Parameter('input', Annotation(arr, 'i'))

    return Transformation(
        [output_param, input_param],
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[0]:
        if (${idxs[1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds={
            'coeff_dtype': coeff_dtype,
            'NFFT': NFFT,
            'mul': functions.mul(arr.dtype, coeff_dtype),
        })
コード例 #8
0
ファイル: wigner.py プロジェクト: fjarri-attic/early-universe
def nonlinear_no_potential(dtype, U, nu):
    """
    Creates a module with two functions (suffixes ``0`` and ``1``), each taking
    ``psi0``, ``psi1`` (complex, ``dtype``) and ``t`` (corresponding real type).

    The function for component ``comp`` returns
    ``psi_comp * (U[comp, 0] * norm(psi0) + U[comp, 1] * norm(psi1)) - psi_(1 - comp) * nu``.
    """
    s_dtype = dtypes.real_for(dtype)
    render_kwds = {
        'mul': functions.mul(dtype, s_dtype),
        'norm': functions.norm(dtype),
        'U': U,
        'nu': dtypes.c_constant(nu, s_dtype),
        'c_ctype': dtypes.ctype(dtype),
        's_ctype': dtypes.ctype(s_dtype),
    }

    return Module.create(
        """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    ${dtypes.c_constant(U[comp, 0])} * ${norm}(psi0) +
                    ${dtypes.c_constant(U[comp, 1])} * ${norm}(psi1)
                    ))
                - ${mul}(psi${1 - comp}, ${nu})
                );
        }
        %endfor
        """,
        render_kwds=render_kwds)
コード例 #9
0
def get_prepare_prfft_scan(output):
    """
    Returns a transformation with parameters ``output`` (out), ``Y`` (in, same type as
    ``output``) and ``re_X_0`` (in, real counterpart of ``output.dtype``, shape without
    the last axis).

    Each loaded element ``Y`` is replaced by ``(Y.y, -Y.x)``; at index 0 of the last axis
    the result is additionally halved and ``re_X_0`` is added to its real part.
    """
    parameters = [
        Parameter('output', Annotation(output, 'o')),
        Parameter('Y', Annotation(output, 'i')),
    ]

    # One real value per batch element (the last axis is dropped).
    re_x0_type = Type(dtypes.real_for(output.dtype), output.shape[:-1])
    parameters.append(Parameter('re_X_0', Annotation(re_x0_type, 'i')))

    code = """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """
    return Transformation(parameters, code, connectors=['output', 'Y'])
コード例 #10
0
ファイル: transformations.py プロジェクト: xexo7C8/reikna
def norm_const(arr_t, order):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input): ``output = abs(input) ** order``.

    The output has the shape of ``arr_t``; its data type is the real counterpart
    of ``arr_t.dtype`` for complex input and ``arr_t.dtype`` itself otherwise.
    """
    input_is_complex = dtypes.is_complex(arr_t.dtype)
    out_dtype = dtypes.real_for(arr_t.dtype) if input_is_complex else arr_t.dtype

    output_param = Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o'))
    input_param = Parameter('input', Annotation(arr_t, 'i'))

    # ``norm`` yields the squared magnitude, hence the ``order / 2`` exponent in the template.
    code = """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """
    render_kwds = {
        'norm': functions.norm(arr_t.dtype),
        'order': order,
    }
    return Transformation([output_param, input_param], code, render_kwds=render_kwds)
コード例 #11
0
    def __init__(self, shape, box, drift,
            trajectories=1, kinetic_coeffs=0.5j, diffusion=None, iterations=3, noise_type=None):
        """
        Sets up the computation signature and the nested computations.

        The state has the type ``drift.dtype`` and the shape
        ``(trajectories, drift.components) + shape``.
        Parameters are ``output``, ``input``, ``dW`` (only when ``diffusion`` is given,
        typed by ``noise_type``), and the real scalars ``t`` and ``dt``.
        """
        real_dtype = dtypes.real_for(drift.dtype)
        state_type = Type(drift.dtype, (trajectories, drift.components) + shape)

        self._noise = diffusion is not None

        parameters = [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i'))]
        if self._noise:
            # The noise array is a parameter only when a diffusion term is present.
            parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
        parameters.append(Parameter('t', Annotation(real_dtype)))
        parameters.append(Parameter('dt', Annotation(real_dtype)))
        Computation.__init__(self, parameters)

        self._ksquared = get_ksquared(shape, box).astype(real_dtype)
        # '/2' because we want to propagate only to dt/2
        half_step_coeffs = kinetic_coeffs / 2
        kprop_trf = get_kprop_trf(state_type, self._ksquared, half_step_coeffs, exp=True)

        # Both transforms act on the grid axes (everything after trajectories and components).
        ndim = len(state_type.shape)
        self._fft = FFT(state_type, axes=range(2, ndim))
        self._fft_with_kprop = FFT(state_type, axes=range(2, ndim))
        self._fft_with_kprop.parameter.output.connect(
            kprop_trf, kprop_trf.input,
            output_prime=kprop_trf.output, ksquared=kprop_trf.ksquared, dt=kprop_trf.dt)

        self._prop_iter = get_prop_iter(
            state_type, drift, iterations,
            diffusion=diffusion, noise_type=noise_type)
コード例 #12
0
ファイル: transformations.py プロジェクト: fjarri/reikna
def norm_const(arr_t, order):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input): ``output = abs(input) ** order``.

    For complex ``arr_t`` the output uses the corresponding real data type;
    otherwise it keeps ``arr_t.dtype``. The shape is that of ``arr_t``.
    """
    if not dtypes.is_complex(arr_t.dtype):
        out_dtype = arr_t.dtype
    else:
        out_dtype = dtypes.real_for(arr_t.dtype)

    out_type = Type(out_dtype, arr_t.shape)
    parameters = [
        Parameter('output', Annotation(out_type, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]

    return Transformation(
        parameters,
        """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """,
        render_kwds={
            'norm': functions.norm(arr_t.dtype),
            'order': order,
        })
コード例 #13
0
    def __init__(self, arr_t):
        """
        Declares the signature: ``input`` typed like ``arr_t`` and ``output`` with the
        real counterpart of ``arr_t.dtype`` and a last axis twice as long.
        """
        real_dtype = dtypes.real_for(arr_t.dtype)
        doubled_shape = arr_t.shape[:-1] + (arr_t.shape[-1] * 2, )
        out_arr = Type(real_dtype, doubled_shape)

        parameters = [
            Parameter('output', Annotation(out_arr, 'o')),
            Parameter('input', Annotation(arr_t, 'i')),
        ]
        Computation.__init__(self, parameters)
コード例 #14
0
ファイル: demo_specialized_fft.py プロジェクト: fjarri/reikna
    def __init__(self, arr_t):
        """
        Registers two parameters: ``output`` (real counterpart of ``arr_t.dtype``,
        last dimension doubled) and ``input`` (same type as ``arr_t``).
        """
        batch_shape = arr_t.shape[:-1]
        last_dim = arr_t.shape[-1] * 2
        out_arr = Type(dtypes.real_for(arr_t.dtype), batch_shape + (last_dim,))

        output_param = Parameter('output', Annotation(out_arr, 'o'))
        input_param = Parameter('input', Annotation(arr_t, 'i'))
        Computation.__init__(self, [output_param, input_param])
コード例 #15
0
def get_nonlinear_wrapper(state_dtype, grid_dims, drift, diffusion=None):
    """
    Creates a module with one function per component of ``drift``.

    Each function takes ``grid_dims`` integer indices, ``drift.components`` state values,
    the noise values (``diffusion.noise_sources`` of them, only if ``diffusion`` is given),
    ``t`` and ``dt``. It returns the drift function value multiplied by ``dt``, plus,
    for every noise source, the diffusion function value multiplied by the matching ``dW``.

    The noise data type is ``diffusion.dtype``, or the real counterpart of ``state_dtype``
    when there is no diffusion.
    """
    real_dtype = dtypes.real_for(state_dtype)
    noise_dtype = real_dtype if diffusion is None else diffusion.dtype

    render_kwds = {
        'grid_dims': grid_dims,
        's_ctype': dtypes.ctype(state_dtype),
        'r_ctype': dtypes.ctype(real_dtype),
        'n_ctype': dtypes.ctype(noise_dtype),
        'mul_sr': functions.mul(state_dtype, real_dtype),
        'mul_sn': functions.mul(state_dtype, noise_dtype),
        'drift': drift,
        'diffusion': diffusion,
    }

    return Module.create(
        """
        <%
            components = drift.components
            idx_args = ["idx_" + str(dim) for dim in range(grid_dims)]
            psi_args = ["psi_" + str(comp) for comp in range(components)]
            if diffusion is not None:
                dW_args = ["dW_" + str(ncomp) for ncomp in range(diffusion.noise_sources)]
        %>
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${s_ctype} ${prefix}${comp}(
            %for idx in idx_args:
            const int ${idx},
            %endfor
            %for psi in psi_args:
            const ${s_ctype} ${psi},
            %endfor
            %if diffusion is not None:
            %for dW in dW_args:
            const ${n_ctype} ${dW},
            %endfor
            %endif
            const ${r_ctype} t,
            const ${r_ctype} dt)
        {
            return
                ${mul_sr}(${drift.module}${comp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), dt)
                %if diffusion is not None:
                %for ncomp in range(diffusion.noise_sources):
                + ${mul_sn}(${diffusion.module}${comp}_${ncomp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), ${dW_args[ncomp]})
                %endfor
                %endif
                ;
        }
        %endfor
        """,
        render_kwds=render_kwds)
コード例 #16
0
ファイル: test_functions.py プロジェクト: ringw/reikna
def test_pow(thr, out_code, in_codes):
    """
    Checks ``functions.pow`` against ``numpy.power``.

    With a single input dtype the power dtype is left to ``functions.pow`` and the
    matching dtype is appended for the check; otherwise both dtypes are passed explicitly.
    """
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    if len(in_dtypes) != 1:
        func = functions.pow(in_dtypes[0], power_dtype=in_dtypes[1])
    else:
        base_dtype = in_dtypes[0]
        func = functions.pow(base_dtype)
        # Mirror the default power dtype that ``pow`` is expected to pick.
        default_power = base_dtype if dtypes.is_real(base_dtype) else dtypes.real_for(base_dtype)
        in_dtypes.append(default_power)
    check_func(thr, func, numpy.power, out_dtype, in_dtypes)
コード例 #17
0
ファイル: test_functions.py プロジェクト: SyamGadde/reikna
def test_pow(thr, out_code, in_codes):
    """
    Compares the module produced by ``functions.pow`` with ``numpy.power``
    for the dtypes generated from ``out_code`` and ``in_codes``.
    """
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    only_base_given = len(in_dtypes) == 1

    if only_base_given:
        func = functions.pow(in_dtypes[0])
        # The power argument defaults to the base dtype (real) or its real counterpart.
        if dtypes.is_real(in_dtypes[0]):
            power_dtype = in_dtypes[0]
        else:
            power_dtype = dtypes.real_for(in_dtypes[0])
        in_dtypes.append(power_dtype)
    else:
        func = functions.pow(in_dtypes[0], power_dtype=in_dtypes[1])

    check_func(thr, func, numpy.power, out_dtype, in_dtypes)
コード例 #18
0
def get_nonlinear3(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Returns a ``PureParallel`` computation performing the last sub-step::

        k4 = N(D(psi_4), t + dt)
        output = D(psi_k) + k4 / 6

    Parameters are ``output``, ``kprop_psi_k``, ``kprop_psi_4``, ``dW`` (only when
    ``diffusion`` is given, typed by ``noise_type``), and the real scalars ``t`` and ``dt``.
    The guiding array covers the state shape with the component axis (axis 1) removed.
    """
    real_dtype = dtypes.real_for(state_type.dtype)
    has_noise = diffusion is not None

    parameters = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('kprop_psi_k', Annotation(state_type, 'i')),
        Parameter('kprop_psi_4', Annotation(state_type, 'i'))]
    if has_noise:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters.append(Parameter('t', Annotation(real_dtype)))
    parameters.append(Parameter('dt', Annotation(real_dtype)))

    code = """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: list(map(lambda i: prefix + str(i), range(num)))
            dW_args = args('dW_', diffusion.noise_sources) if diffusion is not None else []
            k4_args = ", ".join(idxs[1:] + args('psi4_', components) + dW_args)
        %>

        %for comp in range(components):
        ${output.ctype} psi4_${comp} = ${kprop_psi_4.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psik_${comp} = ${kprop_psi_k.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${output.ctype} k4_${comp} = ${nonlinear}${comp}(${k4_args}, ${t} + ${dt}, ${dt});
        %endfor

        %for comp in range(components):
        ${output.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psik_${comp} + ${div}(k4_${comp}, 6));
        %endfor
        """

    # One work item per (trajectory, grid point); components are looped over in the kernel.
    guiding_shape = (state_type.shape[0],) + state_type.shape[2:]
    render_kwds = {
        'components': components,
        'nonlinear': nonlinear_wrapper,
        'diffusion': diffusion,
        'div': functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype),
    }
    return PureParallel(
        parameters,
        code,
        guiding_array=guiding_shape,
        render_kwds=render_kwds)
コード例 #19
0
ファイル: functions.py プロジェクト: xexo7C8/reikna
def pow(dtype, exponent_dtype=None, output_dtype=None):
    """
    Returns a :py:class:`~reikna.cluda.Module` with a function of two arguments
    that raises the first argument of type ``dtype``
    to the power of the second argument of type ``exponent_dtype``
    (an integer or real data type).
    If ``exponent_dtype`` or ``output_dtype`` are not given, they default to ``dtype``.
    If ``dtype`` is not the same as ``output_dtype``,
    the input is cast to ``output_dtype`` *before* exponentiation.
    If ``exponent_dtype`` is real, but both ``dtype`` and ``output_dtype`` are integer,
    a ``ValueError`` is raised.
    A complex ``exponent_dtype`` raises ``NotImplementedError``.
    """
    exponent_dtype = dtype if exponent_dtype is None else exponent_dtype
    output_dtype = dtype if output_dtype is None else output_dtype

    if dtypes.is_complex(exponent_dtype):
        raise NotImplementedError("pow() with a complex exponent is not supported")

    if dtypes.is_real(exponent_dtype):
        # A real exponent is aligned with the precision of the output type.
        if dtypes.is_complex(output_dtype):
            exponent_dtype = dtypes.real_for(output_dtype)
        elif not dtypes.is_real(output_dtype):
            raise ValueError("pow(integer, float): integer is not supported")
        else:
            exponent_dtype = output_dtype

    # Helper modules are created only for the cases that need them; the rest stay ``None``.
    cast_ = cast(output_dtype, dtype) if output_dtype != dtype else None

    mul_ = None
    div_ = None
    if dtypes.is_integer(exponent_dtype) and not dtypes.is_real(output_dtype):
        mul_ = mul(output_dtype, output_dtype)
        div_ = div(output_dtype, output_dtype)

    polar_ = None
    if dtypes.is_complex(output_dtype):
        polar_ = polar(dtypes.real_for(output_dtype))

    kwds = {
        'dtype': dtype,
        'exponent_dtype': exponent_dtype,
        'output_dtype': output_dtype,
        'div_': div_,
        'mul_': mul_,
        'cast_': cast_,
        'polar_': polar_,
    }
    return Module(TEMPLATE.get_def('pow'), render_kwds=kwds)
コード例 #20
0
def get_nonlinear(dtype, interaction, tunneling):
    r"""
    Nonlinear module

    .. math::

        N(\psi_1, ... \psi_C)
        = \sum_{n=1}^{C} U_{jn} |\psi_n|^2 \psi_j
          - \nu_j \psi_{m_j}

    ``interaction``: a symmetrical ``components x components`` array with interaction strengths.
    ``tunneling``: a list of (other_comp, coeff) pairs of tunnelling strengths.

    The generated functions (one per component) also take the potential ``V``,
    which is added to the interaction sum, and the time ``t``.
    """
    s_dtype = dtypes.real_for(dtype)

    render_kwds = {
        # The number of components is taken from the interaction matrix.
        'components': interaction.shape[0],
        'mul': functions.mul(dtype, s_dtype),
        'norm': functions.norm(dtype),
        'interaction': interaction,
        'tunneling': tunneling,
        's_dtype': s_dtype,
        'c_ctype': dtypes.ctype(dtype),
        's_ctype': dtypes.ctype(s_dtype),
    }

    return Module.create(
        """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    %for other_comp in range(components):
                    + ${dtypes.c_constant(interaction[comp, other_comp], s_dtype)} *
                        ${norm}(psi${other_comp})
                    %endfor
                    + V
                    ))
                - ${mul}(
                    psi${tunneling[comp][0]},
                    ${dtypes.c_constant(tunneling[comp][1], s_dtype)})
                );
        }
        %endfor
        """,
        render_kwds=render_kwds)
コード例 #21
0
    def __init__(self,
                 state_arr,
                 dt,
                 box=None,
                 kinetic_coeff=1,
                 nonlinear_module=None):
        """
        Declares the signature (``output``, ``input`` typed like ``state_arr``, and a real
        scalar ``t``) and prepares the nested computations.

        ``state_arr.shape`` is read as ``(components, ensembles) + grid_shape``.
        The propagator ``exp(-1j * kinetic_coeff * dt / 2 * ksquared)`` is precomputed on
        the host and attached to the output of a second FFT as a multiplication.
        """
        scalar_dtype = dtypes.real_for(state_arr.dtype)
        signature = [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('t', Annotation(scalar_dtype)),
        ]
        Computation.__init__(self, signature)

        self._box = box
        self._kinetic_coeff = kinetic_coeff
        self._nonlinear_module = nonlinear_module

        full_shape = state_arr.shape
        self._components = full_shape[0]
        self._ensembles = full_shape[1]
        self._grid_shape = full_shape[2:]

        # Propagator for half a time step, stored with the state's data type.
        ksquared = get_ksquared(self._grid_shape, self._box)
        kprop = numpy.exp(ksquared * (-1j * kinetic_coeff * dt / 2))
        self._kprop = kprop.astype(state_arr.dtype)

        kprop_parameters = [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('kprop', Annotation(self._kprop, 'i')),
        ]
        self._kprop_trf = Transformation(
            kprop_parameters,
            """
            ${kprop.ctype} kprop_coeff = ${kprop.load_idx}(${', '.join(idxs[2:])});
            ${output.store_same}(${mul}(${input.load_same}, kprop_coeff));
            """,
            render_kwds={
                'mul': functions.mul(state_arr.dtype, self._kprop.dtype),
            })

        # FFTs act on the grid axes only; the second one has the propagator attached.
        ndim = len(state_arr.shape)
        self._fft = FFT(state_arr, axes=range(2, ndim))
        self._fft_with_kprop = FFT(state_arr, axes=range(2, ndim))
        self._fft_with_kprop.parameter.output.connect(
            self._kprop_trf,
            self._kprop_trf.input,
            output_prime=self._kprop_trf.output,
            kprop=self._kprop_trf.kprop)

        nonlinear_wrapper = get_nonlinear_wrapper(
            state_arr.dtype, nonlinear_module, dt)
        self._N1 = get_nonlinear1(state_arr, scalar_dtype, nonlinear_wrapper)
        self._N2 = get_nonlinear2(
            state_arr, scalar_dtype, nonlinear_wrapper, dt)
        self._N3 = get_nonlinear3(
            state_arr, scalar_dtype, nonlinear_wrapper, dt)
コード例 #22
0
    def _build_plan(self, plan_factory, device_params, output, input_):
        """
        Builds the execution plan from two nested computations:

        1. a reduction summing the real parts of ``input_`` over its last axis
           into the temporary ``x0``;
        2. an ``IRFFT`` with the ``prepare_iprfft_input`` / ``prepare_iprfft_output``
           transformations attached to its input and output, called with
           ``output``, ``x0``, the coefficient array and ``input_``.

        ``device_params`` is not used here.
        """

        plan = plan_factory()

        N = input_.shape[-1] * 4

        # The first element is unused
        coeffs = numpy.concatenate(
            [[0],
             1 / (4 * numpy.sin(2 * numpy.pi * numpy.arange(1, N // 2) / N))])
        coeffs_arr = plan.persistent_array(coeffs)

        prepare_iprfft_input = get_prepare_iprfft_input(input_)
        prepare_iprfft_output = get_prepare_iprfft_output(output)

        irfft = IRFFT(prepare_iprfft_input.Y)
        irfft.parameter.input.connect(prepare_iprfft_input,
                                      prepare_iprfft_input.Y,
                                      X=prepare_iprfft_input.X)
        irfft.parameter.output.connect(prepare_iprfft_output,
                                       prepare_iprfft_output.y,
                                       x=prepare_iprfft_output.x,
                                       x0=prepare_iprfft_output.x0,
                                       coeffs=prepare_iprfft_output.coeffs)

        # Transformation extracting the real part (``.x``) of the complex input.
        real = Transformation([
            Parameter(
                'output',
                Annotation(Type(dtypes.real_for(input_.dtype), input_.shape),
                           'o')),
            Parameter('input', Annotation(input_, 'i')),
        ],
                              """
            ${output.store_same}((${input.load_same}).x);
            """,
                              connectors=['output'])

        # Sum over the last axis, fed by the real-part transformation.
        rd_t = Type(output.dtype, input_.shape)
        rd = Reduce(rd_t,
                    predicate_sum(rd_t.dtype),
                    axes=(len(input_.shape) - 1, ))
        rd.parameter.input.connect(real, real.output, X=real.input)

        x0 = plan.temp_array_like(rd.parameter.output)

        plan.computation_call(rd, x0, input_)
        plan.computation_call(irfft, output, x0, coeffs_arr, input_)

        return plan
コード例 #23
0
ファイル: transformations.py プロジェクト: xexo7C8/reikna
def split_complex(input_arr_t):
    """
    Returns a transformation that splits complex input into two real outputs
    (2 outputs, 1 input): ``real = Re(input), imag = Im(input)``.

    Both outputs have the shape of ``input_arr_t`` and the real counterpart of its data type.
    """
    real_dtype = dtypes.real_for(input_arr_t.dtype)
    output_t = Type(real_dtype, shape=input_arr_t.shape)

    parameters = [
        Parameter('real', Annotation(output_t, 'o')),
        Parameter('imag', Annotation(output_t, 'o')),
        Parameter('input', Annotation(input_arr_t, 'i')),
    ]
    code = """
            ${real.store_same}(${input.load_same}.x);
            ${imag.store_same}(${input.load_same}.y);
        """
    return Transformation(parameters, code)
コード例 #24
0
ファイル: transformations.py プロジェクト: fjarri/reikna
def split_complex(input_arr_t):
    """
    Returns a transformation that splits complex input into two real outputs
    (2 outputs, 1 input): ``real = Re(input), imag = Im(input)``.

    The output type keeps the input shape and uses ``dtypes.real_for(input_arr_t.dtype)``.
    """
    output_t = Type(dtypes.real_for(input_arr_t.dtype), shape=input_arr_t.shape)

    real_param = Parameter('real', Annotation(output_t, 'o'))
    imag_param = Parameter('imag', Annotation(output_t, 'o'))
    input_param = Parameter('input', Annotation(input_arr_t, 'i'))

    return Transformation(
        [real_param, imag_param, input_param],
        """
            ${real.store_same}(${input.load_same}.x);
            ${imag.store_same}(${input.load_same}.y);
        """)
コード例 #25
0
def exp(dtype):
    """
    Returns a :py:class:`~reikna.cluda.Module` with a function of one argument
    that exponentiates the value of type ``dtype``
    (must be a real or complex data type).

    Integer data types raise ``NotImplementedError``.
    """
    if dtypes.is_integer(dtype):
        raise NotImplementedError("exp() of " + str(dtype) + " is not supported")

    # The ``polar_unit`` helper is created only for non-real (complex) types.
    polar_unit_ = None if dtypes.is_real(dtype) else polar_unit(dtypes.real_for(dtype))

    render_kwds = {'dtype': dtype, 'polar_unit_': polar_unit_}
    return Module(TEMPLATE.get_def('exp'), render_kwds=render_kwds)
コード例 #26
0
def prepare_irfft_output(arr):
    """
    Returns a transformation (connector: ``output``) that writes each complex input
    element as two consecutive real values: the ``x`` field at position ``2 * i``
    and the ``y`` field at position ``2 * i + 1`` of the last axis.

    The output type has the real counterpart of ``arr.dtype`` and a doubled last axis.
    """
    real_dtype = dtypes.real_for(arr.dtype)
    doubled_shape = arr.shape[:-1] + (arr.shape[-1] * 2, )
    res = Type(real_dtype, doubled_shape)

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """
    return Transformation(parameters, code, connectors=['output'])
コード例 #27
0
ファイル: demo_specialized_fft.py プロジェクト: fjarri/reikna
def prepare_irfft_output(arr):
    """
    Returns a transformation that unpacks complex elements of ``arr`` into pairs of
    adjacent real values along the last axis (``x`` first, then ``y``).

    ``output`` is the only connector; its type is real with the last axis twice as long.
    """
    batch_shape = arr.shape[:-1]
    unpacked_length = arr.shape[-1] * 2
    res = Type(dtypes.real_for(arr.dtype), batch_shape + (unpacked_length,))

    output_param = Parameter('output', Annotation(res, 'o'))
    input_param = Parameter('input', Annotation(arr, 'i'))

    return Transformation(
        [output_param, input_param],
        """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """,
        connectors=['output'])
コード例 #28
0
ファイル: transformations.py プロジェクト: xexo7C8/reikna
def combine_complex(output_arr_t):
    """
    Returns a transformation that joins two real inputs into complex output
    (1 output, 2 inputs): ``output = real + 1j * imag``.

    Both inputs have the shape of ``output_arr_t`` and the real counterpart of its data type.
    """
    real_dtype = dtypes.real_for(output_arr_t.dtype)
    input_t = Type(real_dtype, shape=output_arr_t.shape)

    parameters = [
        Parameter('output', Annotation(output_arr_t, 'o')),
        Parameter('real', Annotation(input_t, 'i')),
        Parameter('imag', Annotation(input_t, 'i')),
    ]
    code = """
        ${output.store_same}(
            COMPLEX_CTR(${output.ctype})(
                ${real.load_same},
                ${imag.load_same}));
        """
    return Transformation(parameters, code)
コード例 #29
0
ファイル: transformations.py プロジェクト: fjarri/reikna
def combine_complex(output_arr_t):
    """
    Create a transformation that merges a real part and an imaginary part
    into a complex value (1 output, 2 inputs): ``output = real + 1j * imag``.
    """
    real_dtype = dtypes.real_for(output_arr_t.dtype)
    part_t = Type(real_dtype, shape=output_arr_t.shape)

    output_param = Parameter('output', Annotation(output_arr_t, 'o'))
    real_param = Parameter('real', Annotation(part_t, 'i'))
    imag_param = Parameter('imag', Annotation(part_t, 'i'))

    snippet = """
        ${output.store_same}(
            COMPLEX_CTR(${output.ctype})(
                ${real.load_same},
                ${imag.load_same}));
        """
    return Transformation([output_param, real_param, imag_param], snippet)
コード例 #30
0
ファイル: demo_specialized_fft.py プロジェクト: fjarri/reikna
    def _build_plan(self, plan_factory, device_params, output, input_):
        """
        Assemble the computation plan.

        The plan first sums the real part of ``input_`` over its last axis (``Reduce``),
        then runs an ``IRFFT`` whose input and output are wrapped by the transformations
        returned from ``get_prepare_iprfft_input`` and ``get_prepare_iprfft_output``.
        """
        plan = plan_factory()

        N = 4 * input_.shape[-1]
        batch_shape = input_.shape[:-1]
        batch_size = helpers.product(batch_shape)

        # The leading zero is only a placeholder: the first element is unused
        angles = 2 * numpy.pi * numpy.arange(1, N//2) / N
        coeffs = numpy.concatenate([[0], 1 / (4 * numpy.sin(angles))])
        coeffs_arr = plan.persistent_array(coeffs)

        input_trf = get_prepare_iprfft_input(input_)
        output_trf = get_prepare_iprfft_output(output)

        irfft = IRFFT(input_trf.Y)
        irfft.parameter.input.connect(input_trf, input_trf.Y, X=input_trf.X)
        irfft.parameter.output.connect(
            output_trf, output_trf.y,
            x=output_trf.x, x0=output_trf.x0, coeffs=output_trf.coeffs)

        # Transformation extracting the real part of the complex input
        real_part_t = Type(dtypes.real_for(input_.dtype), input_.shape)
        take_real = Transformation(
            [
                Parameter('output', Annotation(real_part_t, 'o')),
                Parameter('input', Annotation(input_, 'i')),
            ],
            """
            ${output.store_same}((${input.load_same}).x);
            """,
            connectors=['output']
            )

        # Sum over the last axis, reading the input through ``take_real``
        sum_t = Type(output.dtype, input_.shape)
        last_axis = len(input_.shape) - 1
        reduction = Reduce(sum_t, predicate_sum(sum_t.dtype), axes=(last_axis,))
        reduction.parameter.input.connect(take_real, take_real.output, X=take_real.input)

        x0 = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, x0, input_)
        plan.computation_call(irfft, output, x0, coeffs_arr, input_)

        return plan
コード例 #31
0
    def __init__(self, state_arr, dt, box=None, kinetic_coeff=1, nonlinear_module=None):
        """
        Declare the computation signature and create the nested computations.

        ``state_arr`` is treated as ``(components, ensembles, *grid)``: the potentials are
        real arrays over the grid axes only, and ``t_potential1``, ``t_potential2`` and ``t``
        are real scalars of the matching real dtype.
        """
        grid_shape = state_arr.shape[2:]
        scalar_dtype = dtypes.real_for(state_arr.dtype)
        potential_arr = Type(scalar_dtype, shape=grid_shape)

        signature = [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('potential1', Annotation(potential_arr, 'i')),
            Parameter('potential2', Annotation(potential_arr, 'i')),
            Parameter('t_potential1', Annotation(scalar_dtype)),
            Parameter('t_potential2', Annotation(scalar_dtype)),
            Parameter('t', Annotation(scalar_dtype)),
        ]
        Computation.__init__(self, signature)

        self._box = box
        self._kinetic_coeff = kinetic_coeff
        self._nonlinear_module = nonlinear_module
        self._components = state_arr.shape[0]
        self._ensembles = state_arr.shape[1]
        self._grid_shape = grid_shape

        # Multiplier applied in the transformed space, built from the half-step ``dt / 2``
        ksquared = get_ksquared(self._grid_shape, self._box)
        phase = ksquared * (-1j * kinetic_coeff * dt / 2)
        self._kprop = numpy.exp(phase).astype(state_arr.dtype)

        kprop_mul = functions.mul(state_arr.dtype, self._kprop.dtype)
        self._kprop_trf = Transformation(
            [
                Parameter('output', Annotation(state_arr, 'o')),
                Parameter('input', Annotation(state_arr, 'i')),
                Parameter('kprop', Annotation(self._kprop, 'i'))],
            """
            ${kprop.ctype} kprop_coeff = ${kprop.load_idx}(${', '.join(idxs[2:])});
            ${output.store_same}(${mul}(${input.load_same}, kprop_coeff));
            """,
            render_kwds=dict(mul=kprop_mul))

        # Both FFTs run over the grid axes; the second one also applies ``kprop`` to its output
        grid_axes = range(2, len(state_arr.shape))
        self._fft = FFT(state_arr, axes=grid_axes)
        self._fft_with_kprop = FFT(state_arr, axes=grid_axes)
        trf = self._kprop_trf
        self._fft_with_kprop.parameter.output.connect(
            trf, trf.input, output_prime=trf.output, kprop=trf.kprop)

        nonlinear_wrapper = get_nonlinear_wrapper(
            self._components, state_arr.dtype, nonlinear_module, dt)
        self._N1 = get_nonlinear1(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper)
        self._N2 = get_nonlinear2(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper, dt)
        self._N3 = get_nonlinear3(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper, dt)
        self._potential_interpolator = get_potential_interpolator(potential_arr, dt)
コード例 #32
0
    def __init__(self, shape, box, drift, trajectories=1, kinetic_coeffs=0.5j, diffusion=None,
            ksquared_cutoff=None, noise_type=None):
        """
        Declare the computation signature and create the nested computations.

        The state has shape ``(trajectories, drift.components) + shape``.
        A ``dW`` input of type ``noise_type`` is part of the signature only when
        ``diffusion`` is given; an additional projecting FFT is created only when
        ``ksquared_cutoff`` is given.
        """
        real_dtype = dtypes.real_for(drift.dtype)
        state_type = Type(drift.dtype, (trajectories, drift.components) + shape)

        self._noise = diffusion is not None

        signature = [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i')),
        ]
        if self._noise:
            signature.append(Parameter('dW', Annotation(noise_type, 'i')))
        signature.append(Parameter('t', Annotation(real_dtype)))
        signature.append(Parameter('dt', Annotation(real_dtype)))
        Computation.__init__(self, signature)

        def new_fft():
            # Every FFT here transforms the grid axes (everything after the first two)
            return FFT(state_type, axes=range(2, len(state_type.shape)))

        self._ksquared = get_ksquared(shape, box).astype(real_dtype)
        kprop_trf = get_kprop_trf(state_type, self._ksquared, kinetic_coeffs)

        self._ksquared_cutoff = ksquared_cutoff
        if self._ksquared_cutoff is not None:
            project_trf = get_project_trf(state_type, self._ksquared, ksquared_cutoff)
            self._fft_with_project = new_fft()
            self._fft_with_project.parameter.output.connect(
                project_trf, project_trf.input,
                output_prime=project_trf.output, ksquared=project_trf.ksquared)

        self._fft = new_fft()
        self._fft_with_kprop = new_fft()
        self._fft_with_kprop.parameter.output.connect(
            kprop_trf, kprop_trf.input,
            output_prime=kprop_trf.output, ksquared=kprop_trf.ksquared, dt=kprop_trf.dt)

        self._xpropagate = get_xpropagate(
            state_type, drift, diffusion=diffusion, noise_type=noise_type)

        # Six-stage coefficient tables
        self._ai = numpy.array([
            0.0,
            -0.737101392796,
            -1.634740794341,
            -0.744739003780,
            -1.469897351522,
            -2.813971388035,
        ])
        self._bi = numpy.array([
            0.032918605146,
            0.823256998200,
            0.381530948900,
            0.200092213184,
            1.718581042715,
            0.27,
        ])
        self._ci = numpy.array([
            0.0,
            0.032918605146,
            0.249351723343,
            0.466911705055,
            0.582030414044,
            0.847252983783,
        ])
コード例 #33
0
def get_nonlinear1(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Build a ``PureParallel`` computation that evaluates ``output = N(input)``.

    For every component the generated kernel calls ``nonlinear_wrapper``'s function
    for that component with the grid indices, all loaded component values, the loaded
    noise values (only when ``diffusion`` is given), ``t`` and ``dt``,
    and stores the result into ``output``.

    :param state_type: type of the ``output`` and ``input`` arrays; the kernel indexes
        them as ``(trajectory, component, *coords)``.
    :param nonlinear_wrapper: module rendered as the prefix of the per-component functions.
    :param components: number of components to load, evaluate and store.
    :param diffusion: if not ``None``, a ``dW`` input is added to the signature and
        ``diffusion.noise_sources`` noise values are loaded per work item.
    :param noise_type: type of the ``dW`` array (used only when ``diffusion`` is given).
    """

    # ``t`` and ``dt`` are scalars of the real dtype matching the state dtype
    real_dtype = dtypes.real_for(state_type.dtype)

    # output = N(input)
    return PureParallel(
        [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i'))]
            # The noise array is part of the signature only when diffusion is present
            + ([Parameter('dW', Annotation(noise_type, 'i'))] if diffusion is not None else []) +
            [Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype))],
        """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: list(map(lambda i: prefix + str(i), range(num)))
            dW_args = args('dW_', diffusion.noise_sources) if diffusion is not None else []
            n_args = ", ".join(idxs[1:] + args('psi_', components) + dW_args)
        %>
        %for comp in range(components):
        ${output.ctype} psi_${comp} = ${input.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${output.store_idx}(
            ${trajectory}, ${comp}, ${coords}, ${nonlinear}${comp}(${n_args}, ${t}, ${dt}));
        %endfor
        """,
        # One work item per (trajectory, grid point): axis 1 of the state is left out
        # of the guiding shape and is looped over inside the kernel instead.
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            components=components,
            nonlinear=nonlinear_wrapper,
            diffusion=diffusion))
コード例 #34
0
ファイル: base.py プロジェクト: fjarri/reikna-integrator
    def __init__(self, shape, box, drift, trajectories=1, diffusion=None):
        """
        Work out the noise configuration for the given drift/diffusion pair.

        Without ``diffusion`` there is no noise (``noise_type`` is ``None``).
        Otherwise ``noise_type`` has shape ``(trajectories, diffusion.noise_sources) + shape``
        and uses the real counterpart of the drift dtype only when ``diffusion.real_noise``
        is set and the drift dtype is not already real; the noise normalization is the
        inverse of the volume of one grid cell.
        """
        if diffusion is None:
            self.noise_type = None
            self.noise = False
            return

        assert diffusion.dtype == drift.dtype
        assert diffusion.components == drift.components

        wants_real_noise = diffusion.real_noise and not dtypes.is_real(drift.dtype)
        noise_dtype = dtypes.real_for(drift.dtype) if wants_real_noise else drift.dtype

        noise_shape = (trajectories, diffusion.noise_sources) + shape
        self.noise_type = Type(noise_dtype, noise_shape)
        self.noise = True

        cell_volume = product(box) / product(shape)
        self._noise_normalization = 1. / cell_volume
コード例 #35
0
def get_nonlinear_wrapper(c_dtype, nonlinear_module, dt):
    """
    Wrap the two functions of ``nonlinear_module`` (suffixes ``0`` and ``1``) into a module
    whose functions return the original result multiplied by the complex constant
    ``(0, -dt)``, i.e. by ``-1j * dt``.
    """
    s_dtype = dtypes.real_for(c_dtype)

    source = """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(psi0, psi1, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """

    render_kwds = {
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(s_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        # ``dt`` is baked into the generated code as a constant of the real dtype
        'dt': dtypes.c_constant(dt, s_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(source, render_kwds=render_kwds)
コード例 #36
0
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Creates a Box-Muller sampler of normally distributed random numbers
    with the mean ``mean`` and the standard deviation ``std``.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    A single call yields two random numbers for real types and one for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        For a complex ``dtype``, ``std`` is the standard deviation of the complex numbers
        themselves (what ``numpy.std()`` returns), not of their real and imaginary components
        (each of those is normally distributed with the standard deviation ``std / sqrt(2)``).
        Therefore ``mean`` has the type ``dtype``, whereas ``std`` must be real.
    """
    complex_res = dtypes.is_complex(dtype)

    # Both the real and the complex flavour of ``dtype`` are passed to the template
    r_dtype = dtypes.real_for(dtype) if complex_res else dtype
    c_dtype = dtype if complex_res else dtypes.complex_for(dtype)

    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    render_kwds = dict(
        complex_res=complex_res,
        r_dtype=r_dtype,
        r_ctype=dtypes.ctype(r_dtype),
        c_dtype=c_dtype,
        c_ctype=dtypes.ctype(c_dtype),
        polar_unit=functions.polar_unit(r_dtype),
        bijection=bijection,
        mean=mean,
        std=std,
        uf=uf)
    module = Module(TEMPLATE.get_def("normal_bm"), render_kwds=render_kwds)

    randoms_per_call = 1 if complex_res else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call)
コード例 #37
0
    def __init__(self, shape, drift, trajectories=1, diffusion=None, iterations=3, noise_type=None):
        """
        Declare the computation signature and create the iteration kernel.

        The state has shape ``(trajectories, drift.components) + shape`` and the dtype
        of ``drift``; ``t`` and ``dt`` are scalars of the corresponding real dtype.
        A ``dW`` input of type ``noise_type`` is added only when ``diffusion`` is given.
        """
        state_dtype = drift.dtype
        real_dtype = dtypes.real_for(state_dtype) if dtypes.is_complex(state_dtype) else state_dtype

        state_type = Type(state_dtype, (trajectories, drift.components) + shape)

        self._noise = diffusion is not None

        signature = [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i')),
        ]
        if self._noise:
            signature.append(Parameter('dW', Annotation(noise_type, 'i')))
        signature += [
            Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype)),
        ]
        Computation.__init__(self, signature)

        self._prop_iter = get_prop_iter(
            state_type, drift, iterations,
            diffusion=diffusion, noise_type=noise_type)
コード例 #38
0
ファイル: samplers.py プロジェクト: SyamGadde/reikna
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Builds a sampler of normally distributed random numbers (mean ``mean``,
    standard deviation ``std``) based on the Box-Muller transform.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Each call produces two random numbers for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        When ``dtype`` is complex, ``std`` describes the standard deviation of the
        complex numbers (the value ``numpy.std()`` would return) rather than that of the
        real and imaginary components (which are normally distributed with the
        standard deviation ``std / sqrt(2)``).
        As a consequence ``mean`` is of type ``dtype``, but ``std`` must be real.
    """
    is_complex = dtypes.is_complex(dtype)
    if is_complex:
        r_dtype, c_dtype = dtypes.real_for(dtype), dtype
    else:
        r_dtype, c_dtype = dtype, dtypes.complex_for(dtype)

    # Uniform sampler on [low, high) = [0, 1) that the template draws from
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    template_def = TEMPLATE.get_def("normal_bm")
    module = Module(
        template_def,
        render_kwds={
            'complex_res': is_complex,
            'r_dtype': r_dtype,
            'r_ctype': dtypes.ctype(r_dtype),
            'c_dtype': c_dtype,
            'c_ctype': dtypes.ctype(c_dtype),
            'polar_unit': functions.polar_unit(r_dtype),
            'bijection': bijection,
            'mean': mean,
            'std': std,
            'uf': uf,
        })

    if is_complex:
        per_call = 1
    else:
        per_call = 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic, randoms_per_call=per_call)
コード例 #39
0
ファイル: demo_specialized_fft.py プロジェクト: fjarri/reikna
def get_prepare_prfft_scan(output):
    """
    Build a transformation mapping ``Y`` to ``output`` (both of the type of ``output``).

    Every element ``(x, y)`` of ``Y`` is replaced by ``(y, -x)``. Where the last index
    is zero, both members are additionally halved and the matching element of the
    real array ``re_X_0`` (one value per batch index) is added to the first member.
    """
    re_X_0_t = Type(dtypes.real_for(output.dtype), output.shape[:-1])

    parameters = [
        Parameter('output', Annotation(output, 'o')),
        Parameter('Y', Annotation(output, 'i')),
        Parameter('re_X_0', Annotation(re_X_0_t, 'i')),
    ]
    code = """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """
    return Transformation(parameters, code, connectors=['output', 'Y'])
コード例 #40
0
ファイル: soliton.py プロジェクト: fjarri/reikna-integrator
def get_diffusion(state_dtype, gamma):
    """
    Build a ``Diffusion`` with one component and one noise source whose single
    function ignores its arguments and returns the constant ``sqrt(gamma)``
    as a complex value with zero imaginary part.
    """
    render_kwds = dict(
        mul_cr=functions.mul(state_dtype, dtypes.real_for(state_dtype)),
        s_dtype=state_dtype,
        gamma=gamma)

    module = Module.create(
            """
            <%
                r_dtype = dtypes.real_for(s_dtype)
                s_ctype = dtypes.ctype(s_dtype)
                r_ctype = dtypes.ctype(r_dtype)
            %>
            INLINE WITHIN_KERNEL ${s_ctype} ${prefix}0_0(
                const int idx_x,
                const ${s_ctype} psi,
                ${r_ctype} t)
            {
                return COMPLEX_CTR(${s_ctype})(${numpy.sqrt(gamma)}, 0);
            }
            """,
            render_kwds=render_kwds)

    return Diffusion(module, state_dtype, components=1, noise_sources=1)
コード例 #41
0
ファイル: dht.py プロジェクト: xexo7C8/reikna
    def _build_plan(self, plan_factory, _device_params, output_arr, input_arr):
        """
        Build the plan as a chain of matrix multiplications, one per transformed axis.

        For every axis in ``self._axes`` the current array is passed through
        ``self._add_transpose`` and then multiplied by a persistent transformation matrix
        obtained from ``self._get_transformation_matrix``. If the axes end up in a
        different order than in the input, a final ``Transpose`` restores the order
        while writing to ``output_arr``.

        :param plan_factory: callable returning an empty plan.
        :param _device_params: unused.
        :param output_arr: output array of the computation.
        :param input_arr: input array of the computation.
        :returns: the filled plan.
        """

        plan = plan_factory()

        dtype = input_arr.dtype
        # The transformation matrices are real even when the data is complex
        p_dtype = dtypes.real_for(dtype) if dtypes.is_complex(dtype) else dtype

        # The mode-space shape belongs to the input for the inverse transform
        # and to the output otherwise
        mode_shape = input_arr.shape if self._inverse else output_arr.shape

        current_mem = input_arr
        seq_axes = list(range(len(input_arr.shape)))
        current_axes = list(range(len(input_arr.shape)))

        for i, axis in enumerate(self._axes):
            current_mem, current_axes = self._add_transpose(plan, current_mem, current_axes, axis)

            tr_matrix = plan.persistent_array(
                self._get_transformation_matrix(p_dtype, mode_shape[axis], self._add_points[axis]))

            dot = MatrixMul(current_mem, tr_matrix)
            if i == len(self._axes) - 1 and current_axes == seq_axes:
                dot_output = output_arr
            else:
                # Cannot write to output if it is not the last transform,
                # or if we need to return to the initial axes order
                dot_output = plan.temp_array_like(dot.parameter.output)
            plan.computation_call(dot, dot_output, current_mem, tr_matrix)
            current_mem = dot_output

        # If we ended up with the wrong order of axes,
        # return to the original order.

        if current_axes != seq_axes:
            # Inverse permutation: position of every original axis in the current order
            tr_axes = [current_axes.index(i) for i in range(len(current_axes))]
            transpose = Transpose(current_mem, output_arr_t=output_arr, axes=tr_axes)
            # Schedule the transpose with the same plan method as the matrix products above
            plan.computation_call(transpose, output_arr, current_mem)

        return plan
コード例 #42
0
    def _build_plan(self, plan_factory, device_params, output, alpha, beta):
        """
        Build the plan: prepare kernel, aggregate kernel, reduction and FFT.

        Steps, in order:

        1. the ``compound_click_probability_prepare`` kernel fills a temporary array
           shaped like ``alpha`` from ``alpha`` and ``beta``;
        2. the ``compound_click_probability_aggregate`` kernel writes the ``products`` array
           of shape ``(samples, max_total_clicks + 1)``; on ``OutOfResourcesError``
           the attempt is repeated with half the work-group size;
        3. ``products`` is summed over axis 0;
        4. an FFT is called on the sums, with a transformation on its output that
           stores only the ``x`` member of every element into ``output``.

        :param plan_factory: callable returning an empty plan.
        :param device_params: device parameters (``local_mem_size`` and
            ``max_work_group_size`` are read).
        :param output: output array of the computation.
        :param alpha: input array of shape ``(samples, modes)``.
        :param beta: second input array, passed to the prepare kernel.
        :returns: the filled plan.
        """

        plan = plan_factory()

        samples, modes = alpha.shape

        for_reduction = Type(alpha.dtype, (samples, self._max_total_clicks + 1))

        prepared_state = plan.temp_array_like(alpha)

        plan.kernel_call(
            TEMPLATE.get_def("compound_click_probability_prepare"),
            [prepared_state, alpha, beta],
            kernel_name="compound_click_probability_prepare",
            global_size=alpha.shape,
            render_kwds=dict(
                mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                exp_c=functions.exp(alpha.dtype),
                ))

        # Block size is limited by the amount of available local memory.
        # In some OpenCL implementations the number reported cannot actually be fully used
        # (because it's used by kernel arguments), so we're padding it a little.
        local_mem_size = device_params.local_mem_size
        max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
        block_size = 2**helpers.log2(max_elems)

        # No reason to have block size larger than the number of modes
        block_size = min(block_size, helpers.bounding_power_of_2(modes))

        products_gsize = (samples, helpers.min_blocks(self._max_total_clicks + 1, block_size) * block_size)
        products = plan.temp_array_like(for_reduction)

        read_size = min(block_size, device_params.max_work_group_size)

        # These depend only on ``modes`` and ``block_size``, so they are computed once
        full_steps = modes // block_size
        remainder_size = modes % block_size

        # NOTE(review): if ``read_size`` starts at 1 or every attempt fails, no aggregate
        # kernel is added and ``products`` is never written - confirm this is intended.
        while read_size > 1:

            try:
                plan.kernel_call(
                    TEMPLATE.get_def("compound_click_probability_aggregate"),
                    [products, prepared_state],
                    kernel_name="compound_click_probability_aggregate",
                    global_size=products_gsize,
                    local_size=(1, read_size,),
                    render_kwds=dict(
                        block_size=block_size,
                        read_size=read_size,
                        full_steps=full_steps,
                        remainder_size=remainder_size,
                        output_size=self._max_total_clicks + 1,
                        mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                        add_cc=functions.add(alpha.dtype, alpha.dtype),
                        polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                        modes=self._system.modes,
                        max_total_clicks=self._max_total_clicks,
                        ))

            except OutOfResourcesError:
                # The kernel did not fit: try again with half the work-group size
                read_size //= 2

            else:
                # The kernel was added successfully, stop retrying
                break

        reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

        temp = plan.temp_array_like(reduction.parameter.output)

        plan.computation_call(reduction, temp, products)

        fft = FFT(temp)
        # Output transformation keeping only the ``x`` member of the FFT result
        real_trf = Transformation([
            Parameter('output', Annotation(output, 'o')),
            Parameter('input', Annotation(temp, 'i')),
            ],
            """
                ${input.ctype} val = ${input.load_same};
                ${output.store_same}(val.x);
                """)
        fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

        plan.computation_call(fft, output, temp, True)

        return plan
コード例 #43
0
def get_nonlinear2(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Build a ``PureParallel`` computation producing ``psi_4`` and ``psi_k``
    from ``psi_I`` and ``k1`` (see the formulas in the comment below).

    :param state_type: type of all four state arrays; the kernel indexes them as
        ``(trajectory, component, *coords)``.
    :param nonlinear_wrapper: module rendered as the prefix of the per-component functions.
    :param components: number of components to load, evaluate and store.
    :param diffusion: if not ``None``, a ``dW`` input is added to the signature and
        ``diffusion.noise_sources`` noise values are loaded and passed to the functions.
    :param noise_type: type of the ``dW`` array (used only when ``diffusion`` is given).
    """

    # ``t`` and ``dt`` are scalars of the real dtype matching the state dtype
    real_dtype = dtypes.real_for(state_type.dtype)

    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    return PureParallel(
        [
            Parameter('psi_k', Annotation(state_type, 'o')),
            Parameter('psi_4', Annotation(state_type, 'o')),
            Parameter('psi_I', Annotation(state_type, 'i')),
            Parameter('k1', Annotation(state_type, 'i'))]
            # The noise array is part of the signature only when diffusion is present
            + ([Parameter('dW', Annotation(noise_type, 'i'))] if diffusion is not None else []) +
            [Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype))],
        """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: ", ".join(map(lambda i: prefix + str(i), range(num)))
            dW_args = (args('dW_', diffusion.noise_sources) + ",") if diffusion is not None else ""
        %>

        %for comp in range(components):
        ${psi_k.ctype} psi_I_${comp} = ${psi_I.load_idx}(${trajectory}, ${comp}, ${coords});
        ${psi_k.ctype} k1_${comp} = ${k1.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${psi_k.ctype} k2_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k1_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_k.ctype} k3_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k2_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_4.store_idx}(${trajectory}, ${comp}, ${coords}, psi_I_${comp} + k3_${comp});
        %endfor

        %for comp in range(components):
        ${psi_k.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psi_I_${comp} + ${div}(k1_${comp}, 6) + ${div}(k2_${comp}, 3) + ${div}(k3_${comp}, 3));
        %endfor
        """,
        # One work item per (trajectory, grid point): axis 1 of the state is left out
        # of the guiding shape and is looped over inside the kernel instead.
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            components=components,
            nonlinear=nonlinear_wrapper,
            diffusion=diffusion,
            # Division of a state value by an integer, with the result in the state dtype
            div=functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype)))
コード例 #44
0
def get_prop_iter(state_type, drift, iterations, diffusion=None, noise_type=None):
    """
    Build a ``PureParallel`` computation performing ``iterations`` passes of an
    iterative update of the state.

    In every pass the kernel evaluates, for each component, half of
    ``drift * dt`` (plus ``diffusion * dW`` summed over the noise sources when
    ``diffusion`` is given) at the time ``t + dt / 2`` and the current temporary state,
    and sets the temporary state to ``input + dpsi``.
    After the last pass ``temporary state + dpsi`` is stored into ``output``.

    :param state_type: type of ``output`` and ``input``; the kernel indexes them as
        ``(trajectory, component, *coords)``.
    :param drift: object providing ``components`` and the ``module`` with drift functions.
    :param iterations: number of passes unrolled into the kernel.
    :param diffusion: if not ``None``, object providing ``noise_sources`` and the ``module``
        with diffusion functions; a ``dW`` input is then added to the signature.
    :param noise_type: type of the ``dW`` array (used only when ``diffusion`` is given).
    """

    # ``t`` and ``dt`` are real scalars even when the state is complex
    if dtypes.is_complex(state_type.dtype):
        real_dtype = dtypes.real_for(state_type.dtype)
    else:
        real_dtype = state_type.dtype

    # Without diffusion ``mul_cn`` is never rendered; the real dtype is just a placeholder
    if diffusion is not None:
        noise_dtype = noise_type.dtype
    else:
        noise_dtype = real_dtype

    return PureParallel(
        [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i'))]
            # The noise array is part of the signature only when diffusion is present
            + ([Parameter('dW', Annotation(noise_type, 'i'))] if diffusion is not None else []) +
            [Parameter('t', Annotation(real_dtype)),
            Parameter('dt', Annotation(real_dtype))],
        """
        <%
            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]
            components = drift.components
            if diffusion is not None:
                noise_sources = diffusion.noise_sources
            psi_args = ", ".join("psi_" + str(c) + "_tmp" for c in range(components))

            if diffusion is None:
                dW = None
        %>

        %for comp in range(components):
        ${output.ctype} psi_${comp} = ${input.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psi_${comp}_tmp = psi_${comp};
        ${output.ctype} dpsi_${comp};
        %endfor

        %if diffusion is not None:
        %for ncomp in range(noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for i in range(iterations):

        %for comp in range(components):
        dpsi_${comp} =
            ${mul_cr}(
                ${mul_cr}(${drift.module}${comp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), ${dt})
                %if diffusion is not None:
                %for ncomp in range(noise_sources):
                + ${mul_cn}(${diffusion.module}${comp}_${ncomp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), dW_${ncomp})
                %endfor
                %endif
                , 0.5);
        %endfor

        %for comp in range(components):
        psi_${comp}_tmp = psi_${comp} + dpsi_${comp};
        %endfor

        %endfor

        %for comp in range(components):
        ${output.store_idx}(${trajectory}, ${comp}, ${coords}, psi_${comp}_tmp + dpsi_${comp});
        %endfor
        """,
        # One work item per (trajectory, grid point): axis 1 of the state is left out
        # of the guiding shape and is looped over inside the kernel instead.
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            drift=drift,
            diffusion=diffusion,
            iterations=iterations,
            # state * real scalar, and state * noise value
            mul_cr=functions.mul(state_type.dtype, real_dtype),
            mul_cn=functions.mul(state_type.dtype, noise_dtype)))