Python Module.createの例、reikna.cluda.Module.create Pythonの例

コード例 #1

0

ファイルを表示

ファイル: integrator.py プロジェクト: fjarri-attic/vienna_simulation

def get_nonlinear_wrapper(components, c_dtype, nonlinear_module, dt):
    """
    Build a module that wraps every function of ``nonlinear_module``.

    For each ``comp`` in ``range(components)`` the rendered code defines
    ``${prefix}<comp>(psi0, ..., V, t)``, which calls ``${nonlinear}<comp>``
    with the same arguments and returns that value multiplied by the
    constant ``COMPLEX_CTR(c_ctype)(0, -dt)``.

    :param components: number of generated functions and of ``psi`` arguments.
    :param c_dtype: dtype of the ``psi`` values; its real counterpart
        (``dtypes.real_for``) is used for ``V``, ``t`` and ``dt``.
    :param nonlinear_module: module whose ``<comp>``-suffixed functions are wrapped.
    :param dt: value rendered into the code via ``dtypes.c_constant``.
    :returns: the object produced by ``Module.create``.
    """
    real_dtype = dtypes.real_for(c_dtype)

    source = """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(
                %for pcomp in range(components):
                psi${pcomp},
                %endfor
                V, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """

    # Names made available to the template above.
    template_kwds = {
        'components': components,
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(real_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        'dt': dtypes.c_constant(dt, real_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(source, render_kwds=template_kwds)

コード例 #2

0

ファイルを表示

ファイル: soliton.py プロジェクト: fjarri/reikna-integrator

def get_drift(state_dtype, U, gamma, dx, wigner=False):
    """
    Create the single-component ``Drift`` object.

    The rendered function ``${prefix}0(idx_x, psi, t)`` returns ``psi``
    multiplied by the complex value
    ``(-gamma, -(U * (norm(psi) - correction)))``.

    :param state_dtype: dtype of ``psi``; also passed on to ``Drift``.
    :param U: value substituted for ``${U}`` in the template.
    :param gamma: value substituted for ``${gamma}`` in the template.
    :param dx: used only when ``wigner`` is true, as ``correction = 1. / dx``.
    :param wigner: when false, ``correction`` is ``0``.
    :returns: ``Drift(module, state_dtype, components=1)``.
    """
    source = """
            <%
                r_dtype = dtypes.real_for(s_dtype)
                s_ctype = dtypes.ctype(s_dtype)
                r_ctype = dtypes.ctype(r_dtype)
            %>
            INLINE WITHIN_KERNEL ${s_ctype} ${prefix}0(
                const int idx_x,
                const ${s_ctype} psi,
                ${r_ctype} t)
            {
                return ${mul_cc}(
                    COMPLEX_CTR(${s_ctype})(
                        -${gamma},
                        -(${U} * (${norm}(psi) - ${correction}))),
                    psi
                );
            }
            """

    multiply = functions.mul(state_dtype, state_dtype)
    squared_norm = functions.norm(state_dtype)

    if wigner:
        correction = 1. / dx
    else:
        correction = 0

    template_kwds = {
        's_dtype': state_dtype,
        'U': U,
        'gamma': gamma,
        'mul_cc': multiply,
        'norm': squared_norm,
        'correction': correction,
    }
    drift_module = Module.create(source, render_kwds=template_kwds)
    return Drift(drift_module, state_dtype, components=1)

コード例 #3

0

ファイルを表示

ファイル: wigner.py プロジェクト: fjarri-attic/early-universe

def nonlinear_no_potential(dtype, U, nu):
    """
    Build the two-component nonlinear module without a potential term.

    For ``comp`` in ``(0, 1)`` the rendered function
    ``${prefix}<comp>(psi0, psi1, t)`` returns::

        psi<comp> * (U[comp, 0] * norm(psi0) + U[comp, 1] * norm(psi1))
            - psi<1 - comp> * nu

    :param dtype: dtype of ``psi0``/``psi1``; ``t`` and ``nu`` use
        ``dtypes.real_for(dtype)``.
    :param U: object indexed as ``U[comp, other]`` for ``comp, other`` in ``{0, 1}``.
    :param nu: value rendered as a constant of the real dtype.
    :returns: the object produced by ``Module.create``.
    """
    complex_ctype = dtypes.ctype(dtype)
    real_dtype = dtypes.real_for(dtype)
    real_ctype = dtypes.ctype(real_dtype)

    source = """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    ${dtypes.c_constant(U[comp, 0])} * ${norm}(psi0) +
                    ${dtypes.c_constant(U[comp, 1])} * ${norm}(psi1)
                    ))
                - ${mul}(psi${1 - comp}, ${nu})
                );
        }
        %endfor
        """

    template_kwds = {
        'mul': functions.mul(dtype, real_dtype),
        'norm': functions.norm(dtype),
        'U': U,
        'nu': dtypes.c_constant(nu, real_dtype),
        'c_ctype': complex_ctype,
        's_ctype': real_ctype,
    }
    return Module.create(source, render_kwds=template_kwds)

コード例 #4

0

ファイルを表示

ファイル: rk4ip_stepper.py プロジェクト: fjarri/reikna-integrator

def get_nonlinear_wrapper(state_dtype, grid_dims, drift, diffusion=None):
    """
    Build a module combining the drift and (optionally) the diffusion terms.

    For each ``comp`` in ``range(drift.components)`` the rendered function
    ``${prefix}<comp>`` takes ``grid_dims`` integer indices, one ``psi_<n>``
    per component, one ``dW_<n>`` per noise source (only when ``diffusion``
    is given), then ``t`` and ``dt``.  It returns
    ``drift<comp>(...) * dt`` plus, for every noise source ``ncomp``,
    ``diffusion<comp>_<ncomp>(...) * dW_<ncomp>``.

    :param state_dtype: dtype of the ``psi`` arguments and of the result.
    :param grid_dims: number of ``idx_<n>`` arguments.
    :param drift: object providing ``components`` and ``module``.
    :param diffusion: ``None``, or an object providing ``dtype``,
        ``noise_sources`` and ``module``.
    :returns: the object produced by ``Module.create``.
    """
    real_dtype = dtypes.real_for(state_dtype)

    # Without diffusion the noise type is never used by the rendered code,
    # but a ctype and a multiplication are still created for it below.
    if diffusion is None:
        noise_dtype = real_dtype
    else:
        noise_dtype = diffusion.dtype

    source = """
        <%
            components = drift.components
            idx_args = ["idx_" + str(dim) for dim in range(grid_dims)]
            psi_args = ["psi_" + str(comp) for comp in range(components)]
            if diffusion is not None:
                dW_args = ["dW_" + str(ncomp) for ncomp in range(diffusion.noise_sources)]
        %>
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${s_ctype} ${prefix}${comp}(
            %for idx in idx_args:
            const int ${idx},
            %endfor
            %for psi in psi_args:
            const ${s_ctype} ${psi},
            %endfor
            %if diffusion is not None:
            %for dW in dW_args:
            const ${n_ctype} ${dW},
            %endfor
            %endif
            const ${r_ctype} t,
            const ${r_ctype} dt)
        {
            return
                ${mul_sr}(${drift.module}${comp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), dt)
                %if diffusion is not None:
                %for ncomp in range(diffusion.noise_sources):
                + ${mul_sn}(${diffusion.module}${comp}_${ncomp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), ${dW_args[ncomp]})
                %endfor
                %endif
                ;
        }
        %endfor
        """

    template_kwds = {
        'grid_dims': grid_dims,
        's_ctype': dtypes.ctype(state_dtype),
        'r_ctype': dtypes.ctype(real_dtype),
        'n_ctype': dtypes.ctype(noise_dtype),
        'mul_sr': functions.mul(state_dtype, real_dtype),
        'mul_sn': functions.mul(state_dtype, noise_dtype),
        'drift': drift,
        'diffusion': diffusion,
    }
    return Module.create(source, render_kwds=template_kwds)

コード例 #5

0

ファイルを表示

ファイル: test_soliton.py プロジェクト: fjarri-attic/vienna_simulation

def get_nonlinear(dtype, interaction, tunneling):
    r"""
    Build the nonlinear module for ``C = interaction.shape[0]`` components.

    .. math::

        N_j(\psi_1, ... \psi_C)
        = \left( \sum_{n=1}^{C} U_{jn} |\psi_n|^2 + V \right) \psi_j
          - \nu_j \psi_{m_j}

    Each rendered function ``${prefix}<comp>`` takes ``psi0 .. psi<C-1>``,
    ``V`` and ``t``.

    :param dtype: dtype of the ``psi`` values; ``V``, ``t`` and all
        constants use ``dtypes.real_for(dtype)``.
    :param interaction: a symmetrical ``components x components`` array with
        interaction strengths, indexed as ``interaction[comp, other_comp]``.
    :param tunneling: a list of ``(other_comp, coeff)`` pairs of tunnelling
        strengths, one pair per component.
    :returns: the object produced by ``Module.create``.
    """
    complex_ctype = dtypes.ctype(dtype)
    real_dtype = dtypes.real_for(dtype)
    real_ctype = dtypes.ctype(real_dtype)

    source = """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    %for other_comp in range(components):
                    + ${dtypes.c_constant(interaction[comp, other_comp], s_dtype)} *
                        ${norm}(psi${other_comp})
                    %endfor
                    + V
                    ))
                - ${mul}(
                    psi${tunneling[comp][0]},
                    ${dtypes.c_constant(tunneling[comp][1], s_dtype)})
                );
        }
        %endfor
        """

    template_kwds = {
        'components': interaction.shape[0],
        'mul': functions.mul(dtype, real_dtype),
        'norm': functions.norm(dtype),
        'interaction': interaction,
        'tunneling': tunneling,
        's_dtype': real_dtype,
        'c_ctype': complex_ctype,
        's_ctype': real_ctype,
    }
    return Module.create(source, render_kwds=template_kwds)

コード例 #6

0

ファイルを表示

def get_nonlinear_wrapper(c_dtype, nonlinear_module, dt):
    """
    Build a two-component module wrapping ``nonlinear_module``.

    For ``comp`` in ``(0, 1)`` the rendered function
    ``${prefix}<comp>(psi0, psi1, t)`` calls ``${nonlinear}<comp>`` with the
    same arguments and multiplies the result by
    ``COMPLEX_CTR(c_ctype)(0, -dt)``.

    :param c_dtype: dtype of ``psi0``/``psi1``; ``t`` and ``dt`` use
        ``dtypes.real_for(c_dtype)``.
    :param nonlinear_module: module whose ``0``/``1``-suffixed functions are wrapped.
    :param dt: value rendered into the code via ``dtypes.c_constant``.
    :returns: the object produced by ``Module.create``.
    """
    real_dtype = dtypes.real_for(c_dtype)

    source = """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(psi0, psi1, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """

    template_kwds = {
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(real_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        'dt': dtypes.c_constant(dt, real_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(source, render_kwds=template_kwds)

コード例 #7

0

ファイルを表示

ファイル: soliton.py プロジェクト: fjarri/reikna-integrator

def get_diffusion(state_dtype, gamma):
    """
    Create the ``Diffusion`` object with one component and one noise source.

    The rendered function ``${prefix}0_0(idx_x, psi, t)`` ignores its
    arguments and returns the complex constant ``(sqrt(gamma), 0)``; the
    square root is evaluated with ``numpy.sqrt`` while the template renders.

    :param state_dtype: dtype of ``psi`` and of the returned value; also
        passed on to ``Diffusion``.
    :param gamma: value whose square root is written into the code.
    :returns: ``Diffusion(module, state_dtype, components=1, noise_sources=1)``.
    """
    source = """
            <%
                r_dtype = dtypes.real_for(s_dtype)
                s_ctype = dtypes.ctype(s_dtype)
                r_ctype = dtypes.ctype(r_dtype)
            %>
            INLINE WITHIN_KERNEL ${s_ctype} ${prefix}0_0(
                const int idx_x,
                const ${s_ctype} psi,
                ${r_ctype} t)
            {
                return COMPLEX_CTR(${s_ctype})(${numpy.sqrt(gamma)}, 0);
            }
            """

    real_dtype = dtypes.real_for(state_dtype)
    template_kwds = {
        # ``mul_cr`` is passed through although the template above never references it.
        'mul_cr': functions.mul(state_dtype, real_dtype),
        's_dtype': state_dtype,
        'gamma': gamma,
    }
    diffusion_module = Module.create(source, render_kwds=template_kwds)
    return Diffusion(diffusion_module, state_dtype, components=1, noise_sources=1)

コード例 #8

0

ファイルを表示

ファイル: tools.py プロジェクト: SyamGadde/reikna

    def create(cls, bijection, seed=None, reserve_id_space=True):
        """
        Creates a generator.

        :param bijection: a :py:class:`~reikna.cbrng.bijections.Bijection` object.
        :param seed: an integer, or numpy array of 32-bit unsigned integers.
            An array is used as the key directly; it must have dtype ``uint32``
            and exactly as many elements as there are 32-bit key words left
            after the thread identifier space (if any) is reserved.
        :param reserve_id_space: if ``True``, the last 32 bit of the key will be reserved
            for the thread identifier.
            As a result, the total size of the key should be 64 bit or more.
            If ``False``, the thread identifier will be just added to the key,
            which will still result in different keys for different threads,
            with the danger that different seeds produce the same sequences.
        :raises ValueError: if ``reserve_id_space`` is ``True`` and the key
            consists of a single 32-bit word.
        :returns: ``cls(module, full_key)``, where ``module`` provides the
            ``key_from_int`` function and ``full_key`` is the packed key.
        """

        # Number of 32-bit words of the key that are filled from the seed.
        if reserve_id_space:
            if bijection.key_words == 1 and bijection.word_dtype.itemsize == 4:
                # It's too hard to compress both global and thread-dependent part
                # in a single 32-bit word.
                # Let the user handle this himself.
                raise ValueError("Cannot reserve ID space in a 32-bit key")

            if bijection.word_dtype.itemsize == 4:
                key_words32 = bijection.key_words - 1
            else:
                if bijection.key_words > 1:
                    key_words32 = (bijection.key_words - 1) * 2
                else:
                    # Philox-2x64 case, the key is a single 64-bit integer.
                    # We use first 32 bit for the key, and the remaining 32 bit for a thread identifier.
                    key_words32 = 1
        else:
            key_words32 = bijection.key_words * (bijection.word_dtype.itemsize // 4)

        if isinstance(seed, numpy.ndarray):
            # explicit key was provided
            assert seed.size == key_words32 and seed.dtype == numpy.uint32
            key = seed.copy().flatten()
        else:
            # use numpy to generate the key from seed
            np_rng = numpy.random.RandomState(seed)

            # 32-bit Python can only generate random integer up to 2**31-1,
            # so every 32-bit word is assembled from two 16-bit halves
            # (even index -> high half).
            key16 = np_rng.randint(0, 2**16, key_words32 * 2)
            key = numpy.zeros(key_words32, numpy.uint32)
            for i in range(key_words32 * 2):
                key[i // 2] += key16[i] << (16 if i % 2 == 0 else 0)

        full_key = numpy.zeros(1, bijection.key_dtype)[0]
        if bijection.word_dtype.itemsize == 4:
            full_key['v'][:key_words32] = key
        else:
            # Pack pairs of 32-bit words into 64-bit words (even index -> high half).
            # The arithmetic is done on Python integers on purpose:
            # ``numpy.uint32 << 32`` keeps a 32-bit result under the NumPy 2
            # promotion rules (dropping the high half), and mixing uint64 with
            # signed integer scalars promotes to float64 under older rules.
            for i in range(key_words32):
                shift = 32 if i % 2 == 0 else 0
                word = int(full_key['v'][i // 2]) + (int(key[i]) << shift)
                full_key['v'][i // 2] = word

        # The thread index is added to the last key word on the device side.
        module = Module.create("""
            WITHIN_KERNEL ${bijection.module}Key ${prefix}key_from_int(int idx)
            {
                ${bijection.module}Key result;

                %for i in range(bijection.key_words):
                result.v[${i}] = ${key['v'][i]}
                    %if i == bijection.key_words - 1:
                    + idx
                    %endif
                ;
                %endfor

                return result;
            }
            """,
            render_kwds=dict(
                bijection=bijection,
                key=full_key))

        return cls(module, full_key)

コード例 #9

0

ファイルを表示

ファイル: polynomial_transform_fft.py プロジェクト: stjordanis/nufhe

from .transform.fft import fft_transform_ref
from .performance import PerformanceParameters


def transformed_dtype():
    """Return the numpy dtype of transformed data (``complex128``)."""
    return numpy.dtype(numpy.complex128)


def transformed_internal_dtype():
    """Return the numpy dtype used for the internal representation (``complex128``)."""
    return numpy.dtype(numpy.complex128)


# Module describing one transformed element: ``${prefix}`` itself becomes a
# typedef for ``double2``, ``pack``/``unpack`` are identity macros and
# ``zero`` expands to the complex value (0, 0).
elem = Module.create(
    lambda prefix: """
    typedef double2 ${prefix};

    #define ${prefix}pack(x) (x)
    #define ${prefix}unpack(x) (x)
    #define ${prefix}zero (COMPLEX_CTR(double2)(0, 0))
    """,
    render_kwds={},
)


def transformed_internal_ctype():
    """Return the module-level ``elem`` object, used as the internal element type."""
    return elem


def transformed_length(N):
    """Return the length of the transformed data for input length ``N`` (``N // 2``)."""
    half = N // 2
    return half


def forward_transform_ref(data):
    """Reference forward transform: delegates to ``fft_transform_ref`` with ``i32_conversion=True``."""
    return fft_transform_ref(data, i32_conversion=True)

コード例 #10

0

ファイルを表示

ファイル: tools.py プロジェクト: xexo7C8/reikna

    def create(cls, bijection, seed=None, reserve_id_space=True):
        """
        Creates a generator.

        :param bijection: a :py:class:`~reikna.cbrng.bijections.Bijection` object.
        :param seed: an integer, or numpy array of 32-bit unsigned integers.
            An array is used as the key directly; it must have dtype ``uint32``
            and exactly as many elements as there are 32-bit key words left
            after the thread identifier space (if any) is reserved.
        :param reserve_id_space: if ``True``, the last 32 bit of the key will be reserved
            for the thread identifier.
            As a result, the total size of the key should be 64 bit or more.
            If ``False``, the thread identifier will be just added to the key,
            which will still result in different keys for different threads,
            with the danger that different seeds produce the same sequences.
        :raises ValueError: if ``reserve_id_space`` is ``True`` and the key
            consists of a single 32-bit word.
        :returns: ``cls(module, full_key)``, where ``module`` provides the
            ``key_from_int`` function and ``full_key`` is the packed key.
        """

        # Number of 32-bit words of the key that are filled from the seed.
        if reserve_id_space:
            if bijection.key_words == 1 and bijection.word_dtype.itemsize == 4:
                # It's too hard to compress both global and thread-dependent part
                # in a single 32-bit word.
                # Let the user handle this himself.
                raise ValueError("Cannot reserve ID space in a 32-bit key")

            if bijection.word_dtype.itemsize == 4:
                key_words32 = bijection.key_words - 1
            else:
                if bijection.key_words > 1:
                    key_words32 = (bijection.key_words - 1) * 2
                else:
                    # Philox-2x64 case, the key is a single 64-bit integer.
                    # We use first 32 bit for the key, and the remaining 32 bit for a thread identifier.
                    key_words32 = 1
        else:
            key_words32 = bijection.key_words * (
                bijection.word_dtype.itemsize // 4)

        if isinstance(seed, numpy.ndarray):
            # explicit key was provided
            assert seed.size == key_words32 and seed.dtype == numpy.uint32
            key = seed.copy().flatten()
        else:
            # use numpy to generate the key from seed
            np_rng = numpy.random.RandomState(seed)

            # 32-bit Python can only generate random integer up to 2**31-1,
            # so every 32-bit word is assembled from two 16-bit halves
            # (even index -> high half).
            key16 = np_rng.randint(0, 2**16, key_words32 * 2)
            key = numpy.zeros(key_words32, numpy.uint32)
            for i in range(key_words32 * 2):
                key[i // 2] += key16[i] << (16 if i % 2 == 0 else 0)

        full_key = numpy.zeros(1, bijection.key_dtype)[0]
        if bijection.word_dtype.itemsize == 4:
            full_key['v'][:key_words32] = key
        else:
            # Pack pairs of 32-bit words into 64-bit words (even index -> high half).
            # The arithmetic is done on Python integers on purpose:
            # ``numpy.uint32 << 32`` keeps a 32-bit result under the NumPy 2
            # promotion rules (dropping the high half), and mixing uint64 with
            # signed integer scalars promotes to float64 under older rules.
            for i in range(key_words32):
                shift = 32 if i % 2 == 0 else 0
                word = int(full_key['v'][i // 2]) + (int(key[i]) << shift)
                full_key['v'][i // 2] = word

        # The thread index is added to the last key word on the device side.
        module = Module.create("""
            WITHIN_KERNEL ${bijection.module}Key ${prefix}key_from_int(int idx)
            {
                ${bijection.module}Key result;

                %for i in range(bijection.key_words):
                result.v[${i}] = ${key['v'][i]}
                    %if i == bijection.key_words - 1:
                    + idx
                    %endif
                ;
                %endfor

                return result;
            }
            """,
                               render_kwds=dict(bijection=bijection,
                                                key=full_key))

        return cls(module, full_key)

コード例 #11

0

ファイルを表示

from reikna.cluda import Module

from .numeric_functions_gpu import Torus32ToPhase
from .numeric_functions_gpu import Torus32, Int32, Float  # for re-export
from .computation_cache import get_computation


def phase_to_t32(phase: int, mspace_size: int):
    """
    Approximate the phase to the nearest message possible in the message space.

    ``mspace_size`` indicates which message space we are working on (how many
    messages are possible).  The phase is reduced modulo ``mspace_size`` and
    scaled by ``2**32 // mspace_size`` before being wrapped in ``Torus32``.
    """
    reduced_phase = phase % mspace_size
    step = 2**32 // mspace_size
    return Torus32(reduced_phase * step)


def t32_to_phase(thr, result, messages, mspace_size: int):
    """
    Run the ``Torus32ToPhase`` computation on ``messages``, writing into ``result``.

    The computation is obtained through ``get_computation`` for the given
    ``thr``, the shape of ``messages`` and ``mspace_size``.  Nothing is returned.
    """
    get_computation(thr, Torus32ToPhase, messages.shape, mspace_size)(result, messages)


def double_to_t32(d: float):
    """
    Convert ``d`` to ``Torus32``: take ``d - trunc(d)``, scale it by ``2**32``
    and cast the result with ``astype(Torus32)``.
    """
    fractional = d - numpy.trunc(d)
    scaled = fractional * 2**32
    return scaled.astype(Torus32)


# Device-side function (named by the module prefix itself) that evaluates the
# same expression as ``double_to_t32``: ``(d - trunc(d)) * 2**32``, returned as
# ``int``.  ``${2**32}`` is evaluated while the template is rendered, so the
# generated code contains the literal value.
double_to_t32_module = Module.create("""
    WITHIN_KERNEL int ${prefix}(double d)
    {
        return (d - trunc(d)) * ${2**32};
    }
    """)