def uniform_float(bijection, dtype, low=0, high=1):
    """
    Generates uniformly distributed floating-point numbers
    in the interval ``[low, high)``.
    Supported dtypes: ``float(32/64)``.
    A fixed number of counters is used in each thread.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.
    """
    assert low < high

    # Double precision dtypes use the 64-bit raw generator, all others the 32-bit one.
    if dtypes.is_double(dtype):
        bitness = 64
    else:
        bitness = 32

    # The width of the interval is computed from the numeric bounds,
    # before they are passed through ``dtypes.c_constant``.
    interval = high - low

    render_kwds = {
        'bijection': bijection,
        'ctype': dtypes.ctype(dtype),
        'raw_func': 'get_raw_uint{}'.format(bitness),
        'raw_max': dtypes.c_constant(2 ** bitness, dtype),
        'size': dtypes.c_constant(interval, dtype),
        'low': dtypes.c_constant(low, dtype),
    }
    module = Module(TEMPLATE.get_def("uniform_float"), render_kwds=render_kwds)
    return Sampler(bijection, module, dtype, deterministic=True)
def uniform_float(bijection, dtype, low=0, high=1):
    """
    Generates uniformly distributed floating-point numbers
    in the interval ``[low, high)``.
    Supported dtypes: ``float(32/64)``.
    A fixed number of counters is used in each thread.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.
    """
    assert low < high

    # 64 raw bits for double precision dtypes, 32 raw bits otherwise.
    if dtypes.is_double(dtype):
        bitness = 64
    else:
        bitness = 32

    # Everything the "uniform_float" template def is rendered with.
    # ``size`` is the width of the interval, derived from the numeric bounds.
    render_kwds = {
        'bijection': bijection,
        'ctype': dtypes.ctype(dtype),
        'raw_func': 'get_raw_uint{}'.format(bitness),
        'raw_max': dtypes.c_constant(2 ** bitness, dtype),
        'size': dtypes.c_constant(high - low, dtype),
        'low': dtypes.c_constant(low, dtype),
    }

    sampler_module = Module(
        TEMPLATE.get_def("uniform_float"), render_kwds=render_kwds)
    return Sampler(bijection, sampler_module, dtype, deterministic=True)
def get_nonlinear3(state_arr, potential_arr, scalar_dtype, nonlinear_module, dt):
    """
    Returns a ``PureParallel`` computation performing the following step
    for every component of the state::

        k4 = N(D(psi_4), t + dt)
        output = D(psi_k) + k4 / 6

    Here ``D(psi_4)`` and ``D(psi_k)`` are read from the ``kprop_psi_4`` and
    ``kprop_psi_k`` inputs, and ``N`` is ``nonlinear_module``, which is called
    with all the components, the value loaded from ``potential_next``
    and the time ``t + dt``.

    The number of components is ``state_arr.shape[0]``;
    the remaining dimensions of ``state_arr`` form the guiding array.
    """
    parameters = [
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
        Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
        Parameter('potential_next', Annotation(potential_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    # The potential is indexed with all the guiding indices except the first one.
    kernel_src = """
        %for comp in range(components):
        ${output.ctype} psi4_${comp} = ${kprop_psi_4.load_idx}(${comp}, ${idxs.all()});
        ${output.ctype} psik_${comp} = ${kprop_psi_k.load_idx}(${comp}, ${idxs.all()});
        %endfor

        ${potential_next.ctype} V = ${potential_next.load_idx}(${', '.join(idxs[1:])});

        %for comp in range(components):
        ${output.ctype} k4_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi4_${pcomp},
            %endfor
            V, ${t} + ${dt});
        ${output.store_idx}(${comp}, ${idxs.all()}, psik_${comp} + ${div}(k4_${comp}, 6));
        %endfor
        """

    render_kwds = {
        'components': state_arr.shape[0],
        'nonlinear': nonlinear_module,
        'dt': dtypes.c_constant(dt, scalar_dtype),
        'div': functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype),
    }

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=state_arr.shape[1:],
        render_kwds=render_kwds)
def get_nonlinear_wrapper(components, c_dtype, nonlinear_module, dt):
    """
    Returns a module with one function per component (named ``${prefix}<comp>``)
    wrapping the corresponding function of ``nonlinear_module``.

    Each wrapper takes ``components`` values of the complex type ``c_dtype``
    plus two real values (``V`` and ``t``), forwards all of them to
    ``nonlinear_module`` and returns the result multiplied
    by the complex constant ``(0, -dt)``.

    :param components: number of state components (number of generated functions).
    :param c_dtype: complex data type of the state.
    :param nonlinear_module: module providing the functions ``<name><comp>``.
    :param dt: time step; rendered as a constant of the real type
        corresponding to ``c_dtype``.
    """
    s_dtype = dtypes.real_for(c_dtype)

    # The rendered time step is wrapped in parentheses before being negated:
    # if ``dt`` is negative and its rendered form starts with a minus sign,
    # a bare ``-${dt}`` would produce ``--<value>``,
    # which C parses as a decrement operator.
    template_src = """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(
                %for pcomp in range(components):
                psi${pcomp},
                %endfor
                V, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -(${dt})),
                nonlinear);
        }
        %endfor
        """

    return Module.create(
        template_src,
        render_kwds=dict(
            components=components,
            c_ctype=dtypes.ctype(c_dtype),
            s_ctype=dtypes.ctype(s_dtype),
            mul=functions.mul(c_dtype, c_dtype),
            dt=dtypes.c_constant(dt, s_dtype),
            nonlinear=nonlinear_module))
def gamma(bijection, dtype, shape=1, scale=1):
    """
    Generates random numbers from the gamma distribution

    .. math::
      P(x) = x^{k-1} \\frac{e^{-x/\\theta}}{\\theta^k \\Gamma(k)},

    where :math:`k` is ``shape``, and :math:`\\theta` is ``scale``.
    Supported dtypes: ``float(32/64)``.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.
    """
    ctype = dtypes.ctype(dtype)

    # Auxiliary samplers made available to the template:
    # a uniform one on ``[0, 1)`` and a ``normal_bm`` one with mean 0 and std 1.
    uniform_sampler = uniform_float(bijection, dtype, low=0, high=1)
    normal_sampler = normal_bm(bijection, dtype, mean=0, std=1)

    render_kwds = {
        'dtype': dtype,
        'ctype': ctype,
        'bijection': bijection,
        # ``shape`` is passed as is; only ``scale`` goes through ``c_constant``.
        'shape': shape,
        'scale': dtypes.c_constant(scale, dtype),
        'uf': uniform_sampler,
        'nbm': normal_sampler,
    }
    module = Module(TEMPLATE.get_def("gamma"), render_kwds=render_kwds)
    return Sampler(bijection, module, dtype)
def nonlinear_no_potential(dtype, U, nu):
    """
    Returns a module with two functions (``${prefix}0`` and ``${prefix}1``),
    each taking two complex values ``psi0``, ``psi1`` and a real value ``t``
    (which is not used in the function body).

    The function for the component ``comp`` returns::

        psi_comp * (U[comp, 0] * norm(psi0) + U[comp, 1] * norm(psi1))
            - psi_other * nu

    where ``psi_other`` is the component ``1 - comp``.

    :param dtype: complex data type of ``psi0`` and ``psi1``.
    :param U: object indexable as ``U[i, j]`` for ``i, j`` in ``(0, 1)``.
    :param nu: value rendered as a constant of the real type
        corresponding to ``dtype``.
    """
    s_dtype = dtypes.real_for(dtype)

    # The constants are rendered here rather than inside the template:
    # ``dtypes`` is not passed in ``render_kwds``, so a template calling
    # ``dtypes.c_constant`` depends on the template namespace providing it.
    # The calls are the same as before (the type is detected from the value),
    # so the generated code does not change.
    U_consts = [
        [dtypes.c_constant(U[comp, other]) for other in (0, 1)]
        for comp in (0, 1)]

    return Module.create(
        """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    ${U[comp][0]} * ${norm}(psi0) +
                    ${U[comp][1]} * ${norm}(psi1)
                    ))
                - ${mul}(psi${1 - comp}, ${nu})
                );
        }
        %endfor
        """,
        render_kwds=dict(
            mul=functions.mul(dtype, s_dtype),
            norm=functions.norm(dtype),
            U=U_consts,
            nu=dtypes.c_constant(nu, s_dtype),
            c_ctype=dtypes.ctype(dtype),
            s_ctype=dtypes.ctype(s_dtype)))
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    """
    Returns a ``PureParallel`` computation performing the following step
    for the two components (indices 0 and 1 of the first dimension) of the state::

        k4 = N(D(psi_4), t + dt)
        output = D(psi_k) + k4 / 6

    Here ``D(psi_4)`` and ``D(psi_k)`` are read from the ``kprop_psi_4`` and
    ``kprop_psi_k`` inputs, and ``N`` is ``nonlinear_module``, called with
    both components and the time ``t + dt``.
    All the dimensions of ``state_arr`` except the first one form the guiding array.
    """
    parameters = [
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
        Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    kernel_src = """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """

    render_kwds = {
        'nonlinear': nonlinear_module,
        'dt': dtypes.c_constant(dt, scalar_dtype),
        'div': functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype),
    }

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=state_arr.shape[1:],
        render_kwds=render_kwds)
def get_nonlinear3(state_arr, scalar_dtype, nonlinear_module, dt):
    """
    Builds a ``PureParallel`` computation which, for the two components
    of the state (indices 0 and 1 of the first dimension), evaluates::

        k4 = N(D(psi_4), t + dt)
        output = D(psi_k) + k4 / 6

    ``D(psi_4)`` and ``D(psi_k)`` are the ``kprop_psi_4`` and ``kprop_psi_k`` inputs;
    ``N`` is ``nonlinear_module``, called with both components and ``t + dt``.
    The guiding array is ``state_arr.shape[1:]``.
    """
    # Division of a state element by an integer constant, keeping the state dtype.
    divide = functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)
    dt_const = dtypes.c_constant(dt, scalar_dtype)

    signature = [
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('kprop_psi_k', Annotation(state_arr, 'i')),
        Parameter('kprop_psi_4', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    code = """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${output.ctype} psi4_0 = ${kprop_psi_4.load_idx}(0, ${all_indices});
        ${output.ctype} psi4_1 = ${kprop_psi_4.load_idx}(1, ${all_indices});
        ${output.ctype} psik_0 = ${kprop_psi_k.load_idx}(0, ${all_indices});
        ${output.ctype} psik_1 = ${kprop_psi_k.load_idx}(1, ${all_indices});

        ${output.ctype} k4_0 = ${nonlinear}0(psi4_0, psi4_1, ${t} + ${dt});
        ${output.ctype} k4_1 = ${nonlinear}1(psi4_0, psi4_1, ${t} + ${dt});

        ${output.store_idx}(0, ${all_indices}, psik_0 + ${div}(k4_0, 6));
        ${output.store_idx}(1, ${all_indices}, psik_1 + ${div}(k4_1, 6));
        """

    return PureParallel(
        signature,
        code,
        guiding_array=state_arr.shape[1:],
        render_kwds={
            'nonlinear': nonlinear_module,
            'dt': dt_const,
            'div': divide,
        })
def vonmises(bijection, dtype, mu=0, kappa=1):
    """
    Generates random numbers from the von Mises distribution

    .. math::
      P(x) = \\frac{\\exp(\\kappa \\cos(x - \\mu))}{2 \\pi I_0(\\kappa)},

    where :math:`\\mu` is the mode, :math:`\\kappa` is the dispersion,
    and :math:`I_0` is the modified Bessel function of the first kind.
    Supported dtypes: ``float(32/64)``.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.
    """
    ctype = dtypes.ctype(dtype)

    # Auxiliary uniform sampler on ``[0, 1)`` made available to the template.
    uniform_sampler = uniform_float(bijection, dtype, low=0, high=1)

    render_kwds = {
        'dtype': dtype,
        'ctype': ctype,
        'bijection': bijection,
        'mu': dtypes.c_constant(mu, dtype),
        # ``kappa`` is passed as is; only ``mu`` goes through ``c_constant``.
        'kappa': kappa,
        'uf': uniform_sampler,
    }
    module = Module(TEMPLATE.get_def("vonmises"), render_kwds=render_kwds)
    return Sampler(bijection, module, dtype)
def get_nonlinear2(state_arr, scalar_dtype, nonlinear_module, dt):
    """
    Returns a ``PureParallel`` computation performing the following step
    for the two components (indices 0 and 1 of the first dimension) of the state::

        k2 = N(psi_I + k1 / 2, t + dt / 2)
        k3 = N(psi_I + k2 / 2, t + dt / 2)
        psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
        psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)

    ``N`` is ``nonlinear_module``, called with both components and the time.
    ``psi_k`` and ``psi_4`` are outputs, ``psi_I`` and ``k1`` are inputs.
    All the dimensions of ``state_arr`` except the first one form the guiding array.
    """
    parameters = [
        Parameter('psi_k', Annotation(state_arr, 'o')),
        Parameter('psi_4', Annotation(state_arr, 'o')),
        Parameter('psi_I', Annotation(state_arr, 'i')),
        Parameter('k1', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    kernel_src = """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${psi_k.ctype} psi_I_0 = ${psi_I.load_idx}(0, ${all_indices});
        ${psi_k.ctype} psi_I_1 = ${psi_I.load_idx}(1, ${all_indices});
        ${psi_k.ctype} k1_0 = ${k1.load_idx}(0, ${all_indices});
        ${psi_k.ctype} k1_1 = ${k1.load_idx}(1, ${all_indices});

        ${psi_k.ctype} k2_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k2_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);

        ${psi_k.ctype} k3_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k3_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);

        ${psi_4.store_idx}(0, ${all_indices}, psi_I_0 + k3_0);
        ${psi_4.store_idx}(1, ${all_indices}, psi_I_1 + k3_1);

        ${psi_k.store_idx}(
            0, ${all_indices},
            psi_I_0 + ${div}(k1_0, 6) + ${div}(k2_0, 3) + ${div}(k3_0, 3));
        ${psi_k.store_idx}(
            1, ${all_indices},
            psi_I_1 + ${div}(k1_1, 6) + ${div}(k2_1, 3) + ${div}(k3_1, 3));
        """

    render_kwds = {
        'nonlinear': nonlinear_module,
        'dt': dtypes.c_constant(dt, scalar_dtype),
        'div': functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype),
    }

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=state_arr.shape[1:],
        render_kwds=render_kwds)
def get_nonlinear2(state_arr, scalar_dtype, nonlinear_module, dt):
    """
    Builds a ``PureParallel`` computation which, for the two components
    of the state (indices 0 and 1 of the first dimension), evaluates::

        k2 = N(psi_I + k1 / 2, t + dt / 2)
        k3 = N(psi_I + k2 / 2, t + dt / 2)
        psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
        psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)

    ``N`` is ``nonlinear_module``; it receives both components and the time.
    Outputs: ``psi_k``, ``psi_4``. Inputs: ``psi_I``, ``k1`` and the scalar ``t``.
    The guiding array is ``state_arr.shape[1:]``.
    """
    # Division of a state element by an integer constant, keeping the state dtype.
    divide = functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype)
    dt_const = dtypes.c_constant(dt, scalar_dtype)

    signature = [
        Parameter('psi_k', Annotation(state_arr, 'o')),
        Parameter('psi_4', Annotation(state_arr, 'o')),
        Parameter('psi_I', Annotation(state_arr, 'i')),
        Parameter('k1', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    code = """
        <%
            all_indices = ', '.join(idxs)
        %>

        ${psi_k.ctype} psi_I_0 = ${psi_I.load_idx}(0, ${all_indices});
        ${psi_k.ctype} psi_I_1 = ${psi_I.load_idx}(1, ${all_indices});
        ${psi_k.ctype} k1_0 = ${k1.load_idx}(0, ${all_indices});
        ${psi_k.ctype} k1_1 = ${k1.load_idx}(1, ${all_indices});

        ${psi_k.ctype} k2_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k2_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k1_0, 2),
            psi_I_1 + ${div}(k1_1, 2),
            ${t} + ${dt} / 2);

        ${psi_k.ctype} k3_0 = ${nonlinear}0(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);
        ${psi_k.ctype} k3_1 = ${nonlinear}1(
            psi_I_0 + ${div}(k2_0, 2),
            psi_I_1 + ${div}(k2_1, 2),
            ${t} + ${dt} / 2);

        ${psi_4.store_idx}(0, ${all_indices}, psi_I_0 + k3_0);
        ${psi_4.store_idx}(1, ${all_indices}, psi_I_1 + k3_1);

        ${psi_k.store_idx}(
            0, ${all_indices},
            psi_I_0 + ${div}(k1_0, 6) + ${div}(k2_0, 3) + ${div}(k3_0, 3));
        ${psi_k.store_idx}(
            1, ${all_indices},
            psi_I_1 + ${div}(k1_1, 6) + ${div}(k2_1, 3) + ${div}(k3_1, 3));
        """

    return PureParallel(
        signature,
        code,
        guiding_array=state_arr.shape[1:],
        render_kwds={
            'nonlinear': nonlinear_module,
            'dt': dt_const,
            'div': divide,
        })
def get_common_kwds(dtype, device_params):
    """
    Returns a dictionary of values derived from ``dtype`` and ``device_params``
    (presumably shared template rendering keywords --- the callers are not
    visible from here):

    * ``dtype`` itself;
    * ``min_mem_coalesce_width`` for the item size of ``dtype``
      and ``local_mem_banks``, both taken from ``device_params``;
    * the helpers ``get_padding`` and ``min_blocks``;
    * ``wrap_const``, a function rendering a value as a constant
      of the real type corresponding to ``dtype``;
    * the modules ``mul`` (``dtype`` by ``dtype``), ``polar_unit``
      (for the real type) and ``cdivs`` (``dtype`` divided by ``uint32``,
      with the result of type ``dtype``).
    """
    real_dtype = dtypes.real_for(dtype)

    def wrap_const(x):
        # Constants are rendered with the real counterpart of ``dtype``.
        return dtypes.c_constant(x, real_dtype)

    return {
        'dtype': dtype,
        'min_mem_coalesce_width': device_params.min_mem_coalesce_width[dtype.itemsize],
        'local_mem_banks': device_params.local_mem_banks,
        'get_padding': get_padding,
        'wrap_const': wrap_const,
        'min_blocks': helpers.min_blocks,
        'mul': functions.mul(dtype, dtype),
        'polar_unit': functions.polar_unit(real_dtype),
        'cdivs': functions.div(dtype, numpy.uint32, out_dtype=dtype),
    }
def add_const(arr_t, param):
    """
    Returns an addition transformation with a fixed parameter (1 output, 1 input):
    ``output = input + param``.

    The type of ``param`` is detected from its value; the result of the addition
    has the data type of ``arr_t``.
    """
    param_dtype = dtypes.detect_type(param)

    parameters = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    render_kwds = {
        'add': functions.add(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
        'param': dtypes.c_constant(param, dtype=param_dtype),
    }

    return Transformation(
        parameters,
        "${output.store_same}(${add}(${input.load_same}, ${param}));",
        render_kwds=render_kwds)
def div_const(arr_t, param):
    """
    Returns a scaling transformation with a fixed parameter (1 output, 1 input):
    ``output = input / param``.

    The type of ``param`` is detected from its value; the result of the division
    has the data type of ``arr_t``.
    """
    param_dtype = dtypes.detect_type(param)

    parameters = [
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    render_kwds = {
        'div': functions.div(arr_t.dtype, param_dtype, out_dtype=arr_t.dtype),
        'param': dtypes.c_constant(param, dtype=param_dtype),
    }

    return Transformation(
        parameters,
        "${output.store_same}(${div}(${input.load_same}, ${param}));",
        render_kwds=render_kwds)
def get_nonlinear2(state_arr, potential_arr, scalar_dtype, nonlinear_module, dt):
    """
    Returns a ``PureParallel`` computation performing the following step
    for every component of the state::

        k2 = N(psi_I + k1 / 2, t + dt / 2)
        k3 = N(psi_I + k2 / 2, t + dt / 2)
        psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
        psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)

    ``N`` is ``nonlinear_module``, which is called with all the components,
    the value loaded from ``potential_half`` and the time.
    ``psi_k`` and ``psi_4`` are outputs; ``psi_I``, ``k1`` and ``potential_half``
    are inputs.

    The number of components is ``state_arr.shape[0]``;
    the remaining dimensions of ``state_arr`` form the guiding array.
    """
    parameters = [
        Parameter('psi_k', Annotation(state_arr, 'o')),
        Parameter('psi_4', Annotation(state_arr, 'o')),
        Parameter('psi_I', Annotation(state_arr, 'i')),
        Parameter('k1', Annotation(state_arr, 'i')),
        Parameter('potential_half', Annotation(potential_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]

    # The potential is indexed with all the guiding indices except the first one.
    kernel_src = """
        %for comp in range(components):
        ${psi_k.ctype} psi_I_${comp} = ${psi_I.load_idx}(${comp}, ${idxs.all()});
        ${psi_k.ctype} k1_${comp} = ${k1.load_idx}(${comp}, ${idxs.all()});
        %endfor

        ${potential_half.ctype} V = ${potential_half.load_idx}(${', '.join(idxs[1:])});

        %for comp in range(components):
        ${psi_k.ctype} k2_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi_I_${pcomp} + ${div}(k1_${pcomp}, 2),
            %endfor
            V, ${t} + ${dt} / 2);
        %endfor

        %for comp in range(components):
        ${psi_k.ctype} k3_${comp} = ${nonlinear}${comp}(
            %for pcomp in range(components):
            psi_I_${pcomp} + ${div}(k2_${pcomp}, 2),
            %endfor
            V, ${t} + ${dt} / 2);
        %endfor

        %for comp in range(components):
        ${psi_4.store_idx}(${comp}, ${idxs.all()}, psi_I_${comp} + k3_${comp});
        ${psi_k.store_idx}(
            ${comp}, ${idxs.all()},
            psi_I_${comp} + ${div}(k1_${comp}, 6) + ${div}(k2_${comp}, 3) + ${div}(k3_${comp}, 3));
        %endfor
        """

    render_kwds = {
        'components': state_arr.shape[0],
        'nonlinear': nonlinear_module,
        'dt': dtypes.c_constant(dt, scalar_dtype),
        'div': functions.div(state_arr.dtype, numpy.int32, out_dtype=state_arr.dtype),
    }

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=state_arr.shape[1:],
        render_kwds=render_kwds)
def get_nonlinear_wrapper(c_dtype, nonlinear_module, dt):
    """
    Returns a module with two functions (``${prefix}0`` and ``${prefix}1``)
    wrapping the corresponding functions of ``nonlinear_module``.

    Each wrapper takes two values of the complex type ``c_dtype``
    (``psi0``, ``psi1``) and a real value ``t``, forwards them to
    ``nonlinear_module`` and returns the result multiplied
    by the complex constant ``(0, -dt)``.

    :param c_dtype: complex data type of the state.
    :param nonlinear_module: module providing the functions ``<name>0`` and ``<name>1``.
    :param dt: time step; rendered as a constant of the real type
        corresponding to ``c_dtype``.
    """
    s_dtype = dtypes.real_for(c_dtype)

    # The rendered time step is wrapped in parentheses before being negated:
    # if ``dt`` is negative and its rendered form starts with a minus sign,
    # a bare ``-${dt}`` would produce ``--<value>``,
    # which C parses as a decrement operator.
    template_src = """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(psi0, psi1, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -(${dt})),
                nonlinear);
        }
        %endfor
        """

    return Module.create(
        template_src,
        render_kwds=dict(
            c_ctype=dtypes.ctype(c_dtype),
            s_ctype=dtypes.ctype(s_dtype),
            mul=functions.mul(c_dtype, c_dtype),
            dt=dtypes.c_constant(dt, s_dtype),
            nonlinear=nonlinear_module))
def get_potential_interpolator(potential_arr, dt):
    """
    Returns a ``PureParallel`` computation that linearly interpolates
    between two potential arrays.

    Given ``potential1`` and ``potential2`` with the corresponding times
    ``t_potential1`` and ``t_potential2``, the kernel computes the slope
    ``(p2 - p1) / (t_potential2 - t_potential1)`` for every element and stores
    the interpolated values for the times ``t``, ``t + dt / 2`` and ``t + dt``
    in ``potential_curr``, ``potential_half`` and ``potential_next`` respectively.

    The scalar parameters and the ``dt`` constant use the data type
    of ``potential_arr``.
    """
    scalar_dtype = potential_arr.dtype

    outputs = [
        Parameter(name, Annotation(potential_arr, 'o'))
        for name in ('potential_curr', 'potential_half', 'potential_next')]
    inputs = [
        Parameter(name, Annotation(potential_arr, 'i'))
        for name in ('potential1', 'potential2')]
    scalars = [
        Parameter(name, Annotation(scalar_dtype))
        for name in ('t_potential1', 't_potential2', 't')]

    kernel_src = """
        ${potential1.ctype} p1 = ${potential1.load_idx}(${idxs.all()});
        ${potential2.ctype} p2 = ${potential2.load_idx}(${idxs.all()});
        ${t.ctype} normalization = (p2 - p1) / (${t_potential2} - ${t_potential1});
        ${t.ctype} offset = ${t} - ${t_potential1};

        ${potential_curr.store_idx}(${idxs.all()}, p1 + offset * normalization);
        ${potential_half.store_idx}(${idxs.all()}, p1 + (offset + ${dt} / 2) * normalization);
        ${potential_next.store_idx}(${idxs.all()}, p1 + (offset + ${dt}) * normalization);
        """

    return PureParallel(
        outputs + inputs + scalars,
        kernel_src,
        render_kwds={'dt': dtypes.c_constant(dt, scalar_dtype)})
def gamma(bijection, dtype, shape=1, scale=1):
    """
    Generates random numbers from the gamma distribution

    .. math::
      P(x) = x^{k-1} \\frac{e^{-x/\\theta}}{\\theta^k \\Gamma(k)},

    where :math:`k` is ``shape``, and :math:`\\theta` is ``scale``.
    Supported dtypes: ``float(32/64)``.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.
    """
    ctype = dtypes.ctype(dtype)

    # The template gets two helper samplers: a uniform one on ``[0, 1)``
    # and a ``normal_bm`` one with mean 0 and std 1.
    helper_uniform = uniform_float(bijection, dtype, low=0, high=1)
    helper_normal = normal_bm(bijection, dtype, mean=0, std=1)

    # ``shape`` is passed as is, while ``scale`` goes through ``c_constant``.
    scale_const = dtypes.c_constant(scale, dtype)

    render_kwds = {
        'dtype': dtype,
        'ctype': ctype,
        'bijection': bijection,
        'shape': shape,
        'scale': scale_const,
        'uf': helper_uniform,
        'nbm': helper_normal,
    }
    sampler_module = Module(TEMPLATE.get_def("gamma"), render_kwds=render_kwds)
    return Sampler(bijection, sampler_module, dtype)
def __init__(self, module):
    """
    Stores ``module`` and the values derived from fixed types:
    the C type names for ``numpy.uint64`` and ``numpy.uint32``,
    and the number ``2**64 - 2**32 + 1`` rendered as a ``uint64`` constant.
    """
    # Plain Python integer, so the intermediate ``2 ** 64`` does not overflow.
    modulus_value = 2 ** 64 - 2 ** 32 + 1

    self.module = module
    self.u64 = dtypes.ctype(numpy.uint64)
    self.u32 = dtypes.ctype(numpy.uint32)
    self.modulus = dtypes.c_constant(modulus_value, numpy.uint64)