def pow(dtype, power_dtype=None):
    """
    Returns a :py:class:`~reikna.cluda.Module` with a function of two arguments
    that raises the first argument of type ``dtype``
    to the power of the second argument of type ``power_dtype``
    (a real or an integer data type).

    If ``power_dtype`` is not given, it is set to ``dtype`` for a real ``dtype``,
    and to ``dtypes.real_for(dtype)`` otherwise; for an integer ``dtype``
    it must be specified explicitly (a ``ValueError`` is raised if it is not).
    A complex ``power_dtype`` results in ``NotImplementedError``.
    If ``dtype`` is integer and ``power_dtype`` is real,
    ``power_dtype`` is used as the type of the first argument.
    """
    if dtypes.is_complex(power_dtype):
        raise NotImplementedError("pow() with a complex power is not supported")

    if power_dtype is None:
        if dtypes.is_integer(dtype):
            raise ValueError("Power dtype must be specified for an integer argument")
        power_dtype = dtype if dtypes.is_real(dtype) else dtypes.real_for(dtype)

    # Real type passed to the ``polar()`` helper module.
    if dtypes.is_complex(dtype):
        polar_dtype = dtypes.real_for(dtype)
    elif dtypes.is_real(dtype):
        polar_dtype = dtype
    elif dtypes.is_real(power_dtype):
        polar_dtype = power_dtype
    else:
        polar_dtype = numpy.float32

    # An integer base with a real power is handled in the power's type.
    if dtypes.is_integer(dtype) and dtypes.is_real(power_dtype):
        dtype = power_dtype

    template = TEMPLATE.get_def('pow')
    render_kwds = {
        'dtype': dtype,
        'power_dtype': power_dtype,
        'mul_': mul(dtype, dtype),
        'div_': div(dtype, dtype),
        'polar_': polar(polar_dtype),
    }
    return Module(template, render_kwds=render_kwds)
def get_common_kwds(dtype, device_params):
    """
    Builds the dictionary of render keywords derived from
    the data type ``dtype`` and the device parameters ``device_params``.
    """

    def wrap_const(x):
        # Constants are rendered in the real type corresponding to ``dtype``.
        return dtypes.c_constant(x, dtypes.real_for(dtype))

    return {
        'dtype': dtype,
        'min_mem_coalesce_width': device_params.min_mem_coalesce_width[dtype.itemsize],
        'local_mem_banks': device_params.local_mem_banks,
        'get_padding': get_padding,
        'wrap_const': wrap_const,
        'min_blocks': helpers.min_blocks,
        'mul': functions.mul(dtype, dtype),
        'polar_unit': functions.polar_unit(dtypes.real_for(dtype)),
        'cdivs': functions.div(dtype, numpy.uint32, out_dtype=dtype),
    }
def __init__(self, thr, psi_arr_t, box, tmax, steps, samples,
        kinetic_coeff=1, nonlinear_module=None, potentials=None):
    r"""
    Integrates the equation

    .. math::

        i \frac{d\psi_j}{dt} = - k \nabla^2 \psi_j + N(\psi_1, ... \psi_C, V(t), t),

    where :math:`C` is the number of components, :math:`V` is the dynamic potential.

    ``psi_arr_t`` is an array-like object with the shape ``(components, ensembles, *grid)``.
    ``box`` is a tuple of length ``grid``, containing sizes of the simulation box.
    ``tmax`` is the propagation time.
    ``steps`` is the number of time steps to take.
    ``samples`` is the number of samples to take (not counting the initial one);
    should be a factor of ``steps``.
    ``kinetic_coeff`` is the value of :math:`k`.
    ``nonlinear_module`` calculates :math:`N`.
    ``potentials``: ``None``, an array of shape ``grid``,
    or an array of shape ``(M, *grid)``, corresponding to the values of dynamic potential.
    The dynamic potential contains the snapshots at time points from 0 to ``tmax``,
    and ``M - 1`` should be a factor of ``steps``.

    In all three cases the potentials are stored in ``self.potentials``
    using the real data type corresponding to ``psi_arr_t.dtype``.
    """
    self.tmax = tmax

    assert steps % samples == 0
    self.steps = steps
    self.samples = samples
    self.dt = float(tmax) / steps
    self.dt_half = self.dt / 2

    grid_shape = psi_arr_t.shape[2:]
    real_dtype = dtypes.real_for(psi_arr_t.dtype)

    if potentials is None:
        # Two identical zero snapshots; allocated with the real dtype
        # so that the result matches what the other branches produce.
        self.potentials = numpy.zeros((2,) + grid_shape, real_dtype)
    elif len(potentials.shape) == len(psi_arr_t.shape) - 2:
        # A single static snapshot: duplicate it to get the start and the end snapshots.
        potentials = potentials.astype(real_dtype)
        potentials = potentials.reshape(1, *grid_shape)
        self.potentials = numpy.vstack([potentials, potentials])
    else:
        assert len(potentials.shape) == len(psi_arr_t.shape) - 1
        assert steps % (potentials.shape[0] - 1) == 0
        potentials = potentials.astype(real_dtype)
        self.potentials = potentials

    self.thr = thr

    # Two compiled steppers: one for the full step and one for the half step.
    self.stepper = RK4IPStepper(
        psi_arr_t, self.dt,
        box=box, kinetic_coeff=kinetic_coeff,
        nonlinear_module=nonlinear_module).compile(thr)
    self.stepper_half = RK4IPStepper(
        psi_arr_t, self.dt_half,
        box=box, kinetic_coeff=kinetic_coeff,
        nonlinear_module=nonlinear_module).compile(thr)
def get_nonlinear_wrapper(components, c_dtype, nonlinear_module, dt):
    """
    Returns a module with one function per component (``${prefix}0``, ``${prefix}1``, ...)
    which calls the corresponding function of ``nonlinear_module``
    and multiplies its result by the complex constant ``(0, -dt)``.
    """
    s_dtype = dtypes.real_for(c_dtype)

    template_src = """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(
                %for pcomp in range(components):
                psi${pcomp},
                %endfor
                V, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """

    render_kwds = {
        'components': components,
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(s_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        'dt': dtypes.c_constant(dt, s_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(template_src, render_kwds=render_kwds)
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window), NFFT is provided separately.

    Elements with the index along the last axis greater or equal to ``NFFT``
    are multiplied by 1 (that is, left unchanged).
    The check is skipped only when the length of the last axis is equal to ``NFFT``,
    i.e. when there is no padding.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype

    return Transformation(
        [
            Parameter('output', Annotation(arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        # The padding check uses the same (last) axis as the window itself.
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[-1]:
        if (${idxs[-1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds=dict(
            coeff_dtype=coeff_dtype, NFFT=NFFT,
            mul=functions.mul(arr.dtype, coeff_dtype)))
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window), NFFT is provided separately.

    Elements with the index along the last axis greater or equal to ``NFFT``
    are multiplied by 1 (that is, left unchanged).
    The check is skipped only when the length of the last axis is equal to ``NFFT``,
    i.e. when there is no padding.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype

    return Transformation(
        [
            Parameter('output', Annotation(arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        # The padding check uses the same (last) axis as the window itself.
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[-1]:
        if (${idxs[-1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds=dict(
            coeff_dtype=coeff_dtype,
            NFFT=NFFT,
            mul=functions.mul(arr.dtype, coeff_dtype)))
def nonlinear_no_potential(dtype, U, nu):
    """
    Returns a module with two functions (``${prefix}0`` and ``${prefix}1``),
    one per component, each taking ``(psi0, psi1, t)`` and returning
    ``psi_j * (U[j, 0] * norm(psi0) + U[j, 1] * norm(psi1)) - psi_{1 - j} * nu``.

    ``dtype`` is the (complex) type of the components;
    ``U`` is indexed as ``U[comp, other_comp]``;
    ``nu`` is a scalar coefficient.
    All the constants are rendered in the real type corresponding to ``dtype``.
    """
    c_dtype = dtype
    c_ctype = dtypes.ctype(c_dtype)
    s_dtype = dtypes.real_for(dtype)
    s_ctype = dtypes.ctype(s_dtype)

    return Module.create(
        # The elements of ``U`` are explicitly rendered as ``s_dtype`` constants
        # (same as ``nu``), so that the precision of the literals
        # does not depend on the dtype of ``U`` itself.
        """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    ${dtypes.c_constant(U[comp, 0], s_dtype)} * ${norm}(psi0) +
                    ${dtypes.c_constant(U[comp, 1], s_dtype)} * ${norm}(psi1)
                    ))
                - ${mul}(psi${1 - comp}, ${nu})
                );
        }
        %endfor
        """,
        render_kwds=dict(
            mul=functions.mul(c_dtype, s_dtype),
            norm=functions.norm(c_dtype),
            U=U,
            nu=dtypes.c_constant(nu, s_dtype),
            s_dtype=s_dtype,
            c_ctype=c_ctype,
            s_ctype=s_ctype))
def get_prepare_prfft_scan(output):
    """
    Returns a transformation with parameters ``output``, ``Y`` (both of the type of ``output``)
    and ``re_X_0`` (real, with the shape of ``output`` without the last axis).

    The loaded value ``Y`` is replaced by ``(Y.y, -Y.x)``;
    for the zeroth index along the last axis the result is additionally halved
    and the corresponding ``re_X_0`` element is added to its real part.
    """
    re_X_0_type = Type(dtypes.real_for(output.dtype), output.shape[:-1])

    parameters = [
        Parameter('output', Annotation(output, 'o')),
        Parameter('Y', Annotation(output, 'i')),
        Parameter('re_X_0', Annotation(re_X_0_type, 'i')),
    ]

    code = """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """

    return Transformation(parameters, code, connectors=['output', 'Y'])
def norm_const(arr_t, order):
    """
    Returns a transformation that calculates the ``order``-norm
    (1 output, 1 input): ``output = abs(input) ** order``.

    The output has the shape of ``arr_t``; its data type is the corresponding real type
    if ``arr_t`` is complex, and the type of ``arr_t`` otherwise.
    """
    is_complex = dtypes.is_complex(arr_t.dtype)
    out_dtype = dtypes.real_for(arr_t.dtype) if is_complex else arr_t.dtype
    output_t = Type(out_dtype, arr_t.shape)

    parameters = [
        Parameter('output', Annotation(output_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]

    # ``norm()`` is raised to the power ``order / 2`` unless ``order`` is 2.
    code = """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """

    return Transformation(
        parameters, code,
        render_kwds={'norm': functions.norm(arr_t.dtype), 'order': order})
def __init__(self, shape, box, drift, trajectories=1, kinetic_coeffs=0.5j, diffusion=None,
        iterations=3, noise_type=None):
    """
    Sets up the computation signature and the nested computations.

    The state has the type ``drift.dtype`` and the shape
    ``(trajectories, drift.components) + shape``.
    The ``dW`` parameter (of type ``noise_type``) is present
    only if ``diffusion`` is not ``None``.
    """
    real_dtype = dtypes.real_for(drift.dtype)
    state_type = Type(drift.dtype, (trajectories, drift.components) + shape)

    self._noise = diffusion is not None

    signature = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if self._noise:
        signature.append(Parameter('dW', Annotation(noise_type, 'i')))
    signature.append(Parameter('t', Annotation(real_dtype)))
    signature.append(Parameter('dt', Annotation(real_dtype)))
    Computation.__init__(self, signature)

    self._ksquared = get_ksquared(shape, box).astype(real_dtype)

    # The coefficient is halved because we only want to propagate to dt/2.
    kprop_trf = get_kprop_trf(state_type, self._ksquared, kinetic_coeffs / 2, exp=True)

    # FFT over all the axes except the first two.
    ndim = len(state_type.shape)
    self._fft = FFT(state_type, axes=range(2, ndim))
    self._fft_with_kprop = FFT(state_type, axes=range(2, ndim))
    self._fft_with_kprop.parameter.output.connect(
        kprop_trf, kprop_trf.input,
        output_prime=kprop_trf.output,
        ksquared=kprop_trf.ksquared,
        dt=kprop_trf.dt)

    self._prop_iter = get_prop_iter(
        state_type, drift, iterations,
        diffusion=diffusion, noise_type=noise_type)
def __init__(self, arr_t):
    """
    Declares the signature: an input of type ``arr_t`` and a real output
    with the same shape except for the last axis, which is twice as long.
    """
    real_dtype = dtypes.real_for(arr_t.dtype)
    leading_shape = arr_t.shape[:-1]
    out_arr = Type(real_dtype, leading_shape + (arr_t.shape[-1] * 2,))

    signature = [
        Parameter('output', Annotation(out_arr, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    Computation.__init__(self, signature)
def __init__(self, arr_t):
    """
    Declares the signature: an input of type ``arr_t`` and a real output
    with the same shape except for the last axis, which is twice as long.
    """
    real_dtype = dtypes.real_for(arr_t.dtype)
    leading_shape = arr_t.shape[:-1]
    out_arr = Type(real_dtype, leading_shape + (arr_t.shape[-1] * 2,))

    signature = [
        Parameter('output', Annotation(out_arr, 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]
    Computation.__init__(self, signature)
def get_nonlinear_wrapper(state_dtype, grid_dims, drift, diffusion=None):
    """
    Returns a module with one function per component of ``drift``.
    Each function takes ``grid_dims`` integer indices, the values of all the components,
    the noise values (only if ``diffusion`` is given), ``t`` and ``dt``, and returns
    the drift function multiplied by ``dt``, plus (if ``diffusion`` is given)
    the sum of the diffusion functions multiplied by the corresponding noise values.
    """
    real_dtype = dtypes.real_for(state_dtype)
    # Without diffusion the noise type is not used in the generated code;
    # the real type just serves as a placeholder.
    noise_dtype = real_dtype if diffusion is None else diffusion.dtype

    template_src = """
        <%
            components = drift.components
            idx_args = ["idx_" + str(dim) for dim in range(grid_dims)]
            psi_args = ["psi_" + str(comp) for comp in range(components)]
            if diffusion is not None:
                dW_args = ["dW_" + str(ncomp) for ncomp in range(diffusion.noise_sources)]
        %>

        %for comp in range(components):
        INLINE WITHIN_KERNEL ${s_ctype} ${prefix}${comp}(
            %for idx in idx_args:
            const int ${idx},
            %endfor
            %for psi in psi_args:
            const ${s_ctype} ${psi},
            %endfor
            %if diffusion is not None:
            %for dW in dW_args:
            const ${n_ctype} ${dW},
            %endfor
            %endif
            const ${r_ctype} t,
            const ${r_ctype} dt)
        {
            return
                ${mul_sr}(${drift.module}${comp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), dt)
                %if diffusion is not None:
                %for ncomp in range(diffusion.noise_sources):
                + ${mul_sn}(${diffusion.module}${comp}_${ncomp}(
                    ${", ".join(idx_args)}, ${", ".join(psi_args)}, t), ${dW_args[ncomp]})
                %endfor
                %endif
                ;
        }
        %endfor
        """

    render_kwds = {
        'grid_dims': grid_dims,
        's_ctype': dtypes.ctype(state_dtype),
        'r_ctype': dtypes.ctype(real_dtype),
        'n_ctype': dtypes.ctype(noise_dtype),
        'mul_sr': functions.mul(state_dtype, real_dtype),
        'mul_sn': functions.mul(state_dtype, noise_dtype),
        'drift': drift,
        'diffusion': diffusion,
    }
    return Module.create(template_src, render_kwds=render_kwds)
def test_pow(thr, out_code, in_codes):
    """
    Checks ``functions.pow()`` against ``numpy.power``.
    If only the base type is given, the power type is left for ``pow()`` to choose,
    and the expected power type is appended to the list of input types.
    """
    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    base_dtype = in_dtypes[0]

    if len(in_dtypes) != 1:
        func = functions.pow(base_dtype, power_dtype=in_dtypes[1])
    else:
        func = functions.pow(base_dtype)
        if dtypes.is_real(base_dtype):
            implied_power_dtype = base_dtype
        else:
            implied_power_dtype = dtypes.real_for(base_dtype)
        in_dtypes.append(implied_power_dtype)

    check_func(thr, func, numpy.power, out_dtype, in_dtypes)
def get_nonlinear3(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Returns a ``PureParallel`` computation for the following step::

        k4 = N(D(psi_4), t + dt)
        output = D(psi_k) + k4 / 6

    where ``N`` is ``nonlinear_wrapper``, and ``D(psi_4)`` and ``D(psi_k)`` are taken
    from the parameters ``kprop_psi_4`` and ``kprop_psi_k``.
    The ``dW`` parameter (of type ``noise_type``) is present
    only if ``diffusion`` is not ``None``.
    """
    real_dtype = dtypes.real_for(state_type.dtype)

    parameters = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('kprop_psi_k', Annotation(state_type, 'i')),
        Parameter('kprop_psi_4', Annotation(state_type, 'i')),
    ]
    if diffusion is not None:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters.append(Parameter('t', Annotation(real_dtype)))
    parameters.append(Parameter('dt', Annotation(real_dtype)))

    code = """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: list(map(lambda i: prefix + str(i), range(num)))
            dW_args = args('dW_', diffusion.noise_sources) if diffusion is not None else []
            k4_args = ", ".join(idxs[1:] + args('psi4_', components) + dW_args)
        %>

        %for comp in range(components):
        ${output.ctype} psi4_${comp} = ${kprop_psi_4.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psik_${comp} = ${kprop_psi_k.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${output.ctype} k4_${comp} = ${nonlinear}${comp}(${k4_args}, ${t} + ${dt}, ${dt});
        %endfor

        %for comp in range(components):
        ${output.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psik_${comp} + ${div}(k4_${comp}, 6));
        %endfor
        """

    # The guiding array skips the component axis (axis 1):
    # each work item handles all the components at once.
    guiding_shape = (state_type.shape[0],) + state_type.shape[2:]

    render_kwds = {
        'components': components,
        'nonlinear': nonlinear_wrapper,
        'diffusion': diffusion,
        'div': functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype),
    }

    return PureParallel(
        parameters, code,
        guiding_array=guiding_shape,
        render_kwds=render_kwds)
def pow(dtype, exponent_dtype=None, output_dtype=None):
    """
    Returns a :py:class:`~reikna.cluda.Module` with a function of two arguments
    that raises the first argument of type ``dtype``
    to the power of the second argument of type ``exponent_dtype``
    (an integer or real data type).

    If ``exponent_dtype`` or ``output_dtype`` are not given, they default to ``dtype``.
    If ``dtype`` is not the same as ``output_dtype``,
    the input is cast to ``output_dtype`` *before* exponentiation.

    A real ``exponent_dtype`` is replaced by the real type matching ``output_dtype``;
    if ``output_dtype`` is an integer in this case, a ``ValueError`` is raised.
    A complex ``exponent_dtype`` results in ``NotImplementedError``.
    """
    exponent_dtype = dtype if exponent_dtype is None else exponent_dtype
    output_dtype = dtype if output_dtype is None else output_dtype

    if dtypes.is_complex(exponent_dtype):
        raise NotImplementedError("pow() with a complex exponent is not supported")

    if dtypes.is_real(exponent_dtype):
        if dtypes.is_complex(output_dtype):
            exponent_dtype = dtypes.real_for(output_dtype)
        elif dtypes.is_real(output_dtype):
            exponent_dtype = output_dtype
        else:
            raise ValueError("pow(integer, float): integer is not supported")

    # Helper modules are created only when needed; otherwise ``None`` is passed.
    cast_ = cast(output_dtype, dtype) if output_dtype != dtype else None

    mul_ = None
    div_ = None
    if dtypes.is_integer(exponent_dtype) and not dtypes.is_real(output_dtype):
        mul_ = mul(output_dtype, output_dtype)
        div_ = div(output_dtype, output_dtype)

    polar_ = None
    if dtypes.is_complex(output_dtype):
        polar_ = polar(dtypes.real_for(output_dtype))

    render_kwds = {
        'dtype': dtype,
        'exponent_dtype': exponent_dtype,
        'output_dtype': output_dtype,
        'div_': div_,
        'mul_': mul_,
        'cast_': cast_,
        'polar_': polar_,
    }
    return Module(TEMPLATE.get_def('pow'), render_kwds=render_kwds)
def get_nonlinear(dtype, interaction, tunneling):
    r"""
    Nonlinear module

    .. math::

        N(\psi_1, ... \psi_C)
        = \sum_{n=1}^{C} U_{jn} |\psi_n|^2 \psi_j
          - \nu_j \psi_{m_j}

    ``interaction``: a symmetrical ``components x components`` array with interaction strengths.
    ``tunneling``: a list of (other_comp, coeff) pairs of tunnelling strengths.

    The generated functions (one per component) also take the potential ``V``,
    which is added to the sum of the interaction terms, and the time ``t``.
    """
    complex_dtype = dtype
    real_dtype = dtypes.real_for(dtype)

    template_src = """
        %for comp in range(components):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            %for pcomp in range(components):
            ${c_ctype} psi${pcomp},
            %endfor
            ${s_ctype} V, ${s_ctype} t)
        {
            return (
                ${mul}(psi${comp}, (
                    %for other_comp in range(components):
                    + ${dtypes.c_constant(interaction[comp, other_comp], s_dtype)} *
                        ${norm}(psi${other_comp})
                    %endfor
                    + V
                    ))
                - ${mul}(
                    psi${tunneling[comp][0]},
                    ${dtypes.c_constant(tunneling[comp][1], s_dtype)})
                );
        }
        %endfor
        """

    render_kwds = {
        'components': interaction.shape[0],
        'mul': functions.mul(complex_dtype, real_dtype),
        'norm': functions.norm(complex_dtype),
        'interaction': interaction,
        'tunneling': tunneling,
        's_dtype': real_dtype,
        'c_ctype': dtypes.ctype(complex_dtype),
        's_ctype': dtypes.ctype(real_dtype),
    }
    return Module.create(template_src, render_kwds=render_kwds)
def __init__(self, state_arr, dt, box=None, kinetic_coeff=1, nonlinear_module=None):
    """
    Sets up the stepper for the state ``state_arr``
    (shape ``(components, ensembles, *grid)``) and the time step ``dt``.

    Precomputes the kinetic propagator ``exp(-1j * kinetic_coeff * k^2 * dt / 2)``,
    attaches it to the output of one of the two FFTs,
    and creates the three nonlinear computations.
    """
    scalar_dtype = dtypes.real_for(state_arr.dtype)

    signature = [
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('input', Annotation(state_arr, 'i')),
        Parameter('t', Annotation(scalar_dtype)),
    ]
    Computation.__init__(self, signature)

    self._box = box
    self._kinetic_coeff = kinetic_coeff
    self._nonlinear_module = nonlinear_module

    self._components = state_arr.shape[0]
    self._ensembles = state_arr.shape[1]
    self._grid_shape = state_arr.shape[2:]

    # Propagator in the momentum space for half of the time step.
    ksquared = get_ksquared(self._grid_shape, self._box)
    kprop_exponent = ksquared * (-1j * kinetic_coeff * dt / 2)
    self._kprop = numpy.exp(kprop_exponent).astype(state_arr.dtype)

    # The propagator depends only on the grid indices (all except the first two).
    kprop_code = """
        ${kprop.ctype} kprop_coeff = ${kprop.load_idx}(${', '.join(idxs[2:])});
        ${output.store_same}(${mul}(${input.load_same}, kprop_coeff));
        """
    self._kprop_trf = Transformation(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('kprop', Annotation(self._kprop, 'i')),
        ],
        kprop_code,
        render_kwds={'mul': functions.mul(state_arr.dtype, self._kprop.dtype)})

    ndim = len(state_arr.shape)
    self._fft = FFT(state_arr, axes=range(2, ndim))
    self._fft_with_kprop = FFT(state_arr, axes=range(2, ndim))
    self._fft_with_kprop.parameter.output.connect(
        self._kprop_trf, self._kprop_trf.input,
        output_prime=self._kprop_trf.output,
        kprop=self._kprop_trf.kprop)

    nonlinear_wrapper = get_nonlinear_wrapper(state_arr.dtype, nonlinear_module, dt)
    self._N1 = get_nonlinear1(state_arr, scalar_dtype, nonlinear_wrapper)
    self._N2 = get_nonlinear2(state_arr, scalar_dtype, nonlinear_wrapper, dt)
    self._N3 = get_nonlinear3(state_arr, scalar_dtype, nonlinear_wrapper, dt)
def _build_plan(self, plan_factory, device_params, output, input_):
    """
    Builds the computation plan consisting of two calls:

    1. a reduction (sum) of the real part of ``input_`` over its last axis,
       producing the temporary array ``x0``;
    2. an inverse real FFT with the input and output preparation transformations
       attached, called with ``(output, x0, coeffs, input_)``.

    ``device_params`` is not used.
    """
    plan = plan_factory()

    N = input_.shape[-1] * 4

    # Coefficients 1 / (4 sin(2 pi k / N)) for k = 1 .. N/2 - 1.
    # The first element is unused (it is only a placeholder for k = 0).
    coeffs = numpy.concatenate(
        [[0], 1 / (4 * numpy.sin(2 * numpy.pi * numpy.arange(1, N // 2) / N))])
    coeffs_arr = plan.persistent_array(coeffs)

    prepare_iprfft_input = get_prepare_iprfft_input(input_)
    prepare_iprfft_output = get_prepare_iprfft_output(output)

    irfft = IRFFT(prepare_iprfft_input.Y)
    irfft.parameter.input.connect(
        prepare_iprfft_input, prepare_iprfft_input.Y,
        X=prepare_iprfft_input.X)
    irfft.parameter.output.connect(
        prepare_iprfft_output, prepare_iprfft_output.y,
        x=prepare_iprfft_output.x,
        x0=prepare_iprfft_output.x0,
        coeffs=prepare_iprfft_output.coeffs)

    # Transformation taking the real part of the input.
    real = Transformation(
        [
            Parameter(
                'output',
                Annotation(Type(dtypes.real_for(input_.dtype), input_.shape), 'o')),
            Parameter('input', Annotation(input_, 'i')),
        ],
        """
        ${output.store_same}((${input.load_same}).x);
        """,
        connectors=['output'])

    # Sum of the real parts over the last axis.
    rd_t = Type(output.dtype, input_.shape)
    rd = Reduce(rd_t, predicate_sum(rd_t.dtype), axes=(len(input_.shape) - 1,))
    rd.parameter.input.connect(real, real.output, X=real.input)

    x0 = plan.temp_array_like(rd.parameter.output)

    plan.computation_call(rd, x0, input_)
    plan.computation_call(irfft, output, x0, coeffs_arr, input_)

    return plan
def split_complex(input_arr_t):
    """
    Returns a transformation that splits complex input into two real outputs
    (2 outputs, 1 input): ``real = Re(input), imag = Im(input)``.

    Both outputs have the shape of ``input_arr_t``
    and the real data type corresponding to its data type.
    """
    real_dtype = dtypes.real_for(input_arr_t.dtype)
    output_t = Type(real_dtype, shape=input_arr_t.shape)

    parameters = [
        Parameter('real', Annotation(output_t, 'o')),
        Parameter('imag', Annotation(output_t, 'o')),
        Parameter('input', Annotation(input_arr_t, 'i')),
    ]

    code = """
        ${real.store_same}(${input.load_same}.x);
        ${imag.store_same}(${input.load_same}.y);
        """

    return Transformation(parameters, code)
def exp(dtype):
    """
    Returns a :py:class:`~reikna.cluda.Module` with a function of one argument
    that exponentiates the value of type ``dtype``
    (must be a real or complex data type).

    Raises ``NotImplementedError`` for an integer ``dtype``.
    """
    if dtypes.is_integer(dtype):
        raise NotImplementedError("exp() of " + str(dtype) + " is not supported")

    # The ``polar_unit()`` helper is passed to the template
    # only if ``dtype`` is not real.
    polar_unit_ = None if dtypes.is_real(dtype) else polar_unit(dtypes.real_for(dtype))

    render_kwds = {'dtype': dtype, 'polar_unit_': polar_unit_}
    return Module(TEMPLATE.get_def('exp'), render_kwds=render_kwds)
def prepare_irfft_output(arr):
    """
    Returns a transformation that stores the complex input of type ``arr``
    into a real output with the last axis twice as long:
    the real part of the element ``i`` goes to the position ``2 * i``,
    and the imaginary part to ``2 * i + 1``.
    """
    real_dtype = dtypes.real_for(arr.dtype)
    res = Type(real_dtype, arr.shape[:-1] + (arr.shape[-1] * 2,))

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]

    # ``batch_idxs`` contains all the indices except the last one,
    # each followed by a comma.
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """

    return Transformation(parameters, code, connectors=['output'])
def prepare_irfft_output(arr):
    """
    Returns a transformation that stores the complex input of type ``arr``
    into a real output with the last axis twice as long:
    the real part of the element ``i`` goes to the position ``2 * i``,
    and the imaginary part to ``2 * i + 1``.
    """
    real_dtype = dtypes.real_for(arr.dtype)
    res = Type(real_dtype, arr.shape[:-1] + (arr.shape[-1] * 2,))

    parameters = [
        Parameter('output', Annotation(res, 'o')),
        Parameter('input', Annotation(arr, 'i')),
    ]

    # ``batch_idxs`` contains all the indices except the last one,
    # each followed by a comma.
    code = """
        <%
            batch_idxs = " ".join((idx + ", ") for idx in idxs[:-1])
        %>
        ${input.ctype} x = ${input.load_same};
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2, x.x);
        ${output.store_idx}(${batch_idxs} ${idxs[-1]} * 2 + 1, x.y);
        """

    return Transformation(parameters, code, connectors=['output'])
def combine_complex(output_arr_t):
    """
    Returns a transformation that joins two real inputs into complex output
    (1 output, 2 inputs): ``output = real + 1j * imag``.

    Both inputs have the shape of ``output_arr_t``
    and the real data type corresponding to its data type.
    """
    real_dtype = dtypes.real_for(output_arr_t.dtype)
    input_t = Type(real_dtype, shape=output_arr_t.shape)

    parameters = [
        Parameter('output', Annotation(output_arr_t, 'o')),
        Parameter('real', Annotation(input_t, 'i')),
        Parameter('imag', Annotation(input_t, 'i')),
    ]

    code = """
        ${output.store_same}(
            COMPLEX_CTR(${output.ctype})(
                ${real.load_same},
                ${imag.load_same}));
        """

    return Transformation(parameters, code)
def _build_plan(self, plan_factory, device_params, output, input_):
    """
    Builds the computation plan consisting of two calls:

    1. a reduction (sum) of the real part of ``input_`` over its last axis,
       producing the temporary array ``x0``;
    2. an inverse real FFT with the input and output preparation transformations
       attached, called with ``(output, x0, coeffs, input_)``.

    ``device_params`` is not used.
    """
    plan = plan_factory()

    N = input_.shape[-1] * 4

    # Coefficients 1 / (4 sin(2 pi k / N)) for k = 1 .. N/2 - 1.
    # The first element is unused (it is only a placeholder for k = 0).
    coeffs = numpy.concatenate(
        [[0], 1 / (4 * numpy.sin(2 * numpy.pi * numpy.arange(1, N // 2) / N))])
    coeffs_arr = plan.persistent_array(coeffs)

    prepare_iprfft_input = get_prepare_iprfft_input(input_)
    prepare_iprfft_output = get_prepare_iprfft_output(output)

    irfft = IRFFT(prepare_iprfft_input.Y)
    irfft.parameter.input.connect(
        prepare_iprfft_input, prepare_iprfft_input.Y,
        X=prepare_iprfft_input.X)
    irfft.parameter.output.connect(
        prepare_iprfft_output, prepare_iprfft_output.y,
        x=prepare_iprfft_output.x,
        x0=prepare_iprfft_output.x0,
        coeffs=prepare_iprfft_output.coeffs)

    # Transformation taking the real part of the input.
    real = Transformation(
        [
            Parameter(
                'output',
                Annotation(Type(dtypes.real_for(input_.dtype), input_.shape), 'o')),
            Parameter('input', Annotation(input_, 'i')),
        ],
        """
        ${output.store_same}((${input.load_same}).x);
        """,
        connectors=['output'])

    # Sum of the real parts over the last axis.
    rd_t = Type(output.dtype, input_.shape)
    rd = Reduce(rd_t, predicate_sum(rd_t.dtype), axes=(len(input_.shape) - 1,))
    rd.parameter.input.connect(real, real.output, X=real.input)

    x0 = plan.temp_array_like(rd.parameter.output)

    plan.computation_call(rd, x0, input_)
    plan.computation_call(irfft, output, x0, coeffs_arr, input_)

    return plan
def __init__(self, state_arr, dt, box=None, kinetic_coeff=1, nonlinear_module=None):
    """
    Sets up the stepper for the state ``state_arr``
    (shape ``(components, ensembles, *grid)``) and the time step ``dt``.

    In addition to the state and the time ``t``, the computation takes
    two real potential snapshots with the shape of the grid
    and the times these snapshots correspond to.

    Precomputes the kinetic propagator ``exp(-1j * kinetic_coeff * k^2 * dt / 2)``,
    attaches it to the output of one of the two FFTs,
    and creates the three nonlinear computations and the potential interpolator.
    """
    scalar_dtype = dtypes.real_for(state_arr.dtype)
    potential_arr = Type(scalar_dtype, shape=state_arr.shape[2:])

    signature = [
        Parameter('output', Annotation(state_arr, 'o')),
        Parameter('input', Annotation(state_arr, 'i')),
        Parameter('potential1', Annotation(potential_arr, 'i')),
        Parameter('potential2', Annotation(potential_arr, 'i')),
        Parameter('t_potential1', Annotation(scalar_dtype)),
        Parameter('t_potential2', Annotation(scalar_dtype)),
        Parameter('t', Annotation(scalar_dtype)),
    ]
    Computation.__init__(self, signature)

    self._box = box
    self._kinetic_coeff = kinetic_coeff
    self._nonlinear_module = nonlinear_module

    self._components = state_arr.shape[0]
    self._ensembles = state_arr.shape[1]
    self._grid_shape = state_arr.shape[2:]

    # Propagator in the momentum space for half of the time step.
    ksquared = get_ksquared(self._grid_shape, self._box)
    kprop_exponent = ksquared * (-1j * kinetic_coeff * dt / 2)
    self._kprop = numpy.exp(kprop_exponent).astype(state_arr.dtype)

    # The propagator depends only on the grid indices (all except the first two).
    kprop_code = """
        ${kprop.ctype} kprop_coeff = ${kprop.load_idx}(${', '.join(idxs[2:])});
        ${output.store_same}(${mul}(${input.load_same}, kprop_coeff));
        """
    self._kprop_trf = Transformation(
        [
            Parameter('output', Annotation(state_arr, 'o')),
            Parameter('input', Annotation(state_arr, 'i')),
            Parameter('kprop', Annotation(self._kprop, 'i')),
        ],
        kprop_code,
        render_kwds={'mul': functions.mul(state_arr.dtype, self._kprop.dtype)})

    ndim = len(state_arr.shape)
    self._fft = FFT(state_arr, axes=range(2, ndim))
    self._fft_with_kprop = FFT(state_arr, axes=range(2, ndim))
    self._fft_with_kprop.parameter.output.connect(
        self._kprop_trf, self._kprop_trf.input,
        output_prime=self._kprop_trf.output,
        kprop=self._kprop_trf.kprop)

    nonlinear_wrapper = get_nonlinear_wrapper(
        state_arr.shape[0], state_arr.dtype, nonlinear_module, dt)
    self._N1 = get_nonlinear1(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper)
    self._N2 = get_nonlinear2(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper, dt)
    self._N3 = get_nonlinear3(state_arr, potential_arr, scalar_dtype, nonlinear_wrapper, dt)

    self._potential_interpolator = get_potential_interpolator(potential_arr, dt)
def __init__(self, shape, box, drift, trajectories=1, kinetic_coeffs=0.5j, diffusion=None,
        ksquared_cutoff=None, noise_type=None):
    """
    Sets up the computation signature, the nested computations
    and the three arrays of six stepper coefficients.

    The state has the type ``drift.dtype`` and the shape
    ``(trajectories, drift.components) + shape``.
    The ``dW`` parameter (of type ``noise_type``) is present
    only if ``diffusion`` is not ``None``.
    If ``ksquared_cutoff`` is given, an additional FFT
    with a projection transformation attached to its output is created.
    """
    real_dtype = dtypes.real_for(drift.dtype)
    state_type = Type(drift.dtype, (trajectories, drift.components) + shape)

    self._noise = diffusion is not None

    signature = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if self._noise:
        signature.append(Parameter('dW', Annotation(noise_type, 'i')))
    signature.append(Parameter('t', Annotation(real_dtype)))
    signature.append(Parameter('dt', Annotation(real_dtype)))
    Computation.__init__(self, signature)

    self._ksquared = get_ksquared(shape, box).astype(real_dtype)
    kprop_trf = get_kprop_trf(state_type, self._ksquared, kinetic_coeffs)

    # FFTs are performed over all the axes except the first two.
    ndim = len(state_type.shape)

    self._ksquared_cutoff = ksquared_cutoff
    if self._ksquared_cutoff is not None:
        project_trf = get_project_trf(state_type, self._ksquared, ksquared_cutoff)
        self._fft_with_project = FFT(state_type, axes=range(2, ndim))
        self._fft_with_project.parameter.output.connect(
            project_trf, project_trf.input,
            output_prime=project_trf.output,
            ksquared=project_trf.ksquared)

    self._fft = FFT(state_type, axes=range(2, ndim))
    self._fft_with_kprop = FFT(state_type, axes=range(2, ndim))
    self._fft_with_kprop.parameter.output.connect(
        kprop_trf, kprop_trf.input,
        output_prime=kprop_trf.output,
        ksquared=kprop_trf.ksquared,
        dt=kprop_trf.dt)

    self._xpropagate = get_xpropagate(
        state_type, drift, diffusion=diffusion, noise_type=noise_type)

    # Stepper coefficients, six values in each array.
    ai = [
        0.0,
        -0.737101392796,
        -1.634740794341,
        -0.744739003780,
        -1.469897351522,
        -2.813971388035]
    bi = [
        0.032918605146,
        0.823256998200,
        0.381530948900,
        0.200092213184,
        1.718581042715,
        0.27]
    ci = [
        0.0,
        0.032918605146,
        0.249351723343,
        0.466911705055,
        0.582030414044,
        0.847252983783]
    self._ai = numpy.array(ai)
    self._bi = numpy.array(bi)
    self._ci = numpy.array(ci)
def get_nonlinear1(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Returns a ``PureParallel`` computation calculating ``output = N(input)``,
    where ``N`` is ``nonlinear_wrapper``.
    The ``dW`` parameter (of type ``noise_type``) is present
    only if ``diffusion`` is not ``None``.
    """
    real_dtype = dtypes.real_for(state_type.dtype)

    parameters = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if diffusion is not None:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters.append(Parameter('t', Annotation(real_dtype)))
    parameters.append(Parameter('dt', Annotation(real_dtype)))

    code = """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: list(map(lambda i: prefix + str(i), range(num)))
            dW_args = args('dW_', diffusion.noise_sources) if diffusion is not None else []
            n_args = ", ".join(idxs[1:] + args('psi_', components) + dW_args)
        %>

        %for comp in range(components):
        ${output.ctype} psi_${comp} = ${input.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${output.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            ${nonlinear}${comp}(${n_args}, ${t}, ${dt}));
        %endfor
        """

    # The guiding array skips the component axis (axis 1):
    # each work item handles all the components at once.
    guiding_shape = (state_type.shape[0],) + state_type.shape[2:]

    render_kwds = {
        'components': components,
        'nonlinear': nonlinear_wrapper,
        'diffusion': diffusion,
    }

    return PureParallel(
        parameters, code,
        guiding_array=guiding_shape,
        render_kwds=render_kwds)
def __init__(self, shape, box, drift, trajectories=1, diffusion=None):
    """
    Sets the noise-related attributes.

    Without ``diffusion``, ``self.noise`` is ``False`` and ``self.noise_type`` is ``None``.
    Otherwise ``self.noise_type`` has the shape
    ``(trajectories, diffusion.noise_sources) + shape``; its data type is real
    if ``diffusion.real_noise`` is set and ``drift.dtype`` is complex,
    and ``drift.dtype`` in all other cases.
    ``self._noise_normalization`` is set to the inverse of the grid cell volume.
    """
    if diffusion is None:
        self.noise_type = None
        self.noise = False
    else:
        assert diffusion.dtype == drift.dtype
        assert diffusion.components == drift.components

        wants_real_noise = diffusion.real_noise and not dtypes.is_real(drift.dtype)
        noise_dtype = dtypes.real_for(drift.dtype) if wants_real_noise else drift.dtype

        noise_shape = (trajectories, diffusion.noise_sources) + shape
        self.noise_type = Type(noise_dtype, noise_shape)
        self.noise = True

        cell_volume = product(box) / product(shape)
        self._noise_normalization = 1. / cell_volume
def get_nonlinear_wrapper(c_dtype, nonlinear_module, dt):
    """
    Returns a module with two functions (``${prefix}0`` and ``${prefix}1``)
    taking ``(psi0, psi1, t)``, each of which calls the corresponding function
    of ``nonlinear_module`` and multiplies its result by the complex constant ``(0, -dt)``.
    """
    s_dtype = dtypes.real_for(c_dtype)

    template_src = """
        %for comp in (0, 1):
        INLINE WITHIN_KERNEL ${c_ctype} ${prefix}${comp}(
            ${c_ctype} psi0, ${c_ctype} psi1, ${s_ctype} t)
        {
            ${c_ctype} nonlinear = ${nonlinear}${comp}(psi0, psi1, t);
            return ${mul}(
                COMPLEX_CTR(${c_ctype})(0, -${dt}),
                nonlinear);
        }
        %endfor
        """

    render_kwds = {
        'c_ctype': dtypes.ctype(c_dtype),
        's_ctype': dtypes.ctype(s_dtype),
        'mul': functions.mul(c_dtype, c_dtype),
        'dt': dtypes.c_constant(dt, s_dtype),
        'nonlinear': nonlinear_module,
    }
    return Module.create(template_src, render_kwds=render_kwds)
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Generates normally distributed random numbers with the mean ``mean`` and
    the standard deviation ``std`` using Box-Muller transform.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Produces two random numbers per call for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        In case of a complex ``dtype``, ``std`` refers to the standard deviation of the
        complex numbers (same as ``numpy.std()`` returns), not real and imaginary components
        (which will be normally distributed with the standard deviation ``std / sqrt(2)``).
        Consequently, while ``mean`` is of type ``dtype``, ``std`` must be real.
    """
    complex_res = dtypes.is_complex(dtype)

    # Both the real and the complex counterparts of ``dtype`` are passed to the template.
    if complex_res:
        c_dtype = dtype
        r_dtype = dtypes.real_for(dtype)
    else:
        r_dtype = dtype
        c_dtype = dtypes.complex_for(dtype)

    # Underlying sampler of uniformly distributed real numbers.
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    render_kwds = {
        'complex_res': complex_res,
        'r_dtype': r_dtype,
        'r_ctype': dtypes.ctype(r_dtype),
        'c_dtype': c_dtype,
        'c_ctype': dtypes.ctype(c_dtype),
        'polar_unit': functions.polar_unit(r_dtype),
        'bijection': bijection,
        'mean': mean,
        'std': std,
        'uf': uf,
    }
    module = Module(TEMPLATE.get_def("normal_bm"), render_kwds=render_kwds)

    randoms_per_call = 1 if complex_res else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call)
def __init__(self, shape, drift, trajectories=1, diffusion=None,
        iterations=3, noise_type=None):
    """
    Sets up the computation signature and the iteration computation.

    The state has the type ``drift.dtype`` and the shape
    ``(trajectories, drift.components) + shape``.
    The scalar parameters ``t`` and ``dt`` use the real counterpart of ``drift.dtype``
    (or ``drift.dtype`` itself if it is not complex).
    The ``dW`` parameter (of type ``noise_type``) is present
    only if ``diffusion`` is not ``None``.
    """
    is_complex = dtypes.is_complex(drift.dtype)
    real_dtype = dtypes.real_for(drift.dtype) if is_complex else drift.dtype

    state_type = Type(drift.dtype, (trajectories, drift.components) + shape)

    self._noise = diffusion is not None

    signature = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if self._noise:
        signature.append(Parameter('dW', Annotation(noise_type, 'i')))
    signature.append(Parameter('t', Annotation(real_dtype)))
    signature.append(Parameter('dt', Annotation(real_dtype)))
    Computation.__init__(self, signature)

    self._prop_iter = get_prop_iter(
        state_type, drift, iterations,
        diffusion=diffusion, noise_type=noise_type)
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Generates normally distributed random numbers with the mean ``mean`` and
    the standard deviation ``std`` using Box-Muller transform.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Produces two random numbers per call for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        In case of a complex ``dtype``, ``std`` refers to the standard deviation of the
        complex numbers (same as ``numpy.std()`` returns), not real and imaginary components
        (which will be normally distributed with the standard deviation ``std / sqrt(2)``).
        Consequently, while ``mean`` is of type ``dtype``, ``std`` must be real.
    """
    complex_res = dtypes.is_complex(dtype)

    # Both the real and the complex counterparts of ``dtype`` are passed to the template.
    if complex_res:
        c_dtype = dtype
        r_dtype = dtypes.real_for(dtype)
    else:
        r_dtype = dtype
        c_dtype = dtypes.complex_for(dtype)

    # Underlying sampler of uniformly distributed real numbers.
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    render_kwds = {
        'complex_res': complex_res,
        'r_dtype': r_dtype,
        'r_ctype': dtypes.ctype(r_dtype),
        'c_dtype': c_dtype,
        'c_ctype': dtypes.ctype(c_dtype),
        'polar_unit': functions.polar_unit(r_dtype),
        'bijection': bijection,
        'mean': mean,
        'std': std,
        'uf': uf,
    }
    module = Module(TEMPLATE.get_def("normal_bm"), render_kwds=render_kwds)

    randoms_per_call = 1 if complex_res else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call)
def get_prepare_prfft_scan(output):
    """
    Returns a transformation with parameters ``output``, ``Y`` (both of the type of ``output``)
    and ``re_X_0`` (real, with the shape of ``output`` without the last axis).

    The loaded value ``Y`` is replaced by ``(Y.y, -Y.x)``;
    for the zeroth index along the last axis the result is additionally halved
    and the corresponding ``re_X_0`` element is added to its real part.
    """
    re_X_0_type = Type(dtypes.real_for(output.dtype), output.shape[:-1])

    parameters = [
        Parameter('output', Annotation(output, 'o')),
        Parameter('Y', Annotation(output, 'i')),
        Parameter('re_X_0', Annotation(re_X_0_type, 'i')),
    ]

    code = """
        ${Y.ctype} Y = ${Y.load_same};
        Y = COMPLEX_CTR(${Y.ctype})(Y.y, -Y.x);

        if (${idxs[-1]} == 0)
        {
            Y.x = Y.x / 2 + ${re_X_0.load_idx}(${", ".join(idxs[:-1])});
            Y.y /= 2;
        }

        ${output.store_same}(Y);
        """

    return Transformation(parameters, code, connectors=['output', 'Y'])
def get_diffusion(state_dtype, gamma):
    """
    Returns a ``Diffusion`` object with one component and one noise source.

    Its module defines a single function ``${prefix}0_0(idx_x, psi, t)``
    which ignores its arguments and returns the constant ``sqrt(gamma)``
    as a value of the type ``state_dtype`` with the zero imaginary part.
    """
    # NOTE(review): ``dtypes`` and ``numpy`` are used inside the template but are not
    # passed via ``render_kwds``; presumably the template environment provides them.
    module_src = """
        <%
            r_dtype = dtypes.real_for(s_dtype)
            s_ctype = dtypes.ctype(s_dtype)
            r_ctype = dtypes.ctype(r_dtype)
        %>
        INLINE WITHIN_KERNEL ${s_ctype} ${prefix}0_0(
            const int idx_x,
            const ${s_ctype} psi,
            ${r_ctype} t)
        {
            return COMPLEX_CTR(${s_ctype})(${numpy.sqrt(gamma)}, 0);
        }
        """

    render_kwds = dict(
        # ``mul_cr`` is not referenced by the template text above;
        # it is still passed to keep the set of render keywords unchanged.
        mul_cr=functions.mul(state_dtype, dtypes.real_for(state_dtype)),
        s_dtype=state_dtype,
        gamma=gamma)

    module = Module.create(module_src, render_kwds=render_kwds)

    return Diffusion(module, state_dtype, components=1, noise_sources=1)
def _build_plan(self, plan_factory, _device_params, output_arr, input_arr):
    """
    Builds the computation plan.

    For every axis in ``self._axes`` the current array is rearranged
    with ``self._add_transpose()`` and multiplied (``MatrixMul``)
    by the transformation matrix obtained from ``self._get_transformation_matrix()``.
    If the axes are not in the original order after the last multiplication,
    a final ``Transpose`` writes the result to ``output_arr`` in the original order.

    ``plan_factory`` is a callable returning an empty plan.
    ``_device_params`` is not used.
    ``output_arr`` and ``input_arr`` are the array-like objects
    for the output and the input of the computation.

    Returns the filled plan.
    """
    plan = plan_factory()

    dtype = input_arr.dtype
    # The transformation matrix is requested for the real counterpart of a complex dtype.
    p_dtype = dtypes.real_for(dtype) if dtypes.is_complex(dtype) else dtype

    # The shape used to pick the matrix size for each axis depends on the direction.
    mode_shape = input_arr.shape if self._inverse else output_arr.shape

    seq_axes = list(range(len(input_arr.shape)))
    current_axes = list(seq_axes)
    current_mem = input_arr
    last_step = len(self._axes) - 1

    for step, axis in enumerate(self._axes):
        # NOTE(review): presumably moves ``axis`` to the position expected by
        # ``MatrixMul`` and returns the new array and the new order of axes --
        # confirm against ``_add_transpose()``.
        current_mem, current_axes = self._add_transpose(
            plan, current_mem, current_axes, axis)

        tr_matrix = plan.persistent_array(
            self._get_transformation_matrix(
                p_dtype, mode_shape[axis], self._add_points[axis]))

        dot = MatrixMul(current_mem, tr_matrix)

        if step == last_step and current_axes == seq_axes:
            dot_output = output_arr
        else:
            # Cannot write to output if it is not the last transform,
            # or if we need to return to the initial axes order
            dot_output = plan.temp_array_like(dot.parameter.output)

        plan.computation_call(dot, dot_output, current_mem, tr_matrix)
        current_mem = dot_output

    # If we ended up with the wrong order of axes,
    # return to the original order.
    if current_axes != seq_axes:
        tr_axes = [current_axes.index(pos) for pos in range(len(current_axes))]
        transpose = Transpose(current_mem, output_arr_t=output_arr, axes=tr_axes)
        # Nested computations are scheduled with ``computation_call()``,
        # same as the matrix multiplications above.
        plan.computation_call(transpose, output_arr, current_mem)

    return plan
def _build_plan(self, plan_factory, device_params, output, alpha, beta):
    """
    Builds the computation plan, which consists of four stages:

    1. the ``compound_click_probability_prepare`` kernel
       fills a temporary array (same type as ``alpha``) from ``alpha`` and ``beta``;
    2. the ``compound_click_probability_aggregate`` kernel processes the prepared state
       into an array of the shape ``(samples, self._max_total_clicks + 1)``;
    3. ``Reduce`` sums that array over the axis 0;
    4. ``FFT`` is applied to the result, and the real part (``val.x``)
       of its output is stored to ``output``.

    ``plan_factory`` is a callable returning an empty plan.
    ``device_params`` supplies ``local_mem_size`` and ``max_work_group_size``.
    ``alpha`` is a two-dimensional array-like object of the shape ``(samples, modes)``.

    Returns the filled plan.
    Raises ``ValueError`` if the aggregation kernel could not be scheduled
    with any local size.
    """
    plan = plan_factory()

    samples, modes = alpha.shape
    output_size = self._max_total_clicks + 1

    for_reduction = Type(alpha.dtype, (samples, output_size))

    prepared_state = plan.temp_array_like(alpha)

    plan.kernel_call(
        TEMPLATE.get_def("compound_click_probability_prepare"),
        [prepared_state, alpha, beta],
        kernel_name="compound_click_probability_prepare",
        global_size=alpha.shape,
        render_kwds=dict(
            mul_cc=functions.mul(alpha.dtype, alpha.dtype),
            exp_c=functions.exp(alpha.dtype),
            ))

    # Block size is limited by the amount of available local memory.
    # In some OpenCL implementations the number reported cannot actually be fully used
    # (because it's used by kernel arguments), so we're padding it a little.
    local_mem_size = device_params.local_mem_size
    max_elems = (local_mem_size - 256) // alpha.dtype.itemsize
    block_size = 2**helpers.log2(max_elems)

    # No reason to have block size larger than the number of modes
    block_size = min(block_size, helpers.bounding_power_of_2(modes))

    products_gsize = (
        samples,
        helpers.min_blocks(output_size, block_size) * block_size)
    products = plan.temp_array_like(for_reduction)

    # These depend only on ``modes`` and ``block_size``,
    # so there is no need to recalculate them on every attempt.
    full_steps = modes // block_size
    remainder_size = modes % block_size

    # Try progressively smaller local sizes until the kernel fits the device.
    read_size = min(block_size, device_params.max_work_group_size)
    scheduled = False
    while read_size > 1:
        try:
            plan.kernel_call(
                TEMPLATE.get_def("compound_click_probability_aggregate"),
                [products, prepared_state],
                kernel_name="compound_click_probability_aggregate",
                global_size=products_gsize,
                local_size=(1, read_size,),
                render_kwds=dict(
                    block_size=block_size,
                    read_size=read_size,
                    full_steps=full_steps,
                    remainder_size=remainder_size,
                    output_size=output_size,
                    mul_cc=functions.mul(alpha.dtype, alpha.dtype),
                    add_cc=functions.add(alpha.dtype, alpha.dtype),
                    polar_unit=functions.polar_unit(dtypes.real_for(alpha.dtype)),
                    modes=self._system.modes,
                    max_total_clicks=self._max_total_clicks,
                    ))
        except OutOfResourcesError:
            # Not enough resources for this local size; retry with a smaller one.
            read_size //= 2
        else:
            scheduled = True
            break

    if not scheduled:
        # Without the aggregation kernel ``products`` would be reduced uninitialized.
        raise ValueError(
            "Could not find a suitable local size "
            "for the compound_click_probability_aggregate kernel")

    reduction = Reduce(for_reduction, predicate_sum(alpha.dtype), axes=(0,))

    temp = plan.temp_array_like(reduction.parameter.output)

    plan.computation_call(reduction, temp, products)

    fft = FFT(temp)
    # Keeps only the real part of the FFT result.
    real_trf = Transformation([
        Parameter('output', Annotation(output, 'o')),
        Parameter('input', Annotation(temp, 'i')),
        ],
        """
        ${input.ctype} val = ${input.load_same};
        ${output.store_same}(val.x);
        """)
    fft.parameter.output.connect(real_trf, real_trf.input, output_p=real_trf.output)

    # NOTE(review): the trailing ``True`` is presumably the ``inverse`` flag of FFT -- confirm.
    plan.computation_call(fft, output, temp, True)

    return plan
def get_nonlinear2(state_type, nonlinear_wrapper, components, diffusion=None, noise_type=None):
    """
    Returns a ``PureParallel`` computation which evaluates the second and the third
    stage of the nonlinear term and combines them with ``psi_I`` and ``k1``.

    Parameters of the returned computation: outputs ``psi_k`` and ``psi_4``,
    inputs ``psi_I`` and ``k1`` (all of the type ``state_type``),
    the input ``dW`` of the type ``noise_type`` (only if ``diffusion`` is not ``None``),
    and the scalars ``t`` and ``dt`` (real counterpart of ``state_type.dtype``).

    ``nonlinear_wrapper`` is rendered as the prefix of the per-component functions
    calculating the nonlinear term; ``components`` is the number of such functions.
    The kernel is run over the first axis and the axes starting from the third one
    of ``state_type.shape``; the components (the second axis) are processed in the kernel.
    """
    real_dtype = dtypes.real_for(state_type.dtype)
    with_noise = diffusion is not None

    parameters = [
        Parameter('psi_k', Annotation(state_type, 'o')),
        Parameter('psi_4', Annotation(state_type, 'o')),
        Parameter('psi_I', Annotation(state_type, 'i')),
        Parameter('k1', Annotation(state_type, 'i')),
    ]
    if with_noise:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters.extend([
        Parameter('t', Annotation(real_dtype)),
        Parameter('dt', Annotation(real_dtype)),
    ])

    # k2 = N(psi_I + k1 / 2, t + dt / 2)
    # k3 = N(psi_I + k2 / 2, t + dt / 2)
    # psi_4 = psi_I + k3 (argument for the 4-th step k-propagation)
    # psi_k = psi_I + (k1 + 2(k2 + k3)) / 6 (argument for the final k-propagation)
    kernel_src = """
        <%
            if diffusion is None:
                dW = None

            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            args = lambda prefix, num: ", ".join(map(lambda i: prefix + str(i), range(num)))
            dW_args = (args('dW_', diffusion.noise_sources) + ",") if diffusion is not None else ""
        %>

        %for comp in range(components):
        ${psi_k.ctype} psi_I_${comp} = ${psi_I.load_idx}(${trajectory}, ${comp}, ${coords});
        ${psi_k.ctype} k1_${comp} = ${k1.load_idx}(${trajectory}, ${comp}, ${coords});
        %endfor

        %if diffusion is not None:
        %for ncomp in range(diffusion.noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for comp in range(components):
        ${psi_k.ctype} k2_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k1_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_k.ctype} k3_${comp} = ${nonlinear}${comp}(
            ${coords},
            %for c in range(components):
            psi_I_${c} + ${div}(k2_${c}, 2),
            %endfor
            ${dW_args}
            ${t} + ${dt} / 2, ${dt});
        %endfor

        %for comp in range(components):
        ${psi_4.store_idx}(${trajectory}, ${comp}, ${coords}, psi_I_${comp} + k3_${comp});
        %endfor

        %for comp in range(components):
        ${psi_k.store_idx}(
            ${trajectory}, ${comp}, ${coords},
            psi_I_${comp}
                + ${div}(k1_${comp}, 6)
                + ${div}(k2_${comp}, 3)
                + ${div}(k3_${comp}, 3));
        %endfor
        """

    # One work item per trajectory and coordinate; the components axis is skipped.
    guiding_array = (state_type.shape[0],) + state_type.shape[2:]

    render_kwds = dict(
        components=components,
        nonlinear=nonlinear_wrapper,
        diffusion=diffusion,
        div=functions.div(state_type.dtype, numpy.int32, out_dtype=state_type.dtype))

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=guiding_array,
        render_kwds=render_kwds)
def get_prop_iter(state_type, drift, iterations, diffusion=None, noise_type=None):
    """
    Returns a ``PureParallel`` computation performing one propagation step
    using ``iterations`` fixed-point iterations.

    Parameters of the returned computation: ``output`` and ``input``
    (of the type ``state_type``), the input ``dW`` of the type ``noise_type``
    (only if ``diffusion`` is not ``None``), and the scalars ``t`` and ``dt``
    (real counterpart of ``state_type.dtype`` if it is complex,
    ``state_type.dtype`` itself otherwise).

    On every iteration each component's increment ``dpsi`` is set to half of
    the drift term (``drift.module``) multiplied by ``dt``
    plus the diffusion terms (``diffusion.module``) multiplied by the noises ``dW``,
    all evaluated at the current temporary state and the time ``t + dt / 2``;
    the temporary state is then set to ``psi + dpsi``.
    The stored result is the last temporary state plus the last ``dpsi``.
    The kernel is run over the first axis and the axes starting from the third one
    of ``state_type.shape``; the components (the second axis) are processed in the kernel.
    """
    state_dtype = state_type.dtype
    with_noise = diffusion is not None

    real_dtype = dtypes.real_for(state_dtype) if dtypes.is_complex(state_dtype) else state_dtype

    # Without a diffusion term the noise multiplication is never rendered,
    # so the real dtype serves as a placeholder.
    noise_dtype = noise_type.dtype if with_noise else real_dtype

    parameters = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if with_noise:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters.extend([
        Parameter('t', Annotation(real_dtype)),
        Parameter('dt', Annotation(real_dtype)),
    ])

    kernel_src = """
        <%
            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]

            components = drift.components
            if diffusion is not None:
                noise_sources = diffusion.noise_sources

            psi_args = ", ".join("psi_" + str(c) + "_tmp" for c in range(components))

            if diffusion is None:
                dW = None
        %>

        %for comp in range(components):
        ${output.ctype} psi_${comp} = ${input.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psi_${comp}_tmp = psi_${comp};
        ${output.ctype} dpsi_${comp};
        %endfor

        %if diffusion is not None:
        %for ncomp in range(noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for i in range(iterations):

        %for comp in range(components):
        dpsi_${comp} =
            ${mul_cr}(
                ${mul_cr}(${drift.module}${comp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), ${dt})
                %if diffusion is not None:
                %for ncomp in range(noise_sources):
                + ${mul_cn}(${diffusion.module}${comp}_${ncomp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), dW_${ncomp})
                %endfor
                %endif
                , 0.5);
        %endfor

        %for comp in range(components):
        psi_${comp}_tmp = psi_${comp} + dpsi_${comp};
        %endfor

        %endfor

        %for comp in range(components):
        ${output.store_idx}(${trajectory}, ${comp}, ${coords}, psi_${comp}_tmp + dpsi_${comp});
        %endfor
        """

    # One work item per trajectory and coordinate; the components axis is skipped.
    guiding_array = (state_type.shape[0],) + state_type.shape[2:]

    render_kwds = dict(
        drift=drift,
        diffusion=diffusion,
        iterations=iterations,
        mul_cr=functions.mul(state_dtype, real_dtype),
        mul_cn=functions.mul(state_dtype, noise_dtype))

    return PureParallel(
        parameters,
        kernel_src,
        guiding_array=guiding_array,
        render_kwds=render_kwds)