def _build_plan(
        self, plan_factory, device_params,
        result_a, result_b, result_cv, messages, key, noises_a, noises_b):

    plan = plan_factory()

    mul_key = MatrixMulVector(noises_a)

    fill_b_cv = Transformation(
        [
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('messages', Annotation(messages, 'i')),
            Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i'))],
        """
        ${result_b.store_same}(
            ${noises_b.load_same}
            + ${messages.load_same}
            + ${noises_a_times_key.load_same});
        ${result_cv.store_same}(${noise**2});
        """,
        connectors=['noises_a_times_key'],
        render_kwds=dict(noise=self._noise))

    # Attach the transformation to the output of the matrix-vector product,
    # so that `result_b` and `result_cv` are filled in the same kernel call.
    mul_key.parameter.output.connect(
        fill_b_cv, fill_b_cv.noises_a_times_key,
        b=fill_b_cv.result_b, cv=fill_b_cv.result_cv,
        messages=fill_b_cv.messages, noises_b=fill_b_cv.noises_b)

    plan.computation_call(
        mul_key, result_b, result_cv, messages, noises_b, noises_a, key)

    plan.computation_call(
        PureParallel.from_trf(transformations.copy(noises_a)),
        result_a, noises_a)

    return plan

def _build_plan(self, plan_factory, device_params, output, input_, inverse):

    if helpers.product([input_.shape[i] for i in self._axes]) == 1:
        return self._build_trivial_plan(plan_factory, output, input_)

    plan = plan_factory()

    axes = tuple(sorted(self._axes))
    shape = list(input_.shape)

    if all(shape[axis] % 2 == 0 for axis in axes):
        # If all shift axes have even length, it is possible to perform
        # the shift inplace (by swapping pairs of elements).
        # Note that the inplace fftshift is its own inverse.
        shape[axes[0]] //= 2
        plan.kernel_call(
            TEMPLATE.get_def('fftshift_inplace'),
            [output, input_],
            kernel_name="kernel_fftshift_inplace",
            global_size=shape,
            render_kwds=dict(axes=axes))
    else:
        # Resort to an out-of-place shift to a temporary array and then copy.
        temp = plan.temp_array_like(output)
        plan.kernel_call(
            TEMPLATE.get_def('fftshift_outplace'),
            [temp, input_, inverse],
            kernel_name="kernel_fftshift_outplace",
            global_size=shape,
            render_kwds=dict(axes=axes))
        copy_trf = copy(input_, out_arr_t=output)
        copy_comp = PureParallel.from_trf(copy_trf, copy_trf.input)
        plan.computation_call(copy_comp, output, temp)

    return plan

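# A hedged aside (my addition, not from the source): the even-length condition in
# the plan above rests on the fact that for an axis of even length, fftshift and
# ifftshift coincide, so a single in-place swap kernel is its own inverse.
# A quick numpy check of that property:
import numpy

even = numpy.arange(8)
assert numpy.array_equal(numpy.fft.fftshift(even), numpy.fft.ifftshift(even))

# For odd lengths the two shifts differ, which is why the plan falls back
# to the out-of-place kernel followed by a copy.
odd = numpy.arange(7)
assert not numpy.array_equal(numpy.fft.fftshift(odd), numpy.fft.ifftshift(odd))
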
def __init__(self, arr):
    copy_trf = copy(arr, out_arr_t=arr)
    self._copy_comp = PureParallel.from_trf(copy_trf, copy_trf.input)
    Computation.__init__(self, [
        Parameter('outer_output', Annotation(arr, 'o')),
        Parameter('outer_input', Annotation(arr, 'i'))])

def _build_trivial_plan(self, plan_factory, output, input_):
    # Trivial problem. Need to add a dummy kernel
    # because we still have to run transformations.
    plan = plan_factory()
    copy_trf = copy(input_, out_arr_t=output)
    copy_comp = PureParallel.from_trf(copy_trf, copy_trf.input)
    plan.computation_call(copy_comp, output, input_)
    return plan

def _build_plan(self, plan_factory, device_params, array, shift):
    plan = plan_factory()
    temp = plan.temp_array_like(array)
    # Roll out of place into a temporary array, then copy the result back,
    # making the computation as a whole effectively inplace.
    plan.computation_call(roll_computation(array, self._axis), temp, array, shift)
    tr = transformations.copy(temp, out_arr_t=array)
    copy_comp = PureParallel.from_trf(tr, guiding_array=tr.output)
    plan.computation_call(copy_comp, array, temp)
    return plan

def test_copy(some_thr, any_dtype):
    input_ = get_test_array((1000,), any_dtype)
    input_dev = some_thr.to_device(input_)
    output_dev = some_thr.empty_like(input_dev)

    test = get_test_computation(input_dev)
    copy = tr.copy(input_dev)
    test.parameter.input.connect(copy, copy.output, input_prime=copy.input)
    test.parameter.output.connect(copy, copy.input, output_prime=copy.output)

    testc = test.compile(some_thr)
    testc(output_dev, input_dev)
    assert diff_is_negligible(output_dev.get(), input_)

def setitem_computation(dest, source):
    """
    Returns a computation that broadcasts ``source`` to ``dest``, where
    ``dest`` is a GPU array, and ``source`` is either a GPU array or a scalar.
    """
    if len(source.shape) == 0:
        trf = transformations.broadcast_param(dest)
        return PureParallel.from_trf(trf, guiding_array=trf.output)
    else:
        source_dt = Type.from_value(source).with_dtype(dest.dtype)
        trf = transformations.copy(source_dt, dest)
        comp = PureParallel.from_trf(trf, guiding_array=trf.output)
        cast_trf = transformations.cast(source, dest.dtype)
        comp.parameter.input.connect(cast_trf, cast_trf.output, src_input=cast_trf.input)
        return comp

def setitem_computation(dest, source, is_array):
    """
    Returns a computation that broadcasts ``source`` to ``dest``, where
    ``dest`` is a GPU array, and ``source`` is either a GPU array or a scalar.
    """
    if is_array:
        source_dt = Type.from_value(source).with_dtype(dest.dtype)
        trf = transformations.copy(source_dt, dest)
        comp = PureParallel.from_trf(trf, guiding_array=trf.output)
        cast_trf = transformations.cast(source, dest.dtype)
        comp.parameter.input.connect(cast_trf, cast_trf.output, src_input=cast_trf.input)
        return comp
    else:
        trf = transformations.broadcast_param(dest)
        return PureParallel.from_trf(trf, guiding_array=trf.output)

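# Hedged usage sketch (my addition) for the first, two-argument variant of
# setitem_computation above. Assumes a CLUDA thread can be created; `any_api`
# picks whichever backend (CUDA or OpenCL) is available.
import numpy
from reikna.cluda import any_api

thr = any_api().Thread.create()
dest = thr.to_device(numpy.zeros((4, 4), numpy.float32))

# Scalar source: `source.shape == ()`, so the broadcast_param branch is taken.
fill = setitem_computation(dest, numpy.float32(1)).compile(thr)
fill(dest, numpy.float32(1))

# Array source: the copy branch is taken, with a cast transformation attached
# to reconcile the dtypes (int32 -> float32 here).
source = thr.to_device(numpy.arange(16, dtype=numpy.int32).reshape(4, 4))
assign = setitem_computation(dest, source).compile(thr)
assign(dest, source)
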
def __init__(self, arr_t, padding=False, axes=None, **kwargs):
    '''
    Wrapper around `reikna.fft.FFT` with automatic real-to-complex casting
    and optional padding for higher performance.

    Input
    -----
    padding: bool, default=False
        If True, the input array is padded to the next power of two
        on the transformed axes.
    axes: tuple
        Axes over which to perform the transform. Defaults to all axes.

    Note
    ----
    Because reikna does not allow nodes of the transformation tree
    to have identical names, the input array parameter is called `input_`.
    '''
    if axes is None:
        axes = range(len(arr_t.shape))
    else:
        axes = tuple(v + len(arr_t.shape) if v < 0 else v for v in axes)
        for v in axes:
            if v not in range(len(arr_t.shape)):
                raise IndexError('axis is out of range')

    dtype = (arr_t.dtype if dtypes.is_complex(arr_t.dtype)
             else dtypes.complex_for(arr_t.dtype))
    if padding:
        shape = tuple(1 << int(np.ceil(np.log2(v))) if ax in axes else v
                      for ax, v in enumerate(arr_t.shape))
    else:
        shape = arr_t.shape
    super(FFT, self).__init__(Type(dtype, shape), axes=axes, **kwargs)

    # Attach the input transformations: cast to complex, pad with zeros,
    # and finally an identity copy that renames the leaf parameter to `input_`.
    input = self.parameter.input
    if dtype != arr_t.dtype:
        complex_tr = Complex(Type(arr_t.dtype, input.shape))
        input.connect(complex_tr, complex_tr.output, in_real=complex_tr.input)
        input = self.parameter.in_real
    if shape != arr_t.shape:
        pad_tr = Padded(input, arr_t, default='0.')
        input.connect(pad_tr, pad_tr.output, in_padded=pad_tr.input)
        input = self.parameter.in_padded
    copy_tr = copy(input)
    input.connect(copy_tr, copy_tr.output, input_=copy_tr.input)

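# Hedged usage sketch (my addition) for the FFT wrapper above. Assumes the class
# is in scope as `FFT`, and that reikna's FFT exposes its usual
# (output, input, inverse) call signature with `inverse` defaulting to a forward
# transform, so it can be omitted here. A 100-point real input is cast to
# complex64 and zero-padded to 128 points before the transform.
import numpy
from reikna.cluda import any_api

thr = any_api().Thread.create()
data = numpy.random.rand(100).astype(numpy.float32)

fft = FFT(data, padding=True).compile(thr)
out_dev = thr.array((128,), numpy.complex64)  # next power of two above 100
fft(out_dev, thr.to_device(data))
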
def test_array_views(thr):
    a = get_test_array((6, 8, 10), numpy.int32)
    a_dev = thr.to_device(a)
    b_dev = thr.empty_like(a)

    in_view = a_dev[2:4, ::2, ::-1]
    out_view = b_dev[4:, 1:5, :]

    move = PureParallel.from_trf(
        transformations.copy(in_view, out_arr_t=out_view),
        guiding_array='output').compile(thr)

    move(out_view, in_view)
    b_res = b_dev.get()[4:, 1:5, :]
    b_ref = a[2:4, ::2, ::-1]

    assert diff_is_negligible(b_res, b_ref)