コード例 #1
0
ファイル: lwe_gpu.py プロジェクト: stjordanis/nufhe
    def _build_plan(
            self, plan_factory, device_params,
            result_a, result_b, result_cv, messages, key, noises_a, noises_b):
        """
        Build the execution plan for this computation.

        Two computations are scheduled, in this order:

        1. A ``MatrixMulVector`` computation (constructed from ``noises_a``) whose
           output is routed through a transformation that stores
           ``noises_b + messages + <matrix-vector result>`` into ``result_b``
           and the constant ``noise**2`` into ``result_cv``.
        2. A plain copy of ``noises_a`` into ``result_a``.

        ``device_params`` is not used here.
        """
        plan = plan_factory()

        matvec = MatrixMulVector(noises_a)

        # Signature of the output transformation. The connector
        # ``noises_a_times_key`` is annotated with the type of ``noises_b``.
        store_params = [
            Parameter('result_b', Annotation(result_b, 'o')),
            Parameter('result_cv', Annotation(result_cv, 'o')),
            Parameter('messages', Annotation(messages, 'i')),
            Parameter('noises_a_times_key', Annotation(noises_b, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i')),
        ]
        store_trf = Transformation(
            store_params,
            """
            ${result_b.store_same}(
                ${noises_b.load_same}
                + ${messages.load_same}
                + ${noises_a_times_key.load_same});
            ${result_cv.store_same}(${noise**2});
            """,
            connectors=['noises_a_times_key'],
            render_kwds=dict(noise=self._noise))

        # Attach the transformation to the output of the matrix-vector product.
        # The keyword order defines the order of the new parameters, which the
        # positional arguments of the call below rely on.
        matvec.parameter.output.connect(
            store_trf, store_trf.noises_a_times_key,
            b=store_trf.result_b,
            cv=store_trf.result_cv,
            messages=store_trf.messages,
            noises_b=store_trf.noises_b)

        plan.computation_call(
            matvec, result_b, result_cv, messages, noises_b, noises_a, key)

        copy_noises_a = PureParallel.from_trf(transformations.copy(noises_a))
        plan.computation_call(copy_noises_a, result_a, noises_a)

        return plan
コード例 #2
0
    def _build_plan(self, plan_factory, device_params, output, input_,
                    inverse):
        """
        Build the plan for shifting ``input_`` along ``self._axes`` into ``output``.

        * If the product of the lengths of the shifted axes is 1, the work is
          delegated to ``_build_trivial_plan``.
        * If every shifted axis has an even length, a single in-place kernel
          (swapping pairs of elements) is scheduled; ``inverse`` is not passed
          to it because that shift is its own inverse.
        * Otherwise the shift is written to a temporary array, which is then
          copied into ``output``.
        """
        shifted_size = helpers.product([input_.shape[i] for i in self._axes])
        if shifted_size == 1:
            return self._build_trivial_plan(plan_factory, output, input_)

        plan = plan_factory()

        axes = tuple(sorted(self._axes))
        shape = list(input_.shape)

        has_odd_axis = any(shape[axis] % 2 != 0 for axis in axes)
        if has_odd_axis:
            # Resort to an out-of-place shift to a temporary array and then copy.
            temp = plan.temp_array_like(output)
            plan.kernel_call(
                TEMPLATE.get_def('fftshift_outplace'),
                [temp, input_, inverse],
                kernel_name="kernel_fftshift_outplace",
                global_size=shape,
                render_kwds=dict(axes=axes))

            copy_trf = copy(input_, out_arr_t=output)
            plan.computation_call(
                PureParallel.from_trf(copy_trf, copy_trf.input), output, temp)
            return plan

        # All shift axes have even length, so the shift can be done in place
        # by swapping pairs of elements. The global size is halved along the
        # first shifted axis.
        shape[axes[0]] //= 2
        plan.kernel_call(
            TEMPLATE.get_def('fftshift_inplace'),
            [output, input_],
            kernel_name="kernel_fftshift_inplace",
            global_size=shape,
            render_kwds=dict(axes=axes))
        return plan
コード例 #3
0
ファイル: fftshift.py プロジェクト: fjarri/reikna
    def _build_plan(self, plan_factory, device_params, output, input_, inverse):
        """
        Schedule the kernels that shift ``input_`` along ``self._axes``.

        A trivial plan is returned when the shifted axes contain a single
        element in total. Otherwise either an in-place kernel (all shifted axes
        of even length) or an out-of-place kernel followed by a copy from a
        temporary array is used.
        """
        if helpers.product([input_.shape[i] for i in self._axes]) == 1:
            return self._build_trivial_plan(plan_factory, output, input_)

        plan = plan_factory()

        axes = tuple(sorted(self._axes))
        shape = list(input_.shape)
        render_kwds = dict(axes=axes)

        inplace_possible = all(shape[axis] % 2 == 0 for axis in axes)
        if inplace_possible:
            # With even lengths on all shift axes the shift reduces to swapping
            # pairs of elements, so it can be done in place.
            # Note that the inplace fftshift is its own inverse.
            shape[axes[0]] //= 2
            kernel = TEMPLATE.get_def('fftshift_inplace')
            plan.kernel_call(
                kernel, [output, input_],
                kernel_name="kernel_fftshift_inplace",
                global_size=shape,
                render_kwds=render_kwds)
        else:
            # Shift out of place into a temporary array, then copy the result.
            temp = plan.temp_array_like(output)
            kernel = TEMPLATE.get_def('fftshift_outplace')
            plan.kernel_call(
                kernel, [temp, input_, inverse],
                kernel_name="kernel_fftshift_outplace",
                global_size=shape,
                render_kwds=render_kwds)

            to_output = copy(input_, out_arr_t=output)
            plan.computation_call(
                PureParallel.from_trf(to_output, to_output.input), output, temp)

        return plan
コード例 #4
0
ファイル: test_pureparallel.py プロジェクト: fjarri/reikna
    def __init__(self, arr):
        """
        Prepare a nested copy computation for ``arr`` and declare the
        signature of this computation: one output (``outer_output``) and
        one input (``outer_input``), both annotated with ``arr``.
        """
        arr_copy = copy(arr, out_arr_t=arr)
        self._copy_comp = PureParallel.from_trf(arr_copy, arr_copy.input)

        signature = [
            Parameter('outer_output', Annotation(arr, 'o')),
            Parameter('outer_input', Annotation(arr, 'i')),
        ]
        Computation.__init__(self, signature)
コード例 #5
0
ファイル: test_pureparallel.py プロジェクト: xexo7C8/reikna
    def __init__(self, arr):
        """
        Store a ``PureParallel`` computation built from a copy transformation
        of ``arr`` and initialize the base ``Computation`` with the
        ``outer_output`` / ``outer_input`` parameters.
        """
        trf = copy(arr, out_arr_t=arr)
        self._copy_comp = PureParallel.from_trf(trf, trf.input)

        # (parameter name, annotation role) pairs, in signature order.
        roles = (('outer_output', 'o'), ('outer_input', 'i'))
        Computation.__init__(
            self,
            [Parameter(name, Annotation(arr, role)) for name, role in roles])
コード例 #6
0
ファイル: fft.py プロジェクト: ringw/reikna
    def _build_trivial_plan(self, plan_factory, output, input_):
        """
        Build a plan for the trivial problem.

        A dummy copy from ``input_`` to ``output`` is still scheduled,
        because attached transformations have to be run.
        """
        plan = plan_factory()

        passthrough = copy(input_, out_arr_t=output)
        plan.computation_call(
            PureParallel.from_trf(passthrough, passthrough.input),
            output, input_)

        return plan
コード例 #7
0
ファイル: array_helpers.py プロジェクト: fjarri/reikna
    def _build_plan(self, plan_factory, device_params, array, shift):
        """
        Build the plan in two steps: run the roll computation for
        ``self._axis`` from ``array`` into a temporary array, then copy the
        temporary back into ``array``.
        """
        plan = plan_factory()

        rolled = plan.temp_array_like(array)
        roll = roll_computation(array, self._axis)
        plan.computation_call(roll, rolled, array, shift)

        # Copy the temporary result back over the original array.
        copy_back = transformations.copy(rolled, out_arr_t=array)
        plan.computation_call(
            PureParallel.from_trf(copy_back, guiding_array=copy_back.output),
            array, rolled)

        return plan
コード例 #8
0
ファイル: fft.py プロジェクト: mgolub2/reikna
    def _build_trivial_plan(self, plan_factory, output, input_):
        """
        Return a plan consisting of a single copy of ``input_`` into ``output``.

        The problem itself is trivial, but a dummy kernel is needed
        because transformations still have to be run.
        """
        plan = plan_factory()

        trf = copy(input_, out_arr_t=output)
        dummy_comp = PureParallel.from_trf(trf, trf.input)
        plan.computation_call(dummy_comp, output, input_)
        return plan
コード例 #9
0
ファイル: array_helpers.py プロジェクト: drtpotter/reikna
    def _build_plan(self, plan_factory, device_params, array, shift):
        """
        Schedule the roll of ``array`` by ``shift`` along ``self._axis``.

        The roll writes into a temporary array, and a second computation
        copies that temporary back into ``array``.
        """
        plan = plan_factory()
        scratch = plan.temp_array_like(array)

        # Step 1: roll ``array`` into the temporary.
        plan.computation_call(
            roll_computation(array, self._axis), scratch, array, shift)

        # Step 2: copy the temporary back into ``array``.
        restore = transformations.copy(scratch, out_arr_t=array)
        restore_comp = PureParallel.from_trf(
            restore, guiding_array=restore.output)
        plan.computation_call(restore_comp, array, scratch)

        return plan
コード例 #10
0
def test_copy(some_thr, any_dtype):
    """
    Connect one copy transformation to both the input and the output of the
    test computation and check that the result matches the original data.
    """
    host_data = get_test_array((1000,), any_dtype)
    src_dev = some_thr.to_device(host_data)
    dst_dev = some_thr.empty_like(src_dev)

    comp = get_test_computation(src_dev)
    copy_trf = tr.copy(src_dev)

    comp.parameter.input.connect(
        copy_trf, copy_trf.output, input_prime=copy_trf.input)
    comp.parameter.output.connect(
        copy_trf, copy_trf.input, output_prime=copy_trf.output)
    compiled = comp.compile(some_thr)

    compiled(dst_dev, src_dev)
    assert diff_is_negligible(dst_dev.get(), host_data)
コード例 #11
0
def test_copy(some_thr, any_dtype):
    """
    Check that a computation with a copy transformation attached on both its
    input and its output side reproduces the input array.
    """
    reference = get_test_array((1000, ), any_dtype)
    in_dev = some_thr.to_device(reference)
    out_dev = some_thr.empty_like(in_dev)

    computation = get_test_computation(in_dev)
    trf = tr.copy(in_dev)

    # The same transformation object is attached on both sides.
    params = computation.parameter
    params.input.connect(trf, trf.output, input_prime=trf.input)
    params.output.connect(trf, trf.input, output_prime=trf.output)

    run = computation.compile(some_thr)
    run(out_dev, in_dev)

    assert diff_is_negligible(out_dev.get(), reference)
コード例 #12
0
ファイル: array_helpers.py プロジェクト: fjarri/reikna
def setitem_computation(dest, source):
    """
    Returns a computation (it is not compiled here) that broadcasts ``source``
    to ``dest``, where ``dest`` is a GPU array, and ``source`` is either
    a GPU array or a scalar.

    A ``source`` with an empty shape is treated as a scalar; anything else
    is copied with a cast to ``dest.dtype`` attached on the input side.
    """
    if len(source.shape) != 0:
        # Array source: copy with the dtype of ``dest``, casting on input.
        casted_type = Type.from_value(source).with_dtype(dest.dtype)
        copy_trf = transformations.copy(casted_type, dest)
        comp = PureParallel.from_trf(copy_trf, guiding_array=copy_trf.output)

        cast_trf = transformations.cast(source, dest.dtype)
        comp.parameter.input.connect(
            cast_trf, cast_trf.output, src_input=cast_trf.input)
        return comp

    # Scalar source: broadcast it over ``dest``.
    broadcast_trf = transformations.broadcast_param(dest)
    return PureParallel.from_trf(
        broadcast_trf, guiding_array=broadcast_trf.output)
コード例 #13
0
ファイル: array_helpers.py プロジェクト: drtpotter/reikna
def setitem_computation(dest, source, is_array):
    """
    Returns a computation (it is not compiled here) that broadcasts ``source``
    to ``dest``, where ``dest`` is a GPU array, and ``source`` is either
    a GPU array or a scalar.

    ``is_array`` selects the array path (copy with a cast to ``dest.dtype``)
    over the scalar path (parameter broadcast).
    """
    if not is_array:
        scalar_trf = transformations.broadcast_param(dest)
        return PureParallel.from_trf(
            scalar_trf, guiding_array=scalar_trf.output)

    target_type = Type.from_value(source).with_dtype(dest.dtype)
    array_trf = transformations.copy(target_type, dest)
    comp = PureParallel.from_trf(array_trf, guiding_array=array_trf.output)

    # Convert the source elements to the destination dtype on the way in.
    cast_trf = transformations.cast(source, dest.dtype)
    comp.parameter.input.connect(
        cast_trf, cast_trf.output, src_input=cast_trf.input)
    return comp
コード例 #14
0
    def __init__(self, arr_t, padding=False, axes=None, **kwargs):
        '''
        Wrapper around `reikna.fft.FFT` with automatic real-to-complex casting
        and optional padding for higher performance.

        Input
        -----
        arr_t: array-like
            Object providing the ``shape`` and ``dtype`` of the input array.
        padding: bool, default=False
            If True, the input array is padded to the next power of two on the
            transformed axes.
        axes: tuple
            Axes over which to perform the transform. Defaults to all axes.
            Negative values are counted from the last axis.
        **kwargs:
            Forwarded unchanged to the base class constructor.

        Raises
        ------
        IndexError
            If an axis (after resolving negative values) is out of range.

        Note
        ----
        Because reikna does not allow nodes of the transformation tree with the
        identical names, the input array is called `input_`.
        '''
        ndim = len(arr_t.shape)
        if axes is None:
            axes = tuple(range(ndim))
        else:
            axes = tuple(v + ndim if v < 0 else v for v in axes)
        for v in axes:
            if v not in range(0, ndim):
                raise IndexError('axis is out of range')

        # The transform itself always works on a complex dtype.
        dtype = (arr_t.dtype if dtypes.is_complex(arr_t.dtype) else
                 dtypes.complex_for(arr_t.dtype))
        if padding:
            # Round the length of every transformed axis up to a power of two.
            shape = tuple(1 << int(np.ceil(np.log2(v))) if ax in axes else v
                          for ax, v in enumerate(arr_t.shape))
        else:
            shape = arr_t.shape
        super(FFT, self).__init__(Type(dtype, shape), axes=axes, **kwargs)

        # ``leaf`` tracks the current outermost input parameter while
        # transformations are chained in front of it.
        leaf = self.parameter.input
        if dtype != arr_t.dtype:
            # The input dtype differs from the complex dtype: convert first.
            complex_tr = Complex(Type(arr_t.dtype, leaf.shape))
            leaf.connect(complex_tr,
                         complex_tr.output,
                         in_real=complex_tr.input)
            leaf = self.parameter.in_real
        if shape != arr_t.shape:
            # Padding changed the shape: fill the extra elements with zeros.
            pad_tr = Padded(leaf, arr_t, default='0.')
            leaf.connect(pad_tr, pad_tr.output, in_padded=pad_tr.input)
            leaf = self.parameter.in_padded
        # A final copy gives the outermost input a fixed name, ``input_``.
        copy_tr = copy(leaf)
        leaf.connect(copy_tr, copy_tr.output, input_=copy_tr.input)
コード例 #15
0
ファイル: test_transformation.py プロジェクト: xexo7C8/reikna
def test_array_views(thr):
    """
    Copy a strided view of one device array into a view of another device
    array and compare the result with the same slicing applied on the host.
    """
    host_src = get_test_array((6, 8, 10), numpy.int32)

    src_dev = thr.to_device(host_src)
    dst_dev = thr.empty_like(host_src)

    src_view = src_dev[2:4, ::2, ::-1]
    dst_view = dst_dev[4:, 1:5, :]

    view_copy = transformations.copy(src_view, out_arr_t=dst_view)
    move = PureParallel.from_trf(view_copy, guiding_array='output').compile(thr)
    move(dst_view, src_view)

    actual = dst_dev.get()[4:, 1:5, :]
    expected = host_src[2:4, ::2, ::-1]

    assert diff_is_negligible(actual, expected)