Esempio n. 1
0
def pow(dtype, power_dtype=None):
    """
    Builds a :py:class:`~reikna.cluda.Module` rendering a two-argument function
    that raises a value of type ``dtype`` to a power of type ``power_dtype``.

    If ``power_dtype`` is omitted, it is taken to be ``dtype`` for a real ``dtype``
    and ``dtypes.real_for(dtype)`` otherwise; an integer ``dtype`` requires
    an explicit ``power_dtype`` (``ValueError`` is raised if it is missing).
    A complex ``power_dtype`` raises ``NotImplementedError``.
    When ``dtype`` is integer and ``power_dtype`` is real,
    the module is rendered with ``power_dtype`` as the base type.
    """
    if dtypes.is_complex(power_dtype):
        raise NotImplementedError("pow() with a complex power is not supported")

    if power_dtype is None:
        if dtypes.is_integer(dtype):
            raise ValueError("Power dtype must be specified for an integer argument")
        power_dtype = dtype if dtypes.is_real(dtype) else dtypes.real_for(dtype)

    # Real type handed to the ``polar`` helper module.
    if dtypes.is_complex(dtype):
        polar_dtype = dtypes.real_for(dtype)
    elif dtypes.is_real(dtype):
        polar_dtype = dtype
    else:
        polar_dtype = power_dtype if dtypes.is_real(power_dtype) else numpy.float32

    # An integer base combined with a real power is computed in the power's type.
    base_dtype = dtype
    if dtypes.is_integer(dtype) and dtypes.is_real(power_dtype):
        base_dtype = power_dtype

    template = TEMPLATE.get_def('pow')
    render_kwds = dict(
        dtype=base_dtype,
        power_dtype=power_dtype,
        mul_=mul(base_dtype, base_dtype),
        div_=div(base_dtype, base_dtype),
        polar_=polar(polar_dtype))
    return Module(template, render_kwds=render_kwds)
Esempio n. 2
0
def norm_const(arr_t, order):
    """
    Builds a transformation (1 output, 1 input) for the ``order``-norm
    with ``order`` fixed at construction time: ``output = abs(input) ** order``.

    The output has the shape of ``arr_t``; its dtype is the real counterpart
    of ``arr_t.dtype`` for complex inputs and ``arr_t.dtype`` itself otherwise.
    The kernel first applies ``functions.norm`` and, unless ``order`` is 2,
    raises the result to the power ``order / 2``.
    """
    in_dtype = arr_t.dtype
    out_dtype = dtypes.real_for(in_dtype) if dtypes.is_complex(in_dtype) else in_dtype

    parameters = [
        Parameter('output', Annotation(Type(out_dtype, arr_t.shape), 'o')),
        Parameter('input', Annotation(arr_t, 'i')),
    ]

    return Transformation(
        parameters,
        """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """,
        render_kwds=dict(norm=functions.norm(in_dtype), order=order))
Esempio n. 3
0
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window), NFFT is provided separately.

    :param arr: array-like type describing both the input and the output
        of the returned transformation.
    :param NFFT: number of leading elements along the last axis that are windowed;
        elements with an index ``>= NFFT`` are multiplied by 1.
    :returns: a ``Transformation`` with the parameters ``output`` and ``input``.

    The window coefficient is real: for a complex ``arr.dtype``
    its type is ``dtypes.real_for(arr.dtype)``, otherwise ``arr.dtype``.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype

    # The padding check is only rendered when the windowed (last) axis is
    # actually longer than NFFT. The guard and the runtime test both refer to
    # the last axis, the same one used in the window formula.
    return Transformation(
        [
            Parameter('output', Annotation(arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[-1]:
        if (${idxs[-1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds=dict(
            coeff_dtype=coeff_dtype,
            NFFT=NFFT,
            mul=functions.mul(arr.dtype, coeff_dtype)))
Esempio n. 4
0
def norm_const(arr_t, order):
    """
    Returns a transformation (1 output, 1 input) computing the ``order``-norm
    for a compile-time constant ``order``: ``output = abs(input) ** order``.

    The output keeps the shape of ``arr_t``. Its dtype is ``dtypes.real_for(arr_t.dtype)``
    when the input is complex, and ``arr_t.dtype`` otherwise.
    In the kernel, ``functions.norm`` is applied first; for any ``order`` other than 2
    the result is additionally raised to the power ``order / 2``.
    """
    source_dtype = arr_t.dtype
    if dtypes.is_complex(source_dtype):
        result_dtype = dtypes.real_for(source_dtype)
    else:
        result_dtype = source_dtype

    output_param = Parameter('output', Annotation(Type(result_dtype, arr_t.shape), 'o'))
    input_param = Parameter('input', Annotation(arr_t, 'i'))

    return Transformation(
        [output_param, input_param],
        """
        ${input.ctype} val = ${input.load_same};
        ${output.ctype} norm = ${norm}(val);
        %if order != 2:
        norm = pow(norm, ${dtypes.c_constant(order / 2, output.dtype)});
        %endif
        ${output.store_same}(norm);
        """,
        render_kwds=dict(
            norm=functions.norm(source_dtype),
            order=order))
Esempio n. 5
0
def hanning_window(arr, NFFT):
    """
    Applies the von Hann window to the rows of a 2D array.
    To account for zero padding (which we do not want to window), NFFT is provided separately.

    :param arr: array-like type used for both the ``input`` and the ``output``
        parameter of the returned transformation.
    :param NFFT: length of the windowed part of each row; elements whose index
        along the last axis is ``>= NFFT`` are passed through with a coefficient of 1.
    :returns: a ``Transformation`` with the parameters ``output`` and ``input``.

    The coefficient type is ``dtypes.real_for(arr.dtype)`` for complex arrays
    and ``arr.dtype`` otherwise.
    """
    if dtypes.is_complex(arr.dtype):
        coeff_dtype = dtypes.real_for(arr.dtype)
    else:
        coeff_dtype = arr.dtype

    # The template-time guard must look at the windowed (last) axis: the runtime
    # padding check is needed exactly when that axis is longer than NFFT.
    # The same axis index is used for the check and for the window formula.
    return Transformation(
        [
            Parameter('output', Annotation(arr, 'o')),
            Parameter('input', Annotation(arr, 'i')),
        ],
        """
        ${dtypes.ctype(coeff_dtype)} coeff;
        %if NFFT != output.shape[-1]:
        if (${idxs[-1]} >= ${NFFT})
        {
            coeff = 1;
        }
        else
        %endif
        {
            coeff = 0.5 * (1 - cos(2 * ${numpy.pi} * ${idxs[-1]} / (${NFFT} - 1)));
        }
        ${output.store_same}(${mul}(${input.load_same}, coeff));
        """,
        render_kwds=dict(
            coeff_dtype=coeff_dtype, NFFT=NFFT,
            mul=functions.mul(arr.dtype, coeff_dtype)))
Esempio n. 6
0
def get_test_array(shape, dtype, strides=None, no_zeros=False, high=None):
    """
    Creates a random numpy array of the given ``shape`` and ``dtype`` for use in tests.

    Struct dtypes are filled field by field (recursively).
    Integer dtypes are sampled with ``numpy.random.randint``
    (upper bound ``high``, 100 if not given),
    other dtypes with ``numpy.random.uniform`` (upper bound ``high``, 1.0 if not given).
    If ``no_zeros`` is set, the lower bound is 1 for integers and 0.01 otherwise,
    instead of 0.
    For complex dtypes the real and the imaginary parts are sampled separately.
    If ``strides`` is given, the result is re-viewed with ``as_strided``.
    """
    shape = wrap_in_tuple(shape)
    dtype = dtypes.normalize_type(dtype)

    if dtype.names is not None:
        result = numpy.empty(shape, dtype)
        for field in dtype.names:
            result[field] = get_test_array(
                shape, dtype[field], no_zeros=no_zeros, high=high)
    else:
        if dtypes.is_integer(dtype):
            sampler = numpy.random.randint
            low = 1 if no_zeros else 0
            default_high = 100 # will work even with signed chars
        else:
            sampler = numpy.random.uniform
            low = 0.01 if no_zeros else 0
            default_high = 1.0
        upper = default_high if high is None else high

        def draw():
            return sampler(low, upper, shape).astype(dtype)

        if dtypes.is_complex(dtype):
            result = draw() + 1j * draw()
        else:
            result = draw()

    if strides is None:
        return result
    return as_strided(result, result.shape, strides)
Esempio n. 7
0
def get_test_array(shape, dtype, strides=None, no_zeros=False, high=None):
    """
    Returns a randomly filled numpy array with the requested ``shape`` and ``dtype``.

    * Struct dtypes: every field is generated by a recursive call.
    * Integer dtypes: ``numpy.random.randint`` from 0 (1 if ``no_zeros``)
      up to ``high`` (100 by default).
    * Other dtypes: ``numpy.random.uniform`` from 0 (0.01 if ``no_zeros``)
      up to ``high`` (1.0 by default); complex arrays get independently sampled
      real and imaginary parts.

    When ``strides`` is not ``None``, the array is passed through ``as_strided``
    with these strides before being returned.
    """
    shape = wrap_in_tuple(shape)
    dtype = dtypes.normalize_type(dtype)

    field_names = dtype.names
    if field_names is not None:
        result = numpy.empty(shape, dtype)
        for name in field_names:
            field_values = get_test_array(
                shape, dtype[name], no_zeros=no_zeros, high=high)
            result[name] = field_values
    else:
        integer = dtypes.is_integer(dtype)
        if integer:
            low = 1 if no_zeros else 0
            if high is None:
                high = 100  # will work even with signed chars
        else:
            low = 0.01 if no_zeros else 0
            if high is None:
                high = 1.0

        def get_arr():
            generate = numpy.random.randint if integer else numpy.random.uniform
            return generate(low, high, shape).astype(dtype)

        result = get_arr()
        if dtypes.is_complex(dtype):
            result = result + 1j * get_arr()

    if strides is not None:
        result = as_strided(result, result.shape, strides)

    return result
Esempio n. 8
0
    def generate_modes(mshape, dtype, batch=None, random=True):
        """
        Generates list of sparse modes for the problem of given shape.

        :param mshape: sequence with the number of modes along each dimension.
        :param dtype: callable scalar type used to build every coefficient
            (called as ``dtype(value)``).
        :param batch: if not ``None``, the number of batches; every mode coordinate
            is then prefixed with the batch index.
        :param random: if true, coefficients are drawn from ``numpy.random.normal()``,
            otherwise they are set to 1 (before scaling).
        :returns: a list of ``(coefficient, coordinate)`` tuples.
        """

        max_modes_per_batch = 20

        # One coordinate prefix per batch, or a single empty prefix without batching.
        prefixes = [()] if batch is None else [(b, ) for b in range(batch)]

        modelist = []
        if product(mshape) <= max_modes_per_batch:
            # If there are not many modes, fill all of them.
            # The product is materialized because it is reused for every batch;
            # a bare iterator would be exhausted after the first one.
            all_modes = list(
                itertools.product(*[range(modes) for modes in mshape]))
            for prefix in prefixes:
                modelist += [prefix + modenum for modenum in all_modes]
        else:
            # If there are many modes, fill some random ones
            rand_coord = lambda: tuple(
                numpy.random.randint(0, modes) for modes in mshape)

            for prefix in prefixes:
                for _ in range(max_modes_per_batch):
                    modelist.append(prefix + rand_coord())

        # add corner modes, to make sure extreme cases are still processed correctly
        corner_modes = itertools.product(*[(0, modes - 1) for modes in mshape])
        for modenum in corner_modes:
            for prefix in prefixes:
                modelist.append(prefix + modenum)

        modelist = set(modelist)  # remove duplicates

        # Assign coefficients
        is_complex = dtypes.is_complex(dtype)

        def get_coeff():
            return numpy.random.normal() if random else 1

        modes = []
        for coord in modelist:
            if is_complex:
                coeff = get_coeff() + 1j * get_coeff()
            else:
                coeff = get_coeff()
            coeff = dtype(coeff)

            # scaling coefficients for higher modes because of the lower precision in this case
            modenums = coord if batch is None else coord[1:]
            coeff /= sum(modenums) + 1
            modes.append((coeff, coord))

        return modes
Esempio n. 9
0
    def generate_modes(mshape, dtype, batch=None, random=True):
        """
        Generates list of sparse modes for the problem of given shape.

        :param mshape: sequence with the number of modes along each dimension.
        :param dtype: callable scalar type; each coefficient is created as ``dtype(value)``.
        :param batch: optional number of batches. When given, each coordinate tuple
            starts with the batch index.
        :param random: when true, coefficients come from ``numpy.random.normal()``;
            otherwise every coefficient starts as 1.
        :returns: a list of ``(coefficient, coordinate)`` tuples; coefficients are divided
            by ``sum(mode numbers) + 1``.
        """

        max_modes_per_batch = 20
        batched = batch is not None

        modelist = []
        if product(mshape) <= max_modes_per_batch:
            # If there are not many modes, fill all of them.
            # ``itertools.product`` returns a one-shot iterator, so it has to be
            # turned into a list before being reused for every batch.
            modenums = list(itertools.product(*[range(modes) for modes in mshape]))
            if batched:
                for b in range(batch):
                    modelist += [((b,) + modenum) for modenum in modenums]
            else:
                modelist += modenums
        else:
            # If there are many modes, fill some random ones
            rand_coord = lambda: tuple(
                numpy.random.randint(0, modes) for modes in mshape)

            if batched:
                for b in range(batch):
                    for _ in range(max_modes_per_batch):
                        modelist.append((b,) + rand_coord())
            else:
                for _ in range(max_modes_per_batch):
                    modelist.append(rand_coord())

        # add corner modes, to make sure extreme cases are still processed correctly
        corner_modes = itertools.product(*[(0, modes - 1) for modes in mshape])
        for modenum in corner_modes:
            if batched:
                for b in range(batch):
                    modelist.append((b,) + modenum)
            else:
                modelist.append(modenum)

        modelist = set(modelist) # remove duplicates

        # Assign coefficients
        complex_coeffs = dtypes.is_complex(dtype)
        get_coeff = lambda: numpy.random.normal() if random else 1

        modes = []
        for coord in modelist:
            if complex_coeffs:
                coeff = get_coeff() + 1j * get_coeff()
            else:
                coeff = get_coeff()
            coeff = dtype(coeff)

            # scaling coefficients for higher modes because of the lower precision in this case
            modenums = coord[1:] if batched else coord
            coeff /= sum(modenums) + 1
            modes.append((coeff, coord))

        return modes
Esempio n. 10
0
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Creates a sampler of normally distributed random numbers
    (mean ``mean``, standard deviation ``std``) based on the Box-Muller transform.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    A single call yields two random numbers for real types and one number for complex types.
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object.

    .. note::

        For a complex ``dtype``, ``std`` is the standard deviation of the complex numbers
        themselves (what ``numpy.std()`` returns), not of their real and imaginary components
        (these are normally distributed with the standard deviation ``std / sqrt(2)``).
        Therefore ``mean`` is of type ``dtype``, but ``std`` must be real.
    """
    complex_res = dtypes.is_complex(dtype)

    # Both the real and the complex flavour of the type are passed to the template.
    if complex_res:
        c_dtype = dtype
        r_dtype = dtypes.real_for(dtype)
    else:
        c_dtype = dtypes.complex_for(dtype)
        r_dtype = dtype

    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    render_kwds = dict(
        complex_res=dtypes.is_complex(dtype),
        r_dtype=r_dtype,
        r_ctype=dtypes.ctype(r_dtype),
        c_dtype=c_dtype,
        c_ctype=dtypes.ctype(c_dtype),
        polar_unit=functions.polar_unit(r_dtype),
        bijection=bijection,
        mean=mean,
        std=std,
        uf=uf)
    module = Module(TEMPLATE.get_def("normal_bm"), render_kwds=render_kwds)

    randoms_per_call = 1 if dtypes.is_complex(dtype) else 2
    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic,
        randoms_per_call=randoms_per_call)
Esempio n. 11
0
def conj(dtype):
    """
    Builds a :py:class:`~reikna.cluda.Module` with a one-argument function
    conjugating a value of type ``dtype``.
    Only complex data types are accepted; anything else raises ``NotImplementedError``.
    """
    if dtypes.is_complex(dtype):
        template = TEMPLATE.get_def('conj')
        return Module(template, render_kwds=dict(dtype=dtype))

    raise NotImplementedError("conj() of " + str(dtype) + " is not supported")
Esempio n. 12
0
def pow(dtype, exponent_dtype=None, output_dtype=None):
    """
    Builds a :py:class:`~reikna.cluda.Module` with a two-argument function
    raising its first argument (of type ``dtype``) to the power given by
    its second argument (of type ``exponent_dtype``, an integer or real data type).
    Both ``exponent_dtype`` and ``output_dtype`` default to ``dtype`` when omitted.
    If ``output_dtype`` differs from ``dtype``,
    the input is cast to ``output_dtype`` *before* exponentiation.
    A complex ``exponent_dtype`` raises ``NotImplementedError``;
    a real ``exponent_dtype`` combined with an integer ``output_dtype``
    raises ``ValueError``.
    """
    exponent_dtype = dtype if exponent_dtype is None else exponent_dtype
    output_dtype = dtype if output_dtype is None else output_dtype

    if dtypes.is_complex(exponent_dtype):
        raise NotImplementedError("pow() with a complex exponent is not supported")

    # A real exponent is aligned with the real type underlying the output.
    if dtypes.is_real(exponent_dtype):
        if dtypes.is_complex(output_dtype):
            exponent_dtype = dtypes.real_for(output_dtype)
        elif dtypes.is_real(output_dtype):
            exponent_dtype = output_dtype
        else:
            raise ValueError("pow(integer, float): integer is not supported")

    # Helper modules are created only for the cases where they are requested;
    # the rest stay ``None``.
    cast_ = cast(output_dtype, dtype) if output_dtype != dtype else None

    mul_ = None
    div_ = None
    if dtypes.is_integer(exponent_dtype) and not dtypes.is_real(output_dtype):
        mul_ = mul(output_dtype, output_dtype)
        div_ = div(output_dtype, output_dtype)

    polar_ = None
    if dtypes.is_complex(output_dtype):
        polar_ = polar(dtypes.real_for(output_dtype))

    kwds = dict(
        dtype=dtype, exponent_dtype=exponent_dtype, output_dtype=output_dtype,
        div_=div_, mul_=mul_, cast_=cast_, polar_=polar_)
    return Module(TEMPLATE.get_def('pow'), render_kwds=kwds)
Esempio n. 13
0
def normal_bm(bijection, dtype, mean=0, std=1):
    """
    Returns a :py:class:`~reikna.cbrng.samplers.Sampler` object producing
    normally distributed random numbers with the mean ``mean`` and
    the standard deviation ``std`` by means of the Box-Muller transform.
    Supported dtypes: ``float(32/64)``, ``complex(64/128)``.
    Real types get two random numbers per call, complex types get one.

    .. note::

        With a complex ``dtype``, ``std`` denotes the standard deviation of the
        complex numbers (as returned by ``numpy.std()``) rather than that of the real and
        imaginary components (whose standard deviation is ``std / sqrt(2)``).
        As a consequence, ``mean`` has the type ``dtype`` while ``std`` must be real.
    """
    if dtypes.is_complex(dtype):
        r_dtype, c_dtype = dtypes.real_for(dtype), dtype
    else:
        r_dtype, c_dtype = dtype, dtypes.complex_for(dtype)

    # Uniform sampler on [0, 1) the Box-Muller template is built upon.
    uf = uniform_float(bijection, r_dtype, low=0, high=1)

    template = TEMPLATE.get_def("normal_bm")
    module = Module(
        template,
        render_kwds={
            'complex_res': dtypes.is_complex(dtype),
            'r_dtype': r_dtype,
            'r_ctype': dtypes.ctype(r_dtype),
            'c_dtype': c_dtype,
            'c_ctype': dtypes.ctype(c_dtype),
            'polar_unit': functions.polar_unit(r_dtype),
            'bijection': bijection,
            'mean': mean,
            'std': std,
            'uf': uf,
        })

    if dtypes.is_complex(dtype):
        randoms_per_call = 1
    else:
        randoms_per_call = 2

    return Sampler(
        bijection, module, dtype,
        deterministic=uf.deterministic, randoms_per_call=randoms_per_call)
Esempio n. 14
0
def generate_dtypes(out_code, in_codes):
    """
    Translates one-letter type codes (``i``, ``f``, ``c``) into numpy dtypes.

    Returns the tuple ``(out_dtype, in_dtypes)``. If ``out_code`` is ``'auto'``,
    the output dtype is derived from the inputs with ``dtypes.result_type``.
    When none of the inputs is double precision, a complex or real output dtype
    is replaced by its single precision variant.
    """
    code_to_dtype = dict(i=numpy.int32, f=numpy.float32, c=numpy.complex64)

    in_dtypes = [code_to_dtype[code] for code in in_codes]
    if out_code == 'auto':
        out_dtype = dtypes.result_type(*in_dtypes)
    else:
        out_dtype = code_to_dtype[out_code]

    has_double_input = any(dtypes.is_double(in_dtype) for in_dtype in in_dtypes)
    if not has_double_input:
        # numpy thinks that int32 * float32 == float64,
        # but we still need to run this test on older videocards
        if dtypes.is_complex(out_dtype):
            out_dtype = numpy.complex64
        elif dtypes.is_real(out_dtype):
            out_dtype = numpy.float32

    return out_dtype, in_dtypes
Esempio n. 15
0
    def __init__(self, arr_t, axes=None):
        """
        Sets up the computation signature (``output``, ``input`` and the scalar
        ``inverse`` flag defaulting to 0) and remembers the axes to transform.

        ``arr_t`` must have a complex dtype, otherwise ``ValueError`` is raised.
        ``axes`` defaults to all axes of ``arr_t``.
        """
        if not dtypes.is_complex(arr_t.dtype):
            raise ValueError("FFT computation requires array of a complex dtype")

        parameters = [
            Parameter('output', Annotation(arr_t, 'o')),
            Parameter('input', Annotation(arr_t, 'i')),
            Parameter('inverse', Annotation(numpy.int32), default=0),
        ]
        Computation.__init__(self, parameters)

        all_axes = range(len(arr_t.shape))
        self._axes = tuple(all_axes if axes is None else axes)
Esempio n. 16
0
    def __init__(self, arr_t, axes=None):
        """
        Declares the parameters ``output`` and ``input`` (both described by ``arr_t``)
        plus the ``inverse`` integer flag (default 0), and stores the transformed axes
        as a tuple in ``self._axes``.

        Raises ``ValueError`` if the dtype of ``arr_t`` is not complex.
        If ``axes`` is ``None``, every axis of ``arr_t`` is used.
        """
        dtype_is_complex = dtypes.is_complex(arr_t.dtype)
        if not dtype_is_complex:
            raise ValueError("FFT computation requires array of a complex dtype")

        output_param = Parameter('output', Annotation(arr_t, 'o'))
        input_param = Parameter('input', Annotation(arr_t, 'i'))
        inverse_param = Parameter('inverse', Annotation(numpy.int32), default=0)
        Computation.__init__(self, [output_param, input_param, inverse_param])

        if axes is not None:
            self._axes = tuple(axes)
        else:
            self._axes = tuple(range(len(arr_t.shape)))
Esempio n. 17
0
def generate_dtypes(out_code, in_codes):
    """
    Maps the type codes ``i`` / ``f`` / ``c`` to ``numpy.int32`` / ``numpy.float32`` /
    ``numpy.complex64`` and returns ``(out_dtype, in_dtypes)``.

    An ``out_code`` of ``'auto'`` selects ``dtypes.result_type`` of the input dtypes.
    If no input dtype is double precision, a complex output is forced to
    ``numpy.complex64`` and a real output to ``numpy.float32``.
    """
    def test_dtype(code):
        return dict(i=numpy.int32, f=numpy.float32, c=numpy.complex64)[code]

    in_dtypes = [test_dtype(code) for code in in_codes]

    out_dtype = (
        dtypes.result_type(*in_dtypes) if out_code == 'auto'
        else test_dtype(out_code))

    if any(map(dtypes.is_double, in_dtypes)):
        return out_dtype, in_dtypes

    # numpy thinks that int32 * float32 == float64,
    # but we still need to run this test on older videocards
    if dtypes.is_complex(out_dtype):
        out_dtype = numpy.complex64
    elif dtypes.is_real(out_dtype):
        out_dtype = numpy.float32

    return out_dtype, in_dtypes
Esempio n. 18
0
def Multiply(type):
    """
    Builds a ``PureParallel`` computation multiplying two arrays element by element:
    ``output = in1 * in2``, all three described by ``type``.

    The kernel contains both a complex branch (explicit real/imaginary arithmetic
    on the ``.x`` / ``.y`` members) and a plain ``f1*f2`` branch; the ``#if`` in the
    kernel source selects between them based on ``dtypes.is_complex(type)``.
    """
    parameters = [
        Parameter('output', Annotation(type, 'o')),
        Parameter('in1', Annotation(type, 'i')),
        Parameter('in2', Annotation(type, 'i')),
    ]
    render_kwds = dict(
        ctype=type.ctype,
        complex=int(dtypes.is_complex(type)))

    return PureParallel(
        parameters,
        """
        ${ctype} f1 = ${in1.load_same}, f2 = ${in2.load_same};
        #if ${complex}
        ${output.store_same}((${ctype})(f1.x*f2.x - f1.y*f2.y, f1.x*f2.y + f1.y*f2.x));
        #else
        ${output.store_same}(f1*f2);
        #endif
        """,
        render_kwds=render_kwds)
Esempio n. 19
0
    def __init__(self, arr_t, padding=False, axes=None, **kwargs):
        '''
        Wrapper around `reikna.fft.FFT` with automatic real-to-complex casting
        and optional padding for higher performance.

        Input
        -----
        arr_t: array-like type
            Describes the (unpadded) input array; its `shape` and `dtype`
            are read here.
        padding: bool, default=False
            If True, the input array is padded to the next power of two on the
            transformed axes.
        axes: tuple
            Axes over which to perform the transform. Defaults to all axes.
            Negative values are counted from the last axis.
        **kwargs
            Passed on unchanged to the parent constructor.

        Raises
        ------
        IndexError
            If an axis (after normalization) is outside
            `range(len(arr_t.shape))`.

        Note
        ----
        Because reikna does not allow nodes of the transformation tree with the
        identical names, the input array is called `input_`.
        '''
        if axes is None:
            axes = range(len(arr_t.shape))  # default: transform over every axis
        else:
            # map negative axes onto their non-negative equivalents
            axes = tuple(v + len(arr_t.shape) if v < 0 else v for v in axes)
        for v in axes:
            if v not in range(0, len(arr_t.shape)):
                raise IndexError('axis is out of range')
        # the parent computation is always created with a complex dtype
        dtype = (arr_t.dtype if dtypes.is_complex(arr_t.dtype) else
                 dtypes.complex_for(arr_t.dtype))
        if padding:
            # round the length of every transformed axis up to a power of two
            shape = tuple(1 << int(np.ceil(np.log2(v))) if ax in axes else v
                          for ax, v in enumerate(arr_t.shape))
        else:
            shape = arr_t.shape
        super(FFT, self).__init__(Type(dtype, shape), axes=axes, **kwargs)
        # `input` tracks the current outermost input node while transformations
        # are attached one after another (note: shadows the builtin `input`)
        input = self.parameter.input
        if dtype != arr_t.dtype:
            # non-complex input: attach the `Complex` transformation, exposing
            # its input as `in_real`
            complex_tr = Complex(Type(arr_t.dtype, input.shape))
            input.connect(complex_tr,
                          complex_tr.output,
                          in_real=complex_tr.input)
            input = self.parameter.in_real
        if shape != arr_t.shape:
            # shape was enlarged above: attach the `Padded` transformation
            # (presumably fills the extra elements with `default` - verify
            # against the `Padded` implementation)
            pad_tr = Padded(input, arr_t, default='0.')
            input.connect(pad_tr, pad_tr.output, in_padded=pad_tr.input)
            input = self.parameter.in_padded
        # final copy transformation gives the outermost input the name `input_`
        copy_tr = copy(input)
        input.connect(copy_tr, copy_tr.output, input_=copy_tr.input)
Esempio n. 20
0
    def __init__(self, shape, drift, trajectories=1, diffusion=None, iterations=3, noise_type=None):
        """
        Declares the computation parameters and creates the per-step
        propagation computation (stored in ``self._prop_iter``).

        The state array has the dtype of ``drift`` and the shape
        ``(trajectories, drift.components) + shape``.
        The parameters are ``output``, ``input``, ``dW`` (only when ``diffusion``
        is given; described by ``noise_type``), and the real scalars ``t`` and ``dt``.
        """
        state_dtype = drift.dtype
        real_dtype = (
            dtypes.real_for(state_dtype) if dtypes.is_complex(state_dtype)
            else state_dtype)

        state_type = Type(state_dtype, (trajectories, drift.components) + shape)

        self._noise = diffusion is not None

        parameters = [
            Parameter('output', Annotation(state_type, 'o')),
            Parameter('input', Annotation(state_type, 'i')),
        ]
        if self._noise:
            parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
        parameters.append(Parameter('t', Annotation(real_dtype)))
        parameters.append(Parameter('dt', Annotation(real_dtype)))

        Computation.__init__(self, parameters)

        self._prop_iter = get_prop_iter(
            state_type, drift, iterations,
            diffusion=diffusion, noise_type=noise_type)
Esempio n. 21
0
    def _build_plan(self, plan_factory, _device_params, output_arr, input_arr):
        """
        Builds the computation plan: one matrix multiplication per axis
        listed in ``self._axes``, followed by a transposition back to the
        original axes order if the intermediate steps changed it.

        ``_device_params`` is not used in this method.
        Returns the plan created by ``plan_factory()``.
        """

        plan = plan_factory()

        dtype = input_arr.dtype
        # the transformation matrices are created with the real counterpart of a complex dtype
        p_dtype = dtypes.real_for(dtype) if dtypes.is_complex(dtype) else dtype

        # the mode-space shape is on the input side for the inverse transform
        mode_shape = input_arr.shape if self._inverse else output_arr.shape

        current_mem = input_arr
        # ``seq_axes`` is the reference (original) axes order;
        # ``current_axes`` tracks the order after every ``_add_transpose`` call
        seq_axes = list(range(len(input_arr.shape)))
        current_axes = list(range(len(input_arr.shape)))

        for i, axis in enumerate(self._axes):
            # presumably rearranges the axes so that ``axis`` can be contracted
            # by the matrix multiplication below - see ``_add_transpose``
            current_mem, current_axes = self._add_transpose(plan, current_mem, current_axes, axis)

            tr_matrix = plan.persistent_array(
                self._get_transformation_matrix(p_dtype, mode_shape[axis], self._add_points[axis]))

            dot = MatrixMul(current_mem, tr_matrix)
            if i == len(self._axes) - 1 and current_axes == seq_axes:
                dot_output = output_arr
            else:
                # Cannot write to output if it is not the last transform,
                # or if we need to return to the initial axes order
                dot_output = plan.temp_array_like(dot.parameter.output)
            plan.computation_call(dot, dot_output, current_mem, tr_matrix)
            current_mem = dot_output

        # If we ended up with the wrong order of axes,
        # return to the original order.

        if current_axes != seq_axes:
            # inverse permutation: position of every original axis in the current order
            tr_axes = [current_axes.index(i) for i in range(len(current_axes))]
            transpose = Transpose(current_mem, output_arr_t=output_arr, axes=tr_axes)
            # NOTE(review): the loop above uses ``plan.computation_call``;
            # confirm that the plan object actually provides ``add_computation``.
            plan.add_computation(transpose, output_arr, current_mem)

        return plan
Esempio n. 22
0
    def __init__(self, in1_type, in2_type, axis=-1):
        '''
        Fast convolution with FFT

        Both inputs are zero-padded along the convolved axis to the length
        N1 + N2 - 1 and transformed as a whole, because overlap-add is not
        significantly faster for the intended shape ranges.

        Input
        -----
        in1_type, in2_type: `reikna.core.Type`
            Shape and dtype of the arrays to be convolved.
        axis: `int`
            Array axis over which the convolution is evaluated.
            Negative values are counted from the last axis.

        Raises
        ------
        ValueError
            If `axis` is out of range, or if the shapes of the two inputs
            cannot be broadcast against each other on any axis other than
            `axis`.

        Notes
        -----
        * The output is always an array of complex numbers.
        * The arrays are matched using numpy's broadcasting rules.
        '''
        self._thread = None
        # normalize axis
        ndim = max(len(in1_type.shape), len(in2_type.shape))
        if axis < 0:
            axis += ndim
        if axis not in range(ndim):
            raise ValueError('axis is out of range.')
        # check if in1 and in2 are broadcastable
        # (trailing dimensions are aligned; the axis range has to run down to
        # and including 0, otherwise the leading axis is never checked)
        for ax, s1, s2 in zip(range(ndim - 1, -1, -1), in1_type.shape[::-1],
                              in2_type.shape[::-1]):
            if (ax != axis) and (s1 != s2) and (s1 != 1) and (s2 != 1):
                raise ValueError('in1 and in2 have incompatible shapes')
        # calculate shapes: both inputs are first extended to `ndim` dimensions
        # with leading 1s, then the convolved axis gets the full output length
        in1_shape = (1, ) * (ndim - len(in1_type.shape)) + in1_type.shape
        in2_shape = (1, ) * (ndim - len(in2_type.shape)) + in2_type.shape
        conv_length = in1_shape[axis] + in2_shape[axis] - 1
        in1_padded = in1_shape[:axis] + (conv_length, ) + in1_shape[axis + 1:]
        in2_padded = in2_shape[:axis] + (conv_length, ) + in2_shape[axis + 1:]
        out_shape = tuple(
            max(s1, s2) for s1, s2 in zip(in1_padded, in2_padded))
        out_dtype = (in1_type.dtype if dtypes.is_complex(in1_type.dtype) else
                     dtypes.complex_for(in1_type.dtype))

        # forward transforms of the two padded inputs
        fft1 = FFT(Type(in1_type.dtype, in1_padded), axes=(axis, ))
        pad_in1 = Padded(fft1.parameter.input_,
                         Type(in1_type.dtype, in1_shape),
                         default='0.')
        fft1.parameter.input_.connect(pad_in1,
                                      pad_in1.output,
                                      input_p=pad_in1.input)
        fft2 = FFT(Type(in2_type.dtype, in2_padded), axes=(axis, ))
        pad_in2 = Padded(fft2.parameter.input_,
                         Type(in2_type.dtype, in2_shape),
                         default='0.')
        fft2.parameter.input_.connect(pad_in2,
                                      pad_in2.output,
                                      input_p=pad_in2.input)
        # element-wise product of the two spectra, each broadcast to `out_shape`
        mul = Multiply(Type(out_dtype, out_shape))
        bcast_in1 = Broadcast(out_shape, fft1.parameter.output)
        mul.parameter.in1.connect(bcast_in1,
                                  bcast_in1.output,
                                  in1_p=bcast_in1.input)
        bcast_in2 = Broadcast(out_shape, fft2.parameter.output)
        mul.parameter.in2.connect(bcast_in2,
                                  bcast_in2.output,
                                  in2_p=bcast_in2.input)
        # transform used to get back from the spectrum of the product
        ifft = FFT(Type(out_dtype, out_shape), axes=(axis, ))
        self._comps = [fft1, fft2, mul, ifft]
        # emulate reikna parameter attribute
        parameters = namedtuple('DummyParameters', ['output', 'in1', 'in2'])
        self.parameter = parameters(ifft.parameter.output, in1_type, in2_type)
Esempio n. 23
0
 def reference_mul(*args):
     """
     Reference result: ``product`` of all the arguments converted to ``out_dtype``
     (taken from the enclosing scope). The imaginary part of a complex result
     is dropped when ``out_dtype`` is not complex.
     """
     res = product(args)
     drop_imag = not dtypes.is_complex(out_dtype) and dtypes.is_complex(res.dtype)
     return (res.real if drop_imag else res).astype(out_dtype)
Esempio n. 24
0
def get_prop_iter(state_type, drift, iterations, diffusion=None, noise_type=None):
    """
    Builds the ``PureParallel`` computation performing one propagation step
    with ``iterations`` fixed-point iterations of the kernel below.

    The parameters are ``output`` and ``input`` (both described by ``state_type``),
    ``dW`` (described by ``noise_type``; present only when ``diffusion`` is given),
    and the scalars ``t`` and ``dt`` whose type is the real counterpart
    of the state dtype.
    The guiding array consists of the trajectory dimension followed by
    all the dimensions after the component one.
    """
    state_dtype = state_type.dtype
    real_dtype = (
        dtypes.real_for(state_dtype) if dtypes.is_complex(state_dtype)
        else state_dtype)

    has_noise = diffusion is not None
    noise_dtype = noise_type.dtype if has_noise else real_dtype

    parameters = [
        Parameter('output', Annotation(state_type, 'o')),
        Parameter('input', Annotation(state_type, 'i')),
    ]
    if has_noise:
        parameters.append(Parameter('dW', Annotation(noise_type, 'i')))
    parameters += [
        Parameter('t', Annotation(real_dtype)),
        Parameter('dt', Annotation(real_dtype)),
    ]

    return PureParallel(
        parameters,
        """
        <%
            coords = ", ".join(idxs[1:])
            trajectory = idxs[0]
            components = drift.components
            if diffusion is not None:
                noise_sources = diffusion.noise_sources
            psi_args = ", ".join("psi_" + str(c) + "_tmp" for c in range(components))

            if diffusion is None:
                dW = None
        %>

        %for comp in range(components):
        ${output.ctype} psi_${comp} = ${input.load_idx}(${trajectory}, ${comp}, ${coords});
        ${output.ctype} psi_${comp}_tmp = psi_${comp};
        ${output.ctype} dpsi_${comp};
        %endfor

        %if diffusion is not None:
        %for ncomp in range(noise_sources):
        ${dW.ctype} dW_${ncomp} = ${dW.load_idx}(${trajectory}, ${ncomp}, ${coords});
        %endfor
        %endif

        %for i in range(iterations):

        %for comp in range(components):
        dpsi_${comp} =
            ${mul_cr}(
                ${mul_cr}(${drift.module}${comp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), ${dt})
                %if diffusion is not None:
                %for ncomp in range(noise_sources):
                + ${mul_cn}(${diffusion.module}${comp}_${ncomp}(
                    ${coords}, ${psi_args}, ${t} + ${dt} / 2), dW_${ncomp})
                %endfor
                %endif
                , 0.5);
        %endfor

        %for comp in range(components):
        psi_${comp}_tmp = psi_${comp} + dpsi_${comp};
        %endfor

        %endfor

        %for comp in range(components):
        ${output.store_idx}(${trajectory}, ${comp}, ${coords}, psi_${comp}_tmp + dpsi_${comp});
        %endfor
        """,
        guiding_array=(state_type.shape[0],) + state_type.shape[2:],
        render_kwds=dict(
            drift=drift,
            diffusion=diffusion,
            iterations=iterations,
            mul_cr=functions.mul(state_dtype, real_dtype),
            mul_cn=functions.mul(state_dtype, noise_dtype)))
Esempio n. 25
0
 def reference_add(*args):
     """
     Reference result: sum of all the arguments converted to ``out_dtype``
     (taken from the enclosing scope). The imaginary part of a complex result
     is dropped when ``out_dtype`` is not complex.
     """
     res = sum(args)
     drop_imag = not dtypes.is_complex(out_dtype) and dtypes.is_complex(res.dtype)
     return (res.real if drop_imag else res).astype(out_dtype)
Esempio n. 26
0
def check_information_loss(out_dtype, expected_dtype):
    """
    Emits a ``numpy.ComplexWarning`` if a value of the complex type ``expected_dtype``
    is about to be stored as the non-complex type ``out_dtype``.
    Does nothing in all other cases.
    """
    if not dtypes.is_complex(expected_dtype) or dtypes.is_complex(out_dtype):
        return

    message = (
        "Imaginary part ignored during the downcast from " +
        str(expected_dtype) + " to " + str(out_dtype))
    warn(message, numpy.ComplexWarning)
Esempio n. 27
0
 def reference_mul(*args):
     """
     Computes the expected value of the multiplication test:
     ``product(args)`` cast to ``out_dtype`` from the enclosing scope.
     If the product is complex while ``out_dtype`` is not,
     only the real part is kept.
     """
     expected = product(args)
     if dtypes.is_complex(out_dtype):
         return expected.astype(out_dtype)
     if dtypes.is_complex(expected.dtype):
         expected = expected.real
     return expected.astype(out_dtype)
Esempio n. 28
0
 def reference_add(*args):
     """
     Computes the expected value of the addition test:
     ``sum(args)`` cast to ``out_dtype`` from the enclosing scope.
     If the sum is complex while ``out_dtype`` is not,
     only the real part is kept.
     """
     expected = sum(args)
     if dtypes.is_complex(out_dtype):
         return expected.astype(out_dtype)
     if dtypes.is_complex(expected.dtype):
         expected = expected.real
     return expected.astype(out_dtype)