Example #1
    def _build_plan(self, plan_factory, device_params, a, current_variances, mu):
        plan = plan_factory()

        fill = PureParallel([
            Parameter('a', Annotation(a, 'o')),
            Parameter('current_variances', Annotation(current_variances, 'o')),
            Parameter('mu', Annotation(mu, 'i'))],
            """
            ${a.ctype} a;
            if (${idxs[-2]} == ${mask_size})
            {
                a = ${mu.load_idx}(${", ".join(idxs[:-2])}, ${idxs[-1]});
            }
            else
            {
                a = 0;
            }
            ${a.store_same}(a);

            if (${idxs[-1]} == 0)
            {
                ${current_variances.store_idx}(${", ".join(idxs[:-1])}, 0);
            }
            """,
            render_kwds=dict(mask_size=self._mask_size))

        plan.computation_call(fill, a, current_variances, mu)

        return plan
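Example #1 is the _build_plan method of a reikna Computation subclass. For orientation, a minimal sketch of how such a plan builder is typically embedded is shown below; the class name and constructor signature are assumptions, not taken from the source, and only the pieces the method relies on (self._mask_size and the parameter declarations) are included.

from reikna.core import Computation, Parameter, Annotation

class MaskedFill(Computation):
    # Hypothetical wrapper class around the _build_plan from Example #1.
    def __init__(self, a, current_variances, mu, mask_size):
        self._mask_size = mask_size
        Computation.__init__(self, [
            Parameter('a', Annotation(a, 'o')),
            Parameter('current_variances', Annotation(current_variances, 'o')),
            Parameter('mu', Annotation(mu, 'i'))])

    # _build_plan(...) as defined in Example #1 above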
Example #2
def identity(type):
    return PureParallel([
        Parameter('output', Annotation(type, 'o')),
        Parameter('input', Annotation(type, 'i'))
    ], """
        ${output.store_same}(${input.load_same});
        """)
Example #3
def test_guiding_output(thr):

    N = 1000
    dtype = numpy.float32

    p = PureParallel([
        Parameter('output', Annotation(Type(dtype, shape=N), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=(2, N)), 'i'))
    ],
                     """
        float t1 = ${input.load_idx}(0, ${idxs[0]});
        float t2 = ${input.load_idx}(1, ${idxs[0]});
        ${output.store_idx}(${idxs[0]}, t1 + t2);
        """,
                     guiding_array='output')

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = a[0] + a[1]

    assert diff_is_negligible(res_dev.get(), res_ref)
Example #4
def logistic(context, activations, bias, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = activations

    key = (logistic, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        assert activations.shape[1] == bias.shape[0]

        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('bias', Annotation(bias, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${bias.ctype} b = ${bias.load_idx}(${idxs[1]});

        a += b;
        a = min(max(-45.0f, a), 45.0f);
        a = 1.0f / (1.0f + exp(-a));

        ${dest.store_same}(a);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, bias, dest)

    return dest
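Examples #4, #6, #8, #9, #10, #15, #18 and #19 all receive a context/ctx object that exposes a kernel_cache dict and a reikna Thread. That object is defined by the surrounding project; a minimal stand-in covering exactly what these snippets use might look like this (hypothetical, not the project's actual class):

from reikna.cluda import any_api

class Context:
    # Hypothetical stand-in: the examples only read these two attributes.
    def __init__(self):
        self.thread = any_api().Thread.create()
        self.kernel_cache = {}   # (function, shapes, ...) -> compiled kernel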
Example #5
    def _build_plan(
            self, plan_factory, device_params,
            ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):

        plan = plan_factory()

        extracted_n, t, base, inner_n = ks_a.shape

        mul_key = MatrixMulVector(noises_a)
        b_term = plan.temp_array_like(mul_key.parameter.output)

        build_keyswitch = PureParallel([
            Parameter('ks_a', Annotation(ks_a, 'o')),
            Parameter('ks_b', Annotation(ks_b, 'o')),
            Parameter('ks_cv', Annotation(ks_cv, 'o')),
            Parameter('in_key', Annotation(in_key, 'i')),
            Parameter('b_term', Annotation(b_term, 'i')),
            Parameter('noises_a', Annotation(noises_a, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i'))],
            Snippet(
                TEMPLATE.get_def("make_lwe_keyswitch_key"),
                render_kwds=dict(
                    log2_base=self._log2_base, output_size=self._output_size,
                    noise=self._noise)),
            guiding_array="ks_b")

        plan.computation_call(mul_key, b_term, noises_a, out_key)
        plan.computation_call(
            build_keyswitch,
            ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b)

        return plan
Example #6
def logistic_derivative(context, activations, delta, dest=None):
    kernel_cache, thread = context.kernel_cache, context.thread

    if dest is None:
        dest = delta

    key = (logistic_derivative, activations.shape, thread)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('activations', Annotation(activations, 'i')),
            Parameter('delta', Annotation(activations, 'i')),
            Parameter('dest', Annotation(dest, 'o')),
        ],
                              """
        ${activations.ctype} a = ${activations.load_same};
        ${delta.ctype} d = ${delta.load_same};

        d = d*a*(1.0f - a);

        ${dest.store_same}(d);
        """,
                              guiding_array='activations')

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # Run kernel
    kernel_cache[key](activations, delta, dest)
Example #7
def roll_computation(array, axis):
    return PureParallel([
        Parameter('output', Annotation(array, 'o')),
        Parameter('input', Annotation(array, 'i')),
        Parameter('shift', Annotation(Type(numpy.int32)))
    ],
                        """
        <%
            shape = input.shape
        %>
        %for i in range(len(shape)):
            VSIZE_T output_${idxs[i]} =
                %if i == axis:
                ${shift} == 0 ?
                    ${idxs[i]} :
                    ## Since ``shift`` can be negative, and its absolute value greater than
                    ## ``shape[i]``, a double modulo division is necessary
                    ## (the ``%`` operator preserves the sign of the dividend in C).
                    (${idxs[i]} + (${shape[i]} + ${shift} % ${shape[i]})) % ${shape[i]};
                %else:
                ${idxs[i]};
                %endif
        %endfor
        ${output.store_idx}(
            ${", ".join("output_" + name for name in idxs)},
            ${input.load_idx}(${", ".join(idxs)}));
        """,
                        guiding_array='input',
                        render_kwds=dict(axis=axis))
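The shift parameter above is declared as a scalar Annotation, so it is supplied at call time rather than baked into the kernel. A possible usage sketch (reikna 0.7.x import paths assumed; array shape, dtype and shift value are made up for illustration):

import numpy
from reikna.cluda import any_api
from reikna.core import Type

thr = any_api().Thread.create()

arr_t = Type(numpy.float32, shape=(4, 6))
roll = roll_computation(arr_t, axis=1).compile(thr)

a = numpy.arange(24, dtype=numpy.float32).reshape(4, 6)
a_dev = thr.to_device(a)
res_dev = thr.empty_like(a_dev)

roll(res_dev, a_dev, numpy.int32(2))     # scalar 'shift' is passed last, at call time
assert numpy.allclose(res_dev.get(), numpy.roll(a, 2, axis=1))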
Example #8
def classification_delta_kernel(ctx, outputs, targets, deltas):
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    assert outputs.shape[0] == targets.shape[0] == deltas.shape[0]
    assert len(targets.shape) == 1
    assert targets.dtype == numpy.int32
    assert outputs.shape[1] == deltas.shape[1]

    key = (classification_delta_kernel, outputs.shape)
    if key not in kernel_cache:
        log.info("compiling " + str(key))
        kernel = PureParallel([
            Parameter('outputs', Annotation(outputs, 'i')),
            Parameter('targets', Annotation(targets, 'i')),
            Parameter('deltas', Annotation(deltas, 'o'))
        ],
                              """
        ${outputs.ctype} out = ${outputs.load_same};
        SIZE_T t = ${targets.load_idx}(${idxs[0]});
        SIZE_T idx = ${idxs[1]};
        ${deltas.ctype} d;
        if (t == idx) {
            d = 1.0f - out;
        } else {
            d = -out;
        }
        ${deltas.store_same}(d);
        """,
                              guiding_array='deltas')

        kernel_cache[key] = kernel.compile(thread)

    # Run kernel
    kernel_cache[key](outputs, targets, deltas)
Example #9
def class_errors(ctx, expected, actual, errors):
    """ expected int32, actual float, errors int32 """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (class_errors, expected.shape)

    if key not in kernel_cache.keys():
        # target should be an integer
        logging.info("compiling " + str(key))
        assert expected.shape == errors.shape  # one neuron per class
        assert expected.shape == (actual.shape[0], )  # index of the class
        assert actual.dtype == numpy.float32
        assert expected.dtype == numpy.int32
        assert errors.dtype == numpy.int32
        kernel = PureParallel(
            [
                Parameter('expected', Annotation(expected, 'i')),
                Parameter('actual', Annotation(actual, 'i')),
                Parameter('errors', Annotation(errors, 'o'))
            ],
            """
            SIZE_T expected = ${expected.load_idx}(${idxs[0]});
            float maximum = 0.0f;
            float value;
            SIZE_T maxindex = 0;

            SIZE_T tl = ${target_length};

            // calculate argmax
            for(SIZE_T j=0; j < tl; j++) {
                value = ${actual.load_idx}(${idxs[0]}, j);

                if (value > maximum) {
                    maximum = value;
                    maxindex = j;
                }
            }

            // If the confidence is too low, return an error
            if (maximum < (1.0f / ${target_length}.0f + 0.001f)) {
                ${errors.store_same}(1);
                return;
            }

            // compare argmax
            if (maxindex != expected) {
                ${errors.store_same}(1);
            } else {
                ${errors.store_same}(0);
            }

        """,
            guiding_array='expected',
            render_kwds={'target_length': numpy.int32(actual.shape[1])})

        kernel_cache[key] = kernel.compile(thread)

    kernel_cache[key](expected, actual, errors)
Example #10
def convolve2d_propagation(ctx, array, weights, dest):
    """ The output is the valid discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_propagation, weights.shape, array.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        channels, filters, owidth, oheight = (
            weights.shape[0], weights.shape[1], dest.shape[1], dest.shape[2])

        render_kwds = {
            'w0': weights.shape[2],
            'w1': weights.shape[3],
            'a0': array.shape[2],
            'a1': array.shape[3],
            'off0': int(weights.shape[2] - 1),
            'off1': int(weights.shape[3] - 1)
        }

        kernel_conv = PureParallel([
            Parameter('array', Annotation(array, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('dest', Annotation(dest, 'o'))
        ],
                                   """
        // Array dimensions:
        // array:   (number, channels, width, height)
        // weights: (channels, filters, fwidth, fheight)
        // dest:    (number, channels, filters, owidth, oheight)

        float a = 0.0f;
        SIZE_T x, y, i, j;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${w0}; i++){
            for (j=0; j < ${w1}; j++){
                x = xout - i  + ${off0};
                y = yout - j  + ${off1};
                a += ${array.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, i, j); // channel, filter, i, j
            }
        }

        ${dest.store_same}(a);

        """,
                                   guiding_array='dest',
                                   render_kwds=render_kwds)
        kernel_cache[key] = kernel_conv.compile(thread, fast_math=True)

    # run convolution
    kernel_cache[key](array, weights, dest)

    return dest
Example #11
def get_test_computation(arr_t):
    return PureParallel([
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))
    ], """
        <%
            all_idxs = ", ".join(idxs)
        %>
        ${output.store_idx}(${all_idxs}, ${input.load_idx}(${all_idxs}));
        """)
Example #12
def Multiply(type):
    return PureParallel([
        Parameter('output', Annotation(type, 'o')),
        Parameter('in1', Annotation(type, 'i')),
        Parameter('in2', Annotation(type, 'i'))
    ],
                        """
        ${ctype} f1 = ${in1.load_same}, f2 = ${in2.load_same};
        #if ${complex}
        ${output.store_same}((${ctype})(f1.x*f2.x - f1.y*f2.y, f1.x*f2.y + f1.y*f2.x));
        #else
        ${output.store_same}(f1*f2);
        #endif
        """,
                        render_kwds=dict(ctype=type.ctype,
                                         complex=int(dtypes.is_complex(type))))
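Because complex is rendered as a literal 0 or 1, the #if branch is resolved by the C preprocessor at compile time, so the same factory covers real and complex arrays (the complex branch uses an OpenCL-style vector literal). A hedged usage sketch for the real-valued case, with made-up shapes and data:

import numpy
from reikna.cluda import any_api
from reikna.core import Type

thr = any_api().Thread.create()

arr_t = Type(numpy.float32, shape=(512,))
mul = Multiply(arr_t).compile(thr)

a = numpy.random.rand(512).astype(numpy.float32)
b = numpy.random.rand(512).astype(numpy.float32)
a_dev = thr.to_device(a)
b_dev = thr.to_device(b)
out_dev = thr.empty_like(a_dev)

mul(out_dev, a_dev, b_dev)               # declaration order: output, in1, in2
assert numpy.allclose(out_dev.get(), a * b)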
Example #13
    def __init__(self, size, dtype):

        Computation.__init__(self, [
            Parameter('output', Annotation(Type(dtype, shape=size), 'o')),
            Parameter('input', Annotation(Type(dtype, shape=size), 'i'))
        ])

        self._p = PureParallel([
            Parameter('output', Annotation(Type(dtype, shape=size), 'o')),
            Parameter('i1', Annotation(Type(dtype, shape=size), 'i')),
            Parameter('i2', Annotation(Type(dtype, shape=size), 'i'))
        ], """
            ${i1.ctype} t1 = ${i1.load_idx}(${idxs[0]});
            ${i2.ctype} t2 = ${i2.load_idx}(${idxs[0]});
            ${output.store_idx}(${idxs[0]}, t1 + t2);
            """)
Example #14
def test_array_offset(thr):

    dtype = numpy.uint32
    itemsize = dtypes.normalize_type(dtype).itemsize
    offset_len = 10
    arr_len = 16

    # internal creation of the base array
    a1 = thr.array((arr_len,), dtype, offset=itemsize * offset_len)

    # providing base
    a2_base = thr.array((arr_len + offset_len,), dtype)
    a2 = thr.array((arr_len,), dtype, offset=itemsize * offset_len, base=a2_base)

    # providing base_data
    a3_base = thr.array((arr_len + offset_len,), dtype)
    a3_data = a3_base.base_data
    a3 = thr.array((arr_len,), dtype, offset=itemsize * offset_len, base_data=a3_data)

    fill = PureParallel(
        [
            Parameter('output1', Annotation(a1, 'o')),
            Parameter('output2', Annotation(a2, 'o')),
            Parameter('output3', Annotation(a3, 'o')),
        ],
        """
        ${output1.store_idx}((int)${idxs[0]} - ${offset_len}, ${idxs[0]});
        ${output2.store_idx}((int)${idxs[0]} - ${offset_len}, ${idxs[0]});
        ${output3.store_idx}((int)${idxs[0]} - ${offset_len}, ${idxs[0]});
        """,
        render_kwds=dict(offset_len=offset_len),
        guiding_array=(arr_len + offset_len,)
        ).compile(thr)

    fill(a1, a2, a3)

    offset_range = numpy.arange(offset_len, arr_len + offset_len).astype(dtype)
    full_range = numpy.arange(arr_len + offset_len).astype(dtype)

    assert diff_is_negligible(a1.get(), offset_range)

    assert diff_is_negligible(a2_base.get(), full_range)
    assert diff_is_negligible(a2.get(), offset_range)

    assert diff_is_negligible(a3_base.get(), full_range)
    assert diff_is_negligible(a3.get(), offset_range)
Example #15
def softmax(ctx, activations, bias, dest=None):
    """ Softmax Activation Function """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    if dest is None:
        dest = activations

    key = (softmax, activations.shape)
    if key not in kernel_cache.keys():
        logging.info("compiling " + str(key))
        # Regression hidden layer
        kernel_softmax = PureParallel(
            [
                Parameter('activations', Annotation(activations, 'i')),
                Parameter('bias', Annotation(bias, 'i')),
                Parameter('dest', Annotation(dest, 'o')),
            ],
            """
            float x;
            float b;
            float s = 0.0f;
            SIZE_T tl = ${target_length};
            for(SIZE_T j=0; j < tl; j++) {
                x = ${activations.load_idx}(${idxs[0]}, j);
                b = ${bias.load_idx}(j);
                x += b;
                x = exp(min(max(x, -45.0f), 45.0f));
                ${dest.store_idx}(${idxs[0]}, j, x);

                s += x;
            }

            // divide by sum
            for(SIZE_T j=0; j < tl; j++) {
                x = ${dest.load_idx}(${idxs[0]}, j);
                x /= s;
                ${dest.store_idx}(${idxs[0]}, j, x);
            }
        """,
            guiding_array=(activations.shape[0], ),
            render_kwds={'target_length': numpy.int32(activations.shape[1])})

        kernel_cache[key] = kernel_softmax.compile(thread)

    kernel_cache[key](activations, bias, dest)
Example #16
    def _build_plan(
            self, plan_factory, device_params,
            ks_a, ks_b, ks_cv, in_key, out_key, noises_a, noises_b):

        plan = plan_factory()

        extracted_n, t, base, inner_n = ks_a.shape

        mean = Reduce(noises_b, predicate_sum(noises_b.dtype))
        norm = transformations.div_const(mean.parameter.output, numpy.prod(noises_b.shape))
        mean.parameter.output.connect(norm, norm.input, mean=norm.output)

        noises_b_mean = plan.temp_array_like(mean.parameter.mean)

        mul_key = MatrixMulVector(noises_a)
        b_term = plan.temp_array_like(mul_key.parameter.output)

        build_keyswitch = PureParallel([
            Parameter('ks_a', Annotation(ks_a, 'o')),
            Parameter('ks_b', Annotation(ks_b, 'o')),
            Parameter('ks_cv', Annotation(ks_cv, 'o')),
            Parameter('in_key', Annotation(in_key, 'i')),
            Parameter('b_term', Annotation(b_term, 'i')),
            Parameter('noises_a', Annotation(noises_a, 'i')),
            Parameter('noises_b', Annotation(noises_b, 'i')),
            Parameter('noises_b_mean', Annotation(noises_b_mean, 'i'))],
            Snippet(
                TEMPLATE.get_def("make_lwe_keyswitch_key"),
                render_kwds=dict(
                    log2_base=self._log2_base, output_size=self._output_size,
                    double_to_t32=double_to_t32_module, noise=self._noise)),
            guiding_array="ks_b")

        plan.computation_call(mean, noises_b_mean, noises_b)
        plan.computation_call(mul_key, b_term, noises_a, out_key)
        plan.computation_call(
            build_keyswitch,
            ks_a, ks_b, ks_cv, in_key, b_term, noises_a, noises_b, noises_b_mean)

        return plan
Example #17
def test_zero_length_shape(thr):

    dtype = numpy.float32

    p = PureParallel([
        Parameter('output', Annotation(Type(dtype, shape=tuple()), 'o')),
        Parameter('input', Annotation(Type(dtype, shape=tuple()), 'i'))
    ],
                     """
        float t = ${input.load_idx}();
        ${output.store_idx}(t * 2);
        """,
                     guiding_array=tuple())

    a = get_test_array_like(p.parameter.input)
    a_dev = thr.to_device(a)
    res_dev = thr.empty_like(p.parameter.output)

    pc = p.compile(thr)
    pc(res_dev, a_dev)

    res_ref = (a * 2).astype(dtype)

    assert diff_is_negligible(res_dev.get(), res_ref)
Example #18
def convolve2d_gradient(ctx, prev_deltas, deltas, gradient_intermediate):
    """ The output is the full discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_gradient, prev_deltas.shape, deltas.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        n, channels, p_width, p_height = prev_deltas.shape
        n_1, filters, d_width, d_height = deltas.shape
        (n, d_width_1, d_height_1, channels_1, filters_1,
         f_width, f_height) = gradient_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(prev_deltas, deltas, 'gradient')
        assert expected_shape == gradient_intermediate.shape
        assert d_width_1 == d_width
        assert d_height_1 == d_height

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        kernel = PureParallel([
            Parameter('prev_deltas', Annotation(prev_deltas, 'i')),
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('gradient_intermediate',
                      Annotation(gradient_intermediate, 'o'))
        ],
                              """

        const SIZE_T number = ${idxs[0]};
        const SIZE_T dx = ${idxs[1]};
        const SIZE_T dy = ${idxs[2]};
        const SIZE_T channel = ${idxs[3]};
        const SIZE_T filter = ${idxs[4]};
        const SIZE_T fx = ${idxs[5]};
        const SIZE_T fy = ${idxs[6]};


        // weight gradient at the weight position fx, fy is defined by the sum
        //
        //       (deltas * prev_deltas[fx:d_width+fx, fy:fy+d_height]).sum()
        //
        // alternatively we can store all delta positions and sum in a separate kernel - this is what we do now.

        float g = ${deltas.load_idx}(number, filter, dx, dy) * ${prev_deltas.load_idx}(number, channel, dx+fx, dy+fy);

        ${gradient_intermediate.store_same}(g);

        """,
                              guiding_array='gradient_intermediate',
                              render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](prev_deltas, deltas, gradient_intermediate)

    return gradient_intermediate
Example #19
def convolve2d_backprop(ctx, deltas, weights, deltas_intermediate):
    """ The output is the full discrete linear convolution of the inputs. """
    kernel_cache, thread = ctx.kernel_cache, ctx.thread

    key = (convolve2d_backprop, deltas.shape, weights.shape, thread)
    if key not in kernel_cache:
        logging.info("compiling " + str(key))

        # Extract shapes from the arrays
        channels, filters, f_width, f_height = weights.shape
        n_1, filters_1, d_width, d_height = deltas.shape
        n, channels_1, filters_2, p_width, p_height = deltas_intermediate.shape

        # Some assertions to be sure everything is correct
        assert n_1 == n
        assert filters_2 == filters_1 == filters
        assert channels_1 == channels
        expected_shape = get_output_shape(deltas, weights, 'backprop')
        assert expected_shape == deltas_intermediate.shape

        # Render keywords
        render_kwds = {
            'n': n,
            'filters': filters,
            'channels': channels,
            'f_width': f_width,
            'f_height': f_height,
            'd_width': d_width,
            'd_height': d_height,
            'p_width': p_width,
            'p_height': p_height,
        }

        # The kernel
        kernel = PureParallel([
            Parameter('deltas', Annotation(deltas, 'i')),
            Parameter('weights', Annotation(weights, 'i')),
            Parameter('deltas_intermediate',
                      Annotation(deltas_intermediate, 'o'))
        ],
                              """
        float d = 0.0f;
        SIZE_T x, y, i, j, fi, fj;
        const SIZE_T number = ${idxs[0]};
        const SIZE_T channel = ${idxs[1]};
        const SIZE_T filter = ${idxs[2]};
        const SIZE_T xout = ${idxs[3]};
        const SIZE_T yout = ${idxs[4]};
        for (i=0; i < ${f_width}; i++){
            for (j=0; j < ${f_height}; j++){
                x = xout - i;
                if (x < 0) continue;
                if (x >= ${d_width}) continue;
                y = yout - j;
                if (y < 0) continue;
                if (y >= ${d_height}) continue;
                // access weights in flipped order!
                fi = ${f_width} - i - 1;
                fj = ${f_height} - j - 1;
                d += ${deltas.load_idx}(number, channel, x, y)
                   * ${weights.load_idx}(channel, filter, fi, fj);
            }
        }

        ${deltas_intermediate.store_same}(d);

        """,
                              guiding_array='deltas_intermediate',
                              render_kwds=render_kwds)

        kernel_cache[key] = kernel.compile(thread, fast_math=True)

    # run convolution -> intermediate
    kernel_cache[key](deltas, weights, deltas_intermediate)

    return deltas_intermediate
Example #20
def get_procs(thr, N):
    fft = FFTFactory.create(thr, (N, ), compile_=False)
    unimod_trans = Transformation(
        [
            Parameter('output', Annotation(Type(np.complex128, N), 'o')),
            Parameter('input', Annotation(Type(np.complex128, N), 'i'))
        ],
        """
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_same};
if (idx>${N}/2){
    val.x = 0.0;
    val.y = 0.0;
    ${output.store_same}(val);
}else
    ${output.store_same}(${polar_unit}(atan2(val.y, val.x)));
        """,
        render_kwds=dict(polar_unit=functions.polar_unit(dtype=np.float64),
                         N=N))
    fft.parameter.output.connect(unimod_trans,
                                 unimod_trans.input,
                                 uni=unimod_trans.output)
    fft_unimod = fft.compile(thr)

    mag_square = PureParallel([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('input', Annotation(Type(np.complex128, N), 'i'))
    ], '''
VSIZE_T idx = ${idxs[0]};
${input.ctype} val = ${input.load_idx}(idx);  
val.x = val.x*val.x + val.y*val.y;
val.y = 0;
${output.store_idx}(idx, val);
        ''')
    mag_square = mag_square.compile(thr)

    apply_mask = PureParallel(
        [
            Parameter('output', Annotation(Type(np.complex128, N), 'o')),
            Parameter('origin', Annotation(Type(np.complex128, N), 'i')),
            Parameter('mask', Annotation(Type(np.double, N), 'i'))
        ],
        '''
VSIZE_T idx = ${idxs[0]};
${output.store_idx}(idx, ${mul}(${origin.load_idx}(idx), ${mask.load_idx}(idx)));        
        ''',
        render_kwds=dict(mul=functions.mul(np.complex128, np.double)))
    apply_mask = apply_mask.compile(thr)

    combine_mag_phi = PureParallel([
        Parameter('output', Annotation(Type(np.complex128, N), 'o')),
        Parameter('mag_square', Annotation(Type(np.complex128, N), 'i')),
        Parameter('phase', Annotation(Type(np.complex128, N), 'i'))
    ],
                                   '''
VSIZE_T idx = ${idxs[0]};
double r = ${mag_square.load_idx}(idx).x;  
r = r<0.0 ? 0.0 : ${pow}(r, 0.5);
double2 v = ${phase.load_idx}(idx);
double angle = atan2(v.y, v.x);
${output.store_idx}(idx, ${polar}(r, angle));
        ''',
                                   render_kwds=dict(
                                       pow=functions.pow(np.double),
                                       polar=functions.polar(np.double)))
    combine_mag_phi = combine_mag_phi.compile(thr)

    return fft_unimod, mag_square, apply_mask, combine_mag_phi
Example #21
def get_test_computation(arr_t):
    return PureParallel([
        Parameter('output', Annotation(arr_t, 'o')),
        Parameter('input', Annotation(arr_t, 'i'))
    ], "${output.store_idx}(${idxs[0]}, ${input.load_idx}(${idxs[0]}));")