예제 #1
0
def setup_cupy():
    global cupy
    global cupy_stream
    global square_diff_kernel
    global mix_channels_kernel
    global gray_scale_kernel
    import cupy as cupy
    cupy_stream = cupy.cuda.Stream()
    square_diff_kernel = cupy.ElementwiseKernel('T x, T y', 'T z',
                                                'z = x*x - y*y', 'square_diff')

    mix_channels_kernel = cupy.ElementwiseKernel('uint8 x, uint8 y', 'uint8 z',
                                                 'z = (i % 3) ? x : y',
                                                 'mix_channels')

    gray_scale_kernel = cupy.RawKernel(
        r'''
    extern "C" __global__
    void gray_scale(float *output, const unsigned char *input, long long height, long long width) {
        int tidx = blockIdx.x * blockDim.x + threadIdx.x;
        int tidy = blockIdx.y * blockDim.y + threadIdx.y;
        if (tidx < width && tidy < height) {
            float r = input[tidy * width + tidx] / 255.;
            float g = input[tidy * width + tidx + 1] / 255.;
            float b = input[tidy * width + tidx + 2] / 255.;
            output[tidy * width + tidx] = 0.299 * r + 0.59 * g + 0.11 * b;
        }
    }
    ''', 'gray_scale')
예제 #2
0
파일: coo.py 프로젝트: wotulong/cupy
    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. seealso::
           :func:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self._has_canonical_format:
            return
        if self.data.size == 0:
            self._has_canonical_format = True
            return
        keys = cupy.stack([self.row, self.col])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        diff = cupy.ElementwiseKernel(
            'raw int32 row, raw int32 col',
            'int32 diff',
            '''
            int index;
            if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
              diff = 0;
            } else {
              diff = 1;
            }
            ''',
            'sum_duplicates_diff'
        )(src_row, src_col, size=self.row.size)

        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            cupy.ElementwiseKernel(
                'T src_data, int32 src_row, int32 src_col, int32 index',
                'raw T data, raw int32 row, raw int32 col',
                '''
                atomicAdd(&data[index], src_data);
                row[index] = src_row;
                col[index] = src_col;
                ''',
                'sum_duplicates_assign',
                preamble=util._preamble_atomic_add
            )(src_data, src_row, src_col, index, data, row, col)

        self.data = data
        self.row = row
        self.col = col
        self._has_canonical_format = True
예제 #3
0
파일: kmeans.py 프로젝트: nudigamma/cupy
def fit(X, n_clusters, max_iter, use_custom_kernel):
    assert X.ndim == 2
    xp = cupy.get_array_module(X)
    pred = xp.zeros(len(X), dtype=np.int32)
    initial_indexes = np.random.choice(len(X), n_clusters,
                                       replace=False).astype(np.int32)
    centers = X[initial_indexes]
    data_num = X.shape[0]
    data_dim = X.shape[1]

    for _ in six.moves.range(max_iter):
        # calculate distances and label
        if not use_custom_kernel or xp == np:
            distances = xp.linalg.norm(X[:, None, :] - centers[None, :, :],
                                       axis=2)
        else:
            distances = xp.zeros((data_num, n_clusters), dtype=np.float32)
            cupy.ElementwiseKernel(
                'S data, raw S centers, int32 n_clusters, int32 dim',
                'raw S dist', '''
                for (int j = 0; j < n_clusters; j++){
                    int cent_ind[] = {j, i % dim};
                    int dist_ind[] = {i / dim, j};
                    double diff = centers[cent_ind] - data;
                    atomicAdd(&dist[dist_ind], diff * diff);
                }
                ''', 'calc_distances')(X, centers, n_clusters, data_dim,
                                       distances)

        new_pred = xp.argmin(distances, axis=1).astype(np.int32)
        if xp.all(new_pred == pred):
            break
        pred = new_pred

        # calculate centers
        if not use_custom_kernel or xp == np:
            centers = xp.stack([
                X[pred == i].mean(axis=0) for i in six.moves.range(n_clusters)
            ])
        else:
            centers = xp.zeros((n_clusters, data_dim), dtype=np.float32)
            group = xp.zeros(n_clusters, dtype=np.float32)
            label = pred[:, None]
            cupy.ElementwiseKernel(
                'S data, T label, int32 dim', 'raw S centers, raw S group', '''
                int cent_ind[] = {label, i % dim};
                atomicAdd(&centers[cent_ind], data);
                atomicAdd(&group[label], 1);
                ''', 'calc_center')(X, label, data_dim, centers, group)
            group /= data_dim
            centers /= group[:, None]

    return centers, pred
예제 #4
0
    def deformation(self, prm):
        """
            Apply 2D Gaussian and Planar deformation.
            Computation is parallelized on GPU using cupy.
        """
        import cupy as cp
        xy_cp = cp.asarray(prm.xy)
        a_cp = cp.asarray(self.a)
        b_cp = cp.asarray(self.b)
        c_cp = cp.asarray(self.c)
        d_cp = cp.asarray(self.d)
        sigma_cp = cp.asarray(self.sigma)
        e_cp = cp.asarray(self.e)
        f_cp = cp.asarray(self.f)
        g_cp = cp.asarray(self.g)
        z_cp = cp.asarray(prm.z)

        func_planar = cp.ElementwiseKernel(
            in_params='T x, T y, T e, T f, T g',
            out_params='T z',
            operation= \
                '''
                z = e + f*x + g*y;
                ''',
            name='func_planar'
        )

        func_gauss2d = cp.ElementwiseKernel(
            in_params='T x, T y, T b, T c, T d, T sigma',
            out_params='T z',
            operation= \
                '''
                z = b*expf(-(powf(x-c,2) + powf(y-d,2))/(2*powf(sigma,2)));
                ''',
            name='func_gauss2d'
        )

        gauss_2d_cp = cp.zeros_like(xy_cp[:, 0])
        for i in range(len(self.b)):
            gauss_2d_cp += func_gauss2d(xy_cp[:, 0], xy_cp[:, 1], b_cp[i], c_cp[i], d_cp[i], sigma_cp[i])
        s1_cp = a_cp + (1.5 / z_cp) * cp.outer(cp.transpose(gauss_2d_cp), z_cp)
        s2_cp = func_planar(xy_cp[:, 0], xy_cp[:, 1], e_cp, f_cp, g_cp)

        refl_cp = cp.asarray(self.refl)
        for i in range(prm.nxy_tr):
            s = s1_cp[i, :] + s2_cp[i] + z_cp
            mat = cp.tile(z_cp, (len(s), 1)) - cp.tile(cp.expand_dims(s, 1), (1, len(z_cp)))
            refl_cp[i, :] = cp.dot(refl_cp[i, :], cp.sinc(mat))

        return np.reshape(cp.asnumpy(refl_cp), [prm.nxy_tr, prm.nz_tr])
예제 #5
0
def bincount(x, weights=None, minlength=None):
    """Count number of occurrences of each value in array of non-negative ints.

    Args:
        x (cupy.ndarray): Input array.
        weights (cupy.ndarray): Weights array which has the same shape as
            ``x``.
        minlength (int): A minimum number of bins for the output array.

    Returns:
        cupy.ndarray: The result of binning the input array. The length of
            output is equal to ``max(cupy.max(x) + 1, minlength)``.

    .. seealso:: :func:`numpy.bincount`

    """
    if x.ndim > 1:
        raise ValueError('object too deep for desired array')
    if x.ndim < 1:
        raise ValueError('object of too small depth for desired array')
    if x.dtype.kind == 'f':
        raise TypeError('x must be int array')
    if (x < 0).any():
        raise ValueError('The first argument of bincount must be non-negative')
    if weights is not None and x.shape != weights.shape:
        raise ValueError('The weights and list don\'t have the same length.')
    if minlength is not None:
        minlength = int(minlength)
        if minlength < 0:
            raise ValueError('minlength must be non-negative')

    size = int(cupy.max(x)) + 1
    if minlength is not None:
        size = max(size, minlength)

    if weights is None:
        # atomicAdd for int64 is not provided
        b = cupy.zeros((size, ), dtype=cupy.int32)
        cupy.ElementwiseKernel('S x', 'raw U bin', 'atomicAdd(&bin[x], 1)',
                               'bincount_kernel')(x, b)
        b = b.astype(numpy.intp)
    else:
        # atomicAdd for float64 is not provided
        b = cupy.zeros((size, ), dtype=cupy.float32)
        cupy.ElementwiseKernel('S x, T w', 'raw U bin',
                               'atomicAdd(&bin[x], w)',
                               'bincount_with_weight_kernel')(x, weights, b)
        b = b.astype(cupy.float64)

    return b
예제 #6
0
def bincount(X, B, weights=None):
    if weights is None:
        b = cp.zeros((B, ), dtype=cp.int32)
        startin = time.time()
        cp.ElementwiseKernel('S x', 'raw U bin', 'atomicAdd(&bin[x], 1)',
                             'bincount_kernel')(X, b)
        b = b.astype(np.intp)
    else:
        b = cp.zeros((B, ), dtype=cp.float32)
        cp.ElementwiseKernel('S x, T w', 'raw U bin', 'atomicAdd(&bin[x], w)',
                             'bincount_with_weight_kernel')(X, weights, b)
        b = b.astype(cp.float64)

    return b
예제 #7
0
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    n, c, kh, kw, out_h, out_w = col.shape
    img = cp.empty((n, c, h, w), dtype=col.dtype)
    cp.ElementwiseKernel(
        'raw T col, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dx, int32 dy', 'T img', '''
           int c0 = i / (h * w);
           int y  = i / w % h;
           int x  = i % w;
           T val = 0;
           for (int ky = 0; ky < kh; ++ky) {
             int out_y = (y + ph - ky * dy);
             if (0 > out_y || out_y >= out_h * sy) continue;
             if (out_y % sy != 0) continue;
             out_y /= sy;
             for (int kx = 0; kx < kw; ++kx) {
               int out_x = (x + pw - kx * dx);
               if (0 > out_x || out_x >= out_w * sx) continue;
               if (out_x % sx != 0) continue;
               out_x /= sx;
               int k = out_y + out_h * (kx + kw * (ky + kh * c0));
               val = val + col[out_x + out_w * k];
             }
           }
           img = val;
        ''', 'col2im')(col.reduced_view(), h, w, out_h, out_w, kh, kw, sy, sx,
                       ph, pw, dx, dy, img)
    return img
예제 #8
0
    def forward(ctx, data, masks, default_value):
        # PyTorch to CuPy
        device = data.device
        data_in = cp.asarray(data)
        masks = cp.asarray(masks)
        data_out = data_in.copy()
        dim = data_in.size / masks.size

        # distribute
        masks = cp.ascontiguousarray(masks)
        data_out = cp.ascontiguousarray(data_out)
        kernel = cp.ElementwiseKernel(
            'raw S data_out, int64 mask',
            '',
            string.Template('''
                if (mask == 0) {
                    ${dtype}* p = (${dtype}*)&data_out[i * ${dim}];
                    for (int j = 0; j < ${dim}; j++) *p++ = ${default_value};
                }
            ''').substitute(
                dim=dim,
                dtype=utils.get_dtype_in_cuda(data_out.dtype),
                default_value=default_value,
            ),
            'function',
        )
        kernel(data_out, masks)

        # CuPy to PyTorch
        data_out = torch.as_tensor(data_out, device=device)

        return data_out
예제 #9
0
def euclid_filter_cupy(image, distance, grad=True):
    cp.cuda.set_allocator(cp.cuda.MemoryPool().malloc)

    img = cp.asarray(image).astype(cp.float32)
    new_img = cp.zeros_like(img)
    height, width = img.shape
    diameter = distance * 2 + 1
    eucfilter = cp.zeros((diameter, diameter), dtype=cp.float32)
    for i in range(diameter):
        for j in range(diameter):
            euclid = euclidean((i, j), (distance, distance))
            if euclid <= distance:
                if grad:
                    if euclid == 0:
                        eucfilter[i, j] = 1
                    else:
                        eucfilter[i, j] = 1 / euclid
                else:
                    eucfilter[i, j] = 1

    get_smooth_image = cp.ElementwiseKernel(
        in_params='raw float32 img, raw float32 eucfilter, uint16 height, uint16 width, uint16 distance, uint16 diameter',
        out_params='float32 output',
        preamble=\
        '''
        __device__ int get_x_idx(int i, int width) {
            return i % width;
        }
        __device__ int get_y_idx(int i, int height) {
            return i / height;
        }
        ''',
        operation=\
        '''
        int x = get_x_idx(i, width);
        int y = get_y_idx(i, height);
        float minimum = 0.00001;
        float sum = 0;
        float length = 0;
        if ( ((x >= distance) && (x < width - distance)) && ((y >= distance) && (y < height - distance)) && (img[i] > minimum) ) {
            for (int k=0; k<diameter; k++) {
                for (int l=0; l<diameter; l++) {
                    float pixel_img = img[i + (k-distance)*height + l - distance];
                    float pixel_filter = eucfilter[k*diameter + l];
                    if (pixel_img > minimum) {
                        sum += pixel_img * pixel_filter;
                        length += pixel_filter;
                    }
                }
            }
            output = sum / length;
        } else {
            output = 0;
        }
        ''',
        name='get_smooth_image'
    )
    get_smooth_image(img, eucfilter, height, width, distance, diameter,
                     new_img)
    return cp.asnumpy(new_img)
예제 #10
0
 def compile_kernel(self, lower, upper):
     return cp.ElementwiseKernel(
         "uint8 r, uint8 g, uint8 b",
         "uint8 out",
         f"if(r >= {lower[0]} && r <= {upper[0]} && g >= {lower[1]} && g <= {upper[1]} && b >= {lower[2]} && b <= {upper[2]}) {{ out = 255; }} else {{ out = 0; }}",
         "threshold",
     )
예제 #11
0
def binKernel( num_bins ):
    binK = cp.ElementwiseKernel(
            'float64 x, raw float64 bins',
            'float64 z',
            '''
                min_index = -9;
                for( j=0; j < num_bins - 1; ++j )
                {
                    if( bins[j] < x )
                    {
                        min_index = j;    
                    }
                }
                if( bins[num_bins -1] < x)
                {
                    min_index = -9;        
                }
                z = min_index;
            ''',
            'distance_arg',
            loop_prep='''
                         int j = 0;
                         int min_index = -9;
                         int num_bins = {};
                         '''.format(num_bins)
            )
    return binK
예제 #12
0
def _get_zoom_kernel(ndim,
                     large_int,
                     yshape,
                     mode,
                     cval=0.0,
                     order=1,
                     integer_output=False,
                     grid_mode=False,
                     nprepad=0):
    in_params = 'raw X x, raw W zoom'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_zoom_grid if grid_mode else _get_coord_zoom,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name="zoom_grid" if grid_mode else "zoom",
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(in_params,
                                  out_params,
                                  operation,
                                  name,
                                  preamble=math_constants_preamble)
예제 #13
0
def _kernel_init():
    return cupy.ElementwiseKernel(
        "X x",
        "Y y",
        "if (x == 0) { y = -1; } else { y = i; }",
        "cucim_nd_label_init",
    )
예제 #14
0
파일: filters.py 프로젝트: svlandeg/cupy
def _get_correlete_kernel(ndim, mode, cval, xshape, wshape, origin):
    # weights is always casted to float64 in order to get an output compatible
    # with SciPy, thought float32 might be sufficient when input dtype is low
    # precision.
    in_params, out_params, operation, name = _generate_correlete_kernel(
        ndim, mode, cval, xshape, wshape, origin)
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
예제 #15
0
    def test_manual_indexing(self, n=100):
        in1 = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        in2 = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
        uesr_kernel_1 = cupy.ElementwiseKernel(
            'T x, T y', 'T z', '''
                z = x + y;
            ''', 'uesr_kernel_1')
        out1 = uesr_kernel_1(in1, in2)

        uesr_kernel_2 = cupy.ElementwiseKernel(
            'raw T x, raw T y', 'raw T z', '''
                z[i] = x[i] + y[i];
            ''', 'uesr_kernel_2')
        out2 = uesr_kernel_2(in1, in2, size=n)

        testing.assert_array_equal(out1, out2)
예제 #16
0
파일: matrix.py 프로젝트: yutiansut/cupy
def tri(N, M=None, k=0, dtype=float):
    """Creates an array with ones at and below the given diagonal.

    Args:
        N (int): Number of rows.
        M (int): Number of columns. M == N by default.
        k (int): The sub-diagonal at and below which the array is filled. Zero
            is the main diagonal, a positive value is above it, and a negative
            value is below.
        dtype: Data type specifier.

    Returns:
        cupy.ndarray: An array with ones at and below the given diagonal.

    .. seealso:: :func:`numpy.tri`

    """
    if M is None:
        M = N
    out = cupy.empty((N, M), dtype=dtype)

    return cupy.ElementwiseKernel(
        'int32 m, int32 k',
        'T out',
        '''
        int row = i % m;
        int col = i / m;
        out = (row <= col + k);
        ''',
        'tri',
    )(M, k, out)
예제 #17
0
 def test_strides(self):
     x = cupy.arange(6).reshape((2, 3)).astype('i')
     y = cupy.ElementwiseKernel(
         'raw int32 x', 'int32 y', 'y = x.strides()[i]',
         'test_carray_strides',
     )(x, size=2)
     testing.assert_array_equal(y, (12, 4))
예제 #18
0
 def test_scalar(self, xp, dtype):
     x = testing.shaped_arange((2, 3, 4), xp, dtype)
     if xp is numpy:
         return x + numpy.dtype(dtype).type(self.value)
     else:
         kernel = cupy.ElementwiseKernel('T x, T y', 'T z', 'z = x + y')
         return kernel(x, self.value)
예제 #19
0
def _get_affine_kernel(
    ndim,
    large_int,
    yshape,
    mode,
    cval=0.0,
    order=1,
    integer_output=False,
    nprepad=0,
):
    in_params = "raw X x, raw W mat"
    out_params = "Y y"
    operation, name = _generate_interp_custom(
        in_params=in_params,
        coord_func=_get_coord_affine,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name="affine",
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(in_params,
                                  out_params,
                                  operation,
                                  name,
                                  preamble=math_constants_preamble)
예제 #20
0
def _get_map_kernel(
    ndim,
    large_int,
    yshape,
    mode,
    cval=0.0,
    order=1,
    integer_output=False,
    nprepad=0,
):
    in_params = "raw X x, raw W coords"
    out_params = "Y y"
    operation, name = _generate_interp_custom(
        in_params=in_params,
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name="map_coordinates",
        integer_output=integer_output,
        nprepad=nprepad,
    )
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
예제 #21
0
def _kernel_labels():
    return cupy.ElementwiseKernel(
        '', 'raw Y y, raw int32 count, raw int32 labels', '''
        if (y[i] != i) continue;
        int j = atomicAdd(&count[1], 1);
        labels[j] = i;
        ''', 'cupyx_nd_label_labels')
예제 #22
0
파일: csr.py 프로젝트: toslunar/cupy
def cupy_multiply_by_dense():
    return cupy.ElementwiseKernel('''
        raw S SP_DATA, raw I SP_INDPTR, raw I SP_INDICES,
        int32 SP_M, int32 SP_N,
        raw D DN_DATA, int32 DN_M, int32 DN_N,
        raw I OUT_INDPTR, int32 OUT_M, int32 OUT_N
        ''',
                                  'O OUT_DATA, I OUT_INDICES',
                                  '''
        int i_out = i;
        int m_out = get_row_id(i_out, 0, OUT_M - 1, &(OUT_INDPTR[0]));
        int i_sp = i_out;
        if (OUT_M > SP_M && SP_M == 1) {
            i_sp -= OUT_INDPTR[m_out];
        }
        if (OUT_N > SP_N && SP_N == 1) {
            i_sp /= OUT_N;
        }
        int n_out = SP_INDICES[i_sp];
        if (OUT_N > SP_N && SP_N == 1) {
            n_out = i_out - OUT_INDPTR[m_out];
        }
        int m_dn = m_out;
        if (OUT_M > DN_M && DN_M == 1) {
            m_dn = 0;
        }
        int n_dn = n_out;
        if (OUT_N > DN_N && DN_N == 1) {
            n_dn = 0;
        }
        OUT_DATA = (O)(SP_DATA[i_sp] * DN_DATA[n_dn + (DN_N * m_dn)]);
        OUT_INDICES = n_out;
        ''',
                                  'cupyx_scipy_sparse_csr_multiply_by_dense',
                                  preamble=_GET_ROW_ID_)
예제 #23
0
def _get_affine_kernel(ndim,
                       large_int,
                       yshape,
                       mode,
                       cval=0.0,
                       order=1,
                       integer_output=False):
    in_params = 'raw X x, raw W mat'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_affine,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='affine',
        integer_output=integer_output,
    )
    return cupy.ElementwiseKernel(in_params,
                                  out_params,
                                  operation,
                                  name,
                                  preamble=math_constants_preamble)
예제 #24
0
def _get_map_kernel(ndim,
                    large_int,
                    yshape,
                    mode,
                    cval=0.0,
                    order=1,
                    integer_output=False,
                    nprepad=0):
    in_params = 'raw X x, raw W coords'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='shift',
        integer_output=integer_output,
        nprepad=nprepad,
        omit_in_coord=True,  # input image coordinates are not needed
    )
    return cupy.ElementwiseKernel(in_params,
                                  out_params,
                                  operation,
                                  name,
                                  preamble=math_constants_preamble)
예제 #25
0
파일: csr.py 프로젝트: toslunar/cupy
def cupy_binopt_csr_step2(op_name):
    name = 'cupyx_scipy_sparse_csr_binopt' + op_name + 'step2'
    return cupy.ElementwiseKernel(
        '''
        raw I A_INFO, raw B A_VALID, raw I A_TMP_INDICES, raw O A_TMP_DATA,
        int32 A_NNZ,
        raw I B_INFO, raw B B_VALID, raw I B_TMP_INDICES, raw O B_TMP_DATA,
        int32 B_NNZ
        ''',
        'raw I C_INDICES, raw O C_DATA',
        '''
        if (i < A_NNZ) {
            int j = i;
            if (A_VALID[j]) {
                C_INDICES[A_INFO[j]] = A_TMP_INDICES[j];
                C_DATA[A_INFO[j]]    = A_TMP_DATA[j];
            }
        } else if (i < A_NNZ + B_NNZ) {
            int j = i - A_NNZ;
            if (B_VALID[j]) {
                C_INDICES[B_INFO[j]] = B_TMP_INDICES[j];
                C_DATA[B_INFO[j]]    = B_TMP_DATA[j];
            }
        }
        ''',
        name,
    )
예제 #26
0
 def test_invalid_shape(self):
     with six.assertRaisesRegex(self, ValueError,
                                'Out shape is mismatched'):
         f = cupy.ElementwiseKernel('T x', 'T y', 'y += x')
         x = cupy.arange(12).reshape(3, 4)
         y = cupy.arange(4)
         f(x, y)
예제 #27
0
def gumbel(loc=0.0, scale=1.0, size=None, dtype=float):
    """Returns an array of samples drawn from a Gumbel distribution.

    The samples are drawn from a Gumbel distribution with location ``loc``
    and scale ``scale``.
    Its probability density function is defined as

    .. math::
       f(x) = \\frac{1}{\\eta} \
           \\exp\\left\\{ - \\frac{x - \\mu}{\\eta} \\right\\} \
           \\exp\\left[-\\exp\\left\\{-\\frac{x - \\mu}{\\eta} \
           \\right\\}\\right],

    where :math:`\\mu` is ``loc`` and :math:`\\eta` is ``scale``.

    Args:
        loc (float): The location of the mode :math:`\\mu`.
        scale (float): The scale parameter :math:`\\eta`.
        size (int or tuple of ints): The shape of the array. If ``None``, a
            zero-dimensional array is generated.
        dtype: Data type specifier. Only :class:`numpy.float32` and
            :class:`numpy.float64` types are allowed.

    Returns:
        cupy.ndarray: Samples drawn from the Gumbel destribution.

    .. seealso:: :func:`numpy.random.gumbel`
    """
    rs = uniform(size=size, dtype=dtype)
    # We use `1 - x` as input of `log` method to prevent overflow.
    # It obeys numpy implementation.
    return cupy.ElementwiseKernel('T x, T loc, T scale', 'T y',
                                  'y = loc - log(-log(1 - x)) * scale',
                                  'gumbel_kernel')(rs, loc, scale, rs)
    return rs
예제 #28
0
    def get_label_lengths(self, labels):
        if self.xp == numpy:
            label_lengths = self.xp.zeros(len(labels))

            for i in range(len(labels)):
                for j in range(len(labels[i])):
                    if labels.data[i][j] == self.blank_symbol:
                        label_lengths[i] = j
                        break
        else:
            import cupy
            label_length_kernel = cupy.ElementwiseKernel(
                'raw T labels, int32 blank_symbol, int32 num_labels',
                'T length', '''
                    for (int j = 0; j < num_labels; ++j) {
                        T label_value = labels[i * num_labels + j];
                        if (label_value == blank_symbol) {
                            length = j;
                            break;
                        }
                    }
                ''', 'get_label_lengths')
            label_lengths = label_length_kernel(labels.data,
                                                self.blank_symbol,
                                                labels.shape[1],
                                                size=len(labels))
        return label_lengths
예제 #29
0
def interpolate_bilinear_gpu(x, v, u, vw, uw):
    B, H, W = x.shape
    out_H, out_W = v.shape
    y = cp.empty((B, out_H, out_W), dtype=x.dtype)

    cp.ElementwiseKernel(
        'raw T x, S v, S u, T vw, T uw, S H, S W, S outsize', 'T y', '''
        // indices
        S v0 = v;
        S v1 = min(v + 1, (S)(H - 1));
        S u0 = u;
        S u1 = min(u + 1, (S)(W - 1));
        // weights
        T w0 = (1 - vw) * (1 - uw);
        T w1 = (1 - vw) * uw;
        T w2 = vw * (1 - uw);
        T w3 = vw * uw;
        // fetch
        S offset = i / outsize * H * W;
        T px0 = x[offset + v0 * W + u0];
        T px1 = x[offset + v0 * W + u1];
        T px2 = x[offset + v1 * W + u0];
        T px3 = x[offset + v1 * W + u1];
        // interpolate
        y = (w0 * px0 + w1 * px1) + (w2 * px2 + w3 * px3);
        ''', 'resize_images_interpolate_bilinear')(x, v, u, vw, uw, H, W,
                                                   out_H * out_W, y)
    return y
예제 #30
0
 def test_getitem_int(self):
     x = cupy.arange(24).reshape((2, 3, 4)).astype('i')
     y = cupy.empty_like(x)
     y = cupy.ElementwiseKernel(
         'raw T x', 'int32 y', 'y = x[i]', 'test_carray_getitem_int',
     )(x, y)
     testing.assert_array_equal(y, x)