Exemple #1
0
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x[0])
            gy_arr = gy[0]
            if not gy_arr.flags.c_contiguous:
                gy_arr = cuda.cupy.ascontiguousarray(gy_arr)
            gy_desc = cudnn.create_tensor_descriptor(gy_arr)
            one = ctypes.c_float(1)
            zero = ctypes.c_float(0)
            if self.b is not None:
                libcudnn.convolutionBackwardBias(
                    handle, one, gy_desc.value, gy_arr.data.ptr,
                    one, self.bias_desc.value, self.gb.data.ptr)

            libcudnn.convolutionBackwardFilter(
                handle, one, x_desc.value, x[0].data.ptr,
                gy_desc.value, gy_arr.data.ptr, self.conv_desc.value,
                one, self.filter_desc.value, self.gW.data.ptr)

            gx = cuda.empty_like(x[0])
            libcudnn.convolutionBackwardData(
                handle, one, self.filter_desc.value, self.W.data.ptr,
                gy_desc.value, gy_arr.data.ptr, self.conv_desc.value,
                zero, x_desc.value, gx.data.ptr)
        else:
            handle = cuda.get_cublas_handle()
            if self.gb is not None:
                self.gb += gy[0].sum(axis=(0, 2, 3))

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(
                    gy_mats[i], col_mats[i], gW_mat, transb='T', handle=handle)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, gy_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        return gx,
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            x_desc = cudnn.get_tensor_desc(x[0], h, w)
            gy_desc = cudnn.get_tensor_desc(gy[0], out_h, out_w)
            if self.b is not None:
                libcudnn.cudnnConvolutionBackwardBias(
                    handle, 1, gy_desc.value, cudnn.get_ptr(gy[0]),
                    1, self.bias_desc.value, cudnn.get_ptr(self.gb))

            libcudnn.cudnnConvolutionBackwardFilter(
                handle, 1, x_desc.value, cudnn.get_ptr(x[0]),
                gy_desc.value, cudnn.get_ptr(gy[0]), self.conv_desc.value,
                1, self.filter_desc.value, cudnn.get_ptr(self.gW))

            gx = cuda.empty_like(x[0])
            libcudnn.cudnnConvolutionBackwardData(
                handle, 1, self.filter_desc.value, cudnn.get_ptr(self.W),
                gy_desc.value, cudnn.get_ptr(gy[0]), self.conv_desc.value,
                0, x_desc.value, cudnn.get_ptr(gx))
        else:
            handle = cuda.get_cublas_handle()
            if self.gb is not None:
                # TODO(beam2d): Unify kernels
                with cuda.using_cumisc(handle):
                    tmp = cuda.cumisc.sum(
                        gy[0].reshape(n * out_c, out_h * out_w), axis=1)
                    tmp = cuda.cumisc.sum(tmp.reshape(n, out_c), axis=0)
                    self.gb += tmp

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(
                    gy_mats[i], col_mats[i], gW_mat, transb='T', handle=handle)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, gy_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])

            gx = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)

        return gx,
def _partial_reduce(x):
    global _one
    out_axis, sum_axis = x.shape
    one = _one
    if one is None or one.size < sum_axis:
        one = cuda.ones(sum_axis)
        _one = one
    one = one[:sum_axis]
    handle = cuda.get_cublas_handle()
    ret = cuda.empty(out_axis)
    cuda.cublas.cublasSgemv(handle, 't', sum_axis, out_axis,
                            numpy.float32(1.0), x.gpudata, sum_axis, one.gpudata,
                            1, numpy.float32(0.0), ret.gpudata, 1)
    return ret
Exemple #4
0
def _partial_reduce(x):
    global _one
    out_axis, sum_axis = x.shape
    one = _one
    if one is None or one.size < sum_axis:
        one = cuda.ones(sum_axis)
        _one = one
    one = one[:sum_axis]
    handle = cuda.get_cublas_handle()
    ret = cuda.empty(out_axis)
    cuda.cublas.cublasSgemv(handle, 't', sum_axis, out_axis,
                            numpy.float32(1.0), x.gpudata, sum_axis,
                            one.gpudata, 1, numpy.float32(0.0), ret.gpudata, 1)
    return ret
    def forward_gpu(self, x):
        n, out_c, out_h, out_w = x[0].shape
        c = self.W.shape[1]
        h = get_deconv_outsize(out_h, self.kh, self.sy, self.ph)
        w = get_deconv_outsize(out_w, self.kw, self.sx, self.pw)
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            x_desc = cudnn.get_tensor_desc(x[0], out_h, out_w)
            y = cuda.empty((n, c, h, w), dtype=numpy.float32)
            y_desc = cudnn.get_tensor_desc(y, h, w)

            self.filter_desc = cudnn.get_filter4d_desc(self.W)
            self.conv_desc = cudnn.get_conv2d_desc(
                (self.ph, self.pw), (self.sy, self.sx))
            if self.b is not None:
                self.bias_desc = cudnn.get_conv_bias_desc(self.b)

            libcudnn.cudnnConvolutionBackwardData(
                handle, 1, self.filter_desc.value, cudnn.get_ptr(self.W),
                x_desc.value, cudnn.get_ptr(x[0]), self.conv_desc.value,
                0, y_desc.value, cudnn.get_ptr(y))
            if self.b is not None:
                libcudnn.cudnnAddTensor(
                    handle, libcudnn.cudnnAddMode['CUDNN_ADD_SAME_C'],
                    1, self.bias_desc.value, cudnn.get_ptr(self.b),
                    1, y_desc.value, cudnn.get_ptr(y))
        else:
            handle = cuda.get_cublas_handle()
            # TODO(beam2d): Use streams
            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            x_mats = x[0].reshape(n, out_c, out_h * out_w)
            gcol = cuda.empty((n, c, self.kh, self.kw, out_h, out_w), dtype=numpy.float32)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, x_mats[i], transa='T', handle=handle,
                                  out=gcol_mats[i])
            y = conv.col2im_gpu(
                gcol, self.sy, self.sx, self.ph, self.pw, h, w)
            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                cuda.elementwise(
                    'float* y, const float* b, int c, int hw',
                    'y[i] += b[i / hw % c]',
                    'conv_bias_fwd')(y, self.b, c, h * w)
        return y,
Exemple #6
0
def _batch_matmul_gpu(a, b, out, transa=False, transb=False, transout=False):
    if transout:
        # (A B)^T = B^T A^T
        a, b, transa, transb = b, a, not transb, not transa
    a = _as_batch_mat(a)
    b = _as_batch_mat(b)
    alpha = numpy.float32(1.0)
    beta = numpy.float32(0.0)
    l, m, k = a.shape
    if transa:
        m, k = k, m
    n = b.shape[1] if transb else b.shape[2]
    return cuda.cublas.cublasSgemmBatched(
        cuda.get_cublas_handle(),
        _as_trans_op(transb),
        _as_trans_op(transa),
        n, m, k, alpha,
        _mat_ptrs(b).gpudata, k if transb else n,
        _mat_ptrs(a).gpudata, m if transa else k,
        beta, _mat_ptrs(out).gpudata, n, l)
Exemple #7
0
def _batch_matmul_gpu(a, b, out, transa=False, transb=False, transout=False):
    if transout:
        # (A B)^T = B^T A^T
        a, b, transa, transb = b, a, not transb, not transa
    a = _as_batch_mat(a)
    b = _as_batch_mat(b)
    alpha = numpy.float32(1.0)
    beta = numpy.float32(0.0)
    l, m, k = a.shape
    if transa:
        m, k = k, m
    n = b.shape[1] if transb else b.shape[2]
    return cuda.cublas.cublasSgemmBatched(
        cuda.get_cublas_handle(),
        _as_trans_op(transb),
        _as_trans_op(transa),
        n, m, k, alpha,
        _mat_ptrs(b).gpudata, k if transb else n,
        _mat_ptrs(a).gpudata, m if transa else k,
        beta, _mat_ptrs(out).gpudata, n, l)
Exemple #8
0
    def backward_gpu(self, x, gy):
        out_c, out_h, out_w = gy[0].shape[1:]
        n, c, h, w = x[0].shape

        if cuda.cudnn_enabled and self.use_cudnn:
            handle = cudnn.get_handle()
            x_desc = cudnn.create_tensor_descriptor(x[0])
            gy_arr = gy[0]
            if not gy_arr.flags.c_contiguous:
                gy_arr = cuda.cupy.ascontiguousarray(gy_arr)
            gy_desc = cudnn.create_tensor_descriptor(gy_arr)
            one = ctypes.c_float(1)
            zero = ctypes.c_float(0)
            if self.b is not None:
                libcudnn.convolutionBackwardBias(handle, one, gy_desc.value,
                                                 gy_arr.data.ptr, one,
                                                 self.bias_desc.value,
                                                 self.gb.data.ptr)

            libcudnn.convolutionBackwardFilter(handle, one, x_desc.value,
                                               x[0].data.ptr, gy_desc.value,
                                               gy_arr.data.ptr,
                                               self.conv_desc.value, one,
                                               self.filter_desc.value,
                                               self.gW.data.ptr)

            gx = cuda.empty_like(x[0])
            libcudnn.convolutionBackwardData(handle, one,
                                             self.filter_desc.value,
                                             self.W.data.ptr, gy_desc.value,
                                             gy_arr.data.ptr,
                                             self.conv_desc.value, zero,
                                             x_desc.value, gx.data.ptr)
        else:
            handle = cuda.get_cublas_handle()
            if self.gb is not None:
                self.gb += gy[0].sum(axis=(0, 2, 3))

            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(n, c * self.kh * self.kw,
                                        out_h * out_w)
            gy_mats = gy[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(gy_mats[i],
                                      col_mats[i],
                                      gW_mat,
                                      transb='T',
                                      handle=handle)

            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            gcol = cuda.empty_like(self.col)
            gcol_mats = gcol.reshape(n, c * self.kh * self.kw, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat,
                                  gy_mats[i],
                                  transa='T',
                                  handle=handle,
                                  out=gcol_mats[i])

            gx = conv.col2im_gpu(gcol, self.sy, self.sx, self.ph, self.pw, h,
                                 w)

        return gx,
    def backward_gpu(self, x, gy):
        n, out_c, out_h, out_w = x[0].shape
        c, h, w = gy[0].shape[1:]
        gx = cuda.empty((n, out_c, out_h, out_w), dtype=numpy.float32)
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            gy_desc = cudnn.get_tensor_desc(gy[0], h, w)
            gx_desc = cudnn.get_tensor_desc(gx, out_h, out_w)

            algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
                handle, gy_desc.value, self.filter_desc.value,
                self.conv_desc.value, gx_desc.value, _fwd_pref,
                self.max_workspace_size)
            workspace_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
                handle, gy_desc.value, self.filter_desc.value,
                self.conv_desc.value, gx_desc.value, algo).value
            workspace = cuda.empty(
                (max(workspace_size // 4, 1),), dtype=numpy.float32)

            libcudnn.cudnnConvolutionForward(
                handle, 1, gy_desc.value, cudnn.get_ptr(gy[0]),
                self.filter_desc.value, cudnn.get_ptr(self.W),
                self.conv_desc.value, algo, cudnn.get_ptr(
                    workspace), workspace_size,
                0, gx_desc.value, cudnn.get_ptr(gx))
            # bias backward
            if self.b is not None:
                libcudnn.cudnnConvolutionBackwardBias(
                    handle, 1, gy_desc.value, cudnn.get_ptr(gy[0]),
                    1, self.bias_desc.value, cudnn.get_ptr(self.gb))
            # filter backward
            libcudnn.cudnnConvolutionBackwardFilter(
                handle, 1, gy_desc.value, cudnn.get_ptr(gy[0]),
                gx_desc.value, cudnn.get_ptr(x[0]), self.conv_desc.value,
                1, self.filter_desc.value, cudnn.get_ptr(self.gW))
        else:
            # Implementation using im2col
            col = conv.im2col_gpu(
                gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)

            # TODO(beam2d): Use streams
            handle = cuda.get_cublas_handle()
            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            col_mats = col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            gx_mats = gx.reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, col_mats[i], handle=handle,
                                  out=gx_mats[i])
            # bias backward
            if self.gb is not None:
                # TODO(beam2d): Unify kernels
                with cuda.using_cumisc(handle):
                    tmp = cuda.cumisc.sum(
                        gy[0].reshape(n * c, h * w), axis=1)
                    tmp = cuda.cumisc.sum(tmp.reshape(n, c), axis=0)
                    self.gb += tmp
            # filter backward
            # TODO(beam2d): Use streams
            gW_mat = self.gW.reshape(out_c, c * self.kh * self.kw)
            x_mats = x[0].reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.add_dot(
                    x_mats[i], col_mats[i], gW_mat, transb='T', handle=handle)
        return gx,
    def forward_gpu(self, x):
        n, c, h, w = x[0].shape
        out_h = conv.get_conv_outsize(h, self.kh, self.sy, self.ph)
        out_w = conv.get_conv_outsize(w, self.kw, self.sx, self.pw)
        out_c = self.W.shape[0]
        y = cuda.empty((n, out_c, out_h, out_w), dtype=numpy.float32)

        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            x_desc = cudnn.get_tensor_desc(x[0], h, w)
            y_desc = cudnn.get_tensor_desc(y, out_h, out_w)

            self.filter_desc = cudnn.get_filter4d_desc(self.W)
            self.conv_desc = cudnn.get_conv2d_desc(
                (self.ph, self.pw), (self.sy, self.sx))
            if self.b is not None:
                self.bias_desc = cudnn.get_conv_bias_desc(self.b)

            algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, _fwd_pref,
                self.max_workspace_size)
            workspace_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
                handle, x_desc.value, self.filter_desc.value,
                self.conv_desc.value, y_desc.value, algo).value
            workspace = cuda.empty(
                (max(workspace_size // 4, 1),), dtype=numpy.float32)

            libcudnn.cudnnConvolutionForward(
                handle, 1, x_desc.value, cudnn.get_ptr(x[0]),
                self.filter_desc.value, cudnn.get_ptr(self.W),
                self.conv_desc.value, algo, cudnn.get_ptr(
                    workspace), workspace_size,
                0, y_desc.value, cudnn.get_ptr(y))

            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                libcudnn.cudnnAddTensor(
                    handle, libcudnn.cudnnAddMode['CUDNN_ADD_SAME_C'],
                    1, self.bias_desc.value, cudnn.get_ptr(self.b),
                    1, y_desc.value, cudnn.get_ptr(y))
        else:
            # Implementation using im2col
            self.col = conv.im2col_gpu(
                x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw)

            # TODO(beam2d): Use streams
            handle = cuda.get_cublas_handle()
            W_mat = self.W.reshape(out_c, c * self.kh * self.kw)
            col_mats = self.col.reshape(
                n, c * self.kh * self.kw, out_h * out_w)
            y_mats = y.reshape(n, out_c, out_h * out_w)
            for i in moves.range(n):
                cuda.culinalg.dot(W_mat, col_mats[i], handle=handle,
                                  out=y_mats[i])

            # TODO(beam2d): Support unshared bias
            if self.b is not None:
                cuda.elementwise(
                    'float* y, const float* b, int c, int hw',
                    'y[i] += b[i / hw % c]',
                    'conv_bias_fwd')(y, self.b, out_c, out_h * out_w)

        return y,
Exemple #11
0
import numpy as np
from chainer import cuda

try:
    handle = cuda.get_cublas_handle()
except:
    pass

def dot(A, B, out, transa='n', transb='n', 
            alpha=np.float32(1.0)):
        """
        This is just the blas-routine Sgemm:
        out = alpha*A.dot(B)
        where default alpha is 1 and default beta is 0
        """
        beta=np.float(0.0)
        if transa == 't':
            l, n = A.shape
        else:
            n, l = A.shape
        if transb == 't':
            m, k = B.shape
        else:
            k, m = B.shape
        if l != k:
            raise ValueError('objects are not aligned')
        if out.shape != (n, m) or out.dtype != A.dtype:
            raise ValueError('invalid value for c_gpu')
        cuda.cublas.cublasSgemm(handle, transb, transa, m, n, k, alpha, B.gpudata,
                np.int32(B.shape[1]), A.gpudata, np.int32(A.shape[1]), beta, 
                out.gpudata, np.int32(out.shape[1]))