def backward_gpu(self, x, gy):
    """Compute the softmax gradient with respect to the input on the GPU.

    Combines ``self.y`` (presumably the softmax output cached by the
    forward pass -- confirm against the forward method) with the upstream
    gradient ``gy[0]``.

    Args:
        x: Sequence whose first element is the input array. It is only
            used on the cuDNN path, to size the result and to build the
            tensor descriptor.
        gy: Sequence whose first element is the gradient with respect to
            the output.

    Returns:
        A 1-tuple holding the gradient with respect to ``x[0]``.
    """
    if cudnn.enabled and self.use_cudnn:
        cudnn_handle = cudnn.get_default_handle()
        grad_x = cuda.empty_like(x[0])
        tensor_desc = cudnn.get_tensor_desc(x[0], 1, 1)
        # NOTE(review): the literal 1 and 0 look like the alpha/beta
        # scaling factors of the cuDNN call -- confirm against the
        # libcudnn wrapper in use.
        libcudnn.cudnnSoftmaxBackward(
            cudnn_handle, _algorithm, _mode, 1,
            tensor_desc.value, cudnn.get_ptr(self.y),
            tensor_desc.value, cudnn.get_ptr(gy[0]),
            0, tensor_desc.value, cudnn.get_ptr(grad_x))
        return (grad_x,)

    # Fallback kernels: gx = y * gy - y * sum_j(y_j * gy_j), per row.
    # The kernels treat the buffer as rows of length shape[1], so this
    # path presumably expects 2-D arrays -- TODO confirm.
    grad_x = self.y * gy[0]
    n_row = grad_x.shape[0]
    n_col = grad_x.shape[1]
    row_sums = cuda.empty((n_row,), dtype=numpy.float32)

    # Pass 1: one thread per row accumulates sum_j(y_j * gy_j).
    sum_kernel = cuda.elementwise(
        'float* sum_ydy, const float* ydy, int c',
        (' const float* row = ydy + i * c;'
         ' float sum = 0;'
         ' for (int j = 0; j < c; ++j) { sum += row[j]; }'
         ' sum_ydy[i] = sum; '),
        'softmax_bwd_sum_ydy')
    sum_kernel(row_sums, grad_x, n_col)

    # Pass 2: one thread per element subtracts y times its row's sum.
    diff_kernel = cuda.elementwise(
        'float* gx, const float* y, const float* sum_ydy, int c',
        'gx[i] -= y[i] * sum_ydy[i / c]',
        'softmax_bwd_diff')
    diff_kernel(grad_x, self.y, row_sums, n_col)

    return (grad_x,)
def backward_gpu(self, x, gy):
    """Return the gradient of softmax with respect to its input (GPU).

    The result is derived from ``self.y`` (presumably the output stored
    by the forward pass -- verify against the forward method) and the
    upstream gradient ``gy[0]``.

    Args:
        x: Sequence whose first element is the input array. Only the
            cuDNN path reads it, for allocation and the tensor descriptor.
        gy: Sequence whose first element is the gradient with respect to
            the output.

    Returns:
        A 1-tuple containing the gradient with respect to ``x[0]``.
    """
    if not (cudnn.enabled and self.use_cudnn):
        # Hand-written path: gx = y * gy - y * sum_j(y_j * gy_j) per row.
        # Rows are addressed with stride shape[1], so the input is
        # presumably 2-D -- TODO confirm.
        out_grad = self.y * gy[0]
        width = out_grad.shape[1]
        per_row_sum = cuda.empty(
            (out_grad.shape[0],), dtype=numpy.float32)

        # First kernel: reduce each row of y * gy to a single sum.
        reduce_rows = cuda.elementwise(
            'float* sum_ydy, const float* ydy, int c',
            (' const float* row = ydy + i * c;'
             ' float sum = 0;'
             ' for (int j = 0; j < c; ++j) { sum += row[j]; }'
             ' sum_ydy[i] = sum; '),
            'softmax_bwd_sum_ydy')
        reduce_rows(per_row_sum, out_grad, width)

        # Second kernel: subtract y * row_sum from every element in place.
        subtract_rows = cuda.elementwise(
            'float* gx, const float* y, const float* sum_ydy, int c',
            'gx[i] -= y[i] * sum_ydy[i / c]',
            'softmax_bwd_diff')
        subtract_rows(out_grad, self.y, per_row_sum, width)
        return (out_grad,)

    # cuDNN path: the library writes the gradient into a fresh buffer.
    dnn_handle = cudnn.get_default_handle()
    out_grad = cuda.empty_like(x[0])
    x_desc = cudnn.get_tensor_desc(x[0], 1, 1)
    # NOTE(review): 1 and 0 appear to be the alpha/beta scaling factors
    # expected by cudnnSoftmaxBackward -- confirm with the wrapper.
    libcudnn.cudnnSoftmaxBackward(
        dnn_handle, _algorithm, _mode, 1,
        x_desc.value, cudnn.get_ptr(self.y),
        x_desc.value, cudnn.get_ptr(gy[0]),
        0, x_desc.value, cudnn.get_ptr(out_grad))
    return (out_grad,)
def backward_gpu(self, x, gy):
    """Compute the softmax input gradient on the GPU for N-d arrays.

    Axis 1 is treated as the channel axis over which the sum is taken;
    all axes from 2 onward are flattened into ``n_unit`` positions.
    ``self.y`` is presumably the softmax output cached by the forward
    pass -- confirm against the forward method.

    Args:
        x: Sequence whose first element is the input array. Its shape
            determines ``n_unit``; the cuDNN path also uses it to
            allocate the result and build the tensor descriptor.
        gy: Sequence whose first element is the gradient with respect to
            the output.

    Returns:
        A 1-tuple holding the gradient with respect to ``x[0]``.
    """
    # Number of positions per (sample, channel); 1 when x[0] is 2-D.
    trailing_dims = x[0].shape[2:]
    n_unit = int(numpy.prod(trailing_dims))

    if cudnn.enabled and self.use_cudnn:
        cudnn_handle = cudnn.get_default_handle()
        grad_x = cuda.empty_like(x[0])
        tensor_desc = cudnn.get_tensor_desc(x[0], n_unit, 1)
        # NOTE(review): the literal 1 and 0 look like the alpha/beta
        # scaling factors of the cuDNN call -- confirm with the wrapper.
        libcudnn.cudnnSoftmaxBackward(
            cudnn_handle, _algorithm, _mode, 1,
            tensor_desc.value, cudnn.get_ptr(self.y),
            tensor_desc.value, cudnn.get_ptr(gy[0]),
            0, tensor_desc.value, cudnn.get_ptr(grad_x))
    else:
        # gx = y * gy - y * sum_c(y_c * gy_c), summed over axis 1.
        grad_x = self.y * gy[0]
        n_channel = grad_x.shape[1]
        reduced_shape = (grad_x.shape[0],) + grad_x.shape[2:]
        channel_sums = cuda.empty(reduced_shape, dtype=numpy.float32)

        # Pass 1: one thread per (sample, position) walks the channel
        # axis with stride n_unit and stores the sum.
        sum_kernel = cuda.elementwise(
            'float* sum_ydy, const float* ydy, int n_channel, int n_unit',
            (' const int n = i / n_unit;'
             ' const int m = i % n_unit;'
             ' const float* row = ydy + n * n_channel * n_unit + m;'
             ' float sum = 0;'
             ' for (int c = 0; c < n_channel; ++c)'
             ' { sum += row[c * n_unit]; }'
             ' sum_ydy[i] = sum; '),
            'softmax_bwd_sum_ydy')
        sum_kernel(channel_sums, grad_x, n_channel, n_unit)

        # Pass 2: one thread per element subtracts y times the sum that
        # belongs to its (sample, position) pair.
        diff_kernel = cuda.elementwise(
            (' float* gx, const float* y, const float* sum_ydy,'
             ' int n_channel, int n_unit '),
            (' const int n = i / (n_channel * n_unit);'
             ' const int m = i % n_unit;'
             ' gx[i] -= y[i] * sum_ydy[n * n_unit + m]; '),
            'softmax_bwd_diff')
        diff_kernel(grad_x, self.y, channel_sums, n_channel, n_unit)

    return (grad_x,)