Exemple #1
0
    def backward_gpu(self, x, gy):
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            gx = cuda.empty_like(x[0])
            desc = cudnn.get_tensor_desc(x[0], 1, 1)
            libcudnn.cudnnSoftmaxBackward(
                handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(
                    self.y),
                desc.value, cudnn.get_ptr(gy[0]), 0, desc.value,
                cudnn.get_ptr(gx))
        else:
            gx = self.y * gy[0]
            c = gx.shape[1]
            sum_ydy = cuda.empty((gx.shape[0],), dtype=numpy.float32)
            cuda.elementwise(
                'float* sum_ydy, const float* ydy, int c',
                '''
                   const float* row = ydy + i * c;
                   float sum = 0;
                   for (int j = 0; j < c; ++j) {
                     sum += row[j];
                   }
                   sum_ydy[i] = sum;
                ''', 'softmax_bwd_sum_ydy')(sum_ydy, gx, c)
            cuda.elementwise(
                'float* gx, const float* y, const float* sum_ydy, int c',
                'gx[i] -= y[i] * sum_ydy[i / c]',
                'softmax_bwd_diff')(gx, self.y, sum_ydy, c)

        return gx,
Exemple #2
0
    def backward_gpu(self, x, gy):
        n_unit = int(numpy.prod(x[0].shape[2:]))
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            gx = cuda.empty_like(x[0])
            desc = cudnn.get_tensor_desc(x[0], n_unit, 1)
            libcudnn.cudnnSoftmaxBackward(
                handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(
                    self.y),
                desc.value, cudnn.get_ptr(gy[0]), 0, desc.value,
                cudnn.get_ptr(gx))
        else:
            gx = self.y * gy[0]
            c = gx.shape[1]
            sum_ydy_shape = (gx.shape[0],) + gx.shape[2:]
            sum_ydy = cuda.empty(sum_ydy_shape, dtype=numpy.float32)
            cuda.elementwise(
                'float* sum_ydy, const float* ydy, int n_channel, int n_unit',
                '''
                   const int n = i / n_unit;
                   const int m = i % n_unit;
                   const float* row = ydy + n * n_channel * n_unit + m;
                   float sum = 0;
                   for (int c = 0; c < n_channel; ++c) {
                     sum += row[c * n_unit];
                   }
                   sum_ydy[i] = sum;
                ''', 'softmax_bwd_sum_ydy')(sum_ydy, gx, c, n_unit)
            cuda.elementwise(
                '''
                   float* gx, const float* y, const float* sum_ydy,
                   int n_channel, int n_unit
                ''',
                '''
                   const int n = i / (n_channel * n_unit);
                   const int m = i % n_unit;
                   gx[i] -= y[i] * sum_ydy[n * n_unit + m];
                ''',
                'softmax_bwd_diff')(gx, self.y, sum_ydy, c, n_unit)

        return gx,