Code example #1
    def forward_gpu(self, inputs):
        x, targets = inputs
        N = x.shape[0]

        # Linear layer: z = x . W^T (+ b)
        z = cuda.empty((N, self.no_labels), dtype=np.float32)
        cuk.dot(x, self.W, out=z, transb='t')
        if not self.nobias:
            cuk.addVec2Mat(z, self.b)

        # Softmax is computed in place: self.probs aliases z
        self.probs = z
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            desc = cudnn.get_tensor_desc(z, 1, 1)
            libcudnn.cudnnSoftmaxForward(
                handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(z),
                0, desc.value, cudnn.get_ptr(self.probs))
        else:
            cuk.softmax(z, self.probs)

        if self.return_probs:
            return self.probs,

        if self.compute_loss:
            # Mean negative log-likelihood of the target classes
            correct_probs = cuda.empty((N,), dtype=np.float32)
            cuk.getByIndex_LogAndClip(
                self.probs, targets, out=correct_probs)
            loss = -cuda.cumisc.sum(correct_probs, keepdims=True) / N
        else:
            loss = np.atleast_2d(np.array(np.nan, dtype=np.float32))

        return loss,
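
For reference, the same forward pass can be written on the CPU with plain NumPy: a linear projection, a numerically stable row-wise softmax, and the mean negative log-likelihood of the target classes. The sketch below is illustrative only; the function name, signature, and the clipping constant are assumptions, not part of the project above.

import numpy as np

def softmax_cross_entropy_cpu(x, targets, W, b=None):
    # Linear projection; W has shape (n_labels, n_features), so z is (N, n_labels)
    z = x.dot(W.T)
    if b is not None:
        z += b
    # Numerically stable row-wise softmax
    z = z - z.max(axis=1, keepdims=True)
    probs = np.exp(z)
    probs /= probs.sum(axis=1, keepdims=True)
    # Mean negative log-likelihood of the target classes
    # (clipping avoids log(0); the exact clip value is an assumption)
    N = x.shape[0]
    correct = np.clip(probs[np.arange(N), targets], 1e-20, 1.0)
    loss = -np.log(correct).sum() / N
    return loss, probs
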
Code example #2
    def forward_gpu(self, x):
        y = cuda.empty_like(x[0])
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            desc = cudnn.get_tensor_desc(x[0], 1, 1)
            libcudnn.cudnnSoftmaxForward(
                handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(x[0]),
                0, desc.value, cudnn.get_ptr(y))
            self.y = y
        else:
            # Row-wise maximum, subtracted before exponentiation for numerical stability
            maxes = cuda.empty((x[0].shape[0],), dtype=numpy.float32)
            c = x[0].shape[1]
            cuda.elementwise(
                'float* maxes, const float* x, int c',
                '''
                   const float* row = x + i * c;
                   float maxval = row[0];
                   for (int j = 1; j < c; ++j) {
                     if (maxval < row[j]) {
                       maxval = row[j];
                     }
                   }
                   maxes[i] = maxval;
                ''', 'softmax_rowmax')(maxes, x[0], c)
            # Exponentiate the shifted values: y = exp(x - rowmax)
            cuda.elementwise(
                'float* y, const float* x, const float* maxes, int c',
                'y[i] = __expf(x[i] - maxes[i / c])',
                'softmax_exp')(y, x[0], maxes, c)
            coeff = maxes  # reuse memory
            # Reciprocal of each row's sum of exponentials
            cuda.elementwise(
                'float* coeff, const float* y, int c',
                '''
                   const float* row = y + i * c;
                   float sum = 0;
                   for (int j = 0; j < c; ++j) {
                     sum += row[j];
                   }
                   coeff[i] = 1 / sum;
                ''', 'softmax_invrowsum')(coeff, y, c)
            # Scale each row by 1 / rowsum to normalize
            cuda.elementwise(
                'float* y, const float* coeff, int c', 'y[i] *= coeff[i / c]',
                'softmax_rowmul')(y, coeff, c)
            self.y = y

        return y,
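
The four elementwise kernels in the non-cuDNN branch implement the standard numerically stable softmax over axis 1: take each row's maximum, exponentiate the shifted values, and scale by the reciprocal of the row sum. As a reference point, the same computation in NumPy looks roughly like this (an illustrative sketch, not part of the original code):

import numpy as np

def softmax_2d(x):
    # x: (N, C) array; softmax along axis 1, mirroring the four kernels above
    maxes = x.max(axis=1, keepdims=True)        # softmax_rowmax
    y = np.exp(x - maxes)                       # softmax_exp
    coeff = 1.0 / y.sum(axis=1, keepdims=True)  # softmax_invrowsum
    return y * coeff                            # softmax_rowmul
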
Code example #3
File: softmax.py  Project: woodshop/complex-chainer
    def forward_gpu(self, x):
        y = cuda.empty_like(x[0])
        # Number of positions per channel; softmax runs over axis 1 at each position
        n_unit = int(numpy.prod(x[0].shape[2:]))
        if cudnn.enabled and self.use_cudnn:
            handle = cudnn.get_default_handle()
            desc = cudnn.get_tensor_desc(x[0], n_unit, 1)
            libcudnn.cudnnSoftmaxForward(
                handle, _algorithm, _mode, 1, desc.value, cudnn.get_ptr(x[0]),
                0, desc.value, cudnn.get_ptr(y))
            self.y = y
        else:
            # Per-position maximum over the channel axis, for numerical stability
            maxes_shape = (x[0].shape[0],) + x[0].shape[2:]
            maxes = cuda.empty(maxes_shape, dtype=numpy.float32)
            c = x[0].shape[1]
            cuda.elementwise(
                'float* maxes, const float* x, int n_channel, int n_unit',
                '''
                   const int n = i / n_unit;
                   const int m = i % n_unit;
                   const float* row = x + n * n_channel * n_unit + m;
                   float maxval = row[0];
                   for (int c = 1; c < n_channel; ++c) {
                     const int v = c * n_unit;
                     if (maxval < row[v]) {
                       maxval = row[v];
                     }
                   }
                   maxes[i] = maxval;
                ''', 'softmax_rowmax')(maxes, x[0], c, n_unit)
            cuda.elementwise(
                '''
                   float* y, const float* x, const float* maxes,
                   int n_channel, int n_unit
                ''',
                '''
                   const int n = i / (n_channel * n_unit);
                   const int m = i % n_unit;
                   y[i] = __expf(x[i] - maxes[n * n_unit + m]);
                ''',
                'softmax_exp')(y, x[0], maxes, c, n_unit)
            coeff = maxes  # reuse memory
            cuda.elementwise(
                'float* coeff, const float* y, int n_channel, int n_unit',
                '''
                   const int n = i / n_unit;
                   const int m = i % n_unit;
                   const float* row = y + n * n_channel * n_unit + m;
                   float sum = 0;
                   for (int c = 0; c < n_channel; ++c) {
                     sum += row[c * n_unit];
                   }
                   coeff[i] = 1 / sum;
                ''', 'softmax_invrowsum')(coeff, y, c, n_unit)
            cuda.elementwise(
                'float* y, const float* coeff, int n_channel, int n_unit',
                '''
                   const int n = i / (n_channel * n_unit);
                   const int m = i % n_unit;
                   y[i] *= coeff[n * n_unit + m];
                ''',
                'softmax_rowmul')(y, coeff, c, n_unit)
            self.y = y

        return y,
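
This variant generalizes example #2 to inputs with trailing spatial dimensions: n_unit is the number of positions per channel, and the softmax runs over the channel axis independently at each position. In NumPy terms the computation is roughly the following (an illustrative sketch, not the project's code):

import numpy as np

def softmax_over_channels(x):
    # x: (N, C, d1, d2, ...); softmax over the channel axis (axis 1),
    # computed independently at each of the prod(d1, d2, ...) positions
    maxes = x.max(axis=1, keepdims=True)
    y = np.exp(x - maxes)
    return y / y.sum(axis=1, keepdims=True)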