    def backward(self, top, propagate_down, bottom):
        with pu.caffe_cuda_context():
            h = caffe.cublas_handle()
            import scikits.cuda.linalg as linalg
            top_diff = top[0].diff_as_pycuda_gpuarray()
            ts = [self.t1_, self.t2_]
            for i in xrange(len(bottom)):
                if not propagate_down[i]:
                    continue
                diff = bottom[i].diff_as_pycuda_gpuarray()
                data = bottom[(i + 1) % 2].data_as_pycuda_gpuarray()
                # The swap/transpose logic below is subtle: depending on which
                # input we are differentiating and whether each input was
                # transposed in the forward pass, the gradient is either
                # top_diff times the other input or the other input times
                # top_diff, with the transpose flags chosen to match.
                swap = ts[i] ^ bool(i)
                t1 = ts[i]
                t2 = (not t1) ^ ts[(i + 1) % 2]
                for b in xrange(bottom[0].shape[0]):
                    x = top_diff[b]
                    y = data[b]
                    t1_, t2_ = t1, t2
                    if swap:
                        x, y = y, x
                        t1_, t2_ = t2_, t1_
                    linalg.dot(x, y,
                               transa=blas_trans(t1_),
                               transb=blas_trans(t2_),
                               handle=h,
                               out=diff[b])
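The swap/transpose bookkeeping above implements the standard gradients of C = op(A) op(B), where op() optionally transposes its argument. A small, self-contained NumPy check of the four transpose cases (a sketch only; A, B, G and the helper names are illustrative, not part of the layer):

import numpy as np

def op(X, t):
    return X.T if t else X

def num_grad(f, X, eps=1e-6):
    # Forward differences; exact up to rounding because f is linear in X.
    g = np.zeros_like(X)
    for idx in np.ndindex(*X.shape):
        Xp = X.copy()
        Xp[idx] += eps
        g[idx] = (f(Xp) - f(X)) / eps
    return g

rng = np.random.RandomState(0)
for t1 in (False, True):
    for t2 in (False, True):
        A = rng.randn(*((4, 3) if t1 else (3, 4)))
        B = rng.randn(*((5, 4) if t2 else (4, 5)))
        G = rng.randn(3, 5)  # upstream gradient w.r.t. C = op(A) op(B)
        loss = lambda A_, B_: (G * op(A_, t1).dot(op(B_, t2))).sum()
        dA = op(B, t2).dot(G.T) if t1 else G.dot(op(B, t2).T)
        dB = G.T.dot(op(A, t1)) if t2 else op(A, t1).T.dot(G)
        assert np.allclose(dA, num_grad(lambda X: loss(X, B), A), atol=1e-4)
        assert np.allclose(dB, num_grad(lambda X: loss(A, X), B), atol=1e-4)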
    def forward(self, bottom, top):
        """

        """
        with pu.caffe_cuda_context():
            h = caffe.cublas_handle()
            batch_size = bottom[0].shape[0]
            dim = bottom[0].count / bottom[0].shape[0]
            pred = bottom[0].data_as_pycuda_gpuarray()
            label = bottom[1].data_as_pycuda_gpuarray()
            mask = bottom[2].data_as_pycuda_gpuarray()
            # Use bottom[0,1].diff as temporary buffer
            diff = bottom[0].diff_as_pycuda_gpuarray()
            diff2 = bottom[1].diff_as_pycuda_gpuarray()
            # Compute diff
            self.k_masked_diff_(diff, pred, label, mask)
            self.k_squared_(diff, diff2)
            import scikits.cuda.linalg as linalg
            # Requires scikits.cuda >= 0.5.0a3:
            #     pip install "scikits.cuda>=0.5.0a3"
            linalg.dot(diff.reshape(batch_size, dim), self.multipier_sum_,
                       handle=h, out=self.diff_sum_)
            linalg.dot(diff2.reshape(batch_size, dim), self.multipier_sum_,
                       handle=h, out=self.diff2_sum_)
            linalg.dot(mask.reshape(batch_size, dim), self.multipier_sum_,
                       handle=h, out=self.mask_sum_)
            self.k_ensure_mask_sum_(self.mask_sum_)
            term1 = self.k_div_sum_(self.diff2_sum_, self.mask_sum_)
            term2 = self.k_div_squared_sum_(self.diff_sum_, self.mask_sum_)
            top[0].data[...] = (term1.get() - self.lambda_ * term2.get()) \
                / batch_size
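For reference, a NumPy sketch of the same forward computation; the function and array names are illustrative and not part of the layer:

import numpy as np

def masked_scale_invariant_loss(pred, label, mask, lam):
    # pred, label, mask: (batch, dim) float arrays; mirrors the GPU path above.
    d = (pred - label) * mask                      # k_masked_diff_
    n = np.maximum(mask.sum(axis=1), 1.0)          # mask_sum_ after k_ensure_mask_sum_
    term1 = (d ** 2).sum(axis=1) / n               # diff2_sum_ / mask_sum_
    term2 = (d.sum(axis=1) / n) ** 2               # (diff_sum_ / mask_sum_)^2
    return (term1 - lam * term2).sum() / pred.shape[0]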
Example #4
    def forward(self, bottom, top):
        # Shapes (from the original debug prints): self.diff is (batch, 65536),
        # bottom[0].data is (batch, 11), bottom[1].data is (batch, 65536, 11),
        # and bottom[2].data / bottom[3].data are (batch, 65536).
        with pu.caffe_cuda_context():
            linalg.init()
            for i in range(self.diff.shape[0]):
                # Per-sample matrix-vector product on the GPU:
                # (65536, 11) x (11,) -> (65536,)
                a = bottom[1].data[i, :, :].astype(np.float32)
                b = bottom[0].data[i, :].astype(np.float32)
                a_gpu = gpuarray.to_gpu(a)
                b_gpu = gpuarray.to_gpu(b)
                c_gpu = linalg.dot(a_gpu, b_gpu)
                # Residual: A_i x_i + bottom[2]_i - bottom[3]_i (ravel in case
                # the GPU result comes back as a column vector).
                self.diff[i, :] = c_gpu.get().ravel() \
                    + bottom[2].data[i, :] - bottom[3].data[i, :]
            top[0].data[...] = np.sum(self.diff**2) / bottom[3].num / 2.
            # transDiff = (diff / num)^T, shape (65536, batch), kept on the GPU.
            a_gpu = gpuarray.to_gpu(self.diff / bottom[3].num)
            at_gpu = linalg.transpose(a_gpu)
            self.transDiff = at_gpu
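A standalone sanity check of the scikits.cuda matrix-vector product used above against NumPy; it assumes a working CUDA device with pycuda and scikits.cuda installed, and the names are illustrative:

import numpy as np
import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
import pycuda.gpuarray as gpuarray
import scikits.cuda.linalg as linalg

linalg.init()
a = np.random.rand(65536, 11).astype(np.float32)
b = np.random.rand(11).astype(np.float32)
c_gpu = linalg.dot(gpuarray.to_gpu(a), gpuarray.to_gpu(b))
assert np.allclose(np.ravel(c_gpu.get()), a.dot(b), atol=1e-3)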
    def forward(self, bottom, top):
        with pu.caffe_cuda_context():
            h = caffe.cublas_handle()
            import scikits.cuda.linalg as linalg
            mat1 = bottom[0].data_as_pycuda_gpuarray()
            mat2 = bottom[1].data_as_pycuda_gpuarray()
            mato = top[0].data_as_pycuda_gpuarray()
            for b in xrange(bottom[0].shape[0]):
                linalg.dot(mat1[b], mat2[b],
                           transa=blas_trans(self.t1_),
                           transb=blas_trans(self.t2_),
                           handle=h, out=mato[b])
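The loop above computes one GEMM per sample, i.e. C_b = op(mat1_b) op(mat2_b) for each b in the batch. An equivalent NumPy sketch for reference (illustrative names, not part of the layer):

import numpy as np

def batched_matmul(mat1, mat2, t1=False, t2=False):
    # mat1: (batch, m, k), or (batch, k, m) if t1; mat2 analogous.
    op1 = np.swapaxes(mat1, 1, 2) if t1 else mat1
    op2 = np.swapaxes(mat2, 1, 2) if t2 else mat2
    return np.matmul(op1, op2)  # (batch, m, n)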
    def reshape(self, bottom, top):
        with pu.caffe_cuda_context():

            batch_size = bottom[0].shape[0]
            if self.batch_size_ != batch_size:
                self.batch_size_ = batch_size
                self.diff_sum_ = gpuarray.zeros((batch_size, 1), dtype)
                self.diff2_sum_ = gpuarray.zeros((batch_size, 1), dtype)
                self.mask_sum_ = gpuarray.zeros((batch_size, 1), dtype)
            dim = int(np.prod(bottom[0].shape[1:]))
            if self.dim_ != dim:
                self.dim_ = dim
                # All-ones (dim, 1) column; multiplying a (batch, dim) matrix
                # by it (see forward) reduces each row to its sum in one GEMM.
                self.multipier_sum_ = gpuarray.zeros((dim, 1), dtype)
                self.multipier_sum_.fill(dtype(1.0))
        top[0].reshape()
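multipier_sum_ is an all-ones column, so the linalg.dot calls in forward reduce each row of a (batch, dim) matrix to its sum with a single GEMM. A NumPy analogue of that trick (illustrative names):

import numpy as np
d = np.random.rand(8, 5).astype(np.float32)   # (batch, dim), e.g. diff
ones = np.ones((5, 1), dtype=np.float32)      # multipier_sum_
assert np.allclose(d.dot(ones), d.sum(axis=1, keepdims=True))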
Example #9
    def backward(self, top, propagate_down, bottom):

        # self.nPCAcoms equals bottom[0].data.shape[1], the number of PCA
        # components predicted by the network.
        with pu.caffe_cuda_context():
            linalg.init()
            # CPU equivalent of the loop below:
            #   bottom[0].diff[:, i] = np.trace(
            #       np.dot(bottom[1].data[:, :, i], self.transDiff))
            for i in range(self.nPCAcoms):
                a = bottom[1].data[:, :, i]
                b_gpu = self.transDiff
                a_gpu = gpuarray.to_gpu(a)
                c_gpu = linalg.dot(a_gpu, b_gpu)
                # Trace of the (batch, batch) product = sum over the batch of
                # per-sample dot products <A[b, :, i], diff[b, :] / num>.
                d_gpu = linalg.trace(c_gpu)
                bottom[0].diff[:, i] = d_gpu
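The trace trick above relies on the identity tr(A D^T) = sum_b <A[b, :], D[b, :]>, i.e. the trace of the (batch, batch) product accumulates the per-sample dot products. A quick NumPy check (illustrative names):

import numpy as np
rng = np.random.RandomState(0)
A = rng.randn(6, 10)  # bottom[1].data[:, :, i]: (batch, dim)
D = rng.randn(6, 10)  # self.diff / num: (batch, dim); transDiff is D.T
assert np.allclose(np.trace(A.dot(D.T)), np.sum(A * D))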
    def backward(self, top, propagate_down, bottom):
        r"""
        Compute @f$\frac{\partial {\cal L}}{\partial y_{bi}} = \frac{\partial {\cal L}}{\partial d_i}\,\frac{\partial d_i}{\partial y_{bi}}@f$,
        where @f$\frac{\partial {\cal L}}{\partial d_i} = \frac{2}{n}\left(d_i - \frac{\lambda}{n}\sum_j d_j\right)@f$.
        """
        with pu.caffe_cuda_context():
            pred = bottom[0].data_as_pycuda_gpuarray()
            label = bottom[1].data_as_pycuda_gpuarray()
            mask = bottom[2].data_as_pycuda_gpuarray()
            for i in xrange(len(bottom) - 1):
                if propagate_down[i]:
                    diff = bottom[i].diff_as_pycuda_gpuarray()
                    sgn = 1 if i == 0 else -1
                    self.k_backward_(
                        pred, label, mask, self.diff_sum_, self.mask_sum_, sgn,
                        diff)
Example #11
    def backward(self, top, propagate_down, bottom):
        r"""
        Compute @f$\frac{\partial {\cal L}}{\partial y_{bi}} = \frac{\partial {\cal L}}{\partial d_i}\,\frac{\partial d_i}{\partial y_{bi}}@f$,
        where @f$\frac{\partial {\cal L}}{\partial d_i} = \frac{2}{n}\left(d_i - \frac{\lambda}{n}\sum_j d_j\right)@f$.
        """
        with pu.caffe_cuda_context():
            pred = bottom[0].data_as_pycuda_gpuarray()
            label = bottom[1].data_as_pycuda_gpuarray()
            mask = bottom[2].data_as_pycuda_gpuarray()
            for i in xrange(len(bottom) - 1):
                if propagate_down[i]:
                    diff = bottom[i].diff_as_pycuda_gpuarray()
                    sgn = 1 if i == 0 else -1
                    self.k_backward_(pred, label, mask, self.diff_sum_,
                                     self.mask_sum_, sgn, top[0].diff, diff)
                    if self.clip_gradient_ is not None:
                        self.k_clip_gradient(diff)
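For reference, a NumPy rendering of the 'backward' CUDA kernel that setup compiles below; this is a sketch with illustrative names, where sgn selects the pred (+1) or label (-1) branch:

import numpy as np

def masked_scale_invariant_grad(pred, label, mask, lam, loss_weight, sgn=1):
    # Mirrors the CUDA 'backward' kernel: per-sample row sums n and s.
    batch_size = pred.shape[0]
    n = np.maximum(mask.sum(axis=1, keepdims=True), 1.0)    # mask_sum_
    s = ((pred - label) * mask).sum(axis=1, keepdims=True)  # diff_sum_
    return (loss_weight * mask * 2.0 * sgn / n / batch_size
            * ((pred - label) - lam / n * s))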
    def forward(self, bottom, top):
        with pu.caffe_cuda_context():
            self.k_log_(bottom[0].data_as_pycuda_gpuarray(),
                        top[0].data_as_pycuda_gpuarray(),
                        np.float32(self.offset_))
    def setup(self, bottom, top):
        assert len(bottom) == 3
        assert len(top) == 1
        # Layer parameters arrive as a Python dict literal in param_str_,
        # e.g. "{'lambda': 1.0, 'clip_gradient': 0.1}" (values illustrative).
        param = eval(self.param_str_)
        self.lambda_ = param['lambda']
        self.clip_gradient_ = param.get('clip_gradient', None)
        # Create CUDA function
        with pu.caffe_cuda_context():
            self.k_masked_diff_ = ElementwiseKernel(
                "float *diff, float *pred, float *label, float *mask",
                "diff[i] = (pred[i] - label[i]) * mask[i]", 'masked_diff')
            self.k_squared_ = ElementwiseKernel(
                "float *diff, float *diff2",
                "diff2[i] = diff[i] * diff[i]", 'squared')
            self.k_ensure_mask_sum_ = ElementwiseKernel(
                "float *mask_sum",
                "mask_sum[i] = max(mask_sum[i], 1.0f)", 'ensure_mask_sum')
            if self.clip_gradient_ is not None:
                self.k_clip_gradient = ElementwiseKernel(
                    "float *diff",
                    "diff[i] = fmaxf(-{0}, fminf(diff[i], {0}))".format(
                        self.clip_gradient_),
                    'clip_gradient')
            # This could be computed faster with cublasSdot.
            self.k_sum_ = ReductionKernel(
                dtype, neutral="0",
                reduce_expr="a+b", map_expr="d[i]",
                arguments="float *d")
            self.k_squred_sum_ = ReductionKernel(
                dtype, neutral="0",
                reduce_expr="a+b", map_expr="d[i] * d[i]",
                arguments="float *d")
            self.k_div_sum_ = ReductionKernel(
                dtype, neutral="0",
                reduce_expr="a+b",
                map_expr="d[i] / m[i]",
                arguments="float *d, float *m")
            self.k_div_squared_sum_ = ReductionKernel(
                dtype, neutral="0",
                reduce_expr="a+b",
                map_expr="d[i] * d[i] / (m[i] * m[i])",
                arguments="float *d, float *m")
            func_backward = SourceModule(
                """
#include <caffe/util/device_alternate.hpp>
__global__ void backward(float *pred, float *label, float *mask,
  float *diff_sum, float *mask_sum, int count, int stride, int sgn,
  int batch_size, float lambda, float loss_weight, float *diff) {
  CUDA_KERNEL_LOOP(i, count) {
    diff[i] = loss_weight * mask[i] * 2.0f * sgn / mask_sum[i / stride]
         / batch_size * ((pred[i] - label[i])
            - lambda / mask_sum[i / stride] * diff_sum[i / stride]);
  }
}
""", include_dirs=pu.caffe_include_dirs).get_function("backward")
            func_backward.prepare("PPPPPiiiiffP")

            def _func_backward(pred, label, mask, ds, ms, sgn, loss_weight,
                               diff):
                bg = pu.block_and_grid(pred.size)
                batch_size = pred.shape[0]
                count = pred.size
                stride = pred.size / pred.shape[0]
                func_backward.prepared_call(
                    bg['grid'], bg['block'],
                    pred.gpudata, label.gpudata, mask.gpudata, ds.gpudata,
                    ms.gpudata, count, stride, sgn, batch_size,
                    self.lambda_, loss_weight,
                    diff.gpudata)
            self.k_backward_ = _func_backward
        self.batch_size_ = 0
        self.dim_ = 0
        self.reshape(bottom, top)
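The gradient-clipping kernel above bakes the clip limit into the kernel source via str.format. A standalone pycuda sketch of the same ElementwiseKernel pattern, passing the limit as a kernel argument instead (assumes pycuda and a CUDA device; names are illustrative):

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
from pycuda.elementwise import ElementwiseKernel

clip = ElementwiseKernel(
    "float *diff, float limit",
    "diff[i] = fmaxf(-limit, fminf(diff[i], limit))",
    "clip_gradient")

d = gpuarray.to_gpu(np.array([-2.0, -0.05, 0.3, 5.0], dtype=np.float32))
clip(d, np.float32(0.5))
print(d.get())  # -> [-0.5  -0.05  0.3   0.5 ]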