Example #1
    def backward(self, indexes, grad_outputs):
        x, W = self.get_retained_inputs()
        gy, = grad_outputs
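        # `indexes` lists which inputs need gradients (0: x, 1: W, 2: b);
        # `grad_outputs` holds the gradient w.r.t. this function's sole output.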

        ret = []
        if 0 in indexes:
            if self.cover_all is None:
                self._set_cover_all(x, W)
            gx = chainer.functions.convolution_2d(gy,
                                                  W,
                                                  stride=(self.sy, self.sx),
                                                  pad=(self.ph, self.pw),
                                                  cover_all=self.cover_all,
                                                  dilate=(self.dy, self.dx),
                                                  groups=self.groups)
            ret.append(gx)
        if 1 in indexes:
            if self.cover_all is None:
                self._set_cover_all(x, W)
            gW, = convolution_2d.Convolution2DGradW(self).apply((gy, x))
            ret.append(gW)
        if 2 in indexes:
            gb = chainer.functions.sum(gy, axis=(0, 2, 3))
            ret.append(gb)

        return ret
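
For context, this backward pass runs whenever a graph containing
chainer.functions.deconvolution_2d is differentiated. A minimal sketch with
toy shapes (chosen only for illustration) that exercises all three branches,
i.e. the gradients for x (index 0), W (index 1), and b (index 2):

import numpy as np
import chainer
import chainer.functions as F

# Toy shapes: batch 2, 3 input channels, 5 output channels, 3x3 kernel.
x = chainer.Variable(np.random.randn(2, 3, 7, 7).astype(np.float32))
W = chainer.Variable(np.random.randn(3, 5, 3, 3).astype(np.float32))
b = chainer.Variable(np.zeros(5, dtype=np.float32))

y = F.deconvolution_2d(x, W, b, stride=2, pad=1)
y.grad = np.ones(y.shape, dtype=np.float32)
y.backward()  # dispatches to the backward() method shown above

print(x.grad.shape, W.grad.shape, b.grad.shape)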
Example #2
    def backward(self, indexes, grad_outputs):
        x, W = self.get_retained_inputs()
        gy, = grad_outputs

        # NOTE: this is where the ada_loss scaling hook is invoked
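        # Assumption: loss_scaling returns a scaled copy of gy (s_gy) for the
        # gx path and the unscaled gy (u_gy) for the parameter gradients.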
        s_gy, u_gy = self.ada_loss.loss_scaling(gy, W)

        ret = []
        if 0 in indexes:
            xh, xw = x.shape[2:]
            gx = chainer.functions.deconvolution_2d(s_gy,
                                                    W,
                                                    stride=(self.sy, self.sx),
                                                    pad=(self.ph, self.pw),
                                                    outsize=(xh, xw),
                                                    dilate=(self.dy, self.dx),
                                                    groups=self.groups)
            ret.append(gx)
        if 1 in indexes:
            gW, = convolution_2d.Convolution2DGradW(self).apply((x, u_gy))
            ret.append(gW)
        if 2 in indexes:
            gb = chainer.functions.sum(u_gy, axis=(0, 2, 3))
            ret.append(gb)

        return ret
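
The ada_loss collaborator itself is not part of this snippet. A hypothetical
stand-in that satisfies the interface used above, assuming a fixed
power-of-two scale (the real object chooses the scale adaptively from
gradient statistics):

class FixedLossScaler(object):
    """Hypothetical stand-in for the ada_loss object (illustration only)."""

    def __init__(self, scale=2.0 ** 4):
        self.scale = scale

    def loss_scaling(self, gy, W):
        # The scaled copy (s_gy) feeds the gx path so that small values stay
        # representable in low precision; the unscaled original (u_gy) feeds
        # the parameter gradients gW and gb.
        return gy * self.scale, gy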
Example #3
    def backward(self, indexes, grad_outputs):
        x, W = self.get_retained_inputs()
        gy, = grad_outputs

        # NOTE: this is where the ada_loss scaling hook is invoked
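        # Assumption: gy_ is gy rescaled to this layer's loss scale, while
        # prev_scale records the scale gy arrived with, so that gW and gb
        # can be unscaled below.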
        gy_, prev_scale = self.ada_loss.loss_scaling(gy, W)

        ret = []
        if 0 in indexes:
            xh, xw = x.shape[2:]
            gx = chainer.functions.deconvolution_2d(gy_,
                                                    W,
                                                    stride=(self.sy, self.sx),
                                                    pad=(self.ph, self.pw),
                                                    outsize=(xh, xw),
                                                    dilate=(self.dy, self.dx),
                                                    groups=self.groups)
            if (self.ada_loss.sanity_checker
                    and self.ada_loss.recorder.current_iteration %
                    self.ada_loss.sanity_checker.check_per_n_iter == 0):
                curr_iter = self.ada_loss.recorder.current_iteration
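                # Assumption: gx_ recomputes the input gradient from the
                # unscaled gy, and gx2_ is a float32 reference; the checker
                # compares both against the scaled gx every
                # check_per_n_iter iterations.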
                gx_ = chainer.functions.deconvolution_2d(
                    gy,
                    W,
                    stride=(self.sy, self.sx),
                    pad=(self.ph, self.pw),
                    outsize=(xh, xw),
                    dilate=(self.dy, self.dx),
                    groups=self.groups)
                gx2_ = chainer.functions.deconvolution_2d(
                    F.cast(gy, 'float32'),
                    F.cast(W, 'float32'),
                    stride=(self.sy, self.sx),
                    pad=(self.ph, self.pw),
                    outsize=(xh, xw),
                    dilate=(self.dy, self.dx),
                    groups=self.groups)
                self.ada_loss.sanity_checker.check(gy, W, gx, gx_, gx2_,
                                                   gy_.__dict__['loss_scale'],
                                                   self.ada_loss.n_uf,
                                                   curr_iter)
            # NOTE: here we pass the loss scale through gx's __dict__
            self.ada_loss.set_loss_scale(gx,
                                         self.ada_loss.grad_loss_scale(gy_))
            ret.append(gx)
        if 1 in indexes:
            gW, = convolution_2d.Convolution2DGradW(self).apply((x, gy))
            gW_ = self.ada_loss.get_unscaled_gradient(gW, prev_scale)
            ret.append(gW_)
        if 2 in indexes:
            gb = chainer.functions.sum(gy, axis=(0, 2, 3))
            gb_ = self.ada_loss.get_unscaled_gradient(gb, prev_scale)
            ret.append(gb_)

        return ret
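
Example #3 threads the accumulated loss scale through each gradient
Variable's __dict__ and divides it back out of the parameter gradients. A
hypothetical sketch of the three helpers it relies on, assuming scales
accumulate multiplicatively across layers:

class AdaLossHelpers(object):
    """Hypothetical sketch of the ada_loss helpers used in Example #3."""

    def grad_loss_scale(self, v):
        # Scale currently attached to a gradient Variable (1.0 if absent).
        return v.__dict__.get('loss_scale', 1.0)

    def set_loss_scale(self, v, scale):
        # Attach the accumulated scale so the next layer reached in the
        # backward pass can read it back and compensate.
        v.__dict__['loss_scale'] = scale

    def get_unscaled_gradient(self, g, scale):
        # Parameter gradients must reach the optimizer unscaled, so the
        # accumulated scale is divided back out here.
        return g / scale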