Example #1
    def test_list_input(self):
        """ Suppose the link to be wrapped takes a list of inputs """
        with chainer.using_config('dtype', chainer.mixed16):
            x = chainer.Variable(
                np.random.normal(size=(1, 2, 3, 3)).astype('float16'))
            link = MultiHeadLink()
            ys = loss_scaling(link(x), 16)

            loss = sum(ys)
            loss.grad = np.ones_like(loss.array)
            loss.backward()

            self.assertTrue('loss_scale' in x.grad_var.__dict__)
            self.assertEqual(x.grad_var.__dict__['loss_scale'], 16)
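MultiHeadLink is not defined in this snippet. A minimal sketch of what such a link could look like, assuming only that its forward pass returns a list of outputs (which is what the test relies on); the class body below is an illustration, not the actual implementation:

import chainer
import chainer.links as L


class MultiHeadLink(chainer.Chain):
    """ A toy link whose forward pass returns a list of outputs (two conv heads) """

    def __init__(self):
        super().__init__()
        with self.init_scope():
            # The input in the test above has 2 channels (shape (1, 2, 3, 3)).
            self.head1 = L.Convolution2D(2, 2, ksize=3, pad=1)
            self.head2 = L.Convolution2D(2, 2, ksize=3, pad=1)

    def forward(self, x):
        # Returning a list exercises the list-input handling of loss_scaling.
        return [self.head1(x), self.head2(x)]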
Example #2
    def test_backward(self):
        """ Check that gradients from ada_loss_convolution_2d carry the loss scale """
        x = chainer.Variable(
            np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
        W = chainer.Variable(
            np.random.normal(size=(4, 3, 3, 3)).astype('float16'))
        y = loss_scaling(
            ada_loss_convolution_2d(
                x, W, ada_loss=AdaLossChainer(loss_scale_method='fixed')), 2.)
        y.grad = np.ones_like(y.array)
        y.backward()

        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        self.assertTrue(hasattr(W.grad_var, 'loss_scale'))
        # scaled down
        self.assertEqual(getattr(W.grad_var, 'loss_scale'), 1.0)
Example #3
    def test_backward(self):
        """ """
        x = chainer.Variable(
            np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
        gamma = chainer.Variable(np.random.normal(size=(3)).astype('float16'))
        beta = chainer.Variable(np.random.normal(size=(3)).astype('float16'))

        y = loss_scaling(ada_loss_batch_normalization(x, gamma, beta), 2.)
        y.grad = np.ones_like(y.array)
        y.backward()

        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        self.assertEqual(getattr(x.grad_var, 'loss_scale'), 2.0)
        self.assertTrue(hasattr(gamma.grad_var, 'loss_scale'))
        self.assertEqual(getattr(gamma.grad_var, 'loss_scale'), 1.0)
        self.assertTrue(hasattr(beta.grad_var, 'loss_scale'))
        self.assertEqual(getattr(beta.grad_var, 'loss_scale'), 1.0)
Example #4
    def test_backward(self):
        with chainer.using_config('dtype', chainer.mixed16):
            x = chainer.Variable(
                np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
            link = chainer.Sequential(
                AdaLossConv2DBNActiv(3,
                                     4,
                                     ksize=3,
                                     ada_loss_cfg={'fixed_loss_scale': 2}),
                lambda x: loss_scaling(x, 16.),
            )
            y = link(x)
            y.grad = np.ones_like(y.array, dtype=np.float16)
            y.backward()

        # grad_var can propagate
        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        self.assertEqual(getattr(x.grad_var, 'loss_scale'), 2 * 16)
Example #5
    def test_backward(self):
        with chainer.using_config('dtype', chainer.mixed16):
            x = chainer.Variable(
                np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
            link = chainer.Sequential(
                AdaLossBasicBlock(3,
                                  3,
                                  ada_loss_cfg={
                                      'fixed_loss_scale': 2,
                                      'loss_scale_method': 'fixed'
                                  }),
                lambda x: loss_scaling(x, 16.),
            )
            y = link(x)
            y.grad = np.ones_like(y.array, dtype=np.float16)
            y.backward()

        # grad_var can propagate
        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        # NOTE: left term is the residual branch
        self.assertEqual(getattr(x.grad_var, 'loss_scale'), 16 * 2 * 2)
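The tests above only check that the accumulated scale is recorded on grad_var; nothing here divides it back out. A hypothetical helper (not part of the library) that uses only the loss_scale attribute demonstrated by these tests to unscale parameter gradients before an optimizer update could look like:

import numpy as np


def unscale_grads(link):
    """ Divide each parameter gradient by the loss scale attached to its grad_var """
    for param in link.params():
        gv = param.grad_var
        if gv is not None and hasattr(gv, 'loss_scale'):
            scale = getattr(gv, 'loss_scale')
            # Divide in float32 to avoid float16 underflow, then cast back
            # to the parameter's dtype.
            param.grad = (gv.array.astype(np.float32) / scale).astype(gv.array.dtype)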
Example #6
    def test_backward(self):
        dtype = np.float16
        x_data = np.random.normal(size=(2, 4)).astype(dtype)
        W_data = np.random.normal(size=(3, 4)).astype(dtype)
        b_data = np.random.normal(size=(3)).astype(dtype)
        g_data = np.random.normal(size=(2, 3)).astype(dtype)

        x = chainer.Variable(x_data)
        W = chainer.Variable(W_data)
        b = chainer.Variable(b_data)

        # no loss scaling
        y1 = F.linear(x, W, b=b)
        y1.grad = g_data
        y1.backward()

        W_grad1 = W.grad
        x_grad1 = x.grad
        b_grad1 = b.grad

        x = chainer.Variable(x_data)
        W = chainer.Variable(W_data)
        b = chainer.Variable(b_data)
        # with loss scaling
        y2 = loss_scaling(
            ada_loss_linear(x,
                            W,
                            b=b,
                            ada_loss=AdaLossChainer(loss_scale_method='fixed',
                                                    fixed_loss_scale=2.0)),
            2.0)
        y2.grad = g_data
        y2.backward()

        self.assertTrue(np.allclose(x.grad, x_grad1 * 4))
        self.assertTrue(np.allclose(W.grad, W_grad1))
        self.assertTrue(np.allclose(b.grad, b_grad1))
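The factor of 4 in the first assertion is simply the two scales multiplying along the path back to x: 2.0 from loss_scaling times 2.0 from fixed_loss_scale, while W.grad and b.grad are expected to be rescaled back to match the unscaled run. Since F.linear computes y = x W^T + b, the gradient with respect to x is g W, so a standalone numpy sketch of that arithmetic (independent of the library) is:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(2, 4)).astype(np.float32)
W = rng.normal(size=(3, 4)).astype(np.float32)
g = rng.normal(size=(2, 3)).astype(np.float32)  # upstream gradient dL/dy

# For y = x @ W.T + b, the gradient w.r.t. x is g @ W.
x_grad = g @ W                  # unscaled run
x_grad_scaled = (4.0 * g) @ W   # upstream gradient scaled by 2.0 * 2.0

assert np.allclose(x_grad_scaled, 4.0 * x_grad)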
Example #7
    def forward(self, x):
        """ Forward computation """
        return loss_scaling(self.link(x), self.init_scale)
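Only the forward method is shown here. A minimal sketch of a wrapper link it could belong to; the class name, constructor, and the origin of loss_scaling are assumptions for illustration:

import chainer


class LossScaledLink(chainer.Chain):
    """ Wrap a link and apply loss_scaling to its output on every forward pass """

    def __init__(self, link, init_scale=16.0):
        super().__init__()
        self.init_scale = init_scale
        with self.init_scope():
            self.link = link

    def forward(self, x):
        """ Forward computation """
        return loss_scaling(self.link(x), self.init_scale)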