def test_list_input(self):
    """Suppose the link to be wrapped takes a list of inputs."""
    with chainer.using_config('dtype', chainer.mixed16):
        x = chainer.Variable(
            np.random.normal(size=(1, 2, 3, 3)).astype('float16'))
        link = MultiHeadLink()
        ys = loss_scaling(link(x), 16)
        loss = sum(ys)
        loss.grad = np.ones_like(loss.array)
        loss.backward()

        self.assertTrue('loss_scale' in x.grad_var.__dict__)
        self.assertEqual(x.grad_var.__dict__['loss_scale'], 16)
def test_backward(self):
    """The loss scale is attached to the gradients, and the weight
    gradient is scaled back down to 1.0."""
    x = chainer.Variable(
        np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
    W = chainer.Variable(
        np.random.normal(size=(4, 3, 3, 3)).astype('float16'))
    y = loss_scaling(
        ada_loss_convolution_2d(
            x, W, ada_loss=AdaLossChainer(loss_scale_method='fixed')),
        2.)
    y.grad = np.ones_like(y.array)
    y.backward()

    self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
    self.assertTrue(hasattr(W.grad_var, 'loss_scale'))
    # scaled down
    self.assertEqual(getattr(W.grad_var, 'loss_scale'), 1.0)
def test_backward(self):
    """The loss scale propagates to the input, while the gamma and beta
    gradients are scaled back down to 1.0."""
    x = chainer.Variable(
        np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
    gamma = chainer.Variable(np.random.normal(size=(3,)).astype('float16'))
    beta = chainer.Variable(np.random.normal(size=(3,)).astype('float16'))
    y = loss_scaling(ada_loss_batch_normalization(x, gamma, beta), 2.)
    y.grad = np.ones_like(y.array)
    y.backward()

    self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
    self.assertEqual(getattr(x.grad_var, 'loss_scale'), 2.0)
    self.assertTrue(hasattr(gamma.grad_var, 'loss_scale'))
    self.assertEqual(getattr(gamma.grad_var, 'loss_scale'), 1.0)
    self.assertTrue(hasattr(beta.grad_var, 'loss_scale'))
    self.assertEqual(getattr(beta.grad_var, 'loss_scale'), 1.0)
def test_backward(self):
    with chainer.using_config('dtype', chainer.mixed16):
        x = chainer.Variable(
            np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
        link = chainer.Sequential(
            AdaLossConv2DBNActiv(
                3, 4, ksize=3, ada_loss_cfg={'fixed_loss_scale': 2}),
            lambda x: loss_scaling(x, 16.),
        )
        y = link(x)
        y.grad = np.ones_like(y.array, dtype=np.float16)
        y.backward()

        # grad_var can propagate
        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        self.assertEqual(getattr(x.grad_var, 'loss_scale'), 2 * 16)
def test_backward(self):
    with chainer.using_config('dtype', chainer.mixed16):
        x = chainer.Variable(
            np.random.normal(size=(1, 3, 4, 4)).astype('float16'))
        link = chainer.Sequential(
            AdaLossBasicBlock(
                3, 3,
                ada_loss_cfg={
                    'fixed_loss_scale': 2,
                    'loss_scale_method': 'fixed',
                }),
            lambda x: loss_scaling(x, 16.),
        )
        y = link(x)
        y.grad = np.ones_like(y.array, dtype=np.float16)
        y.backward()

        # grad_var can propagate
        self.assertTrue(hasattr(x.grad_var, 'loss_scale'))
        # NOTE: the left term is the residual branch
        self.assertEqual(getattr(x.grad_var, 'loss_scale'), 16 * 2 * 2)
def test_backward(self):
    dtype = np.float16
    x_data = np.random.normal(size=(2, 4)).astype(dtype)
    W_data = np.random.normal(size=(3, 4)).astype(dtype)
    b_data = np.random.normal(size=(3,)).astype(dtype)
    g_data = np.random.normal(size=(2, 3)).astype(dtype)

    x = chainer.Variable(x_data)
    W = chainer.Variable(W_data)
    b = chainer.Variable(b_data)
    # no loss scaling
    y1 = F.linear(x, W, b=b)
    y1.grad = g_data
    y1.backward()
    W_grad1 = W.grad
    x_grad1 = x.grad
    b_grad1 = b.grad

    x = chainer.Variable(x_data)
    W = chainer.Variable(W_data)
    b = chainer.Variable(b_data)
    # with loss scaling
    y2 = loss_scaling(
        ada_loss_linear(
            x, W, b=b,
            ada_loss=AdaLossChainer(loss_scale_method='fixed',
                                    fixed_loss_scale=2.0)),
        2.0)
    y2.grad = g_data
    y2.backward()

    # the input gradient carries both scales (2.0 * 2.0 = 4), while the
    # parameter gradients match the unscaled run
    self.assertTrue(np.allclose(x.grad, x_grad1 * 4))
    self.assertTrue(np.allclose(W.grad, W_grad1))
    self.assertTrue(np.allclose(b.grad, b_grad1))
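# A minimal, self-contained sketch of the property the test above relies on,
# using only plain Chainer and NumPy (no code from the library under test):
# backpropagation is linear, so scaling the output of a graph by a constant
# scales every input gradient by the same constant. The function name
# `_loss_scaling_sketch` is illustrative only and not part of the test suite.
def _loss_scaling_sketch():
    x = chainer.Variable(np.random.normal(size=(2, 4)).astype(np.float32))
    W = chainer.Variable(np.random.normal(size=(3, 4)).astype(np.float32))

    # unscaled backward pass
    y = F.linear(x, W)
    y.grad = np.ones_like(y.array)
    y.backward()
    g_ref = x.grad.copy()

    # scale the output by 4 and run backward again
    x.cleargrad()
    W.cleargrad()
    y = F.linear(x, W) * 4.0
    y.grad = np.ones_like(y.array)
    y.backward()

    # the input gradient is scaled by exactly the same factor
    assert np.allclose(x.grad, g_ref * 4.0)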
def forward(self, x):
    """Forward computation."""
    return loss_scaling(self.link(x), self.init_scale)