import pytest
import torch

from pytorch_lightning import Trainer
# assumed test-suite layout: EvalModelTemplate lives in the Lightning tests package
from tests.base import EvalModelTemplate


def test_gradient_clipping(tmpdir):
    """Test gradient clipping."""
    model = EvalModelTemplate()

    # test that gradient is clipped correctly
    def _optimizer_step(*args, **kwargs):
        parameters = model.parameters()
        grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2)
        assert (grad_norm - 1.0).abs() < 0.01, f"Gradient norm != 1.0: {grad_norm}"

    trainer = Trainer(
        max_steps=1,
        max_epochs=1,
        gradient_clip_val=1.0,
        default_root_dir=tmpdir,
    )

    # for the test
    model.optimizer_step = _optimizer_step
    model.prev_called_batch_idx = 0

    trainer.fit(model)
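
# For reference, a minimal sketch of the behaviour the test above asserts
# (an assumption based on the standard ``torch.nn.utils.clip_grad_norm_``
# utility, not necessarily Lightning's internal code path): after backward,
# the total 2-norm over all parameter gradients is rescaled to at most
# ``gradient_clip_val``, which is the quantity ``_optimizer_step`` recomputes.
def _manual_clipping_sketch():
    model = torch.nn.Linear(4, 2)
    model(torch.randn(8, 4)).sum().backward()

    # clip the global gradient norm to 1.0, mirroring gradient_clip_val=1.0
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    grad_norm = torch.norm(torch.stack([p.grad.detach().norm(2) for p in model.parameters()]), 2)
    assert grad_norm <= 1.0 + 1e-6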

# skip guard added: precision=16 with gpus=1 requires a CUDA device
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_gradient_clipping_fp16(tmpdir):
    """Test gradient clipping with fp16."""
    model = EvalModelTemplate()

    trainer = Trainer(
        max_steps=1,
        max_epochs=1,
        precision=16,
        gpus=1,
        gradient_clip_val=1.0,
        default_root_dir=tmpdir,
    )

    trainer.train_loop.old_training_step_and_backward = trainer.train_loop.training_step_and_backward

    def training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens):
        """Wrap the forward step in a closure so second order methods work."""
        # test that gradient is clipped correctly
        ret_val = trainer.train_loop.old_training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
        parameters = model.parameters()
        grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2)
        assert (grad_norm - 1.0).abs() < 0.01, f"Gradient norm != 1.0: {grad_norm}"

        return ret_val

    trainer.train_loop.training_step_and_backward = training_step_and_backward
    model.prev_called_batch_idx = 0

    trainer.fit(model)

# NOTE: the parametrize decorator was missing and is reconstructed here;
# expected[i] is the accumulation factor asserted for epoch 0, epochs 1-2,
# and epochs 3+ respectively. The int schedules assume a constant factor.
@pytest.mark.parametrize(["schedule", "expected"], [
    ({1: 2, 3: 4}, [1, 2, 4]),
    (3, [3, 3, 3]),
    (4, [4, 4, 4]),
])
def test_gradient_accumulation_scheduling(tmpdir, schedule, expected):
    """Test grad accumulation by the freq of optimizer updates."""
    # test incorrect configs
    with pytest.raises(IndexError):
        assert Trainer(accumulate_grad_batches={-1: 3, 1: 4, 4: 6})
    with pytest.raises(IndexError):
        assert Trainer(accumulate_grad_batches={-2: 3})
    with pytest.raises(TypeError):
        assert Trainer(accumulate_grad_batches={})
    with pytest.raises(TypeError):
        assert Trainer(accumulate_grad_batches=[[2, 3], [4, 6]])
    with pytest.raises(TypeError):
        assert Trainer(accumulate_grad_batches={1: 2, 3.: 4})
    with pytest.raises(TypeError):
        assert Trainer(accumulate_grad_batches={1: 2.5, 3: 5})

    model = EvalModelTemplate()

    trainer = Trainer(
        accumulate_grad_batches=schedule,
        limit_train_batches=0.8,
        limit_val_batches=0.8,
        max_epochs=4,
        default_root_dir=tmpdir,
    )

    # test optimizer call freq matches scheduler
    def _optimizer_step(epoch, batch_idx, optimizer, optimizer_idx,
                        second_order_closure=None, on_tpu=False,
                        using_native_amp=False, using_lbfgs=False):
        # only test the first 12 batches in epoch
        if batch_idx < 12:
            if epoch == 0:
                # reset counter when starting epoch
                if batch_idx == expected[0] - 1:
                    model.prev_called_batch_idx = expected[0] - 1

                    # use this opportunity to test once
                    assert trainer.accumulate_grad_batches == expected[0]

                assert batch_idx == model.prev_called_batch_idx
                model.prev_called_batch_idx += expected[0]

            elif 1 <= epoch <= 2:
                # reset counter when starting epoch
                if batch_idx == expected[1] - 1:
                    model.prev_called_batch_idx = expected[1] - 1

                    # use this opportunity to test once
                    assert trainer.accumulate_grad_batches == expected[1]

                assert batch_idx == model.prev_called_batch_idx
                model.prev_called_batch_idx += expected[1]

            else:
                if batch_idx == expected[2] - 1:
                    model.prev_called_batch_idx = expected[2] - 1

                    # use this opportunity to test once
                    assert trainer.accumulate_grad_batches == expected[2]

                assert batch_idx == model.prev_called_batch_idx
                model.prev_called_batch_idx += expected[2]

        optimizer.step()

        # clear gradients
        optimizer.zero_grad()

    # for the test
    model.optimizer_step = _optimizer_step
    model.prev_called_batch_idx = 0

    trainer.fit(model)
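
# For context, a minimal sketch of the update pattern the scheduling test
# asserts (a plain-PyTorch assumption, not Lightning's internals): gradients
# are summed in ``.grad`` across ``accumulate`` consecutive batches, and the
# optimizer only steps (and zeroes grads) on every ``accumulate``-th batch.
def _manual_accumulation_sketch(accumulate=2, num_batches=8):
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    for batch_idx in range(num_batches):
        loss = model(torch.randn(8, 4)).sum()
        loss.backward()  # .grad accumulates across backward calls

        # step only on every `accumulate`-th batch, as the scheduler would
        if (batch_idx + 1) % accumulate == 0:
            optimizer.step()
            optimizer.zero_grad()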