def test_lr_scheduler_with_no_actual_scheduler_raises(tmpdir):
    """Test exception when lr_scheduler dict has no scheduler."""
    model = BoringModel()
    model.configure_optimizers = lambda: {"optimizer": optim.Adam(model.parameters()), "lr_scheduler": {}}
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match='The lr scheduler dict must have the key "scheduler"'):
        trainer.fit(model)

def test_multiple_optimizer_config_dicts_with_extra_keys_warns(tmpdir):
    """Test warning when multiple optimizer configuration dicts have extra keys."""
    model = BoringModel()
    optimizer1 = optim.Adam(model.parameters(), lr=0.01)
    optimizer2 = optim.Adam(model.parameters(), lr=0.01)
    lr_scheduler_config_1 = {"scheduler": optim.lr_scheduler.StepLR(optimizer1, 1)}
    lr_scheduler_config_2 = {"scheduler": optim.lr_scheduler.StepLR(optimizer2, 1)}
    optim_conf = [
        {"optimizer": optimizer1, "lr_scheduler": lr_scheduler_config_1, "foo": 1, "bar": 2},
        {"optimizer": optimizer2, "lr_scheduler": lr_scheduler_config_2, "foo": 1, "bar": 2},
    ]
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the optimizer configuration: \{.+\}"):
        TrainerOptimizersMixin._configure_optimizers(optim_conf)

def test_onecyclelr_with_epoch_interval_warns():
    """Test warning when a OneCycleLR is used and interval is epoch."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    lr_scheduler = {"scheduler": optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=3)}
    with pytest.warns(RuntimeWarning, match="Are you sure you didn't mean 'interval': 'step'?"):
        TrainerOptimizersMixin._configure_schedulers([lr_scheduler], None, False)

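# For reference, a sketch (not part of the test suite) of the configuration the warning above
# points toward: OneCycleLR is meant to step once per optimizer step, so the lr_scheduler dict
# should declare "interval": "step". The `max_lr`/`total_steps` values are illustrative only.
def _example_onecyclelr_step_interval_config(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=3),
            "interval": "step",
        },
    }
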
def test_reducelronplateau_with_no_monitor_raises(tmpdir):
    """Test exception when a ReduceLROnPlateau is used with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: ([optimizer], [optim.lr_scheduler.ReduceLROnPlateau(optimizer)])
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(
        MisconfigurationException, match="`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`"
    ):
        trainer.fit(model)

def test_reducelronplateau_with_no_monitor_in_lr_scheduler_dict_raises(tmpdir):
    """Test exception when lr_scheduler dict has a ReduceLROnPlateau with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer)},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="must include a monitor when a `ReduceLROnPlateau`"):
        trainer.fit(model)

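# For reference, a sketch (not part of the test suite) of a `configure_optimizers` return value
# that satisfies the check exercised by the two tests above: when the scheduler is a
# `ReduceLROnPlateau`, the lr_scheduler dict must also carry a "monitor" key naming a logged
# metric ("val_loss" here is only an illustrative placeholder).
def _example_reducelronplateau_config(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer),
            "monitor": "val_loss",
        },
    }
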
def test_lr_scheduler_with_extra_keys_warns(tmpdir):
    """Test warning when lr_scheduler dict has extra keys."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "foo": 1, "bar": 2},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the lr scheduler dict: \[.+\]"):
        trainer.fit(model)

def test_lr_scheduler_with_unknown_interval_raises(tmpdir):
    """Test exception when lr_scheduler dict has unknown interval param value."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "interval": "incorrect_unknown_value"},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match=r'The "interval" key in lr scheduler dict must be'):
        trainer.fit(model)

# The two decorators below are inferred from the test's parameters: `step_mock` patches
# `ReduceLROnPlateau.step` so the test can assert the scheduler is never stepped, and
# `complete_epoch` is parametrized over both runs (assumes `from unittest import mock`
# at module level).
@mock.patch("torch.optim.lr_scheduler.ReduceLROnPlateau.step")
@pytest.mark.parametrize("complete_epoch", [True, False])
def test_lr_scheduler_strict(step_mock, tmpdir, complete_epoch):
    """Test "strict" support in lr_scheduler dict."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    max_epochs = 1 if complete_epoch else None
    max_steps = -1 if complete_epoch else 1
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=max_epochs, max_steps=max_steps)

    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": True},
    }

    if complete_epoch:
        with pytest.raises(
            MisconfigurationException,
            match=r"ReduceLROnPlateau conditioned on metric .* which is not available\. Available metrics are:",
        ):
            trainer.fit(model)
    else:
        trainer.fit(model)

    step_mock.assert_not_called()

    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": False},
    }

    if complete_epoch:
        trainer = Trainer(default_root_dir=tmpdir, max_epochs=max_epochs, max_steps=max_steps)
        with pytest.warns(
            RuntimeWarning, match=r"ReduceLROnPlateau conditioned on metric .* which is not available but strict"
        ):
            trainer.fit(model)

    step_mock.assert_not_called()

def test_optimizer_config_dict_with_extra_keys_warns(tmpdir):
    """Test warning when the optimizer configuration dict has extra keys."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    optim_conf = {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1)},
        "foo": 1,
        "bar": 2,
    }
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the optimizer configuration: \{.+\}"):
        TrainerOptimizersMixin._configure_optimizers(optim_conf)

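# For reference, a sketch (not part of the test suite) of an optimizer configuration dict built
# only from keys exercised elsewhere in this file ("optimizer", "lr_scheduler", "frequency"),
# i.e. one that would not trigger the unsupported-keys warning above.
def _example_clean_optimizer_config(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1)},
        "frequency": 1,
    }
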
def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
    """Test to ensure that checkpoint is saved correctly when using multiple GPUs."""
    model = BoringModel()
    trainer = Trainer(gpus=2, strategy="ddp_sharded_spawn", fast_dev_run=True)
    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, "model.pt")
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param.to("cpu"), shard_param)

def test_async_algorithm(tmpdir):
    """Test that training with the Bagua async algorithm runs on two GPUs."""
    model = BoringModel()
    bagua_strategy = BaguaStrategy(algorithm="async")
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=1,
        strategy=bagua_strategy,
        accelerator="gpu",
        devices=2,
    )
    trainer.fit(model)

    for param in model.parameters():
        assert torch.norm(param) < 3

def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
    """Test to ensure that checkpoint is saved correctly."""
    model = BoringModel()
    trainer = Trainer(accelerator="ddp_sharded_spawn", num_processes=2, fast_dev_run=True)
    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, "model.pt")
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param.to("cpu"), shard_param)

def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
    """Test to ensure that checkpoint is saved correctly when using multiple GPUs."""
    model = BoringModel()
    trainer = Trainer(
        gpus=2,
        accelerator="ddp_spawn",
        plugins=[DDPShardedPlugin()],
        fast_dev_run=True,
    )
    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, "model.pt")
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param, shard_param)

def test_optimizer_return_options(tmpdir):
    """Test that the supported `configure_optimizers` return formats are normalized correctly."""
    trainer = Trainer(default_root_dir=tmpdir)
    model = BoringModel()

    opt_a = optim.Adam(model.parameters(), lr=0.002)
    opt_b = optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = optim.lr_scheduler.StepLR(opt_b, 10)

    # single optimizer
    model.configure_optimizers = lambda: opt_a
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == 1 and len(lr_sched) == len(freq) == 0

    # opt tuple
    model.configure_optimizers = lambda: (opt_a, opt_b)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0

    # opt list
    model.configure_optimizers = lambda: [opt_a, opt_b]
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0

    ref_lr_sched = dict(
        scheduler=scheduler_a,
        interval="epoch",
        frequency=1,
        reduce_on_plateau=False,
        monitor=None,
        strict=True,
        name=None,
        opt_idx=None,
    )

    # opt tuple of 2 lists
    model.configure_optimizers = lambda: ([opt_a], [scheduler_a])
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt tuple of 1 list
    model.configure_optimizers = lambda: ([opt_a], scheduler_a)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt single dictionary
    model.configure_optimizers = lambda: {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt multiple dictionaries with frequencies
    model.configure_optimizers = lambda: (
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    )
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == len(freq) == 2
    assert opt[0] == opt_a

    ref_lr_sched["opt_idx"] = 0
    assert lr_sched[0] == ref_lr_sched
    ref_lr_sched["scheduler"] = scheduler_b
    ref_lr_sched["opt_idx"] = 1
    assert lr_sched[1] == ref_lr_sched
    assert freq == [1, 5]

def test_v1_8_0_deprecate_trainer_callback_hook_mixin():
    """Test that the Trainer callback hook mixin methods emit a deprecation warning (deprecated in v1.6, removed in v1.8)."""
    methods_with_self = [
        "on_before_accelerator_backend_setup",
        "on_configure_sharded_model",
        "on_init_start",
        "on_init_end",
        "on_fit_start",
        "on_fit_end",
        "on_sanity_check_start",
        "on_sanity_check_end",
        "on_train_epoch_start",
        "on_train_epoch_end",
        "on_validation_epoch_start",
        "on_validation_epoch_end",
        "on_test_epoch_start",
        "on_test_epoch_end",
        "on_predict_epoch_start",
        "on_epoch_start",
        "on_epoch_end",
        "on_train_start",
        "on_train_end",
        "on_pretrain_routine_start",
        "on_pretrain_routine_end",
        "on_batch_start",
        "on_batch_end",
        "on_validation_start",
        "on_validation_end",
        "on_test_start",
        "on_test_end",
        "on_predict_start",
        "on_predict_end",
        "on_after_backward",
    ]
    methods_with_stage = ["setup", "teardown"]
    methods_with_batch_batch_idx_dataloader_idx = [
        "on_train_batch_start",
        "on_validation_batch_start",
        "on_test_batch_start",
        "on_predict_batch_start",
    ]
    methods_with_outputs_batch_batch_idx_dataloader_idx = [
        "on_train_batch_end",
        "on_validation_batch_end",
        "on_test_batch_end",
        "on_predict_batch_end",
    ]
    methods_with_checkpoint = ["on_save_checkpoint", "on_load_checkpoint"]

    trainer = Trainer(
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
        enable_progress_bar=False,
        logger=False,
    )
    model = BoringModel()
    # need to attach model to trainer for testing of `on_pretrain_routine_start`
    trainer.fit(model)

    for method_name in methods_with_self:
        fn = getattr(trainer, method_name, None)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn()

    for method_name in methods_with_stage:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(stage="test")

    for method_name in methods_with_batch_batch_idx_dataloader_idx:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(batch={}, batch_idx=0, dataloader_idx=0)

    for method_name in methods_with_outputs_batch_batch_idx_dataloader_idx:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(outputs=torch.tensor([[1.0, -1.0], [1.0, -1.0]]), batch={}, batch_idx=0, dataloader_idx=0)

    for method_name in methods_with_checkpoint:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(checkpoint={})

    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_predict_epoch_end(outputs=torch.tensor([[1.0, -1.0], [1.0, -1.0]]))

    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_exception(exception=Exception)

    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_backward(loss=torch.tensor([[1.0, -1.0], [1.0, -1.0]]))

    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_optimizer_step(
            optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.9), optimizer_idx=0
        )

    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_zero_grad(optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.9))