def test_rl_early_stopping(): gata_double_dqn = GATADoubleDQN() trainer = Trainer() es = RLEarlyStopping("val_monitor", "train_monitor", 0.95, patience=3) # if val score and train score are all below the threshold 0.95, don't stop trainer.callback_metrics = {"val_monitor": 0.1, "train_monitor": 0.1} es._run_early_stopping_check(trainer, gata_double_dqn) assert not trainer.should_stop # if val score is 1.0 and train score is above the threshold, stop trainer.callback_metrics = {"val_monitor": 1.0, "train_monitor": 0.95} trainer.current_epoch = 1 es._run_early_stopping_check(trainer, gata_double_dqn) assert trainer.should_stop assert es.stopped_epoch == 1 # if train score is above the threshold for `patience` times, # but val score is not 1.0, stop trainer.should_stop = False es.wait_count = 0 es.stopped_epoch = 0 for i in range(3): trainer.current_epoch = i trainer.callback_metrics = {"val_monitor": 0.9, "train_monitor": 0.95} es._run_early_stopping_check(trainer, gata_double_dqn) if i == 2: assert trainer.should_stop assert es.stopped_epoch == 2 else: assert not trainer.should_stop assert es.stopped_epoch == 0
def test_model_checkpoint_options(tmpdir, save_top_k, save_last, file_prefix, expected_files): """Test ModelCheckpoint options.""" def mock_save_function(filepath, *args): open(filepath, 'a').close() # simulated losses losses = [10, 9, 2.8, 5, 2.5] checkpoint_callback = ModelCheckpoint(tmpdir, save_top_k=save_top_k, save_last=save_last, prefix=file_prefix, verbose=1) checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for i, loss in enumerate(losses): trainer.current_epoch = i trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(tmpdir)) assert len(file_lists) == len(expected_files), \ "Should save %i models when save_top_k=%i" % (len(expected_files), save_top_k) # verify correct naming for fname in expected_files: assert fname in file_lists
def test_model_checkpoint_options(tmp_path): """Test ModelCheckpoint options.""" def mock_save_function(filepath): open(filepath, 'a').close() hparams = tutils.get_hparams() _ = LightningTestModel(hparams) # simulated losses save_dir = tmp_path / "1" save_dir.mkdir() losses = [10, 9, 2.8, 5, 2.5] # ----------------- # CASE K=-1 (all) checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=-1, verbose=1) checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for i, loss in enumerate(losses): trainer.current_epoch = i trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(save_dir)) assert len(file_lists) == len( losses), "Should save all models when save_top_k=-1" # verify correct naming for i in range(0, len(losses)): assert f"_ckpt_epoch_{i}.ckpt" in file_lists save_dir = tmp_path / "2" save_dir.mkdir() # ----------------- # CASE K=0 (none) checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=0, verbose=1) checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for i, loss in enumerate(losses): trainer.current_epoch = i trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = os.listdir(save_dir) assert len(file_lists) == 0, "Should save 0 models when save_top_k=0" save_dir = tmp_path / "3" save_dir.mkdir() # ----------------- # CASE K=1 (2.5, epoch 4) checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=1, verbose=1, prefix='test_prefix') checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for i, loss in enumerate(losses): trainer.current_epoch = i trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(save_dir)) assert len(file_lists) == 1, "Should save 1 model when save_top_k=1" assert 'test_prefix_ckpt_epoch_4.ckpt' in file_lists save_dir = tmp_path / "4" save_dir.mkdir() # ----------------- # CASE K=2 (2.5 epoch 4, 2.8 epoch 2) # make sure other files don't get deleted checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=2, verbose=1) open(f"{save_dir}/other_file.ckpt", 'a').close() checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for i, loss in enumerate(losses): trainer.current_epoch = i trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(save_dir)) assert len(file_lists) == 3, 'Should save 2 model when save_top_k=2' assert '_ckpt_epoch_4.ckpt' in file_lists assert '_ckpt_epoch_2.ckpt' in file_lists assert 'other_file.ckpt' in file_lists save_dir = tmp_path / "5" save_dir.mkdir() # ----------------- # CASE K=4 (save all 4 models) # multiple checkpoints within same epoch checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=4, verbose=1) checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for loss in losses: trainer.current_epoch = 0 trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(save_dir)) assert len( file_lists ) == 4, 'Should save all 4 models when save_top_k=4 within same epoch' save_dir = tmp_path / "6" save_dir.mkdir() # ----------------- # CASE K=3 (save the 2nd, 3rd, 4th model) # multiple checkpoints within same epoch checkpoint_callback = ModelCheckpoint(save_dir, save_top_k=3, verbose=1) checkpoint_callback.save_function = mock_save_function trainer = Trainer() # emulate callback's calls during the training for loss in losses: trainer.current_epoch = 0 trainer.callback_metrics = {'val_loss': loss} checkpoint_callback.on_validation_end(trainer, trainer.get_model()) file_lists = set(os.listdir(save_dir)) assert len(file_lists) == 3, 'Should save 3 models when save_top_k=3' assert '_ckpt_epoch_0_v2.ckpt' in file_lists assert '_ckpt_epoch_0_v1.ckpt' in file_lists assert '_ckpt_epoch_0.ckpt' in file_lists