# Third-party imports used by these tests. The project-specific helpers
# (DummyTrainer, DummyEngine, DummyMNIST, DummyLoggingPlugin, the callbacks and
# ModelLoader, etc.) are assumed to come from the test suite's own helper
# modules, which are not shown in this section.
import re
import shutil

import pandas as pd
import pytest
import pytorch_lightning as pl


def test_cpu(tmpdir, num_processes):
    log_filepath = tmpdir / "log"
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        callbacks=[TrainingTimer(), __TestCallback()],
        accelerator="ddp_cpu" if num_processes > 1 else None,
        num_processes=num_processes,
        plugins=[DummyLoggingPlugin(log_filepath)],
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST(batch_size=1))

    # caplog does not seem to work with multiprocessing.spawn,
    # so check the logging on the saved log file instead
    if num_processes > 1:
        log_filepath_rank1 = tmpdir.join("log.rank1")
        assert log_filepath_rank1.exists()
        assert not log_filepath_rank1.read_text("utf-8")
    assert log_filepath.exists()
    lines = [l.strip() for l in log_filepath.readlines()]
    lines = [
        l.startswith(f"E{e}: tr_time=")
        for l in lines
        for e in range(trainer.max_epochs)
    ]
    assert sum(lines) == trainer.max_epochs
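# The tests that pass DummyLoggingPlugin rely on the callbacks' log records being
# written to a file (one file per DDP rank), since caplog cannot capture output
# from processes started via multiprocessing.spawn. The helper below is only a
# minimal sketch of that idea using the standard logging module; the name
# _setup_rank_logging is hypothetical and the real DummyLoggingPlugin may be
# implemented quite differently.
import logging


def _setup_rank_logging(log_filepath, rank=0):
    # Rank 0 writes to "log"; other ranks write to "log.rank<N>", matching the
    # filenames asserted in test_cpu and test_learning_rate.
    filepath = str(log_filepath) if rank == 0 else f"{log_filepath}.rank{rank}"
    handler = logging.FileHandler(filepath, mode="w")
    handler.setFormatter(logging.Formatter("%(message)s"))
    logging.getLogger().addHandler(handler)
    return handler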
def test_model_loader_find_best(tmpdir):
    # empty directory
    assert ModelLoader.find_best(tmpdir, "test") is None

    # with no-monitor ckpts
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        callbacks=[
            pl.callbacks.ModelCheckpoint(
                dirpath=tmpdir, save_top_k=-1, filename="{epoch}"
            )
        ],
        checkpoint_callback=True,
        max_epochs=3,
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())
    assert ModelLoader.find_best(tmpdir, "test") is None

    # with monitor ckpts
    monitor = "bar"
    mc = pl.callbacks.ModelCheckpoint(
        dirpath=tmpdir, save_top_k=-1, monitor=monitor, mode="max", filename="{epoch}"
    )
    trainer = DummyTrainer(
        default_root_dir=tmpdir, callbacks=[mc], checkpoint_callback=True, max_epochs=3
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())
    assert (
        ModelLoader.find_best(tmpdir, monitor, mode="max")
        == tmpdir / "epoch=2-v0.ckpt"
        == mc.best_model_path
    )
    assert (
        ModelLoader.find_best(tmpdir, monitor, mode="min") == tmpdir / "epoch=0-v0.ckpt"
    )
def test_epoch_csv_logger(tmpdir, num_processes):
    pl.seed_everything(0)
    # run twice
    for csv_filename in ("metrics.csv", "metrics-v0.csv"):
        trainer = DummyTrainer(
            default_root_dir=tmpdir,
            max_epochs=3,
            accelerator="ddp_cpu" if num_processes > 1 else None,
            num_processes=num_processes,
            logger=EpochCSVLogger(tmpdir),
        )
        trainer.fit(DummyEngine(), datamodule=DummyMNIST(batch_size=2))

        csv = pd.read_csv(tmpdir / csv_filename)
        # check epoch values
        assert (
            list(csv["epoch"].values)
            == list(csv["foo"].values)
            == list(range(trainer.max_epochs))
        )
        # check test variable "bar" values
        assert list(csv["bar"].values) == list(
            range(
                trainer.limit_train_batches - 1,
                trainer.limit_train_batches * trainer.max_epochs,
                trainer.limit_train_batches,
            )
        )
        # check losses are floats
        assert all(isinstance(v, float) for v in csv["tr_loss"].values)
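# In test_epoch_csv_logger, the expected "bar" column holds, for each epoch, the
# global index of that epoch's last training batch (limit_train_batches - 1,
# 2 * limit_train_batches - 1, ...), while "foo" equals the epoch number. That is
# consistent with an engine whose training_step logs something like the sketch
# below; this is an assumption about DummyEngine, not its actual code:
#
#   def training_step(self, batch, batch_idx):
#       loss = ...
#       self.log("foo", float(self.current_epoch))
#       self.log("bar", float(self.global_step))
#       return loss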
def test_learning_rate_warns(tmpdir):
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        callbacks=[LearningRate()],
    )
    with pytest.warns(RuntimeWarning, match=r"You are using LearningRateMonitor.*"):
        trainer.fit(DummyEngine(), datamodule=DummyMNIST())
def test_progress_bar(tmpdir):
    pbar = ProgressBar()
    module = DummyEngine()
    data_module = DummyMNIST()
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        callbacks=[pbar, __TestCallback(pbar)],
    )
    # fake things to include in the pbar
    trainer.progress_bar_metrics["tr_cer"] = 1
    trainer.progress_bar_metrics["va_cer"] = 0.33
    trainer.progress_bar_metrics["gpu_stats"] = {"gpu_stats": "baz"}

    trainer.fit(module, datamodule=data_module)

    n, m = trainer.limit_train_batches, trainer.limit_val_batches
    assert pbar.is_enabled
    # check counts
    assert pbar.total_train_batches == pbar.main_progress_bar.total == n
    assert pbar.total_val_batches == pbar.val_progress_bar.total == m
    # check end was reached
    assert pbar.main_progress_bar.n == pbar.train_batch_idx == n
    assert pbar.val_progress_bar.n == pbar.val_batch_idx == m
    # check test bar is off
    assert pbar.total_test_batches == 0
    assert pbar.test_progress_bar is None
    # check bar string
    float_pattern = "([0-9]*[.])?[0-9]+"
    pattern = (
        r" - E1: "
        r"100%\|[█]+\| 10/10 \[00:0[0-9]<00:00, "
        rf"{float_pattern}it/s, "
        rf"loss={float_pattern}, "
        rf"cer={float_pattern}%, "
        r"gpu_stats={'gpu_stats': 'baz'}]"
    )
    assert re.match("TR" + pattern, str(pbar.main_progress_bar))
    assert re.match("VA" + pattern, str(pbar.val_progress_bar))

    trainer.test(module, datamodule=data_module)

    # previous checks for test
    k = trainer.limit_test_batches
    assert pbar.total_test_batches == pbar.test_progress_bar.total == k
    assert pbar.test_progress_bar.n == pbar.test_batch_idx == k
    assert re.match(
        rf"Decoding: 100%\|[█]+\| 10/10 \[00:00<00:00, {float_pattern}it/s]",
        str(pbar.test_progress_bar),
    )
def test_model_loader_prepare_checkpoint(tmpdir):
    # create some checkpoints
    monitor = "bar"
    exp_dirpath = tmpdir / "experiment"
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        callbacks=[
            pl.callbacks.ModelCheckpoint(
                dirpath=exp_dirpath,
                save_top_k=-1,
                monitor=monitor,
                mode="max",
                filename="{epoch}",
            )
        ],
        checkpoint_callback=True,
        max_epochs=2,
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())
    expected = exp_dirpath / "epoch=0.ckpt"

    # nothing
    assert ModelLoader.prepare_checkpoint("", exp_dirpath, monitor) == expected
    # direct path
    assert ModelLoader.prepare_checkpoint(expected, exp_dirpath, monitor) == expected
    # direct path outside of exp_dirpath
    shutil.copy(expected, "/tmp")
    assert (
        ModelLoader.prepare_checkpoint("/tmp/epoch=0.ckpt", exp_dirpath, monitor)
        == "/tmp/epoch=0.ckpt"
    )
    # filename
    assert (
        ModelLoader.prepare_checkpoint("epoch=0.ckpt", exp_dirpath, monitor) == expected
    )
    # globbed filename
    assert (
        ModelLoader.prepare_checkpoint("epoch=?.ckpt", exp_dirpath, monitor)
        == exp_dirpath / "epoch=1.ckpt"
    )

    # failures
    with pytest.raises(AssertionError, match="Could not find a valid checkpoint in"):
        ModelLoader.prepare_checkpoint("", tmpdir, monitor)
    with pytest.raises(AssertionError, match="Could not find the checkpoint"):
        ModelLoader.prepare_checkpoint("?", exp_dirpath, monitor)
def test_progress_bar_gpu_stats(monkeypatch, tmpdir):
    def _fake_on_train_start(self, *_):
        self._gpu_ids = "0,1"

    fake_stats = [[1.2, 2.3], [3.4, 4.5]]
    monkeypatch.setattr(shutil, "which", lambda _: True)
    monkeypatch.setattr(ProgressBarGPUStats, "on_train_start", _fake_on_train_start)
    monkeypatch.setattr(ProgressBarGPUStats, "_get_gpu_stats", lambda *_: fake_stats)

    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        callbacks=[ProgressBarGPUStats()],
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())

    expected = {
        f"GPU-{i}": f"{int(fake_stats[i][0])}/{int(sum(fake_stats[i]))}MB"
        for i in range(2)
    }
    assert trainer.progress_bar_dict["gpu_stats"] == expected
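# The expected dict above implies that ProgressBarGPUStats renders each GPU's
# memory as "<used>/<used + free>MB", keyed by "GPU-<id>". Below is a minimal
# sketch of that formatting step, under the assumption that stats[i] is
# [memory.used, memory.free] in MB; the helper name _format_gpu_stats is
# hypothetical and not part of the library:
def _format_gpu_stats(gpu_ids, stats):
    return {
        f"GPU-{gpu_id}": f"{int(used)}/{int(used + free)}MB"
        for gpu_id, (used, free) in zip(gpu_ids, stats)
    }


# e.g. _format_gpu_stats("0,1".split(","), [[1.2, 2.3], [3.4, 4.5]])
# -> {"GPU-0": "1/3MB", "GPU-1": "3/7MB"}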
def test_learning_rate(tmpdir, num_processes):
    log_filepath = tmpdir / "log"
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        callbacks=[LearningRate()],
        accelerator="ddp_cpu" if num_processes > 1 else None,
        num_processes=num_processes,
        plugins=[DummyLoggingPlugin(log_filepath)],
    )
    trainer.fit(__TestEngine(), datamodule=DummyMNIST())

    if num_processes > 1:
        log_filepath_rank1 = tmpdir.join("log.rank1")
        assert log_filepath_rank1.exists()
        assert not log_filepath_rank1.read_text("utf-8")
    assert log_filepath.exists()
    lines = [l.strip() for l in log_filepath.readlines()]
    for e in range(1, trainer.max_epochs):
        expected = f"E{e}: lr-Adam 1.000e-0{e + 2} ⟶ 1.000e-0{e + 3}"
        assert lines.count(expected) == 1
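# test_learning_rate expects LearningRate to log one "lr-Adam <old> ⟶ <new>"
# line per epoch, with the rate decaying by a factor of 10 each time
# (1e-3 ⟶ 1e-4 at epoch 1, 1e-4 ⟶ 1e-5 at epoch 2). A minimal sketch of what
# the __TestEngine used above could look like, assuming DummyEngine is a
# LightningModule whose optimizer can be overridden via configure_optimizers;
# the class name _LRDecayEngine is hypothetical and the real __TestEngine may
# differ:
import torch


class _LRDecayEngine(DummyEngine):
    def configure_optimizers(self):
        # Adam starting at 1e-2, multiplied by 0.1 at the end of every epoch
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-2)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
        return [optimizer], [scheduler]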
def test_can_train(tmpdir):
    model = DummyModel((3, 3), 10)
    module = EngineModule(model, CTCLoss())
    trainer = DummyTrainer(default_root_dir=tmpdir)
    trainer.fit(module, datamodule=DummyMNIST())
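# Several tests above take a num_processes argument so that they run both
# single-process and with the ddp_cpu accelerator. The test suite presumably
# supplies it via a fixture or parametrization; a typical (assumed) form would
# be to decorate each of those tests with:
#
#   @pytest.mark.parametrize("num_processes", (1, 2))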