def test_logged_lr(self):
    # Mock LR scheduler
    class SchedulerMock(ClassyParamScheduler):
        def __call__(self, where):
            return where

    mock_lr_scheduler = SchedulerMock(UpdateInterval.STEP)

    # Mock Logging
    class DummySummaryWriter(object):
        def __init__(self):
            self.scalar_logs = {}

        def add_scalar(self, key, value, global_step=None, walltime=None) -> None:
            self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

        def add_histogram(self, key, value, global_step=None, walltime=None) -> None:
            return

        def add_text(self, *args, **kwargs):
            pass

        def flush(self):
            return

    config = get_test_mlp_task_config()
    config["num_epochs"] = 3
    config["dataset"]["train"]["batchsize_per_replica"] = 10
    config["dataset"]["test"]["batchsize_per_replica"] = 5
    task = build_task(config)

    writer = DummySummaryWriter()
    hook = TensorboardPlotHook(writer)
    hook.log_period = 1
    task.set_hooks([hook])
    task.set_optimizer_schedulers({"lr": mock_lr_scheduler})

    trainer = LocalTrainer()
    trainer.train(task)

    # We have 20 samples, batch size is 10. Each epoch is done in two steps.
    self.assertEqual(
        writer.scalar_logs["Learning Rate/train"],
        [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
    )
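
# A minimal sketch (plain Python, no Classy Vision dependencies) of the
# arithmetic behind the expected values above: the mock scheduler returns
# `where`, the fraction of training completed, and with 20 samples at
# batchsize 10 there are 2 steps per epoch, so 3 epochs give 6 steps.
num_epochs, steps_per_epoch = 3, 2
total_steps = num_epochs * steps_per_epoch
expected_lrs = [step / total_steps for step in range(total_steps)]
assert expected_lrs == [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]
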
def test_logged_lr(self):
    # Mock LR scheduler
    def scheduler_mock(where):
        return where

    mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
    mock_lr_scheduler.update_interval = UpdateInterval.STEP

    # Mock Logging
    class DummySummaryWriter(object):
        def __init__(self):
            self.scalar_logs = {}

        def add_scalar(self, key, value, global_step=None, walltime=None) -> None:
            self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

        def flush(self):
            return

    config = get_test_mlp_task_config()
    config["num_epochs"] = 3
    config["dataset"]["train"]["batchsize_per_replica"] = 5
    config["dataset"]["test"]["batchsize_per_replica"] = 5
    task = build_task(config)

    writer = DummySummaryWriter()
    hook = TensorboardPlotHook(writer)
    task.set_hooks([hook])
    task.optimizer.param_schedulers["lr"] = mock_lr_scheduler

    trainer = LocalTrainer()
    trainer.train(task)

    # We have 10 samples, batch size is 5. Each epoch is done in two steps.
    self.assertEqual(
        writer.scalar_logs["train_learning_rate_updates"],
        [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
    )
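
# A self-contained sketch of the mocking pattern used above: Mock(side_effect=fn)
# produces a callable that delegates to fn while recording every call, so a test
# can drive real scheduler behavior and still assert on invocations. The string
# "step" below is only a stand-in for UpdateInterval.STEP.
from unittest import mock

def scheduler_stub(where):
    return where

m = mock.Mock(side_effect=scheduler_stub)
m.update_interval = "step"      # stand-in attribute, as set on the real mock above
assert m(0.5) == 0.5            # the call is forwarded to scheduler_stub
m.assert_called_once_with(0.5)  # and recorded by the Mock
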
def configure_hooks(args, config):
    hooks = [LossLrMeterLoggingHook(args.log_freq), ModelComplexityHook()]

    # Make a folder to store checkpoints and tensorboard logging outputs
    suffix = datetime.now().isoformat()
    base_folder = f"{Path(__file__).parent}/output_{suffix}"
    if args.checkpoint_folder == "":
        args.checkpoint_folder = base_folder + "/checkpoints"
        os.makedirs(args.checkpoint_folder, exist_ok=True)

    logging.info(f"Logging outputs to {base_folder}")
    logging.info(f"Logging checkpoints to {args.checkpoint_folder}")

    if not args.skip_tensorboard:
        try:
            from torch.utils.tensorboard import SummaryWriter

            os.makedirs(Path(base_folder) / "tensorboard", exist_ok=True)
            tb_writer = SummaryWriter(log_dir=Path(base_folder) / "tensorboard")
            hooks.append(TensorboardPlotHook(tb_writer))
        except ImportError:
            logging.warning("tensorboard not installed, skipping tensorboard hooks")

    args_dict = vars(args)
    args_dict["config"] = config
    hooks.append(
        CheckpointHook(
            args.checkpoint_folder, args_dict, checkpoint_period=args.checkpoint_period
        )
    )

    if args.profiler:
        hooks.append(ProfilerHook())
    if args.show_progress:
        hooks.append(ProgressBarHook())
    if args.visdom_server != "":
        hooks.append(VisdomHook(args.visdom_server, args.visdom_port))

    return hooks
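
# Hypothetical invocation sketch for configure_hooks; the attribute names on
# `args` mirror the ones read above, and the config dict is a placeholder.
# Assumes configure_hooks and its hook imports are available in scope.
import argparse

args = argparse.Namespace(
    log_freq=10,
    checkpoint_folder="",
    checkpoint_period=1,
    skip_tensorboard=False,
    profiler=False,
    show_progress=False,
    visdom_server="",
    visdom_port=8097,
)
hooks = configure_hooks(args, config={"name": "classification_task"})
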
def test_writer(self, mock_is_primary_func: mock.MagicMock) -> None:
    """
    Tests that the tensorboard writer writes the correct scalars to
    SummaryWriter iff is_primary() is True.
    """
    for phase_idx, master in product([0, 1, 2], [True, False]):
        train, phase_type = (
            (True, "train") if phase_idx % 2 == 0 else (False, "test")
        )
        mock_is_primary_func.return_value = master

        # set up the task and state
        config = get_test_task_config()
        config["dataset"]["train"]["batchsize_per_replica"] = 2
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.prepare()
        task.advance_phase()
        task.phase_idx = phase_idx
        task.train = train

        losses = [1.23, 4.45, 12.3, 3.4]
        sample_fetch_times = [1.1, 2.2, 3.3, 2.2]

        summary_writer = SummaryWriter(self.base_dir)
        # create a spy on top of summary_writer
        summary_writer = mock.MagicMock(wraps=summary_writer)

        # create a loss lr tensorboard hook
        tensorboard_plot_hook = TensorboardPlotHook(summary_writer)

        # run the hook in the correct order
        tensorboard_plot_hook.on_phase_start(task)

        # also test tasks which do not pass sample_fetch_times
        disable_sample_fetch_times = phase_idx == 0

        for loss, sample_fetch_time in zip(losses, sample_fetch_times):
            task.losses.append(loss)
            step_data = (
                {}
                if disable_sample_fetch_times
                else {"sample_fetch_time": sample_fetch_time}
            )
            task.last_batch = LastBatchInfo(None, None, None, None, step_data)
            tensorboard_plot_hook.on_step(task)

        tensorboard_plot_hook.on_phase_end(task)

        if master:
            # add_scalar() should have been called with the right scalars
            if train:
                learning_rate_key = f"Learning Rate/{phase_type}"
                summary_writer.add_scalar.assert_any_call(
                    learning_rate_key,
                    mock.ANY,
                    global_step=mock.ANY,
                    walltime=mock.ANY,
                )
            avg_loss_key = f"Losses/{phase_type}"
            summary_writer.add_scalar.assert_any_call(
                avg_loss_key, mock.ANY, global_step=mock.ANY
            )
            for meter in task.meters:
                for name in meter.value:
                    meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
                    summary_writer.add_scalar.assert_any_call(
                        meter_key, mock.ANY, global_step=mock.ANY
                    )
            if step_data:
                summary_writer.add_scalar.assert_any_call(
                    f"Speed/{phase_type}/cumulative_sample_fetch_time",
                    mock.ANY,
                    global_step=mock.ANY,
                    walltime=mock.ANY,
                )
        else:
            # add_scalar() shouldn't be called since is_primary() is False
            summary_writer.add_scalar.assert_not_called()
        summary_writer.add_scalar.reset_mock()
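
# A self-contained sketch of the spy pattern above: MagicMock(wraps=obj)
# forwards attribute access and calls to the real object while recording
# them, so assertions such as assert_any_call() work without losing the
# real side effects. The Writer class here is a toy stand-in for SummaryWriter.
from unittest import mock

class Writer:
    def add_scalar(self, key, value, global_step=None):
        print(f"writing {key}={value} at step {global_step}")

spy = mock.MagicMock(wraps=Writer())
spy.add_scalar("loss", 0.5, global_step=1)  # the real method still runs
spy.add_scalar.assert_any_call("loss", 0.5, global_step=1)
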
def test_writer(self, mock_is_master_func: mock.MagicMock) -> None:
    """
    Tests that the tensorboard writer writes the correct scalars to
    SummaryWriter iff is_master() is True.
    """
    for phase_idx, master in product([0, 1, 2], [True, False]):
        train, phase_type = (
            (True, "train") if phase_idx % 2 == 0 else (False, "test")
        )
        mock_is_master_func.return_value = master

        # set up the task and state
        config = get_test_task_config()
        config["dataset"]["train"]["batchsize_per_replica"] = 2
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.prepare()
        task.phase_idx = phase_idx
        task.train = train

        losses = [1.23, 4.45, 12.3, 3.4]

        local_variables = {}

        summary_writer = SummaryWriter(self.base_dir)
        # create a spy on top of summary_writer
        summary_writer = mock.MagicMock(wraps=summary_writer)

        # create a loss lr tensorboard hook
        tensorboard_plot_hook = TensorboardPlotHook(summary_writer)

        # test that the hook logs a warning and doesn't write anything to
        # the writer if on_phase_start() is not called for initialization
        # before on_update() is called.
        with self.assertLogs() as log_watcher:
            tensorboard_plot_hook.on_update(task, local_variables)

        self.assertTrue(
            len(log_watcher.records) == 1
            and log_watcher.records[0].levelno == logging.WARN
            and "learning_rates is not initialized" in log_watcher.output[0]
        )

        # test that the hook logs a warning and doesn't write anything to
        # the writer if on_phase_start() is not called for initialization
        # before on_phase_end() is called.
        with self.assertLogs() as log_watcher:
            tensorboard_plot_hook.on_phase_end(task, local_variables)

        self.assertTrue(
            len(log_watcher.records) == 1
            and log_watcher.records[0].levelno == logging.WARN
            and "learning_rates is not initialized" in log_watcher.output[0]
        )

        summary_writer.add_scalar.reset_mock()

        # run the hook in the correct order
        tensorboard_plot_hook.on_phase_start(task, local_variables)

        for loss in losses:
            task.losses.append(loss)
            tensorboard_plot_hook.on_update(task, local_variables)

        tensorboard_plot_hook.on_phase_end(task, local_variables)

        if master:
            # add_scalar() should have been called with the right scalars
            if train:
                loss_key = f"{phase_type}_loss"
                learning_rate_key = f"{phase_type}_learning_rate_updates"
                summary_writer.add_scalar.assert_any_call(
                    loss_key, mock.ANY, global_step=mock.ANY, walltime=mock.ANY
                )
                summary_writer.add_scalar.assert_any_call(
                    learning_rate_key,
                    mock.ANY,
                    global_step=mock.ANY,
                    walltime=mock.ANY,
                )
            avg_loss_key = f"avg_{phase_type}_loss"
            summary_writer.add_scalar.assert_any_call(
                avg_loss_key, mock.ANY, global_step=mock.ANY
            )
            for meter in task.meters:
                for name in meter.value:
                    meter_key = f"{phase_type}_{meter.name}_{name}"
                    summary_writer.add_scalar.assert_any_call(
                        meter_key, mock.ANY, global_step=mock.ANY
                    )
        else:
            # add_scalar() shouldn't be called since is_master() is False
            summary_writer.add_scalar.assert_not_called()
        summary_writer.add_scalar.reset_mock()
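
# A minimal, runnable sketch of the assertLogs pattern used above: the context
# manager captures records emitted inside the block so the test can assert on
# level and message. Uses only the standard library; the message is illustrative.
import logging
import unittest

class AssertLogsExample(unittest.TestCase):
    def test_warning_is_captured(self):
        with self.assertLogs() as log_watcher:
            logging.warning("learning_rates is not initialized")
        self.assertEqual(len(log_watcher.records), 1)
        self.assertEqual(log_watcher.records[0].levelno, logging.WARNING)
        self.assertIn("learning_rates is not initialized", log_watcher.output[0])
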