    def test_logged_lr(self):
        # Mock LR scheduler
        class SchedulerMock(ClassyParamScheduler):
            def __call__(self, where):
                return where

        mock_lr_scheduler = SchedulerMock(UpdateInterval.STEP)

        # Mock Logging
        class DummySummaryWriter(object):
            def __init__(self):
                self.scalar_logs = {}

            def add_scalar(self,
                           key,
                           value,
                           global_step=None,
                           walltime=None) -> None:
                self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

            def add_histogram(self,
                              key,
                              value,
                              global_step=None,
                              walltime=None) -> None:
                return

            def add_text(self, *args, **kwargs):
                pass

            def flush(self):
                return

        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)

        writer = DummySummaryWriter()
        hook = TensorboardPlotHook(writer)
        hook.log_period = 1
        task.set_hooks([hook])
        task.set_optimizer_schedulers({"lr": mock_lr_scheduler})

        trainer = LocalTrainer()
        trainer.train(task)

        # We have 20 samples, batch size is 10. Each epoch is done in two steps.
        self.assertEqual(
            writer.scalar_logs["Learning Rate/train"],
            [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
        )
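
        # Why the assertion above holds (sketch, not part of the test): 20 training
        # samples at a batch size of 10 give 2 steps per epoch, 3 epochs give 6 updates,
        # and SchedulerMock returns `where`, i.e. the fraction of training completed.
        # The sample count of 20 is taken from the comment in the test; everything else
        # comes from the config values set above.
        #
        # num_epochs = 3
        # num_samples = 20                              # per the comment in the test
        # batch_size = 10
        # steps_per_epoch = num_samples // batch_size   # 2
        # total_steps = num_epochs * steps_per_epoch    # 6
        # expected_lrs = [step / total_steps for step in range(total_steps)]
        # assert expected_lrs == [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]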
Example #2
    def test_logged_lr(self):
        # Mock LR scheduler
        def scheduler_mock(where):
            return where

        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
        mock_lr_scheduler.update_interval = UpdateInterval.STEP

        # Mock Logging
        class DummySummaryWriter(object):
            def __init__(self):
                self.scalar_logs = {}

            def add_scalar(self,
                           key,
                           value,
                           global_step=None,
                           walltime=None) -> None:
                self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

            def flush(self):
                return

        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 5
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)

        writer = DummySummaryWriter()
        hook = TensorboardPlotHook(writer)
        task.set_hooks([hook])
        task.optimizer.param_schedulers["lr"] = mock_lr_scheduler

        trainer = LocalTrainer()
        trainer.train(task)

        # We have 10 samples, batch size is 5. Each epoch is done in two steps.
        self.assertEqual(
            writer.scalar_logs["train_learning_rate_updates"],
            [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
        )
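
        # Note on the scheduler mock above (sketch of standard unittest.mock behavior,
        # not part of the test): a Mock with side_effect forwards calls to the wrapped
        # callable and returns its result, while still recording every call, which is
        # why it can stand in for a ClassyParamScheduler here.
        #
        # from unittest import mock
        # m = mock.Mock(side_effect=lambda where: where)
        # assert m(0.5) == 0.5          # side_effect result is passed through
        # m.assert_called_once_with(0.5)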
def configure_hooks(args, config):
    hooks = [LossLrMeterLoggingHook(args.log_freq), ModelComplexityHook()]

    # Make a folder to store checkpoints and tensorboard logging outputs
    suffix = datetime.now().isoformat()
    base_folder = f"{Path(__file__).parent}/output_{suffix}"
    if args.checkpoint_folder == "":
        args.checkpoint_folder = base_folder + "/checkpoints"
        os.makedirs(args.checkpoint_folder, exist_ok=True)

    logging.info(f"Logging outputs to {base_folder}")
    logging.info(f"Logging checkpoints to {args.checkpoint_folder}")

    if not args.skip_tensorboard:
        try:
            from torch.utils.tensorboard import SummaryWriter

            os.makedirs(Path(base_folder) / "tensorboard", exist_ok=True)
            tb_writer = SummaryWriter(log_dir=Path(base_folder) /
                                      "tensorboard")
            hooks.append(TensorboardPlotHook(tb_writer))
        except ImportError:
            logging.warning(
                "tensorboard not installed, skipping tensorboard hooks")

    args_dict = vars(args)
    args_dict["config"] = config
    hooks.append(
        CheckpointHook(args.checkpoint_folder,
                       args_dict,
                       checkpoint_period=args.checkpoint_period))

    if args.profiler:
        hooks.append(ProfilerHook())
    if args.show_progress:
        hooks.append(ProgressBarHook())
    if args.visdom_server != "":
        hooks.append(VisdomHook(args.visdom_server, args.visdom_port))

    return hooks
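
A hedged sketch of how configure_hooks might be invoked: the attribute names are read off the accesses inside the function above, but the argparse-style wiring itself is an assumption, not part of the original snippet.

# Sketch (assumed usage): build an args namespace with the fields configure_hooks()
# reads, then attach the resulting hooks to a task.
from types import SimpleNamespace

args = SimpleNamespace(
    log_freq=10,
    checkpoint_folder="",       # empty -> a timestamped output folder is created
    checkpoint_period=1,
    skip_tensorboard=False,
    profiler=False,
    show_progress=True,
    visdom_server="",           # empty -> no VisdomHook is added
    visdom_port=8097,
)

hooks = configure_hooks(args, config)   # `config` is the task config dict
task.set_hooks(hooks)                   # attach to an existing Classy task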
    def test_writer(self, mock_is_primary_func: mock.MagicMock) -> None:
        """
        Tests that the tensorboard writer writes the correct scalars to SummaryWriter
        iff is_primary() is True.
        """
        for phase_idx, master in product([0, 1, 2], [True, False]):
            train, phase_type = ((True, "train") if phase_idx % 2 == 0 else
                                 (False, "test"))
            mock_is_primary_func.return_value = master

            # set up the task and state
            config = get_test_task_config()
            config["dataset"]["train"]["batchsize_per_replica"] = 2
            config["dataset"]["test"]["batchsize_per_replica"] = 5
            task = build_task(config)
            task.prepare()
            task.advance_phase()
            task.phase_idx = phase_idx
            task.train = train

            losses = [1.23, 4.45, 12.3, 3.4]
            sample_fetch_times = [1.1, 2.2, 3.3, 2.2]

            summary_writer = SummaryWriter(self.base_dir)
            # create a spy on top of summary_writer
            summary_writer = mock.MagicMock(wraps=summary_writer)

            # create a loss lr tensorboard hook
            tensorboard_plot_hook = TensorboardPlotHook(summary_writer)

            # run the hook in the correct order
            tensorboard_plot_hook.on_phase_start(task)

            # test tasks which do not pass the sample_fetch_times as well
            disable_sample_fetch_times = phase_idx == 0

            for loss, sample_fetch_time in zip(losses, sample_fetch_times):
                task.losses.append(loss)
                step_data = ({} if disable_sample_fetch_times else {
                    "sample_fetch_time": sample_fetch_time
                })
                task.last_batch = LastBatchInfo(None, None, None, None,
                                                step_data)
                tensorboard_plot_hook.on_step(task)

            tensorboard_plot_hook.on_phase_end(task)

            if master:
                # add_scalar() should have been called with the right scalars
                if train:
                    learning_rate_key = f"Learning Rate/{phase_type}"
                    summary_writer.add_scalar.assert_any_call(
                        learning_rate_key,
                        mock.ANY,
                        global_step=mock.ANY,
                        walltime=mock.ANY,
                    )
                avg_loss_key = f"Losses/{phase_type}"
                summary_writer.add_scalar.assert_any_call(avg_loss_key,
                                                          mock.ANY,
                                                          global_step=mock.ANY)
                for meter in task.meters:
                    for name in meter.value:
                        meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
                        summary_writer.add_scalar.assert_any_call(
                            meter_key, mock.ANY, global_step=mock.ANY)
                if step_data:
                    summary_writer.add_scalar.assert_any_call(
                        f"Speed/{phase_type}/cumulative_sample_fetch_time",
                        mock.ANY,
                        global_step=mock.ANY,
                        walltime=mock.ANY,
                    )
            else:
                # add_scalar() shouldn't be called since is_primary() is False
                summary_writer.add_scalar.assert_not_called()
            summary_writer.add_scalar.reset_mock()
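
The mock_is_primary_func argument above is typically injected by a mock.patch decorator on the test method; a minimal sketch follows. The patch target path is an assumption (it must point at the name the hook module actually looks up), not something stated in the snippet.

    # Sketch (assumed patch target): patch is_primary where TensorboardPlotHook
    # resolves it, so the decorator passes the MagicMock into the test method.
    # from unittest import mock
    #
    # @mock.patch("classy_vision.hooks.tensorboard_plot_hook.is_primary")
    # def test_writer(self, mock_is_primary_func: mock.MagicMock) -> None:
    #     mock_is_primary_func.return_value = True
    #     ...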
    def test_writer(self, mock_is_master_func: mock.MagicMock) -> None:
        """
        Tests that the tensorboard writer writes the correct scalars to SummaryWriter
        iff is_master() is True.
        """
        for phase_idx, master in product([0, 1, 2], [True, False]):
            train, phase_type = ((True, "train") if phase_idx % 2 == 0 else
                                 (False, "test"))
            mock_is_master_func.return_value = master

            # set up the task and state
            config = get_test_task_config()
            config["dataset"]["train"]["batchsize_per_replica"] = 2
            config["dataset"]["test"]["batchsize_per_replica"] = 5
            task = build_task(config)
            task.prepare()
            task.phase_idx = phase_idx
            task.train = train

            losses = [1.23, 4.45, 12.3, 3.4]

            local_variables = {}

            summary_writer = SummaryWriter(self.base_dir)
            # create a spy on top of summary_writer
            summary_writer = mock.MagicMock(wraps=summary_writer)

            # create a loss lr tensorboard hook
            tensorboard_plot_hook = TensorboardPlotHook(summary_writer)

            # test that the hook logs a warning and doesn't write anything to
            # the writer if on_phase_start() is not called for initialization
            # before on_update() is called.
            with self.assertLogs() as log_watcher:
                tensorboard_plot_hook.on_update(task, local_variables)

            self.assertTrue(
                len(log_watcher.records) == 1
                and log_watcher.records[0].levelno == logging.WARN and
                "learning_rates is not initialized" in log_watcher.output[0])

            # test that the hook logs a warning and doesn't write anything to
            # the writer if on_phase_start() is not called for initialization
            # if on_phase_end() is called.
            with self.assertLogs() as log_watcher:
                tensorboard_plot_hook.on_phase_end(task, local_variables)

            self.assertTrue(
                len(log_watcher.records) == 1
                and log_watcher.records[0].levelno == logging.WARN and
                "learning_rates is not initialized" in log_watcher.output[0])
            summary_writer.add_scalar.reset_mock()

            # run the hook in the correct order
            tensorboard_plot_hook.on_phase_start(task, local_variables)

            for loss in losses:
                task.losses.append(loss)
                tensorboard_plot_hook.on_update(task, local_variables)

            tensorboard_plot_hook.on_phase_end(task, local_variables)

            if master:
                # add_scalar() should have been called with the right scalars
                if train:
                    loss_key = f"{phase_type}_loss"
                    learning_rate_key = f"{phase_type}_learning_rate_updates"
                    summary_writer.add_scalar.assert_any_call(
                        loss_key,
                        mock.ANY,
                        global_step=mock.ANY,
                        walltime=mock.ANY)
                    summary_writer.add_scalar.assert_any_call(
                        learning_rate_key,
                        mock.ANY,
                        global_step=mock.ANY,
                        walltime=mock.ANY,
                    )
                avg_loss_key = f"avg_{phase_type}_loss"
                summary_writer.add_scalar.assert_any_call(avg_loss_key,
                                                          mock.ANY,
                                                          global_step=mock.ANY)
                for meter in task.meters:
                    for name in meter.value:
                        meter_key = f"{phase_type}_{meter.name}_{name}"
                        summary_writer.add_scalar.assert_any_call(
                            meter_key, mock.ANY, global_step=mock.ANY)
            else:
                # add_scalar() shouldn't be called since is_master() is False
                summary_writer.add_scalar.assert_not_called()
            summary_writer.add_scalar.reset_mock()