Example 1
    def test_checkpointing(self):
        """
        Tests checkpointing by running train_steps to make sure the train_steps
        run the same way after loading from a checkpoint.
        """
        config = get_fast_test_task_config()
        task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
        task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

        task.set_use_gpu(torch.cuda.is_available())

        # prepare the tasks for the right device
        task.prepare()

        # test in both train and test mode
        for _ in range(2):
            task.advance_phase()

            # set task's state as task_2's checkpoint
            task_2.set_checkpoint(get_checkpoint_dict(task, {}, deep_copy=True))
            task_2.prepare()

            # task 2 should have the same state
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())

            # this tests that both states' iterators return the same samples
            sample = next(task.get_data_iterator())
            sample_2 = next(task_2.get_data_iterator())
            self._compare_samples(sample, sample_2)

            # test that the train step runs the same way on both states
            # and the loss remains the same
            task.train_step()
            task_2.train_step()
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())
Example 2
    def save(self, stream):
        checkpoint_state = get_checkpoint_dict(self.task, self.input_args)
        checkpoint_state["advance_to_next_phase"] = self.advance_to_next_phase
        checkpoint_state["skip_current_phase"] = self.skip_current_phase
        checkpoint_state["run_start_hooks"] = self.run_start_hooks
        torch.save(checkpoint_state, stream)
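
The save() snippet above only covers serialization. As a minimal companion sketch, a matching load() might read the stream back with torch.load and restore the extra flags; the method below is illustrative only and not part of the original example:

    def load(self, stream):
        # torch.load returns the dict built by get_checkpoint_dict plus the extra
        # bookkeeping flags that save() stored (attribute names taken from save()).
        checkpoint_state = torch.load(stream, map_location="cpu")
        self.advance_to_next_phase = checkpoint_state["advance_to_next_phase"]
        self.skip_current_phase = checkpoint_state["skip_current_phase"]
        self.run_start_hooks = checkpoint_state["run_start_hooks"]
        return checkpoint_state
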
Example 3
    def test_train_parametric_loss(self):
        heads_num_classes = 100
        pre_train_config = self._get_pre_train_config(
            head_num_classes=heads_num_classes)
        pre_train_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config(
            head_num_classes=heads_num_classes)
        fine_tuning_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }

        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task._set_pretrained_checkpoint_dict(
            copy.deepcopy(checkpoint))
        # run in test mode to compare the loss state. Since we have a BatchNorm module in
        # the loss, its moving mean/std should be unchanged when we run in test-only mode
        fine_tuning_task.set_test_only(True)
        loss_state = copy.deepcopy(fine_tuning_task.loss.get_classy_state())
        trainer.train(fine_tuning_task)
        self._compare_state_dict(loss_state,
                                 fine_tuning_task.loss.get_classy_state())
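
The configs in this example point to a loss registered as "batchnorm_cross_entropy_loss", whose definition is not shown here. Below is a minimal sketch of what such a parametric loss could look like, assuming Classy Vision's ClassyLoss/register_loss API; the class name and implementation are assumptions, not the original code:

    import torch.nn as nn
    from classy_vision.losses import ClassyLoss, register_loss

    @register_loss("batchnorm_cross_entropy_loss")
    class BatchNormCrossEntropyLoss(ClassyLoss):
        """Cross-entropy loss with a BatchNorm layer, so the loss itself has state."""

        def __init__(self, num_classes):
            super().__init__()
            # BatchNorm gives the loss trainable parameters and running statistics,
            # which is the state the checkpoint comparison in the test relies on.
            self.bn = nn.BatchNorm1d(num_classes)
            self.xent = nn.CrossEntropyLoss()

        @classmethod
        def from_config(cls, config):
            return cls(config["num_classes"])

        def forward(self, output, target):
            return self.xent(self.bn(output), target)
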
Example 4
    def test_prepare(self):
        pre_train_config = self._get_pre_train_config()
        pre_train_task = build_task(pre_train_config)
        pre_train_task.prepare()
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config()
        fine_tuning_task = build_task(fine_tuning_config)
        # cannot prepare a fine tuning task without a pre-training checkpoint
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        fine_tuning_task.prepare()

        # test a fine tuning task with incompatible heads
        fine_tuning_config = self._get_fine_tuning_config(head_num_classes=10)
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        # cannot prepare a fine tuning task with a pre-training checkpoint that
        # has incompatible heads
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        fine_tuning_task.set_pretrained_checkpoint(checkpoint).set_reset_heads(
            True)
        fine_tuning_task.prepare()
Example 5
    def test_checkpointing(self):
        """
        Tests checkpointing by running train_steps to make sure the train_steps
        run the same way after loading from a checkpoint.
        """
        config = get_fast_test_task_config()
        task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
        task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

        task.set_use_gpu(torch.cuda.is_available())

        # only train 1 phase at a time
        trainer = LimitedPhaseTrainer(num_phases=1)

        while not task.done_training():
            # set task's state as task_2's checkpoint
            task_2._set_checkpoint_dict(
                get_checkpoint_dict(task, {}, deep_copy=True))

            # task 2 should have the same state before training
            self._compare_states(task.get_classy_state(),
                                 task_2.get_classy_state())

            # train for one phase
            trainer.train(task)
            trainer.train(task_2)

            # task 2 should have the same state after training
            self._compare_states(task.get_classy_state(),
                                 task_2.get_classy_state())
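
LimitedPhaseTrainer is a test helper that is not shown in this listing. The sketch below is one way it could work, assuming it wraps LocalTrainer and uses a hook to stop after a fixed number of phases; everything here is an assumption rather than the original implementation:

    from classy_vision.hooks import ClassyHook
    from classy_vision.trainer import LocalTrainer

    class _StopTraining(Exception):
        """Raised by the hook to break out of the training loop early."""

    class _PhaseLimitHook(ClassyHook):
        # unused hook points are no-ops
        on_start = ClassyHook._noop
        on_phase_start = ClassyHook._noop
        on_step = ClassyHook._noop
        on_end = ClassyHook._noop

        def __init__(self, num_phases):
            super().__init__()
            self.num_phases = num_phases
            self.phases_done = 0

        def on_phase_end(self, task):
            self.phases_done += 1
            if self.phases_done >= self.num_phases:
                raise _StopTraining

    class LimitedPhaseTrainer(LocalTrainer):
        """Trains at most num_phases phases per call to train()."""

        def __init__(self, num_phases):
            super().__init__()
            self.num_phases = num_phases

        def train(self, task):
            limit_hook = _PhaseLimitHook(self.num_phases)
            task.hooks = list(task.hooks) + [limit_hook]
            try:
                super().train(task)
            except _StopTraining:
                pass
            finally:
                task.hooks = [h for h in task.hooks if h is not limit_hook]
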
Example 6
    def test_checkpointing_different_device(self):
        config = get_fast_test_task_config()
        task = build_task(config)
        task_2 = build_task(config)

        for use_gpu in [True, False]:
            task.prepare(use_gpu=use_gpu)

            # set task's state as task_2's checkpoint
            task_2.set_checkpoint(get_checkpoint_dict(task, {}, deep_copy=True))

            # we should be able to run the trainer using state from a different device
            trainer = LocalTrainer(use_gpu=not use_gpu)
            trainer.train(task_2)
Example 7
    def _save_checkpoint(self, task, filename):
        if getattr(task, "test_only", False):
            return
        assert PathManager.exists(
            self.checkpoint_folder
        ), "Checkpoint folder '{}' deleted unexpectedly".format(self.checkpoint_folder)

        # save checkpoint:
        logging.info("Saving checkpoint to '{}'...".format(self.checkpoint_folder))
        checkpoint_file = save_checkpoint(
            self.checkpoint_folder, get_checkpoint_dict(task, self.input_args)
        )

        # make copy of checkpoint that won't be overwritten:
        PathManager.copy(checkpoint_file, f"{self.checkpoint_folder}/{filename}")
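
A _save_checkpoint method like the one above normally lives on a checkpointing hook. The usage sketch below shows how such a hook might be attached to a task; the folder path is a placeholder, `config` stands for a task config like the ones in these tests, and the CheckpointHook constructor arguments are assumed:

    from classy_vision.hooks import CheckpointHook, LossLrMeterLoggingHook
    from classy_vision.tasks import build_task

    # save a checkpoint into a local folder after train phases
    checkpoint_hook = CheckpointHook("/tmp/checkpoints", input_args={}, checkpoint_period=1)
    task = build_task(config).set_hooks([checkpoint_hook, LossLrMeterLoggingHook()])
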
Example 8
    def test_test_only_checkpointing(self):
        """
        Tests checkpointing by running train_steps to make sure the
        train_steps run the same way after loading from a training
        task checkpoint on a test_only task.
        """
        train_config = get_fast_test_task_config()
        train_config["num_epochs"] = 10
        test_config = get_fast_test_task_config()
        test_config["test_only"] = True
        train_task = build_task(train_config).set_hooks(
            [LossLrMeterLoggingHook()])
        test_only_task = build_task(test_config).set_hooks(
            [LossLrMeterLoggingHook()])

        use_gpu = torch.cuda.is_available()

        # prepare the tasks for the right device
        train_task.prepare(use_gpu=use_gpu)

        # test in both train and test mode
        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(train_task)

        # set train_task's state as test_only_task's checkpoint
        test_only_task.set_checkpoint(
            get_checkpoint_dict(train_task, {}, deep_copy=True))
        test_only_task.prepare(use_gpu=use_gpu)
        test_state = test_only_task.get_classy_state()

        # We expect the phase idx to be different for a test only task
        self.assertEqual(test_state["phase_idx"], -1)

        # We expect the test-only task's state to be in test mode, regardless of the train state
        self.assertFalse(test_state["train"])

        # Num updates should be 0
        self.assertEqual(test_state["num_updates"], 0)

        # train_phase_idx should be -1
        self.assertEqual(test_state["train_phase_idx"], -1)

        # Verify task will run
        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(test_only_task)
Example 9
    def test_train(self):
        pre_train_config = self._get_pre_train_config(head_num_classes=100)
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        for reset_heads, heads_num_classes in [(False, 100), (True, 20)]:
            for freeze_trunk in [True, False]:
                fine_tuning_config = self._get_fine_tuning_config(
                    head_num_classes=heads_num_classes)
                fine_tuning_task = build_task(fine_tuning_config)
                fine_tuning_task = (
                    fine_tuning_task._set_pretrained_checkpoint_dict(
                        copy.deepcopy(checkpoint)).set_reset_heads(
                            reset_heads).set_freeze_trunk(freeze_trunk))
                # run in test mode to compare the model state
                fine_tuning_task.set_test_only(True)
                trainer.train(fine_tuning_task)
                self._compare_model_state(
                    pre_train_task.model.get_classy_state(),
                    fine_tuning_task.model.get_classy_state(),
                    check_heads=not reset_heads,
                )
                # run in train mode to check accuracy
                fine_tuning_task.set_test_only(False)
                trainer.train(fine_tuning_task)
                if freeze_trunk:
                    # if trunk is frozen the states should be the same
                    self._compare_model_state(
                        pre_train_task.model.get_classy_state(),
                        fine_tuning_task.model.get_classy_state(),
                        check_heads=False,
                    )
                else:
                    # if the trunk isn't frozen, the states should be different
                    with self.assertRaises(Exception):
                        self._compare_model_state(
                            pre_train_task.model.get_classy_state(),
                            fine_tuning_task.model.get_classy_state(),
                            check_heads=False,
                        )

                accuracy = fine_tuning_task.meters[0].value["top_1"]
                self.assertAlmostEqual(accuracy, 1.0)
Example 10
    def _save_checkpoint(self, task, filename):
        if getattr(task, "test_only", False):
            return
        assert os.path.exists(
            self.checkpoint_folder
        ), "Checkpoint folder '{}' deleted unexpectedly".format(self.checkpoint_folder)

        # save checkpoint:
        logging.info("Saving checkpoint to '{}'...".format(self.checkpoint_folder))
        checkpoint_file = save_checkpoint(
            self.checkpoint_folder, get_checkpoint_dict(task, self.input_args)
        )

        # make copy of checkpoint that won't be overwritten:
        if checkpoint_file:
            tmp_dir = tempfile.mkdtemp()
            tmp_file = os.path.join(tmp_dir, filename)
            copy2(checkpoint_file, tmp_file)
            move(tmp_file, os.path.join(self.checkpoint_folder, filename))
Example 11
    def _save_checkpoint(self, task, filename):
        if getattr(task, "test_only", False):
            return
        assert PathManager.exists(
            self.checkpoint_folder
        ), "Checkpoint folder '{}' deleted unexpectedly".format(
            self.checkpoint_folder)

        for prefix in gfs_prefix_list:
            if self.checkpoint_folder.startswith(prefix):
                logging.warning(
                    "GFS is deprecating... please save checkpoint to manifold!"
                )
                break

        # save checkpoint:
        logging.info("Saving checkpoint to '{}'...".format(
            self.checkpoint_folder))
        checkpoint_file = save_checkpoint(
            self.checkpoint_folder, get_checkpoint_dict(task, self.input_args))

        # make copy of checkpoint that won't be overwritten:
        PathManager.copy(checkpoint_file,
                         f"{self.checkpoint_folder}/{filename}")
Example 12
    def test_prepare(self):
        pre_train_config = self._get_pre_train_config()
        pre_train_task = build_task(pre_train_config)
        pre_train_task.prepare()
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config()
        fine_tuning_task = build_task(fine_tuning_config)

        # test: cannot prepare a fine tuning task without a pre-trained checkpoint
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        # test: prepare should succeed after pre-trained checkpoint is set
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task._set_pretrained_checkpoint_dict(checkpoint)
        fine_tuning_task.prepare()

        # test: prepare should succeed if a pre-trained checkpoint is provided in the
        # config
        fine_tuning_config = self._get_fine_tuning_config(
            pretrained_checkpoint=True)
        fine_tuning_task = build_task(fine_tuning_config)
        with mock.patch(
                "classy_vision.tasks.fine_tuning_task.load_and_broadcast_checkpoint",
                return_value=checkpoint,
        ):
            fine_tuning_task.prepare()

        # test: a fine tuning task whose heads are incompatible with a manually set
        # pre-trained checkpoint should fail to prepare if the heads are not reset
        fine_tuning_config = self._get_fine_tuning_config(head_num_classes=10)
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task._set_pretrained_checkpoint_dict(checkpoint)

        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        # test: a fine tuning task whose heads are incompatible with a manually set
        # pre-trained checkpoint should prepare successfully if the heads are reset
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task._set_pretrained_checkpoint_dict(
            copy.deepcopy(checkpoint)).set_reset_heads(True)

        fine_tuning_task.prepare()

        # test: a fine tuning task whose heads are incompatible with the pre-trained
        # checkpoint provided in the config should fail to prepare
        fine_tuning_config = self._get_fine_tuning_config(
            head_num_classes=10, pretrained_checkpoint=True)

        fine_tuning_task = build_task(fine_tuning_config)
        with mock.patch(
                "classy_vision.tasks.fine_tuning_task.load_and_broadcast_checkpoint",
                return_value=copy.deepcopy(checkpoint),
        ), self.assertRaises(Exception):
            fine_tuning_task.prepare()

        # test: a fine tuning task whose heads are incompatible with the pre-trained
        # checkpoint provided in the config should prepare successfully if the heads
        # are reset
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task.set_reset_heads(True)
        with mock.patch(
                "classy_vision.tasks.fine_tuning_task.load_and_broadcast_checkpoint",
                return_value=copy.deepcopy(checkpoint),
        ):
            fine_tuning_task.prepare()
Example 13
    def save(self, stream):
        checkpoint_state = get_checkpoint_dict(self.task, self.input_args)
        checkpoint_state["advance_to_next_phase"] = self.advance_to_next_phase
        torch.save(checkpoint_state, stream)