def test_logged_lr(self):
        """Verify the LR values logged by LossLrMeterLoggingHook follow the step schedule."""
        # Mock LR scheduler: returns `where` (training progress in [0, 1)) as the
        # LR, so the logged values directly expose when the scheduler was queried.
        def scheduler_mock(where):
            return where

        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
        mock_lr_scheduler.update_interval = UpdateInterval.STEP
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 5
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.optimizer.lr_scheduler = mock_lr_scheduler
        trainer = LocalTrainer()

        # 2 LR updates per epoch
        # At end of each epoch for train, LR is logged an additional time
        # (hence the repeated values in the expected sequence below)
        lr_order = [
            0.0, 1 / 6, 1 / 6, 2 / 6, 3 / 6, 3 / 6, 4 / 6, 5 / 6, 5 / 6
        ]
        lr_list = []

        # Replacement for the hook's _log_lr: record the LR instead of logging it.
        def mock_log_lr(task: ClassyTask, local_variables) -> None:
            lr_list.append(task.optimizer.lr)

        # Patch at the class level so the hook instance created below uses the
        # recording function for every _log_lr call during training.
        with mock.patch.object(LossLrMeterLoggingHook,
                               "_log_lr",
                               side_effect=mock_log_lr):
            hook = LossLrMeterLoggingHook(1)
            task.set_hooks([hook])
            trainer.train(task)
            self.assertEqual(lr_list, lr_order)
 def test_update_classy_model(self):
     """
     Checks that update_classy_model restores model state from a trained
     task, both with and without resetting the heads.
     """
     config = get_fast_test_task_config()
     source_task = build_task(config)
     trainer = LocalTrainer()
     trainer.train(source_task)
     for reset_heads in (False, True):
         target_task = build_task(config)
         # move target_task's model onto the right device first
         target_task.prepare()
         update_classy_model(
             target_task.model,
             source_task.model.get_classy_state(deep_copy=True),
             reset_heads,
         )
         self._compare_model_state(
             source_task.model.get_classy_state(),
             target_task.model.get_classy_state(),
             check_heads=not reset_heads,
         )
         if reset_heads:
             # head states must now differ, so a full comparison should fail
             with self.assertRaises(Exception):
                 self._compare_model_state(
                     source_task.model.get_classy_state(),
                     target_task.model.get_classy_state(),
                     check_heads=True,
                 )
# Example #3 (scrape separator)
    def test_logged_lr(self):
        """Train with a pass-through LR scheduler and check the LR seen at each step."""

        class PassThroughScheduler(ClassyParamScheduler):
            # report the training progress itself as the learning rate
            def __call__(self, where):
                return where

        scheduler = PassThroughScheduler(UpdateInterval.STEP)
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.set_optimizer_schedulers({"lr": scheduler})
        trainer = LocalTrainer()

        # 2 LR updates per epoch over 3 epochs = 6 distinct LR values
        expected_lrs = [0.0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]
        observed_lrs = []

        class RecordLrHook(ClassyHook):
            on_end = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_start = ClassyHook._noop

            def on_step(self, task):
                # only train phases advance the LR schedule
                if task.train:
                    observed_lrs.append(task.optimizer.options_view.lr)

        task.set_hooks([RecordLrHook()])
        trainer.train(task)
        self.assertEqual(observed_lrs, expected_lrs)
# Example #4 (scrape separator)
def train(datasets, model, loss, optimizer, meters, args):
    """Train `model` on `datasets` with logging, progress and checkpoint hooks.

    Args:
        datasets: dict with "train" and "test" dataset entries.
        model / loss / optimizer / meters: classy components for the task.
        args: namespace providing num_epochs, print_freq, skip_tensorboard,
            video_dir, cuda and num_workers.
    """
    task = (ClassificationTask()
            .set_num_epochs(args.num_epochs)
            .set_loss(loss)
            .set_model(model)
            .set_optimizer(optimizer)
            .set_meters(meters))
    for phase in ["train", "test"]:
        task.set_dataset(datasets[phase], phase)

    hooks = [LossLrMeterLoggingHook(log_freq=args.print_freq)]
    # show progress
    hooks.append(ProgressBarHook())
    if not args.skip_tensorboard:
        # tensorboardX is optional; degrade gracefully when not installed
        try:
            from tensorboardX import SummaryWriter
            tb_writer = SummaryWriter(log_dir=args.video_dir + "/tensorboard")
            hooks.append(TensorboardPlotHook(tb_writer))
        except ImportError:
            print("tensorboardX not installed, skipping tensorboard hooks")

    checkpoint_dir = f"{args.video_dir}/checkpoint/classy_checkpoint_{time.time()}"
    # makedirs (not mkdir) so the intermediate "checkpoint" directory is
    # created on first run instead of raising FileNotFoundError
    os.makedirs(checkpoint_dir)
    hooks.append(CheckpointHook(checkpoint_dir, input_args={}))

    task = task.set_hooks(hooks)
    trainer = LocalTrainer(use_gpu=args.cuda, num_dataloader_workers=args.num_workers)
    trainer.train(task)
# Example #5 (scrape separator)
    def test_logged_lr(self):
        """Check that a STEP-interval scheduler yields one LR update per train step."""
        # the scheduler simply echoes back the training progress as the LR
        def echo_where(where):
            return where

        lr_scheduler_mock = mock.Mock(side_effect=echo_where)
        lr_scheduler_mock.update_interval = UpdateInterval.STEP
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.optimizer.param_schedulers["lr"] = lr_scheduler_mock
        trainer = LocalTrainer()

        # 2 LR updates per epoch over 3 epochs = 6 values
        expected = [0.0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]
        seen = []

        class RecordingHook(ClassyHook):
            on_end = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_start = ClassyHook._noop

            def on_step(self, task):
                # LR only changes during train phases
                if task.train:
                    seen.append(task.optimizer.parameters.lr)

        task.set_hooks([RecordingHook()])
        trainer.train(task)
        self.assertEqual(seen, expected)
    def test_checkpointing(self):
        """Save a checkpoint via CheckpointHook, then resume training from it
        on every available device."""
        # make checkpoint directory
        checkpoint_folder = self.base_dir + "/checkpoint/"
        os.mkdir(checkpoint_folder)

        config = get_fast_test_task_config()
        cuda_available = torch.cuda.is_available()
        task = build_task(config)

        task.prepare(use_gpu=cuda_available)

        # create a checkpoint hook
        checkpoint_hook = CheckpointHook(checkpoint_folder, {},
                                         phase_types=["train"])

        # call the on end phase function
        # (invoked directly rather than via a trainer, so the checkpoint
        # reflects the freshly-prepared, untrained task state)
        checkpoint_hook.on_phase_end(task)

        # we should be able to train a task using the checkpoint on all available
        # devices
        for use_gpu in {False, cuda_available}:  # set literal: {False} when no GPU
            # load the checkpoint
            checkpoint = load_checkpoint(checkpoint_folder)

            # create a new task
            task = build_task(config)

            # set the checkpoint
            task.set_checkpoint(checkpoint)

            task.prepare(use_gpu=use_gpu)

            # we should be able to run the trainer using the checkpoint
            trainer = LocalTrainer(use_gpu=use_gpu)
            trainer.train(task)
    def test_test_only_task(self):
        """
        Builds a task in test_only mode and verifies its state and that the
        trainer can still run it.
        """
        test_config = get_fast_test_task_config()
        test_config["test_only"] = True

        # a test-only task needs no train split
        del test_config["dataset"]["train"]

        test_only_task = build_task(test_config)
        test_only_task.set_hooks([LossLrMeterLoggingHook()])
        test_only_task.prepare()
        test_state = test_only_task.get_classy_state()

        # test-only state must report test mode regardless of train state
        self.assertFalse(test_state["train"])

        # no optimizer updates should have happened yet
        self.assertEqual(test_state["num_updates"], 0)

        # the trainer should run the task without a train phase
        LocalTrainer().train(test_only_task)
# Example #8 (scrape separator)
 def test_training(self):
     """Smoke-test GPU training with Apex AMP enabled at opt level O2."""
     config = get_fast_test_task_config()
     config["amp_args"] = {"opt_level": "O2"}
     task = build_task(config)
     task.set_use_gpu(True)
     LocalTrainer().train(task)
# Example #9 (scrape separator)
    def test_hook(self):
        """A hook's on_update should observe one LR value per optimizer step."""
        task = self._build_task(num_epochs=3)

        recorded_lrs = []

        class LrRecordingHook(ClassyHook):
            on_rendezvous = ClassyHook._noop
            on_start = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_sample = ClassyHook._noop
            on_forward = ClassyHook._noop
            on_loss_and_meter = ClassyHook._noop
            on_backward = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_end = ClassyHook._noop

            def on_update(self, task: ClassyTask, local_variables) -> None:
                recorded_lrs.append(task.optimizer.lr)

        task.set_hooks([LrRecordingHook()])

        # scheduler that reports the training progress as the LR
        def echo_where(where):
            return where

        scheduler_mock = Mock(side_effect=echo_where)
        scheduler_mock.update_interval = UpdateInterval.STEP
        task.optimizer.lr_scheduler = scheduler_mock

        LocalTrainer().train(task)

        # We have 10 samples, batch size is 5. Each epoch is done in two steps.
        self.assertEqual(recorded_lrs, [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6])
# Example #10 (scrape separator)
    def test_one(self):
        """End-to-end smoke test: build train/test datasets, model, loss and
        optimizer, then run a one-epoch training."""
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])

        # both splits share every dataset parameter except the transform
        shared_kwargs = dict(
            batchsize_per_replica=32,
            shuffle=False,
            num_samples=100,
            crop_size=224,
            class_ratio=0.5,
            seed=0,
        )
        train_dataset = MyDataset(
            transform=GenericImageTransform(transform=train_transform),
            **shared_kwargs,
        )
        test_dataset = MyDataset(
            transform=GenericImageTransform(transform=test_transform),
            **shared_kwargs,
        )

        task = (
            ClassificationTask()
            .set_model(MyModel())
            .set_dataset(train_dataset, "train")
            .set_dataset(test_dataset, "test")
            .set_loss(MyLoss())
            .set_optimizer(SGD(lr_scheduler=ConstantParamScheduler(0.01)))
            .set_num_epochs(1)
        )

        LocalTrainer().train(task)
# Example #11 (scrape separator)
    def test_hook(self):
        """Checks per-step scheduler values propagate to the wrapped PyTorch
        optimizer's param groups during training."""
        task = self._build_task(num_epochs=3)

        lr_list = []
        weight_decay_list = []
        momentum_list = []

        # keep a handle on the TestCase so the inner hook class can assert
        test_instance = self

        class TestHook(ClassyHook):
            on_rendezvous = ClassyHook._noop
            on_start = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_sample = ClassyHook._noop
            on_forward = ClassyHook._noop
            on_loss_and_meter = ClassyHook._noop
            on_backward = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_end = ClassyHook._noop

            def on_update(self, task: ClassyTask, local_variables) -> None:
                # make sure we have non-zero param groups
                test_instance.assertGreater(
                    len(task.optimizer.optimizer.param_groups), 0)
                # test that our overrides work on the underlying PyTorch optimizer
                for param_group in task.optimizer.optimizer.param_groups:
                    test_instance.assertEqual(param_group["lr"],
                                              task.optimizer.parameters.lr)
                    test_instance.assertEqual(
                        param_group["weight_decay"],
                        task.optimizer.parameters.weight_decay,
                    )
                    test_instance.assertEqual(
                        param_group["momentum"],
                        task.optimizer.parameters.momentum)
                # NOTE: param_group intentionally refers to the last group from
                # the loop above; all groups were just asserted equal, so any
                # one of them is representative
                lr_list.append(param_group["lr"])
                weight_decay_list.append(param_group["weight_decay"])
                momentum_list.append(param_group["momentum"])

        task.set_hooks([TestHook()])

        # scheduler that reports the training progress as the value
        def scheduler_mock(where):
            return where

        mock = Mock(side_effect=scheduler_mock)
        mock.update_interval = UpdateInterval.STEP
        task.optimizer.param_schedulers["lr"] = mock

        trainer = LocalTrainer()
        trainer.train(task)

        # We have 10 samples, batch size is 5. Each epoch takes two steps. So,
        # there will be a total of 6 steps.
        # the lr scheduler uses a step update interval
        self.assertEqual(lr_list, [0 / 6, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6])
        # the weight decay scheduler uses an epoch update interval
        self.assertEqual(weight_decay_list,
                         [0 / 6, 0 / 6, 4 / 6, 4 / 6, 8 / 6, 8 / 6])
        self.assertEqual(momentum_list, [0.9, 0.9, 0.9, 0.9, 0.9, 0.9])
    def test_no_param_schedulers(self):
        """Tasks built without param schedulers should still be trainable."""
        task = self._build_task(num_epochs=3, skip_param_schedulers=True)

        # no schedulers should have been attached to the optimizer
        self.assertEqual(task.optimizer.param_schedulers, {})

        # and training should proceed without them
        LocalTrainer().train(task)
    def test_train_step(self):
        """A torchvision resnet wrapped in ClassyModelWrapper should train."""
        wrapped_model = ClassyModelWrapper(models.resnet34(pretrained=False))

        task = build_task(get_fast_test_task_config())
        task.set_model(wrapped_model)
        LocalTrainer().train(task)
    def test_logged_lr(self):
        """TensorboardPlotHook should log one 'Learning Rate/train' scalar per
        train step."""
        # Mock LR scheduler
        class SchedulerMock(ClassyParamScheduler):
            def __call__(self, where):
                # report the training progress itself as the LR
                return where

        mock_lr_scheduler = SchedulerMock(UpdateInterval.STEP)

        # Mock Logging: minimal stand-in for tensorboard's SummaryWriter that
        # accumulates scalar values in memory, keyed by tag
        class DummySummaryWriter(object):
            def __init__(self):
                self.scalar_logs = {}

            def add_scalar(self,
                           key,
                           value,
                           global_step=None,
                           walltime=None) -> None:
                self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

            def add_histogram(self,
                              key,
                              value,
                              global_step=None,
                              walltime=None) -> None:
                return

            def add_text(self, *args, **kwargs):
                pass

            def flush(self):
                return

        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)

        writer = DummySummaryWriter()
        hook = TensorboardPlotHook(writer)
        hook.log_period = 1  # log on every step
        task.set_hooks([hook])
        task.set_optimizer_schedulers({"lr": mock_lr_scheduler})

        trainer = LocalTrainer()
        trainer.train(task)

        # We have 20 samples, batch size is 10. Each epoch is done in two steps.
        self.assertEqual(
            writer.scalar_logs["Learning Rate/train"],
            [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
        )
# Example #15 (scrape separator)
 def test_update_classy_state(self):
     """
     Verifies update_classy_state copies a trained task's state onto a
     freshly prepared task.
     """
     config = get_fast_test_task_config()
     trained_task = build_task(config)
     fresh_task = build_task(config)
     fresh_task.prepare()
     LocalTrainer().train(trained_task)
     update_classy_state(fresh_task, trained_task.get_classy_state(deep_copy=True))
     self._compare_states(trained_task.get_classy_state(),
                          fresh_task.get_classy_state())
    def test_checkpointing_different_device(self):
        """A checkpoint taken on one device type must be loadable on the other."""
        config = get_fast_test_task_config()
        source_task = build_task(config)
        target_task = build_task(config)

        for use_gpu in [True, False]:
            source_task.prepare(use_gpu=use_gpu)

            # hand the source task's state to the target task as a checkpoint
            target_task.set_checkpoint(
                get_checkpoint_dict(source_task, {}, deep_copy=True))

            # training must work even though the state came from another device
            LocalTrainer(use_gpu=not use_gpu).train(target_task)
# Example #17 (scrape separator)
    def test_train_only_task(self):
        """
        A task configured with only a train split (no test dataset) should
        still train.
        """
        test_config = get_fast_test_task_config()

        # drop the test split entirely
        del test_config["dataset"]["test"]

        task = build_task(test_config)
        task.set_hooks([LossLrMeterLoggingHook()])
        task.prepare()

        # training should succeed without a test phase
        LocalTrainer().train(task)
# Example #18 (scrape separator)
    def test_train(self) -> None:
        """OutputCSVHook should emit 10 parseable rows on CPU and, when
        available, on GPU."""
        for use_gpu in {False, torch.cuda.is_available()}:
            folder = f"{self.base_dir}/train_test/{use_gpu}"
            os.makedirs(folder)

            task = build_task(get_fast_test_task_config(head_num_classes=2))

            csv_hook = OutputCSVHook(folder)
            task.set_hooks([csv_hook])
            task.set_use_gpu(use_gpu)

            LocalTrainer().train(task)

            # the CSV written by the hook should contain 10 rows
            self.assertEqual(parse_csv(csv_hook.output_path), 10)
# Example #19 (scrape separator)
        def train_with_clipped_gradients(amp_args=None):
            # One-epoch run of a tiny model with gradient-norm clipping at 0.5;
            # returns the gradient norm of the model's single parameter so the
            # caller can assert the clip took effect.
            task = build_task(get_fast_test_task_config())
            task.set_num_epochs(1)
            task.set_model(SimpleModel())
            task.set_loss(SimpleLoss())
            task.set_meters([])
            task.set_use_gpu(torch.cuda.is_available())
            task.set_clip_grad_norm(0.5)
            task.set_amp_args(amp_args)
            task.set_optimizer(SGD(lr=1))

            LocalTrainer().train(task)

            return task.model.param.grad.norm()
    def test_param_scheduler_epoch(self):
        """An EPOCH-interval scheduler is queried exactly once per epoch."""
        task = self._build_task(num_epochs=3)

        queried_at = []

        def record_where(where):
            queried_at.append(where)
            return 0.1

        scheduler_mock = Mock(side_effect=record_where)
        scheduler_mock.update_interval = UpdateInterval.EPOCH
        task.optimizer.param_schedulers["lr"] = scheduler_mock

        LocalTrainer().train(task)

        # three epochs -> three queries, one at the start of each epoch
        self.assertEqual(queried_at, [0, 1 / 3, 2 / 3])
    def test_param_scheduler_epoch(self):
        """An EPOCH-interval ClassyParamScheduler is queried once per epoch."""
        task = self._build_task(num_epochs=3)

        queried_at = []

        class RecordingScheduler(ClassyParamScheduler):
            def __call__(self, where):
                queried_at.append(where)
                return 0.1

        task.set_optimizer_schedulers(
            {"lr": RecordingScheduler(UpdateInterval.EPOCH)})

        LocalTrainer().train(task)

        # three epochs -> three queries, one at the start of each epoch
        self.assertEqual(queried_at, [0, 1 / 3, 2 / 3])
    def test_param_scheduler_step(self):
        """A STEP-interval scheduler is queried once per optimizer step."""
        task = self._build_task(num_epochs=3)

        queried_at = []

        def record_where(where):
            queried_at.append(where)
            return 0.1

        scheduler_mock = Mock(side_effect=record_where)
        scheduler_mock.update_interval = UpdateInterval.STEP
        task.optimizer.param_schedulers["lr"] = scheduler_mock

        LocalTrainer().train(task)

        # 10 samples / batch size 5 -> two steps per epoch, six steps total
        self.assertEqual(queried_at, [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6])
# Example #23 (scrape separator)
    def test_param_scheduler_step(self):
        """A STEP-interval scheduler is queried on every step, plus once at init."""
        task = self._build_task(num_epochs=3)

        queried_at = []

        class RecordingScheduler(ClassyParamScheduler):
            def __call__(self, where):
                queried_at.append(where)
                return 0.1

        task.set_optimizer_schedulers(
            {"lr": RecordingScheduler(UpdateInterval.STEP)})

        LocalTrainer().train(task)

        # 10 samples / batch size 5 -> two steps per epoch, six steps total;
        # the leading duplicate 0 comes from the initialization call
        self.assertEqual(queried_at, [0, 0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6])
    def test_training(self):
        """Checks we can train a small MLP model to perfect accuracy."""
        config = get_test_mlp_task_config()
        task = ClassificationTask()
        task.set_num_epochs(10)
        task.set_loss(build_loss(config["loss"]))
        task.set_model(build_model(config["model"]))
        task.set_optimizer(build_optimizer(config["optimizer"]))
        task.set_meters([AccuracyMeter(topk=[1])])
        task.set_hooks([LossLrMeterLoggingHook()])
        for split in ["train", "test"]:
            task.set_dataset(build_dataset(config["dataset"][split]), split)

        self.assertIsNotNone(task)

        LocalTrainer().train(task)
        # the MLP should fit this toy dataset perfectly
        accuracy = task.meters[0].value["top_1"]
        self.assertAlmostEqual(accuracy, 1.0)
# Example #25 (scrape separator)
    def test_logged_lr(self):
        """TensorboardPlotHook should log one 'train_learning_rate_updates'
        scalar per train step."""
        # Mock LR scheduler: returns the training progress itself as the LR
        def scheduler_mock(where):
            return where

        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
        mock_lr_scheduler.update_interval = UpdateInterval.STEP

        # Mock Logging: in-memory stand-in for tensorboard's SummaryWriter
        class DummySummaryWriter(object):
            def __init__(self):
                self.scalar_logs = {}

            def add_scalar(self,
                           key,
                           value,
                           global_step=None,
                           walltime=None) -> None:
                self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

            def flush(self):
                return

        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 5
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)

        writer = DummySummaryWriter()
        hook = TensorboardPlotHook(writer)
        task.set_hooks([hook])
        task.optimizer.param_schedulers["lr"] = mock_lr_scheduler

        trainer = LocalTrainer()
        trainer.train(task)

        # We have 10 samples, batch size is 5. Each epoch is done in two steps.
        self.assertEqual(
            writer.scalar_logs["train_learning_rate_updates"],
            [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
        )
    def test_final_train_checkpoint(self):
        """Test that a train phase checkpoint with a where of 1.0 can be loaded"""

        config = get_fast_test_task_config()
        task = build_task(config).set_hooks(
            [CheckpointHook(self.base_dir, {}, phase_types=["train"])])
        task_2 = build_task(config)

        use_gpu = torch.cuda.is_available()

        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(task)

        # load the final train checkpoint
        checkpoint = load_checkpoint(self.base_dir)

        # make sure fetching the where raises an exception, which means that
        # where is >= 1.0 (i.e. the checkpoint really is from the end of training)
        with self.assertRaises(Exception):
            task.where

        # set task_2's state as task's final train checkpoint
        task_2.set_checkpoint(checkpoint)
        task_2.prepare(use_gpu=use_gpu)

        # we should be able to train the task
        # (loading an end-of-training checkpoint must not wedge the trainer)
        trainer.train(task_2)
# Example #27 (scrape separator)
    def test_hook(self):
        """on_step should observe consistent lr/weight_decay/momentum schedules."""
        task = self._build_task(num_epochs=3)

        lrs = []
        weight_decays = []
        momentums = []

        outer = self

        class RecordingHook(ClassyHook):
            on_start = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_end = ClassyHook._noop

            def on_step(self, task: ClassyTask) -> None:
                if not task.train:
                    return

                # the optimizer must expose at least one param group
                outer.assertGreater(len(task.optimizer.param_groups), 0)
                lrs.append(task.optimizer.options_view.lr)
                weight_decays.append(task.optimizer.options_view.weight_decay)
                momentums.append(task.optimizer.options_view.momentum)

        task.set_hooks([RecordingHook()])

        LocalTrainer().train(task)

        # 10 samples / batch size 5 -> two steps per epoch, six steps total
        # the lr scheduler uses a step update interval
        self.assertEqual(lrs, [0 / 6, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6])
        # the weight decay scheduler uses an epoch update interval
        self.assertEqual(weight_decays,
                         [0 / 6, 0 / 6, 4 / 6, 4 / 6, 8 / 6, 8 / 6])
        # momentum stays constant throughout
        self.assertEqual(momentums, [0.9, 0.9, 0.9, 0.9, 0.9, 0.9])
    def test_train_parametric_loss(self):
        """Fine-tune with a stateful (BatchNorm) loss in test-only mode and
        verify the loss state is left untouched."""
        heads_num_classes = 100
        pre_train_config = self._get_pre_train_config(
            head_num_classes=heads_num_classes)
        pre_train_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config(
            head_num_classes=heads_num_classes)
        fine_tuning_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }

        fine_tuning_task = build_task(fine_tuning_config)
        # deep copy so the fine-tuning task cannot mutate the original checkpoint
        fine_tuning_task._set_pretrained_checkpoint_dict(
            copy.deepcopy(checkpoint))
        # run in test mode to compare the loss state. Since we have a BatchNorm module in
        # the loss, its moving mean/std should be unchanged when we run in test-only mode
        fine_tuning_task.set_test_only(True)
        loss_state = copy.deepcopy(fine_tuning_task.loss.get_classy_state())
        trainer.train(fine_tuning_task)
        self._compare_state_dict(loss_state,
                                 fine_tuning_task.loss.get_classy_state())
    def test_training(self):
        """Run one GPU training under Apex AMP and one under PyTorch native AMP."""
        config = get_fast_test_task_config()

        # Apex AMP at opt level O2
        config["amp_args"] = {"opt_level": "O2"}
        task = build_task(config)
        task.set_use_gpu(True)
        LocalTrainer().train(task)

        # PyTorch native AMP
        config["amp_args"] = {"amp_type": "pytorch"}
        task = build_task(config)
        task.set_use_gpu(True)
        LocalTrainer().train(task)
# Example #30 (scrape separator)
    def train_with_batch(self, simulated_bs, actual_bs, clip_grad_norm=None):
        """Train a tiny model for one epoch at `actual_bs`, optionally
        simulating a larger global batch size and/or clipping gradients.

        Returns the model's parameter so callers can compare runs.
        """
        config = copy.deepcopy(get_fast_test_task_config())
        config["dataset"]["train"]["num_samples"] = 12
        config["dataset"]["train"]["batchsize_per_replica"] = actual_bs
        # no test phase is needed for this comparison
        del config["dataset"]["test"]

        task = build_task(config)
        task.set_num_epochs(1)
        task.set_model(SimpleModel())
        task.set_loss(SimpleLoss())
        task.set_meters([])
        task.set_use_gpu(torch.cuda.is_available())
        if simulated_bs is not None:
            task.set_simulated_global_batchsize(simulated_bs)
        if clip_grad_norm is not None:
            task.set_clip_grad_norm(clip_grad_norm)
        task.set_optimizer(SGD(lr=1))

        LocalTrainer().train(task)

        return task.model.param