Example #1
    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            validation_data_loader=self.validation_data_loader,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            patience=5,
            validation_metric="-acc",
        )
        tracker = new_trainer._metric_tracker

        new_tracker = copy.deepcopy(tracker)
        for acc in [0.02, 0.3, 0.2, 0.1, 0.4, 0.4]:
            new_tracker.add_metrics({"acc": acc})
        assert new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        for acc in [0.3, 0.3, 0.2, 0.1, 0.4, 0.5]:
            new_tracker.add_metrics({"acc": acc})
        assert not new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        for acc in [0.1, 0.3, 0.2, 0.1, 0.4, 0.5]:
            new_tracker.add_metrics({"acc": acc})
        assert new_tracker.should_stop_early()
Example #2
    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            tensorboard_writer=TensorboardWriter(
                serialization_dir=self.TEST_DIR, histogram_interval=2
            ),
        )
        trainer.train()
Example #3
    def ensure_model_can_train(
        self,
        trainer: GradientDescentTrainer,
        gradients_to_ignore: Set[str] = None,
        metric_to_check: str = None,
        metric_terminal_value: float = None,
        metric_tolerance: float = 1e-4,
        disable_dropout: bool = True,
    ):
        """
        A simple test for model training behavior when you are not using configuration files. In
        this case, we don't have a story around saving and loading models (you need to handle that
        yourself), so we don't have tests for that.  We just test that the model can train, and that
        it computes gradients for all parameters.

        Because the `Trainer` already has a reference to a model and to a data loader, we just take
        the `Trainer` object itself, and grab the `Model` and other necessary objects from there.

        # Parameters

        trainer: `GradientDescentTrainer`
            The `Trainer` to use for the test, which already has references to a `Model` and a
            `DataLoader`, which we will use in the test.
        gradients_to_ignore : `Set[str]`, optional (default=`None`)
            This test runs a gradient check to make sure that we're actually computing gradients
            for all of the parameters in the model.  If you really want to ignore certain
            parameters when doing that check, you can pass their names here.  This is not
            recommended unless you're `really` sure you don't need to have non-zero gradients for
            those parameters (e.g., some of the beam search / state machine models have
            infrequently-used parameters that are hard to force the model to use in a small test).
        metric_to_check: `str`, optional (default = `None`)
            We may want to automatically check that the model reaches a given metric during
            training (on the validation set, if one is specified). This can be useful in CI, for
            example. You can pass any metric that appears in the metrics your model returns.
        metric_terminal_value: `float`, optional (default = `None`)
            When you set `metric_to_check`, you need to set the value this metric must converge to.
        metric_tolerance: `float`, optional (default=`1e-4`)
            Tolerance when checking your model's metric against `metric_terminal_value`. One can
            expect some variance in model metrics when the training process is highly stochastic.
        disable_dropout : `bool`, optional (default = `True`)
            If True we will set all dropout to 0 before checking gradients. (Otherwise, with small
            datasets, you may get zero gradients because of unlucky dropout.)
        """
        metrics = trainer.train()
        if metric_to_check is not None:
            metric_value = metrics.get(
                f"best_validation_{metric_to_check}") or metrics.get(
                    f"training_{metric_to_check}")
            assert metric_value is not None, f"Cannot find {metric_to_check} in metrics.json file"
            assert metric_terminal_value is not None, "Please specify metric terminal value"
            assert abs(metric_value - metric_terminal_value) < metric_tolerance

        model_batch = next(iter(trainer.data_loader))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(trainer.model,
                                                      model_batch,
                                                      gradients_to_ignore,
                                                      disable_dropout)
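A minimal sketch of how `ensure_model_can_train` might be invoked from a test; the model, data loader, and optimizer construction below are hypothetical placeholders, not taken from the examples on this page.

    def test_model_can_train(self):
        # Hypothetical helpers: build_toy_model() and build_toy_data_loader() stand in
        # for whatever model and data this test case provides.
        model = build_toy_model()
        data_loader = build_toy_data_loader()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        trainer = GradientDescentTrainer(
            model,
            optimizer,
            data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        # Train, verify that every trainable parameter receives a gradient, and
        # (optionally) check that the "accuracy" metric converges near 1.0.
        self.ensure_model_can_train(
            trainer,
            metric_to_check="accuracy",
            metric_terminal_value=1.0,
            metric_tolerance=0.1,
        )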
Example #4
 def test_data_loader_lazy_epoch_size_correct_custom_epoch_size(self):
     self.data_loader_lazy.batches_per_epoch = 3
     num_epochs = 3
     trainer = GradientDescentTrainer(
         self.model,
         self.optimizer,
         self.data_loader_lazy,
         validation_data_loader=self.validation_data_loader,
         num_epochs=num_epochs,
         serialization_dir=self.TEST_DIR,
     )
     assert trainer._batch_num_total == 0
     metrics = trainer.train()
     epoch = metrics["epoch"]
     assert epoch == num_epochs - 1
     assert trainer._batch_num_total == num_epochs * 3
Example #5
    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_decreasing_metric(
        self,
    ):
        new_trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            validation_data_loader=self.validation_data_loader,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            patience=5,
            validation_metric="-test",
        )
        tracker = new_trainer._metric_tracker

        # when it is the only metric it should be considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metric(1)
        assert new_tracker.is_best_so_far()

        # when it is the same as one before it it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.3])
        assert not new_tracker.is_best_so_far()

        # when it is the best it is considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.0013])
        assert new_tracker.is_best_so_far()

        # when it is not the best it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 13])
        assert not new_tracker.is_best_so_far()
Example #6
 def test_can_optimise_model_with_dense_and_sparse_params(self):
     optimizer_params = Params({"type": "dense_sparse_adam"})
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params)
     for instance in self.instances:
         instance.index_fields(self.vocab)
     GradientDescentTrainer(self.model, optimizer, SimpleDataLoader(self.instances, 2)).train()
Example #7
    def test_passing_trainer_multiple_gpus_raises_error(self):
        self.model.cuda()

        with pytest.raises(ConfigurationError):
            GradientDescentTrainer(
                self.model, self.optimizer, self.data_loader, num_epochs=2, cuda_device=[0, 1],
            )
Example #8
    def test_regularization(self):
        penalty = self.model.get_regularization_penalty()
        assert penalty is None

        data_loader = PyTorchDataLoader(self.instances, batch_size=32)
        trainer = GradientDescentTrainer(self.model, None, data_loader)  # optimizer,

        # You get a RuntimeError if you call `model.forward` twice on the same inputs.
        # The data and config are such that the whole dataset is one batch.
        training_batch = next(iter(data_loader))
        validation_batch = next(iter(data_loader))

        training_loss = trainer.batch_outputs(training_batch, for_training=True)["loss"].item()
        validation_loss = trainer.batch_outputs(validation_batch, for_training=False)["loss"].item()

        # With no regularization penalty, the training and validation losses should be equal.
        numpy.testing.assert_almost_equal(training_loss, validation_loss)
Example #9
    def test_trainer_can_log_learning_rates_tensorboard(self):
        data_loader = SimpleDataLoader(self.instances, 4)
        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            callbacks=[
                TensorBoardCallback(
                    serialization_dir=self.TEST_DIR,
                    summary_interval=2,
                    should_log_learning_rate=True,
                )
            ],
        )

        trainer.train()
Example #10
    def test_trainer_can_log_learning_rates_tensorboard(self):
        data_loader = DataLoader(self.instances,
                                 batch_size=4,
                                 collate_fn=allennlp_collate)
        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            tensorboard_writer=TensorboardWriter(
                serialization_dir=self.TEST_DIR,
                should_log_learning_rate=True,
                summary_interval=2,
            ),
        )

        trainer.train()
Example #11
    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            num_epochs=5,
            serialization_dir=self.TEST_DIR,
            checkpointer=Checkpointer(
                serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3
            ),
        )
        trainer.train()

        # Now check the serialized files
        for prefix in ["model_state_epoch_*", "training_state_epoch_*"]:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names]
            assert sorted(epochs) == [2, 3, 4]
Example #12
    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create a fake data loader that sleeps for 2.5 seconds per epoch, so the total
        #   training time for one epoch is slightly greater than 2.5 seconds.
        #   Run for 6 epochs, keeping the last 2 models and also keeping a model every 5 seconds.
        #   Check the resulting checkpoints.  Should then have models at epochs
        #       2, 4, plus the last two at 5 and 6.

        class SlowDataLoader:
            data_loader = SimpleDataLoader(self.instances, batch_size=2)

            def __iter__(self):
                time.sleep(2.5)
                return iter(self.data_loader)

            def __len__(self):
                return len(self.data_loader)

            def set_target_device(self, _):
                pass

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            SlowDataLoader(),
            num_epochs=6,
            serialization_dir=self.TEST_DIR,
            checkpointer=Checkpointer(
                serialization_dir=self.TEST_DIR,
                num_serialized_models_to_keep=2,
                keep_serialized_model_every_num_seconds=5,
            ),
        )
        trainer.train()

        # Now check the serialized files
        for prefix in ["model_state_epoch_*", "training_state_epoch_*"]:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [
                int(re.search(r"_([0-9])\.th", fname).group(1))
                for fname in file_names
            ]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]
Example #13
    def test_epoch_callback_is_called_at_every_epoch(self):
        class FakeEpochCallback(EpochCallback):
            def __call__(self, trainer: "GradientDescentTrainer",
                         metrics: Dict[str, Any], epoch: int) -> None:
                if not hasattr(trainer, "epoch_callback_calls"):
                    trainer.epoch_callback_calls = []  # type: ignore
                trainer.epoch_callback_calls.append(epoch)  # type: ignore

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            num_epochs=4,
            validation_data_loader=self.validation_data_loader,
            epoch_callbacks=[FakeEpochCallback()],
        )
        trainer.train()
        expected_calls = [epoch for epoch in range(-1, 4)]
        assert trainer.epoch_callback_calls == expected_calls
Example #14
    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=[
                TensorBoardCallback(
                    serialization_dir=self.TEST_DIR,
                    distribution_interval=2,
                )
            ],
        )
        trainer.train()
Example #15
    def test_trainer_saves_models_at_specified_interval(self):
        data_loader = DataLoader(self.instances,
                                 batch_size=4,
                                 collate_fn=allennlp_collate)

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            checkpointer=Checkpointer(
                serialization_dir=self.TEST_DIR,
                model_save_interval=0.0001,
                num_serialized_models_to_keep=10,
            ),
        )

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = "model_state_epoch_*"
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [
            re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
            for fname in file_names
        ]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == "1"
        assert "." in epochs[0]

        # Now make certain we can restore from timestamped checkpoint.
        # To do so, remove the end-of-epoch checkpoints for both epochs (epoch files 0 and 1),
        # so that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(
                os.path.join(self.TEST_DIR,
                             "model_state_epoch_{}.th".format(k)))
            os.remove(
                os.path.join(self.TEST_DIR,
                             "training_state_epoch_{}.th".format(k)))
        os.remove(os.path.join(self.TEST_DIR, "best.th"))

        restore_trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            checkpointer=Checkpointer(serialization_dir=self.TEST_DIR,
                                      model_save_interval=0.0001),
        )
        epoch = restore_trainer._restore_checkpoint()
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2
Example #16
    def test_trainer_saves_metrics_every_epoch(self):
        trainer = GradientDescentTrainer(
            model=self.model,
            optimizer=self.optimizer,
            data_loader=self.data_loader,
            validation_data_loader=self.validation_data_loader,
            num_epochs=5,
            serialization_dir=self.TEST_DIR,
            checkpointer=Checkpointer(serialization_dir=self.TEST_DIR,
                                      num_serialized_models_to_keep=3),
        )
        trainer.train()

        for epoch in range(5):
            epoch_file = self.TEST_DIR / f"metrics_epoch_{epoch}.json"
            assert epoch_file.exists()
            metrics = json.load(open(epoch_file))
            assert "validation_loss" in metrics
            assert "best_validation_loss" in metrics
            assert metrics.get("epoch") == epoch
Example #17
 def test_trainer_respects_epoch_size_smaller_tnan_total(self):
     batches_per_epoch = 1
     num_epochs = 2
     data_loader_smaller_epoch = SimpleDataLoader(
         self.instances,
         2,
         batches_per_epoch=batches_per_epoch,
     )
     trainer = GradientDescentTrainer(
         self.model,
         self.optimizer,
         data_loader_smaller_epoch,
         validation_data_loader=self.validation_data_loader,
         num_epochs=num_epochs,
         serialization_dir=self.TEST_DIR,
     )
     assert trainer._batch_num_total == 0
     metrics = trainer.train()
     epoch = metrics["epoch"]
     assert epoch == num_epochs - 1
     assert trainer._batch_num_total == num_epochs * batches_per_epoch
Example #18
    def test_trainer_can_run_gradient_accumulation(self):
        instances = list(self.instances)
        steps_to_accumulate = 2

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            validation_data_loader=self.validation_data_loader,
            num_epochs=2,
            num_gradient_accumulation_steps=steps_to_accumulate,
        )
        assert trainer._num_gradient_accumulation_steps == steps_to_accumulate

        metrics = trainer.train()

        num_batches_trained_per_epoch = trainer._batch_num_total // (metrics["training_epochs"] + 1)
        num_batches_expected = math.ceil(
            math.ceil(len(instances) / self.data_loader.batch_size) / steps_to_accumulate
        )

        assert num_batches_trained_per_epoch == num_batches_expected
Example #19
    def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self):
        # Use +loss, run 1 epoch of original training, and one of restored training.
        # Run 1 epoch of original training.
        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            validation_data_loader=self.validation_data_loader,
            validation_metric="+loss",
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()

        _ = trainer._restore_checkpoint()
        best_epoch_1 = trainer._metric_tracker.best_epoch
        best_validation_metrics_epoch_1 = trainer._metric_tracker.best_epoch_metrics
        # best_validation_metrics_epoch_1: {'accuracy': 0.75, 'accuracy3': 1.0, 'loss': 0.6243013441562653}
        assert isinstance(best_validation_metrics_epoch_1, dict)
        assert "loss" in best_validation_metrics_epoch_1

        # Run 1 more epoch of restored training.
        restore_trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            self.data_loader,
            validation_data_loader=self.validation_data_loader,
            validation_metric="+loss",
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        restore_trainer.train()
        _ = restore_trainer._restore_checkpoint()
        best_epoch_2 = restore_trainer._metric_tracker.best_epoch
        best_validation_metrics_epoch_2 = restore_trainer._metric_tracker.best_epoch_metrics

        # Because of using +loss, 2nd epoch won't be better than 1st. So best val metrics should be same.
        assert best_epoch_1 == best_epoch_2 == 0
        assert best_validation_metrics_epoch_2 == best_validation_metrics_epoch_1
Example #20
 def test_data_loader_lazy_epoch_size_correct_custom_epoch_size(self):
     batches_per_epoch = 3
     num_epochs = 3
     data_loader_custom_epoch_lazy = PyTorchDataLoader(
         self.instances_lazy,
         batch_size=2,
         collate_fn=allennlp_collate,
         batches_per_epoch=batches_per_epoch,
     )
     trainer = GradientDescentTrainer(
         self.model,
         self.optimizer,
         data_loader_custom_epoch_lazy,
         validation_data_loader=self.validation_data_loader,
         num_epochs=num_epochs,
         serialization_dir=self.TEST_DIR,
     )
     assert trainer._batch_num_total == 0
     metrics = trainer.train()
     epoch = metrics["epoch"]
     assert epoch == num_epochs - 1
     assert trainer._batch_num_total == num_epochs * batches_per_epoch
Example #21
    def test_total_loss_is_average_of_batch_loss(self):

        batches_per_epoch = 3

        data_loader_custom_epoch_lazy = PyTorchDataLoader(
            self.instances_lazy,
            batch_size=2,
            collate_fn=allennlp_collate,
            batches_per_epoch=batches_per_epoch,
        )

        class FakeBatchCallback(BatchCallback):
            def __call__(
                self,
                trainer: "GradientDescentTrainer",
                batch_inputs: List[List[TensorDict]],
                batch_outputs: List[Dict[str, Any]],
                batch_metrics: Dict[str, Any],
                epoch: int,
                batch_number: int,
                is_training: bool,
                is_master: bool,
            ) -> None:
                if not hasattr(trainer, "batch_losses"):
                    trainer.batch_losses = []  # type: ignore
                trainer.batch_losses.append(
                    batch_outputs[0]["loss"].item())  # type: ignore

        trainer = GradientDescentTrainer(
            self.model,
            self.optimizer,
            data_loader_custom_epoch_lazy,
            num_epochs=1,
            batch_callbacks=[FakeBatchCallback()],
        )
        metrics = trainer.train()

        assert metrics["training_loss"] == float(
            sum(trainer.batch_losses) / batches_per_epoch)
Example #22
 def test_trainer_respects_epoch_size_larger_tnan_total(self):
     batches_per_epoch = 7
     num_epochs = 3
     data_loader_larger_epoch = AllennlpDataLoader(
         self.instances,
         batch_size=2,
         collate_fn=allennlp_collate,
         batches_per_epoch=batches_per_epoch,
     )
     trainer = GradientDescentTrainer(
         self.model,
         self.optimizer,
         data_loader_larger_epoch,
         validation_data_loader=self.validation_data_loader,
         num_epochs=num_epochs,
         serialization_dir=self.TEST_DIR,
     )
     assert trainer._batch_num_total == 0
     metrics = trainer.train()
     epoch = metrics["epoch"]
     assert epoch == num_epochs - 1
     assert trainer._batch_num_total == num_epochs * batches_per_epoch
Example #23
 def init_trainer(self) -> Trainer:
     parameters = [(n, p) for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = AdamOptimizer(parameters, lr=self.config.lr)  # type: ignore
     trainer = GradientDescentTrainer(
         model=self.model,
         serialization_dir='./output',
         data_loader=self.train_data_loader,
         validation_data_loader=self.dev_data_loader,
         num_epochs=self.config.epoch,
         optimizer=optimizer,
         cuda_device=self.config.device,
     )
     return trainer
Example #24
    def test_trainer_can_run_and_resume_with_momentum_scheduler(self):
        scheduler = MomentumScheduler.from_params(
            optimizer=self.optimizer,
            params=Params({
                "type": "inverted_triangular",
                "cool_down": 2,
                "warm_up": 2
            }),
        )
        trainer = GradientDescentTrainer(
            model=self.model,
            optimizer=self.optimizer,
            data_loader=self.data_loader,
            momentum_scheduler=scheduler,
            validation_metric="-loss",
            validation_data_loader=self.validation_data_loader,
            num_epochs=4,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()

        new_scheduler = MomentumScheduler.from_params(
            optimizer=self.optimizer,
            params=Params({
                "type": "inverted_triangular",
                "cool_down": 2,
                "warm_up": 2
            }),
        )
        new_trainer = GradientDescentTrainer(
            model=self.model,
            optimizer=self.optimizer,
            data_loader=self.data_loader,
            momentum_scheduler=new_scheduler,
            validation_metric="-loss",
            validation_data_loader=self.validation_data_loader,
            num_epochs=6,
            serialization_dir=self.TEST_DIR,
        )
        epoch = new_trainer._restore_checkpoint()
        assert epoch == 4
        assert new_trainer._momentum_scheduler.last_epoch == 3
        new_trainer.train()
Example #25
def objective_fn(
        trial: Trial,
        device: int,
        direction: str,
        target_metric: str,
        base_serialization_dir: str,
):
    embedding_dim = trial.suggest_int("embedding_dim", 128, 256)
    max_filter_size = trial.suggest_int("max_filter_size", 3, 6)
    num_filters = trial.suggest_int("num_filters", 128, 256)
    output_dim = trial.suggest_int("output_dim", 128, 512)
    dropout = trial.suggest_float("dropout", 0, 1.0, log=False)
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)

    train_dataset, valid_dataset, vocab = prepare_data()
    model = create_model(vocab, embedding_dim, max_filter_size, num_filters, output_dim, dropout)

    if device > -1:
        model.to(torch.device("cuda:{}".format(device)))

    optimizer = SGD(model.parameters(), lr=lr)
    data_loader = DataLoader(train_dataset, batch_size=10, collate_fn=allennlp_collate)
    validation_data_loader = DataLoader(valid_dataset, batch_size=64, collate_fn=allennlp_collate)
    serialization_dir = os.path.join(base_serialization_dir, "trial_{}".format(trial.number))
    trainer = GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=data_loader,
        validation_data_loader=validation_data_loader,
        validation_metric=("+" if direction == "MAXIMIZE" else "-") + target_metric,
        patience=None,  # `patience=None` since it could conflict with AllenNLPPruningCallback
        num_epochs=50,
        cuda_device=device,
        serialization_dir=serialization_dir,
        epoch_callbacks=[AllenNLPPruningCallback(trial, f"validation_{target_metric}")],
    )
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
    return trainer.train()[f"best_validation_{target_metric}"]
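For context, a sketch of how an objective like this is typically handed to Optuna; the study direction, trial count, and serialization directory below are illustrative assumptions, not taken from the example above.

import functools
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(
    functools.partial(
        objective_fn,
        device=-1,  # CPU; pass a GPU id to train on CUDA
        direction="MAXIMIZE",
        target_metric="accuracy",
        base_serialization_dir="./optuna_output",
    ),
    n_trials=10,
)
print(study.best_trial.params)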
Example #26
    def test_sanity_check_default(self):
        model_with_bias = FakeModelForTestingNormalizationBiasVerification(use_bias=True)
        inst = Instance({"x": TensorField(torch.rand(3, 1, 4))})
        data_loader = SimpleDataLoader([inst, inst], 2)
        trainer = GradientDescentTrainer.from_partial_objects(
            model_with_bias,
            serialization_dir=self.TEST_DIR,
            data_loader=data_loader,
            num_epochs=1,
        )
        with pytest.raises(SanityCheckError):
            trainer.train()

        trainer = GradientDescentTrainer.from_partial_objects(
            model_with_bias,
            serialization_dir=self.TEST_DIR,
            data_loader=data_loader,
            num_epochs=1,
            run_sanity_checks=False,
        )

        # Check is not run, so no failure.
        trainer.train()
Example #27
 def test_should_stop_early_with_invalid_patience(self):
     for patience in [0, -1, -2, 1.5, "None"]:
         with pytest.raises(
             ConfigurationError,
             match='.* is an invalid value for "patience": '
             "it must be a positive integer or None "
             "\\(if you want to disable early stopping\\)",
         ):
             GradientDescentTrainer(
                 self.model,
                 self.optimizer,
                 self.data_loader,
                 validation_data_loader=self.validation_data_loader,
                 num_epochs=100,
                 patience=patience,
                 validation_metric="+test",
             )
Example #28
def build_trainer(
    model: Model,
    serialization_dir: str,
    train_loader: DataLoader,
    dev_loader: DataLoader,
) -> Trainer:
    parameters = [(n, p) for n, p in model.named_parameters()
                  if p.requires_grad]
    optimizer = AdamOptimizer(parameters)  # type: ignore
    trainer = GradientDescentTrainer(
        model=model,
        serialization_dir=serialization_dir,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        num_epochs=5,
        optimizer=optimizer,
    )
    return trainer
Example #29
        def trainer_ctor(tagger, corpus_len, train_dataloader, val_dataloader):
            optimizer = AdamW(tagger.parameters(),
                              lr=self._lr,
                              betas=(0.9, 0.999),
                              eps=1e-6,
                              weight_decay=0.01,
                              correct_bias=True)

            # lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=self._patience)
            #
            # trainer = ModelTrainerBert(model=seq_tagger,
            #                            optimizer=optimizer,
            #                            lr_scheduler=lr_scheduler,
            #                            train_dataset=train_data,
            #                            val_dataset=val_data,
            #                            validation_metrics=[f1_entity_level],
            #                            batch_size=self._bs,
            #                            update_scheduler='ee',
            #                            keep_best_model=True,
            #                            restore_bm_on_lr_change=True,
            #                            max_grad_norm=1.,
            #                            smallest_lr=self._lr / 4)

            lr_scheduler = ReduceOnPlateauLearningRateScheduler(
                optimizer, mode='max', factor=0.5, patience=self._patience)

            trainer = GradientDescentTrainer(
                model=tagger,
                validation_metric='-loss',
                optimizer=optimizer,
                data_loader=train_dataloader,
                validation_data_loader=val_dataloader,
                num_epochs=self._n_epochs,
                # cuda_device=cuda_device,
                learning_rate_scheduler=lr_scheduler,
                patience=self._patience,
                num_gradient_accumulation_steps=self._bs)

            return trainer
Example #30
    def init_trainer(self) -> Trainer:
        parameters = [(n, p) for n, p in self.model.named_parameters()
                      if p.requires_grad]

        group_parameter_group = [(['_text_field_embedder.*'], {
            'lr': self.config.lr
        }), (['_classification_layer.*'], {
            'lr': self.config.classifier_lr
        })]

        optimizer = AdamOptimizer(parameters,
                                  parameter_groups=group_parameter_group,
                                  lr=self.config.lr)  # type: ignore

        trainer = GradientDescentTrainer(
            model=self.model,
            serialization_dir='./output',
            data_loader=self.train_data_loader,
            validation_data_loader=self.dev_data_loader,
            num_epochs=self.config.epoch,
            optimizer=optimizer,
            cuda_device=self.config.device,
        )
        return trainer