def test_from_params_in_trainer(self):
    # This is more of an integration test, making sure that a bunch of pieces fit together
    # correctly, but it matters most for this learning rate scheduler, so we're testing it here.
    params = Params(
        {
            "num_epochs": 5,
            "learning_rate_scheduler": {
                "type": "slanted_triangular",
                "gradual_unfreezing": True,
                "discriminative_fine_tuning": True,
                "decay_factor": 0.5,
            },
        }
    )
    # The method called in the logic below only checks the length of this list, not its
    # contents, so this should be safe.
    instances = [
        1,
    ] * 40  # noqa: E231, flake doesn't like what black does with this list
    optim = self._get_optimizer()
    trainer = TrainerBase.from_params(
        model=self.model,
        optimizer=Lazy(lambda **kwargs: optim),
        serialization_dir=self.TEST_DIR,
        params=params,
        iterator=BasicIterator(batch_size=10),
        train_data=instances,
    )
    assert isinstance(trainer._learning_rate_scheduler, SlantedTriangular)

    # This is what we wrote this test for: to be sure that num_epochs is passed correctly, and
    # that num_steps_per_epoch is computed and passed correctly. This logic happens inside of
    # `Trainer.from_partial_objects`.
    assert trainer._learning_rate_scheduler.num_epochs == 5
    assert trainer._learning_rate_scheduler.num_steps_per_epoch == 4

    # And we'll do one more to make sure that we can override num_epochs in the scheduler if we
    # really want to. Not sure why you would ever want to in this case; this is just testing
    # the functionality.
    params = Params(
        {
            "num_epochs": 5,
            "learning_rate_scheduler": {
                "type": "slanted_triangular",
                "num_epochs": 3,
                "gradual_unfreezing": True,
                "discriminative_fine_tuning": True,
                "decay_factor": 0.5,
            },
        }
    )
    trainer = TrainerBase.from_params(
        model=self.model,
        optimizer=Lazy(lambda **kwargs: optim),
        serialization_dir=self.TEST_DIR,
        params=params,
        iterator=BasicIterator(batch_size=10),
        train_data=instances,
    )
    assert trainer._learning_rate_scheduler.num_epochs == 3
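
# A minimal sketch (an illustrative addition, not part of the test above) of the arithmetic
# behind the `num_steps_per_epoch == 4` assertion: 40 training instances iterated with a
# batch size of 10 gives 4 batches per epoch. Using `math.ceil` is an assumption about how a
# partial final batch would be counted; the numbers here divide evenly, so it does not
# change the result.
import math

assert math.ceil(40 / 10) == 4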
def setUp(self):
    super().setUp()
    params = Params(
        {
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                },
                "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
            },
            "dataset_reader": {"type": "sequence_tagging"},
            "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "iterator": {"type": "basic", "batch_size": 2},
            "trainer": {"cuda_device": -1, "num_epochs": 2, "optimizer": "adam"},
        }
    )
    all_datasets = datasets_from_params(params)
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(instance for dataset in all_datasets.values() for instance in dataset),
    )
    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    train_data = all_datasets["train"]
    trainer_params = params.pop("trainer")
    serialization_dir = os.path.join(self.TEST_DIR, "test_search_learning_rate")

    self.trainer = TrainerBase.from_params(
        model=model,
        serialization_dir=serialization_dir,
        iterator=iterator,
        train_data=train_data,
        params=trainer_params,
        validation_data=None,
        validation_iterator=None,
    )
def setUp(self):
    super().setUp()
    param_file = self.FIXTURES_ROOT / "simple_tagger" / "experiment_with_regularization.json"
    self.set_up_model(param_file, self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
    params = Params.from_file(param_file)
    self.reader = DatasetReader.from_params(params["dataset_reader"])
    self.iterator = DataIterator.from_params(params["iterator"])
    self.trainer = TrainerBase.from_params(
        model=self.model,
        serialization_dir=self.TEST_DIR,
        iterator=self.iterator,
        train_data=self.dataset,
        params=params.get("trainer"),
    )
def find_learning_rate_model(
    params: Params,
    serialization_dir: str,
    start_lr: float = 1e-5,
    end_lr: float = 10,
    num_batches: int = 100,
    linear_steps: bool = False,
    stopping_factor: float = None,
    force: bool = False,
) -> None:
    """
    Runs a learning rate search for the given ``num_batches`` and saves the results in
    ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results.
    start_lr : ``float``
        Learning rate at which to start the search.
    end_lr : ``float``
        Learning rate up to which the search is done.
    num_batches : ``int``
        Number of mini-batches to run the learning rate finder for.
    linear_steps : ``bool``
        If ``True``, increase the learning rate linearly; otherwise increase it exponentially.
    stopping_factor : ``float``
        Stop the search when the current loss exceeds the best loss recorded by a multiple of the
        stopping factor. If ``None``, the search proceeds until ``end_lr``.
    force : ``bool``
        If ``True`` and the serialization directory already exists, everything in it will be
        removed prior to finding the learning rate.
    """
    create_serialization_dir(params, serialization_dir, recover=False, force=force)

    prepare_environment(params)

    cuda_device = params.params.get("trainer").get("cuda_device", -1)
    check_for_gpu(cuda_device)
    distributed_params = params.params.get("distributed", None)
    # See https://github.com/allenai/allennlp/issues/3658
    assert not distributed_params, "find-lr is not compatible with DistributedDataParallel."

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation),
    )
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(
            instance
            for key, dataset in all_datasets.items()
            for instance in dataset
            if key in datasets_for_vocab_creation
        ),
    )

    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets["train"]

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer_choice = trainer_params.pop("type", "default")
    if trainer_choice != "default":
        raise ConfigurationError(
            "currently find-learning-rate only works with the default Trainer"
        )
    trainer: Trainer = TrainerBase.from_params(  # type: ignore
        model=model,
        serialization_dir=serialization_dir,
        iterator=iterator,
        train_data=train_data,
        validation_data=None,
        params=trainer_params,
        validation_iterator=None,
    )

    logger.info(
        f"Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations."
    )
    learning_rates, losses = search_learning_rate(
        trainer,
        start_lr=start_lr,
        end_lr=end_lr,
        num_batches=num_batches,
        linear_steps=linear_steps,
        stopping_factor=stopping_factor,
    )
    logger.info("Finished learning rate search.")
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses, os.path.join(serialization_dir, "lr-losses.png"))
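
# A hypothetical usage sketch (an illustrative addition, not part of this module): load an
# experiment configuration with `Params.from_file` (the same `Params` class used in the
# signature above) and run the search with the default range. The config path and output
# directory below are made-up placeholders.
if __name__ == "__main__":
    example_params = Params.from_file("path/to/experiment.jsonnet")
    find_learning_rate_model(
        example_params,
        serialization_dir="path/to/lr_search_output",
        start_lr=1e-5,
        end_lr=10,
        num_batches=100,
        linear_steps=False,
        stopping_factor=None,
        force=True,
    )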