Example #1
    def test_from_params_in_trainer(self):
        # This is more of an integration test, making sure that a bunch of pieces fit together
        # correctly, but it matters most for this learning rate scheduler, so we're testing it here.
        params = Params({
            "num_epochs": 5,
            "learning_rate_scheduler": {
                "type": "slanted_triangular",
                "gradual_unfreezing": True,
                "discriminative_fine_tuning": True,
                "decay_factor": 0.5,
            },
        })
        # The method called in the logic below only checks the length of this list, not its
        # contents, so this should be safe.
        instances = [
            1,
        ] * 40  # noqa: E231, flake doesn't like what black does with this list
        optim = self._get_optimizer()
        trainer = TrainerBase.from_params(
            model=self.model,
            optimizer=Lazy(lambda **kwargs: optim),
            serialization_dir=self.TEST_DIR,
            params=params,
            iterator=BasicIterator(batch_size=10),
            train_data=instances,
        )
        assert isinstance(trainer._learning_rate_scheduler, SlantedTriangular)

        # This is what we wrote this test for: to be sure that num_epochs is passed correctly, and
        # that num_steps_per_epoch is computed and passed correctly.  This logic happens inside of
        # `Trainer.from_partial_objects`.
        assert trainer._learning_rate_scheduler.num_epochs == 5
        assert trainer._learning_rate_scheduler.num_steps_per_epoch == 4

        # And we'll do one more to make sure that we can override num_epochs in the scheduler if we
        # really want to.  Not sure why you would ever want to in this case; this is just testing
        # the functionality.
        params = Params({
            "num_epochs": 5,
            "learning_rate_scheduler": {
                "type": "slanted_triangular",
                "num_epochs": 3,
                "gradual_unfreezing": True,
                "discriminative_fine_tuning": True,
                "decay_factor": 0.5,
            },
        })
        trainer = TrainerBase.from_params(
            model=self.model,
            optimizer=Lazy(lambda **kwargs: optim),
            serialization_dir=self.TEST_DIR,
            params=params,
            iterator=BasicIterator(batch_size=10),
            train_data=instances,
        )
        assert trainer._learning_rate_scheduler.num_epochs == 3
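
The two step-count assertions above follow from the fixture sizes: 40 instances batched at 10 per batch gives 4 steps per epoch. A minimal sketch of that arithmetic, assuming the trainer counts a trailing partial batch as a full step (ceiling division):

import math

num_instances = 40  # len(instances) in the test above
batch_size = 10     # BasicIterator(batch_size=10)

# Ceiling division, so a trailing partial batch would still count as a step.
num_steps_per_epoch = math.ceil(num_instances / batch_size)
assert num_steps_per_epoch == 4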
Example #2
    def setUp(self):
        super().setUp()
        params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {
                            "type": "embedding",
                            "embedding_dim": 5
                        }
                    }
                },
                "encoder": {
                    "type": "lstm",
                    "input_size": 5,
                    "hidden_size": 7,
                    "num_layers": 2
                },
            },
            "dataset_reader": {
                "type": "sequence_tagging"
            },
            "train_data_path":
            str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path":
            str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "iterator": {
                "type": "basic",
                "batch_size": 2
            },
            "trainer": {
                "cuda_device": -1,
                "num_epochs": 2,
                "optimizer": "adam"
            },
        })
        all_datasets = datasets_from_params(params)
        vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            instances=(instance for dataset in all_datasets.values()
                       for instance in dataset),
        )
        model = Model.from_params(vocab=vocab, params=params.pop("model"))
        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(vocab)
        train_data = all_datasets["train"]
        trainer_params = params.pop("trainer")
        serialization_dir = os.path.join(self.TEST_DIR,
                                         "test_search_learning_rate")

        self.trainer = TrainerBase.from_params(
            model=model,
            serialization_dir=serialization_dir,
            iterator=iterator,
            train_data=train_data,
            params=trainer_params,
            validation_data=None,
            validation_iterator=None,
        )
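
Judging by the serialization directory name, this fixture backs a learning-rate-search test. A hypothetical test body built on this setup, using the `search_learning_rate` helper that appears in Example #4 (relying on its argument defaults is an assumption):

    def test_search_learning_rate(self):
        # Run a short search; the returned learning rates and losses
        # should pair up one-to-one.
        learning_rates, losses = search_learning_rate(self.trainer, num_batches=10)
        assert len(learning_rates) == len(losses)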
Example #3
    def setUp(self):
        super().setUp()
        param_file = self.FIXTURES_ROOT / "simple_tagger" / "experiment_with_regularization.json"
        self.set_up_model(param_file,
                          self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
        params = Params.from_file(param_file)
        self.reader = DatasetReader.from_params(params["dataset_reader"])
        self.iterator = DataIterator.from_params(params["iterator"])
        self.trainer = TrainerBase.from_params(
            model=self.model,
            serialization_dir=self.TEST_DIR,
            iterator=self.iterator,
            train_data=self.dataset,
            params=params.get("trainer"),
        )
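
A fixture like this is usually exercised by running the trainer; a hypothetical smoke test on top of the setup above, assuming the metrics dictionary returned by the default Trainer includes a `training_loss` entry:

    def test_trainer_runs(self):
        # Hypothetical: train for the configured epochs and sanity-check the metrics.
        metrics = self.trainer.train()
        assert "training_loss" in metrics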
Example #4
def find_learning_rate_model(
    params: Params,
    serialization_dir: str,
    start_lr: float = 1e-5,
    end_lr: float = 10,
    num_batches: int = 100,
    linear_steps: bool = False,
    stopping_factor: float = None,
    force: bool = False,
) -> None:
    """
    Runs a learning rate search for the given ``num_batches`` and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results.
    start_lr : ``float``
        The learning rate at which to start the search.
    end_lr : ``float``
        The learning rate at which to end the search.
    num_batches : ``int``
        The number of mini-batches over which to run the learning rate finder.
    linear_steps : ``bool``
        If True, increase the learning rate linearly; if False, increase it exponentially.
    stopping_factor : ``float``
        Stop the search when the current loss exceeds the best recorded loss multiplied
        by the stopping factor. If ``None``, the search proceeds until ``end_lr``.
    force : ``bool``
        If True and the serialization directory already exists, everything in it will
        be removed prior to finding the learning rate.
    """
    create_serialization_dir(params,
                             serialization_dir,
                             recover=False,
                             force=force)

    prepare_environment(params)

    cuda_device = params.params.get("trainer").get("cuda_device", -1)
    check_for_gpu(cuda_device)
    distributed_params = params.params.get("distributed", None)
    # See https://github.com/allenai/allennlp/issues/3658
    assert not distributed_params, "find-lr is not compatible with DistributedDataParallel."

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'datasets_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation),
    )
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(instance for key, dataset in all_datasets.items()
                   for instance in dataset
                   if key in datasets_for_vocab_creation),
    )

    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets["train"]

    trainer_params = params.pop("trainer")

    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer_choice = trainer_params.pop("type", "default")
    if trainer_choice != "default":
        raise ConfigurationError(
            "currently find-learning-rate only works with the default Trainer")
    trainer: Trainer = TrainerBase.from_params(  # type: ignore
        model=model,
        serialization_dir=serialization_dir,
        iterator=iterator,
        train_data=train_data,
        validation_data=None,
        params=trainer_params,
        validation_iterator=None,
    )

    logger.info(
        f"Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations."
    )
    learning_rates, losses = search_learning_rate(
        trainer,
        start_lr=start_lr,
        end_lr=end_lr,
        num_batches=num_batches,
        linear_steps=linear_steps,
        stopping_factor=stopping_factor,
    )
    logger.info(f"Finished learning rate search.")
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses,
               os.path.join(serialization_dir, "lr-losses.png"))
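
A hypothetical invocation of `find_learning_rate_model`, assuming an AllenNLP experiment config at an illustrative path:

params = Params.from_file("experiment.json")  # hypothetical config path
find_learning_rate_model(
    params,
    serialization_dir="/tmp/lr_search",  # lr-losses.png ends up here
    start_lr=1e-5,
    end_lr=10,
    num_batches=100,
    force=True,  # clear the directory if it already exists
)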