    def from_params(cls, parameters: List, params: Params) -> 'GanOptimizer':
        # Because we "tagged" the parameters, we can use hasattr to figure out
        # which ones go with which model.
        generator_parameters = [("", param) for param in parameters
                                if hasattr(param, '_generator')]
        discriminator_parameters = [("", param) for param in parameters
                                    if hasattr(param, '_discriminator')]

        generator_optimizer = Optimizer.from_params(
            generator_parameters, params.pop("generator_optimizer"))
        discriminator_optimizer = Optimizer.from_params(
            discriminator_parameters, params.pop("discriminator_optimizer"))

        return cls(generator_optimizer=generator_optimizer,
                   discriminator_optimizer=discriminator_optimizer)
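
The "tagging" this snippet relies on is not shown here. A minimal sketch of how it might be done (assuming `generator` and `discriminator` are ordinary torch.nn.Module instances; the function name is hypothetical, and the attribute names only need to match the hasattr checks above):

import torch

def tag_gan_parameters(generator: torch.nn.Module, discriminator: torch.nn.Module) -> None:
    # Mark each parameter with a throwaway attribute so GanOptimizer.from_params
    # can route it to the matching sub-optimizer.
    for param in generator.parameters():
        param._generator = True
    for param in discriminator.parameters():
        param._discriminator = True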
Example #2
 def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
     with self.assertRaises(ConfigurationError) as context:
         LearningRateScheduler.from_params(Optimizer.from_params(self.model.named_parameters(),
                                                                 Params({"type": "adam"})),
                                           Params({"type": "reduce_on_plateau",
                                                   "mode": "min"})).step(None, None)
     assert "learning rate scheduler requires a validation metric" in str(context.exception)
Example #3
 def _get_optimizer(self, lr: float = 1.0):
     optimizer_params = Params({"type": "sgd", "lr": lr})
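     # One parameter group per top-level child module, keyed by a regex anchored
     # to the module's name (with no per-group overrides).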
     optimizer_params["parameter_groups"] = [[[f"^{m}"], {}]
                                             for m in self.model._modules]
     return Optimizer.from_params(
         model_parameters=self.model.named_parameters(),
         params=optimizer_params)
Example #4
 def test_can_optimise_model_with_dense_and_sparse_params(self):
     optimizer_params = Params({"type": "dense_sparse_adam"})
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params)
     iterator = BasicIterator(2)
     iterator.index_with(self.vocab)
     Trainer(self.model, optimizer, iterator, self.instances).train()
Example #5
    def from_params(cls,
                    model: Model,
                    serialization_dir: str,
                    iterator: DataIterator,
                    train_data: Iterable[Instance],
                    validation_data: Optional[Iterable[Instance]],
                    params: Params,
                    validation_iterator: DataIterator = None) -> 'GANTrainer':

        patience = params.pop_int("patience", None)
        validation_metric = params.pop("validation_metric", "-loss")
        shuffle = params.pop_bool("shuffle", True)
        num_epochs = params.pop_int("num_epochs", 20)
        cuda_device = params.pop_int("cuda_device", -1)
        grad_norm = params.pop_float("grad_norm", None)
        grad_clipping = params.pop_float("grad_clipping", None)
        lr_scheduler_params = params.pop("learning_rate_scheduler", None)

        if cuda_device >= 0:
            model = model.cuda(cuda_device)
        parameters = [[n, p] for n, p in model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

        if lr_scheduler_params:
            scheduler = LearningRateScheduler.from_params(
                optimizer, lr_scheduler_params)
        else:
            scheduler = None

        num_serialized_models_to_keep = params.pop_int(
            "num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = params.pop_int(
            "keep_serialized_model_every_num_seconds", None)
        model_save_interval = params.pop_float("model_save_interval", None)
        summary_interval = params.pop_int("summary_interval", 100)
        histogram_interval = params.pop_int("histogram_interval", None)

        params.assert_empty(cls.__name__)
        return cls(model,
                   optimizer,
                   iterator,
                   train_data,
                   validation_data,
                   patience=patience,
                   validation_metric=validation_metric,
                   validation_iterator=validation_iterator,
                   shuffle=shuffle,
                   num_epochs=num_epochs,
                   serialization_dir=serialization_dir,
                   cuda_device=cuda_device,
                   grad_norm=grad_norm,
                   grad_clipping=grad_clipping,
                   learning_rate_scheduler=scheduler,
                   num_serialized_models_to_keep=num_serialized_models_to_keep,
                   keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
                   model_save_interval=model_save_interval,
                   summary_interval=summary_interval,
                   histogram_interval=histogram_interval)
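
For reference, a hypothetical trainer config consistent with the keys popped above might look like the following (the variable name and all values are illustrative only; the keys come from the pops in the method):

trainer_params = Params({
    "num_epochs": 40,
    "patience": 5,
    "validation_metric": "-loss",
    "shuffle": True,
    "cuda_device": 0,
    "grad_norm": 5.0,
    "optimizer": {"type": "adam", "lr": 0.001},
    "learning_rate_scheduler": {"type": "reduce_on_plateau", "patience": 2},
    "num_serialized_models_to_keep": 2,
    "summary_interval": 100,
})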
Example #6
 def test_can_optimise_model_with_dense_and_sparse_params(self):
     optimizer_params = Params({"type": "dense_sparse_adam"})
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params)
     for instance in self.instances:
         instance.index_fields(self.vocab)
     GradientDescentTrainer(self.model, optimizer, SimpleDataLoader(self.instances, 2)).train()
Example #7
 def _setup_training(self, tasks, train_params, optimizer_params,
                     scheduler_params, iterator):
     # Task bookkeeping
     task_infos = {task.name: {} for task in tasks}
     for task in tasks:
         task_info = task_infos[task.name]
         tr_generator = iterator(task.train_data,
                                 num_epochs=None,
                                 cuda_device=self._cuda_device)
         task_info['n_tr_batches'] = iterator.get_num_batches(
             task.train_data)
         task_info['tr_generator'] = tr_generator
         task_info['loss'] = 0.0
         task_info['total_batches_trained'] = 0
         task_info['n_batches_since_val'] = 0
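          # `train_params` is assumed to be the model's trainable (name, parameter)
          # pairs; each task gets its own optimizer built from a copy of the shared
          # optimizer config.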
         task_info['optimizer'] = Optimizer.from_params(
             train_params, copy.deepcopy(optimizer_params))
         task_info['scheduler'] = LearningRateScheduler.from_params(
             task_info['optimizer'], copy.deepcopy(scheduler_params))
         task_info['stopped'] = False
         task_info['last_log'] = time.time()
     # Metric bookkeeping
     all_metrics = [task.val_metric for task in tasks
                    ] + ['micro_accuracy', 'macro_accuracy']
     metric_infos = {metric: {'hist': [], 'stopped': False, 'best': (-1, {})} for \
                     metric in all_metrics}
     self._task_infos = task_infos
     self._metric_infos = metric_infos
     return task_infos, metric_infos
Example #8
    def test_linear_with_warmup_works_properly(self):
        scheduler = LearningRateScheduler.from_params(
            optimizer=Optimizer.from_params(
                model_parameters=self.model.named_parameters(),
                params=Params({"type": "sgd", "lr": 1.0}),
            ),
            params=Params(
                {
                    "type": "linear_with_warmup",
                    "warmup_steps": 2,
                    "num_epochs": 2,
                    "num_steps_per_epoch": 3,
                }
            ),
        )
        optimizer = scheduler.optimizer

        # Linear warmup for 2 steps.
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

        # Linear decay for 4 steps.
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.75
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.5
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.25
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.0
Example #9
    def predict_json(self, _: JsonDict, cuda_device: int = -1) -> JsonDict:
        parameter_filename = 'allennlp/seq2seq.json'
        serialization_dir = 'retrained'
        subprocess.check_call(['mkdir', '-p', serialization_dir])
        params = Params.from_file(parameter_filename)

        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(self._model.vocab)

        parameters = [[n, p] for n, p in self._model.named_parameters()
                      if p.requires_grad]
        trainer_params = params.pop('trainer')
        optimizer = Optimizer.from_params(parameters,
                                          trainer_params.pop("optimizer"))

        all_datasets = datasets_from_params(params)
        train_data = all_datasets['train']
        trainer = SimpleTrainer(self._model, optimizer, train_data, iterator)
        interpreter = Interpreter(self._model, self._dataset_reader, trainer)
        while True:
            try:
                interpreter.cmdloop()
            except Exception as e:
                print(e)
                traceback.print_exc()
                print('Restarting interpreter cmdloop.')
Example #10
    def test_optimizer_parameter_groups(self):
        optimizer_params = Params({
                "type": "sgd",
                "lr": 1,
                "momentum": 5,
                "parameter_groups": [
                        # the repeated "bias_" checks a corner case
                        # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception
                        [["weight_i", "bias_", "bias_", "NOT_A_VARIABLE_NAME"], {'lr': 2}],
                        [["tag_projection_layer"], {'lr': 3}],
                ]
        })
        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)
        param_groups = optimizer.param_groups

        assert len(param_groups) == 3
        assert param_groups[0]['lr'] == 2
        assert param_groups[1]['lr'] == 3
        # base case uses default lr
        assert param_groups[2]['lr'] == 1
        for k in range(3):
            assert param_groups[k]['momentum'] == 5

        # all LSTM parameters except recurrent connections (those with weight_h in name)
        assert len(param_groups[0]['params']) == 6
        # just the projection weight and bias
        assert len(param_groups[1]['params']) == 2
        # the embedding + recurrent connections left in the default group
        assert len(param_groups[2]['params']) == 3
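
The grouping mechanism itself can be shown in isolation. A minimal, self-contained sketch with a toy module (hypothetical names, not the test fixture above; assumes an AllenNLP version where Optimizer.from_params accepts model_parameters as a keyword, as in several examples on this page):

import torch
from allennlp.common import Params
from allennlp.training.optimizers import Optimizer

toy_model = torch.nn.ModuleDict({
    "encoder": torch.nn.LSTM(4, 4, batch_first=True),
    "tag_projection_layer": torch.nn.Linear(4, 2),
})
toy_optimizer = Optimizer.from_params(
    model_parameters=list(toy_model.named_parameters()),
    params=Params({
        "type": "sgd",
        "lr": 1.0,
        "parameter_groups": [
            # each regex is matched against parameter names (e.g. "encoder.weight_ih_l0")
            [["weight_ih"], {"lr": 2.0}],
            [["tag_projection_layer"], {"lr": 3.0}],
        ],
    }),
)
# Two explicit groups plus a default group for everything that matched no regex.
assert len(toy_optimizer.param_groups) == 3
assert toy_optimizer.param_groups[0]["lr"] == 2.0
assert toy_optimizer.param_groups[1]["lr"] == 3.0
assert toy_optimizer.param_groups[2]["lr"] == 1.0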
Example #11
 def test_optimizer_basic(self):
     optimizer_params = Params({"type": "sgd", "lr": 1})
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params)
     param_groups = optimizer.param_groups
     assert len(param_groups) == 1
     assert param_groups[0]["lr"] == 1
Example #12
 def test_noam_learning_rate_schedule_does_not_crash(self):
     model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     lrs = LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                                   Params({"type": "adam"})),
                                             Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}))
     lrs.step(None)
     lrs.step_batch(None)
Example #13
    def test_optimizer_parameter_groups(self):
        optimizer_params = Params({
                u"type": u"sgd",
                u"lr": 1,
                u"momentum": 5,
                u"parameter_groups": [
                        # the repeated "bias_" checks a corner case
                        # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception
                        [[u"weight_i", u"bias_", u"bias_", u"NOT_A_VARIABLE_NAME"], {u'lr': 2}],
                        [[u"tag_projection_layer"], {u'lr': 3}],
                ]
        })
        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)
        param_groups = optimizer.param_groups

        assert len(param_groups) == 3
        assert param_groups[0][u'lr'] == 2
        assert param_groups[1][u'lr'] == 3
        # base case uses default lr
        assert param_groups[2][u'lr'] == 1
        for k in range(3):
            assert param_groups[k][u'momentum'] == 5

        # all LSTM parameters except recurrent connections (those with weight_h in name)
        assert len(param_groups[0][u'params']) == 6
        # just the projection weight and bias
        assert len(param_groups[1][u'params']) == 2
        # the embedding + recurrent connections left in the default group
        assert len(param_groups[2][u'params']) == 3
Example #14
 def test_exponential_works_properly(self):
     scheduler = LearningRateScheduler.from_params(
         optimizer=Optimizer.from_params(self.model.named_parameters(),
                                         Params({
                                             "type": "sgd",
                                             "lr": 1.0
                                         })),
         params=Params({
             "type": "exponential",
             "gamma": 0.5
         }),
     )
     optimizer = scheduler.lr_scheduler.optimizer
     optimizer.step()  # to avoid a pytorch warning
     # Initial learning rate should be unchanged for first epoch.
     assert optimizer.param_groups[0]["lr"] == 1.0
     # But since the way PyTorch LR schedulers work is a little wonky,
     # the LR will also be unchanged for the second epoch (epoch id 0).
     scheduler.step(epoch=0)
     assert optimizer.param_groups[0]["lr"] == 1.0
     # Now the learning rate starts to be updated...
     scheduler.step(epoch=1)
     assert optimizer.param_groups[0]["lr"] == 0.5
     scheduler.step(epoch=2)
     assert optimizer.param_groups[0]["lr"] == 0.5**2
Example #15
 def test_noam_learning_rate_schedule_does_not_crash(self):
     model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     lrs = LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                                   Params({"type": "adam"})),
                                             Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}))
     lrs.step(None)
     lrs.step_batch(None)
Example #16
 def _get_optimizer(self, lr: float = 1.0):
     return Optimizer.from_params(
         model_parameters=self.model.named_parameters(),
         params=Params({
             "type": "sgd",
             "lr": lr
         }))
Example #17
    def test_polynomial_decay_works_properly(self):
        scheduler = LearningRateScheduler.from_params(
            optimizer=Optimizer.from_params(
                model_parameters=self.model.named_parameters(),
                params=Params({"type": "sgd", "lr": 1.0}),
            ),
            params=Params(
                {
                    "type": "polynomial_decay",
                    "warmup_steps": 2,
                    "num_epochs": 2,
                    "num_steps_per_epoch": 3,
                    "end_learning_rate": 0.1,
                    "power": 2,
                }
            ),
        )
        optimizer = scheduler.optimizer

        # Linear warmup for 2 steps.
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

        # Polynomial decay for 4 steps.
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.60625  # (1.0 - 0.1) * (3/4) ** 2 + 0.1
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.325  # (1.0 - 0.1) * (2/4) ** 2 + 0.1
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.15625  # (1.0 - 0.1) * (1/4) ** 2 + 0.1
        scheduler.step_batch()
        assert optimizer.param_groups[0]["lr"] == 0.1  # (1.0 - 0.1) * (0/4) ** 2 + 0.1
Example #18
 def test_reduce_on_plateau_works_when_metrics_exist(self):
     LearningRateScheduler.from_params(
         optimizer=Optimizer.from_params(
             model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
         ),
         params=Params({"type": "reduce_on_plateau"}),
     ).step(10)
Example #19
 def _get_optimizer(self):
     return Optimizer.from_params(
         self.model.named_parameters(),
         Params({
             "type": "sgd",
             "lr": 1.0,
             "momentum": self.base_momentum
         }))
Example #20
    def from_params(  # type: ignore
        cls,
        params: Params,
        serialization_dir: str,
        recover: bool = False,
        cache_directory: str = None,
        cache_prefix: str = None,
    ) -> "CallbackTrainer":
        pieces = TrainerPieces.from_params(params, serialization_dir, recover,
                                           cache_directory, cache_prefix)
        model = pieces.model
        params = pieces.params
        validation_iterator = pieces.validation_iterator or pieces.iterator

        shuffle = params.pop_bool("shuffle", True)
        num_epochs = params.pop_int("num_epochs", 20)
        cuda_device = parse_cuda_device(params.pop("cuda_device", -1))

        if isinstance(cuda_device, list):
            model_device = cuda_device[0]
        else:
            model_device = cuda_device
        if model_device >= 0:
            # Moving model to GPU here so that the optimizer state gets constructed on
            # the right device.
            model = model.cuda(model_device)

        parameters = [[n, p] for n, p in model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

        callbacks_params = params.pop("callbacks", [])
        callbacks: List[Callback] = [
            Callback.from_params(
                params=callback_params,
                model=model,
                optimizer=optimizer,
                instances=pieces.train_dataset,
                iterator=pieces.iterator,
                shuffle=shuffle,
                validation_data=pieces.validation_dataset,
                validation_iterator=validation_iterator,
                serialization_dir=serialization_dir,
            ) for callback_params in callbacks_params
        ]

        params.assert_empty(cls.__name__)
        return cls(
            model,
            pieces.train_dataset,
            pieces.iterator,
            optimizer,
            num_epochs=num_epochs,
            shuffle=shuffle,
            serialization_dir=serialization_dir,
            cuda_device=cuda_device,
            callbacks=callbacks,
        )
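
A hypothetical params block consistent with the pops above (the variable name and values are illustrative; the callback entries are placeholders for whatever Callback types are registered):

callback_trainer_params = Params({
    "num_epochs": 20,
    "shuffle": True,
    "cuda_device": 0,
    "optimizer": {"type": "adam", "lr": 0.001},
    "callbacks": [
        # each entry is a registered Callback config, e.g. {"type": "<callback_name>", ...}
    ],
})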
Example #21
 def test_can_optimise_model_with_dense_and_sparse_params(self):
     optimizer_params = Params({
             "type": "dense_sparse_adam"
     })
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(parameters, optimizer_params)
     iterator = BasicIterator(2)
     iterator.index_with(self.vocab)
     Trainer(self.model, optimizer, iterator, self.instances).train()
Example #22
 def test_no_metric_wrapper_can_support_none_for_metrics(self):
     lrs = LearningRateScheduler.from_params(
         Optimizer.from_params(self.model.named_parameters(),
                               Params({"type": "adam"})),
         Params({
             "type": "step",
             "step_size": 1
         }))
     lrs.step(None, None)
Example #23
 def test_no_metric_wrapper_can_support_none_for_metrics(self):
     lrs = LearningRateScheduler.from_params(
         optimizer=Optimizer.from_params(
             model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
         ),
         params=Params({"type": "step", "step_size": 1}),
     )
     lrs.lr_scheduler.optimizer.step()  # to avoid a pytorch warning
     lrs.step(None)
Example #24
 def test_noam_learning_rate_schedule_does_not_crash(self):
     lrs = LearningRateScheduler.from_params(
         optimizer=Optimizer.from_params(
             model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
         ),
         params=Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}),
     )
     lrs.step(None)
     lrs.step_batch(None)
Example #25
 def test_optimizer_basic(self):
     optimizer_params = Params({
             "type": "sgd",
             "lr": 1
     })
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(parameters, optimizer_params)
     param_groups = optimizer.param_groups
     assert len(param_groups) == 1
     assert param_groups[0]['lr'] == 1
Example #26
    def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
        model = torch.nn.Sequential(torch.nn.Linear(10, 10))
        with self.assertRaises(ConfigurationError) as context:
            LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                                    Params({"type": "adam"})),
                                              Params({"type": "reduce_on_plateau"})).step(None, None)

        self.assertTrue(
                'The reduce_on_plateau learning rate scheduler requires a validation metric'
                in str(context.exception))
Example #27
 def setup_method(self):
     super().setup_method()
     self.model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     self.optimizer = Optimizer.from_params(
         model_parameters=self.model.named_parameters(),
         params=Params({
             "type": "sgd",
             "lr": 1.0
         }),
     )
Example #28
    def test_parameter_type_inference(self):
        # Should work ok even with lr as a string
        optimizer_params = Params({"type": "sgd", "lr": "0.1"})

        parameters = [[n, p] for n, p in self.model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(model_parameters=parameters,
                                          params=optimizer_params)

        assert optimizer.defaults["lr"] == 0.1
Example #29
 def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
     with pytest.raises(
         ConfigurationError, match="learning rate scheduler requires a validation metric"
     ):
         LearningRateScheduler.from_params(
             optimizer=Optimizer.from_params(
                 model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
             ),
             params=Params({"type": "reduce_on_plateau"}),
         ).step(None)
Example #30
    def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
        model = torch.nn.Sequential(torch.nn.Linear(10, 10))
        with self.assertRaises(ConfigurationError) as context:
            LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                                    Params({"type": "adam"})),
                                              Params({"type": "reduce_on_plateau"})).step(None, None)

        self.assertTrue(
                'The reduce_on_plateau learning rate scheduler requires a validation metric'
                in str(context.exception))
Example #31
 def test_no_metric_wrapper_can_support_none_for_metrics(self):
     model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     lrs = LearningRateScheduler.from_params(
         Optimizer.from_params(model.named_parameters(),
                               Params({"type": "adam"})),
         Params({
             "type": "step",
             "step_size": 1
         }))
     lrs.step(None, None)
Example #32
    def test_optimizer_parameter_groups(self):
        optimizer_params = Params({
            "type":
            "sgd",
            "lr":
            1,
            "momentum":
            5,
            "parameter_groups": [
                # the repeated "bias_" checks a corner case
                # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception
                [["weight_i", "bias_", "bias_", "NOT_A_VARIABLE_NAME"], {
                    "lr": 2
                }],
                [["tag_projection_layer"], {
                    "lr": 3
                }],
                [["^text_field_embedder.*$"], {
                    "requires_grad": False
                }],
            ],
        })

        # Before initializing the optimizer all params in this module will still require grad.
        assert all([
            param.requires_grad
            for param in self.model.text_field_embedder.parameters()
        ])

        parameters = [[n, p] for n, p in self.model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(model_parameters=parameters,
                                          params=optimizer_params)
        param_groups = optimizer.param_groups

        # After initializing the optimizer, requires_grad should be false for all params in this module.
        assert not any([
            param.requires_grad
            for param in self.model.text_field_embedder.parameters()
        ])

        assert len(param_groups) == 3
        assert param_groups[0]["lr"] == 2
        assert param_groups[1]["lr"] == 3
        # base case uses default lr
        assert param_groups[2]["lr"] == 1
        for k in range(3):
            assert param_groups[k]["momentum"] == 5

        # all LSTM parameters except recurrent connections (those with weight_h in name)
        assert len(param_groups[0]["params"]) == 6
        # just the projection weight and bias
        assert len(param_groups[1]["params"]) == 2
        # the recurrent connections left in the default group
        assert len(param_groups[2]["params"]) == 2
Example #33
    def test_optimizer_params(self):
        optimizer_params = Params(
            {
                "type": "multi",
                "optimizers": {
                    "default": {"type": "adam", "lr": 1},
                    "embedder": {"type": "adam", "lr": 2},
                    "encoder": {"type": "adam", "lr": 3},
                },
                "parameter_groups": [
                    [
                        ["^text_field_embedder"],
                        {
                            "optimizer_name": "embedder",
                            "betas": (0.9, 0.98),
                            "lr": 2,
                            "weight_decay": 0.01,
                        },
                    ],
                    [["^encoder.*bias"], {"optimizer_name": "encoder", "lr": 0.001}],
                    [["^encoder.*weight"], {"optimizer_name": "encoder", "lr": 0.002}],
                    [["^tag_projection_layer.*.weight$"], {"lr": 5}],
                ],
            }
        )

        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params)

        # When the MultiOptimizer is initialized, `optimizer.param_groups` stores the parameter groups.
        # These parameter groups are assigned to their own optimizer by the MultiOptimizer.
        # Check that changes to the parameters in optimizer.param_groups affect the parameters in
        # optimizer._grouped_optimizers.
        regex_optimizer_params = set()
        regex_optimizer_grouped_optimizer_params = set()

        for param_group in optimizer.param_groups:
            # Each param_group should have optimizer options visible so they can be used by schedulers.
            lr = param_group["lr"]
            assert lr > 0
            params = param_group["params"]
            for param in params:
                param.data.zero_()
                regex_optimizer_params.add(id(param))

        # Check that the parameters of the sub-optimizers were also changed.
        for sub_optimizer in optimizer.optimizers.values():
            for param_group in sub_optimizer.param_groups:
                params = param_group["params"]
                for param in params:
                    regex_optimizer_grouped_optimizer_params.add(id(param))
                    assert param.sum() == 0, "Param has non-zero values."

        assert regex_optimizer_params == regex_optimizer_grouped_optimizer_params
Example #34
    def test_parameter_type_inference(self):
        # Should work ok even with lr as a string
        optimizer_params = Params({"type": "sgd", "lr": "0.1"})

        parameters = [[n, p] for n, p in self.model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)

        assert optimizer.defaults["lr"] == 0.1

        # But should crash (in the Pytorch code) if we don't do the type inference
        optimizer_params = Params({
            "type": "sgd",
            "lr": "0.1",
            "infer_type_and_cast": False
        })

        parameters = [[n, p] for n, p in self.model.named_parameters()
                      if p.requires_grad]

        with pytest.raises(TypeError):
            optimizer = Optimizer.from_params(parameters, optimizer_params)
Example #35
    def from_params(cls,
                    model: Model,
                    serialization_dir: str,
                    iterator: DataIterator,
                    train_data: Iterable[Instance],
                    validation_data: Optional[Iterable[Instance]],
                    params: Params,
                    validation_iterator: DataIterator = None) -> 'Trainer':

        patience = params.pop_int("patience", None)
        validation_metric = params.pop("validation_metric", "-loss")
        num_epochs = params.pop_int("num_epochs", 20)
        cuda_device = params.pop_int("cuda_device", -1)
        grad_norm = params.pop_float("grad_norm", None)
        grad_clipping = params.pop_float("grad_clipping", None)
        lr_scheduler_params = params.pop("learning_rate_scheduler", None)

        if cuda_device >= 0:
            model = model.cuda(cuda_device)
        parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

        if lr_scheduler_params:
            scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
        else:
            scheduler = None

        num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = params.pop_int(
                "keep_serialized_model_every_num_seconds", None)
        model_save_interval = params.pop_float("model_save_interval", None)
        summary_interval = params.pop_int("summary_interval", 100)
        histogram_interval = params.pop_int("histogram_interval", None)

        params.assert_empty(cls.__name__)
        return Trainer(model, optimizer, iterator,
                       train_data, validation_data,
                       patience=patience,
                       validation_metric=validation_metric,
                       validation_iterator=validation_iterator,
                       num_epochs=num_epochs,
                       serialization_dir=serialization_dir,
                       cuda_device=cuda_device,
                       grad_norm=grad_norm,
                       grad_clipping=grad_clipping,
                       learning_rate_scheduler=scheduler,
                       num_serialized_models_to_keep=num_serialized_models_to_keep,
                       keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
                       model_save_interval=model_save_interval,
                       summary_interval=summary_interval,
                       histogram_interval=histogram_interval)
Example #36
    def _setup_for_training(self):
        """Create vocab, configure default loggers/callbacks, create optimizer/lr scheduler, setup best metrics"""
        # create vocab
        if self._vocab_config is not None:
            vocab_datasets = [self._train_instances]
            if (
                self._valid_instances is not None
                and self._vocab_config.include_valid_data
            ):
                vocab_datasets += [self._valid_instances]
            self._pipeline.create_vocab(vocab_datasets, config=self._vocab_config)

        # Check for an empty vocab
        if vocabulary.is_empty(
            self._pipeline.vocab, self._pipeline.config.features.configured_namespaces
        ):
            raise EmptyVocabError(
                "All your features need a non-empty vocabulary for a training!"
            )

        # we give some special attention to these loggers/callbacks
        self._wandb_logger: Optional[WandbLogger] = None
        self._model_checkpoint: Optional[ModelCheckpoint] = None

        # add default callbacks/loggers
        self._trainer_config.callbacks = self._add_default_callbacks()
        if self._trainer_config.logger is not False:
            self._trainer_config.logger = self._add_default_loggers()

        # create optimizer, has to come AFTER creating the vocab!
        self._pipeline.model.optimizer = Optimizer.from_params(
            Params(
                {
                    "model_parameters": self._pipeline.model.named_parameters(),
                    **self._trainer_config.optimizer,
                }
            )
        )

        # create lr scheduler, has to come AFTER creating the optimizer!
        if not (
            self._trainer_config.warmup_steps == 0
            and self._trainer_config.lr_decay is None
        ):
            self._pipeline.model.lr_scheduler = self._create_lr_scheduler()
        else:
            self._pipeline.model.lr_scheduler = None

        # set monitor and mode for best validation metrics
        self._pipeline.model.monitor = self._trainer_config.monitor
        self._pipeline.model.monitor_mode = self._trainer_config.monitor_mode
Example #37
    def test_parameter_type_inference(self):
        # Should work ok even with lr as a string
        optimizer_params = Params({
                "type": "sgd",
                "lr": "0.1"
        })

        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, optimizer_params)

        assert optimizer.defaults["lr"] == 0.1

        # But should crash (in the Pytorch code) if we don't do the type inference
        optimizer_params = Params({
                "type": "sgd",
                "lr": "0.1",
                "infer_type_and_cast": False
        })

        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]

        with pytest.raises(TypeError):
            optimizer = Optimizer.from_params(parameters, optimizer_params)
Example #38
 def test_no_metric_wrapper_can_support_none_for_metrics(self):
     model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     lrs = LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                                   Params({"type": "adam"})),
                                             Params({"type": "step", "step_size": 1}))
     lrs.step(None, None)
Example #39
 def _get_optimizer(self, lr: float = 1.0):
     return Optimizer.from_params(self.model.named_parameters(), Params({"type": "sgd", "lr": lr}))
Example #40
 def _get_optimizer(self, lr: float = 1.0):
     optimizer_params = Params({"type": "sgd", "lr": lr})
     optimizer_params["parameter_groups"] = [[[f"^{m}"], {}] for m in self.model._modules]
     return Optimizer.from_params(self.model.named_parameters(), optimizer_params)
Example #41
 def test_reduce_on_plateau_works_when_metrics_exist(self):
     model = torch.nn.Sequential(torch.nn.Linear(10, 10))
     LearningRateScheduler.from_params(Optimizer.from_params(model.named_parameters(),
                                                             Params({"type": "adam"})),
                                       Params({"type": "reduce_on_plateau"})).step(10, None)