def from_params(cls, parameters: List, params: Params) -> 'GanOptimizer':
    # Because we "tagged" the parameters, we can use hasattr to figure out
    # which ones go with which model.
    generator_parameters = [("", param) for param in parameters
                            if hasattr(param, '_generator')]
    discriminator_parameters = [("", param) for param in parameters
                                if hasattr(param, '_discriminator')]

    generator_optimizer = Optimizer.from_params(
        generator_parameters, params.pop("generator_optimizer"))
    discriminator_optimizer = Optimizer.from_params(
        discriminator_parameters, params.pop("discriminator_optimizer"))

    return cls(generator_optimizer=generator_optimizer,
               discriminator_optimizer=discriminator_optimizer)
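# A minimal sketch (assumed setup, not part of the snippets in this collection) of how the
# "_generator" / "_discriminator" tags checked via hasattr above could be attached to
# parameters before calling GanOptimizer.from_params. The two Linear modules stand in for
# real generator/discriminator networks.
import torch

generator = torch.nn.Linear(10, 10)
discriminator = torch.nn.Linear(10, 1)

for param in generator.parameters():
    param._generator = True       # tag generator weights
for param in discriminator.parameters():
    param._discriminator = True   # tag discriminator weights

# A flat list of all parameters can now be split by tag, which is what
# GanOptimizer.from_params does above.
all_parameters = list(generator.parameters()) + list(discriminator.parameters())
generator_parameters = [p for p in all_parameters if hasattr(p, "_generator")]
assert len(generator_parameters) == 2  # weight + bias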
def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    with self.assertRaises(ConfigurationError) as context:
        LearningRateScheduler.from_params(
            Optimizer.from_params(self.model.named_parameters(), Params({"type": "adam"})),
            Params({"type": "reduce_on_plateau", "mode": "min"})).step(None, None)
    assert "learning rate scheduler requires a validation metric" in str(context.exception)
def _get_optimizer(self, lr: float = 1.0): optimizer_params = Params({"type": "sgd", "lr": lr}) optimizer_params["parameter_groups"] = [[[f"^{m}"], {}] for m in self.model._modules] return Optimizer.from_params( model_parameters=self.model.named_parameters(), params=optimizer_params)
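# Sketch (with assumed module names) of the parameter_groups value the comprehension above
# builds: one regex per top-level submodule, with no per-group overrides, so each submodule
# ends up in its own param group at the default lr.
modules = {"embedder": None, "encoder": None}  # stand-in for self.model._modules
parameter_groups = [[[f"^{m}"], {}] for m in modules]
assert parameter_groups == [[["^embedder"], {}], [["^encoder"], {}]]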
def test_can_optimise_model_with_dense_and_sparse_params(self): optimizer_params = Params({"type": "dense_sparse_adam"}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) iterator = BasicIterator(2) iterator.index_with(self.vocab) Trainer(self.model, optimizer, iterator, self.instances).train()
def from_params(cls,
                model: Model,
                serialization_dir: str,
                iterator: DataIterator,
                train_data: Iterable[Instance],
                validation_data: Optional[Iterable[Instance]],
                params: Params,
                validation_iterator: DataIterator = None) -> 'GANTrainer':
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = params.pop_int("cuda_device", -1)
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)

    if cuda_device >= 0:
        model = model.cuda(cuda_device)
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    if lr_scheduler_params:
        scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
    else:
        scheduler = None

    num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
    keep_serialized_model_every_num_seconds = params.pop_int(
        "keep_serialized_model_every_num_seconds", None)
    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)

    params.assert_empty(cls.__name__)
    return cls(model, optimizer, iterator,
               train_data, validation_data,
               patience=patience,
               validation_metric=validation_metric,
               validation_iterator=validation_iterator,
               shuffle=shuffle,
               num_epochs=num_epochs,
               serialization_dir=serialization_dir,
               cuda_device=cuda_device,
               grad_norm=grad_norm,
               grad_clipping=grad_clipping,
               learning_rate_scheduler=scheduler,
               num_serialized_models_to_keep=num_serialized_models_to_keep,
               keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
               model_save_interval=model_save_interval,
               summary_interval=summary_interval,
               histogram_interval=histogram_interval)
def test_can_optimise_model_with_dense_and_sparse_params(self): optimizer_params = Params({"type": "dense_sparse_adam"}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) for instance in self.instances: instance.index_fields(self.vocab) GradientDescentTrainer(self.model, optimizer, SimpleDataLoader(self.instances, 2)).train()
def _setup_training(self, tasks, train_params, optimizer_params, scheduler_params, iterator):
    # Task bookkeeping
    task_infos = {task.name: {} for task in tasks}
    for task in tasks:
        task_info = task_infos[task.name]
        tr_generator = iterator(task.train_data, num_epochs=None,
                                cuda_device=self._cuda_device)
        task_info['n_tr_batches'] = iterator.get_num_batches(task.train_data)
        task_info['tr_generator'] = tr_generator
        task_info['loss'] = 0.0
        task_info['total_batches_trained'] = 0
        task_info['n_batches_since_val'] = 0
        task_info['optimizer'] = Optimizer.from_params(
            train_params, copy.deepcopy(optimizer_params))
        task_info['scheduler'] = LearningRateScheduler.from_params(
            task_info['optimizer'], copy.deepcopy(scheduler_params))
        task_info['stopped'] = False
        task_info['last_log'] = time.time()

    # Metric bookkeeping
    all_metrics = [task.val_metric for task in tasks] + ['micro_accuracy', 'macro_accuracy']
    metric_infos = {metric: {'hist': [], 'stopped': False, 'best': (-1, {})}
                    for metric in all_metrics}

    self._task_infos = task_infos
    self._metric_infos = metric_infos
    return task_infos, metric_infos
def test_linear_with_warmup_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(),
            params=Params({"type": "sgd", "lr": 1.0}),
        ),
        params=Params(
            {
                "type": "linear_with_warmup",
                "warmup_steps": 2,
                "num_epochs": 2,
                "num_steps_per_epoch": 3,
            }
        ),
    )
    optimizer = scheduler.optimizer

    # Linear warmup for 2 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

    # Linear decay for 4 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.75
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.25
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.0
def predict_json(self, _: JsonDict, cuda_device: int = -1) -> JsonDict:
    parameter_filename = 'allennlp/seq2seq.json'
    serialization_dir = 'retrained'
    subprocess.check_call(['mkdir', '-p', serialization_dir])
    params = Params.from_file(parameter_filename)

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(self._model.vocab)

    parameters = [[n, p] for n, p in self._model.named_parameters() if p.requires_grad]
    trainer_params = params.pop('trainer')
    optimizer = Optimizer.from_params(parameters, trainer_params.pop("optimizer"))

    all_datasets = datasets_from_params(params)
    train_data = all_datasets['train']

    trainer = SimpleTrainer(self._model, optimizer, train_data, iterator)
    interpreter = Interpreter(self._model, self._dataset_reader, trainer)

    while True:
        try:
            interpreter.cmdloop()
        except Exception as e:
            print(e)
            traceback.print_exc()
            print('Restarting interpreter cmdloop.')
def test_optimizer_parameter_groups(self): optimizer_params = Params({ "type": "sgd", "lr": 1, "momentum": 5, "parameter_groups": [ # the repeated "bias_" checks a corner case # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception [["weight_i", "bias_", "bias_", "NOT_A_VARIABLE_NAME"], {'lr': 2}], [["tag_projection_layer"], {'lr': 3}], ] }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) param_groups = optimizer.param_groups assert len(param_groups) == 3 assert param_groups[0]['lr'] == 2 assert param_groups[1]['lr'] == 3 # base case uses default lr assert param_groups[2]['lr'] == 1 for k in range(3): assert param_groups[k]['momentum'] == 5 # all LSTM parameters except recurrent connections (those with weight_h in name) assert len(param_groups[0]['params']) == 6 # just the projection weight and bias assert len(param_groups[1]['params']) == 2 # the embedding + recurrent connections left in the default group assert len(param_groups[2]['params']) == 3
def test_optimizer_basic(self): optimizer_params = Params({"type": "sgd", "lr": 1}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) param_groups = optimizer.param_groups assert len(param_groups) == 1 assert param_groups[0]["lr"] == 1
def test_noam_learning_rate_schedule_does_not_crash(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}))
    lrs.step(None)
    lrs.step_batch(None)
def test_optimizer_parameter_groups(self): optimizer_params = Params({ u"type": u"sgd", u"lr": 1, u"momentum": 5, u"parameter_groups": [ # the repeated "bias_" checks a corner case # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception [[u"weight_i", u"bias_", u"bias_", u"NOT_A_VARIABLE_NAME"], {u'lr': 2}], [[u"tag_projection_layer"], {u'lr': 3}], ] }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) param_groups = optimizer.param_groups assert len(param_groups) == 3 assert param_groups[0][u'lr'] == 2 assert param_groups[1][u'lr'] == 3 # base case uses default lr assert param_groups[2][u'lr'] == 1 for k in range(3): assert param_groups[k][u'momentum'] == 5 # all LSTM parameters except recurrent connections (those with weight_h in name) assert len(param_groups[0][u'params']) == 6 # just the projection weight and bias assert len(param_groups[1][u'params']) == 2 # the embedding + recurrent connections left in the default group assert len(param_groups[2][u'params']) == 3
def test_exponential_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(self.model.named_parameters(),
                                        Params({"type": "sgd", "lr": 1.0})),
        params=Params({"type": "exponential", "gamma": 0.5}),
    )
    optimizer = scheduler.lr_scheduler.optimizer
    optimizer.step()  # to avoid a pytorch warning

    # Initial learning rate should be unchanged for first epoch.
    assert optimizer.param_groups[0]["lr"] == 1.0
    # But since the way PyTorch LR schedulers work is a little wonky,
    # the LR will also be unchanged for the second epoch (epoch id 0).
    scheduler.step(epoch=0)
    assert optimizer.param_groups[0]["lr"] == 1.0
    # Now the learning rate starts to be updated...
    scheduler.step(epoch=1)
    assert optimizer.param_groups[0]["lr"] == 0.5
    scheduler.step(epoch=2)
    assert optimizer.param_groups[0]["lr"] == 0.5 ** 2
def _get_optimizer(self, lr: float = 1.0):
    return Optimizer.from_params(
        model_parameters=self.model.named_parameters(),
        params=Params({"type": "sgd", "lr": lr}))
def test_polynomial_decay_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(),
            params=Params({"type": "sgd", "lr": 1.0}),
        ),
        params=Params(
            {
                "type": "polynomial_decay",
                "warmup_steps": 2,
                "num_epochs": 2,
                "num_steps_per_epoch": 3,
                "end_learning_rate": 0.1,
                "power": 2,
            }
        ),
    )
    optimizer = scheduler.optimizer

    # Linear warmup for 2 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

    # Polynomial decay for 4 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.60625  # (1.0 - 0.1) * (3/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.325  # (1.0 - 0.1) * (2/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.15625  # (1.0 - 0.1) * (1/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.1  # (1.0 - 0.1) * (0/4) ** 2 + 0.1
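# A standalone sketch of the schedule the linear_with_warmup and polynomial_decay tests above
# exercise, reconstructed from the asserted values (not copied from AllenNLP's implementation):
# linear warmup up to the base lr, then polynomial decay toward end_lr over the remaining
# steps. linear_with_warmup is the power=1, end_lr=0.0 special case.
def warmup_then_polynomial_lr(step, base_lr, warmup_steps, total_steps, end_lr=0.0, power=1.0):
    if step <= warmup_steps:
        return base_lr * step / warmup_steps
    remaining = (total_steps - step) / (total_steps - warmup_steps)
    return (base_lr - end_lr) * remaining ** power + end_lr

# total_steps = num_epochs * num_steps_per_epoch = 2 * 3 = 6
assert warmup_then_polynomial_lr(3, 1.0, 2, 6, end_lr=0.1, power=2) == 0.60625
assert warmup_then_polynomial_lr(6, 1.0, 2, 6, end_lr=0.1, power=2) == 0.1
assert warmup_then_polynomial_lr(3, 1.0, 2, 6) == 0.75  # the linear_with_warmup case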
def test_reduce_on_plateau_works_when_metrics_exist(self):
    LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "reduce_on_plateau"}),
    ).step(10)
def _get_optimizer(self):
    return Optimizer.from_params(
        self.model.named_parameters(),
        Params({"type": "sgd", "lr": 1.0, "momentum": self.base_momentum}))
def from_params(  # type: ignore
    cls,
    params: Params,
    serialization_dir: str,
    recover: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
) -> "CallbackTrainer":
    pieces = TrainerPieces.from_params(params, serialization_dir, recover,
                                       cache_directory, cache_prefix)
    model = pieces.model
    params = pieces.params
    validation_iterator = pieces.validation_iterator or pieces.iterator

    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = parse_cuda_device(params.pop("cuda_device", -1))

    if isinstance(cuda_device, list):
        model_device = cuda_device[0]
    else:
        model_device = cuda_device
    if model_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(model_device)

    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    callbacks_params = params.pop("callbacks", [])
    callbacks: List[Callback] = [
        Callback.from_params(
            params=callback_params,
            model=model,
            optimizer=optimizer,
            instances=pieces.train_dataset,
            iterator=pieces.iterator,
            shuffle=shuffle,
            validation_data=pieces.validation_dataset,
            validation_iterator=validation_iterator,
            serialization_dir=serialization_dir,
        )
        for callback_params in callbacks_params
    ]

    params.assert_empty(cls.__name__)
    return cls(
        model,
        pieces.train_dataset,
        pieces.iterator,
        optimizer,
        num_epochs=num_epochs,
        shuffle=shuffle,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        callbacks=callbacks,
    )
def test_can_optimise_model_with_dense_and_sparse_params(self): optimizer_params = Params({ "type": "dense_sparse_adam" }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) iterator = BasicIterator(2) iterator.index_with(self.vocab) Trainer(self.model, optimizer, iterator, self.instances).train()
def test_no_metric_wrapper_can_support_none_for_metrics(self):
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(self.model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "step", "step_size": 1}))
    lrs.step(None, None)
def test_no_metric_wrapper_can_support_none_for_metrics(self):
    lrs = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "step", "step_size": 1}),
    )
    lrs.lr_scheduler.optimizer.step()  # to avoid a pytorch warning
    lrs.step(None)
def test_noam_learning_rate_schedule_does_not_crash(self):
    lrs = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}),
    )
    lrs.step(None)
    lrs.step_batch(None)
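# For reference, a sketch of the standard "noam" rate from "Attention Is All You Need" that the
# scheduler above is named after (written here independently, not copied from AllenNLP's code):
# the lr rises roughly linearly for warmup_steps batches and then decays as the inverse square
# root of the step count, scaled by model_size ** -0.5.
def noam_lr(step, model_size=10, warmup_steps=2000, factor=1.0):
    step = max(step, 1)  # avoid 0 ** -0.5 on the very first batch
    return factor * model_size ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

assert noam_lr(1) < noam_lr(2000)     # during warmup: increasing
assert noam_lr(4000) < noam_lr(2000)  # after warmup: decaying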
def test_optimizer_basic(self): optimizer_params = Params({ "type": "sgd", "lr": 1 }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) param_groups = optimizer.param_groups assert len(param_groups) == 1 assert param_groups[0]['lr'] == 1
def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    with self.assertRaises(ConfigurationError) as context:
        LearningRateScheduler.from_params(
            Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
            Params({"type": "reduce_on_plateau"})).step(None, None)
    self.assertTrue(
        'The reduce_on_plateau learning rate scheduler requires a validation metric'
        in str(context.exception))
def setup_method(self):
    super().setup_method()
    self.model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    self.optimizer = Optimizer.from_params(
        model_parameters=self.model.named_parameters(),
        params=Params({"type": "sgd", "lr": 1.0}),
    )
def test_parameter_type_inference(self): # Should work ok even with lr as a string optimizer_params = Params({"type": "sgd", "lr": "0.1"}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) assert optimizer.defaults["lr"] == 0.1
def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    with pytest.raises(
        ConfigurationError, match="learning rate scheduler requires a validation metric"
    ):
        LearningRateScheduler.from_params(
            optimizer=Optimizer.from_params(
                model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
            ),
            params=Params({"type": "reduce_on_plateau"}),
        ).step(None)
def test_no_metric_wrapper_can_support_none_for_metrics(self): model = torch.nn.Sequential(torch.nn.Linear(10, 10)) lrs = LearningRateScheduler.from_params( Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})), Params({ "type": "step", "step_size": 1 })) lrs.step(None, None)
def test_optimizer_parameter_groups(self): optimizer_params = Params({ "type": "sgd", "lr": 1, "momentum": 5, "parameter_groups": [ # the repeated "bias_" checks a corner case # NOT_A_VARIABLE_NAME displays a warning but does not raise an exception [["weight_i", "bias_", "bias_", "NOT_A_VARIABLE_NAME"], { "lr": 2 }], [["tag_projection_layer"], { "lr": 3 }], [["^text_field_embedder.*$"], { "requires_grad": False }], ], }) # Before initializing the optimizer all params in this module will still require grad. assert all([ param.requires_grad for param in self.model.text_field_embedder.parameters() ]) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) param_groups = optimizer.param_groups # After initializing the optimizer, requires_grad should be false for all params in this module. assert not any([ param.requires_grad for param in self.model.text_field_embedder.parameters() ]) assert len(param_groups) == 3 assert param_groups[0]["lr"] == 2 assert param_groups[1]["lr"] == 3 # base case uses default lr assert param_groups[2]["lr"] == 1 for k in range(3): assert param_groups[k]["momentum"] == 5 # all LSTM parameters except recurrent connections (those with weight_h in name) assert len(param_groups[0]["params"]) == 6 # just the projection weight and bias assert len(param_groups[1]["params"]) == 2 # the recurrent connections left in the default group assert len(param_groups[2]["params"]) == 2
def test_optimizer_params(self): optimizer_params = Params( { "type": "multi", "optimizers": { "default": {"type": "adam", "lr": 1}, "embedder": {"type": "adam", "lr": 2}, "encoder": {"type": "adam", "lr": 3}, }, "parameter_groups": [ [ ["^text_field_embedder"], { "optimizer_name": "embedder", "betas": (0.9, 0.98), "lr": 2, "weight_decay": 0.01, }, ], [["^encoder.*bias"], {"optimizer_name": "encoder", "lr": 0.001}], [["^encoder.*weight"], {"optimizer_name": "encoder", "lr": 0.002}], [["^tag_projection_layer.*.weight$"], {"lr": 5}], ], } ) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(model_parameters=parameters, params=optimizer_params) # When the MultiOptimizer is initialized, `optimizer.param_groups` stores the parameter groups. # These parameter groups are assigned to their own optimizer by the MultiOptimizer. # Check that changes to the parameters in optimizer.param_groups affect the parameters in # optimizer._grouped_optimizers. regex_optimizer_params = set() regex_optimizer_grouped_optimizer_params = set() for param_group in optimizer.param_groups: # Each param_group should have optimizer options visible so they can be used by schedulers. lr = param_group["lr"] assert lr > 0 params = param_group["params"] for param in params: param.data.zero_() regex_optimizer_params.add(id(param)) # Check that the parameters of the sub-optimizers were also changed. for optimizer in optimizer.optimizers.values(): for param_group in optimizer.param_groups: params = param_group["params"] for param in params: regex_optimizer_grouped_optimizer_params.add(id(param)) assert param.sum() == 0, "Param has non-zero values." assert regex_optimizer_params == regex_optimizer_grouped_optimizer_params
def test_parameter_type_inference(self): # Should work ok even with lr as a string optimizer_params = Params({"type": "sgd", "lr": "0.1"}) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) assert optimizer.defaults["lr"] == 0.1 # But should crash (in the Pytorch code) if we don't do the type inference optimizer_params = Params({ "type": "sgd", "lr": "0.1", "infer_type_and_cast": False }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] with pytest.raises(TypeError): optimizer = Optimizer.from_params(parameters, optimizer_params)
def from_params(cls,
                model: Model,
                serialization_dir: str,
                iterator: DataIterator,
                train_data: Iterable[Instance],
                validation_data: Optional[Iterable[Instance]],
                params: Params,
                validation_iterator: DataIterator = None) -> 'Trainer':
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = params.pop_int("cuda_device", -1)
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)

    if cuda_device >= 0:
        model = model.cuda(cuda_device)
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    if lr_scheduler_params:
        scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
    else:
        scheduler = None

    num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
    keep_serialized_model_every_num_seconds = params.pop_int(
        "keep_serialized_model_every_num_seconds", None)
    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)

    params.assert_empty(cls.__name__)
    return Trainer(model, optimizer, iterator,
                   train_data, validation_data,
                   patience=patience,
                   validation_metric=validation_metric,
                   validation_iterator=validation_iterator,
                   num_epochs=num_epochs,
                   serialization_dir=serialization_dir,
                   cuda_device=cuda_device,
                   grad_norm=grad_norm,
                   grad_clipping=grad_clipping,
                   learning_rate_scheduler=scheduler,
                   num_serialized_models_to_keep=num_serialized_models_to_keep,
                   keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
                   model_save_interval=model_save_interval,
                   summary_interval=summary_interval,
                   histogram_interval=histogram_interval)
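# A sketch of the "trainer" config block that the GANTrainer/Trainer from_params methods above
# consume; each key corresponds to a params.pop_* call there. The concrete values are
# illustrative, not defaults taken from any particular project.
trainer_config = {
    "optimizer": {"type": "adam", "lr": 0.001},                    # popped as "optimizer" (required)
    "learning_rate_scheduler": {"type": "step", "step_size": 1},   # optional
    "patience": 5,                   # pop_int, default None (no early stopping)
    "validation_metric": "-loss",    # pop, default "-loss"
    "num_epochs": 20,                # pop_int, default 20
    "cuda_device": -1,               # pop_int, default -1 (CPU)
    "grad_norm": 5.0,                # pop_float, default None
    "grad_clipping": 1.0,            # pop_float, default None
    "num_serialized_models_to_keep": 20,   # pop_int, default 20
    "summary_interval": 100,               # pop_int, default 100
}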
def _setup_for_training(self):
    """Create vocab, configure default loggers/callbacks,
    create optimizer/lr scheduler, setup best metrics"""
    # create vocab
    if self._vocab_config is not None:
        vocab_datasets = [self._train_instances]
        if self._valid_instances is not None and self._vocab_config.include_valid_data:
            vocab_datasets += [self._valid_instances]
        self._pipeline.create_vocab(vocab_datasets, config=self._vocab_config)

    # Check for an empty vocab
    if vocabulary.is_empty(
        self._pipeline.vocab, self._pipeline.config.features.configured_namespaces
    ):
        raise EmptyVocabError(
            "All your features need a non-empty vocabulary for a training!"
        )

    # we give some special attention to these loggers/callbacks
    self._wandb_logger: Optional[WandbLogger] = None
    self._model_checkpoint: Optional[ModelCheckpoint] = None

    # add default callbacks/loggers
    self._trainer_config.callbacks = self._add_default_callbacks()
    if self._trainer_config.logger is not False:
        self._trainer_config.logger = self._add_default_loggers()

    # create optimizer, has to come AFTER creating the vocab!
    self._pipeline.model.optimizer = Optimizer.from_params(
        Params(
            {
                "model_parameters": self._pipeline.model.named_parameters(),
                **self._trainer_config.optimizer,
            }
        )
    )

    # create lr scheduler, has to come AFTER creating the optimizer!
    if not (
        self._trainer_config.warmup_steps == 0
        and self._trainer_config.lr_decay is None
    ):
        self._pipeline.model.lr_scheduler = self._create_lr_scheduler()
    else:
        self._pipeline.model.lr_scheduler = None

    # set monitor and mode for best validation metrics
    self._pipeline.model.monitor = self._trainer_config.monitor
    self._pipeline.model.monitor_mode = self._trainer_config.monitor_mode
def test_parameter_type_inference(self): # Should work ok even with lr as a string optimizer_params = Params({ "type": "sgd", "lr": "0.1" }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] optimizer = Optimizer.from_params(parameters, optimizer_params) assert optimizer.defaults["lr"] == 0.1 # But should crash (in the Pytorch code) if we don't do the type inference optimizer_params = Params({ "type": "sgd", "lr": "0.1", "infer_type_and_cast": False }) parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad] with pytest.raises(TypeError): optimizer = Optimizer.from_params(parameters, optimizer_params)
def test_no_metric_wrapper_can_support_none_for_metrics(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "step", "step_size": 1}))
    lrs.step(None, None)
def _get_optimizer(self, lr: float = 1.0): return Optimizer.from_params(self.model.named_parameters(), Params({"type": "sgd", "lr": lr}))
def _get_optimizer(self, lr: float = 1.0): optimizer_params = Params({"type": "sgd", "lr": lr}) optimizer_params["parameter_groups"] = [[[f"^{m}"], {}] for m in self.model._modules] return Optimizer.from_params(self.model.named_parameters(), optimizer_params)
def test_reduce_on_plateau_works_when_metrics_exist(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "reduce_on_plateau"})).step(10, None)