def from_partial_objects(
    cls,
    name: str,
    model: Model,
    num_epochs: Optional[int] = None,
    batches_per_epoch: Optional[int] = None,
    cuda_device: Optional[Union[int, torch.device]] = None,
    grad_norm: float = None,
    grad_clipping: float = None,
    optimizer: Lazy[Optimizer] = None,
    learning_rate_scheduler: Lazy[LearningRateScheduler] = None,
    momentum_scheduler: Lazy[MomentumScheduler] = None,
    tensorboard_writer: Lazy[TensorboardWriter] = None,
) -> "ComponentOptimizer":
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]

    optimizer_ = optimizer.construct(model_parameters=parameters)
    if not optimizer_:
        optimizer_ = Optimizer.default(parameters)

    common_util.log_frozen_and_tunable_parameter_names(model)

    learning_rate_scheduler_ = learning_rate_scheduler.construct(
        optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch
    )
    momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_)

    return cls(
        name=name,
        model=model,
        optimizer=optimizer_,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=learning_rate_scheduler_,
        momentum_scheduler=momentum_scheduler_,
    )
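# Illustrative sketch only (not part of this module): the same construction order that
# `from_partial_objects` resolves through `Lazy`, written out by hand with a stand-in
# model and plain PyTorch classes. Trainable parameters come from the model, the
# optimizer needs those parameters, and the scheduler needs the optimizer -- which is
# exactly the sequential dependency the `Lazy` wrappers exist to handle.
def _example_manual_construction():
    import torch

    model = torch.nn.Linear(10, 2)  # stand-in for an allennlp `Model`
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]

    # The optimizer can only be built once the model's parameters exist ...
    optimizer = torch.optim.Adam([p for _, p in parameters], lr=1e-3)

    # ... and the scheduler can only be built once the optimizer exists.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
    return optimizer, scheduler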
def from_partial_objects(
    cls,
    model: Model,
    serialization_dir: str,
    data_loader: DataLoader,
    validation_data_loader: DataLoader = None,
    local_rank: int = 0,
    patience: int = None,
    validation_metric: str = "-loss",
    num_epochs: int = 20,
    cuda_device: int = -1,
    grad_norm: float = None,
    grad_clipping: float = None,
    distributed: bool = None,
    world_size: int = 1,
    num_gradient_accumulation_steps: int = 1,
    opt_level: Optional[str] = None,
    no_grad: List[str] = None,
    optimizer: Lazy[Optimizer] = None,
    learning_rate_scheduler: Lazy[LearningRateScheduler] = None,
    momentum_scheduler: Lazy[MomentumScheduler] = None,
    tensorboard_writer: Lazy[TensorboardWriter] = None,
    moving_average: Lazy[MovingAverage] = None,
    checkpointer: Lazy[Checkpointer] = None,
    batch_callbacks: List[BatchCallback] = None,
    epoch_callbacks: List[EpochCallback] = None,
) -> "Trainer":
    """
    This method exists so that we can have a documented method to construct this class using
    `FromParams`. If you are not using `FromParams` or config files, you can safely ignore
    this method.

    The reason we can't just use `__init__` with `FromParams` here is because there are
    sequential dependencies among this class's arguments. Anything that has a `Lazy[]` type
    annotation needs something from one of the non-`Lazy` arguments. The `Optimizer` needs
    to have the parameters from the `Model` before it's constructed, and the `Schedulers`
    need to have the `Optimizer`. Because of this, the typical way we construct things with
    `FromParams` doesn't work, so we use `Lazy` to allow for constructing the objects
    sequentially.

    If you're not using `FromParams`, you can just construct these arguments in the right
    order yourself in your code and call the constructor directly.
    """
    check_for_gpu(cuda_device)
    if cuda_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(cuda_device)

    if no_grad:
        for name, parameter in model.named_parameters():
            if any(re.search(regex, name) for regex in no_grad):
                parameter.requires_grad_(False)

    common_util.log_frozen_and_tunable_parameter_names(model)

    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer_ = optimizer.construct(model_parameters=parameters)
    if not optimizer_:
        optimizer_ = Optimizer.default(parameters)

    try:
        batches_per_epoch = len(data_loader)
    except TypeError:
        # If the dataset is lazy, it won't have a length.
        batches_per_epoch = None

    moving_average_ = moving_average.construct(parameters=parameters)
    learning_rate_scheduler_ = learning_rate_scheduler.construct(
        optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch
    )
    momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_)

    checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir)
    tensorboard_writer_ = tensorboard_writer.construct() or TensorboardWriter(serialization_dir)

    return cls(
        model,
        optimizer_,
        data_loader,
        patience=patience,
        validation_metric=validation_metric,
        validation_data_loader=validation_data_loader,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=learning_rate_scheduler_,
        momentum_scheduler=momentum_scheduler_,
        tensorboard_writer=tensorboard_writer_,
        checkpointer=checkpointer_,
        moving_average=moving_average_,
        batch_callbacks=batch_callbacks,
        epoch_callbacks=epoch_callbacks,
        distributed=distributed,
        local_rank=local_rank,
        world_size=world_size,
        num_gradient_accumulation_steps=num_gradient_accumulation_steps,
        opt_level=opt_level,
    )
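# For reference, a sketch of the kind of "trainer" config block that `FromParams` would feed
# into `from_partial_objects` above. The keys mirror the method's parameters; the nested blocks
# are the `Lazy` arguments, constructed later once the model and optimizer exist. The registered
# type names ("adam", "reduce_on_plateau") are the usual AllenNLP ones and are shown here as
# assumptions, not taken from this file.
_EXAMPLE_TRAINER_CONFIG = {
    "num_epochs": 20,
    "patience": 5,
    "validation_metric": "-loss",
    "cuda_device": -1,
    "grad_norm": 5.0,
    "optimizer": {"type": "adam", "lr": 1e-3},
    "learning_rate_scheduler": {"type": "reduce_on_plateau", "patience": 2},
}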
def from_partial_objects(
    cls,
    model: Model,
    serialization_dir: str,
    iterator: DataIterator,
    train_data: Iterable[Instance],
    validation_iterator: DataIterator = None,
    validation_data: Iterable[Instance] = None,
    local_rank: int = 0,
    patience: int = None,
    validation_metric: str = "-loss",
    shuffle: bool = True,
    num_epochs: int = 20,
    cuda_device: int = -1,
    grad_norm: float = None,
    grad_clipping: float = None,
    model_save_interval: float = None,
    summary_interval: int = 100,
    histogram_interval: int = None,
    should_log_parameter_statistics: bool = True,
    should_log_learning_rate: bool = False,
    log_batch_size_period: int = None,
    distributed: bool = None,
    world_size: int = 1,
    num_gradient_accumulation_steps: int = 1,
    no_grad: List[str] = None,
    optimizer: Lazy[Optimizer] = None,
    learning_rate_scheduler: Lazy[LearningRateScheduler] = None,
    momentum_scheduler: Lazy[MomentumScheduler] = None,
    moving_average: Lazy[MovingAverage] = None,
    checkpointer: Lazy[Checkpointer] = None,
) -> "Trainer":
    """
    This method exists so that we can have a documented method to construct this class using
    `FromParams`. If you are not using `FromParams` or config files, you can safely ignore
    this method.

    The reason we can't just use `__init__` with `FromParams` here is because there are
    sequential dependencies among this class's arguments. Anything that has a `Lazy[]` type
    annotation needs something from one of the non-`Lazy` arguments. The `Optimizer` needs
    to have the parameters from the `Model` before it's constructed, and the `Schedulers`
    need to have the `Optimizer`. Because of this, the typical way we construct things with
    `FromParams` doesn't work, so we use `Lazy` to allow for constructing the objects
    sequentially.

    If you're not using `FromParams`, you can just construct these arguments in the right
    order yourself in your code and call the constructor directly.
    """
    check_for_gpu(cuda_device)
    if cuda_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(cuda_device)

    if no_grad:
        for name, parameter in model.named_parameters():
            if any(re.search(regex, name) for regex in no_grad):
                parameter.requires_grad_(False)

    common_util.log_frozen_and_tunable_parameter_names(model)

    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer_ = optimizer.construct(model_parameters=parameters)
    if not optimizer_:
        optimizer_ = Optimizer.default(parameters)

    batches_per_epoch = iterator.get_num_batches(train_data)
    if batches_per_epoch == 1:
        # get_num_batches returns 1 when it can't determine the answer
        batches_per_epoch = None

    moving_average_ = moving_average.construct(parameters=parameters)
    learning_rate_scheduler_ = learning_rate_scheduler.construct(
        optimizer=optimizer_, num_epochs=num_epochs, num_steps_per_epoch=batches_per_epoch
    )
    momentum_scheduler_ = momentum_scheduler.construct(optimizer=optimizer_)

    checkpointer_ = checkpointer.construct() or Checkpointer(serialization_dir)

    return cls(
        model,
        optimizer_,
        iterator,
        train_data,
        validation_data,
        patience=patience,
        validation_metric=validation_metric,
        validation_iterator=validation_iterator,
        shuffle=shuffle,
        num_epochs=num_epochs,
        serialization_dir=serialization_dir,
        cuda_device=cuda_device,
        grad_norm=grad_norm,
        grad_clipping=grad_clipping,
        learning_rate_scheduler=learning_rate_scheduler_,
        momentum_scheduler=momentum_scheduler_,
        checkpointer=checkpointer_,
        model_save_interval=model_save_interval,
        summary_interval=summary_interval,
        histogram_interval=histogram_interval,
        should_log_parameter_statistics=should_log_parameter_statistics,
        should_log_learning_rate=should_log_learning_rate,
        log_batch_size_period=log_batch_size_period,
        moving_average=moving_average_,
        distributed=distributed,
        local_rank=local_rank,
        world_size=world_size,
        num_gradient_accumulation_steps=num_gradient_accumulation_steps,
    )
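# A small runnable sketch of the `no_grad` freezing step used in both methods above: any
# parameter whose name matches one of the regexes has requires_grad switched off before the
# parameter list (and hence the optimizer) is built. The toy model below is a stand-in, not
# part of this module.
def _example_no_grad_freezing():
    import re

    import torch

    model = torch.nn.Sequential(torch.nn.Embedding(100, 16), torch.nn.Linear(16, 2))
    no_grad = [r"^0\."]  # freeze everything in the first (embedding) sub-module

    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad):
            parameter.requires_grad_(False)

    # Only the linear layer's parameters remain trainable: ['1.weight', '1.bias']
    return [n for n, p in model.named_parameters() if p.requires_grad]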