def reset_sgd_optimizer(opt: Optimizer) -> None:
    """
    Reset an SGD optimizer's state (e.g. its momentum buffers).
    """
    opt.state = collections.defaultdict(dict)
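
# Illustrative sketch (not part of the public API): clearing momentum buffers
# between two training phases, e.g. before fine-tuning with a new learning
# rate. The tiny linear model is a stand-in made up for this example.
def _example_reset_sgd_optimizer():
    model = nn.Linear(4, 1)
    opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    # ... phase 1 training would populate opt.state with momentum buffers ...
    reset_sgd_optimizer(opt)
    assert len(opt.state) == 0  # momentum buffers start empty for phase 2
    return opt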
def create_default_trainer(model: nn.Module,
                           optimizer: optim.Optimizer,
                           criterion,
                           device=None,
                           dtype=None,
                           non_blocking=False,
                           prepare_batch_function=batch_to_tensor,
                           loss_transform_function=lambda criterion, y_preds, y: criterion(y_preds, y),
                           output_transform_function=lambda x, y, y_pred, loss: (x, y, y_pred, loss.item() if loss is not None else None),
                           init_callback=None,
                           verbose=1):
    """
    Helper method that returns an instance of `ModuleTrainer`. This is
    helpful as it provides default training and evaluation functions.

    Note: The optimizer's state is reset when this method is called.

    :param model: Model to train.
    :param optimizer: Optimizer used throughout the training.
    :param criterion: Loss function to optimize.
    :param device: Device to use, e.g. 'cpu' or 'cuda'. Defaults to 'cuda'
        when available, otherwise 'cpu'.
    :param dtype: Passed to `prepare_batch_function` to change the model's
        data type, e.g. `torch.float32` or `torch.float64`.
    :param non_blocking: Passed to `prepare_batch_function`.
    :param prepare_batch_function: Function that prepares a batch. It should
        return a tuple `(x, y, model_args)`, where `x` and `y` are
        `torch.Tensor`s and `model_args` is a dict of extra keyword
        arguments for the model.
    :param loss_transform_function: Optionally transforms the loss
        function's output. Can be useful with multi-output models.
    :param output_transform_function: Optionally transforms `x`, `y`,
        `y_pred` and `loss`. Can be useful with multi-output models.
    :param init_callback: Passed to `ModuleTrainer`'s constructor.
    :param verbose: Currently only used to show the progress bar.
        Defaults to 1.
    :return: An instance of `ModuleTrainer`.
    """
    if not callable(prepare_batch_function):
        raise TypeError(
            "Argument prepare_batch_function should be a function.")
    if not callable(loss_transform_function):
        raise TypeError(
            "Argument loss_transform_function should be a function.")
    if not callable(output_transform_function):
        raise TypeError(
            "Argument output_transform_function should be a function.")

    if device is None:
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

    model.to(device)
    if dtype:
        model.to(dtype=dtype)

    # reset optimizer state
    optimizer.state = collections.defaultdict(dict)

    def _default_train_function(batch):
        model.train()
        optimizer.zero_grad()
        x, y, model_args = prepare_batch_function(batch,
                                                  device=device,
                                                  dtype=dtype,
                                                  non_blocking=non_blocking)
        y_pred = model(x, **model_args)
        loss = loss_transform_function(criterion, y_pred, y)
        loss.backward()
        optimizer.step()
        # detach the loss so downstream consumers do not keep the graph
        # alive; y_pred is returned as-is in case a custom transform still
        # needs gradient information
        return output_transform_function(x, y, y_pred, loss.detach())

    def _default_evaluate_function(batch):
        model.eval()
        # no_grad ensures y_pred carries no graph, so no detach() is needed
        with torch.no_grad():
            x, y, model_args = prepare_batch_function(batch,
                                                      device=device,
                                                      dtype=dtype,
                                                      non_blocking=non_blocking)
            y_pred = model(x, **model_args)
        return output_transform_function(x, y, y_pred, None)

    return ModuleTrainer(model,
                         optimizer,
                         train_function=_default_train_function,
                         evaluate_function=_default_evaluate_function,
                         prepare_batch_function=prepare_batch_function,
                         init_callback=init_callback,
                         verbose=verbose)
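
# Illustrative sketch: wiring the helpers above together. The model,
# optimizer and criterion are placeholders chosen for the example;
# ModuleTrainer's run/fit interface is defined elsewhere in this module,
# so the sketch stops after construction.
def _example_create_default_trainer():
    model = nn.Linear(10, 2)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    # device and dtype default sensibly; the optimizer state is reset here
    trainer = create_default_trainer(model, optimizer, criterion)
    return trainer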