def log_histograms(self, model: Model, histogram_parameters: Set[str]) -> None:
    """
    Send histograms of parameters to tensorboard.
    """
    for name, param in model.named_parameters():
        if name in histogram_parameters:
            self.add_train_histogram("parameter_histogram/" + name, param)
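# Illustrative usage (a sketch, not part of this class): `histogram_parameters` is
# just a set of parameter names, so a caller could log every parameter with, e.g.
#
#     histogram_parameters = {name for name, _ in model.named_parameters()}
#     tensorboard.log_histograms(model, histogram_parameters)
#
# Here `tensorboard` stands in for an instance of this writer class.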
def log_parameter_and_gradient_statistics(self, # pylint: disable=invalid-name
                                           model: Model,
                                           batch_grad_norm: float) -> None:
    """
    Send the mean and std of all parameters and gradients to tensorboard, as well
    as logging the average gradient norm.
    """
    if self._should_log_parameter_statistics:
        # Log parameter values to Tensorboard
        for name, param in model.named_parameters():
            self.add_train_scalar("parameter_mean/" + name, param.data.mean())
            self.add_train_scalar("parameter_std/" + name, param.data.std())
            if param.grad is not None:
                if param.grad.is_sparse:
                    # pylint: disable=protected-access
                    grad_data = param.grad.data._values()
                else:
                    grad_data = param.grad.data

                # skip empty gradients
                if torch.prod(torch.tensor(grad_data.shape)).item() > 0:  # pylint: disable=not-callable
                    self.add_train_scalar("gradient_mean/" + name, grad_data.mean())
                    self.add_train_scalar("gradient_std/" + name, grad_data.std())
                else:
                    # no gradient for a parameter with sparse gradients
                    logger.info("No gradient for %s, skipping tensorboard logging.", name)
        # norm of gradients
        if batch_grad_norm is not None:
            self.add_train_scalar("gradient_norm", batch_grad_norm)
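# Illustrative call site (a sketch; the surrounding trainer code is assumed): this is
# typically invoked once per batch, after backprop, with the norm returned by gradient
# clipping. `tensorboard` again stands in for an instance of this writer class.
#
#     loss.backward()
#     batch_grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
#     tensorboard.log_parameter_and_gradient_statistics(model, batch_grad_norm)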
def log_learning_rates(self,
                       model: Model,
                       optimizer: torch.optim.Optimizer):
    """
    Send current parameter specific learning rates to tensorboard.
    """
    if self._should_log_learning_rate:
        # The optimizer stores learning rate info keyed by parameter tensor;
        # we want to log it keyed by parameter name.
        names = {param: name for name, param in model.named_parameters()}
        for group in optimizer.param_groups:
            if 'lr' not in group:
                continue
            rate = group['lr']
            for param in group['params']:
                # Scale by requires_grad so frozen parameters are logged with a rate of 0.
                effective_rate = rate * float(param.requires_grad)
                self.add_train_scalar("learning_rate/" + names[param], effective_rate)
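# Illustrative usage (a sketch; the submodule names and parameter-group split are
# assumptions): because rates are read from `optimizer.param_groups`, per-group
# learning rates each appear under their own "learning_rate/<parameter name>" scalar.
#
#     optimizer = torch.optim.SGD(
#         [
#             {"params": model.encoder.parameters(), "lr": 1e-2},   # hypothetical submodules
#             {"params": model.decoder.parameters(), "lr": 1e-3},
#         ]
#     )
#     tensorboard.log_learning_rates(model, optimizer)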