def forward(self, inputs: torch.Tensor) -> torch.Tensor:
    """Standardize ``inputs`` with the stored running statistics.

    The result is ``(inputs - running_mean) / sqrt(running_variance /
    normalization_steps)``, limited to the closed interval ``[-5, 5]``.

    Args:
        inputs: Tensor to normalize; must broadcast against
            ``self.running_mean`` and ``self.running_variance``.

    Returns:
        The normalized tensor with every element clamped to ``[-5, 5]``.
    """
    # NOTE(review): dividing by normalization_steps suggests running_variance
    # holds an accumulated sum of squared deviations -- confirm against the
    # code that updates these statistics.
    scale = torch.sqrt(self.running_variance / self.normalization_steps)

    # Floor the denominator at the smallest positive normal number of its
    # dtype. A zero scale would otherwise turn an input that equals the
    # running mean into 0 / 0 = NaN, which the clamp below cannot repair.
    # Nonzero deviations still saturate at +/-5 exactly as before.
    scale = torch.clamp(scale, min=torch.finfo(scale.dtype).tiny)

    return torch.clamp((inputs - self.running_mean) / scale, -5, 5)
def forward(self, layer_activations: torch.Tensor) -> torch.Tensor:
    """Normalize ``layer_activations`` along its last dimension.

    Each vector along ``dim=-1`` has its mean subtracted and is divided by
    ``sqrt(variance + 1e-5)``, where the variance is the mean of the squared
    deviations over that same dimension.

    Args:
        layer_activations: Tensor to normalize.

    Returns:
        A tensor of the same shape as ``layer_activations``.
    """
    # Centre once and reuse the result for both the variance and the output.
    centered = layer_activations - layer_activations.mean(dim=-1, keepdim=True)
    variance = centered.pow(2).mean(dim=-1, keepdim=True)
    # The 1e-5 term keeps the denominator nonzero for constant vectors.
    return centered / (variance + 1e-5).sqrt()