def forward(self, x, gamma, beta):
    # Adaptive Instance-Layer Normalization: per-sample statistics over
    # (H, W) for instance norm and over (C, H, W) for layer norm.
    in_mean, in_var = porch.mean(x, dim=[2, 3], keepdim=True), porch.var(x, dim=[2, 3], keepdim=True)
    out_in = (x - in_mean) / porch.sqrt(in_var + self.eps)
    ln_mean, ln_var = porch.mean(x, dim=[1, 2, 3], keepdim=True), porch.var(x, dim=[1, 2, 3], keepdim=True)
    out_ln = (x - ln_mean) / porch.sqrt(ln_var + self.eps)
    # Learned rho blends the two normalizations; gamma/beta are supplied
    # externally (e.g. predicted by an MLP) rather than stored on the module.
    rho = porch.Tensor(self.rho).expand(x.shape[0], -1, -1, -1)
    out = rho * out_in + (1 - rho) * out_ln
    out = out * porch.Tensor(gamma).unsqueeze(2).unsqueeze(3) \
        + porch.Tensor(beta).unsqueeze(2).unsqueeze(3)
    return out
def forward(self, x):
    # Instance-Layer Normalization with learned per-channel affine parameters.
    in_mean, in_var = porch.mean(x, dim=(2, 3), keepdim=True), porch.var(x, dim=(2, 3), keepdim=True)
    out_in = (x - in_mean) / porch.sqrt(in_var + self.eps)
    ln_mean, ln_var = porch.mean(x, dim=(1, 2, 3), keepdim=True), porch.var(x, dim=(1, 2, 3), keepdim=True)
    out_ln = (x - ln_mean) / porch.sqrt(ln_var + self.eps)
    rho = porch.Tensor(self.rho).expand(x.shape[0], -1, -1, -1)
    out = rho * out_in + (1 - rho) * out_ln
    out = out * porch.Tensor(self.gamma).expand(x.shape[0], -1, -1, -1) \
        + porch.Tensor(self.beta).expand(x.shape[0], -1, -1, -1)
    return out
def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
    super(AddCoordsTh, self).__init__()
    self.with_r = with_r
    self.with_boundary = with_boundary
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        x_coords = torch.arange(height).unsqueeze(1).expand(height, width).float()
        y_coords = torch.arange(width).unsqueeze(0).expand(height, width).float()
        # Scale coordinates to [-1, 1].
        x_coords = (x_coords / (height - 1)) * 2 - 1
        y_coords = (y_coords / (width - 1)) * 2 - 1
        coords = torch.stack([x_coords, y_coords], dim=0)  # (2, height, width)
        if self.with_r:
            # Optional radial channel: distance from the origin, max-normalized.
            rr = torch.sqrt(torch.pow(x_coords, 2) + torch.pow(y_coords, 2))  # (height, width)
            rr = (rr / torch.max(rr)).unsqueeze(0)
            coords = torch.cat([coords, rr], dim=0)
        self.coords = coords.unsqueeze(0).to(device)  # (1, 2 or 3, height, width)
        self.x_coords = x_coords.to(device)
        self.y_coords = y_coords.to(device)
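# A minimal sketch, assuming the usual CoordConv pattern, of a forward pass
# that pairs with the __init__ above: the cached coordinate maps are tiled over
# the batch and concatenated onto the input along the channel axis. The
# with_boundary branch is omitted; this sketch is not the source's own code.
def forward(self, x):
    coords = self.coords.repeat(x.size(0), 1, 1, 1)  # (B, 2 or 3, H, W)
    return torch.cat([x, coords], dim=1)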
def forward(self, input):
    in_mean, in_var = torch.mean(input, dim=[2, 3], keepdim=True), torch.var(input, dim=[2, 3], keepdim=True)
    out_in = (input - in_mean) / torch.sqrt(in_var + self.eps)
    ln_mean, ln_var = torch.mean(input, dim=[1, 2, 3], keepdim=True), torch.var(input, dim=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / torch.sqrt(ln_var + self.eps)
    rho = self.rho.expand(input.shape[0], -1, -1, -1)
    out = rho * out_in + (1 - rho) * out_ln
    out = out * self.gamma.expand(input.shape[0], -1, -1, -1) \
        + self.beta.expand(input.shape[0], -1, -1, -1)
    return out
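# A minimal sketch (assumed, not from the source) of the module state the ILN
# forwards above rely on: rho, gamma, and beta are per-channel parameters of
# shape (1, num_features, 1, 1), which is why expand() can broadcast them over
# the batch dimension.
import torch
import torch.nn as nn

class ILN(nn.Module):
    def __init__(self, num_features, eps=1e-5):
        super(ILN, self).__init__()
        self.eps = eps
        self.rho = nn.Parameter(torch.zeros(1, num_features, 1, 1))   # 0 -> start as pure layer norm
        self.gamma = nn.Parameter(torch.ones(1, num_features, 1, 1))  # affine scale
        self.beta = nn.Parameter(torch.zeros(1, num_features, 1, 1))  # affine shift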
def clip_grad_norm(params, max_norm):
    """Clips gradient norm; returns the total norm."""
    params = [p for p in params if p.grad is not None]
    if max_norm > 0:
        # torch.nn.utils has no clip_by_norm; clip_grad_norm_ is the standard
        # in-place call, and it returns the total norm before clipping.
        return torch.nn.utils.clip_grad_norm_(params, max_norm)
    else:
        # max_norm <= 0: no clipping, just report the total gradient norm.
        return torch.sqrt(sum(p.grad.data.norm() ** 2 for p in params))
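# Example usage (hypothetical training-loop names: model, optimizer, loss):
# clip after backward() and before the optimizer step; with max_norm=0 the
# function only reports the gradient norm without modifying any gradients.
loss.backward()
grad_norm = clip_grad_norm(model.parameters(), max_norm=1.0)
optimizer.step()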
def sqrt_newton_schulz(A, numIters, dtype=None):
    # Batched matrix square root via the Newton-Schulz iteration.
    with torch.no_grad():
        if dtype is None:
            dtype = A.dtype
        batchSize = A.shape[0]
        dim = A.shape[1]
        # Normalize A so its spectral norm is <= 1, which the iteration requires.
        normA = A.mul(A).sum(dim=1).sum(dim=1).sqrt()
        Y = A / normA.view(batchSize, 1, 1).expand_as(A)
        I = torch.eye(dim, dim, dtype=dtype, device=A.device).view(1, dim, dim).repeat(batchSize, 1, 1)
        Z = torch.eye(dim, dim, dtype=dtype, device=A.device).view(1, dim, dim).repeat(batchSize, 1, 1)
        for i in range(numIters):
            T = 0.5 * (3.0 * I - Z.bmm(Y))
            Y = Y.bmm(T)
            Z = T.bmm(Z)
        # Undo the normalization: sqrt(A) = sqrt(||A||_F) * Y.
        sA = Y * torch.sqrt(normA).view(batchSize, 1, 1).expand_as(A)
    return sA
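# Quick sanity check (assumed usage, not from the source): build a batch of
# symmetric positive definite matrices M; the result should satisfy
# sqrtM @ sqrtM ~ M up to iteration error.
import torch

B = torch.randn(4, 8, 8)
M = B.bmm(B.transpose(1, 2)) + 8 * torch.eye(8)  # SPD via B @ B^T + 8I
sqrtM = sqrt_newton_schulz(M, numIters=15)
rel_err = (sqrtM.bmm(sqrtM) - M).norm() / M.norm()  # should be close to 0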
def sqrt(self):
    return torch.sqrt(self)