# `torchgrad` is assumed (not shown here) to be: from torch.autograd import grad as torchgrad
# -- the same alias is used in all variants below
def grad_U(z):
    grad_outputs = torch.ones(z.size(0)).type(self.dtype)
    grad = torchgrad(U(z), z, grad_outputs=grad_outputs, create_graph=True)[0]
    # gradient clipping by norm to avoid numerical issues
    norm = torch.sqrt(torch.norm(grad, p=2, dim=1))
    grad = grad / norm.view(-1, 1)
    return grad.detach()
def grad_U(z, modeltype=None, cond=None):
    if modeltype in ['mcvae']:
        # one grad_outputs tensor per latent block; note the list itself is not
        # passed to torchgrad below -- only the last `grad_outputs` is used
        grad_outputs_lst = []
        for i in range(0, len(B)):
            grad_outputs = torch.ones(B[0])  # .cuda()
            grad_outputs_lst.append(grad_outputs)
    else:
        grad_outputs = torch.ones(B)  # .cuda()
    # torch.autograd.grad default returns volatile
    if not isinstance(z, list):
        z = z.requires_grad_()
    # else:
    #     for i in range(0, len(z)):
    #         z[i] = z[i].requires_grad_()
    grad = torchgrad(U(z, modeltype=modeltype, cond=cond), z,
                     grad_outputs=grad_outputs)[0]
    # clip element-wise (by value, not by norm)
    if modeltype in ['mcvae']:
        max_ = B[0] * model.latent_dim * 100.
    else:
        max_ = B * model.latent_dim * 100.
    grad = torch.clamp(grad, -max_, max_)
    grad.requires_grad_()
    return grad
def grad_U(z):
    # grad w.r.t. outputs; mandatory in this case
    grad_outputs = torch.ones(B, dtype=batch.dtype, device=batch.device)
    grad = torchgrad(U(z), z, grad_outputs=grad_outputs)[0]
    # clip element-wise (by value, not by norm) to avoid exploding gradients
    grad = torch.clamp(grad, -B * z_size * 100, B * z_size * 100)
    grad.requires_grad = True
    return grad
def grad_U(z):
    # grad w.r.t. outputs; mandatory in this case
    grad_outputs = torch.ones(B).cuda()
    # torch.autograd.grad default returns volatile
    grad = torchgrad(U(z), z, grad_outputs=grad_outputs)[0]
    # clip element-wise (by value, not by norm)
    max_ = B * model.latent_dim * 100.
    grad = torch.clamp(grad, -max_, max_)
    grad.requires_grad_()
    return grad
def grad_U(z):
    # grad w.r.t. outputs; mandatory in this case
    grad_outputs = torch.ones(B).type(mdtype)
    # torch.autograd.grad default returns volatile
    grad = torchgrad(U(z), z, grad_outputs=grad_outputs)[0]
    # avoid humongous gradients
    grad = torch.clamp(grad, -10000, 10000)
    # needs the legacy Variable wrapper (from torch.autograd import Variable)
    # to make the result differentiable; in modern PyTorch,
    # grad.detach().requires_grad_(True) is the equivalent
    grad = Variable(grad.data, requires_grad=True)
    return grad
def grad_U(z):
    grad_outputs = torch.ones(z.size(0)).type(self.dtype)
    grad = torchgrad(U(z), z, grad_outputs=grad_outputs, create_graph=True)[0]
    # gradient clipping to avoid numerical issues; neither clipping method
    # (norm rescaling vs. clamping) consistently outperforms the other
    norm = torch.sqrt(torch.norm(grad, p=2, dim=1))
    grad = grad / norm.view(-1, 1)
    # grad = torch.clamp(grad, -10000, 10000)
    return grad.detach()
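# Illustration only (not from the original source): the variants above bound the
# gradient in two different ways -- per-sample rescaling by the square root of the
# L2 norm, or element-wise clamping to a fixed range. A minimal, self-contained
# sketch of both on a made-up toy gradient tensor; the helper name is hypothetical.
def _clip_strategies_sketch():
    import torch
    g = torch.tensor([[3.0, 4.0], [0.6, 0.8]])   # toy per-sample gradients
    # norm-based: rescale each row, as in the create_graph variants above
    norm = torch.sqrt(torch.norm(g, p=2, dim=1))
    g_by_norm = g / norm.view(-1, 1)
    # value-based: clamp every element, as in the torch.clamp variants above
    g_by_value = torch.clamp(g, -10000, 10000)
    return g_by_norm, g_by_value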
def grad_U(z):
    # grad w.r.t. outputs; mandatory in this case
    grad_outputs = torch.ones(B).cuda()
    # torch.autograd.grad default returns volatile
    # grad = torchgrad(U(z), z, grad_outputs=grad_outputs)[0]
    grad = torchgrad(-log_f_i(z, batch, t1), z, grad_outputs=grad_outputs,
                     retain_graph=False, create_graph=False)[0]
    # clip element-wise (by value, not by norm)
    max_ = B * model_latent_dim * 100.
    grad = torch.clamp(grad, -max_, max_)
    # grad.requires_grad_()
    return grad.detach()
def grad(z):
    # gradient of the per-sample energy w.r.t. z
    return torchgrad(energy(z), z, grad_outputs=z.new_ones(z.shape[0]))[0]
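# Usage sketch (hypothetical, not from the original source): how a gradient
# function like the ones above is typically consumed inside one leapfrog step
# of HMC/AIS. `toy_energy`, `toy_grad_U`, `leapfrog_step` and `step_size` are
# made-up names for illustration; only the call pattern mirrors the snippets above.
def _leapfrog_usage_sketch():
    import torch
    from torch.autograd import grad as torchgrad

    def toy_energy(z):
        # toy quadratic potential U(z) = 0.5 * ||z||^2, one scalar per sample
        return 0.5 * (z ** 2).sum(dim=1)

    def toy_grad_U(z):
        z = z.requires_grad_()
        g = torchgrad(toy_energy(z), z, grad_outputs=z.new_ones(z.shape[0]))[0]
        return g.detach()

    def leapfrog_step(z, p, step_size=0.1):
        # half-step momentum, full-step position, half-step momentum
        p = p - 0.5 * step_size * toy_grad_U(z)
        z = (z + step_size * p).detach()
        p = p - 0.5 * step_size * toy_grad_U(z)
        return z, p

    z, p = torch.randn(4, 2), torch.randn(4, 2)
    return leapfrog_step(z, p)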