def clip_grads(self, grads):
    """Return the step size, shrunk when the combined gradient norm is too large.

    The squared ``l2norm`` of every entry in ``grads`` is summed and the
    square root of that total is stored on ``self.grad_norm``.  When the
    stored norm exceeds ``self.max_grad`` the returned value is
    ``self.alpha`` multiplied by ``self.max_grad / self.grad_norm``;
    otherwise ``self.alpha`` is returned unchanged.  The gradients
    themselves are never modified.

    Args:
        grads: Container that yields keys when iterated and supports
            ``grads[key]`` lookup (e.g. a dict); each looked-up value is
            passed to ``l2norm``.

    Returns:
        ``self.alpha``, or ``self.alpha`` scaled by
        ``self.max_grad / self.grad_norm`` when clipping applies.
    """
    # Gradient clipping
    # NOTE(review): l2norm is defined outside this block; presumably it
    # returns a scalar L2 norm when called without an axis -- confirm.
    squared_total = sum((l2norm(grads[p]) ** 2 for p in grads), 0.0)
    self.grad_norm = squared_total ** 0.5

    if self.grad_norm > self.max_grad:
        # Hand the values to the logger as arguments so the message is only
        # formatted when INFO records are actually emitted.
        logger.info('Clipping gradient by %f / %f',
                    self.max_grad, self.grad_norm)
        return self.alpha * (self.max_grad / self.grad_norm)
    return self.alpha
def clip_grads(self, grads):
    """Compute the global gradient norm and return a possibly reduced step size.

    Every key ``p`` of ``grads`` contributes ``l2norm(grads[p]) ** 2`` to a
    running total; the square root of the total is recorded on
    ``self.grad_norm``.  If that norm is greater than ``self.max_grad`` the
    method logs the two values and returns ``self.alpha`` scaled by
    ``self.max_grad / self.grad_norm``.  Otherwise it returns ``self.alpha``.
    The entries of ``grads`` are left untouched.

    Args:
        grads: Container iterated by key and indexed with ``grads[key]``
            (e.g. a dict); each value is handed to ``l2norm``.

    Returns:
        The step size to use: ``self.alpha``, or the scaled-down value when
        the norm exceeds ``self.max_grad``.
    """
    # Gradient clipping
    # Accumulate locally and publish once, so self.grad_norm never holds a
    # partial sum of squares.
    total_sq = 0.0
    for p in grads:
        # NOTE(review): l2norm comes from outside this block; assumed to
        # return a scalar norm here -- confirm.
        total_sq += l2norm(grads[p]) ** 2
    self.grad_norm = total_sq ** 0.5

    if self.grad_norm <= self.max_grad:
        return self.alpha

    # Lazy logging arguments: formatting is skipped when INFO is disabled.
    logger.info('Clipping gradient by %f / %f', self.max_grad, self.grad_norm)
    return self.alpha * (self.max_grad / self.grad_norm)
def l2_norm(self, p):
    """Return ``p`` divided by ``l2norm(p, axis=0)``.

    NOTE(review): ``l2norm`` is defined outside this block; presumably this
    rescales ``p`` to unit L2 norm along axis 0 -- confirm against the
    helper.  No guard is applied for a zero norm.
    """
    norm_along_axis0 = l2norm(p, axis=0)
    return p / norm_along_axis0
def l2_norm(self, p):
    """Divide ``p`` by the value of ``l2norm(p, axis=0)`` and return the result.

    NOTE(review): the ``l2norm`` helper lives outside this block; this looks
    like an L2 normalisation along axis 0 -- verify with the helper.  A zero
    norm is not handled specially.
    """
    denominator = l2norm(p, axis=0)
    normalised = p / denominator
    return normalised