Example #1
0
 def _step(self, epoch):
     """Apply one Adam-style update with a Nesterov look-ahead term.

     For every parameter the first and second moment estimates are
     refreshed from the current gradient, stored back on the optimizer,
     bias-corrected, and combined as in the Nadam rule:

         p -= lr * (beta1 * m_hat + (1 - beta1) * g / (1 - beta1**t))
                 / (sqrt(v_hat) + epsilon)

     Args:
         epoch: Step counter ``t`` used for bias correction.
             NOTE(review): ``t == 0`` makes both bias-correction
             denominators zero, so callers presumably start at 1 -- confirm.
     """
     t = fn.to_tensor(epoch)
     # Bias-correction denominators are identical for every parameter.
     bias1 = 1 - fn.power(self.beta1, t)
     bias2 = 1 - fn.power(self.beta2, t)
     grads = [p.grad for p in self.parameters]
     for i, (p, g, m, v) in enumerate(
             zip(self.parameters, grads, self.ms, self.vs)):
         m = self.beta1 * m + (1 - self.beta1) * g
         v = self.beta2 * v + (1 - self.beta2) * fn.square(g)
         # Persist the refreshed moments; rebinding the loop variables
         # alone would discard them and the averages would never
         # accumulate across steps.
         # Assumes self.ms / self.vs support item assignment (e.g. lists).
         self.ms[i] = m
         self.vs[i] = v
         m_hat = m / bias1
         v_hat = v / bias2
         # Look-ahead: the raw gradient is weighted by (1 - beta1), the
         # same coefficient used for the first moment, not (1 - beta2).
         lookahead = m_hat * self.beta1 + (1 - self.beta1) / bias1 * g
         # NOTE(review): relies on the parameter type implementing
         # in-place subtraction so the stored parameter is modified.
         p -= (self.lr * lookahead) / (fn.sqrt(v_hat) + self.epsilon)
Example #2
0
 def _step(self, epoch):
     """Apply one Adam-style update with a clipped per-element step size.

     The bias-corrected step size is divided by ``sqrt(v) + epsilon`` and
     then clipped into ``[lower_bound, upper_bound]``. Both bounds are
     computed from ``final_lr``, ``gamma`` and the step counter, and both
     approach ``final_lr`` as the counter grows.

     Args:
         epoch: Step counter ``t`` used for bias correction and bounds.
             NOTE(review): ``t == 0`` divides by zero in ``upper_bound``
             and in the step size, so callers presumably start at 1 --
             confirm.
     """
     t = fn.to_tensor(epoch)
     step_size = self.lr * (fn.sqrt(1 - fn.power(self.beta2, t)) /
                            (1 - fn.power(self.beta1, t)))
     lower_bound = self.final_lr * (1.0 - 1.0 / (self.gamma * t + 1))
     upper_bound = self.final_lr * (1.0 + 1.0 / (self.gamma * t))
     # The clip limits are the same for every parameter; extract the
     # scalar values once instead of on every loop iteration.
     low = lower_bound.item()
     high = upper_bound.item()
     grads = [p.grad for p in self.parameters]
     for i, (p, g, m, v) in enumerate(
             zip(self.parameters, grads, self.ms, self.vs)):
         m = self.beta1 * m + (1 - self.beta1) * g
         v = self.beta2 * v + (1 - self.beta2) * fn.square(g)
         # Persist the refreshed moments; rebinding the loop variables
         # alone would discard them and the averages would never
         # accumulate across steps.
         # Assumes self.ms / self.vs support item assignment (e.g. lists).
         self.ms[i] = m
         self.vs[i] = v
         denom = fn.sqrt(v) + self.epsilon
         # NOTE(review): relies on the parameter type implementing
         # in-place subtraction so the stored parameter is modified.
         p -= m * fn.clip(step_size / denom, low, high)
Example #3
0
 def _step(self, epoch):
     """Apply one infinity-norm (Adamax-style) Adam update.

     The first moment ``m`` is an exponential moving average of the
     gradient. ``v`` tracks the element-wise maximum of the decayed
     previous ``v`` and ``|g|``. Only the first moment is bias-corrected,
     through the learning rate:

         p -= lr / (1 - beta1**t) * m / (v + epsilon)

     Args:
         epoch: Step counter ``t`` used for bias correction.
             NOTE(review): ``t == 0`` makes the bias-correction
             denominator zero, so callers presumably start at 1 -- confirm.
     """
     t = fn.to_tensor(epoch)
     # The bias-corrected learning rate is identical for every parameter.
     lr_t = self.lr / (1 - fn.power(self.beta1, t))
     grads = [p.grad for p in self.parameters]
     for i, (p, g, m, v) in enumerate(
             zip(self.parameters, grads, self.ms, self.vs)):
         m = self.beta1 * m + (1 - self.beta1) * g
         v = fn.maximum(self.beta2 * v, fn.abs(g))
         # Persist the refreshed moments; rebinding the loop variables
         # alone would discard them and the running statistics would
         # never accumulate across steps.
         # Assumes self.ms / self.vs support item assignment (e.g. lists).
         self.ms[i] = m
         self.vs[i] = v
         # NOTE(review): relies on the parameter type implementing
         # in-place subtraction so the stored parameter is modified.
         p -= lr_t * m / (v + self.epsilon)
Example #4
0
 def __rpow__(self, t):
     """Reflected power: evaluate ``t ** self``.

     The left-hand operand ``t`` is converted with ``self._to_tensor`` and
     used as the base; ``self`` is the exponent.
     """
     # Imported at call time rather than module level (presumably to
     # avoid a circular import with the functions module -- confirm).
     from beacon.tensor.functions import power as _power
     base = self._to_tensor(t)
     return _power(base, self)