class RMSprop(Optimizer):
    """RMSprop optimizer.

    Keeps, per parameter, an exponentially decayed average ``v`` of the
    squared gradient and divides the learning rate by ``sqrt(v) + eps``
    before applying the gradient.

    Args:
        params: parameters to optimize; forwarded unchanged to
            ``Optimizer.__init__``, which is expected to populate
            ``self.params`` (read right after the call).
        lr: learning rate.
        decay: weight given to the previous running average; the new squared
            gradient is weighted by ``1 - decay``.
        eps: constant added to ``sqrt(v)`` so the division never hits zero.
    """

    def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
        super(RMSprop, self).__init__(params)
        # Read the device flag once, from the list step() actually iterates.
        # The raw ``params`` argument may differ from ``self.params`` (or not
        # be subscriptable at all), which would put the accumulators on a
        # different device than the learning rate and the parameters.
        gpu = self.params[0].gpu
        self.lr = Tensor([lr], gpu=gpu, requires_grad=False)
        self.decay = decay
        self.eps = eps
        # One running average of squared gradients per optimized parameter.
        self.v = [
            Tensor(np.zeros(t.shape, dtype=np.float32), gpu=gpu, requires_grad=False)
            for t in self.params
        ]

    def step(self):
        """Update every parameter in ``self.params`` from its current ``grad``."""
        for i, t in enumerate(self.params):
            squared_grad = t.grad.pow(2.0)
            self.v[i] = self.decay * self.v[i] + (1.0 - self.decay) * squared_grad
            # NOTE(review): assumes Tensor implements in-place subtraction so
            # that this mutates the parameter rather than rebinding ``t`` --
            # confirm against the Tensor class.
            t -= self.lr.div(self.v[i].sqrt() + self.eps) * t.grad
class RMSprop(Optimizer):
    """RMSprop optimizer with every hyper-parameter held as a one-element Tensor.

    Keeps, per parameter, an exponentially decayed average ``v`` of the
    squared gradient and divides the learning rate by ``sqrt(v) + eps``
    before applying the gradient. The constants ``1`` and ``2`` used by the
    update are stored as tensors too, so ``step`` combines tensors only.

    Args:
        params: parameters to optimize; forwarded unchanged to
            ``Optimizer.__init__``, which is expected to populate
            ``self.params`` (read right after the call).
        lr: learning rate.
        decay: weight given to the previous running average; the new squared
            gradient is weighted by ``1 - decay``.
        eps: constant added to ``sqrt(v)`` so the division never hits zero.
    """

    def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
        super(RMSprop, self).__init__(params)
        # Read the device flag once, from the list step() actually iterates,
        # so all helper tensors agree. Previously some used the raw ``params``
        # argument and others ``self.params``; those may differ, or ``params``
        # may not be subscriptable at all.
        gpu = self.params[0].gpu
        # NOTE(review): these tensors use Tensor's default ``requires_grad``;
        # confirm whether optimizer state should opt out of gradient tracking.
        self.lr = Tensor([lr], gpu=gpu)
        self.decay = Tensor([decay], gpu=gpu)
        self.eps = Tensor([eps], gpu=gpu)
        # One running average of squared gradients per optimized parameter.
        self.v = [
            Tensor(np.zeros(t.shape, dtype=np.float32), gpu=gpu)
            for t in self.params
        ]
        self.one = Tensor([1], gpu=gpu)
        self.two = Tensor([2], gpu=gpu)

    def step(self):
        """Update every parameter in ``self.params`` from its current ``grad``."""
        for i, t in enumerate(self.params):
            squared_grad = t.grad.pow(self.two)
            self.v[i] = self.decay * self.v[i] + (self.one - self.decay) * squared_grad
            # NOTE(review): assumes Tensor implements in-place subtraction so
            # that this mutates the parameter rather than rebinding ``t`` --
            # confirm against the Tensor class.
            t -= self.lr.div(self.v[i].sqrt() + self.eps) * t.grad