def __init__(self, x0, scheduler, beta1=0.9, beta2=0.999, eps=1e-8):
    """Store the scheduler, build the prox and create the two moving averages.

    Args:
        x0: Initial point, forwarded unchanged to the parent constructor.
        scheduler: Object exposing ``stepsize``; kept as ``self._scheduler``.
        beta1: Second argument of the ``ExpMvAvg`` stored in ``self._m``.
        beta2: Second argument of the ``ExpMvAvg`` stored in ``self._v``.
        eps: Stored as ``self._eps``.
    """
    self._scheduler = scheduler
    inv_stepsize = 1.0 / self._scheduler.stepsize
    super().__init__(x0, DiagQuad(inv_stepsize))
    # self._h is read below, so it is presumably set by the parent
    # constructor above -- TODO confirm against the base class.
    # Each average gets its own zero buffer shaped like self._h.
    self._m = ExpMvAvg(np.zeros_like(self._h), beta1)
    self._v = ExpMvAvg(np.zeros_like(self._h), beta2)
    self._eps = eps
class LazyOracle(MetaOracle):
    """Function-based oracle based on moving average."""

    def __init__(self, base_oracle, beta):
        """Keep a private deep copy of ``base_oracle`` and two moving averages.

        Args:
            base_oracle: Oracle to wrap; it is deep-copied, so the caller's
                instance is not shared with this object.
            beta: Stored as ``self._beta`` and passed as the second argument
                of both ``ExpMvAvg`` instances.
        """
        self._base_oracle = copy.deepcopy(base_oracle)
        self._beta = beta
        self._f, self._g = ExpMvAvg(None, beta), ExpMvAvg(None, beta)

    def update(self, *args, **kwargs):
        """Forward the update to the base oracle, then refresh both averages."""
        self._base_oracle.update(*args, **kwargs)
        loss = self._base_oracle.compute_loss()
        self._f.update(loss)
        grad = self._base_oracle.compute_grad()
        self._g.update(grad)

    def compute_loss(self):
        """Return the loss reported by the base oracle."""
        # NOTE: a disabled variant used to call self._f.replace(f) and return
        # self._f.val when f was finite; the raw value is returned in all cases.
        return self._base_oracle.compute_loss()

    def compute_grad(self):
        """Return the gradient reported by the base oracle."""
        # NOTE: a disabled variant used to call self._g.replace(g) and return
        # self._g.val when every entry of g was finite; the raw value is
        # returned in all cases.
        return self._base_oracle.compute_grad()
class Adam(MirrorDescent):
    """Mirror-descent update driven by moving averages of ``g`` and ``g**2``."""

    def __init__(self, x0, scheduler, beta1=0.9, beta2=0.999, eps=1e-8):
        """Store the scheduler, build the prox and create the moving averages.

        Args:
            x0: Initial point, forwarded unchanged to the parent constructor.
            scheduler: Object exposing ``stepsize``, ``update`` and ``reset``.
            beta1: Second argument of the ``ExpMvAvg`` tracking ``g``.
            beta2: Second argument of the ``ExpMvAvg`` tracking ``g**2``.
            eps: Added to ``sqrt(v)`` in :meth:`adapt`.
        """
        self._scheduler = scheduler
        inv_stepsize = 1.0 / self._scheduler.stepsize
        super().__init__(x0, DiagQuad(inv_stepsize))
        self._beta1 = beta1
        self._beta2 = beta2
        self._init_moment_averages()
        self._eps = eps

    def _init_moment_averages(self):
        """Create fresh zero-initialised averages ``_m`` and ``_v``."""
        # self._h is presumably provided by the parent class -- TODO confirm.
        # Each average gets its own zero buffer shaped like self._h.
        self._m = ExpMvAvg(np.zeros_like(self._h), self._beta1)
        self._v = ExpMvAvg(np.zeros_like(self._h), self._beta2)

    def reset(self):
        """Reset the scheduler, the regularizer scaling and both averages."""
        self._scheduler.reset()
        inv_stepsize = 1.0 / self._scheduler.stepsize
        self._breg.update(inv_stepsize)
        self._init_moment_averages()

    def adapt(self, g, w):
        """Advance the scheduler with ``w`` and rescale ``_breg`` from ``g**2``."""
        self._scheduler.update(w)
        self._v.update(g**2)
        scaling = np.sqrt(self._v.val) + self._eps
        self._breg.update(scaling / self._scheduler.stepsize)

    def update(self, g, w):
        """Average ``g`` into ``_m`` and hand the averaged value to the parent."""
        # The parent update sees the moving average of g, not g itself.
        self._m.update(g)
        averaged_g = self._m.val
        super().update(averaged_g, w)
class FTRLAvg(FTRL):
    """FTRL variant whose ``_G`` is read from a moving average of ``norm(g)``."""

    def __init__(self, x0, prox, scheduler):
        """Initialise the parent, then create the moving average behind ``_G``."""
        super().__init__(x0, prox, scheduler)
        # Arguments are (1e-8, 0.999); the first is presumably the starting
        # value of the average -- TODO confirm against ExpMvAvg.
        self._G_avg = ExpMvAvg(1e-8, 0.999)

    @property
    def _G(self):
        """Current value of the moving average."""
        current = self._G_avg.val
        return current

    def _update_G(self, g):
        """Feed ``norm(g)`` into the moving average."""
        g_norm = norm(g)
        self._G_avg.update(g_norm)
def __init__(self, x0, scheduler, mvp=None, eps=1e-5, cg_iters=20,
             verbose=False, use_cache=True, beta2=0.999):
    """Initialise the parent update and create the scalar moving average.

    Every argument except ``beta2`` is forwarded unchanged to the parent
    constructor. ``beta2`` is the second argument of the ``ExpMvAvg``
    stored in ``self._v``, which is created with first argument ``0.0``.
    """
    parent_options = {
        "mvp": mvp,
        "eps": eps,
        "cg_iters": cg_iters,
        "verbose": verbose,
        "use_cache": use_cache,
    }
    super().__init__(x0, scheduler, **parent_options)
    self._v = ExpMvAvg(0.0, beta2)
class LazyOracle(MetaOracle):
    """Function-based oracle based on moving average."""

    def __init__(self, base_oracle, beta):
        """Keep a private deep copy of ``base_oracle`` and two moving averages.

        Args:
            base_oracle: Oracle to wrap; it is deep-copied, so the caller's
                instance is not shared with this object.
            beta: Stored as ``self._beta`` and passed as the second argument
                of both ``ExpMvAvg`` instances.
        """
        self._base_oracle = copy.deepcopy(base_oracle)
        self._beta = beta
        self._f, self._g = ExpMvAvg(None, beta), ExpMvAvg(None, beta)

    def update(self, *args, **kwargs):
        """Forward the update to the base oracle, then refresh both averages."""
        self._base_oracle.update(*args, **kwargs)
        loss = self._base_oracle.compute_loss()
        self._f.update(loss)
        grad = self._base_oracle.compute_grad()
        self._g.update(grad)

    def compute_loss(self):
        """Return the loss reported by the base oracle."""
        return self._base_oracle.compute_loss()

    def compute_grad(self):
        """Return the gradient reported by the base oracle."""
        return self._base_oracle.compute_grad()
class AdaptiveSecondOrderUpdate(SecondOrderUpdate):
    """Second-order update using an exponential moving average for the
    adaptive stepsize."""

    def __init__(self, x0, scheduler, mvp=None, eps=1e-5, cg_iters=20,
                 verbose=False, use_cache=True, beta2=0.999):
        """Initialise the parent update and create the scalar moving average.

        Every argument except ``beta2`` is forwarded unchanged to the parent
        constructor. ``beta2`` is the second argument of the ``ExpMvAvg``
        stored in ``self._v``, which is created with first argument ``0.0``.
        """
        parent_options = {
            "mvp": mvp,
            "eps": eps,
            "cg_iters": cg_iters,
            "verbose": verbose,
            "use_cache": use_cache,
        }
        super().__init__(x0, scheduler, **parent_options)
        self._v = ExpMvAvg(0.0, beta2)

    def _compute_stepsize(self, g, w):
        """Return the scheduler stepsize divided by ``sqrt(v) + 1e-8``.

        ``v`` is the moving average of ``self.dualnorm2(g)``, updated on each
        call. ``w`` is accepted but not used here.
        """
        self._v.update(self.dualnorm2(g))
        base_stepsize = self._scheduler.stepsize
        # The 1e-8 offset keeps the denominator away from zero.
        denominator = np.sqrt(self._v.val) + 1e-8
        return base_stepsize / denominator
def __init__(self, shape, unscale=False, unbias=False, clip_thre=None,
             rate=0, momentum=None, eps=1e-6):
    """
    An online normalizer based on whitening.

    shape: None or a tuple specifying each dimension
    momentum: None for moving average
              [0,1) for exponential average
              1 for using instant update
    rate: decides the weight of new observation as itr**rate
    """
    super().__init__(shape, unscale=unscale, unbias=unbias, clip_thre=clip_thre)

    if momentum is not None:
        assert 0.0 <= momentum <= 1.0

        def _new_average():
            # The buffer shape is read from self._shape at call time.
            return ExpMvAvg(np.zeros(self._shape), rate=momentum)
    else:

        def _new_average():
            # The buffer shape is read from self._shape at call time.
            return PolMvAvg(np.zeros(self._shape), power=rate)

    self._mvavg_init = _new_average
    # reset() runs before self._eps is assigned, as in the original ordering.
    self.reset()
    self._eps = eps
def __init__(self, x0, prox, scheduler):
    """Initialise the parent, then create the moving average ``_G_avg``.

    All three arguments are forwarded unchanged to the parent constructor.
    """
    super().__init__(x0, prox, scheduler)
    # Arguments are (1e-8, 0.999); the first is presumably the starting
    # value of the average -- TODO confirm against ExpMvAvg.
    g_average = ExpMvAvg(1e-8, 0.999)
    self._G_avg = g_average
def __init__(self, base_oracle, beta):
    """Keep a private deep copy of ``base_oracle`` and two moving averages.

    Args:
        base_oracle: Oracle to wrap; it is deep-copied, so the caller's
            instance is not shared with this object.
        beta: Stored as ``self._beta`` and passed as the second argument
            of both ``ExpMvAvg`` instances.
    """
    self._base_oracle = copy.deepcopy(base_oracle)
    self._beta = beta
    # Both averages are created with None as their first argument.
    self._f, self._g = ExpMvAvg(None, beta), ExpMvAvg(None, beta)
def reset(self):
    """Reset the scheduler, the regularizer scaling and both moving averages."""
    self._scheduler.reset()
    # Read the stepsize only after the scheduler has been reset.
    inv_stepsize = 1.0 / self._scheduler.stepsize
    self._breg.update(inv_stepsize)
    # Replace both averages with fresh zero buffers shaped like self._h,
    # one separate array per average.
    self._m = ExpMvAvg(np.zeros_like(self._h), self._beta1)
    self._v = ExpMvAvg(np.zeros_like(self._h), self._beta2)