def __iter__(self):
    gradient_m1 = ma.zeros_like(self.wrt)
    changes = ma.random_like(self.wrt) * self.changes_max
    for i, (args, kwargs) in enumerate(self.args):
        gradient = self.fprime(self.wrt, *args, **kwargs)
        changes_min = changes * self.step_grow
        changes_max = changes * self.step_shrink
        gradprod = gradient_m1 * gradient
        changes_min *= gradprod > 0
        changes_max *= gradprod < 0
        changes *= gradprod == 0
        # TODO actually, this should be done to changes
        changes_min = ma.clip(changes_min, self.min_step, self.max_step)
        changes_max = ma.clip(changes_max, self.min_step, self.max_step)
        changes += changes_min + changes_max
        step = -changes * ma.sign(gradient)
        self.wrt += step
        yield {
            'n_iter': i,
            'args': args,
            'kwargs': kwargs,
            'gradient': gradient,
            'gradient_m1': gradient_m1,
            'step': step,
        }
        # Only update after yielding, so that the reported gradient_m1
        # really is the previous iteration's gradient.
        gradient_m1 = gradient
def __iter__(self):
    grad_m1 = ma.zeros(self.wrt.shape)
    changes = ma.random.random(self.wrt.shape) * self.changes_max
    for i, (args, kwargs) in enumerate(self.args):
        grad = self.fprime(self.wrt, *args, **kwargs)
        changes_min = changes * self.step_grow
        changes_max = changes * self.step_shrink
        gradprod = grad_m1 * grad
        changes_min *= gradprod > 0
        changes_max *= gradprod < 0
        changes *= gradprod == 0
        # TODO actually, this should be done to changes
        changes_min = ma.clip(changes_min, self.min_step, self.max_step)
        changes_max = ma.clip(changes_max, self.min_step, self.max_step)
        changes += changes_min + changes_max
        step = -changes * ma.sign(grad)
        self.wrt += step
        yield dict(args=args, kwargs=kwargs, gradient=grad,
                   gradient_m1=grad_m1, n_iter=i, step=step)
        # Update the previous gradient only after reporting it.
        grad_m1 = grad
def __iter__(self):
    gradient_m1 = ma.zeros_like(self.wrt)
    changes = ma.ones_like(self.wrt) * self.changes_init
    for i, (args, kwargs) in enumerate(self.args):
        gradient = self.fprime(self.wrt, *args, **kwargs)
        changes_min = changes * self.step_grow
        changes_max = changes * self.step_shrink
        gradprod = gradient_m1 * gradient
        changes_min *= gradprod > 0
        changes_max *= gradprod < 0
        changes *= gradprod == 0
        # TODO actually, this should be done to changes
        changes_min = ma.clip(changes_min, self.min_step, self.max_step)
        changes_max = ma.clip(changes_max, self.min_step, self.max_step)
        changes += changes_min + changes_max
        step = -changes * ma.sign(gradient)
        self.wrt += step
        yield {
            'n_iter': i,
            'args': args,
            'kwargs': kwargs,
            'gradient': gradient,
            'gradient_m1': gradient_m1,
            'step': step,
        }
        # Update the previous gradient only after reporting it.
        gradient_m1 = gradient
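
# The three __iter__ variants above differ only in how `changes` is
# initialised and in what bookkeeping they yield. Below is a minimal,
# self-contained sketch of the same sign-based Rprop update, assuming
# plain numpy in place of the `ma` adapter; `quadratic_fprime` is a
# hypothetical stand-in for self.fprime, not part of the code above.

import numpy as np

def quadratic_fprime(wrt):
    # Gradient of f(w) = 0.5 * ||w||^2.
    return wrt

def rprop_sketch(wrt, n_iter=100, step_grow=1.2, step_shrink=0.5,
                 min_step=1e-6, max_step=1.0, changes_init=0.1):
    gradient_m1 = np.zeros_like(wrt)
    changes = np.ones_like(wrt) * changes_init
    for _ in range(n_iter):
        gradient = quadratic_fprime(wrt)
        gradprod = gradient_m1 * gradient
        # Grow the step size where the gradient kept its sign, shrink
        # it where the sign flipped, keep it unchanged otherwise.
        changes = np.where(gradprod > 0, changes * step_grow, changes)
        changes = np.where(gradprod < 0, changes * step_shrink, changes)
        changes = np.clip(changes, min_step, max_step)
        wrt += -changes * np.sign(gradient)
        gradient_m1 = gradient
    return wrt

# E.g. rprop_sketch(np.array([3.0, -2.0])) drives both parameters
# towards zero, oscillating within min_step of the minimum.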
def _iterate(self):
    for args, kwargs in self.args:
        step_m1 = self.step
        # We use Nesterov momentum: first, we make a step according to the
        # momentum and then we calculate the gradient.
        step1 = step_m1 * self.momentum
        self.wrt -= step1
        gradient = self.fprime(self.wrt, *args, **kwargs)
        self.moving_mean_squared = (
            self.decay * self.moving_mean_squared
            + (1 - self.decay) * gradient ** 2)
        step2 = self.step_rate * gradient
        step2 /= sqrt(self.moving_mean_squared + 1e-8)
        self.wrt -= step2
        step = step1 + step2
        # Step rate adaption. If the current step and the momentum agree,
        # we slightly increase the step rate for that dimension.
        if self.step_adapt:
            # This code might look weird, but it makes it work with both
            # numpy and gnumpy.
            step_non_negative = step > 0
            step_m1_non_negative = step_m1 > 0
            agree = (step_non_negative == step_m1_non_negative) * 1.
            adapt = 1 + agree * self.step_adapt * 2 - self.step_adapt
            self.step_rate *= adapt
            self.step_rate = clip(
                self.step_rate, self.step_rate_min, self.step_rate_max)
        self.step = step
        self.n_iter += 1
        yield dict(gradient=gradient, args=args, kwargs=kwargs)
def __iter__(self):
    self.moving_mean_squared = 1
    self.step_m1 = 0
    self.step_rate = self._steprate
    # If we adapt step rates, we need one for each parameter.
    if self.step_adapt:
        self.step_rate *= ones_like(self.wrt)
    for i, (args, kwargs) in enumerate(self.args):
        # We use Nesterov momentum: first, we make a step according to the
        # momentum and then we calculate the gradient.
        step1 = self.step_m1 * self.momentum
        self.wrt -= step1
        gradient = self.fprime(self.wrt, *args, **kwargs)
        self.moving_mean_squared = (
            self.decay * self.moving_mean_squared
            + (1 - self.decay) * gradient ** 2)
        step2 = self.step_rate * gradient
        step2 /= sqrt(self.moving_mean_squared + 1e-8)
        self.wrt -= step2
        step = step1 + step2
        # Step rate adaption. If the current step and the momentum agree,
        # we slightly increase the step rate for that dimension.
        if self.step_adapt:
            # This code might look weird, but it makes it work with both
            # numpy and gnumpy.
            step_non_negative = step > 0
            step_m1_non_negative = self.step_m1 > 0
            agree = (step_non_negative == step_m1_non_negative) * 1.
            adapt = 1 + agree * self.step_adapt * 2 - self.step_adapt
            self.step_rate *= adapt
            self.step_rate = clip(
                self.step_rate, self.step_rate_min, self.step_rate_max)
        self.step_m1 = step
        yield {
            'n_iter': i,
            'gradient': gradient,
            'moving_mean_squared': self.moving_mean_squared,
            'step': self.step_m1,
            'args': args,
            'kwargs': kwargs,
            'step_rate': self.step_rate,
        }
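
# A compact, self-contained sketch of the RmsProp-with-Nesterov-momentum
# step used by the variants above, assuming plain numpy and with the
# optional step-rate adaptation left out; `fprime` is any callable
# returning the gradient at the given parameters.

import numpy as np

def rmsprop_nesterov_sketch(wrt, fprime, n_iter=100, step_rate=1e-2,
                            decay=0.9, momentum=0.9):
    moving_mean_squared = 1.0
    step_m1 = np.zeros_like(wrt)
    for _ in range(n_iter):
        # Nesterov momentum: first step along the momentum direction,
        # then evaluate the gradient at the look-ahead point.
        step1 = step_m1 * momentum
        wrt -= step1
        gradient = fprime(wrt)
        # Exponential moving average of the squared gradient.
        moving_mean_squared = (decay * moving_mean_squared
                               + (1 - decay) * gradient ** 2)
        step2 = step_rate * gradient / np.sqrt(moving_mean_squared + 1e-8)
        wrt -= step2
        step_m1 = step1 + step2
    return wrt

# E.g. rmsprop_nesterov_sketch(np.ones(3), lambda w: w) drives the
# parameters of f(w) = 0.5 * ||w||^2 towards zero.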
def _iterate(self):
    for args, kwargs in self.args:
        gradient_m1 = self.gradient
        self.gradient = self.fprime(self.wrt, *args, **kwargs)
        changes_min = self.changes * self.step_grow
        changes_max = self.changes * self.step_shrink
        gradprod = gradient_m1 * self.gradient
        changes_min *= gradprod > 0
        changes_max *= gradprod < 0
        self.changes *= gradprod == 0
        # TODO actually, this should be done to changes
        changes_min = ma.clip(changes_min, self.min_step, self.max_step)
        changes_max = ma.clip(changes_max, self.min_step, self.max_step)
        self.changes += changes_min + changes_max
        step = -self.changes * ma.sign(self.gradient)
        self.wrt += step
        yield {
            'args': args,
            'kwargs': kwargs,
            'step': step,
        }
def _iterate(self):
    for args, kwargs in self.args:
        gradient_m1 = self.gradient
        self.gradient = self.fprime(self.wrt, *args, **kwargs)
        gradprod = gradient_m1 * self.gradient
        # Grow the step size where the gradient kept its sign, shrink
        # it where the sign flipped.
        self.changes[gradprod > 0] *= self.step_grow
        self.changes[gradprod < 0] *= self.step_shrink
        self.changes = ma.clip(self.changes, self.min_step, self.max_step)
        step = -self.changes * ma.sign(self.gradient)
        self.wrt += step
        self.n_iter += 1
        yield {
            'args': args,
            'kwargs': kwargs,
            'step': step,
        }
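
# The two _iterate variants above implement the same adaptation of
# `changes`: the first builds masked copies (changes_min, changes_max)
# and recombines them, the second rescales `changes` in place via
# boolean indexing. Ignoring the clipping (which the TODO notes is
# applied to the wrong arrays in the first variant), the equivalence
# can be checked with plain numpy:

import numpy as np

gradprod = np.array([1.0, -2.0, 0.0])
changes = np.array([0.1, 0.1, 0.1])
step_grow, step_shrink = 1.2, 0.5

# Masked-multiply formulation: boolean masks select which scaled copy
# contributes to each dimension.
masked = (changes * step_grow * (gradprod > 0)
          + changes * step_shrink * (gradprod < 0)
          + changes * (gradprod == 0))

# Boolean-indexing formulation, as in the second variant.
indexed = changes.copy()
indexed[gradprod > 0] *= step_grow
indexed[gradprod < 0] *= step_shrink

assert np.allclose(masked, indexed)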