def _iterate(self):
    for args, kwargs in self.args:
        step_m1 = self.step
        d = self.decay
        o = self.offset
        m = self.momentum

        # Nesterov-style momentum: take the momentum step first, then
        # evaluate the gradient at the shifted parameters.
        step1 = step_m1 * m * self.step_rate
        self.wrt -= step1

        gradient = self.fprime(self.wrt, *args, **kwargs)

        # Decaying averages of the squared gradients (gms) and of the
        # squared steps (sms); `sqrt` is a module-level helper that works
        # with both numpy and gnumpy arrays.
        self.gms = (d * self.gms) + (1 - d) * gradient ** 2
        step2 = sqrt(self.sms + o) / sqrt(self.gms + o) * gradient * self.step_rate
        self.wrt -= step2

        self.step = step1 + step2
        self.sms = (d * self.sms) + (1 - d) * self.step ** 2

        self.n_iter += 1
        yield {
            'n_iter': self.n_iter,
            'gradient': gradient,
            'args': args,
            'kwargs': kwargs,
        }
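# The update above can be exercised outside the class. Below is a minimal,
# self-contained sketch (an assumption, not part of the original module):
# the toy quadratic objective and all hyperparameter values are made up for
# illustration, and plain NumPy stands in for the wrt/gms/sms attributes.
import numpy as np

# Illustrative objective f(x) = 0.5 * x @ A @ x with gradient A @ x.
A = np.diag([1.0, 10.0])
fprime = lambda x: A @ x

wrt = np.array([1.0, 1.0])       # parameters
gms = np.zeros_like(wrt)         # decaying mean of squared gradients
sms = np.zeros_like(wrt)         # decaying mean of squared steps
step = np.zeros_like(wrt)        # previous step, reused for momentum
decay, offset, momentum, step_rate = 0.9, 1e-4, 0.9, 0.1

for _ in range(500):
    step1 = step * momentum * step_rate     # momentum step first
    wrt -= step1
    gradient = fprime(wrt)                  # gradient at the shifted point
    gms = decay * gms + (1 - decay) * gradient ** 2
    step2 = np.sqrt(sms + offset) / np.sqrt(gms + offset) * gradient * step_rate
    wrt -= step2
    step = step1 + step2
    sms = decay * sms + (1 - decay) * step ** 2

print(wrt)  # should drift towards the minimum at the origin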
def _iterate(self):
    for args, kwargs in self.args:
        step_m1 = self.step

        # We use Nesterov momentum: first, we make a step according to the
        # momentum and then we calculate the gradient.
        step1 = step_m1 * self.momentum
        self.wrt -= step1

        gradient = self.fprime(self.wrt, *args, **kwargs)

        self.moving_mean_squared = (
            self.decay * self.moving_mean_squared
            + (1 - self.decay) * gradient ** 2)
        step2 = self.step_rate * gradient
        step2 /= sqrt(self.moving_mean_squared + 1e-8)
        self.wrt -= step2

        step = step1 + step2

        # Step rate adaptation. If the current step and the momentum agree,
        # we slightly increase the step rate for that dimension.
        if self.step_adapt:
            # This code might look weird, but it makes it work with both
            # numpy and gnumpy.
            step_non_negative = step > 0
            step_m1_non_negative = step_m1 > 0
            agree = (step_non_negative == step_m1_non_negative) * 1.
            adapt = 1 + agree * self.step_adapt * 2 - self.step_adapt
            self.step_rate *= adapt
            self.step_rate = clip(
                self.step_rate, self.step_rate_min, self.step_rate_max)

        self.step = step
        self.n_iter += 1
        yield dict(gradient=gradient, args=args, kwargs=kwargs)
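# A standalone sketch of the same loop, including the per-dimension step
# rate adaptation. Everything here is illustrative: the quadratic gradient,
# the hyperparameters and the rate bounds are assumptions, and NumPy
# replaces the numpy/gnumpy-agnostic sqrt and clip helpers used above.
import numpy as np

fprime = lambda x: np.array([2.0, 20.0]) * x   # gradient of a toy quadratic

wrt = np.array([1.0, -1.0])
moving_mean_squared = np.ones_like(wrt)
step_m1 = np.zeros_like(wrt)
step_rate = np.full_like(wrt, 0.01)            # one rate per dimension
momentum, decay, step_adapt = 0.9, 0.9, 0.05
step_rate_min, step_rate_max = 1e-6, 1.0

for _ in range(200):
    step1 = step_m1 * momentum                 # Nesterov look-ahead
    wrt -= step1
    gradient = fprime(wrt)
    moving_mean_squared = (decay * moving_mean_squared
                           + (1 - decay) * gradient ** 2)
    step2 = step_rate * gradient / np.sqrt(moving_mean_squared + 1e-8)
    wrt -= step2
    step = step1 + step2
    # Grow the rate where the current step and the momentum step share a
    # sign, shrink it where they point in opposite directions.
    agree = ((step > 0) == (step_m1 > 0)) * 1.
    step_rate = np.clip(step_rate * (1 + agree * step_adapt * 2 - step_adapt),
                        step_rate_min, step_rate_max)
    step_m1 = step

print(wrt)  # heads towards the minimum at the origin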
def max_length_columns(arr, max_length):
    """Project the columns of an array below a certain length.

    Works in place.

    Parameters
    ----------

    arr : array_like
        2D array.

    max_length : int
        Maximum length of a column.
    """
    if arr.ndim != 2:
        raise ValueError('only 2d arrays allowed')
    max_length = float(max_length)
    lengths = sqrt((arr ** 2).sum(axis=0))
    too_big_by = lengths / max_length
    divisor = too_big_by
    non_violated = lengths < max_length

    if isinstance(arr, np.ndarray):
        divisor[np.where(non_violated)] = 1.
    else:
        # Gnumpy implementation.
        # TODO: can this be done more efficiently?
        for i, nv in enumerate(non_violated):
            if nv:
                divisor[i] = 1.

    arr /= divisor[np.newaxis]
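# A quick check of the projection, assuming plain NumPy and that the
# module-level sqrt/np imports used by max_length_columns are in scope;
# the 2x3 array and the cap of 1.0 are made-up example values.
import numpy as np

arr = np.array([[3.0, 0.3, 0.0],
                [4.0, 0.4, 1.0]])
print(np.sqrt((arr ** 2).sum(axis=0)))    # column lengths: [5.  0.5 1. ]

max_length_columns(arr, 1)                # works in place
print(np.sqrt((arr ** 2).sum(axis=0)))    # [1.  0.5 1. ]: only the first
                                          # column exceeded the cap and was
                                          # rescaled; the others are untouched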
def __iter__(self):
    self.moving_mean_squared = 1
    self.step_m1 = 0
    self.step_rate = self._steprate
    # If we adapt step rates, we need one for each parameter.
    if self.step_adapt:
        self.step_rate *= ones_like(self.wrt)

    for i, (args, kwargs) in enumerate(self.args):
        # We use Nesterov momentum: first, we make a step according to the
        # momentum and then we calculate the gradient.
        step1 = self.step_m1 * self.momentum
        self.wrt -= step1

        gradient = self.fprime(self.wrt, *args, **kwargs)

        self.moving_mean_squared = (
            self.decay * self.moving_mean_squared
            + (1 - self.decay) * gradient ** 2)
        step2 = self.step_rate * gradient
        step2 /= sqrt(self.moving_mean_squared + 1e-8)
        self.wrt -= step2

        step = step1 + step2

        # Step rate adaptation. If the current step and the momentum agree,
        # we slightly increase the step rate for that dimension.
        if self.step_adapt:
            # This code might look weird, but it makes it work with both
            # numpy and gnumpy.
            step_non_negative = step > 0
            step_m1_non_negative = self.step_m1 > 0
            agree = (step_non_negative == step_m1_non_negative) * 1.
            adapt = 1 + agree * self.step_adapt * 2 - self.step_adapt
            self.step_rate *= adapt
            self.step_rate = clip(
                self.step_rate, self.step_rate_min, self.step_rate_max)

        self.step_m1 = step
        yield {
            'n_iter': i,
            'gradient': gradient,
            'moving_mean_squared': self.moving_mean_squared,
            'step': self.step_m1,
            'args': args,
            'kwargs': kwargs,
            'step_rate': self.step_rate,
        }
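# The sign-agreement rule is the part that differs most from plain RMSProp,
# so here it is in isolation. The step vectors, adaptation factor and rate
# bounds are made-up example values, with NumPy standing in for the
# numpy/gnumpy-agnostic clip helper.
import numpy as np

step_adapt = 0.1
step_rate = np.array([0.01, 0.01, 0.01])
step_m1 = np.array([0.5, -0.5, 0.5])      # previous step
step = np.array([0.4, -0.1, -0.2])        # current step

# 1.0 where the current and previous step share a sign, 0.0 otherwise.
agree = ((step > 0) == (step_m1 > 0)) * 1.
adapt = 1 + agree * step_adapt * 2 - step_adapt
step_rate = np.clip(step_rate * adapt, 1e-6, 1.0)
print(step_rate)  # [0.011 0.011 0.009]: grown where signs agree, shrunk otherwise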