def get_updates(self, params, gparams):
    """Build the update pairs for one step of an Adam-style update rule.

    While ``self._ms`` is empty, one zero-filled first-moment and one
    zero-filled second-moment accumulator is created per parameter through
    ``K.shared``; once populated, the accumulators are reused.

    Args:
        params: Sequence of parameter variables. Each must provide
            ``get_value()`` for the lazy accumulator initialisation.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs ordered as: parameter
        updates, first-moment updates, second-moment updates, and finally
        the update of the ``self._epoch`` counter.
    """
    if not self._ms:
        for param in params:
            self._ms.append(K.shared(np.zeros_like(param.get_value())))
            self._vs.append(K.shared(np.zeros_like(param.get_value())))

    update_params = []
    update_ms = []
    update_vs = []
    # zip() replaces the Python-2-only ``xrange(len(params))`` index loop and
    # keeps parameter, gradient and both accumulators in lockstep.
    for param, grad, m_old, v_old in zip(params, gparams,
                                         self._ms, self._vs):
        # Exponential moving averages of the gradient and squared gradient.
        m_new = self._beta1 * m_old + (1 - self._beta1) * grad
        v_new = self._beta2 * v_old + (1 - self._beta2) * grad**2
        # Bias correction: divide by 1 - beta ** epoch.
        m_unbias = m_new / (1 - K.power(self._beta1, self._epoch))
        v_unbias = v_new / (1 - K.power(self._beta2, self._epoch))
        param_new = param - self._alpha * m_unbias / (
            K.sqrt(v_unbias) + self._eps)
        update_ms.append((m_old, m_new))
        update_vs.append((v_old, v_new))
        update_params.append((param, param_new))

    # NOTE(review): the counter is advanced with a float ``1.``; confirm this
    # matches the dtype K.shared assigned to ``self._epoch``.
    update_epoch = [(self._epoch, self._epoch + 1.)]
    return update_params + update_ms + update_vs + update_epoch
def get_updates(self, params, gparams):
    """Return the update pairs for one step of an Adam-style update rule.

    Every call replaces ``self._ms_`` and ``self._vs_`` with freshly created
    zero-filled ``K.shared`` accumulators, one pair per parameter.

    Args:
        params: Sequence of parameter variables exposing ``get_value()``.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs. For each parameter the
        first-moment, second-moment and parameter updates appear in that
        order; the last pair increments ``self._iter_``.
    """
    self._ms_ = first_moments = []
    self._vs_ = second_moments = []
    for param in params:
        first_moments.append(K.shared(np.zeros_like(param.get_value())))
        second_moments.append(K.shared(np.zeros_like(param.get_value())))

    # The bias correction of both moments is folded into one step size.
    step = self._iter_ + 1
    correction = (K.sqrt(1. - K.power(self._beta2_, step)) /
                  (1. - K.power(self._beta1_, step)))
    step_size = self._alpha_ * correction

    updates = []
    for param, grad, moment1, moment2 in zip(params, gparams,
                                             first_moments, second_moments):
        moment1_next = self._beta1_ * moment1 + (1. - self._beta1_) * grad
        moment2_next = (self._beta2_ * moment2 +
                        (1. - self._beta2_) * K.sqr(grad))
        param_next = param - step_size * moment1_next / (
            K.sqrt(moment2_next) + self._eps_)
        updates += [
            (moment1, moment1_next),
            (moment2, moment2_next),
            (param, param_next),
        ]

    updates.append((self._iter_, self._iter_ + 1))
    return updates
def __init__(self, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    """Store the hyper-parameters and create the initial optimizer state.

    Args:
        lr: Step size, stored as ``self._alpha``.
        beta1: Coefficient stored as ``self._beta1``.
        beta2: Coefficient stored as ``self._beta2``.
        eps: Small constant stored as ``self._eps``.
    """
    self._alpha, self._eps = lr, eps
    self._beta1, self._beta2 = beta1, beta2
    # Both accumulator lists start out empty.
    self._ms, self._vs = [], []
    # Counter held in a backend shared variable, starting at 1.
    self._epoch = K.shared(1)
def get_updates(self, params, gparams):
    """Return the update pairs for one step of an Adadelta-style rule.

    Every call replaces ``self._accumulators_`` and
    ``self._delta_accumulators_`` with freshly created zero-filled
    ``K.shared`` variables, one pair per parameter.

    Args:
        params: Sequence of parameter variables exposing ``get_value()``.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs. For each parameter the
        squared-gradient accumulator, the parameter and the squared-step
        accumulator updates appear in that order.
    """
    self._accumulators_ = grad_sq_avgs = []
    self._delta_accumulators_ = step_sq_avgs = []
    for param in params:
        grad_sq_avgs.append(K.shared(np.zeros_like(param.get_value())))
        step_sq_avgs.append(K.shared(np.zeros_like(param.get_value())))

    updates = []
    for param, grad, grad_sq_avg, step_sq_avg in zip(
            params, gparams, grad_sq_avgs, step_sq_avgs):
        # Running average of squared gradients.
        grad_sq_next = (self._rou_ * grad_sq_avg +
                        (1. - self._rou_) * K.sqr(grad))
        # The step is scaled by the ratio of the two root-mean-square terms.
        step = (- grad * K.sqrt(step_sq_avg + self._eps_) /
                K.sqrt(grad_sq_next + self._eps_))
        # Running average of squared steps, built from the step just taken.
        step_sq_next = (self._rou_ * step_sq_avg +
                        (1. - self._rou_) * K.sqr(step))
        updates += [
            (grad_sq_avg, grad_sq_next),
            (param, param + step),
            (step_sq_avg, step_sq_next),
        ]
    return updates
def get_updates(self, params, gparams):
    """Build the update pairs for one step of an Adadelta-style rule.

    While ``self._Egs`` is empty, one zero-filled squared-gradient average
    and one zero-filled squared-step average is created per parameter
    through ``K.shared``; once populated, the accumulators are reused.

    Args:
        params: Sequence of parameter variables. Each must provide
            ``get_value()`` for the lazy accumulator initialisation.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs ordered as: parameter
        updates, squared-gradient average updates, squared-step average
        updates.
    """
    if not self._Egs:
        for param in params:
            self._Egs.append(K.shared(np.zeros_like(param.get_value())))
            self._Exs.append(K.shared(np.zeros_like(param.get_value())))

    update_params = []
    update_Egs = []
    update_Exs = []
    # zip() replaces the Python-2-only ``xrange(len(params))`` index loop.
    for param, grad, Eg_old, Ex_old in zip(params, gparams,
                                           self._Egs, self._Exs):
        Eg_new = self._rou * Eg_old + (1 - self._rou) * grad**2
        # K.sqrt replaces np.sqrt: the operands are built from K.shared
        # variables (presumably symbolic backend expressions, to be
        # confirmed), and the other update rules in this file take square
        # roots through the backend as well.
        delta_x = -K.sqrt(Ex_old + self._eps) / K.sqrt(
            Eg_new + self._eps) * grad
        Ex_new = self._rou * Ex_old + (1 - self._rou) * delta_x**2
        update_Egs.append((Eg_old, Eg_new))
        update_Exs.append((Ex_old, Ex_new))
        update_params.append((param, param + delta_x))

    return update_params + update_Egs + update_Exs
def get_updates(self, params, gparams):
    """Return the update pairs for one step of an Adagrad-style rule.

    Every call replaces ``self._accumulators_`` with freshly created
    zero-filled ``K.shared`` variables, one per parameter.

    Args:
        params: Sequence of parameter variables readable by ``K.get_value``.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs; for each parameter the
        accumulator update precedes the parameter update.
    """
    self._accumulators_ = [
        K.shared(np.zeros_like(K.get_value(param))) for param in params
    ]

    updates = []
    for param, grad, grad_sq_sum in zip(params, gparams,
                                        self._accumulators_):
        # Sum of squared gradients accumulated so far.
        grad_sq_next = grad_sq_sum + K.sqr(grad)
        scaled_step = self._lr_ * grad / (K.sqrt(grad_sq_next) + self._eps_)
        updates += [
            (grad_sq_sum, grad_sq_next),
            (param, param - scaled_step),
        ]
    return updates
def get_updates(self, params, gparams):
    """Build the update pairs for one momentum step.

    While ``self._vs`` is empty, one zero-filled velocity is created per
    parameter through ``K.shared``; once populated, the velocities are
    reused.

    Args:
        params: Sequence of parameter variables readable by ``K.get_value``
            (only needed for the lazy velocity initialisation).
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs: all parameter updates
        first, followed by all velocity updates.
    """
    if not self._vs:
        for param in params:
            self._vs.append(K.shared(np.zeros_like(K.get_value(param))))

    update_params = []
    update_vs = []
    # zip() replaces the Python-2-only ``xrange(len(params))`` index loop.
    for param, grad, v_old in zip(params, gparams, self._vs):
        # The new velocity blends the previous one with the scaled gradient.
        v_new = self._rho * v_old + self._lr * grad
        update_params.append((param, param - v_new))
        update_vs.append((v_old, v_new))

    return update_params + update_vs
def get_updates(self, params, gparams):
    """Return the update pairs for one momentum step.

    Every call replaces ``self._vs_`` with freshly created zero-filled
    ``K.shared`` velocities, one per parameter.

    Args:
        params: Sequence of parameter variables readable by ``K.get_value``.
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs: all parameter updates
        first, followed by all velocity updates.
    """
    self._vs_ = [
        K.shared(np.zeros_like(K.get_value(param))) for param in params
    ]

    param_updates, velocity_updates = [], []
    for param, grad, velocity in zip(params, gparams, self._vs_):
        # The new velocity blends the previous one with the scaled gradient.
        velocity_next = self._momentum_ * velocity + self._lr_ * grad
        param_updates.append((param, param - velocity_next))
        velocity_updates.append((velocity, velocity_next))
    return param_updates + velocity_updates
def get_updates(self, params, gparams):
    """Build the update pairs for one step of an Adagrad-style rule.

    While ``self._Gs`` is empty, one zero-filled squared-gradient
    accumulator is created per parameter through ``K.shared``; once
    populated, the accumulators are reused.

    Args:
        params: Sequence of parameter variables readable by ``K.get_value``
            (only needed for the lazy accumulator initialisation).
        gparams: Sequence of gradient expressions aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` pairs: all parameter updates
        first, followed by all accumulator updates.
    """
    if not self._Gs:
        for param in params:
            self._Gs.append(K.shared(np.zeros_like(K.get_value(param))))

    update_params = []
    update_Gs = []
    # zip() replaces the Python-2-only ``xrange(len(params))`` index loop.
    for param, grad, G_old in zip(params, gparams, self._Gs):
        # Sum of squared gradients accumulated so far.
        G_new = G_old + grad**2
        update_Gs.append((G_old, G_new))
        # Here eps is added under the square root.
        update_params.append(
            (param, param - self._lr * grad / K.sqrt(G_new + self._eps)))

    return update_params + update_Gs
def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    """Store the hyper-parameters and create the iteration counter.

    Args:
        lr: Step size, stored as ``self._alpha_``.
        beta1: Coefficient stored as ``self._beta1_``.
        beta2: Coefficient stored as ``self._beta2_``.
        eps: Small constant stored as ``self._eps_``.
    """
    self._alpha_, self._eps_ = lr, eps
    self._beta1_, self._beta2_ = beta1, beta2
    # Counter held in a backend shared variable, starting at 0.
    self._iter_ = K.shared(0)