def __init__(self, params, lr=0.001, beta_1=0.9, beta_2=0.999,
             lda=1 - 1e-8, epsilon=1e-8, *args, **kwargs):
    """Create the optimizer state: hyper-parameter variables and zero buffers.

    Args:
        params: iterable of backend variables; each must be readable with
            ``K.get_value`` so that its shape can be queried.
        lr, beta_1, beta_2, lda, epsilon: numeric hyper-parameters. Each is
            wrapped in a backend variable stored under the same attribute
            name. (Presumably the usual Adam step size, moment decay rates,
            decay of ``beta_1`` and numerical fuzz term -- this method only
            stores them, so confirm against the update rule.)
        *args: accepted but not used by this constructor.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(Adam, self).__init__(**kwargs)

    # Mirror every constructor argument onto the instance.  This has to run
    # before any further local name is bound, otherwise the extra locals
    # would be copied as attributes too.  Note that the snapshot also holds
    # ``self``, ``params``, ``args`` and ``kwargs``.
    self.__dict__.update(locals())

    self.iterations = K.variable(0)

    # Replace the raw numbers copied above by backend variables, in the
    # same order the attributes are declared in the signature.
    hyper_params = (
        ("lr", lr),
        ("beta_1", beta_1),
        ("beta_2", beta_2),
        ("lda", lda),
        ("epsilon", epsilon),
    )
    for attr_name, attr_value in hyper_params:
        setattr(self, attr_name, K.variable(attr_value))

    # One zero-initialised ``m`` and ``v`` buffer per parameter, each with
    # the parameter's shape.
    self.m = []
    self.v = []
    for par in params:
        shape = K.get_value(par).shape
        first_buffer = K.variable(np.zeros(shape))
        second_buffer = K.variable(np.zeros(shape))
        self.m.append(first_buffer)
        self.v.append(second_buffer)
def __init__(self, params, lr=0.001, momentum=0.9, decay=0.9,
             nesterov=False, *args, **kwargs):
    """Create the optimizer state: hyper-parameter variables and zero buffers.

    Args:
        params: iterable of backend variables; each must be readable with
            ``K.get_value`` so that its shape can be queried.
        lr, momentum, decay: numeric hyper-parameters, each wrapped in a
            backend variable stored under the same attribute name.
        nesterov: kept on the instance as the plain value passed in (it is
            only stored through the ``locals()`` snapshot below).
        *args: accepted but not used by this constructor.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(SGD, self).__init__(**kwargs)

    # Copy every constructor argument onto the instance.  Must stay ahead of
    # any other local binding so that only the arguments are captured; the
    # snapshot also contains ``self``, ``params``, ``args`` and ``kwargs``.
    self.__dict__.update(locals())

    # Float step counter and the tunable hyper-parameters as backend
    # variables (these overwrite the raw values copied above).
    self.iterations = K.variable(0.)
    self.lr = K.variable(lr)
    self.momentum = K.variable(momentum)
    self.decay = K.variable(decay)

    # Fixed threshold; not configurable through the constructor.
    self.lr_decay_after = K.variable(10000.)

    # One zero-initialised buffer per parameter, shaped like the parameter.
    self.m = [K.variable(np.zeros(K.get_value(par).shape)) for par in params]
def get_updates(self, params, gparams):
    """Build the update pairs for a squared-gradient-accumulator step.

    For every parameter ``p`` with gradient ``g`` and accumulator ``a``::

        a_new = a + g**2
        p_new = p - lr * g / (sqrt(a_new) + eps)

    which matches the Adagrad rule.

    Args:
        params: sequence of backend shared variables to optimise.
        gparams: sequence of gradients, aligned with ``params``.

    Returns:
        A flat list of ``(variable, new_value)`` tuples; for each parameter
        the accumulator update comes first, followed by the parameter update.
    """
    # The accumulators are rebuilt (zero-filled) on every call.
    self._accumulators_ = [
        K.shared(np.zeros_like(K.get_value(param))) for param in params
    ]

    updates = []
    for param, grad, acc in zip(params, gparams, self._accumulators_):
        acc_next = acc + K.sqr(grad)
        step = self._lr_ * grad / (K.sqrt(acc_next) + self._eps_)
        updates.extend([(acc, acc_next), (param, param - step)])
    return updates
def __init__(self, params, lr=0.001, beta_1=0.9, beta_2=0.999,
             lda=1 - 1e-8, epsilon=1e-8, *args, **kwargs):
    """Initialise hyper-parameter variables and per-parameter zero buffers.

    Args:
        params: iterable of backend variables whose values can be fetched
            with ``K.get_value`` (only their shapes are used here).
        lr, beta_1, beta_2, lda, epsilon: numeric hyper-parameters; each one
            ends up on the instance as a backend variable of the same name.
            Their role in the update rule is not visible in this method.
        *args: accepted and ignored.
        **kwargs: passed through to the parent constructor.
    """
    super(Adam, self).__init__(**kwargs)

    # Snapshot of the arguments -> instance attributes.  Keep this as the
    # first statement after ``super`` so no helper locals leak into it.
    # ``self``, ``params``, ``args`` and ``kwargs`` are stored as well.
    self.__dict__.update(locals())

    self.iterations = K.variable(0)

    # Overwrite the raw numeric attributes with backend variables.
    self.lr = K.variable(lr)
    self.beta_1 = K.variable(beta_1)
    self.beta_2 = K.variable(beta_2)
    self.lda = K.variable(lda)
    self.epsilon = K.variable(epsilon)

    def zero_variable(reference):
        # Fresh zero-filled backend variable with the shape of ``reference``.
        return K.variable(np.zeros(K.get_value(reference).shape))

    self.m, self.v = [], []
    for par in params:
        m_buffer = zero_variable(par)
        v_buffer = zero_variable(par)
        self.m.append(m_buffer)
        self.v.append(v_buffer)
def get_updates(self, params, gparams):
    """Build the update pairs for a momentum step.

    For every parameter ``p`` with gradient ``g`` and velocity ``v``::

        v_new = rho * v + lr * g
        p_new = p - v_new

    Args:
        params: indexable sequence of backend shared variables.
        gparams: indexable sequence of gradients, aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` tuples: all parameter updates
        first, followed by all velocity updates.

    Raises:
        IndexError: if ``gparams`` or the stored velocities are shorter
            than ``params``.
    """
    # Velocities are created lazily, the first time updates are requested,
    # and appended in place so existing references to the list stay valid.
    if not self._vs:
        self._vs.extend(
            K.shared(np.zeros_like(K.get_value(param))) for param in params
        )

    update_params = []
    update_vs = []
    # ``enumerate`` replaces the Python-2-only ``xrange(len(...))`` loop while
    # keeping index-based access, so a length mismatch still raises.
    for idx, param in enumerate(params):
        velocity = self._vs[idx]
        v_new = self._rho * velocity + self._lr * gparams[idx]
        update_params.append((param, param - v_new))
        update_vs.append((velocity, v_new))
    return update_params + update_vs
def get_updates(self, params, gparams):
    """Build the update pairs for a momentum step.

    For every parameter ``p`` with gradient ``g`` and velocity ``v``::

        v_new = momentum * v + lr * g
        p_new = p - v_new

    Args:
        params: sequence of backend shared variables to optimise.
        gparams: sequence of gradients, aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` tuples: all parameter updates
        first, followed by all velocity updates.
    """
    # The velocity buffers are rebuilt (zero-filled) on every call.
    self._vs_ = [
        K.shared(np.zeros_like(K.get_value(param))) for param in params
    ]

    param_updates = []
    velocity_updates = []
    for param, grad, velocity in zip(params, gparams, self._vs_):
        velocity_next = self._momentum_ * velocity + self._lr_ * grad
        param_updates.append((param, param - velocity_next))
        velocity_updates.append((velocity, velocity_next))
    return param_updates + velocity_updates
def get_updates(self, params, gparams):
    """Build the update pairs for a squared-gradient-accumulator step.

    For every parameter ``p`` with gradient ``g`` and accumulator ``G``::

        G_new = G + g**2
        p_new = p - lr * g / sqrt(G_new + eps)

    which matches the Adagrad rule (with ``eps`` inside the square root).

    Args:
        params: indexable sequence of backend shared variables.
        gparams: indexable sequence of gradients, aligned with ``params``.

    Returns:
        A list of ``(variable, new_value)`` tuples: all parameter updates
        first, followed by all accumulator updates.

    Raises:
        IndexError: if ``gparams`` or the stored accumulators are shorter
            than ``params``.
    """
    # Accumulators are created lazily on first use and appended in place so
    # existing references to the list stay valid.
    if not self._Gs:
        self._Gs.extend(
            K.shared(np.zeros_like(K.get_value(param))) for param in params
        )

    update_params = []
    update_Gs = []
    # ``enumerate`` replaces the Python-2-only ``xrange(len(...))`` loop while
    # keeping index-based access, so a length mismatch still raises.
    for idx, param in enumerate(params):
        accumulator = self._Gs[idx]
        grad = gparams[idx]
        G_new = accumulator + grad ** 2
        update_Gs.append((accumulator, G_new))
        update_params.append(
            (param, param - self._lr * grad / K.sqrt(G_new + self._eps))
        )
    return update_params + update_Gs
def _num_of_params(self, layer): n_params = 0 for param in layer.params_: n_params += np.prod(K.get_value(param).shape) return n_params
def _n_layer_params(self, layer): n_params = 0 for param in layer.params_: n_params += np.prod(K.get_value(param).shape) return n_params