def get_updates(self, loss, params):
    """Build the update ops for an optimizer with gradient accumulation.

    The parent optimizer's variable updates are gated so that they only
    take effect on steps where ``self.iterations % self.grad_accum_steps``
    equals 0; on every other step each variable is re-assigned its current
    value. One accumulator per parameter is stored in ``self.accum_grads``.

    Args:
        loss: Loss tensor, forwarded to ``get_gradients`` and to the parent
            ``get_updates``.
        params: Iterable of variables to optimize.

    Returns:
        The list of accumulator update ops. They are created under a
        control dependency on the parent's update ops, so evaluating them
        also evaluates the (gated) parent updates.
    """
    # Update criterion: true when the step counter is a multiple of
    # grad_accum_steps.
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    # Fetch the gradients.
    grads = self.get_gradients(loss, params)
    # The accumulators are assigned before the parent get_updates is invoked.
    # NOTE(review): presumably an overridden get_gradients reads
    # self.accum_grads during that call -- confirm against the class.
    self.accum_grads = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p), name='accum_grad_%s' % i)
        for i, p in enumerate(params)
    ]

    old_update = K.update

    def new_update(x, new_x):
        # Only let the new value through when cond holds; otherwise write
        # the variable's own value back.
        new_x = K.switch(cond, new_x, x)
        return old_update(x, new_x)

    # Temporarily patch the backend so every K.update issued by the parent
    # optimizer is gated by cond. The patch is global, so it must be undone
    # even if the parent call raises; otherwise unrelated code would keep
    # going through the gated wrapper.
    K.update = new_update
    try:
        updates = super(new_optimizer, self).get_updates(loss, params)
    finally:
        K.update = old_update

    # Accumulate gradients: restart from the current gradient when cond
    # holds, otherwise add the current gradient to the running sum.
    with tf.control_dependencies(updates):
        accum_updates = [
            K.update(ag, K.switch(cond, g, ag + g))
            for g, ag in zip(grads, self.accum_grads)
        ]

    return accum_updates
def get_updates(self, loss, params):
    """Build the update ops for an optimizer extended with slow variables.

    After the parent optimizer's updates, each parameter ``p`` has a
    companion slow variable ``q`` (created here, initialised to zeros).
    On steps where ``self.iterations % self.steps_per_slow_update`` equals
    0, ``q`` is moved towards ``p`` by ``self.slow_step_size * (p - q)``
    and ``p`` is then overwritten with ``q``; on all other steps both keep
    their current values.

    Args:
        loss: Loss tensor, forwarded to the parent ``get_updates``.
        params: Iterable of variables to optimize.

    Returns:
        The list of ops that copy the slow variables back into the
        parameters. They are created under control dependencies on the
        parent updates and on the slow-variable updates.
    """
    fast_updates = super(new_optimizer, self).get_updates(loss, params)

    sync_period = self.steps_per_slow_update
    step_size = self.slow_step_size
    is_sync_step = K.equal(self.iterations % sync_period, 0)

    # One zero-initialised slow variable per parameter, same shape and dtype.
    slow_weights = []
    for index, param in enumerate(params):
        slow_weights.append(
            K.zeros(
                K.int_shape(param),
                dtype=K.dtype(param),
                name='slow_var_%s' % index,
            )
        )

    # Step 1 (after the parent updates): interpolate the slow variables
    # towards the parameters on sync steps, keep them unchanged otherwise.
    with tf.control_dependencies(fast_updates):
        slow_updates = []
        for param, slow in zip(params, slow_weights):
            interpolated = slow + step_size * (param - slow)
            new_slow = K.switch(is_sync_step, interpolated, slow)
            slow_updates.append(K.update(slow, new_slow))

        # Step 2 (after step 1): on sync steps overwrite each parameter
        # with its slow variable, keep the parameter unchanged otherwise.
        with tf.control_dependencies(slow_updates):
            copy_updates = []
            for param, slow in zip(params, slow_weights):
                new_param = K.switch(is_sync_step, slow, param)
                copy_updates.append(K.update(param, new_param))

    return copy_updates