Ejemplo n.º 1
0
        def get_updates(self, loss, params):
            """Build update ops that apply the parent optimizer only on gated steps.

            The parent optimizer's ``get_updates`` is run while ``K.update`` is
            temporarily replaced by a wrapper that keeps the old value unless
            ``self.iterations % self.grad_accum_steps == 0``. One accumulator
            variable per parameter is created and stored in ``self.accum_grads``.

            Args:
                loss: Loss tensor, forwarded to ``self.get_gradients`` and to the
                    parent ``get_updates``.
                params: Sequence of parameters to optimise.

            Returns:
                The list of accumulator update ops, which are created under a
                control dependency on the parent optimizer's updates.
            """
            # Update criterion: true when the iteration counter is a multiple
            # of grad_accum_steps.
            cond = K.equal(self.iterations % self.grad_accum_steps, 0)
            # Fetch the gradients.
            grads = self.get_gradients(loss, params)
            # One zero-initialised accumulator per parameter, same shape/dtype.
            self.accum_grads = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='accum_grad_%s' % i) for i, p in enumerate(params)
            ]

            old_update = K.update

            def new_update(x, new_x):
                """Assign ``new_x`` to ``x`` only when ``cond`` holds; else keep ``x``."""
                new_x = K.switch(cond, new_x, x)
                return old_update(x, new_x)

            # Only ``K.update`` is replaced; other backend update helpers are
            # left as they are. The patch is module-global, so it must be
            # undone even if the parent call raises -- otherwise every later
            # ``K.update`` call would stay gated on this (stale) ``cond``.
            K.update = new_update
            try:
                updates = super(new_optimizer, self).get_updates(loss, params)
            finally:
                K.update = old_update

            # Accumulate gradients: on a gated step the accumulator is reset to
            # the current gradient, otherwise the current gradient is added.
            with tf.control_dependencies(updates):
                accum_updates = [
                    K.update(ag, K.switch(cond, g, ag + g))
                    for g, ag in zip(grads, self.accum_grads)
                ]

            return accum_updates
Ejemplo n.º 2
0
 def set_model(self, model):
     """Bind the model and initialise the moving-average state.

     After delegating to the parent ``set_model``, this creates one
     zero tensor per entry of ``model.weights`` (``self.ema_weights``),
     snapshots the current weight values into ``self.old_weights``,
     copies that snapshot into the moving-average tensors, and stores in
     ``self.updates`` one ``K.moving_average_update`` op per weight,
     built with ``self.momentum``.
     """
     super(ExponentialMovingAverage, self).set_model(model)

     # Shadow tensors, one per model weight, shaped like the weight.
     self.ema_weights = [
         K.zeros(K.shape(weight)) for weight in model.weights
     ]

     # Seed the shadows with the model's current weight values.
     self.old_weights = K.batch_get_value(model.weights)
     K.batch_set_value(zip(self.ema_weights, self.old_weights))

     # Ops that move each shadow towards its corresponding weight.
     self.updates = [
         K.moving_average_update(shadow, weight, self.momentum)
         for shadow, weight in zip(self.ema_weights, model.weights)
     ]
Ejemplo n.º 3
0
        def get_updates(self, loss, params):
            """Extend the parent optimizer's updates with a periodic slow-variable sync.

            For every parameter a zero-initialised "slow" variable is created.
            When ``self.iterations % self.steps_per_slow_update == 0`` the slow
            variable moves towards the parameter by a fraction
            ``self.slow_step_size`` and the parameter is then overwritten with
            the slow variable; otherwise both keep their values.

            Ordering is enforced with control dependencies: parent updates,
            then slow-variable updates, then the copy back into the parameters.

            Args:
                loss: Loss tensor, forwarded to the parent ``get_updates``.
                params: Sequence of parameters to optimise.

            Returns:
                The list of ops that copy the slow variables into the parameters.
            """
            base_updates = super(new_optimizer, self).get_updates(loss, params)

            sync_period = self.steps_per_slow_update
            step_size = self.slow_step_size
            is_sync_step = K.equal(self.iterations % sync_period, 0)

            # NOTE(review): slow variables start at zero rather than at the
            # parameter values, so the first sync yields step_size * param.
            # Confirm this is intended.
            slow_vars = []
            for index, param in enumerate(params):
                slow_vars.append(
                    K.zeros(
                        K.int_shape(param),
                        dtype=K.dtype(param),
                        name='slow_var_%s' % index,
                    )
                )

            with tf.control_dependencies(base_updates):
                # Interpolate each slow variable towards its parameter.
                slow_updates = []
                for param, slow in zip(params, slow_vars):
                    blended = slow + step_size * (param - slow)
                    slow_updates.append(
                        K.update(slow, K.switch(is_sync_step, blended, slow))
                    )

                with tf.control_dependencies(slow_updates):
                    # Write the slow values back into the parameters.
                    copy_updates = []
                    for param, slow in zip(params, slow_vars):
                        copy_updates.append(
                            K.update(param, K.switch(is_sync_step, slow, param))
                        )

            return copy_updates