import numpy as np
from numpy.testing import assert_allclose
from keras import backend as K
from rl.util import huber_loss  # keras-rl utility under test


def test_huber_loss():
    a = np.array([1., 1.5, 2., 4.])
    b = np.array([1.5, 1., 4., 2.])
    assert_allclose(K.eval(huber_loss(a, b, 1.)),
                    np.array([.125, .125, 1.5, 1.5]))
    assert_allclose(K.eval(huber_loss(a, b, 3.)),
                    np.array([.125, .125, 2., 2.]))
    assert_allclose(K.eval(huber_loss(a, b, np.inf)),
                    np.array([.125, .125, 2., 2.]))
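
For reference, a minimal NumPy sketch of the piecewise definition these assertions exercise (an illustration of what a huber_loss like keras-rl's rl.util.huber_loss computes; the name huber_loss_np is made up here):

import numpy as np

def huber_loss_np(y_true, y_pred, clip_value):
    # Quadratic for |error| <= clip_value, linear beyond it;
    # with an infinite clip_value this is plain squared error.
    err = y_true - y_pred
    quadratic = .5 * np.square(err)
    if np.isinf(clip_value):
        return quadratic
    linear = clip_value * (np.abs(err) - .5 * clip_value)
    return np.where(np.abs(err) <= clip_value, quadratic, linear)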
# Defined inside an agent's compile() method, so `self.delta_clip` is in scope.
def clipped_masked_error(args):
    y_true, y_pred, mask = args
    loss = huber_loss(y_true, y_pred, self.delta_clip)
    loss *= mask  # apply element-wise mask
    return K.sum(loss, axis=-1)
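
In keras-rl this closure is wired into the trainable model through a Lambda layer, so the mask zeroes the loss for actions that were not taken. A rough sketch (nb_actions and y_pred are assumed to exist already; layer names are illustrative):

from keras.layers import Input, Lambda

y_true = Input(name='y_true', shape=(nb_actions,))
mask = Input(name='mask', shape=(nb_actions,))
loss_out = Lambda(clipped_masked_error, output_shape=(1,),
                  name='loss')([y_true, y_pred, mask])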
# Also a closure over `self`; Keras calls custom losses as loss(y_true, y_pred).
def clipped_error(y_true, y_pred):
    return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)
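
Outside an agent class, the same idea can be written as a closure over a fixed clip value and passed to Keras as a custom loss (model is hypothetical here):

def make_clipped_error(delta_clip):
    def clipped_error(y_true, y_pred):
        return K.mean(huber_loss(y_true, y_pred, delta_clip), axis=-1)
    return clipped_error

model.compile(optimizer='adam', loss=make_clipped_error(1.))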
Example #6
    def compile(self):
        # def clipped_error(y_true, y_pred):
        # return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

        # Compile the target networks. We only use them in feed-forward mode, so we can
        # pass any optimizer and loss; they are never used for training anyway.
        self.target_actor = clone_model(self.actor, self.custom_model_objects)
        self.target_actor.compile(optimizer='sgd', loss='mse')
        self.target_critic = clone_model(self.critic,
                                         self.custom_model_objects)
        self.target_critic.compile(optimizer='sgd', loss='mse')

        # We also compile the actor. We never optimize the actor using Keras but instead
        # compute the policy gradient ourselves. However, we need the actor in feed-forward
        # mode, so we also compile it with an arbitrary optimizer.
        self.actor.compile(optimizer='sgd', loss='mse')

        # Compile the critic for the same reason
        self.critic.compile(optimizer='sgd', loss='mse')

        # Compile the critic optimizer
        critic_optimizer = tf.train.AdamOptimizer()
        self.critic_target = tf.placeholder(dtype=tf.float32, shape=(None, 1))
        # The Huber loss bounds the error gradient at delta_clip,
        # effectively clipping the critic gradient
        critic_loss = K.mean(
            huber_loss(
                self.critic([self.state, self.action]), self.critic_target,
                self.delta_clip))
        critic_gradient_vars = critic_optimizer.compute_gradients(
            critic_loss, var_list=self.critic.trainable_weights)

        # Compute the norm as a metric
        critic_gradients_norms = [
            tf.norm(grad_var[0]) for grad_var in critic_gradient_vars
        ]
        critic_gradient_norm = tf.reduce_sum(critic_gradients_norms)

        self.critic_train_fn = critic_optimizer.apply_gradients(
            critic_gradient_vars)
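
        # At training time this op is presumably run with a feed such as:
        #   self.session.run(self.critic_train_fn, feed_dict={
        #       self.state: state_batch, self.action: action_batch,
        #       self.critic_target: target_batch})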

        # Target critic optimizer
        if self.target_critic_update < 1.:
            # Include soft target model updates.
            self.target_critic_train_fn = get_soft_target_model_ops(
                self.target_critic.weights, self.critic.weights,
                self.target_critic_update)

        # Target actor optimizer
        if self.target_actor_update < 1.:
            # Include soft target model updates.
            self.target_actor_train_fn = get_soft_target_model_ops(
                self.target_actor.weights, self.actor.weights,
                self.target_actor_update)
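
        # A sketch of the update get_soft_target_model_ops presumably builds
        # (Polyak averaging, as in keras-rl's get_soft_target_model_updates):
        #   ops = [tf.assign(t, tau * s + (1. - tau) * t)
        #          for t, s in zip(target.weights, source.weights)]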

        # Actor optimizer
        actor_optimizer = tf.train.AdamOptimizer()
        # Be careful to negate the value: the optimizer minimizes,
        # but we want to maximize the critic's estimate
        actor_loss = -tf.reduce_mean(
            self.critic([self.state, self.actor(self.state)]))

        actor_gradient_vars = actor_optimizer.compute_gradients(
            actor_loss, var_list=self.actor.trainable_weights)
        # Gradient inverting, as described in https://arxiv.org/abs/1511.04143
        if self.invert_gradients:
            actor_gradient_vars = [
                (gradient_inverter(grad, self.gradient_inverter_min,
                                   self.gradient_inverter_max), var)
                for grad, var in actor_gradient_vars
            ]
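
        # The inverting rule from the paper, for an ascent gradient g on a
        # bounded parameter p in [p_min, p_max] (a sketch; the actual
        # gradient_inverter here takes only the gradient and the bounds):
        #   g' = g * (p_max - p) / (p_max - p_min)  if g suggests increasing p
        #   g' = g * (p - p_min) / (p_max - p_min)  otherwise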

        # Compute the norm as a metric
        actor_gradients_norms = [
            tf.norm(grad_var[0]) for grad_var in actor_gradient_vars
        ]
        actor_gradient_norm = tf.reduce_sum(actor_gradients_norms)

        # The actual train function
        self.actor_train_fn = actor_optimizer.apply_gradients(
            actor_gradient_vars)
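
        # As with the critic, training presumably feeds a state batch:
        #   self.session.run(self.actor_train_fn,
        #                    feed_dict={self.state: state_batch})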

        # Collect metrics
        self.critic_summaries.append(
            tf.summary.scalar("critic/loss", critic_loss))
        self.critic_summaries.append(
            tf.summary.scalar("critic/gradient", critic_gradient_norm))
        for var, norm in zip(self.critic.trainable_weights,
                             critic_gradients_norms):
            self.critic_summaries.append(
                tf.summary.scalar("critic/{}".format(var.name), norm))

        self.actor_summaries.append(
            tf.summary.scalar("actor/loss", -actor_loss))
        self.actor_summaries.append(
            tf.summary.scalar("actor/gradient", actor_gradient_norm))
        for var, norm in zip(self.actor.trainable_weights,
                             actor_gradients_norms):
            self.actor_summaries.append(
                tf.summary.scalar("actor/{}".format(var.name), norm))

        # FIXME: Use the Keras backend directly.
        # This is a bit of a hack, taken from "initialize_variables" in the Keras
        # TensorFlow backend:
        # https://github.com/fchollet/keras/blob/master/keras/backend/tensorflow_backend.py#L330
        # It initializes only the variables that have not been initialized yet. Without it,
        # the networks and target networks would be re-initialized to different values
        # (initialization is stochastic), which is a problem when a network and its
        # target network must start with the same parameter values.
        variables = tf.global_variables()
        uninitialized_variables = []
        for v in variables:
            if not hasattr(v,
                           '_keras_initialized') or not v._keras_initialized:
                uninitialized_variables.append(v)
                v._keras_initialized = True
        self.session.run(tf.variables_initializer(uninitialized_variables))
        # self.session.run(tf.global_variables_initializer())

        self.merged_summary = tf.summary.merge_all()

        self.compiled = True