def test_huber_loss():
    a = np.array([1., 1.5, 2., 4.])
    b = np.array([1.5, 1., 4., 2.])
    assert_allclose(K.eval(huber_loss(a, b, 1.)), np.array([.125, .125, 1.5, 1.5]))
    assert_allclose(K.eval(huber_loss(a, b, 3.)), np.array([.125, .125, 2., 2.]))
    assert_allclose(K.eval(huber_loss(a, b, np.inf)), np.array([.125, .125, 2., 2.]))
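
# A minimal sketch of the piecewise Huber loss that the expectations above assume
# (the project's actual huber_loss may differ in details): quadratic for errors up to
# clip_value, linear beyond it, and purely quadratic when clip_value is infinite.
import numpy as np
import tensorflow as tf
from keras import backend as K

def huber_loss_sketch(y_true, y_pred, clip_value):
    x = y_true - y_pred
    if np.isinf(clip_value):
        # No clipping: plain squared error (times 0.5).
        return .5 * K.square(x)
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    # Element-wise: quadratic inside the clip region, linear outside.
    return tf.where(K.abs(x) < clip_value, squared_loss, linear_loss)
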
def clipped_masked_error(args):
    y_true, y_pred, mask = args
    loss = huber_loss(y_true, y_pred, self.delta_clip)
    loss *= mask  # apply element-wise mask
    return K.sum(loss, axis=-1)

def clipped_error(y_true, y_pred):
    return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)
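
# A hedged sketch (not from this file) of how a masked, clipped loss like the one above is
# typically attached to a Keras model: the mask arrives as an extra model input and the loss
# is computed inside a Lambda layer, so compile() only needs a pass-through loss on that
# output. All names below (nb_actions, delta_clip, the toy network) are illustrative.
from keras import backend as K
from keras.layers import Dense, Input, Lambda
from keras.models import Model

nb_actions = 4
delta_clip = 1.

def masked_clipped_error(args):
    y_true, y_pred, mask = args
    loss = huber_loss(y_true, y_pred, delta_clip)
    return K.sum(loss * mask, axis=-1)

observation = Input(shape=(8,), name='observation')
y_pred = Dense(nb_actions)(observation)
y_true = Input(shape=(nb_actions,), name='y_true')
mask = Input(shape=(nb_actions,), name='mask')
loss_out = Lambda(masked_clipped_error, output_shape=(1,), name='loss')([y_true, y_pred, mask])
trainable_model = Model(inputs=[observation, y_true, mask], outputs=loss_out)
# The Lambda layer already returns the loss, so the Keras-level loss just passes it through.
trainable_model.compile(optimizer='sgd', loss=lambda y_t, y_p: y_p)
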
def compile(self):
    # def clipped_error(y_true, y_pred):
    #     return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    # Compile the target networks. We only use them in feed-forward mode, hence we can pass
    # any optimizer and loss since we never use them anyway.
    self.target_actor = clone_model(self.actor, self.custom_model_objects)
    self.target_actor.compile(optimizer='sgd', loss='mse')
    self.target_critic = clone_model(self.critic, self.custom_model_objects)
    self.target_critic.compile(optimizer='sgd', loss='mse')

    # We also compile the actor. We never optimize the actor using Keras but instead compute
    # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
    # we also compile it with any optimizer.
    self.actor.compile(optimizer='sgd', loss='mse')

    # Compile the critic for the same reason.
    self.critic.compile(optimizer='sgd', loss='mse')

    # Critic optimizer.
    critic_optimizer = tf.train.AdamOptimizer()
    self.critic_target = tf.placeholder(dtype=tf.float32, shape=(None, 1))
    # Clip large critic errors using the Huber loss (which also bounds the gradient).
    critic_loss = K.mean(huber_loss(
        self.critic([self.state, self.action]), self.critic_target, self.delta_clip))
    critic_gradient_vars = critic_optimizer.compute_gradients(
        critic_loss, var_list=self.critic.trainable_weights)
    # Compute the gradient norms as a metric.
    critic_gradients_norms = [tf.norm(grad_var[0]) for grad_var in critic_gradient_vars]
    critic_gradient_norm = tf.reduce_sum(critic_gradients_norms)
    self.critic_train_fn = critic_optimizer.apply_gradients(critic_gradient_vars)

    # Target critic optimizer.
    if self.target_critic_update < 1.:
        # Include soft target model updates.
        self.target_critic_train_fn = get_soft_target_model_ops(
            self.target_critic.weights, self.critic.weights, self.target_critic_update)

    # Target actor optimizer.
    if self.target_actor_update < 1.:
        # Include soft target model updates.
        self.target_actor_train_fn = get_soft_target_model_ops(
            self.target_actor.weights, self.actor.weights, self.target_actor_update)

    # Actor optimizer.
    actor_optimizer = tf.train.AdamOptimizer()
    # Be careful to negate the value: the optimizer minimizes, and maximizing the critic's
    # Q-estimate is equivalent to minimizing its negative.
    actor_loss = -tf.reduce_mean(self.critic([self.state, self.actor(self.state)]))
    actor_gradient_vars = actor_optimizer.compute_gradients(
        actor_loss, var_list=self.actor.trainable_weights)
    # Gradient inverting, as described in https://arxiv.org/abs/1511.04143.
    if self.invert_gradients:
        actor_gradient_vars = [
            (gradient_inverter(x[0], self.gradient_inverter_min, self.gradient_inverter_max), x[1])
            for x in actor_gradient_vars]
    # Compute the gradient norms as a metric.
    actor_gradients_norms = [tf.norm(grad_var[0]) for grad_var in actor_gradient_vars]
    actor_gradient_norm = tf.reduce_sum(actor_gradients_norms)
    # The actual train function.
    self.actor_train_fn = actor_optimizer.apply_gradients(actor_gradient_vars)

    # Collect metrics.
    self.critic_summaries.append(tf.summary.scalar("critic/loss", critic_loss))
    self.critic_summaries.append(tf.summary.scalar("critic/gradient", critic_gradient_norm))
    for var, norm in zip(self.critic.trainable_weights, critic_gradients_norms):
        self.critic_summaries.append(tf.summary.scalar("critic/{}".format(var.name), norm))
    self.actor_summaries.append(tf.summary.scalar("actor/loss", -actor_loss))
    self.actor_summaries.append(tf.summary.scalar("actor/gradient", actor_gradient_norm))
    for var, norm in zip(self.actor.trainable_weights, actor_gradients_norms):
        self.actor_summaries.append(tf.summary.scalar("actor/{}".format(var.name), norm))

    # FIXME: Use the Keras backend directly.
    # This is a bit of a hack, taken from "initialize_variables" in the Keras TensorFlow backend:
    # https://github.com/fchollet/keras/blob/master/keras/backend/tensorflow_backend.py#L330
    # It only initializes variables that are not already initialized. Without it, the networks
    # and target networks would be re-initialized to different values (stochastic initialization),
    # which is a problem when a network and its target network must start from the same parameters.
    variables = tf.global_variables()
    uninitialized_variables = []
    for v in variables:
        if not hasattr(v, '_keras_initialized') or not v._keras_initialized:
            uninitialized_variables.append(v)
            v._keras_initialized = True
    self.session.run(tf.variables_initializer(uninitialized_variables))
    # self.session.run(tf.global_variables_initializer())

    self.merged_summary = tf.summary.merge_all()
    self.compiled = True
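
# A hedged sketch of the soft-update helper used in compile() above (the real
# get_soft_target_model_ops may differ): Polyak averaging of the target weights,
# target <- tau * source + (1 - tau) * target, expressed as TF1-style assign ops
# that are run once per training step.
import tensorflow as tf

def soft_target_update_ops(target_weights, source_weights, tau):
    return [tf.assign(target_w, tau * source_w + (1. - tau) * target_w)
            for target_w, source_w in zip(target_weights, source_weights)]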