# Imports assumed by all of the snippets below
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K


def loss(y_true, y_pred):
    # scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    # clip to prevent NaN's and Inf's
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    # weighted categorical crossentropy; `weights` is captured from the
    # enclosing scope
    loss = y_true * K.log(y_pred) * weights
    loss = -K.sum(loss, -1)
    return loss
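
# A minimal usage sketch (assumed, not from the original source): since
# `weights` must come from an enclosing scope, this loss is typically built
# by a factory function and the closure passed to `model.compile`.
def weighted_categorical_crossentropy(weights):
    weights = K.constant(weights)

    def loss(y_true, y_pred):
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        return -K.sum(y_true * K.log(y_pred) * weights, -1)

    return loss

# e.g. model.compile(optimizer='adam',
#                    loss=weighted_categorical_crossentropy([0.5, 2.0, 1.0]))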

def train_critic(self, batches):
    # unpack the replay-buffer batch into per-field arrays
    batches = np.array(batches).transpose()
    imgs = np.vstack(batches[0])
    speeds = np.vstack(batches[1])
    actions = np.vstack(batches[2])
    rewards = np.vstack(batches[3])
    next_imgs = np.vstack(batches[4])
    next_speeds = np.vstack(batches[5])
    dones = np.vstack(batches[6].astype(int))
    speeds = np.reshape(speeds, (-1, 1))
    next_speeds = np.reshape(next_speeds, (-1, 1))
    # TD3 target-policy smoothing: clipped noise added to the target action
    # (note: a single noise sample is shared across the whole batch)
    noise = np.clip(
        np.random.randn(2) * self.policy_noise,
        -self.noise_clip, self.noise_clip)
    target_actions = self.actor_target([next_imgs, next_speeds]) + noise
    # per-dimension action bounds (e.g. steering in [-0.8, 0.8],
    # throttle in [0, 1])
    target_actions = K.clip(target_actions, [-0.8, 0], [0.8, 1])
    # clipped double-Q: take the minimum of the two target critics
    target_q1 = self.critic_target1.predict(
        [next_imgs, next_speeds, target_actions], steps=1)
    target_q2 = self.critic_target2.predict(
        [next_imgs, next_speeds, target_actions], steps=1)
    target_q = K.minimum(target_q1, target_q2)
    # bootstrapped TD target; (1 - dones) zeroes the tail for terminal steps
    rewards = rewards + self.gamma * target_q * (1 - dones)
    q1 = self.critic1([imgs, speeds, actions])
    q2 = self.critic2([imgs, speeds, actions])
    with tf.name_scope('critic_loss'):
        loss1 = tf.reduce_mean(
            tf.keras.losses.mean_squared_error(rewards, q1))
        loss2 = tf.reduce_mean(
            tf.keras.losses.mean_squared_error(rewards, q2))
        loss = loss1 + loss2
        closs_scalar = tf.compat.v1.summary.scalar('critic_loss', loss)
    # update both critics with a single gradient step on the summed loss
    grads = tf.gradients(
        loss,
        self.critic1.trainable_weights + self.critic2.trainable_weights)
    self.critic1.optimizer.apply_gradients(
        zip(grads,
            self.critic1.trainable_weights + self.critic2.trainable_weights))
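
# A companion sketch (assumed; `soft_update` and `tau` are hypothetical
# names, not from the original source): TD3 pairs the critic step above
# with Polyak averaging of the target networks, usually performed every
# few critic updates alongside the delayed actor update.
def soft_update(self, source_model, target_model, tau=0.005):
    # target <- tau * source + (1 - tau) * target
    new_weights = [
        tau * s + (1.0 - tau) * t
        for s, t in zip(source_model.get_weights(),
                        target_model.get_weights())
    ]
    target_model.set_weights(new_weights)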

def loss(y_true, y_pred):
    PPO_LOSS_CLIPPING = 0.2
    PPO_ENTROPY_LOSS = 5 * 1e-3  # Does not converge without entropy penalty
    # probability ratio between the new and old (behaviour) policy;
    # `old_prediction` and `advantage` are captured from the enclosing scope
    log_pdf_new = get_log_probability_density(y_pred, y_true)
    log_pdf_old = get_log_probability_density(old_prediction, y_true)
    ratio = K.exp(log_pdf_new - log_pdf_old)
    # PPO clipped surrogate objective
    surrogate1 = ratio * advantage
    clip_ratio = K.clip(ratio,
                        min_value=(1 - PPO_LOSS_CLIPPING),
                        max_value=(1 + PPO_LOSS_CLIPPING))
    surrogate2 = clip_ratio * advantage
    loss_actor = -K.mean(K.minimum(surrogate1, surrogate2))
    # entropy bonus of the diagonal Gaussian policy; the sigmas are packed
    # after the two action means in y_pred
    sigma = y_pred[:, 2:]
    variance = K.square(sigma)
    loss_entropy = PPO_ENTROPY_LOSS * K.mean(
        -(K.log(2 * np.pi * variance) + 1) / 2)
    return loss_actor + loss_entropy
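
# A minimal sketch of the helper the loss above assumes (not from the
# original source): the log-density of the taken actions under a diagonal
# Gaussian whose means and sigmas are packed side by side in the network
# output, matching the `y_pred[:, 2:]` slicing used above.
def get_log_probability_density(prediction, actions):
    mu = prediction[:, :2]
    sigma = prediction[:, 2:]
    variance = K.square(sigma)
    # log N(a; mu, var) = -(a - mu)^2 / (2 var) - 0.5 * log(2 pi var)
    log_pdf = (-K.square(actions - mu) / (2 * variance)
               - K.log(K.sqrt(2 * np.pi * variance)))
    return K.sum(log_pdf, axis=1, keepdims=True)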

def __call__(self, w):
    # clip every weight into [min_val, max_val]
    return K.clip(w, self.min_val, self.max_val)
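
# The surrounding class is not shown; a plausible minimal version (assumed,
# including the `ClipConstraint` name) is a Keras weight constraint, applied
# via e.g. Dense(64, kernel_constraint=ClipConstraint(-0.5, 0.5)).
class ClipConstraint(tf.keras.constraints.Constraint):
    def __init__(self, min_val, max_val):
        self.min_val = min_val
        self.max_val = max_val

    def __call__(self, w):
        return K.clip(w, self.min_val, self.max_val)

    def get_config(self):
        return {'min_val': self.min_val, 'max_val': self.max_val}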

def recall_m(y_true, y_pred):
    # round predictions to {0, 1}, then count batch-wise true positives
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # K.epsilon() guards against division by zero
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    # round predictions to {0, 1}, then count batch-wise true positives
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # K.epsilon() guards against division by zero
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
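
# Usage sketch (assumed): both metrics plug straight into compile(), and an
# F1 metric follows directly from the two functions above.
def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * (precision * recall) / (precision + recall + K.epsilon())

# model.compile(optimizer='adam', loss='binary_crossentropy',
#               metrics=[precision_m, recall_m, f1_m])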

def preprocess(x):
    # shift and scale the input, then clip outliers into [-5, 5]
    x = (x + 0.8) / 7.0
    x = K.clip(x, -5, 5)
    return x
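
# Usage sketch (assumed): a backend-only function like this is typically
# applied inside the model graph via a Lambda layer, e.g.
# normalized = tf.keras.layers.Lambda(preprocess)(inputs)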