def add_loss_op_target(self):
    # One-hot encode the chosen action: puts a 1 in the position of the action taken
    action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0,
                                name='action_one_hot')
    # Dot product between q_out and the one-hot tensor: Q-value of the action taken
    train = tf.reduce_sum(self.q_out * action_one_hot, axis=1, name='q_acted')
    # TD error between the predicted Q-value and the (externally fed) target value
    self.delta = train - self.target_val
    # Huber loss averaged over the batch
    self.loss = tf.reduce_mean(huber_loss(self.delta))
    # Average Q-value per action over the batch
    avg_q = tf.reduce_mean(self.q_out, 0)
    # Summary objects for visualization in TensorBoard
    q_summary = []
    for i in range(self.n_actions):
        q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
    self.merged_image_sum = tf.summary.merge(self.image_summary, "images")
    self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
    self.loss_summary = tf.summary.scalar("loss", self.loss)
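The helper huber_loss is called here but not defined in this snippet. A minimal sketch of a typical clipped-error implementation, assuming a threshold of 1.0 (the value commonly used for DQN), would look like this:

def huber_loss(x, delta=1.0):
    # Quadratic for small errors, linear for |x| > delta,
    # which keeps gradients bounded and stabilises DQN training
    return tf.where(tf.abs(x) < delta,
                    0.5 * tf.square(x),
                    delta * (tf.abs(x) - 0.5 * delta))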
def add_loss_op_target_tf(self):
    self.reward = tf.cast(self.reward, dtype=tf.float32)
    # Maximum Q-value of the target network for each next state
    target_best = tf.reduce_max(self.q_target_out, 1)
    # Zero out the entries that correspond to terminal states
    masked = (1.0 - self.terminal) * target_best
    # Target: reward plus the maximum Q-value discounted by gamma
    target = self.reward + self.gamma * masked
    # One-hot encode the chosen action: puts a 1 in the position of the action taken
    action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0,
                                name='action_one_hot')
    # Dot product between q_out and the one-hot tensor: Q-value of the action taken
    train = tf.reduce_sum(self.q_out * action_one_hot, axis=1)
    # TD error between the target and the predicted Q-value
    delta = target - train
    # Huber loss averaged over the batch
    self.loss = tf.reduce_mean(huber_loss(delta))
    # Average Q-value per action over the batch
    avg_q = tf.reduce_mean(self.q_out, 0)
    # Summary objects for visualization in TensorBoard
    q_summary = []
    for i in range(self.n_actions):
        q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
    self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
    self.loss_summary = tf.summary.scalar("loss", self.loss)
    self.merged_image_sum = tf.summary.merge(self.image_summary, "images")
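Note the difference between the two variants: add_loss_op_target expects the Bellman target to be computed outside the graph and fed through self.target_val, while add_loss_op_target_tf builds the target reward + gamma * max_a Q_target(s', a) inside the graph from self.q_target_out, self.reward and self.terminal. A minimal sketch of how the resulting loss could be driven in a training step is shown below; the optimizer hyperparameters and the placeholder names self.state and self.state_target are assumptions for illustration, not part of the original class.

def add_train_op(self, learning_rate=0.00025):
    # RMSProp is the optimizer used in the original DQN paper; the exact
    # hyperparameters here are assumed values
    optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=0.95, epsilon=0.01)
    self.train_op = optimizer.minimize(self.loss)

def train_step(self, sess, states, actions, rewards, terminals, next_states):
    # Hypothetical feed: states go to the online network, next_states to the
    # target network, and the transition data to the loss placeholders
    feed = {self.state: states,
            self.state_target: next_states,
            self.action: actions,
            self.reward: rewards,
            self.terminal: terminals}
    _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
    return loss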