Example #1
    def add_loss_op_target(self):
        # Select the Q-value of the action actually taken via a one-hot mask
        action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0, name='action_one_hot')
        train = tf.reduce_sum(self.q_out * action_one_hot, axis=1, name='q_acted')
        # TD error between the predicted Q-value and the externally supplied target value
        self.delta = train - self.target_val
        self.loss = tf.reduce_mean(huber_loss(self.delta))

        # Average Q-value per action over the batch, logged to TensorBoard
        avg_q = tf.reduce_mean(self.q_out, 0)
        q_summary = []
        for i in range(self.n_actions):
            q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
        self.merged_image_sum = tf.summary.merge(self.image_summary, "images")
        self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
        self.loss_summary = tf.summary.scalar("loss", self.loss)
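
This and the following examples call huber_loss, which is not defined in the snippets themselves. A minimal sketch of such a helper, assuming the standard Huber formulation (quadratic inside a threshold, linear outside) and TF 1.x ops:

    def huber_loss(x, delta=1.0):
        # Hypothetical helper, not part of the original snippets.
        # Quadratic for |x| <= delta, linear beyond, computed elementwise.
        abs_x = tf.abs(x)
        quadratic = tf.minimum(abs_x, delta)
        linear = abs_x - quadratic
        return 0.5 * tf.square(quadratic) + delta * linear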
Example #2
    def add_loss_op_target(self):
        action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0, name='action_one_hot')  # Puts a 1 at the position of the action that was taken
        train = tf.reduce_sum(self.q_out * action_one_hot, axis=1, name='q_acted')  # Dot product between q_out and the one-hot tensor
        self.delta = train - self.target_val  # TD error between predicted and target Q-values
        self.loss = tf.reduce_mean(huber_loss(self.delta))  # Huber loss over the delta values

        avg_q = tf.reduce_mean(self.q_out, 0)  # Average Q-value per action over the batch
        q_summary = []
        for i in range(self.n_actions):  # Summary ops for visualization in TensorBoard
            q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
        self.merged_image_sum = tf.summary.merge(self.image_summary, "images")
        self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
        self.loss_summary = tf.summary.scalar("loss", self.loss)
Example #3
    def add_loss_op_target_tf(self):
        # Cast rewards to float32 so they can be combined with the Q-value tensors
        self.reward = tf.cast(self.reward, dtype=tf.float32)
        # Maximum target-network Q-value for the next state
        target_best = tf.reduce_max(self.q_target_out, 1)
        # Zero out the bootstrap term for terminal transitions
        masked = (1.0 - self.terminal) * target_best
        # Bellman target: reward plus gamma-discounted maximum target Q-value
        target = self.reward + self.gamma * masked

        # Select the Q-value of the action actually taken via a one-hot mask
        action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0, name='action_one_hot')
        train = tf.reduce_sum(self.q_out * action_one_hot, axis=1)
        # TD error and Huber loss
        delta = target - train
        self.loss = tf.reduce_mean(huber_loss(delta))
        # Average Q-value per action over the batch, logged to TensorBoard
        avg_q = tf.reduce_mean(self.q_out, 0)
        q_summary = []
        for i in range(self.n_actions):
            q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
        self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
        self.loss_summary = tf.summary.scalar("loss", self.loss)
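
The target built in add_loss_op_target_tf is the standard one-step Q-learning target, reward + gamma * (1 - terminal) * max_a Q_target(s', a). A small self-contained illustration of that arithmetic with made-up numbers (NumPy rather than TensorFlow, purely to show the values):

    import numpy as np

    gamma = 0.99
    reward = np.array([1.0, 0.0], dtype=np.float32)         # rewards for two transitions
    terminal = np.array([0.0, 1.0], dtype=np.float32)       # second transition ends the episode
    q_target_out = np.array([[2.0, 3.0], [5.0, 4.0]], dtype=np.float32)

    target_best = q_target_out.max(axis=1)                  # [3.0, 5.0]
    target = reward + gamma * (1.0 - terminal) * target_best
    # -> [1.0 + 0.99 * 3.0, 0.0] = [3.97, 0.0]; terminal states keep only the reward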
Example #4
    def add_loss_op_target_tf(self):
        self.reward = tf.cast(self.reward, dtype=tf.float32)
        target_best = tf.reduce_max(self.q_target_out, 1)  # Maximum target-network Q-value for the next state
        masked = (1.0 - self.terminal) * target_best  # Zero out the bootstrap term for terminal states
        target = self.reward + self.gamma * masked  # Target = reward + gamma-discounted maximum Q-value
        # Put a 1 at the position of the action that was taken
        action_one_hot = tf.one_hot(self.action, self.n_actions, 1.0, 0.0, name='action_one_hot')
        train = tf.reduce_sum(self.q_out * action_one_hot, axis=1)  # Dot product between q_out and the one-hot tensor
        delta = target - train  # TD error between target and predicted Q-values
        self.loss = tf.reduce_mean(huber_loss(delta))  # Huber loss over the delta values
        avg_q = tf.reduce_mean(self.q_out, 0)  # Average Q-value per action over the batch
        q_summary = []
        for i in range(self.n_actions):  # Summary ops for visualization in TensorBoard
            q_summary.append(tf.summary.histogram('q/{}'.format(i), avg_q[i]))
        self.avg_q_summary = tf.summary.merge(q_summary, 'q_summary')
        self.loss_summary = tf.summary.scalar("loss", self.loss)
        self.merged_image_sum = tf.summary.merge(self.image_summary, "images")
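
The summary ops created in these examples only produce output once they are evaluated in a session and written to disk. A minimal, self-contained TF 1.x sketch of that workflow, using a standalone placeholder rather than the classes above (the log directory and values are illustrative assumptions):

    import tensorflow as tf

    # Stand-in scalar summary; the examples above would use model.loss_summary instead.
    x = tf.placeholder(tf.float32, shape=[])
    loss_summary = tf.summary.scalar("loss", x)

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./logs", sess.graph)
        summ = sess.run(loss_summary, feed_dict={x: 0.5})
        writer.add_summary(summ, global_step=0)
        writer.close()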