Ejemplo n.º 1
0
 def choose_action_softmax(self, state, epsilon):
     """Sample an action from a softmax (Boltzmann) distribution over Q-values.

     Args:
         state: State forwarded unchanged to ``self.q_vals``.
         epsilon: Used directly as the softmax temperature ``tau``.

     Returns:
         The index of the selected action. For a non-zero temperature this is
         whatever ``weightedRandomChoice`` picks from the ``{index: weight}``
         mapping (presumably sampling proportionally to the weights -- verify
         against that helper). For a zero temperature it is the index of the
         largest Q-value.
     """
     # NOTE(review): assumes self.q_vals returns a 1-D NumPy array -- confirm.
     q_vals = self.q_vals(state)
     tau = epsilon
     if tau == 0:
         # Dividing by a zero temperature would yield inf/nan weights; the
         # softmax limit as tau -> 0+ is the greedy action, so return it.
         return int(np.argmax(q_vals))
     scaled = q_vals / tau
     # Shift by the maximum before exponentiating so np.exp cannot overflow.
     # Every weight is multiplied by the same positive constant, so the
     # relative weights (and a proportional sampler's distribution) are
     # unchanged.
     exp_q_vals = np.exp(scaled - np.max(scaled))
     weights = dict(enumerate(exp_q_vals))
     return weightedRandomChoice(weights)
Ejemplo n.º 2
0
 def choose_action_softmax(self, state, epsilon):
     """Pick an action by softmax (Boltzmann) sampling over the Q-values.

     Args:
         state: State forwarded unchanged to ``self.q_vals``.
         epsilon: Used directly as the softmax temperature ``tau``.

     Returns:
         The index of the selected action. With a non-zero temperature the
         choice is delegated to ``weightedRandomChoice`` on an
         ``{index: weight}`` mapping (presumably a weight-proportional
         sampler -- verify against that helper). With a zero temperature the
         index of the largest Q-value is returned.
     """
     # NOTE(review): assumes self.q_vals returns a 1-D NumPy array -- confirm.
     q_vals = self.q_vals(state)
     tau = epsilon
     if tau == 0:
         # A zero temperature cannot be divided by (inf/nan weights); use the
         # greedy action, which is the softmax limit as tau -> 0+.
         return int(np.argmax(q_vals))
     scaled = q_vals / tau
     # Subtracting the maximum keeps every exponent <= 0, so np.exp never
     # overflows; all weights are rescaled by the same positive factor, which
     # leaves their ratios intact.
     exp_q_vals = np.exp(scaled - np.max(scaled))
     weights = dict(enumerate(exp_q_vals))
     return weightedRandomChoice(weights)
Ejemplo n.º 3
0
 def choose_action_softmax(self, state, epsilon):
     """Sample an action by softmax with a compression-loss-adjusted temperature.

     The temperature is ``epsilon`` plus the compression loss divided by
     ``self.max_compression_loss``, so a larger compression loss flattens the
     action distribution.

     Args:
         state: State forwarded unchanged to
             ``self.q_vals_and_compression_loss``.
         epsilon: Base temperature added to the scaled compression loss.

     Returns:
         The index of the selected action. With a non-zero temperature the
         choice is delegated to ``weightedRandomChoice`` on an
         ``{index: weight}`` mapping (presumably a weight-proportional
         sampler -- verify against that helper). With a zero temperature the
         index of the largest Q-value is returned.
     """
     # NOTE(review): assumes q_vals is a 1-D NumPy array and compression_loss
     # is a scalar -- confirm against q_vals_and_compression_loss.
     q_vals, compression_loss = self.q_vals_and_compression_loss(state)
     compression_loss_component = compression_loss / self.max_compression_loss
     tau = epsilon + compression_loss_component
     if tau == 0:
         # Dividing by a zero temperature would yield inf/nan weights; the
         # softmax limit as tau -> 0+ is the greedy action, so return it.
         return int(np.argmax(q_vals))
     scaled = q_vals / tau
     # Shift by the maximum before exponentiating so np.exp cannot overflow.
     # All weights are rescaled by the same positive factor, so their ratios
     # are unchanged.
     exp_q_vals = np.exp(scaled - np.max(scaled))
     weights = dict(enumerate(exp_q_vals))
     return weightedRandomChoice(weights)
 def choose_action_softmax(self, state, epsilon):
     """Choose an action via softmax whose temperature grows with compression loss.

     The temperature ``tau`` is ``epsilon`` plus
     ``compression_loss / self.max_compression_loss``.

     Args:
         state: State forwarded unchanged to
             ``self.q_vals_and_compression_loss``.
         epsilon: Base temperature added to the scaled compression loss.

     Returns:
         The index of the selected action. For a non-zero temperature this is
         whatever ``weightedRandomChoice`` picks from the ``{index: weight}``
         mapping (presumably sampling proportionally to the weights -- verify
         against that helper). For a zero temperature it is the index of the
         largest Q-value.
     """
     # NOTE(review): assumes q_vals is a 1-D NumPy array and compression_loss
     # is a scalar -- confirm against q_vals_and_compression_loss.
     q_vals, compression_loss = self.q_vals_and_compression_loss(state)
     compression_loss_component = compression_loss / self.max_compression_loss
     tau = epsilon + compression_loss_component
     if tau == 0:
         # A zero temperature cannot be divided by (inf/nan weights); use the
         # greedy action, which is the softmax limit as tau -> 0+.
         return int(np.argmax(q_vals))
     scaled = q_vals / tau
     # Subtracting the maximum keeps every exponent <= 0, so np.exp never
     # overflows; the common positive rescaling leaves weight ratios intact.
     exp_q_vals = np.exp(scaled - np.max(scaled))
     weights = dict(enumerate(exp_q_vals))
     return weightedRandomChoice(weights)