def choose_action(self, q_values):
    """Sample an action index from a softmax (Boltzmann) distribution.

    Calls ``self.update_parameters()`` first, then turns ``q_values`` into
    unnormalized softmax weights using temperature ``self.tau`` and hands
    them to ``learning_utils.weightedRandomChoice``.

    Args:
        q_values: array-like of Q-value estimates, one per action.

    Returns:
        Whatever ``learning_utils.weightedRandomChoice`` returns for a
        dict keyed by the position of each Q-value (the chosen index).

    NOTE(review): the max-shift below assumes ``weightedRandomChoice``
    samples proportionally to the weights, i.e. is invariant to their
    overall scale -- confirm against ``learning_utils``.
    """
    self.update_parameters()

    # The 1e-2 offset keeps the divisor away from zero when tau is ~0
    # (presumably tau is non-negative -- TODO confirm).
    scaled = np.asarray(q_values) / (self.tau + 1e-2)

    # Shift so the largest exponent is 0: np.exp would otherwise overflow
    # to inf for large Q-values / small tau and corrupt the weights.
    # Relative weights (and so the sampling distribution) are unchanged.
    if scaled.size:
        scaled = scaled - scaled.max()

    weights = dict(enumerate(np.exp(scaled)))
    return learning_utils.weightedRandomChoice(weights)
def get_action(self, state):
    """Pick an action for ``state`` by softmax (Boltzmann) sampling.

    Increments ``self.num_iters``, evaluates ``self.getQ(state, action)``
    for every entry of ``self.actions``, converts those Q-values into
    unnormalized softmax weights using temperature ``self.tau``, and
    samples an index via ``learning_utils.weightedRandomChoice``.

    Args:
        state: the state passed through to ``self.getQ``.

    Returns:
        The element of ``self.actions`` at the sampled index.

    NOTE(review): the max-shift below assumes ``weightedRandomChoice``
    samples proportionally to the weights, i.e. is invariant to their
    overall scale -- confirm against ``learning_utils``.
    """
    self.num_iters += 1

    # NOTE: a per-call temperature decay (tau *= .9999 while tau > 1e-9)
    # existed here only as commented-out code; this method does not
    # modify self.tau.

    q_values = np.array(
        [self.getQ(state, action) for action in self.actions])

    # The 1e-2 offset keeps the divisor away from zero when tau is ~0
    # (presumably tau is non-negative -- TODO confirm).
    scaled = q_values / (self.tau + 1e-2)

    # Shift so the largest exponent is 0: np.exp would otherwise overflow
    # to inf for large Q-values / small tau and corrupt the weights.
    # Relative weights (and so the sampling distribution) are unchanged.
    if scaled.size:
        scaled = scaled - scaled.max()

    weights = dict(enumerate(np.exp(scaled)))
    action_idx = learning_utils.weightedRandomChoice(weights)
    return self.actions[action_idx]