def get_action(self):
    """Pick the next move: random exploration, otherwise softmax sampling.

    The legal moves come from ``self.Maze.get_legal_dirs(self.position)``.
    When ``utilities.rand_bool(self.epsilon)`` is true, the move is chosen by
    ``utilities.rand_choice`` over all legal moves. Otherwise the Q-values
    ``self.qValues[(self.position, a)]`` of the legal moves are passed
    through ``self.softmax`` and one move is sampled using the result as the
    probability vector.

    Returns:
        One element of the legal-move sequence, as the original object (not
        a NumPy-converted copy).
    """
    legal_actions = self.Maze.get_legal_dirs(self.position)

    # Exploration branch.
    # NOTE(review): presumably rand_bool(p) is True with probability p and
    # rand_choice picks uniformly -- confirm in `utilities`.
    if utilities.rand_bool(self.epsilon):
        return utilities.rand_choice(legal_actions)

    q_values = [self.qValues[(self.position, a)] for a in legal_actions]
    # Used as `p` below, so self.softmax must yield non-negative weights that
    # sum to 1, one per legal action.
    boltz_values = self.softmax(q_values)

    # Sample an index instead of passing the actions to np.random.choice:
    # NumPy would first turn the list into an array, which fails for
    # tuple-like actions ("a must be 1-dimensional") and otherwise hands back
    # NumPy scalars rather than the original action objects.
    chosen = np.random.choice(len(legal_actions), p=boltz_values)
    return legal_actions[chosen]
def get_action(self):
    """Compute an epsilon-greedy move from the current position.

    The legal moves come from ``self.Maze.get_legal_dirs(self.position)``.
    When ``utilities.rand_bool(self.epsilon)`` is true, an exploratory move
    is returned via ``utilities.rand_choice`` over all legal moves.
    Otherwise the move(s) whose ``self.qValues[(self.position, move)]`` is
    largest are collected and one of them is returned via
    ``utilities.rand_choice``.

    Returns:
        One element of the legal-move collection.
    """
    legal_actions = self.Maze.get_legal_dirs(self.position)

    # Exploration branch.
    # NOTE(review): presumably rand_bool(p) is True with probability p and
    # rand_choice picks uniformly -- confirm in `utilities`.
    if utilities.rand_bool(self.epsilon):
        return utilities.rand_choice(legal_actions)

    # Pair every legal move with its current Q-value.
    scored = [
        (self.qValues[(self.position, action)], action)
        for action in legal_actions
    ]

    # Take the maximum over the values only. max() over the (value, action)
    # tuples would compare the actions themselves whenever two values tie,
    # raising TypeError for action types that do not define an ordering.
    best = max(value for value, _ in scored)

    tied_moves = [move for value, move in scored if value == best]
    return utilities.rand_choice(tied_moves)