def choose_action(self):
    """Select the action whose striatum activation is the largest.

    A mismatch between the number of activations and the number of
    environment actions is only reported on stdout; selection continues.

    :return (tuple): the chosen entry of ``self.env.actions`` and its
        activation multiplied by 120.
    """
    n_cells = len(self.striatum_activation)
    n_actions = len(self.env.actions)
    if n_cells != n_actions:
        print('something is wrong')

    # presumably random_argmax breaks ties between equal maxima at random
    # -- confirm against utils
    winner = utils.random_argmax(self.striatum_activation)
    # NOTE(review): the meaning of the 120 scale factor is not visible here
    scaled_value = self.striatum_activation[winner] * 120
    return self.env.actions[winner], scaled_value
def e_greedy_selection(self):
    """Epsilon greedy action selection.

    With probability ``self.epsilon`` a uniformly random action index is
    drawn; otherwise the index with maximal ``self.striatum_activation``
    is taken via ``utils.random_argmax``.

    :return (int): Index of chosen action
    """
    if np.random.rand() < self.epsilon:
        # Explore: passing the count directly draws uniformly from
        # 0 .. len(self.actions) - 1 without building a range first.
        return np.random.choice(len(self.actions))
    # Exploit: greedy choice over the current activations.
    return utils.random_argmax(self.striatum_activation)
def choose_action(self):
    """Pick the available action whose resulting position has the best value.

    Every action from ``self.get_available_actions()`` is scored by
    ``self.get_value`` at the position ``self.env.compute_new_position``
    reports for it; the best-scoring one is selected with
    ``utils.random_argmax``.

    :return (tuple): the selected action converted by
        ``get_relative_angle`` against ``self.env.curr_orientation``,
        and the score of that action.
    """
    candidates = self.get_available_actions()
    # map() is lazy, so each position is computed right before it is scored.
    scores = [
        self.get_value(new_x, new_y)
        for new_x, new_y in map(self.env.compute_new_position, candidates)
    ]

    best = utils.random_argmax(scores)
    # Candidates are allocentric; convert the winner back to the
    # egocentric reference frame before returning it.
    relative_action = get_relative_angle(candidates[best],
                                         self.env.curr_orientation)
    return relative_action, scores[best]
def choose_action(self):
    """Pick the available action that most increases the goal cell rate.

    Every action from ``self.get_available_actions()`` is scored by the
    goal cell rate at the position ``self.env.compute_new_position``
    reports for it, minus the current ``self.goal_cell_rate``; the
    best-scoring one is selected with ``utils.random_argmax``.

    :return (tuple): the selected action converted by
        ``get_relative_angle`` against ``self.env.curr_orientation``,
        and the rate difference of that action.
    """
    candidates = self.get_available_actions()
    # map() is lazy, so each position is computed right before it is scored.
    rate_gains = [
        self.get_goal_cell_rate(new_x, new_y) - self.goal_cell_rate
        for new_x, new_y in map(self.env.compute_new_position, candidates)
    ]
    # NOTE: a disabled experiment used to zero self.weights here when every
    # gain was negative ("top reached but not goal").

    best = utils.random_argmax(rate_gains)
    # Candidates are allocentric; convert the winner back to the
    # egocentric reference frame before returning it.
    relative_action = get_relative_angle(candidates[best],
                                         self.env.curr_orientation)
    return relative_action, rate_gains[best]
def choose_action(self):
    """Select the action whose striatum activation is the largest.

    :return (tuple): the chosen entry of ``self.env.actions`` and the
        activation of that action.
    """
    activations = self.striatum_activation
    # presumably random_argmax breaks ties between equal maxima at random
    # -- confirm against utils
    best = utils.random_argmax(activations)
    strength = activations[best]
    return self.env.actions[best], strength