コード例 #1
0
 def choose_action(self):
     """Select an action from the striatum activation vector.

     The winning index is picked by ``utils.random_argmax`` (presumably an
     argmax with random tie-breaking -- confirm in ``utils``).

     :return (tuple): ``(action, value)`` where ``action`` is the entry of
         ``self.env.actions`` at the winning index and ``value`` is the
         activation at that index multiplied by 120.
     """
     activations = self.striatum_activation
     # Diagnostic only: a size mismatch is reported, execution continues.
     if not len(activations) == len(self.env.actions):
         print('something is wrong')
     winner = utils.random_argmax(activations)
     winner_activation = activations[winner]
     # NOTE(review): the factor 120 is not explained in this block -- confirm
     # what scale callers expect.
     return self.env.actions[winner], winner_activation * 120
コード例 #2
0
    def e_greedy_selection(self):
        """Epsilon-greedy action selection.

        With probability ``self.epsilon`` a uniformly random action index is
        drawn (exploration); otherwise the index picked by
        ``utils.random_argmax`` over ``self.striatum_activation`` is used
        (exploitation).

        :return (int): Index of chosen action
        """
        if np.random.rand() < self.epsilon:
            # Explore. Passing an int makes ``choice`` sample as if from
            # ``np.arange(n)``, so no intermediate ``range`` is needed and
            # the same random stream is consumed as before.
            return np.random.choice(len(self.actions))
        # Exploit the current activation values.
        return utils.random_argmax(self.striatum_activation)
コード例 #3
0
    def choose_action(self):
        """Pick the available action whose resulting position scores highest.

        Every action from ``self.get_available_actions()`` is scored by
        calling ``self.get_value`` on the position that
        ``self.env.compute_new_position`` returns for it. The winning index
        is chosen by ``utils.random_argmax``.

        :return (tuple): the chosen action converted with
            ``get_relative_angle`` against ``self.env.curr_orientation``,
            and the score of that action.
        """
        candidates = self.get_available_actions()

        # ``map`` is lazy, so each position is computed right before it is
        # scored, one candidate at a time.
        landing_spots = map(self.env.compute_new_position, candidates)
        scores = [self.get_value(x, y) for x, y in landing_spots]

        best = utils.random_argmax(scores)

        chosen_allocentric = candidates[best]
        # Convert the allocentric choice back to the egocentric frame.
        chosen_egocentric = get_relative_angle(chosen_allocentric,
                                               self.env.curr_orientation)
        return chosen_egocentric, scores[best]
コード例 #4
0
    def choose_action(self):
        """Pick the available action with the largest goal-cell-rate gain.

        Every action from ``self.get_available_actions()`` is scored by the
        difference between ``self.get_goal_cell_rate`` at the position that
        ``self.env.compute_new_position`` returns for it and
        ``self.goal_cell_rate``. The winning index is chosen by
        ``utils.random_argmax``.

        :return (tuple): the chosen action converted with
            ``get_relative_angle`` against ``self.env.curr_orientation``,
            and the rate difference of that action.
        """
        candidates = self.get_available_actions()

        # ``map`` is lazy, so each position is computed right before it is
        # scored, one candidate at a time.
        rate_gains = [
            self.get_goal_cell_rate(x, y) - self.goal_cell_rate
            for x, y in map(self.env.compute_new_position, candidates)
        ]

        # Disabled experiment, kept for reference:
        #if np.all(np.array(action_goodness) < 0):  # top reached but not goal
        #    self.weights *= 0

        best = utils.random_argmax(rate_gains)

        chosen_allocentric = candidates[best]
        # Convert the allocentric choice back to the egocentric frame.
        chosen_egocentric = get_relative_angle(chosen_allocentric,
                                               self.env.curr_orientation)
        return chosen_egocentric, rate_gains[best]
コード例 #5
0
ファイル: striatum.py プロジェクト: treestreamymw/HBPcollab
 def choose_action(self):
     """Select an action from the striatum activation vector.

     The winning index is picked by ``utils.random_argmax`` (presumably an
     argmax with random tie-breaking -- confirm in ``utils``).

     :return (tuple): ``(action, value)`` where ``action`` is the entry of
         ``self.env.actions`` at the winning index and ``value`` is the
         activation at that index.
     """
     activations = self.striatum_activation
     winner = utils.random_argmax(activations)
     winner_activation = activations[winner]
     chosen = self.env.actions[winner]
     return chosen, winner_activation