Example #1
    def __call__(self, action=None):
        # Calling the environment with no action resets it: the agent is
        # returned to the start position and the initial state is returned.
        if action is None:
            self.curr_pos = self.start_pos
            self.episode_steps = 0
            self.start_episode()
            return self.state()
        else:
            self.episode_steps += 1
            assert action in self.actions
            r, c = self.curr_pos
            # The requested action is executed with probability p; with
            # probability 1 - p one of the other N - 1 actions is chosen
            # uniformly at random (a noisy, stochastic gridworld).
            p = self.correct_action_probability
            N = len(self.actions)
            distr = array([(1 - p) / (N - 1)] * N)
            distr[self.actions.index(action)] = p
            a = utils.weighted_sample(distr)

            dr, dc = self.action_map[self.actions[a]]

            # Move only if the destination cell is valid (inside the grid
            # and not blocked); otherwise the agent stays in place.
            if self.move_okay(r + dr, c + dc):
                r, c = self.curr_pos = (r + dr, c + dc)

        # Episode termination: reaching the goal yields the goal reward;
        # exceeding the step limit (if any) also ends the episode.
        if (r, c) == self.goal_pos:
            self.verbose("!!! GOAL !!!")
            return rl.TERMINAL_STATE, self.goal_reward
        elif self.timeout and self.episode_steps > self.timeout:
            return rl.TERMINAL_STATE, self.step_reward
        else:
            return self.state(), self.step_reward
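
The method above doubles as both "reset" and "step": calling the environment with no argument starts a new episode and returns the initial state, while calling it with an action returns a (next state, reward) pair, with rl.TERMINAL_STATE signalling the end of the episode. A minimal driver-loop sketch, assuming env is an instance of the class above and agent is a hypothetical object whose policy method maps a state to an action (agent is illustrative, not from the source):

    sensation = env()                      # no action: reset, get initial state
    while sensation != rl.TERMINAL_STATE:
        action = agent.policy(sensation)   # hypothetical agent object
        sensation, reward = env(action)    # step: returns (next state, reward)
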
Example #2
    def policy(self, sensation):
        """
        Given a sensation, return an action.  Uses
        self.action_selection to get a distribution over the agent's
        actions.  Uses self.applicable_actions to prevent selecting
        inapplicable actions.

        Returns 0 if is_terminal(sensation).
        """
        if not is_terminal(sensation):
            actions = self.applicable_actions(sensation)
            return actions[weighted_sample(self.policy_fn(sensation, actions))]
        else:
            # In the terminal state, the action is irrelevant
            return 0
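
Both examples lean on a weighted_sample helper that, given a sequence of weights forming a probability distribution, returns the index of an element chosen with probability proportional to its weight. A minimal sketch of such a helper, assuming that interface (the actual utils.weighted_sample in the source library may be implemented differently, e.g. on top of numpy):

    import random

    def weighted_sample(weights):
        # Draw a threshold uniformly in [0, total weight) and return the
        # first index whose cumulative weight reaches it.
        total = sum(weights)
        threshold = random.uniform(0, total)
        cumulative = 0.0
        for i, w in enumerate(weights):
            cumulative += w
            if cumulative >= threshold:
                return i
        return len(weights) - 1  # guard against floating-point round-off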