Example 1
    def get_greedy_action(self, obs):
        """
        Retrieves the best next action for the current observation according to the eval Deep Q-Network prediction.

        :param obs: Current observation (state representation)

        :return: action: AgentAction which should be chosen next by the agent according to the eval Deep Q-Network
        """
        action_values = self.predict(np.array([obs]))[0]
        greedy_action_index = np.argmax(action_values)
        return index_to_agent_action(greedy_action_index)
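Neither self.predict nor index_to_agent_action is shown above; the selection itself is just an argmax over the predicted Q-values. A minimal self-contained sketch of that pattern, using a hypothetical action list in place of the project's AgentAction mapping:

import numpy as np

# Hypothetical stand-ins for the project's action mapping; the real
# index_to_agent_action returns an AgentAction object.
ACTIONS = ["greet", "request_genre", "inform_movie", "bye"]

def index_to_agent_action(index):
    return ACTIONS[index]

def get_greedy_action(q_values):
    # The greedy action is simply the argmax over the predicted Q-values.
    greedy_action_index = int(np.argmax(q_values))
    return index_to_agent_action(greedy_action_index)

print(get_greedy_action(np.array([0.1, 0.7, 0.4, 0.2])))  # -> request_genre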
Example 2
    def get_greedy_action(self, obs):
        """
        Retrieves the best next action for the current observation according to the eval Deep Q-Network prediction.

        :param obs: Current observation (state representation)

        :return: action: AgentAction which should be chosen next by the agent according to the eval Deep Q-Network
        """
        borda_count = self.compute_borda_count([obs])[0]
        action_index = np.argmax(borda_count)
        return index_to_agent_action(action_index)
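compute_borda_count is not shown in this example. In ensemble-style DQN agents, a Borda count is typically formed by letting each member network rank every action and summing the ranks; the sketch below assumes that scheme (the function name and input shape are illustrative, not taken from the original codebase):

import numpy as np

def compute_borda_count(q_values_per_net):
    # One Q-value array per ensemble member, each covering all actions.
    # Double argsort turns raw Q-values into ranks: the worst action
    # scores 0, the best scores n_actions - 1. Summing the ranks across
    # the ensemble yields the Borda count per action.
    n_actions = len(q_values_per_net[0])
    borda = np.zeros(n_actions)
    for q_values in q_values_per_net:
        borda += np.argsort(np.argsort(q_values))
    return borda

print(compute_borda_count([np.array([0.1, 0.9]), np.array([0.8, 0.2])]))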
Example 3
    def get_warm_up_action(self):
        """
        Retrieves the action which should be played by the agent in the warm up phase (based on current turn number).

        :return action : AgentAction which should be chosen next by the agent
        """
        # The agent's request sequence is defined in agent_rule_requests
        if self.turn < len(agent_rule_requests):
            raw_action = agent_rule_requests[self.turn]
        else:
            raw_action = agent_rule_requests[-1]
        feasible_action_index = raw_agent_action_to_index(raw_action)
        action = index_to_agent_action(feasible_action_index)
        return action
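agent_rule_requests itself is not listed here; from the code it is an ordered list of raw actions, one per warm-up turn, whose last entry is replayed once the sequence is exhausted. A standalone sketch with hypothetical entries:

# Hypothetical warm-up schedule; the real agent_rule_requests entries
# are project-specific raw actions.
agent_rule_requests = [
    {"intent": "request", "slot": "genre"},
    {"intent": "request", "slot": "actor"},
    {"intent": "request", "slot": "year"},
]

def get_warm_up_action(turn):
    # Clamp the turn index so the final rule keeps being replayed
    # once the scripted sequence runs out.
    index = min(turn, len(agent_rule_requests) - 1)
    return agent_rule_requests[index]

print(get_warm_up_action(10))  # -> {'intent': 'request', 'slot': 'year'}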
Example 4
    def choose_action(self, obs, warm_up=False):
        """
        Determines which action is chosen for the next turn given the current observation.
        The chosen action can differ from the greedy policy when:
        - A random action is chosen due to the epsilon-greedy exploration policy
        - A predefined action sequence is used during the warm-up phase

        :param obs : Current observation (state representation)
        :param warm_up : Flag indicating whether the warm-up phase is active

        :return action : AgentAction which should be chosen next by the agent
        """
        if warm_up:
            action = self.get_warm_up_action()
        elif random.random() < self.epsilon:
            action_index = random.randrange(self.n_actions)
            action = index_to_agent_action(action_index)
        else:
            action = self.get_greedy_action(obs)
        action.round_num = self.turn
        self.turn += 1
        return action
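Taken together, the non-warm-up branches implement standard epsilon-greedy exploration. A minimal sketch of just that decision, with illustrative Q-estimates and epsilon (names are not from the original code):

import random

def epsilon_greedy_index(q_values, epsilon):
    # With probability epsilon explore uniformly at random;
    # otherwise exploit the highest current Q-value estimate.
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda i: q_values[i])

print(epsilon_greedy_index([0.1, 0.7, 0.4], epsilon=0.1))  # usually 1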
Example 5
    def get_greedy_action(self, obs):
        # Ignores the observation and returns a uniformly random action.
        action_index = random.randrange(self.n_actions)
        return index_to_agent_action(action_index)
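Here the greedy policy itself is replaced by a uniform random choice over the action space, so this variant disregards the observation entirely, presumably serving as a random baseline.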