Example #1
0
    def future_rewards(self, state):
        """
        Return the highest Q value reachable from ``state``.

        Args:
            state (nested list (3 x 3)): a Tictactoe board

        Returns:
            float: the largest Q value over the actions available in
                ``state``, or 0 when no action is available
        """
        actions = Tictactoe.available_actions(state)

        # No move left to evaluate, so there is no future reward.
        if len(actions) == 0:
            return 0

        # Look each action up exactly once and keep the largest Q value.
        return max(self.get_q_value(state, action) for action in actions)
Example #2
0
    def best_action(self, state, epsilon_true=False):
        """
        Choose an action for ``state`` from the Q table.

        Args:
            state (nested list (3 x 3)): a Tictactoe board
            epsilon_true (bool): when False (the default) the choice is
                always greedy. When True the choice is greedy with
                probability ``1 - self.epsilon`` and otherwise a uniformly
                random available action.

        Raises:
            Exception: If no action is available

        Returns:
            tuple (i, j): the chosen action. Under the greedy policy, ties
                between actions with the same highest Q value are broken
                uniformly at random.
        """
        actions = Tictactoe.available_actions(state)

        if len(actions) == 0:
            raise Exception('No action available')

        # Short-circuit keeps the random draw out of the purely greedy mode.
        greedy = (not epsilon_true) or random.random() < (1 - self.epsilon)

        # Exploration: any available action, regardless of its Q value.
        if not greedy:
            return random.choice(actions)

        # Exploitation: gather every action that shares the highest Q value
        # so the tie can be broken uniformly instead of favouring the
        # actions that happen to come later in the list.
        best_actions = []
        max_reward = None

        for action in actions:
            value = self.get_q_value(state, action)

            if max_reward is None or value > max_reward:
                max_reward = value
                best_actions = [action]
            elif value == max_reward:
                best_actions.append(action)

        return random.choice(best_actions)