コード例 #1
0
ファイル: game.py プロジェクト: ajunwalker/Poker-Net
class Game:
    """Card-game environment pitting a learning player against ``self.opponent``.

    Player 0 is the external agent driving ``reset``/``step``; player 1 is
    the built-in opponent. Each player's board is a list of three rows, and
    every row is a ``(4, 13)`` array indexed ``[suit, number]`` where a ``1``
    marks a placed card. The deck is a list of ``(suit, number)`` tuples.

    Relies on names defined outside this class: ``RandomAgent``,
    ``enumerate_start_moves``, ``enumerate_midgame_moves``, ``has_pair``,
    ``has_triple``, ``has_straight``, ``has_flush``, ``has_quad``, plus
    ``np``, ``shuffle`` and ``randint``.
    """

    # Number of cards dealt for a player's opening move and for each
    # subsequent (mid-game) move.
    START_HAND_SIZE = 5
    MIDGAME_HAND_SIZE = 3

    # Per-number card counts of a row holding numbers 1 and 10..13
    # (the pattern used to tell a royal flush from a plain straight flush).
    _ROYAL_NUMBER_COUNTS = (1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1)

    def __init__(self):
        """Create the opponent and set the number of turns each player takes."""
        self.opponent = RandomAgent()
        self.total_turns = 5

    def reset(self):
        """Start a new game and return the player's encoded opening actions.

        Clears both boards and discard piles, builds and shuffles a fresh
        deck, and picks the starting player at random. If the opponent
        starts, its opening move is played immediately.

        Returns:
            A list with one flat array per candidate opening move (see
            ``_encode_actions``); the index of the chosen entry is the
            ``action`` to pass to ``step``.
        """
        # Initialise player boards
        self.player_one = self._empty_board()
        self.player_two = self._empty_board()

        self.one_discard_pile = np.zeros((4, 13), dtype=np.int32)
        self.two_discard_pile = np.zeros((4, 13), dtype=np.int32)

        self.players = [self.player_one, self.player_two]
        self.turns = [0, 0]

        self.cards = [(i, j) for i in range(4) for j in range(13)]
        shuffle(self.cards)

        self.start_player = randint(0, 1)

        if self.start_player != 0:
            # The opponent goes first, so play its opening move right away.
            enumerations = enumerate_start_moves(
                self._deal(self.START_HAND_SIZE))
            action = self.opponent.get_action(enumerations)
            self.players[1] = enumerations[action]
            self.turns[1] = 1

        self.enumerations = enumerate_start_moves(
            self._deal(self.START_HAND_SIZE))
        return self._encode_actions(self.enumerations)

    def step(self, action):
        """Apply the player's chosen move, then let the opponent respond.

        Args:
            action: Index into the candidate list returned by the previous
                ``reset``/``step`` call.

        Returns:
            ``(observation, reward, done)``. While the game is running this
            is ``(encoded_actions, 0, False)``. Once both players have taken
            ``self.total_turns`` turns it is ``(boards, margin, True)`` where
            ``boards`` is both players' boards flattened and concatenated and
            ``margin`` is player 0's royalty score minus player 1's.
        """
        self.players[0] = self.enumerations[action]
        self.turns[0] += 1

        # The opening move has no discard; later moves do.
        if self.turns[0] > 1:
            self.one_discard_pile = self.discards[action]

        if self._game_over():
            return self._final_result()

        # Opponent's turn. Only the opening move deals five cards; later
        # moves deal three, matching the player's own mid-game deal.
        if self.turns[1] == 0:
            enumerations = enumerate_start_moves(
                self._deal(self.START_HAND_SIZE))
            discards = None
        else:
            enumerations, discards = enumerate_midgame_moves(
                self._deal(self.MIDGAME_HAND_SIZE),
                self.players[1], self.two_discard_pile)

        reply = self.opponent.get_action(enumerations)
        self.players[1] = enumerations[reply]
        if discards is not None:
            # Track the opponent's discards the same way as the player's.
            self.two_discard_pile = discards[reply]
        self.turns[1] += 1

        if self._game_over():
            return self._final_result()

        self.enumerations, self.discards = enumerate_midgame_moves(
            self._deal(self.MIDGAME_HAND_SIZE),
            self.players[0], self.one_discard_pile)

        return self._encode_actions(self.enumerations, self.discards), 0, False

    def play(self, player, row, suit, number):
        """Mark card ``(suit, number)`` as placed on ``row`` of ``player``'s board."""
        self.players[player][row][suit, number] = 1

    def calculate_royalty(self, player):
        """Return the total royalty points of ``player``'s three rows.

        The top row scores whatever ``has_pair``/``has_triple`` report (a
        triple overrides a pair). For the middle and bottom rows the best
        detected hand wins; the middle row (index 1) is worth twice the
        bottom row (index 2).
        """
        board = self.players[player]
        row_points = [0, 0, 0]

        # Top row: pair first, then triple so that a triple takes precedence.
        exists, score = has_pair(board[0])
        if exists:
            row_points[0] = score

        exists, score = has_triple(board[0])
        if exists:
            row_points[0] = score

        # Middle and bottom rows. Checks run from weakest to strongest hand
        # so that a later match overwrites an earlier one.
        for row in range(1, 3):
            cards = board[row]
            middle = row == 1
            points = 0

            pair = bool(has_pair(cards)[0])
            triple = bool(has_triple(cards)[0])
            straight = bool(has_straight(cards))
            flush = bool(has_flush(cards))

            if triple:
                points = 2 if middle else 0
            if straight:
                points = 4 if middle else 2
            if flush:
                points = 8 if middle else 4
            if pair and triple:  # full house
                points = 12 if middle else 6
            if has_quad(cards):
                points = 20 if middle else 10
            if straight and flush:
                # Compare numerically: the text form of NumPy scalars
                # differs between NumPy versions, so a string comparison
                # is not reliable here.
                number_counts = np.sum(cards, axis=0)
                if np.array_equal(number_counts, self._ROYAL_NUMBER_COUNTS):
                    points = 50 if middle else 25  # royal flush
                else:
                    points = 30 if middle else 15  # straight flush

            row_points[row] = points

        return sum(row_points)

    def display(self, player):
        """Print ``player``'s board, one line per row, as ``<number><suit>`` tokens."""
        suits = {0: 'S', 1: 'H', 2: 'C', 3: 'D'}
        board = self.players[player]

        for row in range(3):
            # Cards are listed by ascending number, then by suit index.
            tokens = [
                str(num + 1) + str(suits[suit]) + ' '
                for num in range(13)
                for suit in range(4)
                if board[row][suit, num] == 1
            ]
            print(str(row + 1) + ':' + ''.join(tokens))

    @staticmethod
    def _empty_board():
        """Return three empty ``(4, 13)`` int32 rows."""
        return [np.zeros((4, 13), dtype=np.int32) for _ in range(3)]

    def _deal(self, count):
        """Remove ``count`` randomly chosen cards from the deck and return them."""
        hand = []
        for _ in range(count):
            idx = randint(0, len(self.cards) - 1)
            hand.append(self.cards.pop(idx))
        return hand

    def _game_over(self):
        """Return True once both players have taken ``self.total_turns`` turns."""
        return self.turns == [self.total_turns, self.total_turns]

    def _final_result(self):
        """Return the terminal ``(boards, score margin, True)`` tuple."""
        margin = self.calculate_royalty(0) - self.calculate_royalty(1)
        boards = np.append(
            np.array(self.players[0]).flatten(),
            np.array(self.players[1]).flatten())
        return boards, margin, True

    def _encode_actions(self, enumerations, discards=None):
        """Encode each candidate move as one flat array.

        Every entry is player 0's current board, the candidate board and the
        candidate's discard array, each flattened and concatenated in that
        order. When ``discards`` is None (opening move) an all-zero
        ``(4, 13)`` array stands in for the discard part.
        """
        # The current board is the same for every candidate; flatten it once.
        board_flat = np.array(self.players[0]).flatten()

        if discards is None:
            blank = np.zeros((4, 13))
            discards = [blank for _ in enumerations]

        return [
            np.append(
                np.append(board_flat, np.array(candidate).flatten()),
                discard.flatten())
            for candidate, discard in zip(enumerations, discards)
        ]
コード例 #2
0
class Experiment(object):
    """
    Wrapper that wires an agent to an MDP and runs episodes between them.
    """
    def __init__(self, algorithm, problem, max_episodes=100, max_t=100):
        """
        Initialize experiment with the right algorithm, problem

        :param algorithm: name of the agent algorithm (see setup_algorithm)
        :param problem: name of the MDP to solve (see setup_problem)
        :param max_episodes: number of episodes run by main_loop
        :param max_t: maximum number of steps per episode
        :raises ValueError: if the problem or algorithm name is unknown
        """
        # The problem comes first: the agent is built from the MDP's
        # action set and grid shape.
        self.setup_problem(problem)
        self.setup_algorithm(algorithm)
        self.max_episodes = max_episodes
        self.max_t = max_t

    def setup_problem(self, problem):
        """
        Initialize MDP

        Sets self.mdp, self.actions and self.grid_shape.

        :raises ValueError: if no MDP is implemented for the given name
        """
        self.problem = problem
        if problem == "treasure_hunt":
            from treasure_hunt import TreasureHunt
            self.mdp = TreasureHunt(n_states=9, grid_shape=(3, 3))
        else:
            # Fail here with a clear message instead of an AttributeError
            # on self.mdp further down.
            raise ValueError("No problem implemented for '"
                             "{}'".format(problem))
        self.mdp.configure()
        self.actions = self.mdp.actions
        self.grid_shape = self.mdp.grid_shape

    def setup_algorithm(self, algorithm):
        """
        Initialize agent that uses the given algorithm

        :raises ValueError: if no agent is implemented for the given name
        """
        if algorithm == "random":
            from random_agent import RandomAgent
            self.agent = RandomAgent(self.actions, self.grid_shape)
        # Add agent here
        else:
            raise ValueError("No algorithm implemented for '"
                             "{}'".format(algorithm))

    def main_loop(self):
        """
        Run the main loop interacting between the agent and the environment.

        Each episode resets the MDP and runs until it reports done or
        max_t steps have elapsed; the MDP is rendered after every step.

        :return: list with the summed reward of each episode
        """
        r_list = []
        # range (not xrange) so the loop runs on both Python 2 and 3.
        for ep in range(self.max_episodes):
            total_r = 0
            new_state = self.mdp.reset()
            for t in range(self.max_t):
                state = new_state
                action = self.agent.get_action(state)
                new_state, reward, done, _ = self.mdp.step(action)
                self.agent.update(state,
                                  action,
                                  reward,
                                  new_state,
                                  is_done=done)
                total_r += reward
                self.mdp.render()
                if done:
                    break
            r_list.append(total_r)
        # Plot the found value functions, if applicable
        self.agent.plot()
        return r_list