Python OFCBoard Exemples, rlofc.ofc_board.OFCBoard Python Exemples

Exemple #1

0

Afficher le fichier

def test_ofc_random_policy_can_complete():
    """Check that an OFCRandomAgent fills a board after 13 placements.

    Each card popped from a fresh deck is offered to the agent, and the
    street id the agent returns is used to place that card on the board.
    """
    agent = OFCRandomAgent()

    deck = DeckGenerator.new_deck()
    board = OFCBoard()

    # ``range`` instead of ``xrange`` so the test also runs on Python 3;
    # the loop index itself is never needed.
    for _ in range(13):
        draw = deck.pop()
        street_id = agent.place_new_card(draw, board)
        board.place_card_by_id(draw, street_id)

    # board.pretty()  # debugging aid, left disabled
    assert board.is_complete()

Exemple #2

0

Afficher le fichier

Fichier : test_ofc_board.py Projet : DexGroves/rl-ofc

def test_available_streets():
    """Check ``get_free_streets`` on a full board and on a partial board.

    In the cases below a street holding fewer cards than in the full
    layout is flagged with 1 and a full street with 0. The flag order is
    presumably front, mid, back -- confirm against OFCBoard.
    """
    board = OFCBoard()

    # (front cards, mid cards, back cards, expected flags)
    layouts = (
        (['6s', '6d', '5s'],
         ['6d', '6c', '4s', '2d', '3d'],
         ['Ah', '2h', '3h', '4h', '5h'],
         [0, 0, 0]),
        (['6s', '6d'],
         ['6d', '6c', '2d', '3d'],
         ['Ah', '2h', '3h', '4h', '5h'],
         [1, 1, 0]),
    )

    # The same board object is reused; only its three hands are swapped.
    for front_cards, mid_cards, back_cards, expected_flags in layouts:
        board.front = OFCHand(front_cards)
        board.mid = OFCHand(mid_cards)
        board.back = OFCHand(back_cards)

        assert board.get_free_streets() == expected_flags

Exemple #3

0

Afficher le fichier

Fichier : test_ofc_board.py Projet : DexGroves/rl-ofc

def test_is_foul():
    """Check ``is_foul`` on one fouled and one legal layout.

    The two layouts differ only in one mid card ('4s' versus '9s').
    NOTE(review): '6d' appears in both the front and the mid hand of each
    layout, which a single real deck could not produce -- confirm this is
    intentional.
    """
    board = OFCBoard()

    # (front cards, mid cards, back cards, whether a foul is expected)
    layouts = (
        (['6s', '6d', '5s'],
         ['6d', '6c', '4s', '2d', '3d'],
         ['Ah', '2h', '3h', '4h', '5h'],
         True),
        (['6s', '6d', '5s'],
         ['6d', '6c', '9s', '2d', '3d'],
         ['Ah', '2h', '3h', '4h', '5h'],
         False),
    )

    # The same board object is reused; only its three hands are swapped.
    for front_cards, mid_cards, back_cards, expect_foul in layouts:
        board.front = OFCHand(front_cards)
        board.mid = OFCHand(mid_cards)
        board.back = OFCHand(back_cards)

        # Compare truthiness only, as a bare ``assert`` would.
        assert bool(board.is_foul()) is expect_foul

Exemple #4

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

    def reset(self):
        """Start a new game: fresh boards, fresh deck, first card selected.

        If the random draw says the player does not go first, the
        opponent's turn is executed before returning.
        """
        self.plyr_board, self.oppo_board = OFCBoard(), OFCBoard()
        self.game_over, self.reward = False, 0

        fresh_deck = DeckGenerator.new_deck()
        self.deck = fresh_deck

        # Starting hands are read from the front of the deck without being
        # removed from it; deck[5] is dealt to neither side.
        self.plyr_cards = sorted(fresh_deck[:5])
        self.oppo_cards = sorted(fresh_deck[6:11])

        # The hand is sorted, so this takes its last (highest-sorting) card.
        self.current_card = self.plyr_cards.pop()

        # 0 means the opponent acts first.
        self.plyr_goes_first = random.choice([0, 1])
        if not self.plyr_goes_first:
            self.execute_opponent_turn()

Exemple #5

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

    def reset(self):
        """Reinitialise all per-game state and pick the first decision card.

        Runs the opponent's turn immediately when ``plyr_goes_first`` is
        drawn as 0.
        """
        self.reward = 0
        self.game_over = False

        self.plyr_board = OFCBoard()
        self.oppo_board = OFCBoard()

        new_deck = DeckGenerator.new_deck()
        self.deck = new_deck

        # Both five-card starting hands are slices of the deck; the cards
        # stay in the deck list, and index 5 belongs to neither hand.
        plyr_start = new_deck[0:5]
        oppo_start = new_deck[6:11]
        self.plyr_cards = sorted(plyr_start)
        self.oppo_cards = sorted(oppo_start)

        # Take the last card of the sorted starting hand as the first decision.
        self.current_card = self.plyr_cards.pop()

        self.plyr_goes_first = random.choice([0, 1])

        opponent_opens = self.plyr_goes_first == 0
        if opponent_opens:
            self.execute_opponent_turn()

Exemple #6

0

Afficher le fichier

Fichier : test_ofc_board.py Projet : DexGroves/rl-ofc

def test_get_royalties():
    """Check the royalty total of a fixed three-street layout.

    The front holds a pair of sixes, the mid three nines and the back a
    five-high heart straight flush; the board is expected to report 18.
    """
    front_cards = ['6s', '6d', '5s']
    mid_cards = ['9d', '9c', '9s', '2d', '3d']
    back_cards = ['Ah', '2h', '3h', '4h', '5h']

    board = OFCBoard()
    board.front = OFCHand(front_cards)
    board.mid = OFCHand(mid_cards)
    board.back = OFCHand(back_cards)

    total_royalties = board.get_royalties()
    assert total_royalties == 18

Exemple #7

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

class OFCEnv(object):
    """Handle an OFC game in a manner conducive to PG RL."""

    def __init__(self, opponent, encoder_class=None):
        """Store the opponent, optionally build an encoder, start a game.

        Args:
            opponent: Kept on ``self.opponent``. Nothing in this class
                reads it; opponent moves are currently chosen at random.
            encoder_class: Optional zero-argument callable. When given,
                its result is stored on ``self.encoder``; when omitted,
                the ``encoder`` attribute is not created at all.
        """
        if encoder_class is not None:
            self.encoder = encoder_class()
        self.opponent = opponent
        self.reset()

    def reset(self):
        """Start a new game: fresh boards, fresh deck, first card selected."""
        self.plyr_board = OFCBoard()
        self.oppo_board = OFCBoard()

        self.game_over = False
        self.reward = 0

        self.deck = DeckGenerator.new_deck()
        # Starting hands are read from the front of the deck without being
        # removed from it; deck[5] is dealt to neither side.
        self.plyr_cards = sorted(self.deck[0:5])
        self.oppo_cards = sorted(self.deck[6:11])

        # The hand is sorted, so this takes its last card.
        self.current_card = self.plyr_cards.pop()

        self.plyr_goes_first = random.choice([0, 1])

        # 0 means the opponent acts before the player's first decision.
        if self.plyr_goes_first == 0:
            self.execute_opponent_turn()

    def step(self, action):
        """Advance the game state by one decision.

        Args:
            action: Street id handed to ``place_card_by_id`` together with
                the current decision card.
        """
        self.plyr_board.place_card_by_id(self.current_card, action)

        # Only do opponent turn if we have no cards left to lay
        if not self.plyr_cards:
            self.plyr_cards.append(self.deck.pop())
            self.execute_opponent_turn()

        # The remaining deck size doubles as the end-of-game signal: once
        # it is 35 or fewer there is no further decision card.
        if len(self.deck) > 35:
            self.current_card = self.plyr_cards.pop()
        else:
            self.current_card = None
            self.execute_endgame()

    def observe(self):
        """Return information about the game state.

        Returns:
            Tuple ``(plyr_board, oppo_board, current_card, plyr_cards,
            game_over, reward)``.
        """
        game_state = (self.plyr_board,
                      self.oppo_board,
                      self.current_card,  # Current decision card
                      self.plyr_cards,    # i.e. remaining starting hand
                      self.game_over,     # Whether the game is over
                      self.reward)        # Score; 0 until the game ends
        return game_state

    def execute_opponent_turn(self):
        """Place every card the opponent holds on a random free street.

        Does nothing when the opponent board is already complete. If the
        opponent holds no cards, one is drawn from the deck first.
        """
        if not self.oppo_board.is_complete():
            if not self.oppo_cards:
                self.oppo_cards.append(self.deck.pop())

            while self.oppo_cards:
                oppo_card = self.oppo_cards.pop()
                free_streets = self.oppo_board.get_free_street_indices()
                oppo_action = random.choice(free_streets)  # For now!
                self.oppo_board.place_card_by_id(oppo_card, oppo_action)

    def execute_endgame(self):
        """Record the final score as the reward and mark the game over."""
        self.reward = self.calculate_score()
        self.game_over = True

    def calculate_score(self):
        """Return the score from the player's point of view.

        Both boards foul: 0. Only one board fouls: the other side gets its
        royalties plus 6. Otherwise: street exchange plus the royalty
        difference.
        """
        plyr_royalties = self.plyr_board.get_royalties()
        oppo_royalties = self.oppo_board.get_royalties()

        plyr_fouled = self.plyr_board.is_foul()
        oppo_fouled = self.oppo_board.is_foul()

        if plyr_fouled and oppo_fouled:
            return 0
        if plyr_fouled:
            return (-1 * oppo_royalties) - 6
        if oppo_fouled:
            return plyr_royalties + 6

        exch = self.calculate_scoop(self.plyr_board, self.oppo_board)
        return exch + plyr_royalties - oppo_royalties

    def calculate_scoop(self, lhs_board, rhs_board):
        """Return the net street points for the LHS board.

        Each of front, mid and back contributes +1, -1 or 0. A clean sweep
        of all three in either direction is doubled to +6 or -6.
        """
        lhs_won = (
            self.calculate_street(lhs_board.front, rhs_board.front)
            + self.calculate_street(lhs_board.mid, rhs_board.mid)
            + self.calculate_street(lhs_board.back, rhs_board.back)
        )

        if abs(lhs_won) == 3:   # Scoop, one way or the other
            lhs_won = lhs_won * 2

        return lhs_won

    def calculate_street(self, lhs_hand, rhs_hand):
        """Compare two hands: 1 if LHS wins, -1 if RHS wins, 0 on a tie.

        The hand whose ``get_rank()`` value is lower wins.
        """
        lhs_rank = lhs_hand.get_rank()
        rhs_rank = rhs_hand.get_rank()

        if lhs_rank < rhs_rank:
            return 1
        if rhs_rank < lhs_rank:
            return -1
        return 0

Exemple #8

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

    def play_game(self):
        """Rollout one OFC game and return the LHS score and LHS/RHS boards.

        Both agents place five starting cards and then eight drawn cards,
        alternating LHS then RHS for every card.

        Returns:
            Tuple ``(lhs_score, lhs_board, rhs_board)``.
        """
        deck = DeckGenerator.new_deck()

        lhs_board = OFCBoard()
        rhs_board = OFCBoard()

        # Starting hands are read from the front of the deck without being
        # removed; later draws pop from the end. deck[5] is never dealt.
        lhs_start = deck[0:5]
        rhs_start = deck[6:11]

        # Starting hand one card at a time for now. In future, give
        # all cards at once
        for lhs_card, rhs_card in zip(lhs_start, rhs_start):
            street_id = self.lhs_agent.place_new_card(lhs_card, lhs_board)
            lhs_board.place_card_by_id(lhs_card, street_id)

            street_id = self.rhs_agent.place_new_card(rhs_card, rhs_board)
            rhs_board.place_card_by_id(rhs_card, street_id)

        # Eight cards one at a time (``range`` keeps this Python 3 safe)
        for _ in range(8):
            card = deck.pop()
            street_id = self.lhs_agent.place_new_card(card, lhs_board)
            lhs_board.place_card_by_id(card, street_id)

            card = deck.pop()
            street_id = self.rhs_agent.place_new_card(card, rhs_board)
            rhs_board.place_card_by_id(card, street_id)

        lhs_royalties = lhs_board.get_royalties()
        rhs_royalties = rhs_board.get_royalties()

        lhs_fouled = lhs_board.is_foul()
        rhs_fouled = rhs_board.is_foul()

        # Both foul: no points. One side fouls: the other side collects
        # its royalties plus 6. Otherwise: street exchange plus royalties.
        if lhs_fouled and rhs_fouled:
            lhs_score = 0
        elif lhs_fouled:
            lhs_score = (-1 * rhs_royalties) - 6
        elif rhs_fouled:
            lhs_score = lhs_royalties + 6
        else:
            exch = self.calculate_scoop(lhs_board, rhs_board)
            lhs_score = exch + lhs_royalties - rhs_royalties

        return lhs_score, lhs_board, rhs_board

Exemple #9

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

class OFCEnv(object):
    """Handle an OFC game in a manner conducive to PG RL."""

    def __init__(self, opponent, encoder_class=None):
        """Remember the opponent, build the optional encoder, and reset.

        Args:
            opponent: Stored as ``self.opponent``; no method of this class
                uses it, since the opponent currently plays at random.
            encoder_class: Optional callable invoked with no arguments to
                create ``self.encoder``. If it is None, no ``encoder``
                attribute is set.
        """
        if encoder_class is not None:
            self.encoder = encoder_class()
        self.opponent = opponent
        self.reset()

    def reset(self):
        """Reinitialise all per-game state and pick the first decision card."""
        self.plyr_board = OFCBoard()
        self.oppo_board = OFCBoard()

        self.game_over = False
        self.reward = 0

        self.deck = DeckGenerator.new_deck()
        # Both starting hands are slices of the deck; the cards stay in the
        # deck list, and index 5 belongs to neither hand.
        self.plyr_cards = sorted(self.deck[0:5])
        self.oppo_cards = sorted(self.deck[6:11])

        # Last card of the sorted starting hand becomes the first decision.
        self.current_card = self.plyr_cards.pop()

        self.plyr_goes_first = random.choice([0, 1])

        # A draw of 0 lets the opponent act before the player.
        if self.plyr_goes_first == 0:
            self.execute_opponent_turn()

    def step(self, action):
        """Advance the game state by one decision.

        Args:
            action: Street id on which the current decision card is placed.
        """
        self.plyr_board.place_card_by_id(self.current_card, action)

        # Only do opponent turn if we have no cards left to lay
        if not self.plyr_cards:
            self.plyr_cards.append(self.deck.pop())
            self.execute_opponent_turn()

        # The game ends as soon as the deck holds 35 cards or fewer.
        if len(self.deck) > 35:
            self.current_card = self.plyr_cards.pop()
        else:
            self.current_card = None
            self.execute_endgame()

    def observe(self):
        """Return information about the game state.

        Returns:
            A 6-tuple: player board, opponent board, current decision
            card, remaining player cards, game-over flag, reward.
        """
        game_state = (
            self.plyr_board,
            self.oppo_board,
            self.current_card,  # Current decision card
            self.plyr_cards,  # i.e. remaining starting hand
            self.game_over,  # Whether the game is over
            self.reward)  # Score; stays 0 until the game ends
        return game_state

    def execute_opponent_turn(self):
        """Lay all cards held by the opponent on randomly chosen free streets.

        A complete opponent board makes this a no-op. With an empty hand,
        one card is popped from the deck before placing.
        """
        if self.oppo_board.is_complete():
            return

        if not self.oppo_cards:
            self.oppo_cards.append(self.deck.pop())

        while self.oppo_cards:
            oppo_card = self.oppo_cards.pop()
            free_streets = self.oppo_board.get_free_street_indices()
            oppo_action = random.choice(free_streets)  # For now!
            self.oppo_board.place_card_by_id(oppo_card, oppo_action)

    def execute_endgame(self):
        """Store the final score in ``reward`` and set ``game_over``."""
        self.reward = self.calculate_score()
        self.game_over = True

    def calculate_score(self):
        """Return the final score as seen by the player.

        A double foul scores 0. A single foul costs the fouling side the
        other side's royalties plus 6. With no foul the score is the
        street exchange plus the player's royalties minus the opponent's.
        """
        plyr_royalties = self.plyr_board.get_royalties()
        oppo_royalties = self.oppo_board.get_royalties()

        if self.plyr_board.is_foul() and self.oppo_board.is_foul():
            score = 0

        elif self.plyr_board.is_foul():
            score = (-1 * oppo_royalties) - 6

        elif self.oppo_board.is_foul():
            score = plyr_royalties + 6

        else:
            exch = self.calculate_scoop(self.plyr_board, self.oppo_board)
            score = exch + plyr_royalties - oppo_royalties

        return score

    def calculate_scoop(self, lhs_board, rhs_board):
        """Sum the three street results for LHS, doubling a clean sweep.

        Returns:
            An int between -6 and 6; +/-3 is reported as +/-6.
        """
        street_pairs = (
            (lhs_board.front, rhs_board.front),
            (lhs_board.mid, rhs_board.mid),
            (lhs_board.back, rhs_board.back),
        )
        lhs_won = sum(self.calculate_street(lhs, rhs)
                      for lhs, rhs in street_pairs)

        if abs(lhs_won) == 3:  # Scoop, one way or the other
            lhs_won = lhs_won * 2

        return lhs_won

    def calculate_street(self, lhs_hand, rhs_hand):
        """Return 1, -1 or 0 for an LHS win, an RHS win or a tie.

        The lower ``get_rank()`` value wins the street.
        """
        lhs_rank = lhs_hand.get_rank()
        rhs_rank = rhs_hand.get_rank()

        if lhs_rank < rhs_rank:
            return 1
        if rhs_rank < lhs_rank:
            return -1
        return 0

Exemple #10

0

Afficher le fichier

Fichier : ofc_environment.py Projet : DexGroves/rl-ofc

    def play_game(self):
        """Rollout one OFC game and return the LHS score and LHS/RHS boards.

        The two agents alternate, LHS first, over five starting cards and
        then eight cards popped from the deck.

        Returns:
            Tuple ``(lhs_score, lhs_board, rhs_board)``.
        """
        deck = DeckGenerator.new_deck()

        lhs_board = OFCBoard()
        rhs_board = OFCBoard()

        # Slices leave the deck untouched; deck[5] goes to neither side and
        # later draws are popped from the opposite end of the list.
        lhs_start = deck[0:5]
        rhs_start = deck[6:11]

        sides = ((self.lhs_agent, lhs_board), (self.rhs_agent, rhs_board))

        # Starting hand one card at a time for now. In future, give
        # all cards at once
        for start_cards in zip(lhs_start, rhs_start):
            for (agent, board), card in zip(sides, start_cards):
                street_id = agent.place_new_card(card, board)
                board.place_card_by_id(card, street_id)

        # Eight cards one at a time; ``range`` also exists on Python 3,
        # unlike ``xrange``.
        for _ in range(8):
            for agent, board in sides:
                card = deck.pop()
                street_id = agent.place_new_card(card, board)
                board.place_card_by_id(card, street_id)

        lhs_royalties = lhs_board.get_royalties()
        rhs_royalties = rhs_board.get_royalties()

        # A double foul is a wash; a single foul hands the other side its
        # royalties plus 6; otherwise streets and royalties are compared.
        if lhs_board.is_foul() and rhs_board.is_foul():
            lhs_score = 0

        elif lhs_board.is_foul():
            lhs_score = (-1 * rhs_royalties) - 6

        elif rhs_board.is_foul():
            lhs_score = lhs_royalties + 6

        else:
            exch = self.calculate_scoop(lhs_board, rhs_board)
            lhs_score = exch + lhs_royalties - rhs_royalties

        return lhs_score, lhs_board, rhs_board

Exemple #11

0

Afficher le fichier

Fichier : ofc_environment-checkpoint.py Projet : igenic/deep-rl-ofc-poker

    def play_game_human_cpu(self):
        """Play one OFC game while printing the boards as it progresses.

        The LHS agent places all five of its starting cards first, with
        the LHS board printed after each one. The RHS agent then places
        its five. Eight further rounds alternate LHS then RHS, printing
        both boards before each round and once more at the end.

        Returns:
            Tuple ``(lhs_score, lhs_board, rhs_board)``.
        """
        deck = DeckGenerator.new_deck()

        lhs_board = OFCBoard()
        rhs_board = OFCBoard()

        # Slices leave the deck untouched; deck[5] goes to neither side and
        # later draws are popped from the opposite end of the list.
        lhs_start = deck[0:5]
        rhs_start = deck[6:11]

        # Starting hand one card at a time for now. In future, give
        # all cards at once
        lhs_board.pretty()
        # NOTE(review): the trailing comma is a Python 2 idiom for
        # suppressing the newline; on Python 3 it only builds a discarded
        # tuple. Confirm the target version before changing it.
        print('Player 1 starting cards;'),
        Card.print_pretty_cards([Card.new(card) for card in lhs_start])
        for card in lhs_start:
            street_id = self.lhs_agent.place_new_card(card, lhs_board)
            lhs_board.place_card_by_id(card, street_id)
            lhs_board.pretty()

        for card in rhs_start:
            street_id = self.rhs_agent.place_new_card(card, rhs_board)
            rhs_board.place_card_by_id(card, street_id)
        print('')

        # Eight cards one at a time
        for _ in range(8):
            self.print_both_boards(lhs_board, rhs_board)
            card = deck.pop()
            street_id = self.lhs_agent.place_new_card(card, lhs_board)
            lhs_board.place_card_by_id(card, street_id)

            card = deck.pop()
            street_id = self.rhs_agent.place_new_card(card, rhs_board)
            rhs_board.place_card_by_id(card, street_id)

        print('Final Boards')
        self.print_both_boards(lhs_board, rhs_board)

        lhs_royalties = lhs_board.get_royalties()
        rhs_royalties = rhs_board.get_royalties()

        # A double foul is a wash; a single foul hands the other side its
        # royalties plus 6; otherwise streets and royalties are compared.
        if lhs_board.is_foul() and rhs_board.is_foul():
            lhs_score = 0

        elif lhs_board.is_foul():
            lhs_score = (-1 * rhs_royalties) - 6

        elif rhs_board.is_foul():
            lhs_score = lhs_royalties + 6

        else:
            exch = self.calculate_scoop(lhs_board, rhs_board)
            lhs_score = exch + lhs_royalties - rhs_royalties

        return lhs_score, lhs_board, rhs_board