def reset(self):
    """Start a fresh game and return the first observation.

    A new ``Table`` is created with the configured blinds, every player is
    reset to the configured starting stakes and attached to that table, and
    the per-player stake bookkeeping used for rewards is re-initialised.

    Returns:
        The observation built by ``self.observation_space`` for the player
        who acts first in the new round.
    """
    self.done = False

    table = Table(small_blind=self.small_blind, big_blind=self.big_blind)
    self.table = table

    starting_stakes = self.stakes
    for seat, player in enumerate(self.players):
        player.reset(stakes=starting_stakes)
        player.table = table
        self.players_last_stakes[seat] = starting_stakes

    # The table gets its own list so it can drop players without touching
    # the environment's roster.
    table.players = list(self.players)
    table.new_round()

    return self.observation_space(table.next_player)
def test_new_bet_round():
    """Walk a four-player hand from PREFLOP to GAME_OVER.

    At the start of every bet round the test checks the round marker, the
    number of board cards, which seat acts first, and that no active player
    is flagged as having called yet.
    """
    table = Table(small_blind=10, big_blind=20)
    for _ in range(4):
        table.add_player(Player(100, table))
    table.new_round()

    # --- PREFLOP: action starts after the big blind ---
    assert table.bet_round == BetRound.PREFLOP
    assert table.last_bet_raise_delta == table.big_blind
    assert len(table.board) == 0
    assert table.next_player_idx == table.next_active_seat(table.big_blind_player)
    assert not any(p.has_called for p in table.active_players)

    table.next_player.raise_bet(50)
    for _ in range(3):
        table.next_player.call_check()
    assert table.all_players_called()
    # The round marker only advances on an explicit start_next_bet_round().
    assert table.bet_round == BetRound.PREFLOP

    # --- FLOP: action starts after the dealer ---
    table.start_next_bet_round()
    assert table.bet_round == BetRound.FLOP
    assert len(table.board) == 3
    assert table.next_player_idx == table.next_active_seat(table.dealer)
    assert not any(p.has_called for p in table.active_players)

    for _ in range(4):
        table.next_player.call_check()
    assert table.bet_round == BetRound.FLOP
    assert table.all_players_called()

    # --- TURN: one player folds ---
    table.start_next_bet_round()
    assert table.bet_round == BetRound.TURN
    assert len(table.board) == 4
    assert table.next_player_idx == table.next_active_seat(table.dealer)
    assert not any(p.has_called for p in table.active_players)

    table.next_player.raise_bet(20)
    table.next_player.fold()
    for _ in range(2):
        table.next_player.call_check()
    assert len(table.active_players) == 3
    assert table.all_players_called()
    assert table.bet_round == BetRound.TURN

    # --- RIVER: the three remaining players check ---
    table.start_next_bet_round()
    assert table.bet_round == BetRound.RIVER
    assert len(table.board) == 5
    assert table.next_player_idx == table.next_active_seat(table.dealer)
    assert not any(p.has_called for p in table.active_players)

    for _ in range(3):
        table.next_player.call_check()
    assert table.all_players_called()
    assert table.bet_round == BetRound.RIVER

    # --- SHOWDOWN, then GAME_OVER once the round is ended ---
    table.start_next_bet_round()
    assert table.bet_round == BetRound.SHOWDOWN
    assert len(table.board) == 5

    table.end_round()
    assert table.bet_round == BetRound.GAME_OVER
def test_split_pot():
    """Check how pots are split when players with unequal stacks go all-in.

    Three tables with different stack distributions are played through one
    scripted betting sequence each; afterwards the ``highest_bet`` and
    ``stakes`` of every resulting pot are compared with the expected values.
    """

    def seat_players(stacks, named):
        # Build a 10/20 table, seat one player per stack and deal a round.
        table = Table(small_blind=10, big_blind=20)
        for seat, stack in enumerate(stacks):
            if named:
                table.add_player(Player(stack, table, name=str(seat)))
            else:
                table.add_player(Player(stack, table))
        table.new_round()
        return table

    def check_pots(table, expected):
        # expected: one (highest_bet, stakes) pair per pot, in pot order.
        for pot_idx, (highest_bet, stakes) in enumerate(expected):
            assert table.pots[pot_idx].highest_bet == highest_bet
            assert table.pots[pot_idx].stakes == stakes

    # Table 1
    t1 = seat_players([100, 100, 100, 50], named=True)
    t1.next_player.call_check()
    t1.next_player.raise_bet(100)
    for _ in range(3):
        t1.next_player.call_check()
    # Disabled in the original: assert len(t1.pots) == 2
    check_pots(t1, [(50, 200), (100, 150)])

    # Table 2
    t2 = seat_players([100, 100, 50, 25], named=False)
    t2.next_player.call_check()
    t2.next_player.raise_bet(100)
    for _ in range(3):
        t2.next_player.call_check()
    # Disabled in the original: assert len(t2.pots) == 3
    check_pots(t2, [(25, 100), (50, 75), (100, 100)])

    # Table 3
    t3 = seat_players([100, 10, 20, 100], named=True)
    t3.next_player.call_check()
    t3.next_player.raise_bet(40)
    t3.next_player.raise_bet(80)
    t3.next_player.call_check()
    assert len(t3.pots) == 3
    check_pots(t3, [(10, 40), (20, 30), (100, 160)])
def play_game_1():
    """Play a short scripted hand on a default table with four players.

    The script is: raise 75, raise 100, fold, call/check, then the next
    player puts in their whole stack; afterwards the bet round is advanced,
    the round is ended and a new round is dealt.

    The original wrapped ``t.next_player`` in a local function decorated with
    ``@property``.  Outside a class body that just binds the name to a
    ``property`` object, so every ``n.<action>()`` call failed with
    ``AttributeError``.  The player to act is now read from the table
    directly before each action.
    """
    t = Table()
    for _ in range(4):
        # add_player is the seating method used by every other scenario here.
        t.add_player(Player(100, table=t))
    t.new_round()

    t.next_player.raise_bet(75)
    t.next_player.raise_bet(100)
    t.next_player.fold()
    t.next_player.call_check()

    # Look the player up once so the stakes belong to the same player that acts.
    player = t.next_player
    player.action_from_amount(player.stakes)

    t.start_next_bet_round()
    t.end_round()
    t.new_round()
def play_game_0():
    """Play four scripted rounds and expect a single player to remain.

    The original wrapped ``t.next_player`` in a local function decorated with
    ``@property``.  Outside a class body that just binds the name to a
    ``property`` object, so every ``n.<action>()`` call failed with
    ``AttributeError``.  The player to act is now read from the table
    directly before each action.
    """
    t = Table()
    for _ in range(4):
        t.add_player(Player(100, table=t))
    t.new_round()

    def go_all_in():
        # Look the player up once so the stakes belong to the player that acts.
        player = t.next_player
        player.action_from_amount(player.stakes)

    # Round 1: one raise, everybody else folds.
    t.next_player.raise_bet(81)
    for _ in range(3):
        t.next_player.fold()
    t.start_next_bet_round()
    t.end_round()
    t.new_round()

    # Round 2: raise, two folds, one call.
    t.next_player.raise_bet(80)
    t.next_player.fold()
    t.next_player.fold()
    t.next_player.call_check()
    t.start_next_bet_round()
    t.end_round()
    t.new_round()

    # Round 3: all-in, raise, all-in.
    go_all_in()
    t.next_player.raise_bet(202)
    go_all_in()
    t.start_next_bet_round()
    t.end_round()
    t.new_round()

    # Round 4: raise answered by an all-in.
    t.next_player.raise_bet(209)
    go_all_in()
    t.start_next_bet_round()
    t.end_round()

    assert len(t.players) == 1
def test_all_players_have_called():
    """``all_players_called`` turns true only after the fourth call.

    It must be false before each of the four players has acted, true once
    all of them have called/checked, and false again as soon as the next
    bet round starts.
    """
    table = Table()
    for _ in range(4):
        table.add_player(Player(100, table))
    table.new_round()

    for _ in range(4):
        assert not table.all_players_called()
        table.next_player.call_check()
    assert table.all_players_called()

    table.start_next_bet_round()
    assert not table.all_players_called()
class HoldemEnv(gym.Env):
    """Gym environment that drives a ``Table`` of ``Player`` objects.

    Actions are integers:

    * ``0`` -> fold
    * ``1`` -> call or check
    * ``2`` -> all-in (bet the acting player's whole stack)
    * ``3 .. stakes * player_amount + 2`` -> raise by ``action - 2``

    NOTE(review): ``observation_space`` is a method here, not a space
    attribute; confirm no caller expects the usual gym attribute.
    """

    def __init__(self, player_amount=4, small_blind=25, big_blind=50, stakes=1000):
        """Create the players and start the first game.

        Args:
            player_amount: Number of players seated at the table.
            small_blind: Small blind passed to every new ``Table``.
            big_blind: Big blind passed to every new ``Table``.
            stakes: Starting stakes of each player.
        """
        super().__init__()
        self.player_amount = player_amount
        self.small_blind = small_blind
        self.big_blind = big_blind
        self.stakes = stakes
        self.table = None
        self.done = True
        # 0 -> FOLD
        # 1 -> CALL || CHECK
        # 2 -> ALL_IN
        # 3..(stakes * player_amount + 2) -> bet_amount + 2
        self.action_space = Discrete(self.stakes_in_game + 3)
        self.players = [Player(stakes, name=str(i)) for i in range(player_amount)]
        # Stakes of each player as of their last step; used to compute rewards.
        self.players_last_stakes = [stakes for _ in range(player_amount)]
        # Flags describing what happened during the most recent step.
        self.debug = {}
        # (action, player) of the most recent step; (-1, None) before any step.
        self.last_action = (-1, None)
        self.reset()

    def step(self, action: int):
        """Apply ``action`` for the player whose turn it is.

        Args:
            action: Encoded action; must be one of ``self.valid_actions``.

        Returns:
            A tuple ``(observation, reward, done, debug)`` where the
            observation is built for the player who just acted and the reward
            is the change in that player's stakes since their previous step.

        Raises:
            ValueError: If ``action`` is not currently valid.
        """
        dbg_end_round = False
        dbg_new_round = False
        dbg_winners = []
        dbg_new_bet_round = False

        player = self.table.next_player
        if action not in self.valid_actions:
            raise ValueError(f"Action {action} is not valid in this context")
        self._take_action(action, player)

        if self.table.all_players_called():
            self.table.start_next_bet_round()
            dbg_new_bet_round = True

        # A freshly dealt round can already have every player called, which
        # may lead straight to another showdown; hence a loop, not an `if`.
        while self.table.bet_round == BetRound.SHOWDOWN:
            dbg_end_round = True
            dbg_winners = self.table.end_round()
            if len(self.table.players) >= 2:
                self.table.new_round()
                dbg_new_round = True
                if self.table.all_players_called():
                    self.table.start_next_bet_round()
            else:
                self.done = True

        idx = self.players.index(player)
        reward = player.stakes - self.players_last_stakes[idx]
        self.players_last_stakes[idx] = player.stakes

        self.debug = {
            "new_bet_round": dbg_new_bet_round,
            "new_round": dbg_new_round,
            "end_round": dbg_end_round,
            "winners": dbg_winners,
        }
        self.last_action = action, player
        return self.observation_space(player), reward, self.done, self.debug

    def reset(self):
        """Start a new game and return the first observation.

        Creates a new table, resets every player to the starting stakes,
        deals a round and returns the observation for the player to act.
        """
        self.done = False
        self.table = Table(small_blind=self.small_blind, big_blind=self.big_blind)
        for idx, p in enumerate(self.players):
            p.reset(stakes=self.stakes)
            p.table = self.table
            self.players_last_stakes[idx] = self.stakes
        # The table gets a copy of the roster, not the list itself.
        self.table.players = self.players[:]
        self.table.new_round()
        return self.observation_space(self.table.next_player)

    @staticmethod
    def _take_action(action, player):
        """Translate an encoded ``action`` into the matching player call."""
        if action == 0:
            player.fold()
        elif action == 1:
            player.call_check()
        elif action == 2:
            player.action_from_amount(player.stakes)
        else:
            # Raise amounts are shifted by 2 to make room for the fixed actions.
            player.raise_bet(action - 2)

    @property
    def valid_actions(self):
        """Array of encoded actions the next player may take.

        Fold, call/check and all-in are always offered.  Raises are offered
        for every amount from ``to_call + last_bet_raise_delta`` up to the
        player's stakes, encoded as ``amount + 2``.
        """
        player = self.table.next_player
        to_call = player.to_call_amount()
        min_bet_amount = to_call + self.table.last_bet_raise_delta
        max_bet_amount = player.stakes

        actions = [0, 1, 2]
        if min_bet_amount <= max_bet_amount:
            actions.extend(range(min_bet_amount + 2, max_bet_amount + 3))
        return np.array(actions)

    def observation_space(self, player):
        """Build the observation vector from ``player``'s point of view.

        Layout: hand cards, five board slots (zero-padded), pot value,
        the player's stakes, the other players' stakes, the player's active
        flag, the other players' active flags.  Cards are divided by
        ``max_card_value + 1`` and money by ``stakes_in_game + 1``.
        """
        # presumably the largest integer the card encoding produces - confirm
        max_card_value = 268471337
        card_scale = max_card_value + 1
        money_scale = self.stakes_in_game + 1

        hand = [card / card_scale for card in player.hand]
        board = [card / card_scale for card in self.table.board]
        # Pad so the board always occupies five slots.
        board.extend(0 for _ in range(len(self.table.board), 5))

        pot = self.table.pot_value() / money_scale
        player_stakes = player.stakes / money_scale

        others = [p for p in self.players if p != player]
        other_players_stakes = [p.stakes / money_scale for p in others]

        active_false = 0
        active_true = 0.1
        active_players = self.table.active_players
        player_active = active_true if player in active_players else active_false
        other_players_active = [
            active_true if p in active_players else active_false for p in others
        ]

        observation = (
            hand
            + board
            + [pot, player_stakes]
            + other_players_stakes
            + [player_active]
            + other_players_active
        )
        return np.array(observation)

    @property
    def table_players(self):
        """Players currently held by the table."""
        return self.table.players

    @property
    def next_player(self):
        """Player whose turn it is according to the table."""
        return self.table.next_player

    @property
    def stakes_in_game(self):
        """Total stakes across all players at the start of a game."""
        return self.player_amount * self.stakes

    def render(self, mode="human", close=False):
        """Print the last action, round events and every table player's state.

        Safe to call before the first ``step``: with no action recorded yet
        the action line is skipped and all event flags count as unset.
        ``mode`` and ``close`` are accepted but not used.
        """
        action, actor = self.last_action
        if actor is not None:
            fixed_labels = {0: "FOLDED", 1: "CALLED", 2: "ALL_IN"}
            if action in fixed_labels:
                print(f"{actor.name}: {fixed_labels[action]}")
            else:
                print(f"{actor.name}: RAISED({action - 2})")

        # `debug` is empty until the first step, so read the flags defensively.
        if self.debug.get("new_bet_round", False):
            print("### NEW BET ROUND ###")
            print(f"Community Cards: {Card.print_pretty_cards(self.table.board)}")
        if self.debug.get("end_round", False):
            print("### END ROUND ###")
            all_winners = [
                [w.name for w in winners]
                for winners in self.debug.get("winners", [])
            ]
            print(f"WINNERS: {all_winners}")
        if self.debug.get("new_round", False):
            print("### NEW ROUND ###")

        for p in self.table.players:
            print(
                f"Player {p.name}: hand={Card.print_pretty_cards(p.hand)}, stakes={p.stakes}, "
                f"bet={p.bet}, has_called={p.has_called}, has_folded={p not in self.table.active_players}, "
                f"dealer={not self.done and self.table.players[self.table.dealer] == p}"
            )
        if self.done:
            print("### GAME ENDED - RESETTING ###")
def test_player_actions():
    """Exercise call, raise, fold and rule violations over three bet rounds.

    Checks blinds after dealing, bet/stakes bookkeeping after each action,
    that an undersized raise and a second action in the same bet round are
    rejected with ``PokerRuleViolationException``, and that everything ends
    up in a single pot at showdown.
    """
    table = Table(small_blind=10, big_blind=20)
    for _ in range(4):
        table.add_player(Player(100, table))
    table.new_round()

    # Blinds posted: seat 1 pays the small blind, seat 2 the big blind.
    expected_after_blinds = [(0, 100), (10, 90), (20, 80), (0, 100)]
    for seat, (bet, stakes) in enumerate(expected_after_blinds):
        assert table.active_players[seat].bet == bet
        assert table.active_players[seat].stakes == stakes
    assert not any(p.has_called for p in table.active_players)

    # First bet round: everyone calls or checks up to the big blind.
    for _ in range(4):
        actor = table.next_player
        actor.call_check()
        assert actor.bet == 20
        assert actor.stakes == 80
        assert actor.has_called

    # Second bet round: check, rejected raise, valid raise, fold, two calls.
    table.start_next_bet_round()
    actor = table.next_player
    actor.call_check()
    assert actor.bet == 20
    assert actor.stakes == 80

    actor = table.next_player
    try:
        actor.raise_bet(19)
    except PokerRuleViolationException:
        # A rejected raise must leave the player's money untouched.
        assert actor.bet == 20
        assert actor.stakes == 80
    else:
        assert False

    actor.raise_bet(20)
    assert actor.bet == 40
    assert actor.stakes == 60
    assert table.current_pot.highest_bet == 40
    assert table.current_pot.highest_amount() == 40
    assert actor in table.current_pot.contributors
    # A raise clears the has_called flag of everybody else.
    for other_player in table.active_players:
        if other_player == actor:
            continue
        assert not other_player.has_called

    actor = table.next_player
    actor.fold()
    assert not actor.has_called
    assert actor.bet == 20
    assert actor.stakes == 80
    assert actor not in table.active_players

    for _ in range(2):
        table.next_player.call_check()
    assert len(table.active_players) == 3
    for remaining in table.active_players:
        assert remaining.has_called
        assert remaining.bet == 40
        assert remaining.stakes == 60

    # Third bet round: two raises followed by two calls.
    table.start_next_bet_round()
    actor = table.next_player
    actor.raise_bet(50)
    actor = table.next_player
    actor.raise_bet(60)
    actor = table.next_player
    actor.call_check()
    actor = table.next_player
    actor.call_check()

    # Player can only perform one action per betround
    try:
        actor.fold()
    except PokerRuleViolationException:
        pass
    else:
        assert False

    for remaining in table.active_players:
        assert remaining.is_all_in

    table.start_next_bet_round()
    assert table.bet_round == BetRound.SHOWDOWN
    assert len(table.board) == 5
    assert len(table.pots) == 1
    assert table.current_pot.highest_bet == 100
    assert table.current_pot.highest_amount() == 100