def render(self, mode='human'):
    """Render the current state of the table in the viewer window.

    Draws the table, one marker per player (green when the player cycle
    reports the seat as alive, red otherwise), each player's name, cards,
    stack, equity and last action, the per-player pot, the dealer button,
    and finally the table cards and the two pot totals.

    Args:
        mode (str): accepted for interface compatibility; not used here.
    """
    screen_width = 600
    screen_height = 400
    table_radius = 200
    face_radius = 10
    center_x = screen_width / 2
    center_y = screen_height / 2

    # The window is created lazily on the first call and reused afterwards.
    if self.viewer is None:
        self.viewer = PygletWindow(screen_width + 50, screen_height + 50)
    self.viewer.reset()
    self.viewer.circle(center_x, center_y, table_radius, color=BLUE, thickness=0)

    # When there is no current player / dealer attribute the lookup yields
    # None, which never equals a seat index, so nothing is highlighted.
    # Unlike a blanket `except AttributeError`, this does not hide errors
    # raised inside the viewer's own drawing calls.
    current_seat = getattr(self.current_player, 'seat', None)
    dealer_idx = getattr(self.player_cycle, 'dealer_idx', None)

    num_players = len(self.players)
    for i, player in enumerate(self.players):
        # Players are spread evenly on a circle around the table centre.
        degrees = i * (360 / num_players)
        radian = degrees * (np.pi / 180)
        cos_r = np.cos(radian)
        sin_r = np.sin(radian)

        x = (face_radius + table_radius) * cos_r + center_x
        y = (face_radius + table_radius) * sin_r + center_y

        color = GREEN if self.player_cycle.alive[i] else RED
        self.viewer.circle(x, y, face_radius, color=color, thickness=2)

        if i == current_seat:
            # Highlight the player whose turn it is.
            self.viewer.rectangle(x - 60, y, 150, -50, (255, 0, 0, 10))

        self.viewer.text(f"{player.name}", x - 60, y - 15, font_size=10, color=WHITE)
        self.viewer.text(f"Player {player.seat}: {player.cards}", x - 60, y, font_size=10, color=WHITE)
        equity_alive = int(round(float(player.equity_alive) * 100))
        self.viewer.text(f"${player.stack} (EQ: {equity_alive}%)", x - 60, y + 15, font_size=10, color=WHITE)
        try:
            self.viewer.text(player.last_action_in_stage, x - 60, y + 30, font_size=10, color=WHITE)
        except IndexError:
            # NOTE(review): kept from the original as best-effort; presumably
            # the viewer can raise IndexError for some labels - confirm.
            pass

        # Per-player pot, drawn inside the table edge.
        x_inner = (-face_radius + table_radius - 60) * cos_r + center_x
        y_inner = (-face_radius + table_radius - 60) * sin_r + center_y
        self.viewer.text(f"${self.player_pots[i]}", x_inner, y_inner, font_size=10, color=WHITE)

        if i == dealer_idx:
            # Dealer button, drawn between the player and the pot label.
            x_button = (-face_radius + table_radius - 20) * cos_r + center_x
            y_button = (-face_radius + table_radius - 20) * sin_r + center_y
            self.viewer.circle(x_button, y_button, 5, color=BLUE, thickness=2)

    # Table-wide labels do not depend on the player, so draw them once
    # instead of once per player.
    self.viewer.text(f"{self.table_cards}", center_x - 40, center_y, font_size=10, color=WHITE)
    self.viewer.text(f"${self.community_pot}", center_x - 15, center_y + 30, font_size=10, color=WHITE)
    self.viewer.text(f"${self.current_round_pot}", center_x - 15, center_y + 50, font_size=10, color=WHITE)

    self.viewer.update()
class HoldemTable(Env):
    """Pokergame environment"""

    def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
                 max_raising_rounds=2, use_cpp_montecarlo=False):
        """
        The table needs to be initialized once at the beginning.

        Players are not passed in here; they are registered afterwards with
        ``add_player``.

        Args:
            initial_stacks (real): initial stacks per player
            small_blind (real)
            big_blind (real)
            render (bool): render table after each move in graphical format
            funds_plot (bool): show plot of funds history at end of each episode
            max_raising_rounds (int): max raises per round per player
            use_cpp_montecarlo (bool): take the equity calculator from the
                cppimport-built ``tools.montecarlo_cpp.pymontecarlo`` module
                instead of ``tools.montecarlo_python``

        """
        # The equity backend is imported lazily so that only the selected
        # implementation has to be importable.
        if use_cpp_montecarlo:
            import cppimport
            calculator = cppimport.imp("tools.montecarlo_cpp.pymontecarlo")
            get_equity = calculator.montecarlo
        else:
            from tools.montecarlo_python import get_equity
        self.get_equity = get_equity
        self.use_cpp_montecarlo = use_cpp_montecarlo
        self.num_of_players = 0
        self.small_blind = small_blind
        self.big_blind = big_blind
        self.render_switch = render
        self.players = []
        self.table_cards = None
        self.dealer_pos = None
        self.player_status = []  # one hot encoded
        self.current_player = None
        self.player_cycle = None  # cycle iterator
        self.stage = None
        self.last_player_pot = None
        self.viewer = None
        self.player_max_win = None  # used for side pots
        self.second_round = False
        self.last_caller = None
        self.last_raiser = None
        self.raisers = []
        self.callers = []
        self.played_in_round = None
        self.min_call = None
        self.community_data = None
        self.player_data = None
        self.stage_data = None
        self.deck = None
        self.action = None
        self.winner_ix = None
        self.initial_stacks = initial_stacks
        self.acting_agent = None
        self.funds_plot = funds_plot
        self.max_round_raising = max_raising_rounds

        # pots
        self.community_pot = 0
        # Was initialised to 9, which looks like a typo: every new hand
        # resets this pot to 0 and nothing else ever uses 9.
        self.current_round_pot = 0
        self.player_pots = None  # individual player pots

        self.observation = None
        self.reward = None
        self.info = None
        self.done = False
        self.funds_history = None
        self.array_everything = None
        self.legal_moves = None
        self.illegal_move_reward = -1000000
        self.action_space = Discrete(len(Action) - 2)
        self.first_action_for_hand = None

    def reset(self):
        """Reset after game over.

        Restores every stack, builds a fresh player cycle and deals the first
        hand.

        Returns:
            The flattened observation array (``self.array_everything``).
        """
        self.observation = None
        self.reward = None
        self.info = None
        self.done = False
        self.funds_history = pd.DataFrame()
        self.first_action_for_hand = [True] * len(self.players)

        for player in self.players:
            player.stack = self.initial_stacks

        self.dealer_pos = 0
        self.player_cycle = PlayerCycle(self.players, dealer_idx=-1,
                                        max_steps_after_raiser=len(self.players) - 1,
                                        max_steps_after_big_blind=len(self.players))
        self._start_new_hand()
        self._get_environment()
        # auto play for agents where autoplay is set
        if self._agent_is_autoplay() and not self.done:
            # kick off the first action after bb by an autoplay agent
            self.step('initial_player_autoplay')

        return self.array_everything

    def step(self, action):  # pylint: disable=arguments-differ
        """
        Next player makes a move and a new environment is observed.

        Args:
            action: Used for testing only. Needs to be of Action type

        Returns:
            Tuple of (observation array, reward, done flag, info dict).

        """
        # loop over step function, calling the agent's action method
        # until either the env is done, or an agent is just a shell and
        # will get a call to the step function externally (e.g. via
        # keras-rl)
        self.reward = 0
        self.acting_agent = self.player_cycle.idx
        if self._agent_is_autoplay():
            while self._agent_is_autoplay() and not self.done:
                log.debug("Autoplay agent. Call action method of agent.")
                self._get_environment()
                # call agent's action method
                action = self.current_player.agent_obj.action(self.legal_moves, self.observation, self.info)
                if Action(action) not in self.legal_moves:
                    print('Equity Illegal Move')
                    self._illegal_move(action)
                else:
                    self._execute_step(Action(action))
                    if self.first_action_for_hand[self.acting_agent] or self.done:
                        self.first_action_for_hand[self.acting_agent] = False
                        self._calculate_reward(action)

        else:  # action received from player shell (e.g. keras rl, not autoplay)
            self._get_environment()  # get legal moves
            if Action(action) not in self.legal_moves:
                print('Agent Illegal Move')
                self._illegal_move(action)
            else:
                self._execute_step(Action(action))
                if self.first_action_for_hand[self.acting_agent] or self.done:
                    self.first_action_for_hand[self.acting_agent] = False
                    self._calculate_reward(action)

            msg = f"Previous action reward for seat {self.acting_agent}: {self.reward}"
            log.info(msg)
            print(msg)
        return self.array_everything, self.reward, self.done, self.info

    def _execute_step(self, action):
        """Apply a legal action, advance to the next player and refresh the observation."""
        self._process_decision(action)

        self._next_player()

        if self.stage in [Stage.END_HIDDEN, Stage.SHOWDOWN]:
            self._end_hand()
            self._start_new_hand()

        self._get_environment()

    def _illegal_move(self, action):
        """Report an illegal action and set the reward to the illegal-move penalty."""
        msg = f"{action} is an Illegal move, try again. Currently allowed: {self.legal_moves}"
        log.warning(msg)
        print(msg)
        self.reward = self.illegal_move_reward

    def _agent_is_autoplay(self, idx=None):
        """Return True if the agent has an ``autoplay`` attribute.

        Args:
            idx (int, optional): seat to check. When omitted (None) the
                current player is checked.
        """
        # `is None` rather than truthiness: seat 0 is a valid index and must
        # not fall back to the current player.
        if idx is None:
            return hasattr(self.current_player.agent_obj, 'autoplay')
        return hasattr(self.players[idx].agent_obj, 'autoplay')

    def _get_environment(self):
        """Observe the environment.

        Rebuilds community, player and stage data, the flattened observation
        array, the info dict and the observation space. Also resets
        ``self.reward`` to 0 and renders when rendering is switched on.
        """
        if not self.done:
            self._get_legal_moves()

        self.observation = None
        self.reward = 0
        self.info = None

        # Monetary amounts are scaled by 100 big blinds.
        scale = self.big_blind * 100

        self.community_data = CommunityData(len(self.players))
        self.community_data.community_pot = self.community_pot / scale
        self.community_data.current_round_pot = self.current_round_pot / scale
        self.community_data.small_blind = self.small_blind
        self.community_data.big_blind = self.big_blind
        # Stage values above 3 are clamped to index 3.
        self.community_data.stage[np.minimum(self.stage.value, 3)] = 1  # pylint: disable= invalid-sequence-index
        self.community_data.legal_moves = [action in self.legal_moves for action in Action]
        # self.cummunity_data.active_players

        self.player_data = PlayerData()
        self.player_data.stack = [player.stack / scale for player in self.players]

        if not self.current_player:  # game over
            self.current_player = self.players[self.winner_ix]

        self.player_data.position = self.current_player.seat
        self.current_player.equity_alive = self.get_equity(set(self.current_player.cards), set(self.table_cards),
                                                           sum(self.player_cycle.alive), 1000)
        self.player_data.equity_to_river_alive = self.current_player.equity_alive

        arr1 = np.array(list(flatten(self.player_data.__dict__.values())))
        arr2 = np.array(list(flatten(self.community_data.__dict__.values())))
        arr3 = np.array([list(flatten(sd.__dict__.values())) for sd in self.stage_data]).flatten()
        # arr_legal_only = np.array(self.community_data.legal_moves).flatten()

        self.array_everything = np.concatenate([arr1, arr2, arr3]).flatten()

        self.observation = self.array_everything
        # Recomputed because current_player may have been replaced above.
        self._get_legal_moves()

        self.info = {
            'player_data': self.player_data.__dict__,
            'community_data': self.community_data.__dict__,
            'stage_data': [stage.__dict__ for stage in self.stage_data],
            'legal_moves': self.legal_moves
        }

        self.observation_space = Box(low=0.0, high=1000.0, shape=self.array_everything.shape, dtype=np.float64)

        if self.render_switch:
            self.render()

    def _calculate_reward(self, last_action):
        """
        Preliminary implementation of reward function

        - Currently missing potential additional winnings from future contributions

        At the end of an episode the reward is the total of all initial
        stacks, positive when the winner is not an autoplay agent and negative
        otherwise. During an episode it is the change of the acting agent's
        funds between the last two funds-history rows.
        """
        # if last_action == Action.FOLD:
        #     self.reward = -(
        #             self.community_pot + self.current_round_pot)
        # else:
        #     self.reward = self.player_data.equity_to_river_alive * (self.community_pot + self.current_round_pot) - \
        #                   (1 - self.player_data.equity_to_river_alive) * self.player_pots[self.current_player.seat]
        _ = last_action
        if self.done:
            won = 1 if not self._agent_is_autoplay(idx=self.winner_ix) else -1
            self.reward = self.initial_stacks * len(self.players) * won
            msg = f"Keras-rl agent has reward {self.reward}"
            log.debug(msg)
            print(msg)

        elif len(self.funds_history) > 1:
            self.reward = self.funds_history.iloc[-1, self.acting_agent] - self.funds_history.iloc[
                -2, self.acting_agent]

    def _process_decision(self, action):  # pylint: disable=too-many-statements
        """Process the decisions that have been made by an agent.

        Raises:
            RuntimeError: if ``action`` is not a known action.
        """
        # Blinds are posted by the table itself and are never in legal_moves.
        if action not in [Action.SMALL_BLIND, Action.BIG_BLIND]:
            assert action in set(self.legal_moves), "Illegal decision"

        if action == Action.FOLD:
            self.player_cycle.deactivate_current()
            self.player_cycle.mark_folder()

        else:
            seat = self.current_player.seat
            pot_total = self.community_pot + self.current_round_pot

            if action == Action.CALL:
                # A call is capped at the remaining stack.
                contribution = min(self.min_call - self.player_pots[seat], self.current_player.stack)
                self.callers.append(seat)
                self.last_caller = seat

            elif action == Action.CHECK:
                contribution = 0
                self.player_cycle.mark_checker()

            elif action == Action.RAISE_3BB:
                contribution = 3 * self.big_blind - self.player_pots[seat]
                self.raisers.append(seat)

            elif action == Action.RAISE_HALF_POT:
                contribution = pot_total / 2
                self.raisers.append(seat)

            elif action == Action.RAISE_POT:
                contribution = pot_total
                self.raisers.append(seat)

            elif action == Action.RAISE_2POT:
                contribution = pot_total * 2
                self.raisers.append(seat)

            elif action == Action.ALL_IN:
                contribution = self.current_player.stack
                self.raisers.append(seat)

            elif action == Action.SMALL_BLIND:
                contribution = np.minimum(self.small_blind, self.current_player.stack)

            elif action == Action.BIG_BLIND:
                contribution = np.minimum(self.big_blind, self.current_player.stack)
                self.player_cycle.mark_bb()
            else:
                raise RuntimeError("Illegal action.")

            if contribution > self.min_call:
                self.player_cycle.mark_raiser()

            self.current_player.stack -= contribution
            self.player_pots[seat] += contribution
            self.current_round_pot += contribution
            self.last_player_pot = self.player_pots[seat]

            if self.current_player.stack == 0 and contribution > 0:
                self.player_cycle.mark_out_of_cash_but_contributed()

            self.min_call = max(self.min_call, contribution)

            self.current_player.actions.append(action)
            self.current_player.last_action_in_stage = action.name
            self.current_player.temp_stack.append(self.current_player.stack)

            self.player_max_win[seat] += contribution  # side pot

            # Record the action in the stage data, scaled by 100 big blinds.
            pos = self.player_cycle.idx
            rnd = self.stage.value + self.second_round
            scale = self.big_blind * 100
            self.stage_data[rnd].calls[pos] = action == Action.CALL
            self.stage_data[rnd].raises[pos] = action in [Action.RAISE_2POT, Action.RAISE_HALF_POT, Action.RAISE_POT]
            self.stage_data[rnd].min_call_at_action[pos] = self.min_call / scale
            self.stage_data[rnd].community_pot_at_action[pos] = self.community_pot / scale
            self.stage_data[rnd].contribution[pos] += contribution / scale
            self.stage_data[rnd].stack_at_action[pos] = self.current_player.stack / scale

        self.player_cycle.update_alive()

        msg = (
            f"Seat {self.current_player.seat} ({self.current_player.name}): {action} - Remaining stack: {self.current_player.stack}, "
            f"Round pot: {self.current_round_pot}, Community pot: {self.community_pot}, "
            f"player pot: {self.player_pots[self.current_player.seat]}")
        log.info(msg)
        print(msg)

    def _start_new_hand(self):
        """Deal new cards to players and reset table states.

        Returns early, without dealing, when the game is over.
        """
        self._save_funds_history()

        if self._check_game_over():
            return

        for line in ("", "++++++++++++++++++", "Starting new hand.", "++++++++++++++++++"):
            log.info(line)
        for line in ("", "++++++++++++++++++", "Starting new hand.", "++++++++++++++++++"):
            print(line)

        self.table_cards = []
        self._create_card_deck()
        self.stage = Stage.PREFLOP

        # preflop round1,2, flop>: round 1,2, turn etc...
        self.stage_data = [StageData(len(self.players)) for _ in range(8)]

        # pots
        self.community_pot = 0
        self.current_round_pot = 0
        self.player_pots = [0] * len(self.players)
        self.player_max_win = [0] * len(self.players)
        self.last_player_pot = 0
        self.played_in_round = 0
        self.first_action_for_hand = [True] * len(self.players)

        for player in self.players:
            player.cards = []

        self._next_dealer()

        self._distribute_cards()
        self._initiate_round()

    def _save_funds_history(self):
        """Keep track of player funds history (one row of stacks per call)."""
        funds_dict = {i: player.stack for i, player in enumerate(self.players)}
        self.funds_history = pd.concat([self.funds_history, pd.DataFrame(funds_dict, index=[0])])

    def _check_game_over(self):
        """Check if only one player has money left.

        Players without money are deactivated in the player cycle.

        Returns:
            bool: True when fewer than two players still have a stack.
        """
        player_alive = []
        self.player_cycle.new_hand_reset()

        for idx, player in enumerate(self.players):
            if player.stack > 0:
                player_alive.append(True)
            else:
                # NOTE(review): this appends rather than setting index idx, so
                # player_status grows with every check - confirm intent.
                self.player_status.append(False)
                self.player_cycle.deactivate_player(idx)

        remaining_players = sum(player_alive)
        if remaining_players < 2:
            self._game_over()
            return True
        return False

    def _game_over(self):
        """End of an episode.

        Marks the environment as done, labels and reports the funds history,
        and updates the league table of episode winners.
        """
        log.info("Game over.")
        print("Game over.")
        self.done = True
        player_names = [f"{i} - {player.name}" for i, player in enumerate(self.players)]
        self.funds_history.columns = player_names
        if self.funds_plot:
            self.funds_history.reset_index(drop=True).plot()
        log.info(self.funds_history)
        print(self.funds_history)
        plt.show()

        winner_in_episodes.append(self.winner_ix)
        league_table = pd.Series(winner_in_episodes).value_counts()
        best_player = league_table.index[0]
        log.info(league_table)
        log.info(f"Best Player: {best_player}")
        print(league_table)
        print(f"Best Player: {best_player}")

    def _initiate_round(self):
        """A new round (flop, turn, river) is initiated.

        Raises:
            RuntimeError: if the current stage is not a known stage.
        """
        self.last_caller = None
        self.last_raiser = None
        self.raisers = []
        self.callers = []
        self.min_call = 0
        for player in self.players:
            player.last_action_in_stage = ''
        self.player_cycle.new_round_reset()

        if self.stage == Stage.PREFLOP:
            log.info("")
            log.info("===Round: Stage: PREFLOP")
            print("")
            print("===Round: Stage: PREFLOP")
            # max steps total will be adjusted again at bb
            self.player_cycle.max_steps_total = len(self.players) * self.max_round_raising + 2

            # Post the blinds, then move on to the first player to act.
            self._next_player()
            self._process_decision(Action.SMALL_BLIND)
            self._next_player()
            self._process_decision(Action.BIG_BLIND)
            self._next_player()

        elif self.stage in [Stage.FLOP, Stage.TURN, Stage.RIVER]:
            self.player_cycle.max_steps_total = len(self.players) * self.max_round_raising

            self._next_player()

        elif self.stage == Stage.SHOWDOWN:
            log.info("Showdown")
            print("Showdown")

        else:
            raise RuntimeError()

    def add_player(self, agent):
        """Add a player to the table. Has to happen at the very beginning"""
        self.num_of_players += 1
        player = PlayerShell(stack_size=self.initial_stacks, name=agent.name)
        player.agent_obj = agent
        player.seat = len(self.players)  # assign next seat number to player
        player.stack = self.initial_stacks
        self.players.append(player)
        self.player_status = [True] * len(self.players)
        self.player_pots = [0] * len(self.players)

    def _end_round(self):
        """End of preflop, flop, turn or river: advance the stage and deal table cards."""
        # NOTE(review): _close_round adds the player pots to the community pot
        # and _clean_up_pots below adds the round pot as well - confirm the
        # community pot is not counted twice.
        self._close_round()
        if self.stage == Stage.PREFLOP:
            self.stage = Stage.FLOP
            self._distribute_cards_to_table(3)

        elif self.stage == Stage.FLOP:
            self.stage = Stage.TURN
            self._distribute_cards_to_table(1)

        elif self.stage == Stage.TURN:
            self.stage = Stage.RIVER
            self._distribute_cards_to_table(1)

        elif self.stage == Stage.RIVER:
            self.stage = Stage.SHOWDOWN

        log.info("--------------------------------")
        log.info(f"===ROUND: {self.stage} ===")
        print("--------------------------------")
        print(f"===ROUND: {self.stage} ===")
        self._clean_up_pots()

    def _clean_up_pots(self):
        """Move the round pot into the community pot and clear the player pots."""
        self.community_pot += self.current_round_pot
        self.current_round_pot = 0
        self.player_pots = [0] * len(self.players)

    def _end_hand(self):
        """Settle the pots, determine the winner and pay out."""
        self._clean_up_pots()
        self.winner_ix = self._get_winner()
        self._award_winner(self.winner_ix)

    def _get_winner(self):
        """Determine which player has won the hand.

        Returns:
            int: index of the winning player in ``self.players``.
        """
        potential_winners = self.player_cycle.get_potential_winners()

        potential_winner_idx = [i for i, potential_winner in enumerate(potential_winners) if potential_winner]
        if sum(potential_winners) == 1:
            winner_ix = potential_winner_idx[0]
            winning_card_type = 'Only remaining player in round'

        else:
            assert self.stage == Stage.SHOWDOWN
            remaining_player_winner_ix, winning_card_type = get_winner(
                [player.cards for ix, player in enumerate(self.players) if potential_winners[ix]],
                self.table_cards)
            # get_winner indexes into the remaining players only; map back to a seat.
            winner_ix = potential_winner_idx[remaining_player_winner_ix]
        msg = f"Player {winner_ix} won: {winning_card_type}"
        log.info(msg)
        print(msg)
        return winner_ix

    def _award_winner(self, winner_ix):
        """Hand the pot to the winner and handle side pots.

        The winner receives at most their own total contribution from each
        player; anything a player paid beyond that is returned to them.
        """
        max_win_per_player_for_winner = self.player_max_win[winner_ix]
        total_winnings = sum(np.minimum(max_win_per_player_for_winner, self.player_max_win))
        remains = np.maximum(0, np.array(self.player_max_win) - max_win_per_player_for_winner)  # to be returned

        self.players[winner_ix].stack += total_winnings
        self.winner_ix = winner_ix
        if total_winnings < sum(self.player_max_win):
            log.info("Returning side pots")
            print("Returning side pots")
            for i, player in enumerate(self.players):
                player.stack += remains[i]

    def _next_dealer(self):
        """Advance the dealer button and remember the dealer's seat."""
        self.dealer_pos = self.player_cycle.next_dealer().seat

    def _next_player(self):
        """Move to the next player.

        The player cycle may return a falsy value or one of the sentinel
        strings checked below instead of a player; each ends the round.
        """
        self.current_player = self.player_cycle.next_player()
        if not self.current_player:
            if sum(self.player_cycle.alive) < 2:
                log.info("Only one player remaining in round")
                print("Only one player remaining in round")
                self.stage = Stage.END_HIDDEN

            else:
                log.info("End round - no current player returned")
                print("End round - no current player returned")
                self._end_round()
                # todo: in some cases no new round should be initialized bc only one player is playing only it seems
                self._initiate_round()

        elif self.current_player in ('max_steps_total', 'max_steps_after_raiser'):
            log.debug(self.current_player)
            log.info("End of round ")
            print("End of round ")
            self._end_round()

    def _get_legal_moves(self):
        """Determine what moves are allowed in the current state"""
        self.legal_moves = []
        seat = self.current_player.seat
        stack = self.current_player.stack
        pot_total = self.community_pot + self.current_round_pot

        if self.player_pots[seat] == max(self.player_pots):
            self.legal_moves.append(Action.CHECK)
        else:
            self.legal_moves.append(Action.CALL)
            self.legal_moves.append(Action.FOLD)

        # NOTE(review): the pot-sized raises and ALL_IN are only offered when
        # the player can afford the 3BB raise - confirm this gating is intended.
        if stack >= 3 * self.big_blind - self.player_pots[seat]:
            self.legal_moves.append(Action.RAISE_3BB)

            if stack >= (pot_total / 2) >= self.min_call:
                self.legal_moves.append(Action.RAISE_HALF_POT)

            if stack >= pot_total >= self.min_call:
                self.legal_moves.append(Action.RAISE_POT)

            if stack >= (pot_total * 2) >= self.min_call:
                self.legal_moves.append(Action.RAISE_2POT)

            if stack > 0:
                self.legal_moves.append(Action.ALL_IN)

        log.debug(f"Community+current round pot pot: {self.community_pot + self.current_round_pot}")

    def _close_round(self):
        """put player_pots into community pots"""
        self.community_pot += sum(self.player_pots)
        self.player_pots = [0] * len(self.players)
        self.played_in_round = 0

    def _create_card_deck(self):
        """Create a fresh 52-card deck of two-character strings (value + suit)."""
        values = "23456789TJQKA"
        suites = "CDHS"
        self.deck = [value + suit for value in values for suit in suites]  # contains cards in the deck

    def _distribute_cards(self):
        """Deal two random cards from the deck to every player that still has a stack."""
        log.info(f"Dealer is at position {self.dealer_pos}")
        print(f"Dealer is at position {self.dealer_pos}")
        for player in self.players:
            player.cards = []
            if player.stack <= 0:
                continue
            for _ in range(2):
                card = np.random.randint(0, len(self.deck))
                player.cards.append(self.deck.pop(card))
            msg = f"Player {player.seat} got {player.cards} and ${player.stack}"
            log.info(msg)
            print(msg)

    def _distribute_cards_to_table(self, amount_of_cards):
        """Move ``amount_of_cards`` random cards from the deck onto the table."""
        for _ in range(amount_of_cards):
            card = np.random.randint(0, len(self.deck))
            self.table_cards.append(self.deck.pop(card))
        log.info(f"Cards on table: {self.table_cards}")
        print(f"Cards on table: {self.table_cards}")

    def render(self, mode='human'):
        """Render the current state of the table in the viewer window.

        Draws the table, one marker per player (green when the player cycle
        reports the seat as alive, red otherwise), each player's name, cards,
        stack, equity and last action, the per-player pot, the dealer button,
        and finally the table cards and the two pot totals.

        Args:
            mode (str): accepted for interface compatibility; not used here.
        """
        screen_width = 600
        screen_height = 400
        table_radius = 200
        face_radius = 10
        center_x = screen_width / 2
        center_y = screen_height / 2

        # The window is created lazily on the first call and reused afterwards.
        if self.viewer is None:
            self.viewer = PygletWindow(screen_width + 50, screen_height + 50)
        self.viewer.reset()
        self.viewer.circle(center_x, center_y, table_radius, color=BLUE, thickness=0)

        # When there is no current player / dealer attribute the lookup yields
        # None, which never equals a seat index, so nothing is highlighted.
        # Unlike a blanket `except AttributeError`, this does not hide errors
        # raised inside the viewer's own drawing calls.
        current_seat = getattr(self.current_player, 'seat', None)
        dealer_idx = getattr(self.player_cycle, 'dealer_idx', None)

        num_players = len(self.players)
        for i, player in enumerate(self.players):
            # Players are spread evenly on a circle around the table centre.
            degrees = i * (360 / num_players)
            radian = degrees * (np.pi / 180)
            cos_r = np.cos(radian)
            sin_r = np.sin(radian)

            x = (face_radius + table_radius) * cos_r + center_x
            y = (face_radius + table_radius) * sin_r + center_y

            color = GREEN if self.player_cycle.alive[i] else RED
            self.viewer.circle(x, y, face_radius, color=color, thickness=2)

            if i == current_seat:
                # Highlight the player whose turn it is.
                self.viewer.rectangle(x - 60, y, 150, -50, (255, 0, 0, 10))

            self.viewer.text(f"{player.name}", x - 60, y - 15, font_size=10, color=WHITE)
            self.viewer.text(f"Player {player.seat}: {player.cards}", x - 60, y, font_size=10, color=WHITE)
            equity_alive = int(round(float(player.equity_alive) * 100))
            self.viewer.text(f"${player.stack} (EQ: {equity_alive}%)", x - 60, y + 15, font_size=10, color=WHITE)
            try:
                self.viewer.text(player.last_action_in_stage, x - 60, y + 30, font_size=10, color=WHITE)
            except IndexError:
                # NOTE(review): kept from the original as best-effort; presumably
                # the viewer can raise IndexError for some labels - confirm.
                pass

            # Per-player pot, drawn inside the table edge.
            x_inner = (-face_radius + table_radius - 60) * cos_r + center_x
            y_inner = (-face_radius + table_radius - 60) * sin_r + center_y
            self.viewer.text(f"${self.player_pots[i]}", x_inner, y_inner, font_size=10, color=WHITE)

            if i == dealer_idx:
                # Dealer button, drawn between the player and the pot label.
                x_button = (-face_radius + table_radius - 20) * cos_r + center_x
                y_button = (-face_radius + table_radius - 20) * sin_r + center_y
                self.viewer.circle(x_button, y_button, 5, color=BLUE, thickness=2)

        # Table-wide labels do not depend on the player, so draw them once
        # instead of once per player.
        self.viewer.text(f"{self.table_cards}", center_x - 40, center_y, font_size=10, color=WHITE)
        self.viewer.text(f"${self.community_pot}", center_x - 15, center_y + 30, font_size=10, color=WHITE)
        self.viewer.text(f"${self.current_round_pot}", center_x - 15, center_y + 50, font_size=10, color=WHITE)

        self.viewer.update()
class HoldemTable(Env): """Pokergame environment""" def __init__(self, num_of_players=6, initial_stacks=100, small_blind=1, big_blind=2, render=False): """The table needs to be initialized once at the beginning""" self.num_of_players = num_of_players self.small_blind = small_blind self.big_blind = big_blind self.render_switch = render self.players = [] self.table_cards = None self.dealer_pos = None self.player_status = [] # one hot encoded self.current_player = None self.player_cycle = None # cycle iterator self.stage = None self.last_player_pot = None self.viewer = None self.player_max_win = None # used for side pots self.second_round = False self.last_caller = None self.last_raiser = None self.raisers = [] self.callers = [] self.played_in_round = None self.min_call = None self.community_data = None self.player_data = None self.stage_data = None self.deck = None self.winner_ix = None self.initial_stacks = initial_stacks # pots self.community_pot = 0 self.current_round_pot = 9 self.player_pots = None # individual player pots self.observation = None self.reward = None self.info = None self.done = False self.funds_history = None self.array_everything = None def reset(self): """Reset after game over.""" self.observation = None self.reward = None self.info = None self.done = False self.funds_history = pd.DataFrame() for player in self.players: player.stack = self.initial_stacks self.dealer_pos = 0 self.player_cycle = PlayerCycle(self.players, dealer_idx=-1, max_steps_after_raiser=len( self.players)) self._start_new_hand() self._get_environment() # auto play for agents where autoplay is set if hasattr(self.current_player, 'agent_obj') and not self.done: self.step() return self.array_everything def step(self, action=None): # pylint: disable=arguments-differ """ Next player makes a move and a new environment is observed. Args: action: Used for testing only. 
Needs to be of Action type """ self.observation_space = self.array_everything.shape if not action: if not hasattr(self.current_player.agent_obj, 'autoplay'): # only player shell, external model required to by calling step method # todo: reward should be for last played action of external model return self.array_everything, self.reward, self.done, self.info action = self.current_player.agent_obj.action( self.action_space, self.observation) self._process_decision(action) self._next_player() if self.stage in [Stage.END_HIDDEN, Stage.SHOWDOWN]: self._end_hand() self._start_new_hand() self.player_cycle.update_alive() self._get_environment() # auto play for agents where autoplay is set if hasattr(self.current_player, 'agent_obj') and not self.done: self.step() return self.observation, self.reward, self.done, self.info def _get_environment(self): """Observe the environment""" if not self.done: self._get_legal_moves() self.observation = None self.reward = None self.info = None self.community_data = CommunityData(len(self.players)) self.community_data.community_pot = self.community_pot self.community_data.current_round_pot = self.current_round_pot self.community_data.small_blind = self.small_blind self.community_data.big_blind = self.big_blind self.community_data.stage[np.minimum(self.stage.value, 3)] = 1 # self.cummunity_data.active_players self.player_data = PlayerData() self.player_data.stack = [player.stack for player in self.players] if not self.current_player: # game over self.current_player = self.players[self.winner_ix] self.player_data.position = self.current_player.seat self.current_player.equity_alive = get_equity( self.current_player.cards, self.table_cards, sum(self.player_cycle.alive)) self.player_data.equity_to_river_alive = self.current_player.equity_alive arr1 = np.array(list(flatten(self.player_data.__dict__.values()))) arr2 = np.array(list(flatten(self.community_data.__dict__.values()))) arr3 = np.array([ list(flatten(sd.__dict__.values())) for sd in 
self.stage_data ]).flatten() self.array_everything = np.concatenate([arr1, arr2, arr3]).flatten() self.observation = { 'array_everything': self.array_everything, 'player_data': self.player_data, 'community_data': self.community_data, 'stage_data': self.stage_data } self._get_legal_moves() self.reward = self.current_player.stack + self.player_data.equity_to_river_alive * self.community_pot if self.render_switch: self.render() def _process_decision(self, action): # pylint: disable=too-many-statements """Process the decisions that have been made by an agent.""" if action not in [Action.SMALL_BLIND, Action.BIG_BLIND]: assert action in set(self.action_space), "Illegal decision" if action == Action.FOLD: self.player_cycle.deactivate_current() self.player_cycle.mark_folder() log.info( f"Seat {self.current_player.seat}: {action} - Remaining stack: {self.current_player.stack}, " f"Round pot: {self.current_round_pot}, Community pot: {self.community_pot}, " f"player pot: {self.player_pots[self.current_player.seat]}") return if action == Action.CALL: contribution = min( self.min_call - self.player_pots[self.current_player.seat], self.current_player.stack) self.callers.append(self.current_player.seat) self.last_caller = self.current_player.seat # verify the player has enough in his stack elif action == Action.CHECK: contribution = 0 self.player_cycle.mark_checker() elif action == Action.RAISE_3BB: contribution = (self.community_pot + self.big_blind) * 3 self.raisers.append(self.current_player.seat) elif action == Action.RAISE_HALF_POT: contribution = (self.community_pot + self.current_round_pot) / 2 self.raisers.append(self.current_player.seat) elif action == Action.RAISE_POT: contribution = (self.community_pot + self.current_round_pot) self.raisers.append(self.current_player.seat) elif action == Action.RAISE_2POT: contribution = (self.community_pot + self.current_round_pot) * 2 self.raisers.append(self.current_player.seat) elif action == Action.ALL_IN: contribution = 
self.current_player.stack self.raisers.append(self.current_player.seat) elif action == Action.SMALL_BLIND: contribution = np.minimum(self.small_blind, self.current_player.stack) self.last_raiser = self.current_player.seat elif action == Action.BIG_BLIND: contribution = np.minimum(self.big_blind, self.current_player.stack) self.last_raiser = self.current_player.seat self.player_cycle.mark_bb() else: raise RuntimeError("Illegal action.") if contribution > self.min_call: self.player_cycle.mark_raiser() self.last_raiser = self.current_player.seat self.current_player.stack -= contribution self.player_pots[self.current_player.seat] += contribution self.current_round_pot += contribution self.last_player_pot = self.player_pots[self.current_player.seat] if self.current_player.stack == 0 and contribution > 0: self.player_cycle.mark_out_of_cash_but_contributed() self.min_call = max(self.min_call, contribution) self.current_player.actions.append(action) self.current_player.last_action_in_stage = action.name self.current_player.temp_stack.append(self.current_player.stack) self.player_max_win[ self.current_player.seat] += contribution # side pot pos = self.player_cycle.idx rnd = self.stage.value + self.second_round self.stage_data[rnd].calls[pos] = action == Action.CALL self.stage_data[rnd].raises[pos] = action in [ Action.RAISE_2POT, Action.RAISE_HALF_POT, Action.RAISE_POT ] self.stage_data[rnd].min_call_at_action[pos] = self.min_call self.stage_data[rnd].community_pot_at_action[pos] = self.community_pot self.stage_data[rnd].contribution[pos] += contribution self.stage_data[rnd].stack_at_action[pos] = self.current_player.stack log.info( f"Seat {self.current_player.seat}: {action} - Remaining stack: {self.current_player.stack}, " f"Round pot: {self.current_round_pot}, Community pot: {self.community_pot}, " f"player pot: {self.player_pots[self.current_player.seat]}") def _start_new_hand(self): """Deal new cards to players and reset table states.""" self._save_funds_history() if 
self._check_game_over(): return log.info("") log.info("++++++++++++++++++") log.info("Starting new hand.") log.info("++++++++++++++++++") self.table_cards = [] self._create_card_deck() self.stage = Stage.PREFLOP # preflop round1,2, flop>: round 1,2, turn etc... self.stage_data = [StageData(len(self.players)) for _ in range(8)] # pots self.community_pot = 0 self.current_round_pot = 0 self.player_pots = [0] * len(self.players) self.player_max_win = [0] * len(self.players) self.last_player_pot = 0 self.played_in_round = 0 for player in self.players: player.cards = [] self._next_dealer() self._distribute_cards() self._initiate_round() def _save_funds_history(self): """Keep track of player funds history""" funds_dict = {i: player.stack for i, player in enumerate(self.players)} self.funds_history = pd.concat( [self.funds_history, pd.DataFrame(funds_dict, index=[0])]) def _check_game_over(self): """Check if only one player has money left""" player_alive = [] self.player_cycle.new_hand_reset() for idx, player in enumerate(self.players): if player.stack > 0: player_alive.append(True) else: self.player_status.append(False) self.player_cycle.deactivate_player(idx) remaining_players = sum(player_alive) if remaining_players < 2: self._game_over() return True return False def _game_over(self): """End of an episode.""" log.info("Game over.") self.done = True player_names = [ f"{i} - {player.name}" for i, player in enumerate(self.players) ] self.funds_history.columns = player_names self.funds_history.reset_index(drop=True).plot() log.info(self.funds_history) plt.show() def _initiate_round(self): """A new round (flop, turn, river) is initiated""" self.last_caller = None self.last_raiser = None self.raisers = [] self.callers = [] self.min_call = 0 for player in self.players: player.last_action_in_stage = '' self.player_cycle.new_round_reset() if self.stage == Stage.PREFLOP: log.info("") log.info("===Round: Stage: PREFLOP") # max steps total will be adjusted again at bb 
self.player_cycle.max_steps_total = len(self.players) * 2 + 2 self._next_player() self._process_decision(Action.SMALL_BLIND) self._next_player() self._process_decision(Action.BIG_BLIND) self._next_player() elif self.stage in [Stage.FLOP, Stage.TURN, Stage.RIVER]: self.player_cycle.max_steps_total = len(self.players) * 2 self._next_player() elif self.stage == Stage.SHOWDOWN: log.info("Showdown") else: raise RuntimeError() def add_player(self, agent): """Add a player to the table. Has to happen at the very beginning""" player = PlayerShell(stack_size=self.initial_stacks, name=agent.name) player.agent_obj = agent player.seat = len(self.players) # assign next seat number to player player.stack = self.initial_stacks self.players.append(player) self.player_status = [True] * len(self.players) self.player_pots = [0] * len(self.players) def _end_round(self): """End of preflop, flop, turn or river""" self._close_round() if self.stage == Stage.PREFLOP: self.stage = Stage.FLOP self._distribute_cards_to_table(3) elif self.stage == Stage.FLOP: self.stage = Stage.TURN self._distribute_cards_to_table(1) elif self.stage == Stage.TURN: self.stage = Stage.RIVER self._distribute_cards_to_table(1) elif self.stage == Stage.RIVER: self.stage = Stage.SHOWDOWN log.info("--------------------------------") log.info(f"===ROUND: {self.stage} ===") self._clean_up_pots() def _clean_up_pots(self): self.community_pot += self.current_round_pot self.current_round_pot = 0 self.player_pots = [0] * len(self.players) def _end_hand(self): self._clean_up_pots() self.winner_ix = self._get_winner() self._award_winner(self.winner_ix) def _get_winner(self): """Determine which player has won the hand""" potential_winners = self.player_cycle.get_potential_winners() potential_winner_idx = [ i for i, potential_winner in enumerate(potential_winners) if potential_winner ] if sum(potential_winners) == 1: winner_ix = [ i for i, active in enumerate(potential_winners) if active ][0] winning_card_type = 'Only remaining 
player in round' else: assert self.stage == Stage.SHOWDOWN remaining_player_winner_ix, winning_card_type = get_winner([ player.cards for ix, player in enumerate(self.players) if potential_winners[ix] ], self.table_cards) winner_ix = potential_winner_idx[remaining_player_winner_ix] log.info(f"Player {winner_ix} won: {winning_card_type}") return winner_ix def _award_winner(self, winner_ix): """Hand the pot to the winner and handle side pots""" max_win_per_player_for_winner = self.player_max_win[winner_ix] total_winnings = sum( np.minimum(max_win_per_player_for_winner, self.player_max_win)) remains = np.maximum(0, np.array(self.player_max_win) - max_win_per_player_for_winner) # to be returned self.players[winner_ix].stack += total_winnings self.winner_ix = winner_ix if total_winnings < sum(self.player_max_win): log.info("Returning side pots") for i, player in enumerate(self.players): player.stack += remains[i] def _next_dealer(self): self.dealer_pos = self.player_cycle.next_dealer().seat def _next_player(self): """Move to the next player""" self.current_player = self.player_cycle.next_player() if not self.current_player: if sum(self.player_cycle.alive) < 2: log.info("Only one player remaining in round") self.stage = Stage.END_HIDDEN else: log.info("End round - no current player returned") self._end_round() self._initiate_round() elif self.current_player == 'max_steps_total' or self.current_player == 'max_steps_after_raiser': log.debug(self.current_player) log.info("End of round ") self._end_round() return def _get_legal_moves(self): """Determine what moves are allowed in the current state""" self.action_space = [] if self.current_round_pot == 0: self.action_space.append(Action.CHECK) else: self.action_space.append(Action.CALL) self.action_space.append(Action.FOLD) if self.player_cycle.is_raising_allowed(): if self.current_player.stack >= 3 * self.big_blind >= self.min_call: self.action_space.append(Action.RAISE_3BB) if self.current_player.stack >= ( 
(self.community_pot + self.current_round_pot) / 2) >= self.min_call: self.action_space.append(Action.RAISE_HALF_POT) if self.current_player.stack >= ( self.community_pot + self.current_round_pot) >= self.min_call: self.action_space.append(Action.RAISE_POT) if self.current_player.stack >= ( (self.community_pot + self.current_round_pot) * 2) >= self.min_call: self.action_space.append(Action.RAISE_2POT) if self.current_player.stack > 0: self.action_space.append(Action.ALL_IN) log.debug( f"Community+current round pot pot: {self.community_pot + self.current_round_pot}" ) def _close_round(self): """put player_pots into community pots""" self.community_pot += sum(self.player_pots) self.player_pots = [0] * len(self.players) self.played_in_round = 0 def _create_card_deck(self): values = "23456789TJQKA" suites = "CDHS" self.deck = [] # contains cards in the deck _ = [self.deck.append(x + y) for x in values for y in suites] def _distribute_cards(self): log.info(f"Dealer is at position {self.dealer_pos}") for player in self.players: player.cards = [] if player.stack <= 0: continue for _ in range(2): card = np.random.randint(0, len(self.deck)) player.cards.append(self.deck.pop(card)) log.info( f"Player {player.seat} got {player.cards} and ${player.stack}") def _distribute_cards_to_table(self, amount_of_cards): for _ in range(amount_of_cards): card = np.random.randint(0, len(self.deck)) self.table_cards.append(self.deck.pop(card)) log.info(f"Cards on table: {self.table_cards}") def render(self, mode='human'): """Render the current state""" screen_width = 600 screen_height = 400 table_radius = 200 face_radius = 10 if self.viewer is None: self.viewer = PygletWindow(screen_width + 50, screen_height + 50) self.viewer.reset() self.viewer.circle(screen_width / 2, screen_height / 2, table_radius, color=BLUE, thickness=0) for i in range(len(self.players)): degrees = i * (360 / len(self.players)) radian = (degrees * (np.pi / 180)) x = (face_radius + table_radius) * np.cos(radian) + 
screen_width / 2 y = (face_radius + table_radius) * np.sin(radian) + screen_height / 2 if self.player_cycle.alive[i]: color = GREEN else: color = RED self.viewer.circle(x, y, face_radius, color=color, thickness=2) try: if i == self.current_player.seat: self.viewer.rectangle(x - 60, y, 150, -50, (255, 0, 0, 10)) except AttributeError: pass self.viewer.text(f"{self.players[i].name}", x - 60, y - 15, font_size=10, color=WHITE) self.viewer.text( f"Player {self.players[i].seat}: {self.players[i].cards}", x - 60, y, font_size=10, color=WHITE) equity_alive = int(round( float(self.players[i].equity_alive) * 100)) self.viewer.text(f"${self.players[i].stack} (EQ: {equity_alive}%)", x - 60, y + 15, font_size=10, color=WHITE) try: self.viewer.text(self.players[i].last_action_in_stage, x - 60, y + 30, font_size=10, color=WHITE) except IndexError: pass x_inner = (-face_radius + table_radius - 60) * np.cos(radian) + screen_width / 2 y_inner = (-face_radius + table_radius - 60) * np.sin(radian) + screen_height / 2 self.viewer.text(f"${self.player_pots[i]}", x_inner, y_inner, font_size=10, color=WHITE) self.viewer.text(f"{self.table_cards}", screen_width / 2 - 40, screen_height / 2, font_size=10, color=WHITE) self.viewer.text(f"${self.community_pot}", screen_width / 2 - 15, screen_height / 2 + 30, font_size=10, color=WHITE) self.viewer.text(f"${self.current_round_pot}", screen_width / 2 - 15, screen_height / 2 + 50, font_size=10, color=WHITE) x_button = (-face_radius + table_radius - 20) * np.cos(radian) + screen_width / 2 y_button = (-face_radius + table_radius - 20) * np.sin(radian) + screen_height / 2 try: if i == self.player_cycle.dealer_idx: self.viewer.circle(x_button, y_button, 5, color=BLUE, thickness=2) except AttributeError: pass self.viewer.update()