def init_game(self):
    ''' Set up a fresh hand of Texas Hold'em

    Builds the dealer, the players and the judger, deals two hole cards to
    every player, posts the blinds and opens the first betting round.

    Returns:
        (tuple): Tuple containing:

            (dict): The first state of the game
            (int): Current player's id
    '''
    rng = self.np_random
    seats = self.num_players

    # Dealer, players and judger all receive the same random state, so they
    # are created in this fixed order in case a constructor draws from it.
    self.dealer = Dealer(rng)
    self.players = [
        Player(seat, self.init_chips[seat], rng) for seat in range(seats)
    ]
    self.judger = Judger(rng)

    # Two hole cards each, handed out one card at a time around the table
    for _ in range(2):
        for player in self.players:
            player.hand.append(self.dealer.deal_card())

    # No community cards yet
    self.public_cards = []
    self.stage = Stage.PREFLOP

    # Pick the small blind seat at random; the big blind sits right after it
    small_seat = rng.randint(0, seats)
    big_seat = (small_seat + 1) % seats
    self.players[big_seat].bet(chips=self.big_blind)
    self.players[small_seat].bet(chips=self.small_blind)

    # The seat right after the big blind acts first
    self.game_pointer = (big_seat + 1) % seats

    # Open the first betting round; the chips already posted as blinds are
    # handed to the round so that it can account for them.
    self.round = Round(seats,
                       self.big_blind,
                       dealer=self.dealer,
                       np_random=rng)
    posted = [player.in_chips for player in self.players]
    self.round.start_new_round(game_pointer=self.game_pointer, raised=posted)

    # Number of betting rounds completed so far
    self.round_counter = 0

    # Snapshots used for stepping back to an earlier state
    self.history = []

    return self.get_state(self.game_pointer), self.game_pointer
class NolimitholdemGame(Game):
    ''' No-limit Texas Hold'em game with a configurable number of players,
    per-player starting chips and an optional fixed dealer seat.
    '''

    def __init__(self, allow_step_back=False, num_players=2):
        ''' Initialize the class nolimitholdem Game

        Args:
            allow_step_back (bool): If True, `step` snapshots the game before
                every action so that `step_back` can undo it
            num_players (int): Number of players seated at the table
        '''
        self.allow_step_back = allow_step_back
        self.np_random = np.random.RandomState()

        # small blind and big blind
        self.small_blind = 1
        self.big_blind = 2 * self.small_blind

        # config players
        self.num_players = num_players
        self.init_chips = [100] * num_players

        # If None, the dealer will be randomly chosen
        self.dealer_id = None

    def configure(self, game_config):
        ''' Specify some game specific parameters, such as player number,
        initial chips, and dealer id. If dealer_id is None, it will be
        randomly chosen.

        Args:
            game_config (dict): Must provide the keys 'game_player_num',
                'chips_for_each' and 'dealer_id'
        '''
        self.num_players = game_config['game_player_num']
        self.init_chips = game_config['chips_for_each']
        self.dealer_id = game_config['dealer_id']

    def init_game(self):
        ''' Initialize a new hand of No-Limit Texas Hold'em

        Returns:
            (tuple): Tuple containing:

                (dict): The first state of the game
                (int): Current player's id
        '''
        # NOTE(review): once a random dealer has been drawn here it is stored
        # in self.dealer_id, so later calls to init_game reuse the same dealer
        # instead of drawing again. Left unchanged because dealer_id is a
        # public attribute; confirm whether this is intended.
        if self.dealer_id is None:
            self.dealer_id = self.np_random.randint(0, self.num_players)

        # Initialize a dealer that can deal cards
        self.dealer = Dealer(self.np_random)

        # Initialize players to play the game
        self.players = [
            Player(i, self.init_chips[i], self.np_random)
            for i in range(self.num_players)
        ]

        # Initialize a judger class which will decide who wins in the end
        self.judger = Judger(self.np_random)

        # Deal cards to each player to prepare for the first round
        for i in range(2 * self.num_players):
            self.players[i % self.num_players].hand.append(
                self.dealer.deal_card())

        # Initialize public cards
        self.public_cards = []
        self.stage = Stage.PREFLOP

        # Small blind and big blind are the two seats after the dealer
        s = (self.dealer_id + 1) % self.num_players
        b = (self.dealer_id + 2) % self.num_players
        self.players[b].bet(chips=self.big_blind)
        self.players[s].bet(chips=self.small_blind)

        # The player next to the big blind plays first
        self.game_pointer = (b + 1) % self.num_players

        # Initialize a bidding round. In the first round, the big blind and
        # the small blind need to be passed to the round for processing.
        self.round = Round(self.num_players,
                           self.big_blind,
                           dealer=self.dealer,
                           np_random=self.np_random)

        self.round.start_new_round(game_pointer=self.game_pointer,
                                   raised=[p.in_chips for p in self.players])

        # Count the rounds. There are 4 rounds in each game.
        self.round_counter = 0

        # Save the history for stepping back to the last state.
        self.history = []

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_legal_actions(self):
        ''' Return the legal actions for current player

        Returns:
            (list): A list of legal actions
        '''
        return self.round.get_nolimit_legal_actions(players=self.players)

    def step(self, action):
        ''' Get the next state

        Args:
            action: One of the actions returned by `get_legal_actions`

        Returns:
            (tuple): Tuple containing:

                (dict): next player's state
                (int): next player's id

        Raises:
            Exception: If `action` is not currently legal
        '''
        if action not in self.get_legal_actions():
            print(action, self.get_legal_actions())
            print(self.get_state(self.game_pointer))
            raise Exception('Action not allowed')

        if self.allow_step_back:
            # First snapshot the current state. The stage is part of the
            # snapshot so that stepping back across a street boundary also
            # restores it.
            r = deepcopy(self.round)
            b = self.game_pointer
            r_c = self.round_counter
            d = deepcopy(self.dealer)
            p = deepcopy(self.public_cards)
            ps = deepcopy(self.players)
            self.history.append((r, b, r_c, d, p, ps, self.stage))

        # Then we proceed to the next round
        self.game_pointer = self.round.proceed_round(self.players, action)

        # 1 marks a player who takes no further betting decisions
        players_in_bypass = [
            1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN)
            else 0 for player in self.players
        ]
        if self.num_players - sum(players_in_bypass) == 1:
            last_player = players_in_bypass.index(0)
            if self.round.raised[last_player] >= max(self.round.raised):
                # If the last player has put enough chips, he is also bypassed
                players_in_bypass[last_player] = 1

        # If a round is over, we deal more public cards
        if self.round.is_over():
            # Game pointer goes to the first player not in bypass after the
            # dealer, if there is one
            self.game_pointer = (self.dealer_id + 1) % self.num_players
            if sum(players_in_bypass) < self.num_players:
                while players_in_bypass[self.game_pointer]:
                    self.game_pointer = (self.game_pointer +
                                         1) % self.num_players

            # When nobody can act any more, the remaining streets are dealt
            # straight away by falling through the checks below.
            all_bypassed = len(self.players) == sum(players_in_bypass)

            # For the first round, we deal 3 cards
            if self.round_counter == 0:
                self.stage = Stage.FLOP
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1
            # For the following rounds, we deal only 1 card
            if self.round_counter == 1:
                self.stage = Stage.TURN
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1
            if self.round_counter == 2:
                self.stage = Stage.RIVER
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1

            self.round_counter += 1
            self.round.start_new_round(self.game_pointer)

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_state(self, player_id):
        ''' Return player's state

        Also refreshes `self.dealer.pot` with the sum of all chips put in.

        Args:
            player_id (int): player id

        Returns:
            (dict): The state of the player
        '''
        self.dealer.pot = np.sum([player.in_chips for player in self.players])

        chips = [self.players[i].in_chips for i in range(self.num_players)]
        legal_actions = self.get_legal_actions()
        state = self.players[player_id].get_state(self.public_cards, chips,
                                                  legal_actions)
        state['stakes'] = [
            self.players[i].remained_chips for i in range(self.num_players)
        ]
        state['current_player'] = self.game_pointer
        state['pot'] = self.dealer.pot
        state['stage'] = self.stage
        return state

    def step_back(self):
        ''' Return to the previous state of the game

        Returns:
            (bool): True if the game steps back successfully
        '''
        if len(self.history) > 0:
            (self.round, self.game_pointer, self.round_counter, self.dealer,
             self.public_cards, self.players,
             self.stage) = self.history.pop()
            return True
        return False

    def get_player_num(self):
        ''' Return the number of players in No Limit Texas Hold'em

        Returns:
            (int): The number of players in the game
        '''
        return self.num_players

    def get_payoffs(self):
        ''' Return the payoffs of the game

        Returns:
            (list): Each entry corresponds to the payoff of one player
        '''
        # Players who are neither alive nor all-in contribute no hand (None)
        hands = [
            p.hand + self.public_cards
            if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None
            for p in self.players
        ]
        chips_payoffs = self.judger.judge_game(self.players, hands)
        return chips_payoffs

    @staticmethod
    def get_action_num():
        ''' Return the number of applicable actions

        Returns:
            (int): The number of actions, one per member of `Action`
        '''
        return len(Action)
class NolimitholdemGame(Game):
    ''' No-limit Texas Hold'em game that additionally records the actions
    taken and exposes pot-relative features in the player state.
    '''

    def __init__(self, allow_step_back=False, num_players=2):
        ''' Initialize the class nolimitholdem Game

        Args:
            allow_step_back (bool): If True, `step` snapshots the game before
                every action so that `step_back` can undo it
            num_players (int): Number of players seated at the table
        '''
        self.allow_step_back = allow_step_back
        self.np_random = np.random.RandomState()

        # small blind and big blind
        self.small_blind = 1
        self.big_blind = 2 * self.small_blind

        # config players; every player starts with the same number of chips
        self.num_players = num_players
        self.init_chips = 100

    def init_game(self):
        ''' Initialize a new hand of No-Limit Texas Hold'em

        Returns:
            (tuple): Tuple containing:

                (dict): The first state of the game
                (int): Current player's id
        '''
        # Initialize a dealer that can deal cards
        self.dealer = Dealer(self.np_random)

        # Initialize the players to play the game
        self.players = [
            Player(i, self.init_chips, self.np_random)
            for i in range(self.num_players)
        ]

        # Initialize a judger class which will decide who wins in the end
        self.judger = Judger(self.np_random)

        # Deal cards to each player to prepare for the first round
        for i in range(2 * self.num_players):
            self.players[i % self.num_players].hand.append(
                self.dealer.deal_card())

        # Initialize public cards
        self.public_cards = []
        self.stage = Stage.PREFLOP

        # Randomly choose a small blind; the big blind is the next seat
        s = self.np_random.randint(0, self.num_players)
        b = (s + 1) % self.num_players
        self.players[b].bet(chips=self.big_blind)
        self.players[s].bet(chips=self.small_blind)

        # The player next to the big blind plays first
        self.game_pointer = (b + 1) % self.num_players

        # Initialize a bidding round. In the first round, the big blind and
        # the small blind need to be passed to the round for processing.
        self.round = Round(self.num_players,
                           self.big_blind,
                           dealer=self.dealer,
                           np_random=self.np_random)

        self.round.start_new_round(game_pointer=self.game_pointer,
                                   raised=[p.in_chips for p in self.players])

        # Count the rounds. There are 4 rounds in each game.
        self.round_counter = 0

        # Save the history for stepping back to the last state.
        self.history = []

        # Every entry is [player id, round counter, action]
        self.action_history = []

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_legal_actions(self):
        ''' Return the legal actions for current player

        Returns:
            (list): A list of legal actions
        '''
        return self.round.get_nolimit_legal_actions(players=self.players)

    def step(self, action):
        ''' Get the next state

        Args:
            action: One of the actions returned by `get_legal_actions`

        Returns:
            (tuple): Tuple containing:

                (dict): next player's state
                (int): next player's id

        Raises:
            Exception: If `action` is not currently legal
        '''
        if action not in self.get_legal_actions():
            print(action, self.get_legal_actions())
            print(self.get_state(self.game_pointer))
            raise Exception('Action not allowed')

        if self.allow_step_back:
            # First snapshot the current state. The stage is part of the
            # snapshot so that stepping back across a street boundary also
            # restores it.
            r = deepcopy(self.round)
            b = self.game_pointer
            r_c = self.round_counter
            d = deepcopy(self.dealer)
            p = deepcopy(self.public_cards)
            ps = deepcopy(self.players)
            ac = deepcopy(self.action_history)
            self.history.append((r, b, r_c, d, p, ps, ac, self.stage))

        # Record who acted, in which round, and what they did
        self.action_history.append(
            [self.game_pointer, self.round_counter, action])

        # Then we proceed to the next round
        self.game_pointer = self.round.proceed_round(self.players, action)

        # 1 marks a player who takes no further betting decisions
        players_in_bypass = [
            1 if player.status in (PlayerStatus.FOLDED, PlayerStatus.ALLIN)
            else 0 for player in self.players
        ]

        # If a round is over, we deal more public cards
        if self.round.is_over():
            # When nobody can act any more, all remaining streets are dealt
            # at once.
            all_bypassed = len(self.players) == np.sum(players_in_bypass)

            # For the first round, we deal 3 cards
            if self.round_counter == 0:
                self.stage = Stage.FLOP
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1
                    self.stage = Stage.TURN
                    self.public_cards.append(self.dealer.deal_card())
                    self.round_counter += 1
                    self.stage = Stage.RIVER
                    self.public_cards.append(self.dealer.deal_card())
                    self.round_counter += 1
            # For the following rounds, we deal only 1 card
            elif self.round_counter == 1:
                self.stage = Stage.TURN
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1
                    self.stage = Stage.RIVER
                    self.public_cards.append(self.dealer.deal_card())
                    self.round_counter += 1
            elif self.round_counter == 2:
                self.stage = Stage.RIVER
                self.public_cards.append(self.dealer.deal_card())
                if all_bypassed:
                    self.round_counter += 1

            self.round_counter += 1
            self.round.start_new_round(self.game_pointer)

        state = self.get_state(self.game_pointer)

        return state, self.game_pointer

    def get_state(self, player_id):
        ''' Return player's state

        Also refreshes `self.dealer.pot` with the sum of all chips put in.
        On top of what the player object reports, the state carries several
        features expressed as a fraction of the pot.

        Args:
            player_id (int): player id

        Returns:
            (dict): The state of the player
        '''
        self.dealer.pot = np.sum([player.in_chips for player in self.players])
        pot = self.dealer.pot

        chips = [self.players[i].in_chips for i in range(self.num_players)]
        highest_bet = max(chips)
        legal_actions = self.get_legal_actions()
        me = self.players[player_id]
        state = me.get_state(self.public_cards, chips, legal_actions)

        # Fraction of the pot needed to call, capped by the chips left
        state['to_call'] = min(highest_bet - me.in_chips,
                               me.remained_chips) / pot
        # Fraction of the pot needed to go all-in. Not strictly true, since
        # this player might hold the most chips.
        state['to_allin'] = me.remained_chips / pot

        # Number of players that have not folded, not counting one seat
        players_still_in = []
        state['n_others'] = -1
        for i, p in enumerate(self.players):
            if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN):
                state['n_others'] += 1
                players_still_in.append(i)

        # Number of players whose chips in the pot match the highest bet
        state['already_called'] = 0
        for p in self.players:
            if p.in_chips == highest_bet:
                state['already_called'] += 1

        # Hand out a copy: the internal list keeps growing with every step
        # and must not change states that were returned earlier.
        state['action_history'] = list(self.action_history)

        # Count recorded actions by players still in the hand, stopping at
        # the first action taken by this player.
        people_before = 0
        for action in state['action_history']:
            if action[0] in players_still_in:
                people_before += 1
            if action[0] == player_id:
                break
        state['position'] = people_before / len(players_still_in)

        # Sum up how aggressively the remaining opponents have raised, split
        # into earlier rounds and the current round.
        state['past_aggression'] = 0
        state['street_aggression'] = 0
        for action in state['action_history']:
            if action[0] in players_still_in and action[0] != player_id:
                if action[2] == Action.RAISE_HALF_POT:
                    aggro = 0.5
                elif action[2] == Action.RAISE_POT:
                    aggro = 1
                elif action[2] == Action.ALL_IN:
                    aggro = 2.5
                else:
                    aggro = 0
                if action[1] != self.round_counter:
                    state['past_aggression'] += aggro
                else:
                    state['street_aggression'] += aggro

        state['need_to_call'] = (state['n_others'] +
                                 1) - state['already_called']

        # Remaining chips of every player, normalized to the pot
        state['stakes'] = [
            self.players[i].remained_chips / pot
            for i in range(self.num_players)
        ]
        state['current_player'] = self.game_pointer
        state['pot'] = self.dealer.pot
        state['stage'] = self.stage
        return state

    def step_back(self):
        ''' Return to the previous state of the game

        Returns:
            (bool): True if the game steps back successfully
        '''
        if len(self.history) > 0:
            (self.round, self.game_pointer, self.round_counter, self.dealer,
             self.public_cards, self.players, self.action_history,
             self.stage) = self.history.pop()
            return True
        return False

    def get_payoffs(self):
        ''' Return the payoffs of the game

        Calling this also clears the recorded action history.

        Returns:
            (list): Each entry corresponds to the payoff of one player
        '''
        # Players who are neither alive nor all-in contribute no hand (None)
        hands = [
            p.hand + self.public_cards
            if p.status in (PlayerStatus.ALIVE, PlayerStatus.ALLIN) else None
            for p in self.players
        ]
        chips_payoffs = self.judger.judge_game(self.players, hands)
        self.action_history = []
        return chips_payoffs

    @staticmethod
    def get_action_num():
        ''' Return the number of applicable actions

        Returns:
            (int): The number of actions, one per member of `Action`
        '''
        return len(Action)