def _prepare_table(self, *players):
    """Build a fresh table and hand, then seat and deal in every player.

    A new evaluator, table, hand and deck are created on each call. Five
    cards are drawn up front and kept in ``self.board``; each player then
    joins the table with ``Table.BUY_IN``, is activated, receives two
    pocket cards and the shared evaluator, and has ``bot._rank`` cleared.
    Finally the hand is pointed at the table's players and the table is
    prepped with the hand.
    """
    evaluator = self.evaluator = deuces.Evaluator()
    table = self._table = Table()
    hand = self.hand = Hand()

    cards = deuces.Deck()
    # The board is drawn before any pocket cards; keep this draw order.
    self.board = cards.draw(5)

    for seat in players:
        seat.join_table(table, Table.BUY_IN)
        seat.activate()
        seat.set_pocket(*cards.draw(2))
        seat.set_evaluator(evaluator)
        seat.bot._rank = None

    hand.players = table.players
    table.prep(hand)
class Trainer(object):
    """
    Trains a bot by letting it play heads-up against a copy of itself and
    adjusting its mixed strategy from accumulated regrets (regret
    minimization, aiming at a Nash equilibrium). The trainer acts as the
    dealer and implements its own game loop for two-player games.

    ==================== =====================================================
    Attribute            Description
    ==================== =====================================================
    DATA:
    history              list; stores training history (ongoing)
    FUNCTIONS:
    train()              train the bot and adjust its strategy
    headsup_simulate()   simulate a heads-up game
    ==================== =====================================================
    """

    def __init__(self):
        """Create a trainer with an empty history, no table and an evaluator."""
        self.history = []
        self._table = None
        self.evaluator = deuces.Evaluator()

    _ACTIONS = ["CALL", "CHECK", "FOLD", "RAISE", "BET"]
    _SAMPLE_ITER = 100
    _TRAIN_ITER = 10

    def train(self, player):
        """Train ``player`` against a copy that shares its strategy object.

        Runs ``headsup_simulate`` ``_TRAIN_ITER`` times. The copy's bot
        references the very same strategy, so updates affect both players.
        """
        player2 = Player(player.name + "COPY")
        player2.bot = EasyBot(player.bot.name + "_COPY")
        player2.bot.strategy = player.bot.strategy
        for _ in range(self._TRAIN_ITER):
            self.headsup_simulate(player, player2)

    def headsup_simulate(self, *players):
        """Simulate heads-up play and accumulate regrets for both players.

        For every strategy entry each pure action is tried in turn, the
        sampled expected value is compared with the baseline obtained from
        the current strategy, and the differences are added to the stored
        regrets.

        Raises:
            HeardsUpError: if the number of players is not exactly two.
        """
        if len(players) != 2:
            # should only be a 2 player game
            raise HeardsUpError()
        self._prepare_table(*players)
        # baseline expected values for both players under the current strategy
        evs = self._get_expected_values(players[0], players[1])
        for n, player in enumerate(players):
            for _iter in range(self._SAMPLE_ITER):
                for key, value in player.bot.strategy.items():
                    regrets = []
                    for i in range(len(value)):
                        # switch to the pure strategy that always picks i
                        new_value = [0] * len(value)
                        new_value[i] = 1
                        player.bot.strategy[key] = tuple(new_value)
                        # compare the new ev with the baseline
                        new_evs = self._get_expected_values(players[0],
                                                            players[1])
                        regrets.append(new_evs[n] - evs[n])
                    # restore the original mixed strategy for this key
                    player.bot.strategy[key] = value
                    # sum up regrets
                    player.bot.strategy.regrets[key] = tuple(
                        np.add(player.bot.strategy.regrets[key], regrets))
                # NOTE(review): the nesting level of refine() was
                # reconstructed from a flattened source -- confirm it should
                # run once per sampling sweep.
                player.bot.strategy.refine()

    def _prepare_table(self, *players):
        """Create a new table and hand, deal the board and seat the players."""
        self.evaluator = deuces.Evaluator()
        self._table = Table()
        self.hand = Hand()
        deck = deuces.Deck()
        self.board = deck.draw(5)
        for player in players:
            player.join_table(self._table, Table.BUY_IN)
            player.activate()
            player.set_pocket(*deck.draw(2))
            player.set_evaluator(self.evaluator)
            player.bot._rank = None
        self.hand.players = self._table.players
        self._table.prep(self.hand)

    def _get_expected_values(self, player1, player2):
        """Play ``_SAMPLE_ITER`` games and return the mean payoff per player."""
        evs = (0, 0)
        for _iter in range(self._SAMPLE_ITER):
            self._play_game()
            evs = np.add(evs, self._refound(player1, player2))
        return [v / self._SAMPLE_ITER for v in evs]

    def _play_game(self):
        """Play one game: reset, run pre-flop, then every street of the hand."""
        self._reset_game()
        self._run_preflop()
        for street in self.hand.streets:
            self._reset_hand(street.name)
            self._reset_actions()
            self._run_betting()

    def _run_preflop(self):
        """Post blinds for every player and run the pre-flop betting round."""
        self._reset_hand("PRE_FLOP")
        # post blinds
        for player in self._table.players:
            player.bet(Table.BLINDS[1])
            player.FOLD = False
        self._reset_actions()
        self._run_betting()

    def _run_betting(self, display=False):
        """Alternate between the two players until no more action is needed.

        If ``display`` is true, each chosen action is printed.
        """
        i = 0
        while self._need_action():
            player = self._table.players[i]
            context = self.hand.get_profile()
            context["legal_actions"] = self._get_legal_action(player)
            action = player.bot.get_action(context)
            self._process_action(player, action)
            if display:
                print(action)
            # heads-up: switch to the other seat
            i = 1 - i

    def _refound(self, player1, player2):
        """Return ``(player1.ev, player2.ev)`` once a game has ended.

        The amount at stake is the smaller of the two bet amounts. A fold
        decides the game immediately; otherwise both hands are evaluated and
        the numerically smaller rank wins. Payoffs are zero-sum.
        """
        effective_stack = min(player1.bet_amount, player2.bet_amount)
        if player1.FOLD:
            player1.ev = -effective_stack
        elif player2.FOLD:
            player1.ev = effective_stack
        else:
            for player in self._table.players:
                player.rank = self.evaluator.evaluate(player.pocket,
                                                      self.hand.board)
            if player1.rank == player2.rank:
                player1.ev = 0
            elif player1.rank < player2.rank:
                player1.ev = effective_stack
            else:
                player1.ev = -effective_stack
        player2.ev = -player1.ev
        return (player1.ev, player2.ev)

    def _get_legal_action(self, player):
        """Build the dict of legal actions for ``player`` in the current state.

        FOLD is always offered. With an outstanding bet the player may CALL
        or RAISE; without one the player may CHECK or BET.
        """
        legal_actions = dict()
        legal_actions["FOLD"] = api.LegalFold()
        cur_bet = self.hand.cur_bet
        if cur_bet:
            call = api.LegalCall()
            call["amount"] = min(cur_bet, player.stack)
            legal_actions["CALL"] = call
            raise_ = api.LegalRaise()
            raise_["min"] = cur_bet + max(cur_bet, Table.BLINDS[1])
            raise_["max"] = player.stack
            legal_actions["RAISE"] = raise_
        else:
            # NOTE(review): BET is assumed to belong to the no-bet branch;
            # the flattened source does not show the nesting -- confirm.
            legal_actions["CHECK"] = api.LegalCheck()
            new_bet = api.LegalBet()
            new_bet["min"] = min(player.stack, Table.BLINDS[1])
            new_bet["max"] = player.stack
            legal_actions["BET"] = new_bet
        return legal_actions

    def _reset_game(self):
        """Restore stacks and bets and clear the community cards and pot."""
        for player in self._table.players:
            player.stack = Table.BUY_IN
            player.bet_amount = 0
        self.hand._flop.cards = []
        self.hand._turn.cards = []
        self.hand._river.cards = []
        self.hand.pot = 0

    def _reset_hand(self, street):
        """Enter ``street``: clear the current bet and reveal its board cards.

        Street names are compared by value (``==``). Identity comparison
        against string literals only works for interned strings and would
        otherwise send every street to the river branch.
        """
        self.hand.street = street
        self.hand.cur_bet = 0
        if street == "PRE_FLOP":
            return
        elif street == "FLOP":
            self.hand._flop.cards.extend(self.board[0:3])
        elif street == "TURN":
            self.hand._turn.cards.extend(self.board[3:4])
        else:
            self.hand._river.cards.extend(self.board[4:5])

    def _reset_actions(self):
        """Clear the CALL and CHECK flags of every player."""
        for player in self._table.players:
            player.CALL = False
            player.CHECK = False

    def _need_action(self):
        """Tell whether the current betting round still needs an action.

        In a two-player game any call or fold ends the street; otherwise
        the round continues until every player has checked.
        """
        checked = True
        for player in self._table.players:
            if player.CALL or player.FOLD:
                return False
            checked = checked and player.CHECK
        return not checked

    def _process_action(self, player, action):
        """Apply ``action`` to ``player`` and update the hand's current bet.

        The flag named by ``action["type"]`` is set on the player. A truthy
        amount is bet and becomes the current bet; a CHECK bets 0; a FOLD
        folds the player and marks the current bet as -1.
        """
        setattr(player, action["type"], True)
        if action["amount"]:
            player.bet(action["amount"])
            self.hand.cur_bet = action["amount"]
        elif action["type"] == "CHECK":
            player.bet(0)
            self.hand.cur_bet = 0
        elif action["type"] == "FOLD":
            player.fold()
            self.hand.cur_bet = -1