def __init__(self, rules, setup, logger, players):
    """Store the collaborators needed to play a two-player contest.

    Args:
        rules: Game rules object, stored as-is.
        setup: Game setup, stored and also used to build the bet generator.
        logger: Logger object, stored as-is.
        players: Indexable of at least two players; index 0 becomes the
            point-of-view player and index 1 the opponent.
    """
    self.rules, self.setup, self.logger = rules, setup, logger
    # The first listed player is the one whose perspective is tracked.
    self.pov = players[0]
    self.opponent = players[1]
    self.available_bets = AvailableBets(setup)
def __init__(self, rules, setup, abstracted=False):
    """Initialise the trainer with an empty infoset-to-strategy table.

    Args:
        rules: Game rules object, stored as-is.
        setup: Game setup, stored and also used to build the bet generator.
        abstracted: Flag stored on the instance (defaults to False).
    """
    self.rules, self.setup = rules, setup
    self.available_bets = AvailableBets(setup)
    # The two player identifiers used by this trainer.
    self.PLAYERS = [1, 2]
    # Filled lazily: infoset -> strategy object.
    self.infoset_strategy_map = dict()
    self.abstracted = abstracted
class Contest:
    """Plays one hand between a point-of-view player and an opponent."""

    def __init__(self, rules, setup, logger, players):
        """Store the collaborators needed to play a contest.

        Args:
            rules: Game rules object; its ``deal()`` is used by ``play``.
            setup: Game setup, stored and used to build the bet generator.
            logger: Receives ``round``, ``bet``, ``evaluate`` and
                ``earnings`` calls while a hand is played.
            players: Indexable of at least two players; index 0 is the
                point-of-view player, index 1 the opponent.
        """
        self.rules, self.setup, self.logger = rules, setup, logger
        self.pov = players[0]
        self.opponent = players[1]
        self.available_bets = AvailableBets(setup)

    def play(self):
        """Play a single hand and return its utility for the pov player.

        Seats are assigned at random; the player in seat 2 is told it is the
        small blind and receives ``deal.small``, the other receives
        ``deal.big``. Players then alternate bets until the state is
        terminal, with both players being shown the next board entry each
        time the state's round counter increases.
        """
        for participant in (self.pov, self.opponent):
            participant.new_game()

        pov_seat = random.choice([1, 2])
        opponent_seat = 3 - pov_seat
        self.pov.take_seat(pov_seat == 2)
        self.opponent.take_seat(opponent_seat == 2)

        deal = self.rules.deal()
        if pov_seat == 2:
            pov_cards, opponent_cards = deal.small, deal.big
        else:
            pov_cards, opponent_cards = deal.big, deal.small
        self.pov.receive_cards(pov_cards)
        self.opponent.receive_cards(opponent_cards)

        # Last round index for which board cards were already revealed.
        seen_round = 0
        gs = State(rules=self.rules, setup=self.setup, deal=deal)
        self.logger.round(gs, pov_seat)

        while not gs.is_terminal():
            turn = gs.get_players_turn()
            if turn == pov_seat:
                actor, watcher = self.pov, self.opponent
            else:
                actor, watcher = self.opponent, self.pov

            bets_by_type = self.available_bets.get_bets_by_action_type(
                gs._my_contrib(turn), gs._other_contrib(turn))
            bets_as_numbers = self.available_bets.get_bets_as_numbers(
                gs._my_contrib(turn), gs._other_contrib(turn))
            bet = actor.bet(bets_by_type, bets_as_numbers)

            watcher.opponent_bets(bet)
            gs.update(bet)
            self.logger.bet(gs, actor, pov_seat, bet)

            # Nothing to reveal if the hand ended or the round did not move.
            if gs.is_terminal() or gs.round <= seen_round:
                continue

            revealed = deal.board[seen_round]
            self.pov.advance_round(revealed)
            self.opponent.advance_round(revealed)
            self.logger.round(gs, pov_seat)
            seen_round = gs.round

        # Hand is over: score it from the pov player's seat.
        util = gs.get_utility(pov_seat)
        self.logger.evaluate(gs, pov_seat)
        self.logger.earnings(util)
        return util
def __init__(self, rules, setup, blueprint, abstracting=False):
    """Create the bot and load its strategy map from ``blueprint``.

    Args:
        rules: Game rules object, stored as-is.
        setup: Game setup, stored and used to build the bet generator.
        blueprint: Passed to ``load`` to obtain the strategy map.
        abstracting: Flag stored on the instance (defaults to False).
    """
    self.rules, self.setup = rules, setup
    self.available_bets = AvailableBets(setup)
    # Seat is not known at construction time.
    self.is_small_blind = None
    self.abstracting = abstracting
    # The strategy is read from the blueprint rather than trained here.
    self.strategy_map = load(blueprint)
    self.surplus = 0
    print("Strategy map loaded")
class ESMCCFR_P:
    """External-sampling Monte Carlo counterfactual regret trainer.

    Strategies are accumulated per information set in
    ``infoset_strategy_map`` (infoset -> ``Strategy``). Each strategy object
    is expected to expose ``calculate_strategy()``, ``regret_sum`` and
    ``count``, which are the only members used here.
    """

    def __init__(self, rules, setup, abstracted=False):
        """Initialise the trainer with an empty strategy table.

        Args:
            rules: Game rules object; its ``deal()`` is used for new games.
            setup: Game setup, stored and used to build the bet generator.
            abstracted: Forwarded to ``get_bets_as_numbers`` when listing
                the bets available at a node.
        """
        self.rules = rules
        self.setup = setup
        self.available_bets = AvailableBets(setup)
        self.PLAYERS = [1, 2]
        self.infoset_strategy_map = {}
        self.abstracted = abstracted

    def get_random_bet(self, player_strategy):
        """Return a bet index sampled with the given weights.

        Args:
            player_strategy: Sequence of non-negative weights, one per bet.

        Returns:
            The index of the sampled bet.
        """
        return random.choices(range(len(player_strategy)),
                              weights=player_strategy,
                              k=1)[0]

    def new_game(self):
        """Return a fresh ``State`` built from a newly dealt hand."""
        return State(rules=self.rules,
                     setup=self.setup,
                     deal=self.rules.deal())

    def run(self, T, save_path='strategy-leduc-10-1-1.csv'):
        """Run ``T`` training iterations and return the strategy table.

        Each iteration traverses one freshly dealt game per player in
        ``PLAYERS``. Progress, elapsed time and the average accumulated
        utility per iteration are printed.

        Args:
            T: Number of iterations.
            save_path: Where the strategy table is written with ``save``.
                Defaults to the historical file name; pass ``None`` to skip
                saving.

        Returns:
            ``self.infoset_strategy_map``.
        """
        utility = 0
        start = timeit.default_timer()
        printProgressBar(0, T)
        for t in range(T):
            for player in self.PLAYERS:
                utility += self.traverse_ESMCCFR(self.new_game(), player)
            printProgressBar(t + 1, T)
        stop = timeit.default_timer()
        print("Time elapsed: %.2f" % (stop - start, ))
        # Guard the average so a zero-iteration run does not divide by zero.
        average = utility / T if T else 0.0
        print("Average game value: %.4f" % (average, ))
        if save_path is not None:
            save(save_path, self.infoset_strategy_map)
        return self.infoset_strategy_map

    def traverse_ESMCCFR(self, state, player):
        """Traverse the game tree from ``state`` for the updating ``player``.

        At nodes where ``player`` acts, every available bet is explored and
        the cumulative regrets are updated. At nodes where the other player
        acts, a single bet is sampled from the current strategy and its
        counter is incremented.

        Args:
            state: Mutable game state; it is updated in place and restored
                with ``reverse_update`` before returning.
            player: The player (1 or 2) whose regrets are being updated.

        Returns:
            The utility of the traversed subtree for ``player``.

        Raises:
            Exception: If the state reports a turn for neither player.
        """
        if state.is_terminal():
            return state.get_utility(player)

        other_player = 3 - player
        player_turn = state.get_players_turn()
        possible_bets = self.available_bets.get_bets_as_numbers(
            state._my_contrib(player_turn),
            state._other_contrib(player_turn), self.abstracted)

        # Look up, or lazily create, the strategy for the acting player's
        # information set.
        infoset = state.get_infoset(player_turn)
        if infoset in self.infoset_strategy_map:
            strategy = self.infoset_strategy_map[infoset]
        else:
            strategy = Strategy(len(possible_bets))
            self.infoset_strategy_map[infoset] = strategy

        player_strategy = strategy.calculate_strategy()

        if player_turn == player:
            # Expected value of this node for ``player`` under the current
            # strategy, plus the value of each individual bet.
            value = 0
            value_bet = [0] * len(player_strategy)
            for bet_index, bet in enumerate(possible_bets):
                # Apply the bet, evaluate the subtree, then undo the bet so
                # the shared state can be reused for the next alternative.
                memento = state.update(bet)
                va = self.traverse_ESMCCFR(state, player)
                state.reverse_update(memento)

                value_bet[bet_index] = va
                value += player_strategy[bet_index] * va

            # Regret of a bet is how much better it did than the node value.
            for bet_index in range(len(possible_bets)):
                strategy.regret_sum[bet_index] += value_bet[bet_index] - value
            return value

        if player_turn == other_player:
            # External sampling: follow one sampled bet for the other player
            # and record that it was chosen.
            bet_index = self.get_random_bet(player_strategy)
            bet = possible_bets[bet_index]
            strategy.count[bet_index] += 1

            memento = state.update(bet)
            val = self.traverse_ESMCCFR(state, player)
            state.reverse_update(memento)
            return val

        raise Exception('How did we get here? There are no other players')
from AvailableBets import AvailableBets
from Setup import Setup


def _raises(func, *args):
    """Return True if ``func(*args)`` raises an ``Exception``.

    Only ``Exception`` subclasses count, so ``KeyboardInterrupt`` and
    ``SystemExit`` still propagate. The callable is resolved by the caller,
    outside the ``try``, so a misspelled method name fails loudly instead of
    being mistaken for the expected error.
    """
    try:
        func(*args)
    except Exception:
        return True
    return False


# throw error when it's not my turn
available_bets = AvailableBets(Setup(small_blind=2, big_blind=5, stack_size=11))
exception_thrown = _raises(available_bets.get_bets_as_numbers, 6, 5)
assert exception_thrown

exception_thrown = _raises(available_bets.get_bets_by_action_type, 6, 5)
assert exception_thrown

# throw error when I'm out of chips
# NOTE(review): (8, 7) also has my contribution above the other player's,
# like the "not my turn" case above - confirm this exercises the
# out-of-chips path and not the turn check.
available_bets = AvailableBets(Setup(small_blind=2, big_blind=5, stack_size=11))
exception_thrown = _raises(available_bets.get_bets_as_numbers, 8, 7)
assert exception_thrown
class ESMCCFRPlusTraining:
    """Bot that plays from a precomputed infoset -> strategy map.

    The bot keeps its own ``State`` mirror of the hand, updated through
    ``bet`` (its own actions), ``opponent_bets`` and ``advance_round``.
    Seat 2 is the small blind and seat 1 the big blind.
    """

    def __init__(self, rules, setup, blueprint, abstracting=False):
        """Create the bot and load its strategy map.

        Args:
            rules: Game rules object, stored and used to build states.
            setup: Game setup, stored and used to build the bet generator.
            blueprint: Passed to ``load`` to obtain the strategy map.
            abstracting: When true, bets are requested with the abstraction
                flag set and odd opponent raises are rounded to even ones.
        """
        self.rules = rules
        self.setup = setup
        self.available_bets = AvailableBets(setup)
        # Unknown until ``take_seat`` is called.
        self.is_small_blind = None
        self.abstracting = abstracting
        self.strategy_map = load(blueprint)
        # Running total of chips added (+) or removed (-) by rounding the
        # opponent's raises during the current game.
        self.surplus = 0
        print("Strategy map loaded")

    def _my_seat(self):
        """Return this bot's seat: 2 for the small blind, otherwise 1."""
        return 2 if self.is_small_blind else 1

    def _opponent_seat(self):
        """Return the seat that is not this bot's."""
        return 3 - self._my_seat()

    def _my_contrib(self):
        """Return the state's ``_my_contrib`` value for this bot's seat."""
        return self.state._my_contrib(self._my_seat())

    def _opponent_contrib(self):
        """Return the state's ``_other_contrib`` value for this bot's seat."""
        return self.state._other_contrib(self._my_seat())

    def new_game(self):
        """Reset the mirrored state to an empty deal and clear the surplus."""
        self.state = State(
            self.rules, self.setup,
            Deal(rules=self.rules, big=[], small=[], board=[]))
        self.surplus = 0

    def take_seat(self, is_small_blind):
        """Record whether this bot is the small blind for the coming hand."""
        self.is_small_blind = is_small_blind

    def receive_cards(self, cards):
        """Store the bot's private cards on the matching side of the deal.

        ``take_seat`` must have been called first.
        """
        assert self.is_small_blind is not None
        if self.is_small_blind:
            self.state.deal.small = cards
        else:
            self.state.deal.big = cards

    # The parameters are unused; they exist so the bot can be driven by
    # Contest.py like any other player.
    def bet(self, actions_by_type=None, actions_by_numbers=None, state=None):
        """Choose a bet, apply it to the mirrored state and return it.

        Raises:
            Exception: If the mirrored state says it is not this bot's turn.
        """
        if self.state.player_turn != self._my_seat():
            raise Exception('Player turn %d is wrong. I am %d.' %
                            (self.state.player_turn, self._my_seat()))
        bet = self._select_bet()
        self.state.update(bet)
        return bet

    def _select_bet(self):
        """Sample a bet from the stored strategy, or check/call if unknown.

        When the current infoset is in the strategy map, a bet is sampled
        from its average strategy (and diagnostic lines are printed).
        Otherwise the bot calls if a call is available and checks if not.
        """
        infoset = self.state.get_infoset()
        if infoset in self.strategy_map:
            bets = self.available_bets.get_bets_as_numbers(
                self._my_contrib(), self._opponent_contrib(),
                self.abstracting)
            strategy = self.strategy_map[infoset]
            player_strategy = strategy.get_average_strategy()
            print(" |", infoset)
            print(" |", bets)
            print(
                " |",
                self.available_bets.get_bets_by_action_type(
                    self.state._my_contrib(self.state.player_turn),
                    self.state._other_contrib(self.state.player_turn)))
            print(" |", player_strategy)
            chosen = random.choices(range(len(player_strategy)),
                                    weights=player_strategy,
                                    k=1)[0]
            return bets[chosen]

        print("Notice: infoset %s not found; checking/calling" % str(infoset))
        actions = self.available_bets.get_bets_by_action_type(
            self.state._my_contrib(self.state.player_turn),
            self.state._other_contrib(self.state.player_turn),
            self.abstracting)
        return actions['call'][0] if 'call' in actions else actions['check'][0]

    def advance_round(self, cards):
        """Append newly revealed board cards and sanity-check the round.

        Raises:
            Exception: If the state's round does not equal the number of
                board entries received so far.
        """
        self.state.deal.board.append(cards)
        if self.state.round != len(self.state.deal.board):
            raise Exception('Wrong number of rounds, deal: %s, round: %d' %
                            (str(self.state.deal), self.state.round))

    def opponent_bets(self, bet):
        """Apply the opponent's bet to the mirrored state.

        When abstracting, an odd raise is first rounded to a neighbouring
        even amount so that it matches the abstracted game.

        Raises:
            Exception: If the mirrored state says it is not the opponent's
                turn.
        """
        if self.state.player_turn != self._opponent_seat():
            raise Exception('Player turn %d is wrong. They are %d.' %
                            (self.state.player_turn, self._opponent_seat()))
        if self.abstracting:
            # The legal bets are the opponent's, so the contributions must be
            # taken from the acting (opponent) seat, as every other call site
            # does for the player whose turn it is.
            acting_seat = self._opponent_seat()
            opponent_bets_were = self.available_bets.get_bets_by_action_type(
                self.state._my_contrib(acting_seat),
                self.state._other_contrib(acting_seat), False)
            if bet in opponent_bets_were.get('raises', ()):
                bet = self._round_opponent_raise(bet, opponent_bets_were)
        self.state.update(bet)

    def train(self, T=2000):
        """Replace the strategy map with one trained for ``T`` iterations.

        Returns:
            ``self``, so the call can be chained.
        """
        esmccfr = ESMCCFR_P(self.rules, self.setup)
        self.strategy_map = esmccfr.run(T)
        return self

    def _round_opponent_raise(self, bet, bets_were):
        """Round an odd opponent raise to a neighbouring even amount.

        ``self.surplus`` is incremented when the bet is rounded up and
        decremented when it is rounded down, so that later roundings can
        compensate for earlier ones.

        Args:
            bet: The raise the opponent made; must be in
                ``bets_were['raises']``.
            bets_were: Mapping with a ``'raises'`` collection of the raises
                that were available to the opponent.

        Returns:
            ``bet`` unchanged if it is even, otherwise ``bet + 1`` or
            ``bet - 1``.
        """
        raises = bets_were['raises']
        assert bet in raises

        # An even bet needs no rounding.
        if bet % 2 == 0:
            return bet

        can_round_down = (bet - 1) in raises
        can_round_up = (bet + 1) in raises

        if not can_round_down:
            # Rounding down would not be a raise, so round up (possibly
            # going all in).
            round_up = True
        elif not can_round_up:
            # Rounding up is not an available raise, so round down.
            round_up = False
        else:
            # Both directions are valid raises: round in the direction that
            # evens out the chips added by earlier roundings, going up when
            # nothing is owed.
            round_up = self.surplus <= 0

        if round_up:
            self.surplus += 1
            return bet + 1
        self.surplus -= 1
        return bet - 1

    def __str__(self):
        """Return the fixed display name used for this bot."""
        return "EsmccfrBot\t"