def get_card_dict():
    """Map every card of a full deck to its position in that deck.

    Returns:
        dict: ``{card: index}`` where ``index`` is the position of ``card``
        in the list returned by ``Deck().GetFullDeck()``.
    """
    # The result is deliberately not bound to a local called ``dict`` so the
    # builtin is not shadowed inside this function.
    full_deck = Deck().GetFullDeck()
    return {card: position for position, card in enumerate(full_deck)}
def index():
    """Deal one random eight-player hand and return the outcome as JSON.

    Five board cards are drawn, then two hole cards per player. Each player
    is scored with ``Evaluator.evaluate`` and described with
    ``Evaluator.class_to_string``.

    Returns:
        str: JSON object with one entry per player
        (``{"score": ..., "text": ...}``) plus:

        * ``"winners"`` - name of the player with the minimum score (the
          first such player when several share it),
        * ``"tie"`` - ``True`` when more than one player holds that
          minimum score,
        * ``"card"`` - string form of the fixed card ``'Qh'``.
    """
    evaluator = Evaluator()
    deck = Deck()
    card = Card.new('Qh')
    board = deck.draw(5)
    player_names = ("player 1", "player 2", "player 3", "player 4",
                    "player 5", "player 6", "player 7", "player 8")
    players = {}
    output = {}
    for name in player_names:
        hand = deck.draw(2)
        score = evaluator.evaluate(board, hand)
        text = evaluator.class_to_string(evaluator.get_rank_class(score))
        players[name] = score
        output[name] = {'score': score, 'text': text}

    # The winner is the player with the minimum score; min() returns the
    # first key that reaches it.
    winner = min(players, key=players.get)
    best_score = players[winner]
    # The previous expression compared len(values) with len(set(values)),
    # which is True when there is NO duplicate at all. A tie is reported
    # only when the winning score itself is shared.
    tie = sum(1 for score in players.values() if score == best_score) > 1

    output["winners"] = winner
    output["tie"] = tie
    output["card"] = Card.int_to_str(card)
    return json.dumps(output)
def __init__(self, deck: Deck, evaluator: Evaluator):
    """Draw two hands and a board, then precompute buckets and ranks.

    Args:
        deck: Source of cards; two 2-card hands and one 5-card board are
            drawn from it, in that order.
        evaluator: Provides ``effective_rank`` (per-stage bucket index) and
            ``rank`` (final hand rank).
    """
    self._deck = deck
    first_hand = deck.draw(2)
    second_hand = deck.draw(2)
    self._hands = [first_hand, second_hand]
    self._board = deck.draw(5)

    self._hand_bucket_indices = []
    for hand in self._hands:
        per_stage = {}
        for stage in BOARD_CARDS:
            if stage == Stage.SHOWDOWN:
                # Showdown reuses the river bucket instead of recomputing.
                per_stage[stage] = per_stage[Stage.RIVER]
            else:
                visible_board = self.board(stage=stage)
                per_stage[stage] = evaluator.effective_rank(
                    hand, visible_board, MAX_BUCKETS)
        self._hand_bucket_indices.append(per_stage)

    # Final ranks against the full board, small blind first.
    self._hand_ranks = [
        evaluator.rank(self._hands[seat], self._board)
        for seat in (SMALL_BLIND, BIG_BLIND)
    ]
def evaluate(self, hands, boards, threshold):
    """Find the cards that would improve the hand and their average win rate.

    Every card not already in ``hands`` or ``boards`` is temporarily
    appended to ``boards`` (and removed again afterwards):

    * if that makes a 5-card board, the parent evaluator's win rate is
      computed; the card counts when the rate is at least ``threshold``
      and exceeds the current rate by more than 0.1;
    * if that makes a 4-card board, this method recurses and merges the
      nested result when its rate is non-zero.

    Args:
        hands: Hole cards.
        boards: Current board cards; mutated during the call and restored
            card by card.
        threshold: Minimum win rate for a card to count.

    Returns:
        tuple: ``(cards, rate)`` - the qualifying cards and the mean of the
        collected win rates (``0`` when nothing qualified).
    """
    baseline_rate = super(CardCounting, self).evaluate(hands, boards, False)
    rate_total = 0
    samples = 0
    useful_cards = []

    for candidate in Deck().draw(52):
        if candidate in hands or candidate in boards:
            continue

        boards.append(candidate)
        board_size = len(boards)
        if board_size == 5:
            candidate_rate = super(CardCounting, self).evaluate(
                hands, boards, False)
            improves = candidate_rate - baseline_rate > 0.1
            if candidate_rate >= threshold and improves:
                useful_cards.append(candidate)
                rate_total += candidate_rate
                samples += 1
        elif board_size == 4:
            nested_cards, nested_rate = self.evaluate(hands, boards, threshold)
            if nested_rate != 0:
                rate_total += nested_rate
                samples += 1
                for nested_card in nested_cards:
                    if nested_card not in useful_cards:
                        useful_cards.append(nested_card)
        boards.remove(candidate)

    if samples != 0:
        rate_total = rate_total / samples
    return (useful_cards, rate_total)
def __init__(self, ph1, ph2, n=1):
    """Store both player-hand objects, a fresh deck and ``n``.

    Args:
        ph1: Player-hand object for player ``1``.
        ph2: Player-hand object for player ``-1``.
        n: Stored unchanged as ``self.n``.
    """
    self.ph1, self.ph2 = ph1, ph2
    self.n = n
    self.deck = Deck()
    # Lookup from player id (1 or -1) to that player's hand object.
    self.PLAYERS_HAND_DICT = {1: self.ph1, -1: self.ph2}
def watch(self, observation, info):
    """Keep ``self._available_cards`` in sync with what has been played.

    Args:
        observation: Unused.
        info: Dict read for ``'done'``, ``'is_new_round'`` and ``'action'``.

    Nothing happens when ``info['done'] is True``. On a new round the
    available cards are reset to a fresh 52-card draw; otherwise the card
    in ``info['action']`` is removed from the available cards.
    """
    if info['done'] is True:
        return
    if info['is_new_round'] is True:
        self._available_cards = Deck().draw(52)
        return
    self._available_cards.remove(info['action'])
def test_card(self):
    """Stored card strings are shorter than three characters.

    Two cards are created for each of ``self.guest`` and ``self.learner``
    and five community cards for ``self.game``; every stored ``card_str``
    must have fewer than three characters.
    """
    deck = Deck()
    for _ in range(2):
        self.guest.card_player_set.create(
            card_str=Card.int_to_str(deck.draw(1)))
        self.learner.card_player_set.create(
            card_str=Card.int_to_str(deck.draw(1)))

    # assertTrue() on a bare list passes for any non-empty list regardless
    # of its contents, so the per-card checks are reduced with all().
    self.assertTrue(
        all(len(card.card_str) < 3
            for card in self.guest.card_player_set.all()))
    self.assertTrue(
        all(len(card.card_str) < 3
            for card in self.learner.card_player_set.all()))

    for _ in range(5):
        self.game.card_community_set.create(
            card_str=Card.int_to_str(deck.draw(1)))
    # Check the relation that was just filled (the community cards); the
    # earlier version inspected ``self.game.card_player_set`` instead.
    self.assertTrue(
        all(len(card.card_str) < 3
            for card in self.game.card_community_set.all()))
def __init__(self, n_player=4):
    """Create an empty game for ``n_player`` players.

    Args:
        n_player: Number of players; only used to derive ``self.n`` as
            ``52 // n_player - 1``.
    """
    self.deck = Deck()

    # Trump / lead bookkeeping starts zeroed.
    self.trumpCard = self.trumps = self.leadSuit = 0
    self.n = 52 // n_player - 1

    # Player and trick tracking start empty.
    self.playerList, self.playerOrder, self.playedCards = [], [], []
    self.leadPlayer = None
def __init__(self):
    """Set up an empty game: actions, deck, evaluator and unset blinds."""
    self.actions = ['raise', 'check', 'call', 'fold']
    self.anticipatory = 0.1
    self.S = 1000  # starting stack

    # Filled in later; nothing is known about the state or players yet.
    self.GameState = None  # tensor that tracks current state
    self.current_player = None
    self.deck_lookup = None

    self.deck = Deck()
    self.evaluator = Evaluator()

    self.SB = self.BB = None
    # Index 0 is a placeholder; the blinds sit at indices 1 and 2.
    self.players = ['_', self.SB, self.BB]
def move(self, observation):
    """Choose the card to play for the given observation.

    On the first call (no current node yet) a root node is built from the
    player's id, a copy of the hand cards, and the 52 drawn cards minus
    that hand. When ``observation['playing_ids']`` is empty the card
    ``'2c'`` is returned; otherwise the card chosen by
    ``self._mcts.UCTSEARCH`` is returned.

    Args:
        observation: Dict read for ``'hand_cards'`` and ``'playing_ids'``
            and passed on to the search.
    """
    if self._current_node is None:
        own_cards = observation['hand_cards'].copy()
        unseen_cards = Deck().draw(52)
        for held in own_cards:
            unseen_cards.remove(held)
        mcts_info = {
            'my_player_id': self._my_player_id,
            'my_hand_cards': own_cards,
            'available_cards': unseen_cards,
        }
        root_state = MyHeartState(0, observation, mcts_info)
        self._current_node = Node(root_state)

    if len(observation['playing_ids']) == 0:
        return Card.new('2c')

    chosen = self._mcts.UCTSEARCH(self._current_node, observation)
    return chosen.state.get_action_card()
def __init__(self, n_seats, ranking_encoding='norm'):
    """Configure the encoder and precompute deck lookup arrays.

    Args:
        n_seats: Number of seats; contributes ``n_seats + 6 * n_seats``
            entries to ``n_dim``.
        ranking_encoding: ``'one-hot'`` adds 7463 entries to ``n_dim``,
            ``'norm'`` adds 1, anything else adds 0.
    """
    self.n_seats = n_seats
    self.ranking_encoding = ranking_encoding

    if ranking_encoding == 'one-hot':
        ranking_dim = 7463
    elif ranking_encoding == 'norm':
        ranking_dim = 1
    else:
        ranking_dim = 0
    self.n_dim = 265 + 104 + 6 + n_seats + 6 * n_seats + ranking_dim

    full_deck = np.array(Deck.GetFullDeck(), dtype=np.int64)
    self._deck = full_deck
    # Same deck with a leading -1 entry.
    sentinel = np.array([-1], dtype=np.int64)
    self._deck_alt = np.concatenate((sentinel, full_deck))
    self._evaluator = Evaluator()
def monteCarlo(board, hand, numPlayers, monteN):
    """Estimate by simulation how often ``hand`` is not beaten.

    Each trial deals two random cards to each of ``numPlayers`` opponents
    from the cards not in ``board`` or ``hand``, completes the board to
    five cards, and counts the trial as a win unless some opponent's
    evaluated rank is strictly lower than the hero's (equal ranks count as
    wins).

    Args:
        board: Known board cards (not modified).
        hand: The hero's hole cards.
        numPlayers: Number of opponents to deal in.
        monteN: Number of trials; truncated with ``int()``.

    Returns:
        float: wins divided by the number of trials actually run.

    Raises:
        ValueError: If ``int(monteN)`` is less than 1.
    """
    trials = int(monteN)
    if trials <= 0:
        raise ValueError("monteN must be at least 1")

    evaluator = Evaluator()
    # The unseen cards are the same for every trial, so build them once.
    known_cards = list(board) + list(hand)
    unseen = [card for card in Deck().cards if card not in known_cards]

    wins = 0
    for _ in range(trials):
        sim_board = board.copy()
        stock = unseen.copy()

        opponents = []
        for _ in range(numPlayers):
            opponents.append(
                [stock.pop(randrange(0, len(stock))) for _ in range(2)])

        while len(sim_board) < 5:
            sim_board.append(stock.pop(randrange(0, len(stock))))

        hero_rank = evaluator.evaluate(sim_board, hand)
        if all(evaluator.evaluate(sim_board, opponent) >= hero_rank
               for opponent in opponents):
            wins += 1

    # Divide by the trials that ran, not by the raw (possibly fractional)
    # monteN argument.
    return wins / trials
def init_game(self, SB, BB, GameState):
    """Attach the blinds and game state, and build the card lookup.

    :param SB: stored as ``self.SB`` and as ``self.players[1]``
    :param BB: stored as ``self.BB`` and as ``self.players[2]``
    :param GameState: stored as ``self.GameState``
    :return: None
    """
    self.GameState = GameState
    self.SB, self.BB = SB, BB
    self.players = ['_', self.SB, self.BB]

    # Map each card's integer encoding to its index in the full deck,
    # which makes state tracking easier.
    self.deck_lookup = {
        card: position
        for position, card in enumerate(Deck.GetFullDeck())
    }
def __init__(self, n_seats, ranking_encoding='norm', concat=True,
             drop_cards=False, split_cards=False):
    """Configure the encoder and precompute deck lookup arrays.

    Args:
        n_seats: Stored as ``self.n_seats``.
        ranking_encoding: Stored as ``self.ranking_encoding``.
        concat: Stored as ``self.concat``.
        drop_cards: Accepted but not used in this constructor.
        split_cards: Accepted but not used in this constructor.
    """
    self.n_seats = n_seats
    self.ranking_encoding = ranking_encoding
    self.concat = concat

    full_deck = np.array(Deck.GetFullDeck(), dtype=np.int64)
    self._deck = full_deck
    # Same deck with a leading -1 entry.
    sentinel = np.array([-1], dtype=np.int64)
    self._deck_alt = np.concatenate((sentinel, full_deck))
    self._evaluator = Evaluator()
async def set_up_game(web_client, channel_id, plo=False):
    """Deal a hand for ``channel_id`` and start play.

    The channel's players come from ``player_list`` and its table and deck
    from ``tab_list`` (created on first use). Three cards go to the table;
    every player gets two cards, or four when ``plo`` is true, and is sent
    a rendering of them through ``sendslack``.

    With two players ``start_heads_up`` is awaited; with exactly three the
    blinds are posted here and the first player is announced.

    Args:
        web_client: Passed through to ``sendslack`` / ``start_heads_up``.
        channel_id: Key into ``player_list`` and ``tab_list``.
        plo: When true, mark the table as PLO and deal four hole cards.
    """
    players = player_list[channel_id]
    # First game in this channel: create and register a table and a deck.
    if channel_id not in tab_list:
        deck = Deck()
        deck.shuffle()
        tab = Table()
        tab_list[channel_id] = {}
        tab_list[channel_id]["table"] = tab
        tab_list[channel_id]["deck"] = deck
    tab = tab_list[channel_id]["table"]
    deck = tab_list[channel_id]["deck"]
    # The stored deck is shuffled before every deal.
    deck.shuffle()
    tab.cards.extend(deck.draw(3))
    if plo:
        print("plos")
        tab.plo = True
    # ``name`` is a player object here, not a string.
    for name in players:
        if plo:
            name.cards.extend(deck.draw(4))
        else:
            print("nlhe")
            name.cards.extend(deck.draw(2))
        print("got to cards bit")
        pic = Card.print_pretty_cards(name.cards)
        # The cards are sent to ``name.name`` rather than to the channel.
        await sendslack(pic, web_client, name.name)
    if len(players) == 2:
        # Coin flip: half the time rotate the first player to the end.
        i = random.randint(1, 2)
        if i == 1:
            players += [players.pop(0)]
        await start_heads_up(web_client, channel_id)
    if len(players) == 3:
        await sendslack("Starting new game...", web_client, channel_id)
        await sendslack("Starting stacks are %d" % newMoney, web_client, channel_id)
        await sendslack(
            "Big blind is %d, small blind is %d" % (bigblind, smallblind),
            web_client,
            channel_id,
        )
        # players[0] is the dealer, players[1] posts the small blind and
        # players[2] the big blind.
        players[0].dealer = True
        players[1].bet = smallblind
        players[1].money = players[1].money - smallblind
        players[1].tocall = smallblind
        players[2].bet = bigblind
        players[2].money = players[2].money - bigblind
        tab.pot = tab.pot + players[1].bet + players[2].bet
        tab.highbet = bigblind
        await sendslack("<@%s> is first to act" % players[0].name, web_client, channel_id)
        await sendslack("%d to call" % bigblind, web_client, channel_id)
def __init__(self, n_players, agents, seed, stack_low=50, stack_high=200,
             hand_history_location='hands/', invalid_action_penalty=-5):
    """Create a poker table with ``n_players`` agent-driven players.

    Args:
        n_players: Number of players to create.
        agents: Sequence indexed ``0 .. n_players - 1``; ``agents[n]`` is
            passed to the ``n``-th ``Player``.
        seed: Seed for the NumPy random generator stored as ``self.rng``.
        stack_low: Stored as ``self.stack_low``.
        stack_high: Stored as ``self.stack_high``.
        hand_history_location: Stored as ``self.hand_history_location``.
        invalid_action_penalty: Passed to every ``Player``.
    """
    # Configuration.
    self.n_players = n_players
    self.stack_low, self.stack_high = stack_low, stack_high
    self.hand_history_location = hand_history_location
    self.hand_history_enabled = False
    self.rng = np.random.default_rng(seed)

    # Per-hand state.
    self.pot = self.bet_to_match = self.minimum_raise = 0
    self.street = GameState.PREFLOP
    self.cards = []
    self.history = []
    self.active_players = n_players

    self.deck = Deck()
    # Player ids are 1-based, player names 0-based.
    self.players = []
    for index in range(n_players):
        self.players.append(
            Player(index + 1, agents[index], 'player_%d' % index,
                   invalid_action_penalty))
    self.evaluator = Evaluator()
# simulatePokerGeneral: draw single cards from a fresh Deck until playerCards
# holds exactly 5 cards and communityCards holds at least 5, then return
# (playerCards, communityCards). While playerCards is not yet full, a drawn
# card is appended to it when self.isTypeOfCard(...) accepts the value
# returned by Card.print_pretty_card(card).
# NOTE(review): in this flattened form it is not visible whether the `else`
# pairs with the inner `if self.isTypeOfCard(...)` or with the outer
# `if len(playerCards) != 5`; the two readings send rejected cards to
# different places - confirm against the formatted original before changing.
def simulatePokerGeneral(self): playerCards, communityCards, deck = [], [], Deck() while len(playerCards) != 5 or len(communityCards) < 5: card = deck.draw(1) if len(playerCards) != 5: stringCardDrawn = Card.print_pretty_card(card) if self.isTypeOfCard(stringCardDrawn): playerCards.append(card) else: communityCards.append(card) return playerCards, communityCards
def _simulate(self, expanded_card, observation):
    """Play out the rest of the round and return the current player's score.

    The current player keeps the observed hand and is forced to open with
    ``expanded_card``; every other player is dealt random cards from the
    available cards that are not in that hand. All players then follow
    ``CompletePlayStrategy`` until the environment reports a new round or
    that it is done.

    Args:
        expanded_card: First card the current player plays.
        observation: Dict read for ``'number_of_players'``,
            ``'number_of_hand_cards_for_all_players'``,
            ``'current_player_id'`` and ``'hand_cards'``; also copied into
            the simulated environment.

    Returns:
        The current player's entry in the final observation's ``'scores'``.
    """
    player_count = observation['number_of_players']
    cards_per_player = observation['number_of_hand_cards_for_all_players']
    me = observation['current_player_id']

    others_cards = self._available_cards.copy()
    my_cards = observation['hand_cards'].copy()
    for card in my_cards:
        others_cards.remove(card)
    random.shuffle(others_cards)

    # Deck() is constructed while the class attribute points at the
    # shuffled remaining cards, and the attribute is cleared right after.
    # NOTE(review): presumably Deck() copies _FULL_DECK - confirm.
    Deck._FULL_DECK = others_cards
    deck = Deck()
    Deck._FULL_DECK = []

    # Clone the env and players.
    env = hearts_env.HeartsEnv()
    for seat in range(player_count):
        if seat == me:
            env.add_player(
                hand_cards=my_cards,
                strategy=CompletePlayStrategy(first_action_card=expanded_card))
            continue
        dealt = deck.draw(cards_per_player[seat])
        if cards_per_player[seat] == 1:
            # A one-card draw is wrapped so hand_cards is always a list.
            dealt = [dealt]
        env.add_player(hand_cards=dealt, strategy=CompletePlayStrategy())

    env.copy_observation(observation)
    env.start()
    simulated_observation = env.get_observation()

    # Play until the round finishes, using the complete strategy as the
    # default policy. At least one step is always taken.
    while True:
        simulated_observation, reward, done, info = env.step(env.move())
        if info['is_new_round'] or done:
            break
    return simulated_observation['scores'][me]
def __init__(self, deck: Deck, evaluator: Evaluator):
    """Draw hands and a board, then fabricate bucket indices and a winner.

    Instead of evaluating the cards, each player gets a random preflop
    bucket; each later street's bucket is the previous street's bucket
    plus ``FakeCardBundle.rand_deviance()``, clamped to
    ``[0, MAX_BUCKETS - 1]``. The showdown bucket equals the river bucket.
    The winner (the entry of ``_hand_ranks`` set to 1) is the player with
    the larger showdown bucket, or a random player on equal buckets.

    Args:
        deck: Source of cards; two 2-card hands and a 5-card board are
            drawn from it.
        evaluator: Accepted but not used here.
    """
    self._deck = deck
    self._hands = [deck.draw(2), deck.draw(2)]
    self._board = deck.draw(5)

    self._hand_bucket_indices = []
    for _ in (SMALL_BLIND, BIG_BLIND):
        bucket_indices = {
            Stage.PREFLOP: random.randint(0, MAX_BUCKETS - 1),
        }
        prev_stage = Stage.PREFLOP
        for stage in (Stage.FLOP, Stage.TURN, Stage.RIVER):
            deviance = FakeCardBundle.rand_deviance()
            bucket_indices[stage] = min(
                max(bucket_indices[prev_stage] + deviance, 0),
                MAX_BUCKETS - 1)
            # Advance so the next street deviates from this one; earlier,
            # prev_stage stayed at PREFLOP for every street.
            prev_stage = stage
        bucket_indices[Stage.SHOWDOWN] = bucket_indices[Stage.RIVER]
        self._hand_bucket_indices.append(bucket_indices)

    small_blind_final_bucket = self.bucket_index(SMALL_BLIND, Stage.SHOWDOWN)
    big_blind_final_bucket = self.bucket_index(BIG_BLIND, Stage.SHOWDOWN)

    self._hand_ranks = [0, 0]
    # The random pick only stands when both showdown buckets are equal.
    winner = random.randrange(2)
    if small_blind_final_bucket > big_blind_final_bucket:
        winner = SMALL_BLIND
    if small_blind_final_bucket < big_blind_final_bucket:
        winner = BIG_BLIND
    self._hand_ranks[winner] += 1
def evaluate(self, hands, num_draw, count):
    """Average the parent evaluator's win rate over random boards.

    For each of ``count`` rounds the deck is shuffled and cards are drawn
    one at a time, skipping any that are in ``hands``, until ``num_draw``
    board cards are collected; the parent ``evaluate`` is then called on
    that board.

    Args:
        hands: Hole cards; excluded from the random boards.
        num_draw: Number of board cards per round.
        count: Number of rounds.

    Returns:
        float: mean win rate over all rounds (also printed).
    """
    deck = Deck()
    board_cards = []
    rate_sum = 0.0
    for _ in range(count):
        deck.shuffle()
        collected = 0
        while collected < num_draw:
            card = deck.draw(1)
            if card not in hands:
                board_cards.append(card)
                collected += 1
        rate_sum += super(HoleEvaluator, self).evaluate(
            hands, board_cards, False)
        # The same list is reused for the next round.
        board_cards.clear()

    hole_win_rate = rate_sum / float(count)
    print('eval win rate: {:2.2%}'.format(hole_win_rate))
    return hole_win_rate
def setUp(self):
    """Create two players, a game and its serializer, then deal cards.

    Also checks that the serialized ``id`` and ``total_pot`` match the
    game object, gives each player two cards and the game five community
    cards.
    """
    self.guest = Player_Factory.create()
    self.learner = Player_Factory.create()
    self.game = Game_Factory.create(players=(self.guest, self.learner))
    self.game_serializer = GameSerializer(self.game)

    # The serializer and game live on ``self``; the bare names
    # ``game_serializer`` / ``game`` used before were never defined.
    for field_name in ['id', 'total_pot']:
        self.assertEqual(self.game_serializer.data[field_name],
                         getattr(self.game, field_name))

    deck = Deck()
    for _ in range(2):
        self.guest.card_player_set.create(
            card_str=Card.int_to_str(deck.draw(1)))
        self.learner.card_player_set.create(
            card_str=Card.int_to_str(deck.draw(1)))
    for _ in range(5):
        self.game.card_community_set.create(
            card_str=Card.int_to_str(deck.draw(1)))
def setup(n, m):
    """Build ``n`` random (board, hand) pairs.

    Args:
        n: Number of pairs to generate.
        m: Number of cards per board; must be positive.

    Returns:
        tuple: ``(boards, hands)`` - two lists of length ``n`` holding the
        ``m``-card draws and the 2-card draws respectively.
    """
    assert m > 0
    deck = Deck()
    boards, hands = [], []
    remaining = n
    while remaining > 0:
        boards.append(deck.draw(m))
        hands.append(deck.draw(2))
        # Reshuffle after every pair.
        deck.shuffle()
        remaining -= 1
    return boards, hands
def reset(self):
    """Start a new hand: reshuffle, reseat, redeal and redraw stacks.

    The deck is replaced by a full deck shuffled with ``self.rng``, the
    player order is shuffled, every player is reset, given their index as
    position, two hole cards, and a stack drawn by
    ``self.rng.integers(self.stack_low, self.stack_high, 1)``.
    """
    # Table-level state.
    self.pot = 0
    self.bet_to_match = 0
    self.street = GameState.PREFLOP
    self.cards = []
    self.history = []
    self.active_players = self.n_players

    # Randomness is consumed in a fixed order: deck, seating, then stacks.
    self.deck.cards = Deck.GetFullDeck()
    self.rng.shuffle(self.deck.cards)
    self.rng.shuffle(self.players)

    seat_count = self.n_players
    dealt = self.deck.draw(seat_count * 2)
    for seat, player in enumerate(self.players):
        player.reset()
        player.position = seat
        # First card from the first half of the draw, second card from the
        # second half.
        player.cards = [dealt[seat], dealt[seat + seat_count]]
        stack_draw = self.rng.integers(self.stack_low, self.stack_high, 1)
        player.stack = stack_draw[0]
def __init__(self):
    """Create the initial state: chance to act, pot of 100, river street."""
    # Whose turn it is (0 oop, 1 ip, 2 chance node, 3 terminal node).
    self._current = CHANCE_ID
    # TODO better initialization
    self._players = [PlayerState() for _ in range(2)]

    self._pot = 100        # chips currently in the pot
    self._street = RIVER   # current street
    self._history = ""     # action sequence so far
    self._board = []       # array of ints for now

    self._deck = Deck()
    # Legal actions start out as the cards that can be drawn.
    self._legal_actions = list(range(DECK_SIZE))
async def set_up_game(web_client, channel_id, plo=False):
    """Deal a hand for ``channel_id`` and hand over to the game loop.

    The channel's players come from ``player_list`` and its table and deck
    from ``tab_list`` (created on first use). Three cards go to the table;
    every player gets two cards, or four when ``plo`` is true, and is sent
    a rendering of them through ``sendslack``.

    With two players ``start_heads_up`` is awaited; with more than two the
    players are shuffled, the order is saved on the table and
    ``start_game`` is awaited.

    Args:
        web_client: Passed through to ``sendslack`` and the start helpers.
        channel_id: Key into ``player_list`` and ``tab_list``.
        plo: When true, mark the table as PLO and deal four hole cards.
    """
    players = player_list[channel_id]
    # First game in this channel: create and register a table and a deck.
    if channel_id not in tab_list:
        deck = Deck()
        deck.shuffle()
        tab = Table()
        tab_list[channel_id] = {}
        tab_list[channel_id]["table"] = tab
        tab_list[channel_id]["deck"] = deck
    tab = tab_list[channel_id]["table"]
    deck = tab_list[channel_id]["deck"]
    # The stored deck is shuffled before every deal.
    deck.shuffle()
    tab.cards.extend(deck.draw(3))
    if plo:
        print("plos")
        tab.plo = True
    # ``name`` is a player object here, not a string.
    for name in players:
        if plo:
            name.cards.extend(deck.draw(4))
        else:
            print("nlhe")
            name.cards.extend(deck.draw(2))
        print("got to cards bit")
        pic = Card.print_pretty_cards(name.cards)
        # The cards are sent to ``name.name`` rather than to the channel.
        await sendslack(pic, web_client, name.name)
    if len(players) == 2:
        # Coin flip: half the time rotate the first player to the end.
        i = random.randint(1, 2)
        if i == 1:
            players += [players.pop(0)]
        await start_heads_up(web_client, channel_id)
    if len(players) > 2:
        # Randomize the order in place and keep a copy on the table.
        random.shuffle(players)
        tab.origlist = players.copy()
        await start_game(web_client, channel_id)
def odds(hand, board, num_players, trials=1000):
    """Estimate by simulation how often ``hand`` comes out as player 0.

    Each trial shuffles the cards not in ``hand`` or ``board``, deals two
    cards to each of the other ``num_players - 1`` players, completes the
    board to five cards and counts the trial when
    ``Evaluator.hand_summary(board, players)`` returns ``0``.

    Args:
        hand: Two card strings accepted by ``Card.new``.
        board: Zero or more card strings accepted by ``Card.new``.
        num_players: Total number of players including the hero.
        trials: Number of simulated deals (default 1000, the former fixed
            value).

    Returns:
        float: fraction of trials counted.

    Raises:
        ValueError: If ``trials`` is less than 1 or a board card string is
            rejected by ``Card.new``. The earlier version called
            ``exit()`` on a bad board card, which ended the whole process
            with a success status.
    """
    if trials <= 0:
        raise ValueError("trials must be at least 1")

    hero_hand = [Card.new(hand[0]), Card.new(hand[1])]
    try:
        known_board = [Card.new(card) for card in board]
    except KeyError as exc:
        raise ValueError("invalid board card in %r" % (board,)) from exc

    # Cards still available once the hero's hand and the board are removed.
    full_deck = Deck()
    for card in hero_hand + known_board:
        full_deck.cards.remove(card)
    remaining = full_deck.cards

    evaluator = Evaluator()
    # One Deck object is reused; only its card list changes per trial.
    deck = Deck()
    wins = 0
    for _ in range(trials):
        cards = remaining.copy()
        rshuffle(cards)
        deck.cards = cards

        players = [hero_hand]
        for _ in range(num_players - 1):
            players.append(deck.draw(2))

        sim_board = known_board.copy()
        while len(sim_board) < 5:
            sim_board.append(deck.draw(1))

        if evaluator.hand_summary(sim_board, players) == 0:
            wins += 1
    return wins / trials
def _drop_one_card(evaluator, board, hand, target_score):
    """Remove one card from ``board`` + ``hand`` without changing the score.

    Candidates are tried in order: hand card 0, hand card 1, then each
    board card. When a hand card is dropped, the first board card moves
    into the hand so the hand keeps two cards. Assumes a two-card hand.

    Returns:
        tuple: ``(board, hand)`` as new lists with one card fewer in total.

    Raises:
        ValueError: If every removal changes the evaluated score.
    """
    for index in range(len(board) + len(hand)):
        trial_hand = list(hand)
        trial_board = list(board)
        if index < 2:
            trial_hand.pop(index)
            trial_hand.append(board[0])
            trial_board.pop(0)
        else:
            trial_board.pop(index - 2)
        if evaluator.evaluate(trial_board, trial_hand) == target_score:
            return trial_board, trial_hand
    raise ValueError("no single card can be dropped without changing the score")


def evaluateCards(boardCards, handCards):
    """Score a hand and report the five cards that make it.

    Args:
        boardCards: String of concatenated two-character board cards;
            boards of 3, 4 or 5 cards are parsed, any other length gives
            an empty board.
        handCards: String of two concatenated two-character hole cards
            whose characters are swapped before being passed to
            ``Card.new``.

    Returns:
        str: ``"<rank class> <hand string> <score>"`` where the hand string
        is ``convertCardToString`` applied to three board cards followed by
        two hand cards.

    Raises:
        ValueError: If no card can be dropped from a 6- or 7-card set
            without changing the score (previously this surfaced as an
            unbound-variable error).
    """
    n = 2
    boardCardsSplit = [boardCards[i:i + n] for i in range(0, len(boardCards), n)]
    handCardsSplit = [handCards[i:i + n] for i in range(0, len(handCards), n)]

    # Swap the two characters of each hole card before parsing.
    handCardsSplit[0] = handCardsSplit[0][1] + handCardsSplit[0][0]
    handCardsSplit[1] = handCardsSplit[1][1] + handCardsSplit[1][0]
    hand = [
        Card.new(str(handCardsSplit[0].capitalize())),
        Card.new(str(handCardsSplit[1].capitalize())),
    ]

    if len(boardCardsSplit) in (3, 4, 5):
        board = [Card.new(str(card.capitalize())) for card in boardCardsSplit]
    else:
        board = []

    print(Card.print_pretty_cards(board + hand))
    evaluator = Evaluator()
    bestScore = evaluator.evaluate(board, hand)
    handType = evaluator.get_rank_class(bestScore)
    print("Player 1 hand rank = %d (%s)\n" %
          (bestScore, evaluator.class_to_string(handType)))

    # Seven cards -> six: drop one card that does not affect the score.
    if len(board) == 5:
        best6Board, best6Hand = _drop_one_card(evaluator, board, hand, bestScore)
    else:
        best6Board, best6Hand = board, hand
    print(Card.print_pretty_cards(best6Board + best6Hand))

    # Six cards -> five: repeat the process once more.
    if len(best6Board) == 4:
        best5Board, best5Hand = _drop_one_card(
            evaluator, best6Board, best6Hand, bestScore)
    else:
        best5Board, best5Hand = best6Board, best6Hand
    print(Card.print_pretty_cards(best5Board + best5Hand))

    chosen = (best5Board[0], best5Board[1], best5Board[2],
              best5Hand[0], best5Hand[1])
    handString = "".join(convertCardToString(card) for card in chosen)
    print("Hand string: " + handString)
    stringToSend = str(handType) + " " + handString + " " + str(bestScore)
    print("String to send: " + stringToSend)
    return stringToSend
pot += MakeDecision(bot1, bot1_prob) pot += MakeDecision(bot2, bot2_prob) print("Pot:" + str(pot)) print() stage += 1 if stage == 4: #Call Or Raise Expected Value-Don't fold print("Post") print("---------------") print() bot1_prob = CalculateWinProb(bot1.hand, bot1.board) bot2_prob = CalculateWinProb(bot2.hand, bot2.board) pot += MakeDecision(bot1, bot1_prob) pot += MakeDecision(bot2, bot2_prob) print("Pot:" + str(pot)) print() print("Showdown") print("---------------") print() GoToShowdown(bot1, bot2) stage += 1 big_blind = 100 pot = 0 player_1 = Bot() player_2 = Bot() evaluator = Evaluator() deck = Deck() play_game(player_1, player_2, pot) print(CalculateWinProb(player_1.hand, player_1.board))
def __init__(self, n_seats, max_limit=100000, debug=False):
    """Create an empty table with ``n_seats`` seats and define the spaces.

    Args:
        n_seats: Number of seats; every seat starts with an empty player.
        max_limit: Upper bound used for the chip/rank entries of the
            observation space.
        debug: Stored as ``self._debug``.
    """
    n_suits = 4             # s,h,d,c
    n_ranks = 13            # 2,3,4,5,6,7,8,9,T,J,Q,K,A
    n_community_cards = 5   # flop, turn, river
    n_pocket_cards = 2

    self.level_raises = {0: 0, 1: 0, 2: 0}  # Assuming 3 players
    self.n_seats = n_seats
    self._blind_index = 0
    self._smallblind, self._bigblind = TexasHoldemEnv.BLIND_INCREMENTS[0]
    self._deck = Deck()
    self._evaluator = Evaluator()

    # Round / hand bookkeeping.
    self.last_seq_move = []
    self.filled_seats = 0
    self.signal_end_round = False
    self.winning_players = None
    self.starting_stack_size = None
    self.community = []
    self._round = 0
    self._button = 0
    self._discard = []
    self.game_resolved = False
    self.is_new_r = True

    # Pot bookkeeping.
    self._side_pots = [0] * n_seats
    self._current_sidepot = 0  # index of _side_pots
    self._totalpot = 0
    self._tocall = 0
    self._lastraise = 0
    self._number_of_hands = 0
    self._record_players = []

    # Fill seats with dummy players.
    self._seats = [
        Player(seat, stack=0, emptyplayer=True) for seat in range(n_seats)
    ]
    self.learner_bot = None
    self.villain = None
    self.emptyseats = n_seats
    self._player_dict = {}
    self._current_player = None
    self._debug = debug
    self._last_player = None
    self._last_actions = None

    # Per-player part: (stack, handrank, playedthisround, is_betting,
    # last side pot) followed by the pocket cards as (suit, rank).
    player_info_space = spaces.MultiDiscrete([
        max_limit,
        max_limit,
        1,
        1,
        max_limit,
    ])
    pocket_card_space = spaces.MultiDiscrete([n_suits, n_ranks])
    pocket_cards_space = spaces.Tuple([pocket_card_space] * n_pocket_cards)
    # The player pair is repeated four times regardless of n_seats.
    players_space = spaces.Tuple([player_info_space, pocket_cards_space] * 4)

    # Table part: learner position, pot amount, last raise, current player
    # seat, minimum raise, amount to call, then the community cards as
    # (suit, rank, is_flopped).
    table_entries = [
        spaces.Discrete(max_limit),
        spaces.Discrete(max_limit),
        spaces.Discrete(max_limit),
        spaces.Discrete(n_seats - 1),
        spaces.Discrete(max_limit),
        spaces.Discrete(max_limit),
    ]
    community_card_space = spaces.MultiDiscrete([
        n_suits - 1,
        n_ranks - 1,
        1,
    ])
    table_entries.append(
        spaces.Tuple([community_card_space] * n_community_cards))
    table_space = spaces.Tuple(table_entries)

    self.observation_space = spaces.Tuple([players_space, table_space])

    # MAY NEED TO ALTER FOR HEADS-UP
    self.action_space = spaces.Discrete(3)
class TexasHoldemEnv(Env, utils.EzPickle): BLIND_INCREMENTS = [[10,25], [25,50], [50,100], [75,150], [100,200], [150,300], [200,400], [300,600], [400,800], [500,10000], [600,1200], [800,1600], [1000,2000]] current_player_notifier = "" weighting_coefficient_regret_fold = 10 weighting_coefficient_regret_check = 10 weighting_coefficient_regret_call = 10 weighting_coefficient_regret_raise = 10 weighting_coefficient_round_resolve = 100 def __init__(self, n_seats, max_limit=100000, debug=False): n_suits = 4 # s,h,d,c n_ranks = 13 # 2,3,4,5,6,7,8,9,T,J,Q,K,A n_community_cards = 5 # flop, turn, river n_pocket_cards = 2 n_stud = 5 self.level_raises = {0:0, 1:0, 2:0} # Assuming 3 players self.n_seats = n_seats self._blind_index = 0 [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0] self._deck = Deck() self._evaluator = Evaluator() self.last_seq_move = [] self.filled_seats = 0 self.signal_end_round = False self.winning_players = None self.starting_stack_size = None self.community = [] self._round = 0 self._button = 0 self._discard = [] self.game_resolved = False self.is_new_r = True self._side_pots = [0] * n_seats self._current_sidepot = 0 # index of _side_pots self._totalpot = 0 self._tocall = 0 self._lastraise = 0 self._number_of_hands = 0 self._record_players = [] # fill seats with dummy players self._seats = [Player(i, stack=0, emptyplayer=True) for i in range(n_seats)] self.learner_bot = None self.villain = None self.emptyseats = n_seats self._player_dict = {} self._current_player = None self._debug = debug self._last_player = None self._last_actions = None # (PSEUDOCODE) # MODEL HYPERPARAMETERS: # state_size = [(position, learner.stack, learner.handrank, played_this_round ...[card1, card2]), (pot_total, learner.to_call, opponent.stack, community_cards)] # action_size = env.action_space.n # learning_rate = 0.00025 self.observation_space = spaces.Tuple([ spaces.Tuple([ # players spaces.MultiDiscrete([ max_limit, # stack max_limit, # handrank 1, # 
playedthisround 1, # is_betting max_limit, # last side pot ]), spaces.Tuple([ spaces.MultiDiscrete([ # card n_suits, # suit, can be negative one if it's not avaiable. n_ranks, # rank, can be negative one if it's not avaiable. ]) ] * n_pocket_cards) ] * 4), spaces.Tuple([ spaces.Discrete(max_limit), # learner position spaces.Discrete(max_limit), # pot amount spaces.Discrete(max_limit), # last raise spaces.Discrete(n_seats - 1), # current player seat location. spaces.Discrete(max_limit), # minimum amount to raise spaces.Discrete(max_limit), # how much needed to call by current player. spaces.Tuple([ spaces.MultiDiscrete([ # card n_suits - 1, # suit n_ranks - 1, # rank 1, # is_flopped ]) ] * n_community_cards) ]) ]) ### MAY NEED TO ALTER FOR HEADS-UP # self.action_space = spaces.Tuple([ # spaces.MultiDiscrete([ # 3, # action_id # max_limit, # raise_amount # ]), # ] * n_seats) self.action_space = spaces.Discrete(3) def seed(self, seed=None): _, seed = seeding.np_random(seed) return [seed] # Important Note: Positions are only assigned at end of game. 
Be aware in # case of reporting stats on position type def assign_positions(self): no_active_players = self.filled_seats if(self.filled_seats == 3): for player in self._seats: player.position = (player.position + (no_active_players-1)) % no_active_players if player in self._player_dict.values() else None elif(self.filled_seats == 2): new_positions = [] # We want to only use positions 0 and 2, which are encodings of BTN and BB respectively # Sort for positions 0 and 2 first for player in self._player_dict.values(): if not(player.emptyplayer): if player.position == 2: player.position = 0 new_positions.append(player.position) elif player.position == 0: player.position = 2 new_positions.append(player.position) # Special case of former position 1 depends on new positions allocated above if len(new_positions) == 1: for player in self._player_dict.values(): if player.position == 1: if new_positions[0] == 0: player.position = 2 elif new_positions[0] == 2: player.position = 0 def add_player(self, seat_id, stack=2000): """Add a player to the environment seat with the given stack (chipcount)""" player_id = seat_id if player_id not in self._player_dict: new_player = Player(player_id, stack=stack, emptyplayer=False) Player.total_plrs+=1 self.starting_stack_size = stack if self._seats[player_id].emptyplayer: self._seats[player_id] = new_player new_player.set_seat(player_id) else: raise error.Error('Seat already taken.') self._player_dict[player_id] = new_player self.emptyseats -= 1 self.filled_seats +=1 if new_player.get_seat() == 0: self.learner_bot = new_player else: self.villain = new_player self._record_players.append(new_player) def move_player_to_empty_seat(self, player): # priority queue placing active players at front of table for seat_no in range(len(self._seats)): if self._seats[seat_no].emptyplayer and (seat_no < player._seat): unused_player = self._seats[seat_no] self._seats[seat_no] = player self._seats[player.get_seat()] = unused_player def 
reassign_players_seats(self): for player in self._player_dict.values(): self.move_player_to_empty_seat(player) def remove_player(self, seat_id): """Remove a player from the environment seat.""" player_id = seat_id try: idx = self._seats.index(self._player_dict[player_id]) self._seats[idx] = Player(0, stack=0, emptyplayer=True) self._seats[idx].position = None # Very important for when transitioning from 3 to 2 players. del self._player_dict[player_id] self.emptyseats += 1 self.filled_seats-=1 Player.total_plrs-=1 #self.reassign_players_seats() except ValueError: pass def reset(self): self._reset_game() self._ready_players() self._number_of_hands = 1 [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0] if (self.emptyseats < len(self._seats) - 1): players = [p for p in self._seats if p.playing_hand] self._new_round() self._round = 0 self._current_player = self._first_to_act(players, "post_blinds") self._post_smallblind(self._current_player) self._current_player = self._next(players, self._current_player) self._post_bigblind(self._current_player) self._current_player = self._next(players, self._current_player) self._tocall = self._bigblind self._round = 0 self._deal_next_round() self.organise_evaluations() self._folded_players = [] return self._get_current_reset_returns() def organise_evaluations(self): for idx, player in self._player_dict.items(): if player is not None: player.he = HandHoldem.HandEvaluation(player.hand, idx, "Preflop") #Unique to player instance player.he.evaluate(event='Preflop') player.set_handrank(player.he.evaluation) def assume_unique_cards(self, players): cards_count = {} this_board = None for player in players: player_cards = player.hand for card in player_cards: cards_count.update({card: 1}) if card not in cards_count else cards_count.update({card: cards_count[card] + 1}) if this_board is None and player.he is not None: if player.he.board is not None: this_board = player.he.board if this_board is not None: for card in 
this_board: cards_count.update({card: 1}) if card not in cards_count else cards_count.update({card: cards_count[card] + 1}) for card, no_occurence in cards_count.items(): if no_occurence > 1: return False else: return True def step(self, actions): """ CHECK = 0 CALL = 1 RAISE = 2 FO RAISE_AMT = [0, minraise] """ players = [p for p in self._seats if p.playing_hand] assert self.assume_unique_cards(players) is True self._last_player = self._current_player # self._last_actions = actions # if self._last_player.count_r(self.last_seq_move) > 1: # if [3,0] in actions: # print("r") # if current player did not play this round if not self._current_player.playedthisround and len([p for p in players if not p.isallin]) >= 1: if self._current_player.isallin: self._current_player = self._next(players, self._current_player) return self._get_current_step_returns(False) move = self._current_player.player_move(self._output_state(self._current_player), actions[self._current_player.player_id], last_seq_move = self.last_seq_move, _round = self._round) if self.am_i_only_player_wmoney() and self.level_raises[self._current_player.get_seat()] >= self.highest_in_LR()[0]: move = ("check", 0) # Protects against player making bets without any other stacked/active players self._last_actions = move if move[0] == 'call': assert self.action_space.contains(0) self._player_bet(self._current_player, self._tocall, is_posting_blind=False, bet_type=move[0]) if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) self.last_seq_move.append('C') self.playedthisround = True self._current_player.round['raises_i_owe'] = 0 elif move[0] == 'check': # assert self.action_space.contains(0) self._player_bet(self._current_player, self._current_player.currentbet, is_posting_blind=False, bet_type=move[0]) if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) 
self.last_seq_move.append('c') self.playedthisround = True elif move[0] == 'raise': # if self._current_player is self.learner_bot and self.level_raises == {0: 1, 1: 0, 2: 2} or self.level_raises == {0: 2, 1: 0, 2: 3} or self.level_raises == {0: 3, 1: 0, 2: 4} or self.level_raises == {0: 4, 1: 0, 2: 5} or self.level_raises == {0: 5, 1: 0, 2: 6} or self.level_raises == {0: 5, 1: 0, 2: 6} and 'R' in self.last_seq_move: # print("watch") assert self.action_space.contains(1) self._player_bet(self._current_player, move[1]+self._current_player.currentbet, is_posting_blind=False, bet_type="bet/raise") if self._debug: print('Player', self._current_player.player_id, move) for p in players: if p != self._current_player: p.playedthisround = False self._current_player = self._next(players, self._current_player) self.last_seq_move.append('R') self._current_player.round['raises_i_owe'] = 0 elif move[0] == 'fold': # if self.highest_in_LR()[0] > 4: # print("watch") assert self.action_space.contains(2) self._current_player.playing_hand = False self._current_player.playedthisround = True if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) self._folded_players.append(self._current_player) self.last_seq_move.append('F') # break if a single player left # players = [p for p in self._seats if p.playing_hand] # if len(players) == 1: # self._resolve(players) players = [p for p in self._seats if p.playing_hand] # else: ## This will help eliminate infinite loop # self._current_player = self._next(players, self._current_player) # This will effectively dictate who will become dealer after flop players_with_money = [] for player in players: if(player.stack > 0): players_with_money.append(player) if all([player.playedthisround for player in players_with_money]): self._resolve(players) for player in self._player_dict.values(): player.round == {'moves_i_made_in_this_round_sofar': '', 'possible_moves': set([]), 
def am_i_only_player_wmoney(self):
    """Return True when every player except the current one has no chips.

    Counts the entries of ``self._player_dict`` that are not the current
    player and whose ``stack`` is ``<= 0``, and compares that count with
    ``len(self._player_dict) - 1``.
    """
    broke_opponents = sum(
        1
        for player in self._player_dict.values()
        if player is not self._current_player and player.stack <= 0
    )
    return broke_opponents == len(self._player_dict) - 1


def count_active_wmoney(self):
    """Count the seats that are both still in the hand and have chips left.

    A seat counts when a player sitting there has ``playing_hand`` set and a
    player sitting there has ``stack > 0``. Seats are taken from
    ``get_seat()``; the previous implementation only knew seats 0, 1 and 2
    and raised ``KeyError`` for any other seat number.
    """
    active_seats = set()
    funded_seats = set()
    for player in self._player_dict.values():
        if player.playing_hand:
            active_seats.add(player.get_seat())
        if player.stack > 0:
            funded_seats.add(player.get_seat())
    return len(active_seats & funded_seats)


def render(self, mode='human', close=False, initial=False, delay=None):
    """Print the last action, the pot, the board and each seat's stack.

    Args:
        mode: Not used by this method.
        close: Not used by this method.
        initial: When True a blank line is printed first and the
            last-action line is suppressed.
        delay: Optional number of seconds to sleep before printing.
    """
    if delay:
        time.sleep(delay)
    if initial is True:
        print("\n")
    if self._last_actions is not None and initial is False:
        print(format_action(self._last_player, self._last_actions))
    print("\n\n")
    print('Total Pot: {}'.format(self._totalpot))

    player_states, community_states = self._get_current_state()
    _player_infos, player_hands = zip(*player_states)
    _community_infos, community_cards = community_states

    print('Board:')
    print('-' + hand_to_str(community_cards))
    print('Players:')
    for idx, hand in enumerate(player_hands):
        # Mark the row of the player who is to act with "<position>".
        if self._current_player.get_seat() == idx:
            self.current_player_notifier = "<" + str(self._current_player.position)
        print('{}{}stack: {} {}'.format(idx, hand_to_str(hand), self._seats[idx].stack,
                                         self.current_player_notifier))
        self.current_player_notifier = ""
def _resolve(self, players):
    """End the betting round: settle side pots, start and deal the next round.

    Sets ``signal_end_round``, picks the first player to act, folds the
    current bets of ``players`` plus the folded players into the side pots,
    then calls ``_new_round`` and ``_deal_next_round``.
    """
    self.signal_end_round = True
    self._current_player = self._first_to_act(players)
    everyone_with_bets = players + self._folded_players
    self._resolve_sidepots(everyone_with_bets)
    self._new_round()
    self._deal_next_round()
    if not self._debug:
        return
    print('totalpot', self._totalpot)


def _resolve_postflop(self, players):
    """Hand the action to whoever ``_first_to_act`` selects among ``players``."""
    opener = self._first_to_act(players)
    self._current_player = opener


def _deal_next_round(self):
    """Deal for the current round: 0 hole cards, 1 flop, 2 turn, 3 river.

    Any other round number deals nothing.
    """
    dealer_by_round = {0: '_deal', 1: '_flop', 2: '_turn', 3: '_river'}
    dealer_name = dealer_by_round.get(self._round)
    if dealer_name is not None:
        getattr(self, dealer_name)()


def _increment_blinds(self):
    """Move to the next blind level, staying on the last one once reached."""
    schedule = TexasHoldemEnv.BLIND_INCREMENTS
    last_level = len(schedule) - 1
    self._blind_index = min(self._blind_index + 1, last_level)
    small, big = schedule[self._blind_index]
    self._smallblind = small
    self._bigblind = big


def _post_smallblind(self, player):
    """Make ``player`` post the small blind; they still have to act afterwards."""
    amount = self._smallblind
    if self._debug:
        print('player ', player.player_id, 'small blind', amount)
    self._player_bet(player, amount, is_posting_blind=True)
    player.playedthisround = False


def _post_bigblind(self, player):
    """Make ``player`` post the big blind and record it as the last raise."""
    amount = self._bigblind
    if self._debug:
        print('player ', player.player_id, 'big blind', amount)
    self._player_bet(player, amount, is_posting_blind=True)
    player.playedthisround = False
    self._lastraise = self._bigblind


def highest_in_LR(self, specific=None, request_is_seq=None):
    """Find the largest raise level and the key that holds it.

    Args:
        specific: Mapping to inspect instead of ``self.level_raises``.
        request_is_seq: When truthy, return a list of ``(value, key)``
            pairs: the leader first, then every other key tied with it.

    Returns:
        ``(value, key)`` of the first key with the strictly largest value,
        or ``(0, 0)`` when no value exceeds 0; a list of such pairs when
        ``request_is_seq`` is truthy.
    """
    table = self.level_raises if specific is None else specific

    top_value, top_key = 0, 0
    for key, value in table.items():
        if value > top_value:
            top_value, top_key = value, key

    if not request_is_seq:
        return top_value, top_key

    tied = [(value, key) for key, value in table.items()
            if value == top_value and key != top_key]
    return [(top_value, top_key)] + tied
def is_level_raises_allzero(self):
    """Return True when every entry of ``self.level_raises`` is 0."""
    return all(value == 0 for value in self.level_raises.values())


def _player_bet(self, player, total_bet, **special_betting_type):
    """Update raise levels for a bet and move the chips into the pot.

    Args:
        player: The betting player.
        total_bet: The player's total contribution to the pot in this
            round once the bet is made.
        **special_betting_type: ``is_posting_blind`` (bool) and, for
            voluntary actions, ``bet_type`` (``"bet/raise"``, ``"call"`` or
            ``"check"``). With neither key only the chips are moved.
    """
    seat_known = "is_posting_blind" in special_betting_type
    has_bet_type = "bet_type" in special_betting_type

    if seat_known and not has_bet_type:
        # Posting a blind (not the remainder to match preceding calls/raises).
        if special_betting_type["is_posting_blind"] is True:
            self.level_raises[player.get_seat()] = 0

    elif seat_known and has_bet_type:
        # Bet/raise or call. Also accounts for checks preflop.
        highest_lr_value, _highest_lr_bot = self.highest_in_LR()
        if special_betting_type["is_posting_blind"] is False:
            bet_type = special_betting_type["bet_type"]
            seat = player.get_seat()

            if bet_type == "bet/raise":
                if self.level_raises[seat] < highest_lr_value:
                    player.action_type = "raise"
                    self.level_raises[seat] = highest_lr_value + 1
                elif self.level_raises[seat] == highest_lr_value:
                    player.action_type = "bet"
                    self.level_raises[seat] += 1

            elif bet_type == "call":
                if self.level_raises[seat] < highest_lr_value:
                    player.action_type = "call"
                    self.level_raises[seat] = highest_lr_value
                elif self.is_level_raises_allzero():
                    if player.position == 0:
                        player.action_type = "call"
                        self.level_raises[seat] = 1
                    elif player.position == 2:
                        player.action_type = "call"
                        self.level_raises[seat] = highest_lr_value

            # ``== 0`` rather than ``is 0``: identity of ints is an
            # implementation detail and a SyntaxWarning on Python 3.8+.
            elif bet_type == "check" and self._round == 0:
                # Big blind checking preflop.
                if player.position == 2:
                    self.level_raises[seat] = 1

    # relative_bet is how much _additional_ money the player is betting this
    # turn, on top of what they have already contributed; it is capped by
    # the player's stack.
    relative_bet = min(player.stack, total_bet - player.currentbet)
    player.bet(relative_bet + player.currentbet)

    self._totalpot += relative_bet
    self._tocall = max(self._tocall, total_bet)
    if self._tocall > 0:
        # Once there is something to call it is never less than a big blind.
        self._tocall = max(self._tocall, self._bigblind)
    self._lastraise = max(self._lastraise, relative_bet - self._lastraise)
    self.is_new_r = False
def _first_to_act(self, players, my_event="Postflop"):
    """Pick the player who opens the action for the given event.

    The position precedence handed to ``assign_next_to_act`` depends on
    ``self.filled_seats`` (2 or 3) and on ``my_event`` (``"Preflop"``,
    ``"post_blinds"``, ``"Postflop"`` or ``"sidepot"``). Event names are
    compared by value; the previous ``is`` comparison only matched when the
    caller happened to pass the very same string object.

    Args:
        players: Candidate players.
        my_event: Name of the situation the opener is needed for.

    Returns:
        The selected player, ``self._record_players[0]`` when exactly one
        player is passed, or ``None`` when no rule applies.
    """
    precedence = None
    if self.filled_seats == 2:
        if my_event in ("Preflop", "post_blinds"):
            precedence = [0, 2]
        elif my_event in ("Postflop", "sidepot"):
            precedence = [2, 0]
    elif self.filled_seats == 3:
        if my_event == "Preflop":
            precedence = [0, 1, 2]
        elif my_event in ("Postflop", "post_blinds", "sidepot"):
            precedence = [1, 2, 0]

    first_to_act = None
    if precedence is not None:
        first_to_act = self.assign_next_to_act(players, precedence)

    # With a single player left the first recorded player is used,
    # whatever was selected above.
    if len(players) == 1:
        first_to_act = self._record_players[0]
    return first_to_act
def assign_next_to_act(self, players, precedence_positions):
    """Return the first eligible player, trying positions in the given order.

    A player is eligible when seated (not ``emptyplayer``), still
    ``playing_hand`` and holding chips. Returns ``None`` when no position in
    ``precedence_positions`` yields an eligible player.
    """
    for wanted_position in precedence_positions:
        for candidate in players:
            if candidate.position != wanted_position:
                continue
            if candidate.emptyplayer or not candidate.playing_hand:
                continue
            if candidate.stack > 0:
                assert candidate is not None
                return candidate
    return None


def _next(self, players, current_player):
    """Return the next player after ``current_player`` who still has chips.

    Walks ``players`` cyclically starting right after ``current_player``.
    The walk gives up after 10 skipped players and then returns whoever it
    stopped on; per the original note that player is irrelevant by then.
    """
    table_size = len(players)
    start = players.index(current_player)

    offset = 1
    while players[(start + offset) % table_size].stack <= 0:
        offset += 1
        if offset > 10:
            # Nobody with chips was found; stop instead of looping forever.
            break

    chosen = players[(start + offset) % table_size]
    assert chosen is not None
    return chosen
def _deal(self):
    """Give two cards to every seat that is in the hand and has chips."""
    for player in self._seats:
        if player.playing_hand and player.stack > 0:
            player.hand = self._deck.draw(2)


def _flop(self):
    """Burn one card and deal the three flop cards as the new board."""
    self._discard.append(self._deck.draw(1))  # burn
    this_flop = self._deck.draw(3)
    self.flop_cards = this_flop
    # Copy: the board grows on the turn and river, and sharing one list
    # would make ``flop_cards`` grow with it.
    self.community = list(this_flop)


def _turn(self):
    """Burn one card and append the turn card to the board."""
    self._discard.append(self._deck.draw(1))  # burn
    self.turn_card = self._deck.draw(1)
    self.community.append(self.turn_card)


def _river(self):
    """Burn one card and append the river card to the board."""
    self._discard.append(self._deck.draw(1))  # burn
    self.river_card = self._deck.draw(1)
    self.community.append(self.river_card)


def _ready_players(self):
    """Bring every seated player who was sitting out back into the hand."""
    for p in self._seats:
        if not p.emptyplayer and p.sitting_out:
            p.sitting_out = False
            p.playing_hand = True


def _resolve_sidepots(self, players_playing):
    """Move the players' current bets into the side pots.

    Each pass moves, from every player with a bet, at most the smallest bet
    among the players still in the hand into the current side pot. If a
    player holding that smallest bet is all-in, a new side pot is opened
    and the remaining bets are processed recursively.

    Args:
        players_playing: Players whose ``currentbet`` should be collected
            (folded players may be included).
    """
    players = [p for p in players_playing if p.currentbet]
    if self._debug:
        print('current bets: ', [p.currentbet for p in players])
        print('playing hand: ', [p.playing_hand for p in players])
    if not players:
        return

    live_bets = [p.currentbet for p in players if p.playing_hand]
    if not live_bets:
        # Only folded players still have chips in front of them: everything
        # goes into the current pot.
        for p in players:
            self._side_pots[self._current_sidepot] += p.currentbet
            p.currentbet = 0
        return
    smallest_bet = min(live_bets)

    smallest_players_allin = [
        p for p in players if p.currentbet == smallest_bet and p.isallin
    ]

    for p in players:
        moved = min(smallest_bet, p.currentbet)
        self._side_pots[self._current_sidepot] += moved
        p.currentbet -= moved
        p.lastsidepot = self._current_sidepot

    if smallest_players_allin:
        self._current_sidepot += 1
        self._resolve_sidepots(players)
    if self._debug:
        print('sidepots: ', self._side_pots)


def _new_round(self):
    """Reset per-round player and table state and advance the round counter."""
    for player in self._player_dict.values():
        player.currentbet = 0
        player.playedthisround = False
        player.round = {'moves_i_made_in_this_round_sofar': '',
                        'possible_moves': set([]),
                        'raises_owed_to_me': 0,
                        "raises_i_owe": 0}
        # Remember the stack the player starts this round with.
        player.round_track_stack = player.stack
    self.is_new_r = True
    self._round += 1
    self._tocall = 0
    self._lastraise = 0
    self.last_seq_move = []
def is_off_balance_LR(self):
    """Return True when the top raise level differs from the runner-up's.

    The leader reported by ``highest_in_LR`` is removed from a copy of
    ``self.level_raises`` and the highest remaining level is compared with
    the leader's.
    """
    highest_value, highest_bot = self.highest_in_LR()
    lr_without_highest = dict(self.level_raises)
    del lr_without_highest[highest_bot]
    next_highest_value, _ = self.highest_in_LR(specific=lr_without_highest)
    return highest_value != next_highest_value


def _resolve_round(self, players):
    """Pay out the hand and mark the game as resolved.

    With a single player left (everyone else folded) that player is
    refunded from the recorded starting stacks. Otherwise each hand is
    ranked and every non-empty side pot is split among its best-ranked
    contributors.

    Args:
        players: The players still in the hand.
    """
    if len(players) == 1:
        survivor = players[0]
        winner, loser = None, None
        # Heads-up: whoever of the recorded players is not the survivor lost.
        for p in self._record_players:
            if p == survivor:
                winner = p
            else:
                loser = p
        winner_investment = winner.stack_start_game - winner.stack
        loser_loss = loser.stack_start_game - loser.stack
        # NOTE(review): 15 and 25 look like the small/big blind sizes, i.e.
        # the loser could not even cover their blind -- confirm.
        if ((loser.stack_start_game < 15 and loser.position == 0)
                or (loser.stack_start_game < 25 and loser.position == 2)):
            survivor.refund(self.starting_stack_size - winner.stack)
        else:
            survivor.refund(winner_investment + loser_loss)
        self._totalpot = 0
        self.winning_players = survivor
    else:
        # Compute hand ranks.
        for player in players:
            player.handrank = self._evaluator.evaluate(player.hand, self.community)

        # Only the non-empty side pots are paid out.
        temp_pots = [pot for pot in self._side_pots if pot > 0]

        for pot_idx, _ in enumerate(temp_pots):
            # Players involved in this side pot and the best rank among them.
            pot_contributors = [p for p in players if p.lastsidepot >= pot_idx]
            winning_rank = min([p.handrank for p in pot_contributors])
            winning_players = [p for p in pot_contributors if p.handrank == winning_rank]
            self.winning_players = winning_players[0]

            # Work out the share once. Recomputing it inside the loop from
            # the already reduced pot paid later winners less (100 split two
            # ways came out as 50 and 25).
            split_amount = int(self._side_pots[pot_idx] / len(winning_players))
            for player in winning_players:
                if self._debug:
                    print('Player', player.player_id, 'wins side pot (', split_amount, ')')
                player.refund(split_amount)
                self._side_pots[pot_idx] -= split_amount

            # Any remaining chips after splitting go to the winner in the
            # earliest position.
            if self._side_pots[pot_idx]:
                earliest = self._first_to_act(list(winning_players), "sidepot")
                if earliest is None:
                    # No positional pick available (e.g. all winners are
                    # all-in): fall back to the first winner.
                    earliest = winning_players[0]
                earliest.refund(self._side_pots[pot_idx])

    self.game_resolved = True
def report_game(self, requested_attributes, specific_player=None):
    """Report player stacks.

    Args:
        requested_attributes: Container of attribute names; only ``"stack"``
            is understood.
        specific_player: Key into ``self._player_dict``; when given, only
            that player's stack is returned.

    Returns:
        ``{key: stack}`` for all players, the single stack for
        ``specific_player``, or ``None`` when ``"stack"`` was not requested.

    Raises:
        KeyError: ``specific_player`` is not a known player key.
    """
    if "stack" not in requested_attributes:
        return None
    player_stacks = {key: player.stack for key, player in self._player_dict.items()}
    if specific_player is None:
        return player_stacks
    # The original looked this up in an undefined name (``player_dict``) and
    # therefore always raised NameError here.
    return player_stacks[specific_player]


def _reset_game(self):
    """Reset per-hand state and move the button to the next playing seat."""
    playing = 0
    for player in self._seats:
        if not player.emptyplayer and not player.sitting_out:
            player.stack_start_game = player.stack
            player.reset_hand()
            playing += 1

    self.community = []
    self._current_sidepot = 0
    self._totalpot = 0
    self._side_pots = [0] * len(self._seats)
    self._deck.shuffle()
    self.level_raises = {0: 0, 1: 0, 2: 0}
    self.winning_players = None
    self.game_resolved = False

    if playing:
        seat_count = len(self._seats)
        self._button = (self._button + 1) % seat_count
        # Skip seats that are not in the hand.
        while not self._seats[self._button].playing_hand:
            self._button = (self._button + 1) % seat_count


def _output_state(self, current_player):
    """Build the state dictionary handed to ``current_player`` for a decision."""
    return {
        'players': [player.player_state() for player in self._seats],
        'community': self.community,
        'my_seat': current_player.get_seat(),
        'pocket_cards': current_player.hand,
        'pot': self._totalpot,
        'button': self._button,
        # What this player still has to add to match the current bet.
        'tocall': (self._tocall - current_player.currentbet),
        'stack': current_player.stack,
        'bigblind': self._bigblind,
        'player_id': current_player.player_id,
        'lastraise': self._lastraise,
        'minraise': max(self._bigblind, self._lastraise + self._tocall),
    }


def _pad(self, l, n, v):
    """Return ``l`` extended with ``v`` up to length ``n``.

    ``None`` and empty input are treated as an empty list; input already
    ``n`` long or longer is returned with nothing appended.
    """
    if not l:
        l = []
    return l + [v] * (n - len(l))
def _get_current_state(self):
    """Build the observation: per-seat features plus table-level features.

    Returns:
        ``(player_states, community_states)`` where ``player_states`` is a
        tuple with one ``(features, hand)`` pair per seat (hand padded to two
        entries with -1) and ``community_states`` is ``(features, board)``
        with the board padded to five entries with -1.
    """
    player_states = []
    for player in self._seats:
        player_features = [
            int(player.stack),
            int(player.handrank),
            int(player.playedthisround),
            int(player.betting),
            int(player.lastsidepot),
        ]
        player_states.append((player_features, self._pad(player.hand, 2, -1)))

    community_features = [
        int(self.learner_bot.position),
        int(self._totalpot),
        int(self._lastraise),
        int(self._current_player.get_seat()),
        # Minimum raise.
        int(max(self._bigblind, self._lastraise + self._tocall)),
        # Amount the current player still has to call.
        int(self._tocall - self._current_player.currentbet),
    ]
    community_states = (community_features, self._pad(self.community, 5, -1))
    return (tuple(player_states), community_states)


def _get_current_reset_returns(self):
    """Return the current observation (used after a reset)."""
    return self._get_current_state()


def distribute_rewards_given_endgame(self):
    """Set the learner's end-of-game reward.

    A winning learner gets ``compute_reward()`` plus the total pot;
    otherwise the reward is the learner's ``round_track_stack``.
    """
    if self.learner_bot is self.winning_players:
        self.learner_bot.reward = self.compute_reward() + self._totalpot
    else:
        self.learner_bot.reward = self.learner_bot.round_track_stack


def _get_current_step_returns(self, terminal, action=None):
    """Assemble the values returned by ``step`` and update the learner's reward.

    Args:
        terminal: Whether the hand is over.
        action: The move just played, or ``None`` when nobody acted.

    Returns:
        ``(observations, None, terminal, [])`` when ``action`` is ``None``,
        otherwise ``(observations, action, None, terminal, [])``. The reward
        slot is always ``None``; the actual reward is stored on
        ``self.learner_bot.reward``.
    """
    observations = self._get_current_state()
    reward = None
    if action is None:
        return observations, reward, terminal, []  # TODO, return some info?

    # At the end of a step, when the player has already decided their action.
    if self._last_player is self.learner_bot:
        # Learner bot step return.
        if self.signal_end_round:
            self.signal_end_round = False
        # Most common entry point (learner checks or raises).
        self.learner_bot.reward = self.compute_reward()
    else:
        # Artificial agent step return.
        self.learner_bot.reward = 0
        if self.signal_end_round and action == ('fold', 0):
            # Opponent folded: the learner collects the pot.
            self.learner_bot.reward = self._totalpot

    return observations, action, reward, terminal, []  # TODO, return some info?
def compute_reward(self):
    """Score the last action by its expected value relative to the alternatives.

    Only meant to be called when the last player is the learner.

    Expected value (EV) tells how profitable a play is in the long run:
    EV = (size of pot x probability of winning) - cost of entering it.
    An EV is computed for call/check, raise/bet and fold; the reward is the
    EV of the action actually taken minus the mean of the three.

    Returns:
        The deviation of the chosen action's EV from the mean EV.
    """
    # Only works for heads-up: hard-coded because of a bug with ``tocall``.
    preflop_small_blind_call = 15
    # Size of the raise assumed when valuing the raise/bet alternative.
    assumed_raise = 25

    learner_equity, opp_equity = self.equity()[:2]

    if self._round == 0 and self._last_player.position == 0:
        to_call = preflop_small_blind_call
        total_pot = self._totalpot - to_call
    else:
        to_call = self._last_actions[1]
        if self._last_player is not self.learner_bot:
            total_pot = self._totalpot
        else:
            # The learner's own chips are already in the pot; take them out.
            total_pot = self._totalpot - self._last_actions[1]

    last_kind = self._last_actions[0]
    if last_kind in ('call', 'check'):
        action_taken = 0
    elif last_kind in ('raise', 'bet'):
        action_taken = 1
    else:
        action_taken = 2

    # In order of call/check, raise/bet, fold.
    expected_values_order = [0, 0, 0]

    # Call/check: win the pot with our equity, lose the call otherwise.
    expected_values_order[0] = total_pot * learner_equity - to_call * opp_equity

    # Fold: the mirror image of calling.
    expected_values_order[2] = to_call * opp_equity - total_pot * learner_equity

    # Raise/bet, only when a raise was possible.
    if self.learner_bot.raise_possible_tba:
        # Implied raise: how much more we stand to win given that the
        # villain calls with probability ``certainty_to_call``.
        call_probability = self.villain.certainty_to_call
        stand_to_win = (((total_pot + assumed_raise) * learner_equity) * call_probability
                        + (total_pot * learner_equity) * (1 - call_probability))
        stand_to_lose = (to_call + assumed_raise) * opp_equity
        expected_values_order[1] = stand_to_win - stand_to_lose

    # How much the chosen action deviates from the mean determines its
    # quality in the context of all possible actions.
    return expected_values_order[action_taken] - mean(expected_values_order)
def compute_reward_end_round_fold(self, respective_evaluations, evaluations_opposing_players):
    """Return the last player's evaluation relative to the opponents' mean.

    The difference is divided by ``self.weighting_coefficient_round_resolve``.
    """
    own_evaluation = respective_evaluations[self._last_player.get_seat()]
    opponents_mean = mean(list(evaluations_opposing_players))
    return (own_evaluation - opponents_mean) / self.weighting_coefficient_round_resolve


def compute_regret_given_action(self, my_action, respective_evaluations, evaluations_opposing_players):
    """Delegate to ``compare_evaluations_players``; returns nothing itself."""
    self.compare_evaluations_players(my_action, respective_evaluations, evaluations_opposing_players)


def equity(self):
    """Return ``(learner_equity, opponent_equity)`` for the current street.

    Equity is the share of the pot that "belongs" to a player, i.e. how
    often they expect to win from this point onwards. When the betting round
    has just ended the previous round number is used. Rounds 1-3 are
    simulated via ``compute_winner_simulation``; otherwise the estimate is
    built from both players' ``hand_strength``.
    """
    _round = self._round if self.signal_end_round is not True else self._round - 1
    if _round in (1, 2, 3):
        learner_utility, opp_utility = self.compute_winner_simulation(_round)
        return learner_utility, opp_utility

    learner_strength = self.learner_bot.he.hand_strength
    villain_strength = self.villain.he.hand_strength
    # Average a player's own strength with the complement of the other's.
    learner_equity = (learner_strength + (1 - villain_strength)) / 2
    villain_equity = (villain_strength + (1 - learner_strength)) / 2
    return learner_equity, villain_equity


def compute_winner_simulation(self, _round):
    """Estimate win shares by enumerating every possible run-out of the board.

    Args:
        _round: 1 (flop: enumerate turn and river), 2 (turn: enumerate the
            river) or 3 (river: report the recorded winner).

    Returns:
        ``(learner_share, opponent_share)``. A tied run-out counts as a win
        for both players.

    Raises:
        ZeroDivisionError: No run-out could be evaluated because no
            unrevealed card was left. The original attempted to raise a
            plain string, which surfaced as ``TypeError``.
    """
    evaluator = self._evaluator
    learner_cards = self.learner_bot.hand
    villain_cards = self.villain.hand

    if _round == 3:
        # Nothing left to deal: the recorded winner takes everything.
        if self.learner_bot is self.winning_players:
            return 1.0, 0.0
        return 0.0, 1.0

    # Board as it stood on the simulated street.
    cards_on_board = {1: 3, 2: 4}.get(_round, 5)
    community = [self.community[i] for i in range(cards_on_board)]

    unrevealed_cards = sorted(
        card for card in self._deck.cards
        if card not in community
        and card not in learner_cards
        and card not in villain_cards
    )

    wins = [0, 0]  # [learner, opponent]

    def tally(board):
        """Evaluate both hands on ``board`` and credit the winner(s)."""
        learner_eval = evaluator.evaluate(learner_cards, board)
        opp_eval = evaluator.evaluate(villain_cards, board)
        # Lower evaluation is the better hand.
        if learner_eval <= opp_eval:
            wins[0] += 1
        if opp_eval <= learner_eval:
            wins[1] += 1

    if _round == 1:
        for turn_idx, turn_card in enumerate(unrevealed_cards):
            for river_card in unrevealed_cards[turn_idx:]:
                if turn_card == river_card:
                    continue
                tally(community + [turn_card] + [river_card])
    elif _round == 2:
        for river_card in unrevealed_cards:
            tally(community + [river_card])

    learner_wins, opp_wins = wins
    if opp_wins == 0 and learner_wins == 0:
        raise ZeroDivisionError("error: division by zero")
    total = learner_wins + opp_wins
    # Uses the evaluator here; it might be better to use player hand strength.
    return (learner_wins / total, opp_wins / total)
def compare_evaluations_players(self, my_action, respective_evaluations, evaluations_opposing_players):
    """Placeholder for per-action regret bookkeeping; currently does nothing.

    All three arguments are accepted and ignored. The commented-out draft
    below sketches a regret computation per action kind and is kept for
    reference only.
    """
    pass
    # expected_value = self.expected_value()
    # if my_action is ('fold', 0):
    #     # calculate how good my cards are compared to raisers cards
    #     _, raiser_bot = self.highest_in_LR()
    #     raiser_strength = raiser_bot.he.evaluation
    #     regret = (raiser_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_fold
    #     # Remember: Higher evaluation means worse cards, lower means better cards.
    #     # e.g. If my evaluation was 5400, and my opponents evaluation was 7500, I would have positive regret ( I would regret having folded)
    #     self._current_player.regret.update({'fold': regret})
    # elif my_action is ('check', 0):
    #     # calculate how good my cards are compared to other players, and thus compute how much I regret not having raised
    #     # If my evaluation is lower (better cards) than my opponents relatively high evaluation (worse cards), I would have positive regret
    #     _, opposing_bot = self.current_player()  # We can assign opposing as current_player (2-players heads-up) because we already rotated the table position
    #     opposing_bot_strength = opposing_bot.he.evaluation
    #     regret = (opposing_bot_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check
    #     self._current_player.regret.update({'check': regret})
    # elif my_action[0] is 'call':
    #     # Now we must compute the regret based on how much we would have been better off taking another action: Here, unlike other times, we have
    #     # 2 possible alternatives: Raise or fold. If we take a call action, we must compute the expected value for the other alternatives.
    #     pass
    # elif my_action[0] is 'raise':
    #     _, raiser_bot = self.highest_in_LR()
    #     raiser_strength = raiser_bot.he.evaluation
    #     regret = (raiser_evaluation - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check
    #     self._current_player.regret.update({'check': regret})