def get_win_prob(self, state, playerid, hand_cards, board_cards, num_players):
    """Estimate the chance of winning by Monte Carlo simulation.

    For each of ``self.simulation_number`` rounds the missing community
    cards and two hole cards per opponent are drawn with
    ``self._pick_unused_card``; every hand is scored with the evaluator and
    the round counts as a win when no opponent scores strictly better
    (ties count as wins).

    :param state: Not used by this method; kept for interface compatibility.
    :param playerid: Not used by this method; kept for interface
        compatibility.
    :param hand_cards: List of the player's hole cards, each accepted by
        ``Card.new``.
    :param board_cards: List of community cards already dealt (same format).
    :param num_players: Total number of players, including this one.
    :return: Wins divided by the number of rounds that could be evaluated,
        or ``0.0`` when no round could be evaluated.
    """
    if self.simulation_number <= 0:
        # Nothing to simulate; avoids building the evaluator and dividing
        # by zero below.
        return 0.0

    evaluator = Evaluator()
    board_cards_to_draw = 5 - len(board_cards)
    wins = 0
    rounds = 0
    for _ in range(self.simulation_number):
        board_sample = board_cards + self._pick_unused_card(
            board_cards_to_draw, hand_cards + board_cards)
        unused_cards = self._pick_unused_card(
            (num_players - 1) * 2, hand_cards + board_sample)
        board_sample = [Card.new(card) for card in board_sample]
        unused_cards = [Card.new(card) for card in unused_cards]
        opponents_hole = [unused_cards[2 * k:2 * k + 2]
                          for k in range(num_players - 1)]
        try:
            # Raw evaluator scores are compared directly (lower is better).
            # The previous ``1 - score / 7462`` normalisation gave the same
            # ordering under true division but collapsed every hand to the
            # same value under integer division.
            opponents_score = [evaluator.evaluate(hole, board_sample)
                               for hole in opponents_hole]
            myhand_cards = [Card.new(card) for card in hand_cards]
            my_score = evaluator.evaluate(myhand_cards, board_sample)
            if my_score <= min(opponents_score):
                wins += 1
            rounds += 1
        except Exception:
            # Best effort, as before: a round that cannot be evaluated is
            # dropped from both the numerator and the denominator.
            continue

    if rounds == 0:
        return 0.0
    return float(wins) / rounds
def evaluateFromState(self, state, playerid):
    """Score a player's hand against the currently visible community cards.

    :param state: Game state exposing ``player_states[playerid].hand`` and
        ``community_card``; board slots equal to ``-1`` are skipped.
    :param playerid: Index of the player in ``state.player_states``.
    :return: ``[rank, percentage]`` where ``rank`` is the evaluator score and
        ``percentage`` is ``1.0 - get_five_card_rank_percentage(rank)``
        (higher is better).
    :raises ValueError: If the number of visible board cards is not
        0, 3, 4 or 5.
    """
    evaluator = Evaluator()
    hand = [Card.new(card_to_normal_str(card))
            for card in state.player_states[playerid].hand]
    board = [Card.new(card_to_normal_str(card))
             for card in state.community_card if card != -1]

    # Only these board sizes were handled before; any other size used to
    # fail later with an unbound ``rank`` variable.
    if len(board) not in (0, 3, 4, 5):
        raise ValueError(
            "unsupported number of community cards: %d" % len(board))

    rank = evaluator.evaluate(hand, board)
    percentage = 1.0 - evaluator.get_five_card_rank_percentage(rank)
    return [rank, percentage]
def monteCarlo(board, hand, numPlayers, monteN):
    """Estimate by simulation how often ``hand`` is not beaten.

    Each trial deals two random hole cards to ``numPlayers`` opponents,
    completes the board to five cards, and counts a win when no opponent has
    a strictly lower evaluator score than ``hand`` (ties count as wins).

    :param board: Community cards already dealt; not modified.
    :param hand: The hero's hole cards.
    :param numPlayers: Number of opposing hands to deal per trial.
    :param monteN: Number of trials; truncated with ``int()``.
    :return: Wins divided by the number of trials actually run, or ``0.0``
        when ``monteN`` truncates to zero or less.
    """
    trials = int(monteN)
    if trials <= 0:
        # No trials requested: avoid the division by zero below.
        return 0.0

    deck = Deck()
    evaluator = Evaluator()
    # The cards available for dealing are the same in every trial, so the
    # filter is computed once and copied per trial.
    availableCards = [card for card in deck.cards
                      if card not in board and card not in hand]

    winAmount = 0
    for _ in range(trials):
        monteDeck = list(availableCards)
        trialBoard = list(board)

        playerHands = []
        for _player in range(numPlayers):
            holeCards = []
            for _card in range(2):
                randomIndex = randrange(0, len(monteDeck))
                holeCards.append(monteDeck.pop(randomIndex))
            playerHands.append(holeCards)

        while len(trialBoard) < 5:
            randomIndex = randrange(0, len(monteDeck))
            trialBoard.append(monteDeck.pop(randomIndex))

        handRank = evaluator.evaluate(trialBoard, hand)
        # all() stops at the first opponent that beats us, like the
        # original break.
        if all(evaluator.evaluate(trialBoard, other) >= handRank
               for other in playerHands):
            winAmount += 1

    # Divide by the number of trials that really ran, not the raw monteN.
    return winAmount / trials
def should_call(hh):
    """Tell whether calling would strictly have been the winning play.

    Scores follow the treys convention: a rank from 1 (royal flush) up to
    7xxx (nut low), so the lower score beats the higher one.

    :param hh: Parsed hand history with the 'Summary', 'Preflop', 'Showdown'
        and 'River' sections.
    :return: 1 when the hero's score is strictly lower than the villain's,
        otherwise 0 (a tie yields 0).
    """
    hand_evaluator = Evaluator()
    hero_score = hand_evaluator.evaluate(
        community_board(hh['Summary']),
        hero_hole_cards(hh['Preflop']))
    villain_score = hand_evaluator.evaluate(
        community_board(hh['Summary']),
        villain_hole_cards(hh['Showdown'], hh['River']))
    return 1 if hero_score < villain_score else 0
def _get_cards_rank(self, hole_card, round_state, debug_printouts):
    """
    Rank the player's hole cards together with the community cards.

    :param hole_card: Hole cards of own player
    :param round_state: Current round state, containing community cards
        under the 'community_card' key
    :param debug_printouts: Parameter for debugging purpose only. Allows
        printing the evaluated cards
    :return: Float between 0 and 1, representing the current five card rank
        among all possible poker hands. 0 represents the weakest five card
        combination, 1 the strongest (Royal Flush)
    """
    evaluator = Evaluator()
    board = [Card.new(card) for card in round_state['community_card']]
    hand = [Card.new(card) for card in hole_card]
    score = evaluator.evaluate(board, hand)
    if debug_printouts:
        # print_pretty_cards does the printing itself; wrapping it in
        # print() showed the cards a second time followed by "None".
        Card.print_pretty_cards(board + hand)
    return 1 - evaluator.get_five_card_rank_percentage(score)
def getWinners(gameId, players):
    """Return the player(s) holding the best hand for a stored game.

    :param gameId: Identifier passed to ``db.getGame``; the second field of
        the returned record is the board as a ':'-separated card string.
    :param players: Sequence of player records; index 0 is used as the
        player key and index 3 holds the two hole cards as a ':'-separated
        string.
    :return: List of ``[player_key, hand_class_name]`` for every player
        whose score equals the lowest score; empty when ``players`` is
        empty.
    """
    if not players:
        # No contenders: min() below would raise on an empty sequence.
        return []

    evaluator = Evaluator()
    _, board, _, _, _, _, _, _, _, _, _ = db.getGame(gameId)
    boardCards = [pyDealerCardToDeucesCard(code) for code in board.split(":")]

    rankings = {}
    for player in players:
        holeCards = player[3].split(":")
        rankings[player[0]] = evaluator.evaluate(boardCards, [
            pyDealerCardToDeucesCard(holeCards[0]),
            pyDealerCardToDeucesCard(holeCards[1]),
        ])

    minValue = min(rankings.values())
    handName = evaluator.class_to_string(evaluator.get_rank_class(minValue))
    return [[playerKey, handName]
            for playerKey, score in rankings.items()
            if score == minValue]
def index():
    """Deal a random eight-player hand and return the result as JSON.

    The JSON object has one entry per player (``score`` and hand-class
    ``text``), plus ``winners`` (name of one player with the lowest score),
    ``tie`` (whether more than one player holds that lowest score) and
    ``card`` (string form of the fixed card 'Qh').

    :return: JSON-encoded string.
    """
    evaluator = Evaluator()
    deck = Deck()
    card = Card.new('Qh')
    board = deck.draw(5)
    player_names = tuple("player %d" % number for number in range(1, 9))

    players = {}
    output = {}
    for name in player_names:
        hand = deck.draw(2)
        score = evaluator.evaluate(board, hand)
        text = evaluator.class_to_string(evaluator.get_rank_class(score))
        players[name] = score
        output[name] = {'score': score, 'text': text}

    # Still a single name, so existing consumers of "winners" keep working.
    winner = min(players, key=players.get)
    # A tie only matters when it involves the winning score. The previous
    # check was inverted: it reported True when all scores were unique.
    best_score = players[winner]
    tie = sum(1 for score in players.values() if score == best_score) > 1

    output["winners"] = winner
    output["tie"] = tie
    output["card"] = Card.int_to_str(card)
    return json.dumps(output)
async def find_best_plo_hand(user_id, channel_id):
    """Return the lowest evaluator score over all 3-board/2-hole splits.

    Every combination of three board cards is evaluated with every
    combination of two of the player's cards, and the smallest score is
    returned. Progress is traced to stdout along the way.

    :param user_id: Name of the player whose cards are evaluated.
    :param channel_id: Key into ``player_list`` and ``tab_list``.
    :return: The smallest score produced by the evaluator.
    """
    seated_players = player_list[channel_id]
    table = tab_list[channel_id]["table"]
    evaluator = Evaluator()

    board = table.cards
    print(board, "board")
    hand = [player.cards for player in seated_players
            if player.name == user_id][0]
    print(hand, "hand")

    board_triples = list(itertools.combinations(board, 3))
    print(board_triples)
    board_options = list(map(list, board_triples))
    print(board_options)

    hand_pairs = list(itertools.combinations(hand, 2))
    print(hand_pairs, "allhandtuple")
    hand_options = list(map(list, hand_pairs))
    print(hand_options, "allhandlist")

    scores = []
    print("just before loop")
    for board_part in board_options:
        print(board_part, "inside loop i")
        for hole_part in hand_options:
            print(hole_part, "inside loop j")
            scores.append(evaluator.evaluate(board_part, hole_part))

    return sorted(scores)[0]
def _rank_hands(self):
    """Map every active player to the evaluator score of their hand.

    :return: Dict keyed by the entries of ``self.active_players``; each
        value is ``Evaluator.evaluate(hand, self.board)`` for that player.
    """
    evaluator = Evaluator()
    return {
        player: evaluator.evaluate(self.players[player].hand, self.board)
        for player in self.active_players
    }
def evaluateHands(self):
    """Evaluate the hand of every player who is still in the hand.

    The first five entries of ``self.state.community_cards`` form the board;
    each player's first two ``hole_cards`` form the hand.

    :return: Dict mapping username to a dict with keys ``'score'``
        (evaluator score), ``'hand_class'`` (rank class) and
        ``'hand_class_string'`` (its name), e.g.
        ``{'player0': {'score': 1, 'hand_class': 8,
        'hand_class_string': 'Pair'}}``.
    :raises IndexError: If fewer than five community cards or fewer than two
        hole cards are available.
    """
    def to_treys(card):
        # Convert a {'rank': ..., 'suit': ...} card to the treys format:
        # rank followed by the lower-cased suit.
        return Card.new(card['rank'] + card['suit'].lower())

    community_cards = self.state.community_cards
    # Exactly the first five cards are used, as before.
    board = [to_treys(community_cards[position]) for position in range(5)]

    evaluator = Evaluator()
    results = {}
    for username, player in self.state.players.items():
        if not player.in_hand:
            continue
        hand = [to_treys(player.hole_cards[0]), to_treys(player.hole_cards[1])]
        score = evaluator.evaluate(board, hand)
        hand_class = evaluator.get_rank_class(score)
        results[username] = {
            'score': score,
            'hand_class': hand_class,
            'hand_class_string': evaluator.class_to_string(hand_class),
        }
    return results
def evaluateCards(board, hand):
    """Pretty-print the cards and print the hand's rank and class name.

    The arguments used to be overwritten unconditionally by the demo cards.
    They are now honoured; the demo cards are used only when an argument is
    empty or ``None``.

    :param board: List of treys card ints for the board, or a falsy value to
        use the demo board (Ah Kd Jc).
    :param hand: List of treys card ints for the hole cards, or a falsy
        value to use the demo hand (Qs Qh).
    :return: None; the result is printed.
    """
    board = board or [Card.new('Ah'), Card.new('Kd'), Card.new('Jc')]
    hand = hand or [Card.new('Qs'), Card.new('Qh')]

    Card.print_pretty_cards(board + hand)

    evaluator = Evaluator()
    score = evaluator.evaluate(board, hand)
    handType = evaluator.get_rank_class(score)
    print("Player 1 hand rank = %d (%s)\n"
          % (score, evaluator.class_to_string(handType)))
def setHandsStrenght(data, cards_players, cards_table, preflopRank):
    """Insert a hand-strength value at index 2 of every player entry.

    :param data: Sequence indexed by game state; state 0 is rated with
        ``findPreFlopRank`` and every later state with the evaluator against
        the board for that state. Each state holds mutable entries whose
        first element is the key into ``cards_players``.
    :param cards_players: Mapping from that key to the player's hole cards,
        indexed here as a four-character string (presumably rank, suit,
        rank, suit - confirm against the caller).
    :param cards_table: Board strings: element 0 holds the three flop cards
        concatenated (``''`` when there is no flop), elements 1 and 2 hold
        the following single cards.
    :param preflopRank: Passed through to ``findPreFlopRank``.
    :return: ``data``, modified in place.
    """
    # Cumulative boards: [flop], [flop + 4th card], [flop + 4th + 5th].
    table_cards = []
    if cards_table[0] != '':
        flop = cards_table[0]
        table_cards.append(
            [Card.new(flop[start:start + 2]) for start in (0, 2, 4)])
        for street_card in cards_table[1:3]:
            table_cards.append(table_cards[-1] + [Card.new(street_card)])

    evaluator = Evaluator()
    for game_state, entries in enumerate(data):
        for entry in entries:
            hand_cards = cards_players[entry[0]]
            if game_state == 0:
                # Classify the starting hand: pair, suited or off-suit.
                if hand_cards[0] == hand_cards[2]:
                    suit_char = 'p'
                elif hand_cards[1] == hand_cards[3]:
                    suit_char = 's'
                else:
                    suit_char = 'o'
                only_cards = hand_cards[0] + hand_cards[2]
                hand_strength = findPreFlopRank(only_cards, suit_char,
                                                preflopRank)
            elif table_cards:
                hand_cards_obj = [
                    Card.new(hand_cards[:2]),
                    Card.new(hand_cards[2:]),
                ]
                hand_strength = evaluator.evaluate(
                    table_cards[game_state - 1], hand_cards_obj)
            else:
                # No board is known, so there is nothing to evaluate. Skip
                # the entry rather than insert an unbound or stale value.
                continue
            entry.insert(2, hand_strength)
    return data
def get_final_ranking():
    """Collect the evaluator score of every player holding cards.

    Reads ``state`` from the enclosing scope. Players whose converted hand
    is empty (not playing this round) are left out.

    :return: List of scores, in ``state.player_states`` order.
    """
    evaluator = Evaluator()
    rankings = []
    for player in state.player_states:
        hole = get_card_class(player.hand)
        community = get_card_class(state.community_card)
        if hole:
            rankings.append(evaluator.evaluate(hole, community))
    return rankings
def get_hands_score(player_hands, round_cards):
    """Score every player's hand against the round's cards.

    :param player_hands: Iterable of dicts with the keys ``'cards'`` and
        ``'player_id'``.
    :param round_cards: Cards passed to the evaluator as its first argument.
    :return: List of ``{'score': ..., 'player_id': ...}`` dicts, in input
        order.
    """
    evaluator = Evaluator()
    return [
        {
            'score': evaluator.evaluate(round_cards, entry['cards']),
            'player_id': entry['player_id'],
        }
        for entry in player_hands
    ]
def evaluateCards(board, hand):
    """Pretty-print the cards and print the hand's rank and class name.

    The arguments used to be overwritten unconditionally by the demo cards.
    They are now honoured; the demo cards are used only when an argument is
    empty or ``None``.

    :param board: List of treys card ints for the board, or a falsy value to
        use the demo board (Ks Ad Jc 5d 7s).
    :param hand: List of treys card ints for the hole cards, or a falsy
        value to use the demo hand (Qs 8c).
    :return: None; the result is printed.
    """
    hand = hand or [Card.new("Qs"), Card.new("8c")]
    board = board or [
        Card.new("Ks"),
        Card.new("Ad"),
        Card.new("Jc"),
        Card.new("5d"),
        Card.new("7s"),
    ]

    Card.print_pretty_cards(board + hand)

    evaluator = Evaluator()
    score = evaluator.evaluate(board, hand)
    handType = evaluator.get_rank_class(score)
    print("Player 1 hand rank = %d (%s)\n"
          % (score, evaluator.class_to_string(handType)))
def eval_card_rank(self, state, playerid):
    """Rate the player's hand as a value where higher is better.

    With fewer than three visible board cards the call is delegated to
    ``self.get_win_prob(state, playerid)``; otherwise the result is
    ``1.0 - get_five_card_rank_percentage(score)``.

    :param state: Game state exposing ``player_states[playerid].hand`` and
        ``community_card``; entries equal to ``-1`` are ignored.
    :param playerid: Index of the player in ``state.player_states``.
    :return: The value described above.
    """
    evaluator = Evaluator()

    def to_cards(raw_cards):
        # Drop the -1 placeholders and rebuild each card via its string form.
        return [Card.new(Card.int_to_str(raw))
                for raw in raw_cards if raw != -1]

    hole = to_cards(state.player_states[playerid].hand)
    community = to_cards(state.community_card)

    if len(community) >= 3:
        score = evaluator.evaluate(hole, community)
        return 1.0 - evaluator.get_five_card_rank_percentage(score)
    return self.get_win_prob(state, playerid)
def test_my_hand(self):
    """
    Input: Cards from my profile as a string
    Output: Description of the poker hand as a string

    Checks that Player.my_hand() names the same hand class that treys
    reports for the same board and hole cards.
    """
    board_codes = ['3s', '2s', 'As', 'Ks', 'Qs']
    hole_codes = ['Js', 'Ts']

    # Player arrange: cards are stored as comma-separated treys ints.
    table_cards = ','.join(str(Card.new(code)) for code in board_codes)
    player_cards = ','.join(str(Card.new(code)) for code in hole_codes)
    table = Table(cards_on_table=table_cards)
    player = Player(table=table, cards=player_cards)

    # Player act
    player_result = player.my_hand()

    # Treys arrange
    board = [Card.new(code) for code in board_codes]
    gamer = [Card.new(code) for code in hole_codes]

    # Treys act
    evaluator = Evaluator()
    treys_result = evaluator.class_to_string(
        evaluator.get_rank_class(evaluator.evaluate(board, gamer)))

    # Assertion
    self.assertEqual(player_result, treys_result)
    self.assertIsInstance(player_result, str)
def my_hand(self):
    """Return name of my best hand.

    :return: The hand-class name reported by the evaluator for the table
        cards plus this player's cards, or ``''`` when no cards are on the
        table (``self.table.cards_on_table`` is ``None``).
    """
    # No cards on table
    if self.table.cards_on_table is None:
        return ''

    # 1. Get cards from table and from player
    #    and convert them to lists of ints
    cards_on_table = list(map(
        int, self.convert_from_string_to_list(self.table.cards_on_table)))
    my_cards = list(map(int, self.convert_from_string_to_list(self.cards)))

    # 2. Name my hand
    evaluator = Evaluator()
    score = evaluator.evaluate(cards_on_table, my_cards)
    return evaluator.class_to_string(evaluator.get_rank_class(score))
def run(self):
    """Accept one client and run the two-player Texas Hold'em game loop.

    The server side plays as player index 0 (input read locally through
    ``poker.wczytaj_poprawna_odp``); the connected client plays as index 1
    and exchanges pickled ``poker.PaczkaDoKlienta`` objects over the socket.
    Rounds are played until a player quits, a player's capital reaches 0,
    or the client connection is lost. On a normal finish the module-level
    flag ``koniec_polaczenia`` is set to True.
    """
    conn, addr = mySocket.accept()
    print(">>Polaczenie od: " + str(addr))
    global_info = "\n\nGra Texas Hold'em Poker sterowany głosowo dla 2 graczy. Zapraszamy do gry!"
    print(global_info)
    conn.send(global_info.encode())
    # conn.recv(1024)
    ustawienia = UstawieniaGry(2)
    gracze = list()
    for i in range(0, ustawienia.liczba_graczy):
        gracze.append(poker.Gracz(i))
    sprawdz = Evaluator()
    rozp = 0
    # 0 - index of the player on the server
    # 1 - index of the client
    # main game loop (one iteration per round)
    while True:
        global_info = "\n*******************************Kolejna runda*********************************"
        talia = Deck()  # deck of cards
        stol = poker.Stol(len(gracze))
        zwyciezca = -1  # index of the winner
        pas = -1
        for g in gracze:
            g.reka = talia.draw(2)
        stol.karty = talia.draw(5)
        global_info += '\n' + str(
            stol.doloz_stawke(
                gracze[rozp],
                ustawienia.ciemne))  # opening stake for the 1st turn
        gracze[rozp].stan = poker.stan_gracza["postawil"]
        najwyzsza_stawka = ustawienia.ciemne
        print(global_info)
        conn.send(pickle.dumps(poker.PaczkaDoKlienta(global_info)))
        # loop over the 3 betting turns
        for tura in range(1, 4):
            global_info = "\n\n**************Trwa tura %s****************" % str(
                tura)
            global_info += "\n\nObecnie w puli: " + str(stol.pula)
            poker.zresetuj_akcje(
                gracze
            )  # clears the actions of the previous turn, except folds and any all-ins
            if tura == 1:
                aktywny = poker.nastepny(
                    rozp
                )  # aktywny is the index of the active (currently deciding) player in the betting
            else:  # and nastepny() advances the iterator to the next player
                aktywny = rozp
            koniec = False
            print(global_info)
            conn.send(pickle.dumps(poker.PaczkaDoKlienta(global_info)))
            # loop that lets the players act (one pass is one player's decision)
            while True:
                global_info = ''
                if gracze[aktywny].stan != poker.stan_gracza[
                        "va bank"]:  # exception: all-in players are skipped
                    # build the info text
                    global_info += "\n**************Teraz gracz %s***************" % (
                        aktywny + 1)
                    global_info += stol.wypisz_karty_na_stole()
                    global_info += '\n' + gracze[
                        aktywny].wypisz_karty_gracza()
                    global_info += "\nNajwyższa stawka na stole: " + str(
                        najwyzsza_stawka)
                    global_info += "\nTwoja stawka: " + str(
                        stol.stawki_graczy[aktywny])
                    global_info += "\nKapital: " + str(
                        gracze[aktywny].kapital)
                    if aktywny == 0:
                        print(global_info)
                        # read the player's action, see poker.py for details
                        odp = poker.wczytaj_poprawna_odp(
                            najwyzsza_stawka - stol.stawki_graczy[aktywny],
                            gracze[aktywny].kapital,
                            gracze[aktywny].podbicia)
                    else:
                        conn.send(
                            pickle.dumps(
                                poker.PaczkaDoKlienta(
                                    stol=global_info,
                                    min=najwyzsza_stawka -
                                    stol.stawki_graczy[aktywny],
                                    maks=gracze[aktywny].kapital,
                                    podbicia=gracze[aktywny].podbicia,
                                    odp=True)))
                        odp = conn.recv(1024)
                        if odp:
                            # NOTE(review): pickle.loads on bytes received from
                            # the network can execute arbitrary code; only safe
                            # if the client is fully trusted.
                            odp = pickle.loads(odp).odpowiedz
                        else:
                            # an empty read means the client closed the connection
                            print('\nUtracono polaczenie z klientem.')
                            conn.close()
                            return
                    # perform the chosen action
                    global_info = poker.podejmij_akcje(
                        gracze[aktywny], odp, stol)
                    print(global_info)
                    conn.send(
                        pickle.dumps(
                            poker.PaczkaDoKlienta(akcja=global_info)))
                    if najwyzsza_stawka < stol.stawki_graczy[aktywny]:
                        najwyzsza_stawka = stol.stawki_graczy[
                            aktywny]  # keeps track of the highest stake placed
                    # handle a fold
                    if gracze[aktywny].stan == poker.stan_gracza["spasowal"]:
                        pas = poker.czy_wszyscy_spasowali(gracze)
                        if pas != -1:
                            koniec = True
                    # handle the "quit game" option
                    if gracze[aktywny].stan == poker.stan_gracza["skonczyl"]:
                        zwyciezca = poker.nastepny(aktywny)
                        koniec = True
                    if koniec:
                        break
                # check whether all players have already acted while the stakes are equal
                if poker.czy_koniec_tury(gracze, stol, najwyzsza_stawka):
                    break
                aktywny = poker.nastepny(aktywny)
            # **********************************end of the while() loop***************************************
            # clean up after the finished turn
            stol.zbierz_do_puli()  # all stakes go into the common pot
            if zwyciezca >= 0 or pas >= 0:
                break  # when one of the two players folded
            if poker.liczba_graczy_w_licytacji(gracze) <= 1:
                stol.odkryte = 5
                break
            stol.odkryte += 1
            najwyzsza_stawka = 0
        # **********************end of the turns loop***************************************
        global_info = ''
        if pas >= 0 and not poker.czy_ktos_allin(
                gracze):  # when everybody folded
            global_info = "\n***Zwyciezca rundy zostaje gracz %s!***" % (
                pas + 1)
            gracze[pas].kapital += stol.pula
            stol.pula = 0
        elif zwyciezca == -1:  # showdown: the cards are compared here
            global_info = "\n****************Sprawdzenie kart*****************\n"
            global_info += stol.wypisz_karty_na_stole() + '\n'
            for g in gracze:
                global_info += g.wypisz_karty_gracza()
            wyniki = list()
            global_info += '\n'
            for g in gracze:
                wyniki.append(sprawdz.evaluate(stol.karty, g.reka))
                global_info += "\nWynik gracza %d: %s (%d)" \
                    % (g.id + 1, sprawdz.class_to_string(sprawdz.get_rank_class(wyniki[-1])), wyniki[-1])
            global_info += poker.rozdaj_pule(gracze, stol, wyniki)
        # total capital of every player
        global_info += "\n\nStan kapitalu graczy: "
        for g in gracze:
            global_info += "\nGracz %d: %d" % (g.id + 1, g.kapital)
        if zwyciezca == -1:  # check whether somebody has run out of money
            zwyciezca = -1
            if gracze[0].kapital == 0:
                zwyciezca = 1
            elif gracze[1].kapital == 0:
                zwyciezca = 0
        if zwyciezca != -1:
            global_info += "\n\n***Zwyciezca gry zostaje gracz %d, gratulacje!!!***" % (
                zwyciezca + 1)
            print(global_info)
            conn.send(pickle.dumps(poker.PaczkaDoKlienta(global_info)))
            input("\nNacisnij ENTER aby kontynuowac.")
            break
        print(global_info)
        conn.send(pickle.dumps(poker.PaczkaDoKlienta(global_info)))
        input("\nNacisnij ENTER aby kontynuowac.")
        # the next round is opened by the next player
        rozp = poker.nastepny(rozp, len(gracze))
        poker.zresetuj_akcje(gracze, do_poczatku=True)
    conn.send(
        pickle.dumps(
            poker.PaczkaDoKlienta('\nSerwer zakonczyl polaczenie.')))
    conn.close()
    global koniec_polaczenia
    koniec_polaczenia = True
    print('>>Zakonczono dzialanie gry.')
    return
def _drop_one_card(evaluator, board, hand, target_score):
    """Return (board, hand) minus one card that still scores target_score.

    Candidates are tried in a fixed order: first each of the two hand cards
    (the first board card then moves into the hand so that it keeps two
    cards), then each board card. The first candidate whose score equals
    ``target_score`` is returned as new lists; the inputs are not modified.

    :raises ValueError: If no single removal keeps the score.
    """
    for index in range(len(board) + len(hand)):
        tempHand = list(hand)
        tempBoard = list(board)
        if index < 2:
            # Remove a hand card and refill the hand from the board.
            tempHand.pop(index)
            tempHand.append(board[0])
            tempBoard.pop(0)
        else:
            # Remove a board card.
            tempBoard.pop(index - 2)
        if evaluator.evaluate(tempBoard, tempHand) == target_score:
            return tempBoard, tempHand
    raise ValueError("no card can be removed without changing the score")


def evaluateCards(boardCards, handCards):
    """Evaluate the client's cards and describe the best five-card hand.

    :param boardCards: Board cards as one string of two-character codes;
        3, 4 or 5 codes are accepted (any other count gives an empty board).
    :param handCards: The two hand cards as one string of two-character
        codes. The two characters of each code are swapped before the code
        is handed to ``Card.new``.
    :return: ``"<rank class> <five cards as text> <score>"``, where the five
        cards are three board cards followed by two hand cards, each
        converted with ``convertCardToString``.
    """
    n = 2
    boardCardsSplit = [boardCards[i:i + n]
                       for i in range(0, len(boardCards), n)]
    handCardsSplit = [handCards[i:i + n]
                      for i in range(0, len(handCards), n)]

    # Hand codes arrive with their two characters in the opposite order.
    hand = [
        Card.new(str((code[1] + code[0]).capitalize()))
        for code in (handCardsSplit[0], handCardsSplit[1])
    ]
    if len(boardCardsSplit) in (3, 4, 5):
        board = [Card.new(str(code.capitalize())) for code in boardCardsSplit]
    else:
        board = []

    print(Card.print_pretty_cards(board + hand))
    evaluator = Evaluator()
    bestScore = evaluator.evaluate(board, hand)
    handType = evaluator.get_rank_class(bestScore)
    print("Player 1 hand rank = %d (%s)\n" %
          (bestScore, evaluator.class_to_string(handType)))

    # Seven cards -> six: drop one card that is not part of the best hand.
    if len(board) == 5:
        best6Board, best6Hand = _drop_one_card(evaluator, board, hand,
                                               bestScore)
    else:
        best6Board = board
        best6Hand = hand
    print(Card.print_pretty_cards(best6Board + best6Hand))

    # Six cards -> five: repeat the process to keep only the best five.
    if len(best6Board) == 4 or len(board) == 4:
        best5Board, best5Hand = _drop_one_card(evaluator, best6Board,
                                               best6Hand, bestScore)
    else:
        best5Board = best6Board
        best5Hand = best6Hand
    print(Card.print_pretty_cards(best5Board + best5Hand))

    handString = "".join([
        convertCardToString(best5Board[0]),
        convertCardToString(best5Board[1]),
        convertCardToString(best5Board[2]),
        convertCardToString(best5Hand[0]),
        convertCardToString(best5Hand[1]),
    ])
    print("Hand string: " + handString)
    stringToSend = str(handType) + " " + handString + " " + str(bestScore)
    print("String to send: " + stringToSend)
    return stringToSend
class TexasHoldemEnv(Env, utils.EzPickle): BLIND_INCREMENTS = [[10,25], [25,50], [50,100], [75,150], [100,200], [150,300], [200,400], [300,600], [400,800], [500,10000], [600,1200], [800,1600], [1000,2000]] current_player_notifier = "" weighting_coefficient_regret_fold = 10 weighting_coefficient_regret_check = 10 weighting_coefficient_regret_call = 10 weighting_coefficient_regret_raise = 10 weighting_coefficient_round_resolve = 100 def __init__(self, n_seats, max_limit=100000, debug=False): n_suits = 4 # s,h,d,c n_ranks = 13 # 2,3,4,5,6,7,8,9,T,J,Q,K,A n_community_cards = 5 # flop, turn, river n_pocket_cards = 2 n_stud = 5 self.level_raises = {0:0, 1:0, 2:0} # Assuming 3 players self.n_seats = n_seats self._blind_index = 0 [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0] self._deck = Deck() self._evaluator = Evaluator() self.last_seq_move = [] self.filled_seats = 0 self.signal_end_round = False self.winning_players = None self.starting_stack_size = None self.community = [] self._round = 0 self._button = 0 self._discard = [] self.game_resolved = False self.is_new_r = True self._side_pots = [0] * n_seats self._current_sidepot = 0 # index of _side_pots self._totalpot = 0 self._tocall = 0 self._lastraise = 0 self._number_of_hands = 0 self._record_players = [] # fill seats with dummy players self._seats = [Player(i, stack=0, emptyplayer=True) for i in range(n_seats)] self.learner_bot = None self.villain = None self.emptyseats = n_seats self._player_dict = {} self._current_player = None self._debug = debug self._last_player = None self._last_actions = None # (PSEUDOCODE) # MODEL HYPERPARAMETERS: # state_size = [(position, learner.stack, learner.handrank, played_this_round ...[card1, card2]), (pot_total, learner.to_call, opponent.stack, community_cards)] # action_size = env.action_space.n # learning_rate = 0.00025 self.observation_space = spaces.Tuple([ spaces.Tuple([ # players spaces.MultiDiscrete([ max_limit, # stack max_limit, # handrank 1, # 
playedthisround 1, # is_betting max_limit, # last side pot ]), spaces.Tuple([ spaces.MultiDiscrete([ # card n_suits, # suit, can be negative one if it's not avaiable. n_ranks, # rank, can be negative one if it's not avaiable. ]) ] * n_pocket_cards) ] * 4), spaces.Tuple([ spaces.Discrete(max_limit), # learner position spaces.Discrete(max_limit), # pot amount spaces.Discrete(max_limit), # last raise spaces.Discrete(n_seats - 1), # current player seat location. spaces.Discrete(max_limit), # minimum amount to raise spaces.Discrete(max_limit), # how much needed to call by current player. spaces.Tuple([ spaces.MultiDiscrete([ # card n_suits - 1, # suit n_ranks - 1, # rank 1, # is_flopped ]) ] * n_community_cards) ]) ]) ### MAY NEED TO ALTER FOR HEADS-UP # self.action_space = spaces.Tuple([ # spaces.MultiDiscrete([ # 3, # action_id # max_limit, # raise_amount # ]), # ] * n_seats) self.action_space = spaces.Discrete(3) def seed(self, seed=None): _, seed = seeding.np_random(seed) return [seed] # Important Note: Positions are only assigned at end of game. 
Be aware in # case of reporting stats on position type def assign_positions(self): no_active_players = self.filled_seats if(self.filled_seats == 3): for player in self._seats: player.position = (player.position + (no_active_players-1)) % no_active_players if player in self._player_dict.values() else None elif(self.filled_seats == 2): new_positions = [] # We want to only use positions 0 and 2, which are encodings of BTN and BB respectively # Sort for positions 0 and 2 first for player in self._player_dict.values(): if not(player.emptyplayer): if player.position == 2: player.position = 0 new_positions.append(player.position) elif player.position == 0: player.position = 2 new_positions.append(player.position) # Special case of former position 1 depends on new positions allocated above if len(new_positions) == 1: for player in self._player_dict.values(): if player.position == 1: if new_positions[0] == 0: player.position = 2 elif new_positions[0] == 2: player.position = 0 def add_player(self, seat_id, stack=2000): """Add a player to the environment seat with the given stack (chipcount)""" player_id = seat_id if player_id not in self._player_dict: new_player = Player(player_id, stack=stack, emptyplayer=False) Player.total_plrs+=1 self.starting_stack_size = stack if self._seats[player_id].emptyplayer: self._seats[player_id] = new_player new_player.set_seat(player_id) else: raise error.Error('Seat already taken.') self._player_dict[player_id] = new_player self.emptyseats -= 1 self.filled_seats +=1 if new_player.get_seat() == 0: self.learner_bot = new_player else: self.villain = new_player self._record_players.append(new_player) def move_player_to_empty_seat(self, player): # priority queue placing active players at front of table for seat_no in range(len(self._seats)): if self._seats[seat_no].emptyplayer and (seat_no < player._seat): unused_player = self._seats[seat_no] self._seats[seat_no] = player self._seats[player.get_seat()] = unused_player def 
reassign_players_seats(self): for player in self._player_dict.values(): self.move_player_to_empty_seat(player) def remove_player(self, seat_id): """Remove a player from the environment seat.""" player_id = seat_id try: idx = self._seats.index(self._player_dict[player_id]) self._seats[idx] = Player(0, stack=0, emptyplayer=True) self._seats[idx].position = None # Very important for when transitioning from 3 to 2 players. del self._player_dict[player_id] self.emptyseats += 1 self.filled_seats-=1 Player.total_plrs-=1 #self.reassign_players_seats() except ValueError: pass def reset(self): self._reset_game() self._ready_players() self._number_of_hands = 1 [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0] if (self.emptyseats < len(self._seats) - 1): players = [p for p in self._seats if p.playing_hand] self._new_round() self._round = 0 self._current_player = self._first_to_act(players, "post_blinds") self._post_smallblind(self._current_player) self._current_player = self._next(players, self._current_player) self._post_bigblind(self._current_player) self._current_player = self._next(players, self._current_player) self._tocall = self._bigblind self._round = 0 self._deal_next_round() self.organise_evaluations() self._folded_players = [] return self._get_current_reset_returns() def organise_evaluations(self): for idx, player in self._player_dict.items(): if player is not None: player.he = HandHoldem.HandEvaluation(player.hand, idx, "Preflop") #Unique to player instance player.he.evaluate(event='Preflop') player.set_handrank(player.he.evaluation) def assume_unique_cards(self, players): cards_count = {} this_board = None for player in players: player_cards = player.hand for card in player_cards: cards_count.update({card: 1}) if card not in cards_count else cards_count.update({card: cards_count[card] + 1}) if this_board is None and player.he is not None: if player.he.board is not None: this_board = player.he.board if this_board is not None: for card in 
this_board: cards_count.update({card: 1}) if card not in cards_count else cards_count.update({card: cards_count[card] + 1}) for card, no_occurence in cards_count.items(): if no_occurence > 1: return False else: return True def step(self, actions): """ CHECK = 0 CALL = 1 RAISE = 2 FO RAISE_AMT = [0, minraise] """ players = [p for p in self._seats if p.playing_hand] assert self.assume_unique_cards(players) is True self._last_player = self._current_player # self._last_actions = actions # if self._last_player.count_r(self.last_seq_move) > 1: # if [3,0] in actions: # print("r") # if current player did not play this round if not self._current_player.playedthisround and len([p for p in players if not p.isallin]) >= 1: if self._current_player.isallin: self._current_player = self._next(players, self._current_player) return self._get_current_step_returns(False) move = self._current_player.player_move(self._output_state(self._current_player), actions[self._current_player.player_id], last_seq_move = self.last_seq_move, _round = self._round) if self.am_i_only_player_wmoney() and self.level_raises[self._current_player.get_seat()] >= self.highest_in_LR()[0]: move = ("check", 0) # Protects against player making bets without any other stacked/active players self._last_actions = move if move[0] == 'call': assert self.action_space.contains(0) self._player_bet(self._current_player, self._tocall, is_posting_blind=False, bet_type=move[0]) if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) self.last_seq_move.append('C') self.playedthisround = True self._current_player.round['raises_i_owe'] = 0 elif move[0] == 'check': # assert self.action_space.contains(0) self._player_bet(self._current_player, self._current_player.currentbet, is_posting_blind=False, bet_type=move[0]) if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) 
self.last_seq_move.append('c') self.playedthisround = True elif move[0] == 'raise': # if self._current_player is self.learner_bot and self.level_raises == {0: 1, 1: 0, 2: 2} or self.level_raises == {0: 2, 1: 0, 2: 3} or self.level_raises == {0: 3, 1: 0, 2: 4} or self.level_raises == {0: 4, 1: 0, 2: 5} or self.level_raises == {0: 5, 1: 0, 2: 6} or self.level_raises == {0: 5, 1: 0, 2: 6} and 'R' in self.last_seq_move: # print("watch") assert self.action_space.contains(1) self._player_bet(self._current_player, move[1]+self._current_player.currentbet, is_posting_blind=False, bet_type="bet/raise") if self._debug: print('Player', self._current_player.player_id, move) for p in players: if p != self._current_player: p.playedthisround = False self._current_player = self._next(players, self._current_player) self.last_seq_move.append('R') self._current_player.round['raises_i_owe'] = 0 elif move[0] == 'fold': # if self.highest_in_LR()[0] > 4: # print("watch") assert self.action_space.contains(2) self._current_player.playing_hand = False self._current_player.playedthisround = True if self._debug: print('Player', self._current_player.player_id, move) self._current_player = self._next(players, self._current_player) self._folded_players.append(self._current_player) self.last_seq_move.append('F') # break if a single player left # players = [p for p in self._seats if p.playing_hand] # if len(players) == 1: # self._resolve(players) players = [p for p in self._seats if p.playing_hand] # else: ## This will help eliminate infinite loop # self._current_player = self._next(players, self._current_player) # This will effectively dictate who will become dealer after flop players_with_money = [] for player in players: if(player.stack > 0): players_with_money.append(player) if all([player.playedthisround for player in players_with_money]): self._resolve(players) for player in self._player_dict.values(): player.round == {'moves_i_made_in_this_round_sofar': '', 'possible_moves': set([]), 
'raises_owed_to_me': 0, "raises_i_owe": 0} terminal = False if all([player.isallin for player in players]): while self._round < 4: self._deal_next_round() self._round += 1 elif self.count_active_wmoney() == 1 and all([player.playedthisround for player in players]): # do something else here while self._round < 3: self._round += 1 self._deal_next_round() if self._round == 4 or len(players) == 1: terminal = True self._resolve(players) self._resolve_round(players) return self._get_current_step_returns(terminal, action=move) def am_i_only_player_wmoney(self): count_other_broke = 0 for player in self._player_dict.values(): if player is not self._current_player and player.stack <= 0: count_other_broke += 1 if count_other_broke == (len(self._player_dict) - 1): return True else: return False def count_active_wmoney(self): count = 0 account_active_money = {0:{"is_active":False, "has_money":False},1:{"is_active":False, "has_money":False},2:{"is_active":False, "has_money":False}} for player in self._player_dict.values(): if player.playing_hand: account_active_money[player.get_seat()].update({"is_active": True}) if player.stack > 0: account_active_money[player.get_seat()].update({"has_money": True}) for player, account in account_active_money.items(): if account["is_active"] is True and account["has_money"] is True: count+=1 return count def render(self, mode='human', close=False, initial=False, delay=None): if delay: time.sleep(delay) if(initial is True): print("\n") if self._last_actions is not None and initial is False: pid = self._last_player.player_id #print('last action by player {}:'.format(pid)) print(format_action(self._last_player, self._last_actions)) print("\n\n") print('Total Pot: {}'.format(self._totalpot)) (player_states, community_states) = self._get_current_state() (player_infos, player_hands) = zip(*player_states) (community_infos, community_cards) = community_states print('Board:') print('-' + hand_to_str(community_cards)) print('Players:') # for player in 
self._player_dict: # assert player.round['raises_i_owe'] for idx, hand in enumerate(player_hands): if self._current_player.get_seat() == idx: self.current_player_notifier = "<" + str(self._current_player.position) print('{}{}stack: {} {}'.format(idx, hand_to_str(hand), self._seats[idx].stack, self.current_player_notifier)) self.current_player_notifier = "" def _resolve(self, players): self.signal_end_round = True self._current_player = self._first_to_act(players) self._resolve_sidepots(players + self._folded_players) self._new_round() self._deal_next_round() if self._debug: print('totalpot', self._totalpot) def _resolve_postflop(self, players): self._current_player = self._first_to_act(players) # print(self._current_player) def _deal_next_round(self): if self._round == 0: self._deal() elif self._round == 1: self._flop() elif self._round == 2: self._turn() elif self._round == 3: self._river() def _increment_blinds(self): self._blind_index = min(self._blind_index + 1, len(TexasHoldemEnv.BLIND_INCREMENTS) - 1) [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[self._blind_index] def _post_smallblind(self, player): if self._debug: print('player ', player.player_id, 'small blind', self._smallblind) self._player_bet(player, self._smallblind, is_posting_blind=True) player.playedthisround = False def _post_bigblind(self, player): if self._debug: print('player ', player.player_id, 'big blind', self._bigblind) self._player_bet(player, self._bigblind, is_posting_blind=True) player.playedthisround = False self._lastraise = self._bigblind def highest_in_LR(self, specific=None, request_is_seq=None): highest_lr_bot = 0 highest_lr_value = 0 if specific is None: spec = self.level_raises else: spec = specific for key, value in spec.items(): if value > highest_lr_value: highest_lr_value = value highest_lr_bot = key rep = [(highest_lr_value, highest_lr_bot)] if request_is_seq: for key, value in spec.items(): if value == highest_lr_value and key != highest_lr_bot: 
rep.append((value, key)) return rep else: return highest_lr_value, highest_lr_bot def is_level_raises_allzero(self): count_zero = 0 for value in self.level_raises.values(): if value == 0: count_zero+=1 if(count_zero == len(self.level_raises)): return True else: return False def _player_bet(self, player, total_bet, **special_betting_type): # Case 1: New round, players have incosistent raises # Case 2: End of round, difference of raises is 2 import operator sorted_lr = sorted(self.level_raises.items(), key=operator.itemgetter(1)) # if (self.is_off_balance_LR() and self.is_new_r) or ( ((int(self.highest_in_LR()[0]) - int(sorted_lr[1][1])) == 2) and (self.is_new_r is False)): # print("raise") if "is_posting_blind" in special_betting_type and "bet_type" not in special_betting_type: # posting blind (not remainder to match preceding calls/raises) if special_betting_type["is_posting_blind"] is True: self.level_raises[player.get_seat()] = 0 elif "is_posting_blind" in special_betting_type and "bet_type" in special_betting_type: # Bet/Raise or call. Also accounts for checks preflop. 
highest_lr_value, highest_lr_bot = self.highest_in_LR() if special_betting_type["is_posting_blind"] is False: if special_betting_type["bet_type"] == "bet/raise": if self.level_raises[player.get_seat()] < highest_lr_value: player.action_type = "raise" self.level_raises[player.get_seat()] = highest_lr_value + 1 elif self.level_raises[player.get_seat()] == highest_lr_value: player.action_type = "bet" self.level_raises[player.get_seat()] += 1 elif special_betting_type["bet_type"] == "call": if self.level_raises[player.get_seat()] < highest_lr_value: player.action_type = "call" self.level_raises[player.get_seat()] = highest_lr_value elif self.is_level_raises_allzero(): if player.position == 0: player.action_type = "call" self.level_raises[player.get_seat()] = 1 elif player.position == 2: player.action_type = "call" self.level_raises[player.get_seat()] = highest_lr_value elif special_betting_type["bet_type"] == "check" and self._round is 0: # BB checking preflop if player.position == 2: self.level_raises[player.get_seat()] = 1 # relative_bet is how much _additional_ money is the player betting this turn, # on top of what they have already contributed # total_bet is the total contribution by player to pot in this round relative_bet = min(player.stack, total_bet - player.currentbet) player.bet(relative_bet + player.currentbet) self._totalpot += relative_bet self._tocall = max(self._tocall, total_bet) if self._tocall > 0: self._tocall = max(self._tocall, self._bigblind) self._lastraise = max(self._lastraise, relative_bet - self._lastraise) self.is_new_r = False def _first_to_act(self, players, my_event="Postflop"): # if self._round == 0 and len(players) == 2: # return self._next(sorted( # players + [self._seats[self._button]], key=lambda x:x.get_seat()), # self._seats[self._button]) first_to_act = None if self.filled_seats == 2: if my_event is "Preflop" or my_event is "post_blinds": first_to_act = self.assign_next_to_act(players, [0,2]) elif my_event is "Postflop" or 
my_event is "sidepot": first_to_act = self.assign_next_to_act(players, [2,0]) elif self.filled_seats == 3: if my_event is "Preflop": first_to_act = self.assign_next_to_act(players, [0,1,2]) elif my_event is "Postflop" or my_event is "post_blinds" or my_event is "sidepot": first_to_act = self.assign_next_to_act(players, [1,2,0]) # else: # my_return = [player for player in players if player.get_seat() > self._button][0] #assert first_to_act is not None and not(first_to_act.emptyplayer) and not(first_to_act.stack <= 0) if len(players) == 1: first_to_act = self._record_players[0] return first_to_act def assign_next_to_act(self, players, precedence_positions): for pos in precedence_positions: for player in players: if player.position == pos and not(player.emptyplayer) and player.playing_hand and player.stack > 0: assert player is not None return player def _next(self, players, current_player): i = 1 current_player_seat = players.index(current_player) while(players[(current_player_seat+i) % len(players)].stack <= 0): i+=1 if i > 10: break # In this case of inifinte loop, self._current_player is assigned to _next but will be irrelevant anyway so okay. 
assert players[(current_player_seat+i) % len(players)] is not None return players[(current_player_seat+i) % len(players)] def _deal(self): for player in self._seats: if player.playing_hand and player.stack > 0: player.hand = self._deck.draw(2) def _flop(self): self._discard.append(self._deck.draw(1)) #burn this_flop = self._deck.draw(3) self.flop_cards = this_flop self.community = this_flop def _turn(self): self._discard.append(self._deck.draw(1)) #burn self.turn_card = self._deck.draw(1) self.community.append(self.turn_card) # .append(self.community) def _river(self): self._discard.append(self._deck.draw(1)) #burn self.river_card = self._deck.draw(1) self.community.append(self.river_card) def _ready_players(self): for p in self._seats: if not p.emptyplayer and p.sitting_out: p.sitting_out = False p.playing_hand = True def _resolve_sidepots(self, players_playing): players = [p for p in players_playing if p.currentbet] if self._debug: print('current bets: ', [p.currentbet for p in players]) print('playing hand: ', [p.playing_hand for p in players]) if not players: return try: smallest_bet = min([p.currentbet for p in players if p.playing_hand]) except ValueError: for p in players: self._side_pots[self._current_sidepot] += p.currentbet p.currentbet = 0 return smallest_players_allin = [p for p, bet in zip(players, [p.currentbet for p in players]) if bet == smallest_bet and p.isallin] for p in players: self._side_pots[self._current_sidepot] += min(smallest_bet, p.currentbet) p.currentbet -= min(smallest_bet, p.currentbet) p.lastsidepot = self._current_sidepot if smallest_players_allin: self._current_sidepot += 1 self._resolve_sidepots(players) if self._debug: print('sidepots: ', self._side_pots) def _new_round(self): for player in self._player_dict.values(): player.currentbet = 0 player.playedthisround = False player.round = {'moves_i_made_in_this_round_sofar': '', 'possible_moves': set([]), 'raises_owed_to_me': 0, "raises_i_owe": 0} player.round_track_stack = 
player.stack self.is_new_r = True self._round += 1 self._tocall = 0 self._lastraise = 0 self.last_seq_move = [] # if self.is_off_balance_LR(): # if self._last_actions[0] != 'fold': # raise error.Error() def is_off_balance_LR(self): lr = self.level_raises highest_value, highest_bot = self.highest_in_LR() lr_without_highest = dict(lr) del lr_without_highest[highest_bot] next_highest_value, next_highest_bot = self.highest_in_LR(specific=lr_without_highest) if highest_value != next_highest_value: return True elif highest_value == next_highest_value: return False def _resolve_round(self, players): # if len(players) == 1: # if (self._round == 1 or self._round == 2) and self._last_player.get_seat() == 0 and self._last_actions[0] == 'fold': # if self._last_player.count_r(self.last_seq_move) < 1: # if self.learner_bot.position == 0: # players[0].refund(self._bigblind + self._smallblind) # self._totalpot = 0 # self.winning_players = players[0] # else: # players[0].refund(self._bigblind + self._smallblind + 40) # self._totalpot = 0 # self.winning_players = players[0] # else: # players[0].refund(sum(self._side_pots)) # self._totalpot = 0 # self.winning_players = players[0] if len(players) == 1: winner, loser = None, None # Heads-Up for p in self._record_players: if p == players[0]: winner = p else: loser = p winner_investment = winner.stack_start_game - winner.stack loser_loss = loser.stack_start_game - loser.stack if loser.stack_start_game < 15 and loser.position == 0: players[0].refund((self.starting_stack_size - winner.stack) ) elif loser.stack_start_game < 25 and loser.position == 2: players[0].refund((self.starting_stack_size - winner.stack) ) else: players[0].refund(winner_investment + loser_loss) self._totalpot = 0 self.winning_players = players[0] else: # compute hand ranks for player in players: # assert (len(self.community) <= 5) is True player.handrank = self._evaluator.evaluate(player.hand, self.community) # trim side_pots to only include the non-empty side pots 
temp_pots = [pot for pot in self._side_pots if pot > 0] # compute who wins each side pot and pay winners for pot_idx,_ in enumerate(temp_pots): # find players involved in given side_pot, compute the winner(s) pot_contributors = [p for p in players if p.lastsidepot >= pot_idx] winning_rank = min([p.handrank for p in pot_contributors]) winning_players = [p for p in pot_contributors if p.handrank == winning_rank] self.winning_players = winning_players[0] for player in winning_players: split_amount = int(self._side_pots[pot_idx]/len(winning_players)) if self._debug: print('Player', player.player_id, 'wins side pot (', int(self._side_pots[pot_idx]/len(winning_players)), ')') player.refund(split_amount) self._side_pots[pot_idx] -= split_amount # any remaining chips after splitting go to the winner in the earliest position if self._side_pots[pot_idx]: earliest = self._first_to_act([player for player in winning_players], "sidepot") earliest.refund(self._side_pots[pot_idx]) # for player in players: ## THIS IS AT THE END OF THE GAME. NOT DURING. 
(safe) # if(player.stack == 0): # self.remove_player(player.get_seat()) self.game_resolved = True # assert(self._player_dict[0].stack + self._player_dict[2].stack + self._totalpot == 2*self.starting_stack_size) def report_game(self, requested_attributes, specific_player=None): if "stack" in requested_attributes: player_stacks = {} for key, player in self._player_dict.items(): player_stacks.update({key: player.stack}) # if len(player_stacks) < 3: # for i in range(3): # if i not in player_stacks: # player_stacks.update({i:0}) if specific_player is None: return (player_stacks) assert (player_stacks.values()) is not None else: return (player_dict[specific_player].values()) def _reset_game(self): playing = 0 # if self._player_dict[0].stack is not None and self._player_dict[2].stack is not None: # assert(self._player_dict[0].stack + self._player_dict[2].stack == 2*self.starting_stack_size) for player in self._seats: if not player.emptyplayer and not player.sitting_out: player.stack_start_game = player.stack player.reset_hand() playing += 1 self.community = [] self._current_sidepot = 0 self._totalpot = 0 self._side_pots = [0] * len(self._seats) self._deck.shuffle() self.level_raises = {0:0, 1:0, 2:0} self.winning_players = None self.game_resolved = False if playing: self._button = (self._button + 1) % len(self._seats) while not self._seats[self._button].playing_hand: self._button = (self._button + 1) % len(self._seats) def _output_state(self, current_player): return { 'players': [player.player_state() for player in self._seats], 'community': self.community, 'my_seat': current_player.get_seat(), 'pocket_cards': current_player.hand, 'pot': self._totalpot, 'button': self._button, 'tocall': (self._tocall - current_player.currentbet), 'stack': current_player.stack, 'bigblind': self._bigblind, 'player_id': current_player.player_id, 'lastraise': self._lastraise, 'minraise': max(self._bigblind, self._lastraise + self._tocall), } def _pad(self, l, n, v): if (not l) or (l is None): 
l = [] return l + [v] * (n - len(l)) def _get_current_state(self): player_states = [] for player in self._seats: player_features = [ int(player.stack), int(player.handrank), int(player.playedthisround), int(player.betting), int(player.lastsidepot), ] player_states.append((player_features, self._pad(player.hand, 2, -1))) community_states = ([ int(self.learner_bot.position), int(self._totalpot), int(self._lastraise), int(self._current_player.get_seat()), int(max(self._bigblind, self._lastraise + self._tocall)), int(self._tocall - self._current_player.currentbet), ], self._pad(self.community, 5, -1)) # if sum(self.level_raises.values()) > 6: # print("") return (tuple(player_states), community_states) def _get_current_reset_returns(self): return self._get_current_state() def distribute_rewards_given_endgame(self): if self.learner_bot is self.winning_players: self.learner_bot.reward = self.compute_reward() + self._totalpot else: self.learner_bot.reward = self.learner_bot.round_track_stack def _get_current_step_returns(self, terminal, action=None): observations = self._get_current_state() stacks = [player.stack for player in self._seats] reward = None if(action is None): return observations, reward, terminal, [] # TODO, return some info? else: # Focus on this. At end of step, when player has already decided his action. 
respective_evaluations = [player.he.evaluation if player.he is not None else None for player in self._seats] evaluations_opposing_players = [x for i,x in enumerate(respective_evaluations) if i!= self._last_player.get_seat() and x!=None] if (self._last_player is self.learner_bot): # Learner bot step return if(self.signal_end_round == True): self.signal_end_round = False self.learner_bot.reward = self.compute_reward() # Most common entry point (Learner Checks or raises) else: # Artifical agent step return self.learner_bot.reward = 0 if(self.signal_end_round == True): if(action == ('fold', 0)): # Opponent folded self.learner_bot.reward = self._totalpot # if action is ('fold', 0) or action is ('check', 0) or action[0] is 'call' or action[0] is 'raise': # regret = self.compute_regret_given_action(action, respective_evaluations, evaluations_opposing_players) return observations, action, reward, terminal, [] # TODO, return some info? def compute_reward(self): #only gets called when last player is learner # Expected value is a mathematical concept used to judge whether calling a raise in a game of poker will be profitable. # When an opponent raises a pot in poker, such as on the flop or river, your decision whether to call or fold is more or less # completely dependant on expected value. This is the calculation of whether the probability of winning a pot will make a call # profitable in the long-term. # Expected Value is a monetary value (e.g. +$10.50). It can be positive or # negative. EV tells you how profitable or unprofitable a certain play (e.g. # calling or betting) will be. We work out EV when we are faced with a decision. # EV = (Size of Pot x Probability of Winning) – Cost of Entering it. 
equity = self.equity() ev = None if self._round == 0 and self._last_player.position == 0: # Only works for heads up: Due to bug with tocall to_call = 15 total_pot = self._totalpot - to_call else: to_call = self._last_actions[1] total_pot = self._totalpot if self._last_player is not self.learner_bot else (self._totalpot - self._last_actions[1]) # Here we compute expected values for actions that were possible during their execution, and we reflect on them here by comparing the expected values # of alternatives. expected_values_order = [0, 0, 0] # In order of call/check, raise/bet, fold if self._last_actions[0] == 'call' or self._last_actions[0] == 'check': action_taken = 0 elif self._last_actions[0] == 'raise' or self._last_actions[0] == 'bet': action_taken = 1 else: action_taken = 2 # Call/Check Regret learner_equity, opp_equity = equity[0], equity[1] stand_to_win = (total_pot * learner_equity) stand_to_lose = to_call * opp_equity expected_value = stand_to_win - stand_to_lose expected_values_order[0] = expected_value # Fold Regret stand_to_win = to_call * opp_equity stand_to_lose = (total_pot) * learner_equity expected_value = stand_to_win - stand_to_lose expected_values_order[2] = expected_value # Raise/Bet Regret if (self.learner_bot.raise_possible_tba): # implied raise (How much more we stand to win given that villain shows confidence in his hand) stand_to_win = ( ((total_pot + 25) * learner_equity) * self.villain.certainty_to_call ) + (total_pot * learner_equity) * (1 - self.villain.certainty_to_call) stand_to_lose = (to_call + 25) * opp_equity expected_value = stand_to_win - stand_to_lose expected_values_order[1] = expected_value max_ev = max(expected_values_order) highest_paying_action = [i for i, j in enumerate(expected_values_order) if j == max_ev] # reward = expected_values_order[action_taken]/max_ev # how much does reward deviate from mean - this determines quality of action in the context of all possible actions reward = 
expected_values_order[action_taken] - mean(expected_values_order) return reward def compute_reward_end_round_fold(self, respective_evaluations, evaluations_opposing_players): return (respective_evaluations[self._last_player.get_seat()] - mean([other_player_eval for other_player_eval in evaluations_opposing_players])) / self.weighting_coefficient_round_resolve def compute_regret_given_action(self, my_action, respective_evaluations, evaluations_opposing_players): self.compare_evaluations_players(my_action, respective_evaluations, evaluations_opposing_players) # Now player has his regret filled in to his own player instance pass def equity(self): # Equity is a percentage (e.g. 70%). Equity tells you how much of the pot # “belongs” to you, or to put it another way, the percentage of the time # you expect to win the hand on average from that point onwards. _round = self._round if self.signal_end_round is not True else self._round - 1 if (_round == 1 or _round == 2 or _round ==3): # Implies last rounds were either 1 or 2 learner_utility, opp_utility = self.compute_winner_simulation(_round) equity = learner_utility, opp_utility else: learner_hs = self.learner_bot.he.hand_strength, 1 - self.villain.he.hand_strength bot_hs = self.villain.he.hand_strength, 1 - self.learner_bot.he.hand_strength equity = (learner_hs[0] + learner_hs[1])/2, (bot_hs[0] + bot_hs[1])/2 return equity def compute_winner_simulation(self, _round): _evaluator = self._evaluator deck = self._deck if _round == 1: community = [self.community[i] for i in range(3)] elif _round == 2: community = [self.community[i] for i in range(4)] else: community = [self.community[i] for i in range(5)] opp1_cards = self.learner_bot.hand opp2_cards = self.villain.hand unrevealed_cards = sorted([card for card in deck.cards if card not in community and card not in opp1_cards and card not in opp2_cards]) # print(Card.print_pretty_cards(opp1_cards)) # print(Card.print_pretty_cards(opp2_cards)) winning_players_list = [] 
learner_wins = 0 opp_wins = 0 if _round == 1: for turn_card_idx in range(len(unrevealed_cards)): # print(turn_card_idx) for river_card_idx in range(turn_card_idx, len(unrevealed_cards)): if [unrevealed_cards[turn_card_idx]] == [unrevealed_cards[river_card_idx]]: continue # print(Card.print_pretty_cards(community + [unrevealed_cards[turn_card_idx]] + [unrevealed_cards[river_card_idx]])) learner_eval = (_evaluator.evaluate(opp1_cards, community + [unrevealed_cards[turn_card_idx]] + [unrevealed_cards[river_card_idx]])) opp_eval = (_evaluator.evaluate(opp2_cards, community + [unrevealed_cards[turn_card_idx]] + [unrevealed_cards[river_card_idx]])) winning_rank = min([learner_eval, opp_eval]) winning_players = [player for player, rank in enumerate([learner_eval, opp_eval]) if rank == winning_rank] if len(winning_players) is 2: learner_wins+=1 opp_wins+=1 else: if winning_players[0] == 0: learner_wins+=1 else: opp_wins+=1 elif _round == 2: for river_card in unrevealed_cards: player_handranks = [] # print(Card.print_pretty_cards(community+[river_card])) learner_eval = (_evaluator.evaluate(opp1_cards, community+[river_card])) opp_eval = (_evaluator.evaluate(opp2_cards, community+[river_card])) winning_rank = min([learner_eval, opp_eval]) winning_players = [player for player, rank in enumerate([learner_eval, opp_eval]) if rank == winning_rank] if len(winning_players) is 2: learner_wins+=1 opp_wins+=1 else: if winning_players[0] == 0: learner_wins+=1 else: opp_wins+=1 elif _round == 3: if self.learner_bot is self.winning_players: return 1.0, 0.0 else: return 0.0, 1.0 if opp_wins == 0 and learner_wins == 0: raise("error: division by zero") return (learner_wins/(learner_wins + opp_wins), opp_wins/(learner_wins + opp_wins)) #Using evlaluation here. 
Might be better to use player.handstrength def compare_evaluations_players(self, my_action, respective_evaluations, evaluations_opposing_players): pass # expected_value = self.expected_value() # if my_action is ('fold', 0): # # calculate how good my cards are compared to raisers cards # _, raiser_bot = self.highest_in_LR() # raiser_strength = raiser_bot.he.evaluation # regret = (raiser_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_fold # # Remember: Higher evaluation means worse cards, lower means better cards. # # e.g. If my evaluation was 5400, and my opponents evaluation was 7500, I would have positive regret ( I would regret having folded) # self._current_player.regret.update({'fold': regret}) # elif my_action is ('check', 0): # # calculate how good my cards are compared to other players, and thus compute how much I regret not having raised # # If my evaluation is lower (better cards) than my opponents relatively high evaluation (worse cards), I would have positive regret # _, opposing_bot = self.current_player() # We can assign opposing as current_player (2-players heads-up) because we already rotated the table position # opposing_bot_strength = opposing_bot.he.evaluation # regret = (opposing_bot_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check # self._current_player.regret.update({'check': regret}) # elif my_action[0] is 'call': # # Now we must compute the regret based on how much we would have been better of taking another action: Here, unlike other times, we have # # 2 possible alternatives : Raise or fold. If we take a call action, we must compute the expected value for the other alternatives. 
# pass # elif my_action[0] is 'raise': # _, raiser_bot = self.highest_in_LR() # raiser_strength = raiser_bot.he.evaluation # regret = (raiser_evaluation - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check # self._current_player.regret.update({'check': regret})
from treys import Card, Evaluator, Deck

# Demo script: print the treys rank of a few hand-picked five-card hands.
# Each hand is passed as the "hand" argument with an empty board.
evaluator = Evaluator()

# Ace-high hand in mixed suits.
myhand = [Card.new(label) for label in ('Ac', '6c', '7s', '8h', '9h')]
print('Rank for A6789 is: ', evaluator.evaluate(myhand, []))

# This hand is built but never evaluated in this snippet.
myhand = [Card.new(label) for label in ('5c', '6c', '7s', '8h', '9h')]

# Same ranks as the first hand, but all clubs.
evaluator = Evaluator()
myhand = [Card.new(label) for label in ('Ac', '6c', '7c', '8c', '9c')]
print('Rank for A6789 suited is: ', evaluator.evaluate(myhand, []))

# Start of the next hand; the snippet ends after its first card.
myhand = [Card.new('5c')]
async def _announce_next_street(web_client, user_id, channel_id, tab,
                                active_players, street):
    """Announce the board for ``street`` and reset per-street betting state.

    Posts the "calls. dealing <street>:" message and the current board,
    rotates the dealer to the back of ``active_players`` if they are first,
    announces who acts next and the pot size, zeroes every player's
    ``bet``/``tocall``/``reraise``, advances ``tab.turn`` and clears
    ``tab.highbet``.
    """
    tabcards = Card.print_pretty_cards(tab.cards)
    await sendslack(
        "<@%s> calls. dealing %s:" % (user_id, street), web_client, channel_id
    )
    await sendslack(tabcards, web_client, channel_id)
    if active_players[0].dealer:
        # Rotate in place: the list is the shared per-channel player list.
        active_players.append(active_players.pop(0))
    await sendslack(
        "<@%s> is next to act" % active_players[0].name, web_client, channel_id
    )
    await sendslack("pot is %s" % tab.pot, web_client, channel_id)
    for player in active_players:
        player.bet = 0
        player.tocall = 0
        player.reraise = 0
    tab.turn += 1
    tab.highbet = 0
    active_players[0].canclose = False
    active_players[1].canclose = True


async def _showdown(web_client, user_id, channel_id, tab, active_players):
    """Reveal hands, pay out the pot and, heads-up, start the next hand.

    Delegates to ``calculate_plo`` when ``tab.plo`` is set. Otherwise every
    player's hand is evaluated against the board, the lowest evaluator score
    wins, and players with an equal best score split the pot.
    """
    await sendslack("<@%s> calls." % user_id, web_client, channel_id)
    tabcards = Card.print_pretty_cards(tab.cards)
    await sendslack(tabcards, web_client, channel_id)

    if tab.plo == True:
        await calculate_plo(web_client, user_id, channel_id)
        return

    evaluator = Evaluator()
    # Keep (score, player) pairs in a list. The previous dict keyed by score
    # silently dropped one of two players holding an identical score, so a
    # tie was never detected and one player received the whole pot.
    ranked = []
    for player in active_players:
        pic = Card.print_pretty_cards(player.cards)
        await sendslack(
            "<@%s> has %s" % (player.name, pic), web_client, channel_id
        )
        ranked.append((evaluator.evaluate(tab.cards, player.cards), player))
        player.cards = []
    # Sort on the score only, so tied entries never compare player objects.
    ranked.sort(key=lambda entry: entry[0])

    for score, player in ranked:
        print((score, player), "herewith")
        hand = evaluator.class_to_string(evaluator.get_rank_class(score))
        await sendslack(
            "<@%s> has %s" % (player.name, hand), web_client, channel_id
        )

    winners = [player for score, player in ranked if score == ranked[0][0]]
    if winners:
        # Split the pot evenly; any indivisible remainder goes to the first
        # winner so the chips paid out always total the pot.
        share, remainder = divmod(tab.pot, len(winners))
        for position, player in enumerate(winners):
            amount = share + (remainder if position == 0 else 0)
            await sendslack(
                "<@%s> won and got %d" % (player.name, amount),
                web_client,
                channel_id,
            )
            player.money += amount

    # NOTE(review): the nesting of this reset was reconstructed from a
    # whitespace-collapsed source; it is assumed to belong to the non-PLO
    # showdown path and to run only when both players still have money --
    # confirm against the original file.
    if len(active_players) == 2:
        if active_players[0].money != 0 and active_players[1].money != 0:
            if active_players[1].dealer:
                active_players.append(active_players.pop(0))
            tab.cards.clear()
            tab.turn = 0
            tab.highbet = 0
            tab.pot = 0
            for player in active_players:
                player.cards.clear()
                player.tocall = 0
                player.dealer = False
                player.bet = 0
                player.reraise = 0
                player.canclose = False
            await set_up_game(web_client, channel_id)


async def bet_to_close(web_client, user_id, channel_id, bet):
    """Handle a bet that matches the table's high bet and closes the street.

    Does nothing unless ``bet == tab.highbet``. Otherwise the bet is added to
    the pot and deducted from the first active player, and then, depending on
    ``tab.turn``:

    * 0 -- announce the flop (no card is drawn here),
    * 1 / 2 -- draw one card onto the board and announce the turn / river,
    * 3 -- run the showdown.

    Args:
        web_client: Slack client forwarded to ``sendslack``.
        user_id: Slack id of the player who called.
        channel_id: Key into ``player_list`` / ``tab_list`` for this game.
        bet: Amount being bet.
    """
    active_players = player_list[channel_id]
    tab = tab_list[channel_id]["table"]
    deck = tab_list[channel_id]["deck"]

    if bet != tab.highbet:
        return

    print("betwixt")
    tab.pot += bet
    active_players[0].money = active_players[0].money - bet
    print(active_players[0].money, "active")
    print(active_players[1].money, "notactive")

    if tab.turn == 0:
        print("stage5")
        await _announce_next_street(
            web_client, user_id, channel_id, tab, active_players, "flop"
        )
    elif tab.turn == 1:
        print("stage6")
        tab.cards.append(deck.draw(1))
        print(tab.cards)
        await _announce_next_street(
            web_client, user_id, channel_id, tab, active_players, "turn"
        )
    elif tab.turn == 2:
        print("stage7")
        tab.cards.append(deck.draw(1))
        print(tab.cards)
        await _announce_next_street(
            web_client, user_id, channel_id, tab, active_players, "river"
        )
    elif tab.turn == 3:
        await _showdown(web_client, user_id, channel_id, tab, active_players)
def win_prob(board, hand, evaluator=None):
    """Return the strength of ``hand`` on ``board`` as a fraction.

    The value is ``1.0`` minus the evaluator's rank percentage for the hand,
    so a larger result corresponds to a smaller rank percentage.

    Args:
        board: Community cards, in the format the evaluator expects.
        hand: Hole cards, in the format the evaluator expects.
        evaluator: Optional evaluator to reuse. When omitted a new
            ``Evaluator`` is built, as before; callers that invoke this in a
            loop can pass one in to avoid rebuilding it on every call.

    Returns:
        ``1.0 - evaluator.get_five_card_rank_percentage(rank)``.
    """
    if evaluator is None:
        evaluator = Evaluator()
    rank = evaluator.evaluate(board, hand)
    return 1.0 - evaluator.get_five_card_rank_percentage(rank)
# Simulation of dealing the remaining two board cards `runs` times from one
# deck. For each value of `runs` the loop collects the payout of hand_1 over
# many trials and prints the per-run mean and variance.
for runs in range(1, 6):
    payouts = []
    for _ in range(10_000_000):
        # Fresh deck with the seven known cards taken out.
        deck = Deck()
        for known_card in (c1, c2, c3, c4, c5, c6, c7):
            deck.cards.remove(known_card)

        payout = 0
        for _ in range(runs):
            # c5, c6, c7 are the fixed first three board cards; the other two
            # are drawn without replacement from the same deck on each run.
            board = [c5, c6, c7] + deck.draw(2)
            e1 = evaluator.evaluate(board, hand_1)
            e2 = evaluator.evaluate(board, hand_2)
            # hand_1 is credited a full unit when its score is lower and half
            # a unit when the scores are equal.
            if e1 == e2:
                payout += 0.5
            elif e1 < e2:
                payout += 1.
        payouts.append(payout)

    print('runs = %d' % runs)
    print(average(payouts) / runs)
    print(var(payouts) / runs)
# Deal a five-card board followed by five two-card hands, in that order.
board = deck.draw(5)
p1, p2, p3, p4, p5 = (deck.draw(2) for _ in range(5))

# Show the board first, then each hand.
for cards in (board, p1, p2, p3, p4, p5):
    print(Card.print_pretty_cards(cards))

# Score every hand against the shared board.
evaluator = Evaluator()
scores = [evaluator.evaluate(board, hole) for hole in (p1, p2, p3, p4, p5)]
p1_score, p2_score, p3_score, p4_score, p5_score = scores

# Map each score to its rank class.
classes = [evaluator.get_rank_class(score) for score in scores]
p1_class, p2_class, p3_class, p4_class, p5_class = classes

for rank_class in (p1_class, p2_class):
    print(evaluator.class_to_string(rank_class))
class TexasHoldemEnv(Env, utils.EzPickle):
    """Gym environment that plays Texas Hold'em hands between seated players.

    Seats start out filled with placeholder players; real players are added
    with :meth:`add_player`. :meth:`reset` starts a hand (blinds are posted
    and hole cards dealt once at least two players are registered) and
    :meth:`step` applies one action for the player whose turn it is.

    The observation returned to the caller describes the agent's seat only:
    ``(equity, stack, total pot)``.
    """

    # [small blind, big blind] levels. Only level 0 is used by reset();
    # _increment_blinds() walks up this table.
    BLIND_INCREMENTS = [[10, 25], [25, 50], [50, 100], [75, 150], [100, 200],
                        [150, 300], [200, 400], [300, 600], [400, 800],
                        [500, 1000], [600, 1200], [800, 1600], [1000, 2000]]

    def __init__(self,
                 n_seats,
                 max_limit=100000,
                 all_in_equity_reward=False,
                 equity_steps=100,
                 autoreset_stacks=True,
                 debug=False):
        """Create an empty table.

        Args:
            n_seats: Number of seats at the table.
            max_limit: Upper bound used for the discrete stack / pot / raise
                spaces.
            all_in_equity_reward: When true, a hand in which betting is over
                before the river is settled by splitting pots according to
                equity instead of dealing the board out.
            equity_steps: ``n_evaluations`` passed to the ``Equity`` helper.
            autoreset_stacks: When true, every player's stack is reset at the
                start of each hand.
            debug: Print diagnostic output while playing.
        """
        self.n_seats = n_seats
        self._blind_index = 0
        [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
        self._deck = Deck()
        self._evaluator = Evaluator()

        self.community = []
        self._dead_cards = []
        self._street = Street.NOT_STARTED
        self._button = -1

        self._side_pots = [0] * n_seats
        self._current_sidepot = 0  # index of _side_pots
        self._totalpot = 0
        self._tocall = 0
        self._lastraise = 0
        self._number_of_hands = 0
        self._current_bet = 0

        # fill seats with dummy players
        self._seats = [
            Player(i, stack=0, emptyplayer=True) for i in range(n_seats)
        ]
        self.emptyseats = n_seats
        self._player_dict = {}
        # Created here as well as in reset() so the attribute always exists.
        self._folded_players = []
        self._current_player = None
        self._debug = debug
        self._last_player = None
        self._last_action = None
        self.agent_exists = False
        self.equity_reward = all_in_equity_reward
        self.equity = Equity(n_evaluations=equity_steps)
        self._autoreset_stacks = autoreset_stacks

        self.observation_space = spaces.Tuple([
            spaces.Box(low=0.0, high=1.0, shape=(1, )),  # equity
            spaces.Discrete(max_limit),  # stack
            spaces.Discrete(max_limit),  # pot amount
        ])
        self.action_space = spaces.MultiDiscrete([3, max_limit])

    @property
    def current_player_id(self):
        """Id of the player whose turn it is."""
        return self._current_player.player_id

    @property
    def tocall(self):
        """Total bet a player currently has to match."""
        return self._tocall

    def seed(self, seed=None):
        """Derive a seed via ``seeding.np_random`` and return it in a list."""
        _, seed = seeding.np_random(seed)
        return [seed]

    def add_player(self, seat_id, stack=2500, is_agent=False):
        """Add a player to the environment seat with the given stack (chipcount)

        Does nothing when ``seat_id`` is already registered.

        Raises:
            error.Error: if a second agent is added or the seat is occupied.
        """
        player_id = seat_id
        if player_id not in self._player_dict:
            if is_agent:
                if self.agent_exists:
                    raise error.Error('Agent already exists')
                self.agent_exists = True
                self.agent_id = player_id
            new_player = Player(player_id, stack=stack, emptyplayer=False)
            if self._seats[player_id].emptyplayer:
                self._seats[player_id] = new_player
                new_player.set_seat(player_id)
            else:
                raise error.Error('Seat already taken.')
            self._player_dict[player_id] = new_player
            self.emptyseats -= 1

    def remove_player(self, seat_id):
        """Remove a player from the environment seat.

        Removing a seat that holds no registered player is a no-op.
        """
        player_id = seat_id
        try:
            idx = self._seats.index(self._player_dict[player_id])
            self._seats[idx] = Player(-1, stack=0, emptyplayer=True)
            del self._player_dict[player_id]
            self.emptyseats += 1
        except (KeyError, ValueError):
            # KeyError: seat_id was never registered (previously this escaped
            # the handler). ValueError: player is no longer in a seat.
            pass

    def reset(self):
        """Start a new hand and return ``(observation, full_state)``.

        With at least two registered players the small and big blind are
        posted by the first two players to act and hole cards are dealt.
        """
        self._reset_game()
        self._number_of_hands += 1
        [self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
        if len(self._player_dict) >= 2:
            players = self._playing_players
            self._reset_street_state()
            self._current_player = self._first_to_act(players)
            self._last_player = self._current_player
            self._post_smallblind(self._current_player)
            self._pass_move_to_next_player()
            self._post_bigblind(self._current_player)
            self._pass_move_to_next_player()
            self._tocall = self._bigblind
            self._folded_players = []
            self._deal_next_street()
        return self._get_current_reset_returns()

    def step(self, action):
        """Apply ``action`` for the current player and advance the hand.

        CHECK = 0
        CALL = 1
        RAISE = 2
        FOLD = 3

        RAISE_AMT = [0, minraise]

        Returns:
            ``(observation, reward, terminal, info)`` for the agent seat.

        Raises:
            error.Error: if the hand cannot be continued (not enough players,
                hand already at showdown, everyone all in).
        """
        if self._current_player is None:
            raise error.Error(
                'Round cannot be played without 2 or more players.')

        if self._street == Street.SHOWDOWN:
            raise error.Error('Rounds already finished, needs to be reset.')

        players = self._playing_players
        if len(players) <= 1:
            raise error.Error(
                'Round cannot be played with one or less players.')

        if not any([p.isallin is False for p in players]):
            raise error.Error('Eveyone all in, round should be finished')

        if self._current_player.isallin:
            raise error.Error(
                'This should never happen, position to act should pass players '
                'that can\'t take any actions')

        self._current_player.equity = self._compute_my_equity(
            self._current_player)

        self._last_action = action

        move = self._current_player.validate_action(self._tocall,
                                                    self._minraise, action)
        if self._debug:
            print('Player', self._current_player.player_id, move)
        self._player_action(self._current_player, move[1])
        if move[0] == 'raise':
            # A raise re-opens the action for everybody who can still act.
            for p in players:
                if p != self._current_player and not p.isallin:
                    p.playedthisround = False
        self._pass_move_to_next_player()
        if move[0] == 'fold':
            # _last_player is the player who just acted.
            self._dead_cards += self._last_player.hand
            self._last_player.playing_hand = False
            players.remove(self._last_player)
            self._folded_players.append(self._last_player)

        not_acted_players = [
            player for player in players if not player.playedthisround
        ]
        all_but_one_all_in = sum([player.isallin for player in players
                                  ]) >= len(players) - 1
        street_done = all([player.playedthisround for player in players]) \
            or (len(not_acted_players) == 1
                and not_acted_players[0].currentbet >= self._tocall
                and all_but_one_all_in)
        ready_for_showdown = (len(players) > 1 and all_but_one_all_in
                              and street_done)

        if ready_for_showdown:
            if self.equity_reward:
                self._street = Street.SHOWDOWN
            else:
                # No more betting possible: deal the rest of the board.
                while self._street < Street.SHOWDOWN:
                    self._deal_next_street()

        if street_done:
            self._resolve_street(players)

        terminal = False
        if self._street == Street.SHOWDOWN or len(players) == 1:
            terminal = True
            self._resolve_hand(players)

        return self._get_current_step_returns(terminal)

    def _compute_equities(self, players):
        """Equities of ``players`` against each other on the current board."""
        return self.equity.get_equities([p.hand for p in players],
                                        self.community, self._deck.cards,
                                        self._dead_cards)

    def _compute_my_equity(self, player):
        """Equity of a single player's hand at a table of ``n_seats``."""
        return self.equity.get_my_equity([player.hand], len(self._seats),
                                         self.community, self._deck.cards)

    def render(self, mode='human', close=False):
        """Print the pot, the last action, the board and every seat."""
        for p in self._playing_players:
            p.equity = self._compute_my_equity(p)
        print('\ntotal pot: {}'.format(self._totalpot))
        if self._last_action is not None:
            pid = self._last_player.player_id
            print('last action by player {}:'.format(pid))
            print(format_action(self._last_player, self._last_action))

        (player_states, community_states) = self._get_current_state()
        (player_infos, player_hands) = zip(*player_states)
        (community_infos, community_cards) = community_states
        blinds_idxs = self._get_blind_indexes(community_infos)

        print('community:')
        print('-' + hand_to_str(community_cards))
        print('players:')
        for idx, hand in enumerate(player_hands):
            # Player states are listed starting at the current player's seat.
            idx_relative = (idx + self._current_player.player_id) % len(
                self._seats)
            position_string = self._get_blind_str(blinds_idxs, idx_relative)
            folded = "F" if not player_infos[idx][
                player_table.IS_IN_POT] else " "
            print('{} {} {}{}stack: {}, equity: {}'.format(
                idx_relative, position_string, folded, hand_to_str(hand),
                self._seats[idx_relative].stack,
                self._seats[idx_relative].equity))

    def _get_blind_str(self, blinds_idxs, idx):
        """Return the blind label for seat ``idx`` (blank if not a blind)."""
        if idx == blinds_idxs[0]:
            return "SB"
        elif idx == blinds_idxs[1]:
            return "BB"
        else:
            return " "

    def _get_blind_indexes(self, community_infos):
        """Return ``(small_blind_seat, big_blind_seat)``."""
        idx = community_infos[community_table.BUTTON_POS]
        # If more than 2 players playing, SB is next from BTN, else BTN is SB
        if len([
                s for s in self._seats
                if not s.sitting_out and not s.emptyplayer
        ]) > 2:
            idx = (idx + 1) % len(self._seats)
        sb_idx = -1
        while True:
            # Skip seats that are empty or sitting out.
            while self._seats[idx].sitting_out or self._seats[idx].emptyplayer:
                idx = (idx + 1) % len(self._seats)
            if sb_idx == -1:
                sb_idx = idx
            else:
                return (sb_idx, idx)
            idx = (idx + 1) % len(self._seats)

    def _resolve_street(self, players):
        """Collect bets into side pots and move on to the next street."""
        self._current_player = self._first_to_act(players)
        self._resolve_sidepots(players + self._folded_players)
        if self._street < Street.SHOWDOWN and len(players) > 1:
            self._reset_street_state()
            self._deal_next_street()

    def _deal_next_street(self):
        """Deal the cards for the street after the current one."""
        if self._street == Street.NOT_STARTED:
            self._deal()
        elif self._street == Street.PREFLOP:
            self._flop()
        elif self._street == Street.FLOP:
            self._turn()
        elif self._street == Street.TURN:
            self._river()
        self._street += 1

    def _increment_blinds(self):
        """Move to the next blind level, stopping at the last one."""
        self._blind_index = min(self._blind_index + 1,
                                len(TexasHoldemEnv.BLIND_INCREMENTS) - 1)
        [self._smallblind, self._bigblind
         ] = TexasHoldemEnv.BLIND_INCREMENTS[self._blind_index]

    def _post_smallblind(self, player):
        """Make ``player`` post the small blind (or their whole stack)."""
        if self._debug:
            print('player ', player.player_id, 'small blind',
                  self._smallblind)
        self._player_action(player, min(player.stack, self._smallblind))
        player.post_blind(self._smallblind)

    def _post_bigblind(self, player):
        """Make ``player`` post the big blind (or their whole stack)."""
        if self._debug:
            print('player ', player.player_id, 'big blind', self._bigblind)
        self._player_action(player, min(player.stack, self._bigblind))
        player.post_blind(self._bigblind)
        self._lastraise = self._bigblind
        if self._debug:
            print('total pot: {}'.format(self._totalpot))

    def _player_action(self, player, total_bet):
        """Record that ``player`` now has ``total_bet`` in front of them."""
        self._current_bet = max(total_bet, self._current_bet)
        # Both differences must be taken before declare_action() is called.
        extra_from_player_bet = total_bet - player.currentbet
        relative_bet = total_bet - self._last_player.currentbet
        player.declare_action(total_bet)

        self._totalpot += extra_from_player_bet
        self._tocall = max(self._tocall, total_bet)
        if self._tocall > 0:
            self._tocall = max(self._tocall, self._bigblind)
        self._lastraise = max(self._lastraise, relative_bet)

    def _reset_street_state(self):
        """Clear per-street bets and action flags."""
        for player in self._player_dict.values():
            player.currentbet = 0
            if not player.isallin:
                player.playedthisround = False
        self._tocall = 0
        self._lastraise = 0
        self._current_bet = 0
        if self._debug:
            print('totalpot', self._totalpot)

    @property
    def _playing_players(self):
        """Seated players that are still in the current hand."""
        return [p for p in self._seats if p.playing_hand]

    def _pass_move_to_next_player(self):
        """Remember who just acted and hand the turn to the next player."""
        self._last_player = self._current_player
        self._current_player = self._next(self._playing_players,
                                          self._current_player)

    def _first_to_act(self, players):
        """Return the first of ``players`` to act, counted from the button."""
        players = sorted(set(players + [self._seats[self._button]]),
                         key=lambda x: x.get_seat())
        if self._street == Street.NOT_STARTED and len(players) == 2:
            # With two players the button acts first before the deal.
            return self._seats[self._button]
        else:
            return self._next(players, self._seats[self._button])

    def _next(self, players, current_player):
        """Return the player after ``current_player``, skipping all-ins."""
        players = [p for p in players if not p.isallin or p is current_player]
        idx = players.index(current_player)
        return players[(idx + 1) % len(players)]

    def _deal(self):
        """Give two hole cards to every player in the hand."""
        for player in self._seats:
            if player.playing_hand:
                player.hand = self._deck.draw(2)

    def _flop(self):
        self.community = self._deck.draw(3)

    def _turn(self):
        self.community.append(self._deck.draw(1))

    def _river(self):
        self.community.append(self._deck.draw(1))

    def _resolve_sidepots(self, players_playing):
        """Move the players' current bets into the side pots.

        Each pass moves up to the smallest live bet from every bettor into the
        current side pot; when that smallest bettor is all in, a new side pot
        is opened for whatever the others bet on top.
        """
        players = [p for p in players_playing if p.currentbet]
        if self._debug:
            print('current bets: ', [p.currentbet for p in players])
            print('playing hand: ', [p.playing_hand for p in players])
        if not players:
            return
        try:
            smallest_bet = min(
                [p.currentbet for p in players if p.playing_hand])
        except ValueError:
            # Only folded players have chips left in front of them.
            for p in players:
                self._side_pots[self._current_sidepot] += p.currentbet
                p.currentbet = 0
            return

        smallest_players_allin = [
            p for p, bet in zip(players, [p.currentbet for p in players])
            if bet == smallest_bet and p.isallin
        ]

        for p in players:
            self._side_pots[self._current_sidepot] += min(
                smallest_bet, p.currentbet)
            p.currentbet -= min(smallest_bet, p.currentbet)
            p.lastsidepot = self._current_sidepot

        if smallest_players_allin:
            self._current_sidepot += 1
        # NOTE(review): recursion is done unconditionally; it is a no-op when
        # nobody has chips left in front of them.
        self._resolve_sidepots(players)
        assert sum(self._side_pots) == self._totalpot
        if self._debug:
            print('sidepots: ', self._side_pots)

    def _resolve_hand(self, players):
        """Pay out the pot(s) at the end of a hand."""
        if len(players) == 1:
            # Everyone else folded
            if self._debug:
                print('Refunding, sum(sidepots) %s, totalpot %s' %
                      (str(sum(self._side_pots)), str(self._totalpot)))
            players[0].refund(self._totalpot)
        else:
            # trim side_pots to only include the non-empty side pots
            temp_pots = [pot for pot in self._side_pots if pot > 0]

            if self.equity_reward and len(self.community) < 5:
                for pot_idx, _ in enumerate(temp_pots):
                    # find players involved in given side_pot, compute the
                    # equities and pot split
                    pot_contributors = [
                        p for p in players if p.lastsidepot >= pot_idx
                    ]
                    if len(pot_contributors) > 1:
                        equities = self.equity.get_equities(
                            [p.hand for p in pot_contributors],
                            self.community, self._deck.cards,
                            self._dead_cards)
                        amount_distributed = 0
                        for p_idx, player in enumerate(pot_contributors):
                            split_amount = int(
                                round(self._side_pots[pot_idx] *
                                      equities[p_idx]))
                            if self._debug:
                                print('Player', player.player_id,
                                      'wins side pot (', split_amount, ')')
                            player.refund(split_amount)
                            amount_distributed += split_amount
                    else:
                        amount_distributed = int(self._side_pots[pot_idx])
                        pot_contributors[0].refund(amount_distributed)
                    self._side_pots[pot_idx] -= amount_distributed

                    # any remaining chips after splitting go to the winner in
                    # the earliest position
                    if self._side_pots[pot_idx]:
                        earliest = self._first_to_act(list(pot_contributors))
                        earliest.refund(self._side_pots[pot_idx])
            else:
                # compute hand ranks
                for player in players:
                    player.handrank = self._evaluator.evaluate(
                        player.hand, self.community)

                # compute who wins each side pot and pay winners
                for pot_idx, _ in enumerate(temp_pots):
                    # find players involved in given side_pot, compute the
                    # winner(s)
                    pot_contributors = [
                        p for p in players if p.lastsidepot >= pot_idx
                    ]
                    winning_rank = min([p.handrank for p in pot_contributors])
                    winning_players = [
                        p for p in pot_contributors
                        if p.handrank == winning_rank
                    ]
                    # The share is computed once, before anything is paid.
                    # Recomputing it inside the loop from the shrinking pot
                    # gave tied winners 1/2, 1/4, ... instead of equal parts.
                    split_amount = int(self._side_pots[pot_idx] /
                                       len(winning_players))
                    for player in winning_players:
                        if self._debug:
                            print('Player', player.player_id,
                                  'wins side pot (', split_amount, ')')
                        player.refund(split_amount)
                        self._side_pots[pot_idx] -= split_amount

                    # any remaining chips after splitting go to the winner in
                    # the earliest position
                    if self._side_pots[pot_idx]:
                        earliest = self._first_to_act(list(winning_players))
                        earliest.refund(self._side_pots[pot_idx])

    def _reset_game(self):
        """Clear all per-hand state, shuffle and advance the button."""
        self._street = Street.NOT_STARTED
        playing = 0
        for player in self._seats:
            if not player.emptyplayer and not player.sitting_out:
                if self._autoreset_stacks:
                    player.reset_stack()
                player.reset_hand()
                playing += 1
        self.community = []
        self._dead_cards = []
        self._current_sidepot = 0
        self._totalpot = 0
        self._last_action = None
        self._side_pots = [0] * len(self._seats)
        self._deck.shuffle()

        if playing:
            # Move the button to the next seat that is playing the hand.
            self._button = (self._button + 1) % len(self._seats)
            while not self._seats[self._button].playing_hand:
                self._button = (self._button + 1) % len(self._seats)

    @property
    def _minraise(self):
        """Smallest total bet accepted as a raise from the current player."""
        minraise = min(self._current_bet + self._lastraise,
                       self._current_player.max_bet)
        return max(minraise, self._current_bet + 1)

    def _pad(self, l, n, v):
        """Return ``l`` right-padded with ``v`` up to length ``n``."""
        if not l:
            l = []
        return l + [v] * (n - len(l))

    def _get_current_player_state(self, player):
        """Observation for one player: ``(equity, stack, total pot)``."""
        return (self._compute_my_equity(player), player.stack, self._totalpot)

    def _get_current_state(self):
        """Return ``(player_states, community_states)`` for the whole table.

        Player states are listed starting from the current player's seat;
        hands and the board are padded with -1.
        """
        player_states = []
        n_players = len(self._seats)
        for i in range(self._current_player.player_id,
                       self._current_player.player_id + n_players):
            player = self._seats[i % n_players]
            player_features = [
                int(player.currentbet),
                int(player.stack),
                # NOTE(review): int() truncates; if equity is a fraction in
                # [0, 1] this is almost always 0 -- confirm that is intended.
                int(player.equity),
                int(player.playing_hand),
                int(player.playedthisround),
                int(player.isallin),
                int(player.lastsidepot),
                int(player.player_id),
            ]
            player_states.append(
                (player_features, self._pad(player.hand, 2, -1)))
        community_states = ([
            int(self._button),
            int(self._smallblind),
            int(self._bigblind),
            int(self._totalpot),
            int(self._lastraise),
            int(self._minraise),
            int(self._tocall),
            int(self._current_player.player_id),
        ], self._pad(self.community, 5, -1))
        return (tuple(player_states), community_states)

    def _get_current_reset_returns(self):
        """Return value of reset(): agent observation plus full state."""
        observation, _, _, _ = self._get_current_step_returns(terminal=False)
        return observation, self._get_current_state()

    def _get_current_step_returns(self, terminal):
        """Return value of step() from the agent's point of view.

        The reward is the agent's stack change over the hand in big blinds,
        and is only non-zero on the terminal step.
        """
        agent = self._seats[self.agent_id]
        observation = self._get_current_player_state(agent)
        reward = ((agent.stack - agent.hand_starting_stack) /
                  self._bigblind if terminal else 0)
        info = {}
        info['money_won'] = agent.stack - \
            (agent.hand_starting_stack + agent.blind) if terminal else 0
        return observation, reward, terminal, info
class Encoder():
    """Turn the environment's table state into flat numeric feature vectors.

    The encoding consists of community info, per-player info, one-hot
    community cards (5 x 53, slot 0 meaning "not dealt"), one-hot hole cards
    (2 x 52) and an optional hand-rank feature selected by
    ``ranking_encoding`` (``'norm'``, ``'one-hot'`` or ``None``).
    """

    # Entries that are divided by the "full stack" (100 big blinds).
    pot_normalized_community = [
        community_table.SMALL_BLIND, community_table.POT,
        community_table.LAST_RAISE, community_table.MINRAISE,
        community_table.TO_CALL
    ]
    pot_normalized_player = [player_table.STACK, player_table.LAST_SIDEPOT]

    # Length of the one-hot rank vector; encode() and n_card_dim both use it
    # so they cannot disagree. Slot 0 means "no rank available".
    _N_RANK_SLOTS = 7463

    def __init__(self,
                 n_seats,
                 ranking_encoding='norm',
                 concat=True,
                 drop_cards=False,
                 split_cards=False):
        """
        Args:
            n_seats: Number of seats at the table.
            ranking_encoding: ``'norm'``, ``'one-hot'`` or ``None``.
            concat: Return one concatenated vector instead of a tuple.
            drop_cards: Accepted for compatibility; not used here.
            split_cards: Accepted for compatibility; not used here.
        """
        self.n_seats = n_seats
        self.ranking_encoding = ranking_encoding
        self._deck = np.array(Deck.GetFullDeck(), dtype=np.int64)
        # Same deck with -1 ("no card") in front.
        self._deck_alt = np.concatenate(
            (np.array([-1], dtype=np.int64), self._deck))
        self._evaluator = Evaluator()
        self.concat = concat

    @property
    def n_card_dim(self):
        """Size of the card part of the encoding, including the rank feature."""
        if self.ranking_encoding == 'one-hot':
            rank_dim = Encoder._N_RANK_SLOTS
        elif self.ranking_encoding == 'norm':
            rank_dim = 1
        else:
            rank_dim = 0
        return (265 + 104) + rank_dim

    @property
    def n_other_dim(self):
        """Size of the community-info and player-info part of the encoding."""
        return 6 + self.n_seats + 6 * self.n_seats

    @property
    def n_dim(self):
        """Total length of the concatenated encoding."""
        return self.n_card_dim + self.n_other_dim

    def encode(self, player_states, community_infos, community_cards,
               our_seat):
        """Encode one table state.

        Args:
            player_states: Sequence of ``(player_info, hand)`` pairs; the
                first entry's hand is the one that gets encoded.
            community_infos: Community feature list indexed by
                ``community_table``.
            community_cards: Five board cards, -1 for cards not dealt yet.
            our_seat: Not used.

        Returns:
            One concatenated 1-D array when ``concat`` is true, otherwise the
            tuple ``(community_infos_t, players_info_t, community_cards_t,
            player_cards_t, hand_t)``.

        Raises:
            Exception: for an unknown ``ranking_encoding``.
        """
        player_infos, player_hands = zip(*player_states)
        player_infos = np.array(player_infos, dtype=np.float32)
        community_infos = np.array(community_infos, dtype=np.float32)
        n_players = player_infos.shape[0]

        # Chip amounts are expressed relative to 100 big blinds.
        full_stack = community_infos[community_table.BIG_BLIND] * 100
        community_infos[Encoder.pot_normalized_community] = \
            community_infos[Encoder.pot_normalized_community] / full_stack

        community_infos_t = np.zeros(6 + n_players)
        community_infos_t[:6] = community_infos[
            community_table.SMALL_BLIND:community_table.TO_ACT_POS]
        # One-hot button position after the six scalar features.
        community_infos_t[int(
            6 + community_infos[community_table.BUTTON_POS])] = 1

        cards = player_hands[0]
        community_cards_t = np.zeros(5 * 53)
        player_cards_t = np.zeros(52 * 2)
        # np.where(...)[0] is an index array; take its first element
        # explicitly instead of calling int() on the array, which NumPy 1.25+
        # deprecates.
        community_cards_t[[
            int(i * 53 + np.where(self._deck_alt == community_cards[i])[0][0])
            for i in range(5)
        ]] = 1
        player_cards_t[[
            int(i * 52 + np.where(self._deck == int(cards[i]))[0][0])
            for i in range(2)
        ]] = 1

        player_infos[:, Encoder.pot_normalized_player] /= full_stack
        players_info_t = np.zeros((n_players, 6))
        players_info_t[:] = player_infos[:, :player_table.ID]

        if self.ranking_encoding is None:
            hand = []
        else:
            community_cards = [card for card in community_cards if card > 0]
            if len(community_cards) > 0:
                hand_rank = self._evaluator.evaluate(cards, community_cards)
            else:
                hand_rank = -1
            if self.ranking_encoding == 'norm':
                if hand_rank > 0:
                    hand = [1 / hand_rank]
                else:
                    hand = [-1]
            elif self.ranking_encoding == 'one-hot':
                # Previously sized 7643, which disagreed with n_card_dim.
                # The "no rank" case now uses slot 0 instead of index -1,
                # which would alias the last slot once the size is fixed.
                hand = [0] * Encoder._N_RANK_SLOTS
                hand[max(hand_rank, 0)] = 1
            else:
                raise Exception('Unknown ranking encoding!')
        hand_t = np.array(hand)

        if self.concat:
            return np.concatenate(
                (community_infos_t.flatten(), players_info_t.flatten(),
                 community_cards_t.flatten(), player_cards_t.flatten(),
                 hand_t.flatten()),
                axis=0)
        return (community_infos_t, players_info_t, community_cards_t,
                player_cards_t, hand_t)
def uruchom(self):
    """Run the interactive game until a player quits or runs out of money.

    Each pass of the outer loop is one round: cards are dealt, the opening
    player posts the blind, up to three betting turns are played, and the pot
    is awarded either to the last player who did not fold or by comparing
    hands. Text is written with ``print`` and the method waits for ENTER
    between rounds.
    """
    sprawdz = Evaluator()
    rozp = 0  # index of the player who opens the betting

    # main game loop
    while True:
        print(
            "\n*******************************Kolejna runda*********************************"
        )
        talia = Deck()  # deck of cards
        stol = poker.Stol(len(self.gracze))
        zwyciezca = -1  # index of the game winner
        pas = -1

        for g in self.gracze:
            g.reka = talia.draw(2)
        stol.karty = talia.draw(5)

        global_info = stol.doloz_stawke(
            self.gracze[rozp],
            self.ustawienia.ciemne)  # initial stake for the 1st turn
        self.gracze[rozp].stan = poker.stan_gracza["postawil"]
        najwyzsza_stawka = self.ustawienia.ciemne
        print(global_info)

        # loop over the 3 betting turns
        for tura in range(1, 4):
            print("\n\n**************Trwa tura %s****************" % str(tura))
            print("\nObecnie w puli: ", stol.pula)
            # clears the actions of the previous turn, except folds and
            # possible all-ins
            poker.zresetuj_akcje(self.gracze)

            # `aktywny` is the index of the player currently deciding;
            # nastepny() moves on to the next player
            if tura == 1:
                aktywny = poker.nastepny(rozp)
            else:
                aktywny = rozp

            koniec = False
            # one pass of this loop is one player's decision
            while True:
                global_info = ''
                # players who are all-in are skipped
                if self.gracze[aktywny].stan != poker.stan_gracza["va bank"]:
                    # show the situation to the player
                    global_info += "\n**************Teraz gracz %s***************" % (
                        aktywny + 1)
                    global_info += stol.wypisz_karty_na_stole()
                    global_info += '\n' + self.gracze[
                        aktywny].wypisz_karty_gracza()
                    global_info += "\nNajwyższa stawka na stole: " + str(
                        najwyzsza_stawka)
                    global_info += "\nTwoja stawka: " + str(
                        stol.stawki_graczy[aktywny])
                    global_info += "\nKapital: " + str(
                        self.gracze[aktywny].kapital)
                    print(global_info)

                    # read the player's action, see poker.py for details
                    odp = poker.wczytaj_poprawna_odp(
                        najwyzsza_stawka - stol.stawki_graczy[aktywny],
                        self.gracze[aktywny].kapital,
                        self.gracze[aktywny].podbicia)

                    # carry out the chosen action
                    global_info = poker.podejmij_akcje(self.gracze[aktywny],
                                                       odp, stol)
                    print(global_info)

                    # keep track of the highest stake on the table
                    if najwyzsza_stawka < stol.stawki_graczy[aktywny]:
                        najwyzsza_stawka = stol.stawki_graczy[aktywny]

                    # handle a fold
                    if self.gracze[aktywny].stan == poker.stan_gracza[
                            "spasowal"]:
                        pas = poker.czy_wszyscy_spasowali(self.gracze)
                        if pas != -1:
                            koniec = True

                    # handle the "quit the game" option
                    if self.gracze[aktywny].stan == poker.stan_gracza[
                            "skonczyl"]:
                        zwyciezca = poker.nastepny(aktywny)
                        koniec = True

                if koniec:
                    break
                # checks whether every player has acted while the stakes are
                # equal
                if poker.czy_koniec_tury(self.gracze, stol, najwyzsza_stawka):
                    break
                aktywny = poker.nastepny(aktywny)

            # clean up after the turn: all stakes go to the common pot
            stol.zbierz_do_puli()
            if zwyciezca >= 0 or pas >= 0:
                break  # one of the two players folded (or quit)
            if poker.liczba_graczy_w_licytacji(self.gracze) <= 1:
                stol.odkryte = 5
                break
            stol.odkryte += 1
            najwyzsza_stawka = 0

        if pas >= 0 and not poker.czy_ktos_allin(self.gracze):
            # everybody else folded
            print("\n***Zwyciezca rundy zostaje gracz %s!***" % (pas + 1))
            self.gracze[pas].kapital += stol.pula
            stol.pula = 0
        elif zwyciezca == -1:
            # showdown
            print("\n****************Sprawdzenie kart*****************")
            # These helpers return text (they are concatenated into
            # global_info above); the result used to be discarded here, so
            # the cards were not shown at showdown.
            print(stol.wypisz_karty_na_stole())
            wyniki = list()
            print()
            for g in self.gracze:
                print(g.wypisz_karty_gracza())
                wyniki.append(sprawdz.evaluate(stol.karty, g.reka))
                print("Wynik gracza %d: %s (%d)" %
                      (g.id + 1,
                       sprawdz.class_to_string(
                           sprawdz.get_rank_class(wyniki[-1])), wyniki[-1]))
            global_info = poker.rozdaj_pule(self.gracze, stol, wyniki)
            print(global_info)

        # capital of every player after the round
        print("\nStan kapitalu graczy: ")
        for g in self.gracze:
            print("Gracz %d: %d" % (g.id + 1, g.kapital))

        if zwyciezca == -1:
            # check whether somebody ran out of money. `zwyciezca` is a
            # 0-based index (the message adds 1), so the winner is the OTHER
            # player's index; the old values 2 / 1 announced "gracz 3" or the
            # bankrupt player.
            if self.gracze[0].kapital == 0:
                zwyciezca = 1
            elif self.gracze[1].kapital == 0:
                zwyciezca = 0

        if zwyciezca != -1:
            print("\n***Zwyciezca gry zostaje gracz %d, gratulacje!!!***" %
                  (zwyciezca + 1))
            print("\n")
            input("Nacisnij ENTER aby kontynuowac.")
            break

        input("\nNacisnij ENTER aby kontynuowac.")
        rozp = poker.nastepny(rozp, len(self.gracze))
        poker.zresetuj_akcje(self.gracze, do_poczatku=True)
    return
# Benchmark fragment; its line breaks were lost in this copy, so the code below is kept verbatim.
# Leading part: tail of a helper whose `def` line is not visible here (it is called below as setup(n, m)).
#   It appends deck.draw(m) to `boards` and deck.draw(2) to `hands`, reshuffles the deck
#   (presumably once per iteration -- the lost indentation makes this unverifiable) and returns (boards, hands).
# Script part: times evaluator.evaluate(boards[i], hands[i]) over n = 10000 pairs built with setup(n, 5),
#   prints the average time per evaluation and evaluations per second under "7 card evaluation:",
#   then resets the timer and starts the same timing loop for setup(n, 4); the fragment ends inside that loop.
for i in range(n): boards.append(deck.draw(m)) hands.append(deck.draw(2)) deck.shuffle() return boards, hands n = 10000 cumtime = 0.0 evaluator = Evaluator() boards, hands = setup(n, 5) for i in range(len(boards)): start = time.time() evaluator.evaluate(boards[i], hands[i]) cumtime += (time.time() - start) avg = float(cumtime / n) print("7 card evaluation:") print("[*] Treys: Average time per evaluation: %f" % avg) print("[*] Treys: Evaluations per second = %f" % (1.0 / avg)) ### cumtime = 0.0 boards, hands = setup(n, 4) for i in range(len(boards)): start = time.time() evaluator.evaluate(boards[i], hands[i]) cumtime += (time.time() - start)