def __init__(self):
    """Create the deck, evaluator and gym spaces, then take the first observation.

    ``self.reset()`` is called last, after every other attribute is set,
    and its return value is stored as ``self.obs``.
    """
    self.deck = treys.Deck()
    self.evaluator = treys.Evaluator()

    # we will process the reward to fit in [-1,1] from [-10,10]
    reward_low, reward_high = -1, 1
    self.reward_range = (reward_low, reward_high)

    self.metadata = {'render_modes': ['ansi']}

    self.observation_space = OFCSObservationSpace(356)
    self.action_space = gym.spaces.Discrete(2)

    self.done = False
    self.obs = self.reset()
def __init__(self):
    """Initialize a new hand.

    Every field starts empty or zeroed; a fresh ``treys.Evaluator`` is
    attached for later hand scoring.
    """
    # Cards seen so far.
    self.board_cards = []
    self.hole_cards_as_treys = []

    # Position and betting state.
    self.i_am_big_blind = False
    self.pot_size = 0
    self.current_stage = ''

    self.evaluator = treys.Evaluator()
def build_network_input(self, player_idx, round, current_bets, min_raise, prev_round_investment, folded,
                        last_raiser, hole_cards, community_cards):
    """Assemble the flat per-game feature matrix fed to the network.

    The result concatenates, per batch row: one-hot hole cards, one-hot
    community cards (padded to five slots), per-player betting data and a
    five-element tail (round one-hot in slots 0-3, card percentile in slot 4).
    ``min_raise`` is accepted but not used here.

    Returns an array whose second dimension is asserted to equal
    ``self.obs_dim``.
    """
    batch = self.BATCH_SIZE

    def _card_index(cards):
        # First convert the treys card IDs into indices
        suit_part = 13 * np.log2(np.right_shift(cards, 12) & 0xF)
        rank_part = np.right_shift(cards, 8) & 0xF
        return suit_part + rank_part

    hole_idx = _card_index(hole_cards)
    community_idx = _card_index(community_cards)

    # Then convert those indices into 1h
    # NOTE(review): the comparison is against ``index - 1``, so an index of 0
    # matches no column and yields an all-zero row — confirm this is intended.
    hole_cards_1h = (np.arange(52) == hole_idx[..., None] - 1).astype(int)
    known_community_1h = (np.arange(53) == community_idx[..., None] - 1).astype(int)

    # Fill missing community cards with zero; the 53rd column of the 1h
    # indicates a missing card and is set to one for those padding slots.
    n_missing = 5 - community_cards.shape[1]
    padding_1h = np.zeros((batch, n_missing, 53))
    padding_1h[:, :, -1] = 1
    community_cards_1h = np.concatenate((known_community_1h, padding_1h), axis=1)

    player_data = np.zeros((batch, 5, self.N_PLAYERS))
    # Who folded already
    player_data[:, 0, :] = folded
    # Who put how much total into the pot
    player_data[:, 1, :] = (prev_round_investment + current_bets) / self.INITAL_CAPITAL
    # Who put how much this round
    player_data[:, 2, :] = current_bets / self.INITAL_CAPITAL
    # Who was the last to raise
    player_data[:, 3, :] = np.eye(self.N_PLAYERS)[last_raiser]
    # Reorder the first four to correspond to player_idx
    rotated = (player_data[:, :, player_idx:], player_data[:, :, :player_idx])
    player_data = np.concatenate(rotated, axis=2)
    # Which player are we
    player_data[:, 4, player_idx] = 1

    rank_bin = np.zeros(batch)
    evaluator = treys.Evaluator()
    for row in range(batch):
        hole_list = hole_cards[row].tolist()
        if round == constants.PRE_FLOP:
            # Pre-flop: position of the sorted hole cards in the preflop table.
            rank_bin[row] = self.preflop_table.index(sorted(hole_list)) / len(self.preflop_table)
        else:
            # Post-flop: evaluate the hand and look up its percentile.
            rank = evaluator.evaluate(community_cards[row].tolist(), hole_list)
            rank_bin[row] = 1 - self.ranktable[round - constants.FLOP, rank]

    tail_data = np.zeros((batch, 5))
    tail_data[:, round] = 1
    assert round < 4
    tail_data[:, 4] = rank_bin

    pieces = (
        hole_cards_1h.reshape(batch, -1),
        community_cards_1h.reshape(batch, -1),
        player_data.reshape(batch, -1),
        tail_data.reshape(batch, -1),
    )
    network_input = np.concatenate(pieces, axis=1)
    assert network_input.shape[1] == self.obs_dim
    return network_input
def __init__(self, num_players, starting_big_blind, hands_per_bb):
    """Set up an empty table for ``num_players`` seats.

    ``order`` starts as ``0 .. num_players - 1`` and every seat starts
    active. ``starting_big_blind`` and ``hands_per_bb`` are stored as given.
    """
    # Seats
    self.players = []
    self.order = list(range(num_players))
    self.active = [True] * num_players

    # Cards
    self.board = []
    self.deck = utils.make_deck()
    self.evaluator = treys.Evaluator()

    # Betting state
    self.big_blind = starting_big_blind
    self.pot = 0
    self.current_bet = 0
    self.aggressor = None

    # Blind schedule bookkeeping
    self.hands_per_bb = hands_per_bb
    self.round_counter = 0
def evaluation(my_cards, known_cards):
    """Return the treys score of ``my_cards`` combined with ``known_cards``.

    Both arguments are iterables of card strings accepted by
    ``treys.Card.new`` (for example ``'Jh'``).

    When ``my_cards`` is empty the sentinel ``10000`` is returned. In that
    case no ``treys.Evaluator`` is built, since constructing one is not free
    and the result would be unused.
    """
    hand = [treys.Card.new(card) for card in my_cards]
    board = [treys.Card.new(card) for card in known_cards]

    if not hand:
        # Nothing to score; skip building the evaluator.
        return 10000

    return treys.Evaluator().evaluate(board, hand)
def __init__(self):
    """Create the spaces, deal the first hand and build the reward tables.

    Also precomputes ``_jacks_hand_score``, the evaluator score of a pair of
    jacks with 2-3-4 side cards, used later to check whether the player's
    hand is better than jacks.
    """
    # 5 choose 3, plus 5 choose 2, plus 5, plus 1
    self.action_space = spaces.Discrete(26)
    # number of five card hand combos
    self.observation_space = spaces.Discrete(2598960)

    self.deck = treys.Deck()
    self.hand = self.deal_hand()
    self.evaluator = treys.Evaluator()

    # SET THE REWARDS
    class_names = (
        "Straight Flush",
        "Four of a Kind",
        "Full House",
        "Flush",
        "Straight",
        "Three of a Kind",
        "Two Pair",
        "Pair",
        "High Card",
    )
    class_rewards = (100, 25, 9, 6, 4, 3, 2, 1, -1)
    self.RANK_CLASS_STRING_TO_REWARD = dict(zip(class_names, class_rewards))
    # Rank classes are numbered from 1 in the order listed above.
    self.RANK_CLASS_TO_STRING = dict(enumerate(class_names, start=1))

    # get the hand rank for jacks
    # so we can later check if the player hand is better than jacks
    jacks_first_three = [treys.Card.new(label) for label in ('Jh', 'Js', '2s')]
    jacks_last_two = [treys.Card.new(label) for label in ('3h', '4d')]
    self._jacks_hand_score = self.evaluator.evaluate(jacks_first_three, jacks_last_two)
def getExpectedHandStrengthOdds(
        p1_hand,  # The hole cards
        numIterations,  # The number of iterations to run the simulation for
        numPlayers,  # The number of players in the hand
        numBoardCardsToCome,  # The number of board cards still to come
        existingBoardCards  # The existing board cards
):
    """Simulate ``numIterations`` random run-outs and return per-player shares.

    Each iteration deals two cards to every opponent and
    ``numBoardCardsToCome`` further board cards from a deck that excludes
    ``p1_hand`` and ``existingBoardCards``, then lets
    ``gameUtilities.findWinners`` update the ``winner`` tally. The tally is
    divided by ``numIterations`` before being returned; index 0 is the
    caller's own hand.
    """
    debug = False

    # pretend you are playing with n random players
    # for numIterations hands with numBoardCardsToCome and existingBoardCards board cards
    evaluator = treys.Evaluator()
    winner = [0] * numPlayers

    for _ in range(numIterations):
        # create the partial deck that excludes your cards and the board cards
        pDeck = partialDeck.PartialDeck(p1_hand + existingBoardCards)

        # create the hands.
        # By convention your hand is in index=0
        hands = [p1_hand]
        hands.extend(pDeck.draw(2) for _ in range(1, numPlayers))

        # Now, draw the board cards
        boardCardsToCome = pDeck.draw(numBoardCardsToCome)
        if debug:
            print("Existing board cards=")
            treys.Card.print_pretty_cards(existingBoardCards)
            print("New board cards=")
            treys.Card.print_pretty_cards(boardCardsToCome)

        fullBoardCards = existingBoardCards + boardCardsToCome
        _ties = gameUtilities.findWinners(debug, evaluator, numPlayers, hands, fullBoardCards, winner)

    # Turn the raw tally into a fraction of the iterations run.
    for seat in range(numPlayers):
        winner[seat] = winner[seat] / numIterations
    return winner
def getpreFlopOdds(
        p1_hand,  # The hole cards
        numIterations,  # The number of iterations to run the simulation for
        numPlayers,  # The number of players in the hand
        numBoardCards  # The number of board cards
):
    """Simulate ``numIterations`` random deals and return per-player shares.

    Each iteration deals two cards to every opponent and ``numBoardCards``
    board cards from a deck that excludes ``p1_hand``, then lets
    ``gameUtilities.findWinners`` update the ``winner`` tally. The tally is
    divided by ``numIterations`` before being returned; index 0 is the
    caller's own hand.
    """
    debug = False

    # pretend you are playing with n random players
    # for numIterations hands with numBoardCards
    evaluator = treys.Evaluator()
    winner = [0] * numPlayers

    for _ in range(numIterations):
        # create the partial deck that excludes your cards
        pDeck = partialDeck.PartialDeck(p1_hand)

        # create the hands.
        # By convention your hand is in index=0
        hands = [p1_hand]
        hands.extend(pDeck.draw(2) for _ in range(1, numPlayers))

        # Now, draw the board cards
        boardCards = pDeck.draw(numBoardCards)
        if debug:
            print("Board cards=")
            treys.Card.print_pretty_cards(boardCards)

        _ties = gameUtilities.findWinners(debug, evaluator, numPlayers, hands, boardCards, winner)

    # Turn the raw tally into a fraction of the iterations run.
    for seat in range(numPlayers):
        winner[seat] = winner[seat] / numIterations
    return winner
def evaluate_table(n_cards):
    """Sample random ``n_cards`` hands and record the score distribution.

    Runs ``N_LOOPS`` passes of ``PRINT_FREQUENCY`` shuffled draws, counting
    how often each evaluator score occurs. After every pass the normalised
    cumulative distribution is printed at ``N_BINS`` evenly spaced scores.
    Finally the score histogram is plotted to ``dist_<n_cards>.png`` and the
    cumulative distribution is saved to ``dist_<n_cards>.npy``.
    """
    stats = np.zeros(N_SCORES)
    deck = treys.Deck()
    evaluator = treys.Evaluator()

    for pass_idx in range(N_LOOPS):
        for _ in range(PRINT_FREQUENCY):
            deck.shuffle()
            drawn = deck.draw(n_cards)
            stats[evaluator.evaluate([], drawn)] += 1

        # Normalised running total of the counts collected so far.
        summation = np.cumsum(stats)
        summation = summation / summation[-1]

        print("-" * 80)
        print("Table {0} of {1} cards known:".format(pass_idx, n_cards))
        for bin_no in range(1, N_BINS):
            idx = int(bin_no * N_SCORES / N_BINS)
            report = "\tHand value {0} loses to {1}% of possible hands".format(
                idx, int(100 * summation[idx]))
            print(report)

    score_axis = np.arange(N_SCORES)
    sns.lineplot(x=score_axis, y=stats / np.sum(stats))
    plt.savefig("dist_{}.png".format(n_cards))
    plt.clf()
    np.save("dist_{}.npy".format(n_cards), summation)
def compute_strat_vector(agent, verbose=False):
    """Sample random game situations and tabulate the agent's action choices.

    For every combination of round, seating position, number of active
    players and own-pot bin, a batch of random deals and betting states is
    generated and passed to ``agent.act``. The chosen actions are binned and
    counted per card-percentile bin. Sampling repeats until every entry of
    ``n_games`` exceeds ``N_MIN_SAMPLES_PER_BIN``.

    Args:
        agent: object providing ``initialize(batch_size, initial_capital,
            n_players)`` and ``act(...)`` returning ``(actions, amounts)``.
        verbose: when true, print progress before each filling round and a
            summary at the end.

    Returns:
        ``(strategy, n_games)``. ``n_games`` counts samples per
        (seat, round, active players - 2, own-pot bin, card bin);
        ``strategy`` has one extra trailing axis of ``ACTION_SPACE_BINS``
        action counts.
    """
    # Mutable
    N_CARD_PERCENTILE_BINS = 10
    N_OWN_POT_SIZE_BINS = 6
    N_MIN_SAMPLES_PER_BIN = 25
    BATCH_SIZE = 10000
    INITIAL_CAPITAL = 200
    MIN_RAISE = 4
    ACTION_SPACE_BINS = INITIAL_CAPITAL // 4 + 3

    # Fixed
    N_ROUNDS = 4
    N_SEATING_POSITIONS = 6
    N_MAX_ACTIVE_PLAYERS = 5
    N_PLAYERS_TOTAL = 6

    path_root = "strategy"

    n_games = np.zeros((
        N_SEATING_POSITIONS, N_ROUNDS, N_MAX_ACTIVE_PLAYERS - 2, N_OWN_POT_SIZE_BINS, N_CARD_PERCENTILE_BINS
    ))
    strategy = np.zeros((
        N_SEATING_POSITIONS, N_ROUNDS, N_MAX_ACTIVE_PLAYERS - 2, N_OWN_POT_SIZE_BINS, N_CARD_PERCENTILE_BINS,
        ACTION_SPACE_BINS
    ))

    agent.initialize(BATCH_SIZE, INITIAL_CAPITAL, N_PLAYERS_TOTAL)

    ranktable = np.stack((
        np.load(os.path.join(path_root, "5card_rank_percentile.npy")),
        np.load(os.path.join(path_root, "6card_rank_percentile.npy")),
        np.load(os.path.join(path_root, "7card_rank_percentile.npy"))
    ))
    preflop_table = np.load(os.path.join(path_root, "preflop_ranks.npy")).tolist()
    evaluator = treys.Evaluator()

    # Column vector of batch row numbers, used to pair each game with its own
    # list of player indices when fancy-indexing (batch, player) arrays.
    batch_rows = np.arange(BATCH_SIZE)[:, None]

    lowest_bin_count = np.amin(n_games)
    while lowest_bin_count <= N_MIN_SAMPLES_PER_BIN:
        if verbose:
            print("Starting filling round")
            print("\tLowest number currently:", lowest_bin_count)
            print("\tNumber of bins at lowest:", np.count_nonzero(n_games == lowest_bin_count))
            print("\tAverage bin count:", np.mean(n_games))

        for round in range(0, N_ROUNDS):
            n_community_cards = [0, 3, 4, 5][round]
            for seating_position in range(N_SEATING_POSITIONS):
                for n_active_players in range(2, N_MAX_ACTIVE_PLAYERS):
                    for own_pot_idx in range(N_OWN_POT_SIZE_BINS):
                        own_pot = int(INITIAL_CAPITAL * own_pot_idx / N_OWN_POT_SIZE_BINS)

                        community_cards, all_hole_cards = generate_cards(N_PLAYERS_TOTAL, BATCH_SIZE)
                        hole_cards = all_hole_cards[:, seating_position, :]

                        rank_bin = np.zeros(BATCH_SIZE)
                        folded_players = np.zeros((BATCH_SIZE, N_PLAYERS_TOTAL - n_active_players), dtype=int)
                        active_players = np.zeros((BATCH_SIZE, n_active_players), dtype=int)
                        last_raiser = np.zeros(BATCH_SIZE, dtype=int)

                        for i in range(BATCH_SIZE):
                            # Our own seat is never among the folded players.
                            folded_row = random.sample(
                                [x for x in range(N_PLAYERS_TOTAL) if x != seating_position],
                                N_PLAYERS_TOTAL - n_active_players)
                            active_row = [x for x in range(N_PLAYERS_TOTAL) if x not in folded_row]
                            folded_players[i, :] = folded_row
                            active_players[i, :] = active_row
                            last_raiser[i] = random.choice(active_row)

                            if round != constants.PRE_FLOP:
                                rank = evaluator.evaluate(community_cards[i, :n_community_cards].tolist(),
                                                          hole_cards[i].tolist())
                                percentile = 1 - ranktable[round - constants.FLOP, rank]
                                rank_bin[i] = int(percentile * N_CARD_PERCENTILE_BINS)
                            else:
                                sorted_hole = sorted(hole_cards[i].tolist())
                                percentile = preflop_table.index(sorted_hole) / len(preflop_table)
                                rank_bin[i] = int(percentile * N_CARD_PERCENTILE_BINS)

                        current_bets = np.zeros((BATCH_SIZE, N_PLAYERS_TOTAL))
                        prev_round_investment = np.zeros((BATCH_SIZE, N_PLAYERS_TOTAL))

                        if round != constants.PRE_FLOP:
                            portions = np.random.random(size=(BATCH_SIZE, round + 1))
                            # If we are the last raiser, then we raised in the last round,
                            # which means there are no bets standing this round
                            portions[last_raiser == seating_position, -1] = 0
                            portions /= np.sum(portions, axis=1)[:, None]
                            self_prev_investment = (np.sum(portions[:, :-1], axis=1) * own_pot)[:, None]
                            self_current_investment = (portions[:, -1] * own_pot)[:, None]

                            # https://stackoverflow.com/questions/20103779/index-2d-numpy-array-by-a-2d-array-of-indices-without-loops
                            prev_round_investment[batch_rows, folded_players] += (
                                np.random.random(size=folded_players.shape) * self_prev_investment
                            )
                            prev_round_investment[batch_rows, active_players] += self_prev_investment
                            current_bets[batch_rows, active_players] += self_current_investment

                            # small blind + big blind
                            prev_round_investment[:, 0] = np.maximum(prev_round_investment[:, 0], 2)
                            prev_round_investment[:, 1] = np.maximum(prev_round_investment[:, 1], 4)
                        else:
                            # https://stackoverflow.com/questions/20103779/index-2d-numpy-array-by-a-2d-array-of-indices-without-loops
                            current_bets[batch_rows, folded_players] += (
                                np.random.random(size=folded_players.shape) * own_pot
                            )
                            current_bets[batch_rows, active_players] += own_pot

                        # Column of the last raiser / of our own seat within each
                        # game's active-player row.
                        _, last_raiser_col = np.where(active_players == last_raiser[:, None])
                        _, seating_position_col = np.where(active_players == seating_position)

                        # Active players from the last raiser up to (but excluding) our
                        # seat, wrapping around the row if needed, get MIN_RAISE added.
                        for i in range(BATCH_SIZE):
                            if last_raiser_col[i] < seating_position_col[i]:
                                for active_player in active_players[i, last_raiser_col[i]:seating_position_col[i]]:
                                    current_bets[i, active_player] += MIN_RAISE
                            elif last_raiser_col[i] > seating_position_col[i]:
                                for active_player in active_players[i, last_raiser_col[i]:]:
                                    current_bets[i, active_player] += MIN_RAISE
                                for active_player in active_players[i, :seating_position_col[i]]:
                                    current_bets[i, active_player] += MIN_RAISE

                        # One-hot mark of the folded players of each game. Indexing with
                        # ``folded_players`` alone would select whole batch rows by
                        # player id, so the batch row index is paired with it.
                        folded_1h = np.zeros((BATCH_SIZE, N_PLAYERS_TOTAL))
                        folded_1h[batch_rows, folded_players] = 1

                        # Games without any standing bet: the last raiser becomes the
                        # player with the largest previous-round investment.
                        no_bet_rounds = np.where(~current_bets.any(axis=1))[0]
                        last_raiser[no_bet_rounds] = np.argmax(prev_round_investment[no_bet_rounds, :], axis=1)

                        actions, amounts = agent.act(
                            player_idx=seating_position,
                            round=round,
                            active_games=np.ones(BATCH_SIZE),
                            current_bets=current_bets,
                            min_raise=np.full(BATCH_SIZE, MIN_RAISE),
                            prev_round_investment=prev_round_investment,
                            folded=folded_1h,
                            last_raiser=last_raiser,
                            hole_cards=hole_cards,
                            community_cards=community_cards[:, :n_community_cards]
                        )

                        # Raise amounts map to bins 3 and up; fold/call/check overwrite
                        # the bin with their own constant.
                        action_bins = 3 + np.floor(
                            amounts * (ACTION_SPACE_BINS - 3) / (INITIAL_CAPITAL + 1)).astype(int)
                        action_bins[actions == constants.FOLD] = constants.FOLD
                        action_bins[actions == constants.CALL] = constants.CALL
                        action_bins[np.logical_and(actions == constants.CALL,
                                                   current_bets.sum(axis=1) == 0)] = constants.CHECK

                        for card_rank in range(N_CARD_PERCENTILE_BINS):
                            n_games[seating_position, round, n_active_players - 2, own_pot_idx, card_rank] += (
                                np.count_nonzero(rank_bin == card_rank)
                            )
                            for action in range(ACTION_SPACE_BINS):
                                strategy[
                                    seating_position, round, n_active_players - 2, own_pot_idx, card_rank, action
                                ] += np.count_nonzero(action_bins[rank_bin == card_rank] == action)

        lowest_bin_count = np.amin(n_games)

    if verbose:
        print("Strategy vector finished, min {0}, max {1} games.".format(np.amin(n_games), np.amax(n_games)))
    return strategy, n_games
async def on_message(message):
    """React to ``!poker``: deal five cards, let the author swap some, rate the hand.

    The author first picks how many cards to replace by reacting with a
    keycap emoji, then picks each card position the same way. Either wait
    times out after 20 seconds. Messages sent by ``client.user`` are ignored;
    every other message is finally handed to ``bot.process_commands``.
    """
    if message.author == client.user:
        return

    if message.content.startswith('!poker'):
        keycaps = ('0️⃣', '1️⃣', '2️⃣', '3️⃣', '4️⃣', '5️⃣')
        # Keycap emoji -> value one less than the digit shown ('0' -> -1 ... '5' -> 4).
        keycap_value = {emoji: digit - 1 for digit, emoji in enumerate(keycaps)}

        user = bot.user
        user2 = bot.user
        author: discord.Member = message.author
        channel = message.channel
        deck = treys.Deck()
        evaluator = treys.Evaluator()
        board = deck.draw(5)
        p1_hand = deck.draw(0)

        # NOTE(review): stock treys' print_pretty_cards prints and returns None;
        # confirm the installed version returns the text to send.
        await channel.send(treys.Card.print_pretty_cards(board + p1_hand))
        change = await channel.send(
            str(author.mention) +
            ', quantas cartas você quer trocar? (Espere todos os emotes aparecerem, você tem 20 segundos)'
        )
        for emoji in keycaps:
            await change.add_reaction(emoji)

        def check(reaction, user):
            # Accept only a known keycap added by the command's author.
            return str(reaction.emoji) in keycap_value and user == message.author

        def check2(reaction):
            # Unknown emoji yield None, as check() never lets them through.
            return keycap_value.get(str(reaction.emoji))

        try:
            while user != message.author:
                reaction, user = await bot.wait_for('reaction_add', timeout=20.0, check=check)
            n_swaps = check2(reaction) + 1
            slots = np.zeros(n_swaps)
            if n_swaps != 0:
                change = await channel.send(
                    str(author.mention) +
                    ', quais cartas você quer trocar? (Espere todos os emotes aparecerem, você tem 20 segundos)'
                )
                for emoji in keycaps[1:]:
                    await change.add_reaction(emoji)

                for pick in range(n_swaps):
                    while user2 != message.author:
                        reaction, user2 = await bot.wait_for('reaction_add', timeout=20.0, check=check)
                    # Re-arm the wait loop for the next pick.
                    user2 = bot.user
                    # NOTE(review): a '0️⃣' reaction here stores -1, i.e. the last card.
                    slots[pick] = check2(reaction)

                # Replace from the highest chosen position downwards.
                slots = np.sort(slots)[::-1]
                for pick in range(n_swaps):
                    board[int(slots[pick])] = deck.draw(1)
        except asyncio.TimeoutError:
            await channel.send(str(author.mention) + ', demorou demais')
        else:
            await channel.send(treys.Card.print_pretty_cards(board + p1_hand))
            p1_score = evaluator.evaluate(board, p1_hand)
            p1_class = evaluator.get_rank_class(p1_score)
            await channel.send(
                str(author.mention) + ', você tirou um ' + evaluator.class_to_string(p1_class))

    await bot.process_commands(message)
def __init__(self):
    """Attach a fresh ``treys.Evaluator`` to the instance."""
    hand_evaluator = treys.Evaluator()
    self.evaluator = hand_evaluator
'startingStack=', gameDefinition.startingStack[i]) # Start at handNumber = 0 players.insert(i, gameUtilities.Player(i, gameDefinition, 0)) print('players[',i,'].stackSize=', players[i].stackSize) # Once you've got that sorted out then move on to the end of hand # play and how you store the values of the current hand (cards, bet, stack) # in the historical list of values. print ('players[0].stackSize=', players[0].stackSize) # Create a list to hold the player winnings (needed for the ACPC because the player's # stacks are reset after every hand) playerCumulativeWinnings = [0] * gameDefinition.numPlayers # Create the hand evaluator that will tell you who won a hand handEvaluator = treys.Evaluator() # start communicating with the dealer if (socketComms): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((HOST, PORT)) versionString = "VERSION:2.0.0\r\n" s.send(versionString.encode('utf-8')) previousHandNumber = -1 cont= True dataIn = '' handString = '' while (cont == True):