コード例 #1
0
ファイル: utils.py プロジェクト: yw10/mlp-holdem
def get_card_dict():
    """Map every card of a full deck to its position in that deck.

    Returns:
        dict: ``{card: index}`` where ``card`` is each value yielded by
        ``Deck().GetFullDeck()`` and ``index`` is its position in that list.
    """
    # Avoid naming the local ``dict``: that would shadow the builtin.
    full_deck = Deck().GetFullDeck()
    return {card: position for position, card in enumerate(full_deck)}
コード例 #2
0
ファイル: noauth.py プロジェクト: ucffool/poker-backend-api
def index():
    """Deal one random eight-player hand and return the outcome as JSON.

    A five-card board is drawn, then two cards per player. Each player's
    entry in the result holds the evaluator ``score`` and the hand class
    ``text``. The player with the lowest score is reported as the winner.

    Returns:
        str: JSON object with one key per player plus ``"winners"`` (name of
        one best-scoring player), ``"tie"`` (True when more than one player
        holds the best score) and ``"card"`` (string form of the fixed
        ``'Qh'`` card).
    """
    evaluator = Evaluator()
    deck = Deck()
    card = Card.new('Qh')
    board = deck.draw(5)
    player_names = ("player 1", "player 2", "player 3", "player 4", "player 5",
                    "player 6", "player 7", "player 8")
    players = {}
    output = {}
    for p in player_names:
        hand = deck.draw(2)
        score = evaluator.evaluate(board, hand)
        text = evaluator.class_to_string(evaluator.get_rank_class(score))
        players[p] = score
        output[p] = {'score': score, 'text': text}

    # The previous check (len(values) == len(set(values))) was inverted: it
    # was True exactly when no two players shared a score. Report a tie only
    # when the winning score itself is shared.
    best_score = min(players.values())
    tie = sum(1 for score in players.values() if score == best_score) > 1
    # Still a single name; on a tie this is the first best-scoring player.
    winner = min(players, key=players.get)

    output["winners"] = winner
    output["tie"] = tie
    output["card"] = Card.int_to_str(card)
    return json.dumps(output)
コード例 #3
0
ファイル: hulth.py プロジェクト: Street-Jack/streetjack
    def __init__(self, deck: Deck, evaluator: Evaluator):
        """Deal two hands and a board, then precompute buckets and ranks.

        Args:
            deck: Deck the cards are drawn from (two 2-card hands, then a
                5-card board, in that order).
            evaluator: Provides ``effective_rank`` for the per-stage bucket
                index and ``rank`` for the final hand ranks.
        """
        self._deck = deck

        first_hand = deck.draw(2)
        second_hand = deck.draw(2)
        self._hands = [first_hand, second_hand]
        self._board = deck.draw(5)

        self._hand_bucket_indices = []

        for hand in self._hands:
            per_stage = {}

            for stage in BOARD_CARDS:
                if stage != Stage.SHOWDOWN:
                    visible_board = self.board(stage=stage)
                    per_stage[stage] = evaluator.effective_rank(
                        hand, visible_board, MAX_BUCKETS)
                else:
                    # Showdown reuses the river bucket, so RIVER must already
                    # have been visited by this loop.
                    per_stage[stage] = per_stage[Stage.RIVER]

            self._hand_bucket_indices.append(per_stage)

        self._hand_ranks = [
            evaluator.rank(self._hands[seat], self._board)
            for seat in (SMALL_BLIND, BIG_BLIND)
        ]
コード例 #4
0
    def evaluate(self, hands, boards, threshold):
        """Collect cards that would raise the win rate, and their mean win rate.

        Every card not already in ``hands`` or ``boards`` is temporarily
        appended to ``boards``. If that completes a five-card board, the
        parent class's win rate is recomputed and the card is kept when the
        new rate is at least ``threshold`` and more than 0.1 above the
        current rate. If it makes a four-card board, this method recurses
        and merges the cards found one level down.

        Args:
            hands: Cards held by the player.
            boards: Community cards; mutated in place during the search and
                restored (each appended card is removed again) before return.
            threshold: Minimum new win rate for a card to be collected.

        Returns:
            tuple: ``(draw_cards, expect_win_rate)`` - the collected cards and
            the average of the contributing win rates (0 when none qualified).
        """
        # Baseline win rate for the board as passed in.
        win_rate = super(CardCounting, self).evaluate(hands, boards, False)
        expect_win_rate = 0
        count = 0

        deck = Deck()
        draw_cards = []

        for card in deck.draw(52):
            # Skip cards that are already visible.
            if card in hands or card in boards:
                continue

            boards.append(card)
            if len(boards) == 5:  # board was at the turn; this is the last card
                new_win_rate = super(CardCounting,
                                     self).evaluate(hands, boards, False)
                if new_win_rate >= threshold and new_win_rate - win_rate > 0.1:
                    draw_cards.append(card)
                    expect_win_rate += new_win_rate
                    count += 1
                    # print('old:{}'.format(win_rate) + ' new:{}'.format(new_win_rate))
            elif len(boards) == 4:  # board was at the flop; look one more card ahead
                (cards, rate) = self.evaluate(hands, boards, threshold)
                if rate != 0:
                    expect_win_rate += rate
                    count += 1

                # Merge the deeper level's cards without duplicates.
                for dc in cards:
                    if dc not in draw_cards:
                        draw_cards.append(dc)
            # Undo the temporary append so the caller's list is unchanged.
            boards.remove(card)

        if count != 0:
            expect_win_rate = expect_win_rate / count
        return (draw_cards, expect_win_rate)
コード例 #5
0
 def __init__(self, ph1, ph2, n=1):
     """Store both player-hand objects, a new deck and the ``n`` setting.

     Args:
         ph1: Player-hand object registered under player id ``1``.
         ph2: Player-hand object registered under player id ``-1``.
         n: Stored unchanged as ``self.n`` (defaults to 1).
     """
     # Player-hand objects.
     self.ph1, self.ph2 = ph1, ph2
     # Fresh deck for this instance.
     self.deck = Deck()
     self.n = n
     # Maps a player id (1 or -1) to that player's hand object.
     self.PLAYERS_HAND_DICT = dict(((1, self.ph1), (-1, self.ph2)))
コード例 #6
0
ファイル: lookaheadplay.py プロジェクト: hisarack/gym-hearts
 def watch(self, observation, info):
     """Keep ``self._available_cards`` in sync with the latest step.

     Nothing happens when ``info['done']`` is True. When
     ``info['is_new_round']`` is True the available cards are reset to 52
     cards drawn from a new deck. Otherwise the card in ``info['action']``
     is removed from the available cards.
     """
     if info['done'] is True:
         return
     if info['is_new_round'] is True:
         self._available_cards = Deck().draw(52)
         return
     self._available_cards.remove(info['action'])
コード例 #7
0
 def test_card(self):
     """Cards stored for players and for the game have short string forms.

     Two cards are created for each player and five community cards for the
     game; every stored ``card_str`` must be shorter than three characters.
     """
     deck = Deck()
     for _ in range(2):
         self.guest.card_player_set.create(card_str=Card.int_to_str(deck.draw(1)))
         self.learner.card_player_set.create(card_str=Card.int_to_str(deck.draw(1)))
     # assertTrue on a list only checks that the list is non-empty; all()
     # makes the per-card condition count.
     self.assertTrue(all(len(x.card_str) < 3 for x in self.guest.card_player_set.all()))
     self.assertTrue(all(len(x.card_str) < 3 for x in self.learner.card_player_set.all()))

     for _ in range(5):
         self.game.card_community_set.create(card_str=Card.int_to_str(deck.draw(1)))
     # Check the set that was just populated (was card_player_set).
     self.assertTrue(all(len(x.card_str) < 3 for x in self.game.card_community_set.all()))
コード例 #8
0
    def __init__(self, n_player=4):
        """Set up an empty game state with a new deck.

        Args:
            n_player: Number of players; ``self.n`` is derived from it as
                ``52 // n_player - 1``.
        """
        self.deck = Deck()

        # Trump and lead-suit markers all start at 0.
        self.trumpCard = 0
        self.trumps = 0
        self.leadSuit = 0

        cards_per_player = 52 // n_player
        self.n = cards_per_player - 1

        # Player and trick bookkeeping starts empty.
        self.playerList, self.playerOrder, self.playedCards = [], [], []
        self.leadPlayer = None
コード例 #9
0
ファイル: run.py プロジェクト: befeltingu/DeepRL
    def __init__(self):
        """Create the game with default settings and unset player slots."""
        # Fixed configuration.
        self.actions = ['raise', 'check', 'call', 'fold']
        self.anticipatory = 0.1

        # State that starts out unset.
        self.GameState = None  # tensor that tracks current state
        self.current_player = None

        # Card helpers.
        self.deck = Deck()
        self.deck_lookup = None
        self.evaluator = Evaluator()

        self.S = 1000  # starting stack

        # Blind seats are unset here, so both player slots hold None.
        self.SB = None
        self.BB = None
        self.players = ['_', self.SB, self.BB]
コード例 #10
0
 def move(self, observation):
     """Choose the card to play for the given observation.

     On the first call (no current node yet) the search root is built from
     a copy of the player's hand cards and from the 52 drawn deck cards
     minus that hand. If ``observation['playing_ids']`` is empty at that
     point, ``Card.new('2c')`` is returned immediately. Otherwise the
     action card of the node returned by ``self._mcts.UCTSEARCH`` is
     returned.
     """
     if self._current_node is None:
         own_cards = observation['hand_cards'].copy()
         unseen_cards = Deck().draw(52)
         for held in own_cards:
             unseen_cards.remove(held)
         new_mcts_info = {
             'my_player_id': self._my_player_id,
             'my_hand_cards': own_cards,
             'available_cards': unseen_cards,
         }
         root_state = MyHeartState(0, observation, new_mcts_info)
         self._current_node = Node(root_state)
         if len(observation['playing_ids']) == 0:
             return Card.new('2c')
     chosen = self._mcts.UCTSEARCH(self._current_node, observation)
     return chosen.state.get_action_card()
コード例 #11
0
ファイル: encoders.py プロジェクト: bjarnege/learning-ai
 def __init__(self, n_seats, ranking_encoding='norm'):
     """Configure the encoder and precompute its deck arrays.

     Args:
         n_seats: Number of seats; contributes ``n_seats + 6 * n_seats``
             to ``n_dim``.
         ranking_encoding: ``'one-hot'`` adds 7463 to ``n_dim``, ``'norm'``
             adds 1, any other value adds 0.
     """
     self.n_seats = n_seats
     self.ranking_encoding = ranking_encoding

     if ranking_encoding == 'one-hot':
         ranking_width = 7463
     elif ranking_encoding == 'norm':
         ranking_width = 1
     else:
         ranking_width = 0
     self.n_dim = 265 + 104 + 6 + n_seats + 6 * n_seats + ranking_width

     self._deck = np.array(Deck.GetFullDeck(), dtype=np.int64)
     # Same deck with a leading -1 entry.
     missing_card = np.array([-1], dtype=np.int64)
     self._deck_alt = np.concatenate((missing_card, self._deck))
     self._evaluator = Evaluator()
コード例 #12
0
ファイル: monte.py プロジェクト: ErrolWilliams/Poker-AI
def monteCarlo(board, hand, numPlayers, monteN):
    """Estimate by simulation how often ``hand`` is not beaten.

    Each trial deals two random cards to each of ``numPlayers`` opponents,
    fills the board up to five cards, and counts the trial as a win unless
    some opponent's evaluator score is strictly lower than ours (equal
    scores count as a win).

    Args:
        board: Known community cards; not modified.
        hand: Our hole cards.
        numPlayers: Number of opposing hands dealt per trial.
        monteN: Number of trials; ``int(monteN)`` trials are run and the win
            count is divided by ``monteN`` itself.

    Returns:
        float: ``wins / monteN``.

    Raises:
        ZeroDivisionError: If ``monteN`` is 0.
    """
    deck = Deck()
    evaluator = Evaluator()
    # The set of dealable cards is the same for every trial, so build it
    # once instead of re-filtering the whole deck per trial.
    unseen = [card for card in deck.cards
              if card not in board and card not in hand]
    winAmount = 0
    for _trial in range(int(monteN)):
        trial_board = board.copy()
        monteDeck = unseen.copy()

        playerHands = []
        for _player in range(numPlayers):
            dealt = []
            for _card in range(2):
                dealt.append(monteDeck.pop(randrange(0, len(monteDeck))))
            playerHands.append(dealt)

        while len(trial_board) < 5:
            trial_board.append(monteDeck.pop(randrange(0, len(monteDeck))))

        handRank = evaluator.evaluate(trial_board, hand)
        # all() stops at the first opponent that beats us, like the old break.
        if all(evaluator.evaluate(trial_board, other) >= handRank
               for other in playerHands):
            winAmount += 1
    return winAmount/monteN
コード例 #13
0
ファイル: run.py プロジェクト: befeltingu/DeepRL
    def init_game(self, SB, BB, GameState):
        """Attach the blinds and game state, and build the card lookup.

        :param SB: stored as ``self.SB`` and as ``self.players[1]``
        :param BB: stored as ``self.BB`` and as ``self.players[2]``
        :param GameState: stored as ``self.GameState``
        :return: None
        """
        self.GameState = GameState
        self.SB, self.BB = SB, BB
        self.players = ['_', self.SB, self.BB]

        # Map each card value of the full deck to its list position so state
        # tracking can use small consecutive ints.
        self.deck_lookup = {
            card: position
            for position, card in enumerate(Deck.GetFullDeck())
        }
コード例 #14
0
 def __init__(self, n_seats, ranking_encoding='norm', concat=True, drop_cards=False, split_cards=False):
     """Store encoder options and precompute the deck arrays.

     Args:
         n_seats: Stored as ``self.n_seats``.
         ranking_encoding: Stored as ``self.ranking_encoding``.
         concat: Stored as ``self.concat``.
         drop_cards: Accepted but not used in the visible body.
         split_cards: Accepted but not used in the visible body.
     """
     self.n_seats = n_seats
     self.ranking_encoding = ranking_encoding

     full_deck = np.array(Deck.GetFullDeck(), dtype=np.int64)
     self._deck = full_deck
     # Same deck with a leading -1 entry.
     self._deck_alt = np.concatenate(
         (np.array([-1], dtype=np.int64), full_deck))

     self._evaluator = Evaluator()
     self.concat = concat
コード例 #15
0
async def set_up_game(web_client, channel_id, plo=False):
    """Deal a new hand for the channel and start play.

    Creates the channel's table and deck on first use, reshuffles, puts
    three cards on the table, deals each player 4 (``plo``) or 2 cards and
    sends each player a rendering of their cards. With two players the
    heads-up flow is started; with three players the blinds are posted and
    the first actor is announced.

    Args:
        web_client: Client handed through to ``sendslack``.
        channel_id: Key into ``player_list`` and ``tab_list``.
        plo: Deal four hole cards and flag the table as PLO when true.
    """
    players = player_list[channel_id]
    if channel_id not in tab_list:
        new_deck = Deck()
        new_deck.shuffle()
        tab_list[channel_id] = {"table": Table(), "deck": new_deck}
    entry = tab_list[channel_id]
    tab = entry["table"]
    deck = entry["deck"]
    deck.shuffle()
    tab.cards.extend(deck.draw(3))
    if plo:
        print("plos")
        tab.plo = True
    for player in players:
        if not plo:
            print("nlhe")
            player.cards.extend(deck.draw(2))
        else:
            player.cards.extend(deck.draw(4))
        print("got to cards bit")
        await sendslack(Card.print_pretty_cards(player.cards), web_client,
                        player.name)

    if len(players) == 2:
        # Coin flip: half of the time the first player moves to the end.
        if random.randint(1, 2) == 1:
            players.append(players.pop(0))
        await start_heads_up(web_client, channel_id)

    if len(players) == 3:
        await sendslack("Starting new game...", web_client, channel_id)
        await sendslack("Starting stacks are %d" % newMoney, web_client,
                        channel_id)
        blinds_message = "Big blind is %d, small blind is %d" % (bigblind,
                                                                 smallblind)
        await sendslack(blinds_message, web_client, channel_id)

        players[0].dealer = True

        # Seat 1 posts the small blind.
        players[1].bet = smallblind
        players[1].money = players[1].money - smallblind
        players[1].tocall = smallblind

        # Seat 2 posts the big blind.
        players[2].bet = bigblind
        players[2].money = players[2].money - bigblind

        tab.pot = tab.pot + players[1].bet + players[2].bet
        tab.highbet = bigblind
        await sendslack("<@%s> is first to act" % players[0].name, web_client,
                        channel_id)
        await sendslack("%d to call" % bigblind, web_client, channel_id)
コード例 #16
0
ファイル: table.py プロジェクト: PhDChe/pokerenv
 def __init__(self, n_players, agents, seed, stack_low=50, stack_high=200, hand_history_location='hands/', invalid_action_penalty=-5):
     """Create a table with ``n_players`` seated players.

     Args:
         n_players: Number of players to create.
         agents: Indexable collection; ``agents[n]`` is handed to player n.
         seed: Seed for the table's NumPy random generator.
         stack_low: Stored as ``self.stack_low``.
         stack_high: Stored as ``self.stack_high``.
         hand_history_location: Stored as ``self.hand_history_location``;
             hand history starts disabled.
         invalid_action_penalty: Passed to every ``Player``.
     """
     # Hand-history settings.
     self.hand_history_location = hand_history_location
     self.hand_history_enabled = False

     # Stack bounds and randomness.
     self.stack_low, self.stack_high = stack_low, stack_high
     self.rng = np.random.default_rng(seed)

     # Betting state for a table with no hand in progress.
     self.n_players = n_players
     self.pot = 0
     self.bet_to_match = 0
     self.minimum_raise = 0
     self.street = GameState.PREFLOP
     self.cards = []
     self.deck = Deck()

     # Players are numbered from 1 but named from 0.
     seated = []
     for n in range(n_players):
         seated.append(Player(n+1, agents[n], 'player_%d' % n, invalid_action_penalty))
     self.players = seated
     self.active_players = n_players

     self.evaluator = Evaluator()
     self.history = []
コード例 #17
0
    def simulatePokerGeneral(self):
        """Draw cards from a new deck into player and community piles.

        Cards are drawn one at a time. While the player holds fewer than
        five cards, a drawn card goes to the player when
        ``self.isTypeOfCard`` accepts its pretty-printed form, otherwise to
        the community cards. Drawing stops once the player has exactly five
        cards and there are at least five community cards.

        Returns:
            tuple: ``(playerCards, communityCards)``.
        """
        playerCards, communityCards, deck = [], [], Deck()
        while len(playerCards) != 5 or len(communityCards) < 5:
            card = deck.draw(1)
            # NOTE(review): once the player has five cards, drawn cards are
            # discarded (nothing is appended), so if fewer than five
            # community cards were collected by then the loop keeps drawing
            # until the deck runs out. Confirm whether leftover cards should
            # go to communityCards instead.
            if len(playerCards) != 5:
                stringCardDrawn = Card.print_pretty_card(card)
                if self.isTypeOfCard(stringCardDrawn):
                    playerCards.append(card)
                else:
                    communityCards.append(card)

        return playerCards, communityCards
コード例 #18
0
ファイル: lookaheadplay.py プロジェクト: hisarack/gym-hearts
 def _simulate(self, expanded_card, observation):
     """Play out the current round once and return the current player's score.

     The cards not in the current player's hand are shuffled and dealt to
     the other players of a cloned environment. The current player is forced
     to open with ``expanded_card``. The environment is then stepped until
     it reports a new round or the end of the game.

     Args:
         expanded_card: First card the current player plays in the rollout.
         observation: Observation dict; the keys read here are
             'number_of_players', 'number_of_hand_cards_for_all_players',
             'current_player_id' and 'hand_cards'.

     Returns:
         The entry of the final observation's 'scores' for the current player.
     """
     number_of_players = observation['number_of_players']
     number_of_hand_cards_for_all_players = observation[
         'number_of_hand_cards_for_all_players']
     current_player_id = observation['current_player_id']
     # Work on copies so the tracked available cards and the observation's
     # hand are not modified.
     competitor_cards = self._available_cards.copy()
     hand_cards = observation['hand_cards'].copy()
     for c in hand_cards:
         competitor_cards.remove(c)
     random.shuffle(competitor_cards)
     # clone the env and players
     # NOTE(review): the class attribute Deck._FULL_DECK is swapped just long
     # enough to construct the deck and then reset to an empty list -
     # presumably so the new Deck contains only the competitor cards; confirm
     # against the Deck implementation. If Deck() raises, the class attribute
     # is left pointing at competitor_cards.
     Deck._FULL_DECK = competitor_cards
     deck = Deck()
     Deck._FULL_DECK = []
     env = hearts_env.HeartsEnv()
     for player_id in range(0, number_of_players):
         if player_id == current_player_id:
             env.add_player(hand_cards=hand_cards,
                            strategy=CompletePlayStrategy(
                                first_action_card=expanded_card))
         else:
             cards = deck.draw(
                 number_of_hand_cards_for_all_players[player_id])
             # A one-card draw is wrapped so hand_cards is always a list
             # (presumably draw(1) returns a bare card - TODO confirm).
             if number_of_hand_cards_for_all_players[player_id] == 1:
                 cards = [cards]
             env.add_player(hand_cards=cards,
                            strategy=CompletePlayStrategy())
     env.copy_observation(observation)
     env.start()
     simulated_observation = env.get_observation()
     # play until the round finishes, using the complete strategy as the
     # default policy
     is_new_round = False
     done = False
     while (not is_new_round) and (not done):
         action = env.move()
         simulated_observation, reward, done, info = env.step(action)
         is_new_round = info['is_new_round']
     score = simulated_observation['scores'][current_player_id]
     return score
コード例 #19
0
ファイル: hulth.py プロジェクト: Street-Jack/streetjack
    def __init__(self, deck: Deck, evaluator: Evaluator):
        """Deal cards but assign random bucket indices and a winner.

        Two hands and a board are drawn from ``deck``. For each of the two
        players a random preflop bucket is chosen, and each later stage's
        bucket is the previous stage's bucket plus a random deviance,
        clamped to ``[0, MAX_BUCKETS - 1]``. Showdown reuses the river
        bucket. The player with the higher showdown bucket has its entry in
        ``self._hand_ranks`` set to 1 (the other stays 0); equal buckets are
        resolved at random.

        Args:
            deck: Deck the cards are drawn from.
            evaluator: Accepted but not used in this body.
        """
        self._deck = deck

        self._hands = [deck.draw(2), deck.draw(2)]
        self._board = deck.draw(5)

        self._hand_bucket_indices = []

        for _ in (SMALL_BLIND, BIG_BLIND):
            bucket_indices = dict()

            bucket_indices[Stage.PREFLOP] = random.randint(0, MAX_BUCKETS - 1)
            prev_stage = Stage.PREFLOP

            # Apply deviance stage by stage.
            for key in [Stage.FLOP, Stage.TURN, Stage.RIVER]:
                deviance = FakeCardBundle.rand_deviance()
                bucket_indices[key] = min(
                    max(bucket_indices[prev_stage] + deviance, 0),
                    MAX_BUCKETS - 1)
                # Advance the reference stage; without this every stage
                # deviated from the preflop bucket rather than from the
                # stage before it.
                prev_stage = key

            bucket_indices[Stage.SHOWDOWN] = bucket_indices[Stage.RIVER]

            self._hand_bucket_indices.append(bucket_indices)

        small_blind_final_bucket = self.bucket_index(SMALL_BLIND,
                                                     Stage.SHOWDOWN)
        big_blind_final_bucket = self.bucket_index(BIG_BLIND, Stage.SHOWDOWN)

        self._hand_ranks = [0, 0]
        # Random pick is kept only when both final buckets are equal.
        winner = random.randrange(2)

        if small_blind_final_bucket > big_blind_final_bucket:
            winner = SMALL_BLIND

        if small_blind_final_bucket < big_blind_final_bucket:
            winner = BIG_BLIND

        self._hand_ranks[winner] += 1
コード例 #20
0
    def evaluate(self, hands, num_draw, count):
        """Average the parent evaluator's win rate over random boards.

        Args:
            hands: Hole cards; drawn cards found in ``hands`` are skipped.
            num_draw: Number of board cards to draw per sample.
            count: Number of samples; must be non-zero.

        Returns:
            float: Mean win rate over ``count`` samples (also printed).
        """
        deck = Deck()
        boards = []
        total_rate = 0.0

        for _ in range(count):
            deck.shuffle()
            # The board list is empty here: it is cleared after each sample.
            while len(boards) < num_draw:
                card = deck.draw(1)
                if card not in hands:
                    boards.append(card)
            total_rate += super(HoleEvaluator,
                                self).evaluate(hands, boards, False)
            boards.clear()

        hole_win_rate = total_rate / float(count)
        print('eval win rate: {:2.2%}'.format(hole_win_rate))
        return hole_win_rate
コード例 #21
0
    def setUp(self):
        """Create two players, a game and its serializer, then deal cards.

        Also checks that the serialized ``id`` and ``total_pot`` match the
        game object, then stores two cards per player and five community
        cards drawn from a new deck.
        """
        self.guest = Player_Factory.create()
        self.learner = Player_Factory.create()
        # object instance being created in query set form
        self.game = Game_Factory.create(players=(self.guest, self.learner))

        # object being serialized into json
        self.game_serializer = GameSerializer(self.game)
        for field_name in ['id', 'total_pot']:
            # Both names live on self; the bare ``game_serializer`` / ``game``
            # used before raised NameError.
            self.assertEqual(self.game_serializer.data[field_name],
                             getattr(self.game, field_name))

        deck = Deck()
        for _ in range(2):
            self.guest.card_player_set.create(
                card_str=Card.int_to_str(deck.draw(1)))
            self.learner.card_player_set.create(
                card_str=Card.int_to_str(deck.draw(1)))
        for _ in range(5):
            self.game.card_community_set.create(
                card_str=Card.int_to_str(deck.draw(1)))
コード例 #22
0
ファイル: perf_treys.py プロジェクト: wstlabs/treys
def setup(n, m):
    """Build ``n`` random (board, hand) pairs.

    Args:
        n: Number of pairs to generate.
        m: Number of cards drawn for each board; must be positive.

    Returns:
        tuple: ``(boards, hands)`` - two lists of length ``n``; each hand is
        a two-card draw. The deck is reshuffled after every pair.
    """
    assert m > 0
    deck = Deck()
    boards, hands = [], []
    for _ in range(n):
        # Board first, then hand: both come from the same shuffled deck.
        board = deck.draw(m)
        hand = deck.draw(2)
        boards.append(board)
        hands.append(hand)
        deck.shuffle()
    return boards, hands
コード例 #23
0
ファイル: table.py プロジェクト: PhDChe/pokerenv
 def reset(self):
     """Start a new hand: reshuffle, reseat and deal two cards per player.

     The deck is replaced by a full deck shuffled with the table's RNG, the
     player order is shuffled, and every player is reset, given its index as
     position, two cards, and a stack drawn from
     ``rng.integers(stack_low, stack_high)``.
     """
     self.pot = 0
     self.street = GameState.PREFLOP
     self.cards = []

     self.deck.cards = Deck.GetFullDeck()
     self.rng.shuffle(self.deck.cards)
     self.rng.shuffle(self.players)

     seat_count = self.n_players
     self.active_players = seat_count
     dealt = self.deck.draw(seat_count * 2)
     for seat, player in enumerate(self.players):
         player.reset()
         player.position = seat
         # First card from the first half of the draw, second card from the
         # matching slot in the second half.
         first_card = dealt[seat]
         second_card = dealt[seat + seat_count]
         player.cards = [first_card, second_card]
         player.stack = self.rng.integers(self.stack_low, self.stack_high, 1)[0]

     self.bet_to_match = 0
     self.history = []
コード例 #24
0
 def __init__(self):
     """Create the initial state: chance node to act, pot of 100, empty board."""
     # Whose turn it is (0 oop, 1 ip, 2 chance node, 3 terminal node);
     # the state starts at the chance node.
     self._current = CHANCE_ID
     # TODO better initialization
     self._players = [PlayerState() for _ in range(2)]
     # Chips currently in the pot.
     self._pot = 100
     # Current street.
     self._street = RIVER
     # Action sequence so far.
     self._history = ""
     # New deck object.
     self._deck = Deck()
     # Legal actions start as the indices of the cards that can be drawn.
     self._legal_actions = [card_index for card_index in range(DECK_SIZE)]
     # Board cards (array of ints for now).
     self._board = []
コード例 #25
0
async def set_up_game(web_client, channel_id, plo=False):
    """Deal a new hand for the channel and hand over to the game flow.

    Creates the channel's table and deck on first use, reshuffles, puts
    three cards on the table, deals each player 4 (``plo``) or 2 cards and
    sends each player a rendering of their cards. Two players start the
    heads-up flow; more than two are shuffled into a random order, recorded
    on the table as ``origlist``, and the regular game flow is started.

    Args:
        web_client: Client handed through to ``sendslack`` and the starters.
        channel_id: Key into ``player_list`` and ``tab_list``.
        plo: Deal four hole cards and flag the table as PLO when true.
    """
    players = player_list[channel_id]
    if channel_id not in tab_list:
        new_deck = Deck()
        new_deck.shuffle()
        tab_list[channel_id] = {"table": Table(), "deck": new_deck}
    entry = tab_list[channel_id]
    tab = entry["table"]
    deck = entry["deck"]
    deck.shuffle()
    tab.cards.extend(deck.draw(3))
    if plo:
        print("plos")
        tab.plo = True
    for player in players:
        if not plo:
            print("nlhe")
            player.cards.extend(deck.draw(2))
        else:
            player.cards.extend(deck.draw(4))
        print("got to cards bit")
        await sendslack(Card.print_pretty_cards(player.cards), web_client,
                        player.name)

    if len(players) == 2:
        # Coin flip: half of the time the first player moves to the end.
        if random.randint(1, 2) == 1:
            players.append(players.pop(0))
        await start_heads_up(web_client, channel_id)

    if len(players) > 2:
        random.shuffle(players)
        # Snapshot of the seating order for this game.
        tab.origlist = players.copy()
        await start_game(web_client, channel_id)
コード例 #26
0
def odds(hand, board, num_players, num_trials=1000):
    """Estimate the share of simulated deals in which our hand comes first.

    Args:
        hand: Two card strings accepted by ``Card.new``.
        board: Known board card strings (possibly empty).
        num_players: Total number of players including us;
            ``num_players - 1`` opposing hands are dealt per trial.
        num_trials: Number of simulated deals (default 1000, the previously
            hard-coded value).

    Returns:
        float: Fraction of trials for which
        ``evaluator.hand_summary(board, players)`` returned 0.
        NOTE(review): this presumably means "player 0 (us) wins"; confirm
        against the ``hand_summary`` implementation in use.

    Raises:
        ValueError: If ``num_trials`` is not positive.
        SystemExit: If a board card string is rejected by ``Card.new``
            (behavior kept from the original).
    """
    import sys

    if num_trials <= 0:
        raise ValueError("num_trials must be positive")

    my_hand = [Card.new(hand[0]), Card.new(hand[1])]

    my_board = []
    for card_str in board:
        print(board, file=sys.stderr)
        try:
            my_board.append(Card.new(card_str))
        except KeyError:
            print("BAD!!!", file=sys.stderr)
            # sys.exit instead of the site-provided exit(), which is not
            # guaranteed to exist outside interactive sessions.
            sys.exit()

    # Cards still available once our hand and the known board are removed.
    my_deck = Deck()
    for known in my_hand + my_board:
        my_deck.cards.remove(known)

    evaluator = Evaluator()
    # One deck object is reused; its card list is replaced every trial.
    deck = Deck()

    count = 0
    for _ in range(num_trials):
        cards = my_deck.cards.copy()
        rshuffle(cards)
        deck.cards = cards

        players = [my_hand]
        for _opponent in range(num_players - 1):
            players.append(deck.draw(2))

        trial_board = my_board.copy()
        while len(trial_board) < 5:
            trial_board.append(deck.draw(1))

        if evaluator.hand_summary(trial_board, players) == 0:
            count += 1

    return count / num_trials
コード例 #27
0
def _drop_one_card(evaluator, board, hand, target_score):
    """Return the first (board, hand) with one card removed that still scores target_score, else None."""
    for i in range(len(board) + len(hand)):
        trial_hand = list(hand)
        trial_board = list(board)
        if i < 2:
            # Drop hole card i; the first board card moves into the hand so
            # the evaluator still receives a two-card hand.
            trial_hand.pop(i)
            trial_hand.append(board[0])
            trial_board.pop(0)
        else:
            # Drop board card i - 2.
            trial_board.pop(i - 2)
        if evaluator.evaluate(trial_board, trial_hand) == target_score:
            return trial_board, trial_hand
    return None


def evaluateCards(boardCards, handCards):
    """Evaluate a hand and report its class, best five cards and score.

    Args:
        boardCards: String of concatenated two-character board cards; boards
            of 3, 4 or 5 cards are parsed, any other length leaves the board
            empty.
        handCards: String of two concatenated two-character hole cards whose
            characters are swapped before parsing (e.g. ``"hA"`` -> ``"Ah"``).

    Returns:
        str: ``"<rank class> <five card string> <score>"``.

    Raises:
        RuntimeError: If no card can be removed while keeping the best score
            (previously surfaced as an UnboundLocalError).
    """
    n = 2
    boardCardsSplit = [boardCards[i:i + n]
                       for i in range(0, len(boardCards), n)]
    handCardsSplit = [handCards[i:i + n]
                      for i in range(0, len(handCards), n)]

    # Hole cards arrive with their two characters swapped.
    hand = [
        Card.new(str((handCardsSplit[k][1] + handCardsSplit[k][0]).capitalize()))
        for k in (0, 1)
    ]

    board = []
    if len(boardCardsSplit) in (3, 4, 5):
        board = [Card.new(str(code.capitalize())) for code in boardCardsSplit]

    print(Card.print_pretty_cards(board + hand))

    evaluator = Evaluator()
    bestScore = evaluator.evaluate(board, hand)
    handType = evaluator.get_rank_class(bestScore)

    print("Player 1 hand rank = %d (%s)\n" %
          (bestScore, evaluator.class_to_string(handType)))

    # Seven cards -> six: remove one card that does not affect the score.
    if len(board) == 5:
        reduced = _drop_one_card(evaluator, board, hand, bestScore)
        if reduced is None:
            raise RuntimeError("no six-card subset keeps the best score")
        best6Board, best6Hand = reduced
    else:
        best6Board = board
        best6Hand = hand

    print(Card.print_pretty_cards(best6Board + best6Hand))

    # Six cards -> five: repeat the same reduction.
    if len(best6Board) == 4 or len(board) == 4:
        reduced = _drop_one_card(evaluator, best6Board, best6Hand, bestScore)
        if reduced is None:
            raise RuntimeError("no five-card subset keeps the best score")
        best5Board, best5Hand = reduced
    else:
        best5Board = best6Board
        best5Hand = best6Hand

    print(Card.print_pretty_cards(best5Board + best5Hand))

    card1 = convertCardToString(best5Board[0])
    card2 = convertCardToString(best5Board[1])
    card3 = convertCardToString(best5Board[2])
    card4 = convertCardToString(best5Hand[0])
    card5 = convertCardToString(best5Hand[1])

    handString = card1 + card2 + card3 + card4 + card5
    print("Hand string:  " + handString)

    stringToSend = str(handType) + " " + handString + " " + str(bestScore)

    print("String to send:  " + stringToSend)

    return stringToSend
コード例 #28
0
            pot += MakeDecision(bot1, bot1_prob)
            pot += MakeDecision(bot2, bot2_prob)
            print("Pot:" + str(pot))
            print()
            stage += 1
        if stage == 4:
            #Call Or Raise Expected Value-Don't fold
            print("Post")
            print("---------------")
            print()
            bot1_prob = CalculateWinProb(bot1.hand, bot1.board)
            bot2_prob = CalculateWinProb(bot2.hand, bot2.board)
            pot += MakeDecision(bot1, bot1_prob)
            pot += MakeDecision(bot2, bot2_prob)
            print("Pot:" + str(pot))
            print()
            print("Showdown")
            print("---------------")
            print()
            GoToShowdown(bot1, bot2)
            stage += 1


# Script entry: module-level game configuration and shared objects.
big_blind = 100
pot = 0
player_1 = Bot()
player_2 = Bot()
# NOTE(review): evaluator and deck are module-level names, presumably read as
# globals by the functions defined above - confirm before moving them.
evaluator = Evaluator()
deck = Deck()
# Play one game between the two bots, then print player 1's win probability
# for its hand and board.
play_game(player_1, player_2, pot)
print(CalculateWinProb(player_1.hand, player_1.board))
コード例 #29
0
	def __init__(self, n_seats, max_limit=100000, debug=False):
		"""Create an empty table and define the observation/action spaces.

		Args:
			n_seats: Number of seats; every seat starts with an empty
				placeholder player.
			max_limit: Upper bound used for the chip-valued entries of the
				observation space.
			debug: Stored as ``self._debug``.
		"""
		n_suits = 4                     # s,h,d,c
		n_ranks = 13                    # 2,3,4,5,6,7,8,9,T,J,Q,K,A
		n_community_cards = 5           # flop, turn, river
		n_pocket_cards = 2
		n_stud = 5                      # not used in this method

		self.level_raises = {0:0, 1:0, 2:0} # Assuming 3 players
		
		self.n_seats = n_seats
		self._blind_index = 0
		# Start at the first blind level.
		[self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
		self._deck = Deck()
		self._evaluator = Evaluator()
		self.last_seq_move = [] 
		self.filled_seats = 0
		self.signal_end_round = False
		self.winning_players = None
		self.starting_stack_size = None
		self.community = []
		self._round = 0
		self._button = 0
		self._discard = []
		self.game_resolved = False
		self.is_new_r = True
		self._side_pots = [0] * n_seats
		self._current_sidepot = 0 # index of _side_pots
		self._totalpot = 0
		self._tocall = 0
		self._lastraise = 0
		self._number_of_hands = 0
		self._record_players = []

		# fill seats with dummy players
		self._seats = [Player(i, stack=0, emptyplayer=True) for i in range(n_seats)]
		self.learner_bot = None
		self.villain = None
		self.emptyseats = n_seats
		self._player_dict = {}
		self._current_player = None
		self._debug = debug
		self._last_player = None
		self._last_actions = None



		# (PSEUDOCODE)
		# MODEL HYPERPARAMETERS:
		# state_size = [(position, learner.stack, learner.handrank, played_this_round ...[card1, card2]), (pot_total, learner.to_call, opponent.stack, community_cards)]
		# action_size = env.action_space.n
		# learning_rate = 0.00025

		# Observation = (per-player info, table-level info).
		# NOTE(review): the per-player tuple is repeated a fixed 4 times
		# rather than n_seats times - confirm this is intended.
		self.observation_space = spaces.Tuple([

			spaces.Tuple([                # players
				spaces.MultiDiscrete([
				max_limit,           # stack
				max_limit,           # handrank
				1,                   # playedthisround
				1,                   # is_betting
				max_limit,           # last side pot
				]),
				spaces.Tuple([
					spaces.MultiDiscrete([    # card
						n_suits,          # suit, can be negative one if it's not available.
						n_ranks,          # rank, can be negative one if it's not available.
					])
				] * n_pocket_cards)
			] * 4),

			spaces.Tuple([
				spaces.Discrete(max_limit),   # learner position
				spaces.Discrete(max_limit),   # pot amount
				spaces.Discrete(max_limit),   # last raise
				spaces.Discrete(n_seats - 1), # current player seat location.
				spaces.Discrete(max_limit),   # minimum amount to raise
				spaces.Discrete(max_limit), # how much needed to call by current player.
				spaces.Tuple([
					spaces.MultiDiscrete([    # card
						n_suits - 1,          # suit
						n_ranks - 1,          # rank
						1,                     # is_flopped
					])
				] * n_community_cards)
			])
		])

		### MAY NEED TO ALTER FOR HEADS-UP
		# self.action_space = spaces.Tuple([
		# spaces.MultiDiscrete([
		# 	3,                     # action_id
		# 	max_limit,             # raise_amount
		# ]),
		# ] * n_seats) 
		# Single discrete action with three choices.
		self.action_space = spaces.Discrete(3)
コード例 #30
0
class TexasHoldemEnv(Env, utils.EzPickle):
	"""Texas hold'em table driven through ``reset``/``step``/``render``.

	The player added to seat 0 is tracked as the learner, any other added
	player as the villain. Blinds, pots, side pots and the per-seat raise
	levels (``level_raises``) are kept on the instance.
	"""

	# Blind schedule as [small blind, big blind] pairs, indexed by
	# ``_blind_index``. The tenth level is [500, 1000]; the big blind there was
	# mistyped with an extra zero, which broke the progression to [600, 1200].
	BLIND_INCREMENTS = [[10,25], [25,50], [50,100], [75,150], [100,200],
						[150,300], [200,400], [300,600], [400,800], [500,1000],
						[600,1200], [800,1600], [1000,2000]]
	
	# Marker that render() appends to the line of the player who is to act.
	current_player_notifier = ""
	# Weights for regret / round-resolution terms. They are not referenced in
	# the part of the class reviewed here (presumably used by the reward code;
	# confirm before changing).
	weighting_coefficient_regret_fold = 10
	weighting_coefficient_regret_check = 10
	weighting_coefficient_regret_call = 10
	weighting_coefficient_regret_raise = 10
	weighting_coefficient_round_resolve = 100

	

	def __init__(self, n_seats, max_limit=100000, debug=False):
		"""Create a table whose ``n_seats`` seats all hold empty placeholder players.

		Args:
			n_seats: number of seats at the table.
			max_limit: bound used for the chip-valued entries of the
				observation space.
			debug: when true, betting and pot resolution print trace output.
		"""
		n_suits = 4                     # s,h,d,c
		n_ranks = 13                    # 2,3,4,5,6,7,8,9,T,J,Q,K,A
		n_community_cards = 5           # flop, turn, river
		n_pocket_cards = 2

		# Raise level reached by each seat in the current hand (three seats assumed).
		self.level_raises = {0:0, 1:0, 2:0}

		self.n_seats = n_seats
		self._blind_index = 0
		[self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
		self._deck = Deck()
		self._evaluator = Evaluator()
		self.last_seq_move = []
		self.filled_seats = 0
		self.signal_end_round = False
		self.winning_players = None
		self.starting_stack_size = None
		self.community = []
		self._round = 0
		self._button = 0
		self._discard = []
		self.game_resolved = False
		self.is_new_r = True
		self._side_pots = [0] * n_seats
		self._current_sidepot = 0 # index of _side_pots
		self._totalpot = 0
		self._tocall = 0
		self._lastraise = 0
		self._number_of_hands = 0
		self._record_players = []
		# _resolve() reads this list; reset() only recreates it when a hand is
		# actually started, so it must exist from construction onwards.
		self._folded_players = []

		# fill seats with dummy players
		self._seats = [Player(i, stack=0, emptyplayer=True) for i in range(n_seats)]
		self.learner_bot = None
		self.villain = None
		self.emptyseats = n_seats
		self._player_dict = {}
		self._current_player = None
		self._debug = debug
		self._last_player = None
		self._last_actions = None

		# Design notes kept from the original author (pseudocode):
		#   state_size = [(position, learner.stack, learner.handrank, played_this_round ...[card1, card2]),
		#                 (pot_total, learner.to_call, opponent.stack, community_cards)]
		#   action_size = env.action_space.n
		#   learning_rate = 0.00025

		self.observation_space = spaces.Tuple([

			spaces.Tuple([                # players
				spaces.MultiDiscrete([
				max_limit,           # stack
				max_limit,           # handrank
				1,                   # playedthisround
				1,                   # is_betting
				max_limit,           # last side pot
				]),
				spaces.Tuple([
					spaces.MultiDiscrete([    # card
						n_suits,          # suit, can be negative one if it's not available.
						n_ranks,          # rank, can be negative one if it's not available.
					])
				] * n_pocket_cards)
			] * 4),

			spaces.Tuple([
				spaces.Discrete(max_limit),   # learner position
				spaces.Discrete(max_limit),   # pot amount
				spaces.Discrete(max_limit),   # last raise
				spaces.Discrete(n_seats - 1), # current player seat location.
				spaces.Discrete(max_limit),   # minimum amount to raise
				spaces.Discrete(max_limit), # how much needed to call by current player.
				spaces.Tuple([
					spaces.MultiDiscrete([    # card
						n_suits - 1,          # suit
						n_ranks - 1,          # rank
						1,                     # is_flopped
					])
				] * n_community_cards)
			])
		])

		### MAY NEED TO ALTER FOR HEADS-UP
		# self.action_space = spaces.Tuple([
		# spaces.MultiDiscrete([
		# 	3,                     # action_id
		# 	max_limit,             # raise_amount
		# ]),
		# ] * n_seats) 
		self.action_space = spaces.Discrete(3)
		

	def seed(self, seed=None):
		"""Derive a seed through ``seeding.np_random`` and return it in a one-element list.

		The generator returned alongside the seed is discarded here.
		"""
		_generator, derived_seed = seeding.np_random(seed)
		return [derived_seed]


	# Important Note: Positions are only assigned at end of game. Be aware in 
	# case of reporting stats on position type
	def assign_positions(self):
		"""Rotate table positions for the next hand.

		With three filled seats every registered player moves one position back
		(modulo 3) and every other seat gets position ``None``. With two filled
		seats only positions 0 and 2 are used: they are swapped, and a player
		still holding position 1 receives whichever of 0/2 the other player did
		not just get. Any other seat count leaves positions untouched.
		"""
		seated = self.filled_seats

		if seated == 3:
			for occupant in self._seats:
				if occupant in self._player_dict.values():
					occupant.position = (occupant.position + (seated - 1)) % seated
				else:
					occupant.position = None
			return

		if seated != 2:
			return

		# Heads-up: swap the two positions that are in use (0 <-> 2) first.
		swapped = []
		for occupant in self._player_dict.values():
			if occupant.emptyplayer:
				continue
			if occupant.position == 2:
				occupant.position = 0
			elif occupant.position == 0:
				occupant.position = 2
			else:
				continue
			swapped.append(occupant.position)

		# A former position-1 player takes the slot the swapped player vacated.
		if len(swapped) != 1:
			return
		for occupant in self._player_dict.values():
			if occupant.position == 1:
				if swapped[0] == 0:
					occupant.position = 2
				elif swapped[0] == 2:
					occupant.position = 0
				
				



	def add_player(self, seat_id, stack=2000):
		"""Add a player to the environment seat with the given stack (chipcount).

		The player id equals the seat id. A player seated at seat 0 becomes
		``learner_bot``; any other seat becomes ``villain``.

		Calling this for an id that is already registered is a no-op (it used
		to fail with an unbound-local error). No counters are changed when the
		seat turns out to be occupied.

		Raises:
			error.Error: if the seat is held by a non-empty player.
		"""
		player_id = seat_id
		if player_id in self._player_dict:
			return

		# Validate before mutating anything so a failed call leaves the table
		# and the Player.total_plrs counter unchanged.
		if not self._seats[player_id].emptyplayer:
			raise error.Error('Seat already taken.')

		new_player = Player(player_id, stack=stack, emptyplayer=False)
		new_player.set_seat(player_id)
		self._seats[player_id] = new_player
		self._player_dict[player_id] = new_player

		Player.total_plrs += 1
		self.starting_stack_size = stack
		self.emptyseats -= 1
		self.filled_seats += 1

		if new_player.get_seat() == 0:
			self.learner_bot = new_player
		else:
			self.villain = new_player
		self._record_players.append(new_player)
			
			
			

	def move_player_to_empty_seat(self, player):
		"""Move ``player`` to the lowest-numbered empty seat in front of them.

		The player and the empty placeholder swap seats, the player's seat
		number is updated, and the search stops. If no empty seat precedes the
		player nothing changes.

		Previously the loop kept going after the first swap without updating
		the player's seat, which could place the same player in several seats
		and drop a placeholder from the table.
		"""
		# priority queue placing active players at front of table
		for seat_no, occupant in enumerate(self._seats):
			if occupant.emptyplayer and (seat_no < player._seat):
				previous_seat = player.get_seat()
				self._seats[seat_no] = player
				self._seats[previous_seat] = occupant
				player.set_seat(seat_no)
				break

	def reassign_players_seats(self):
		"""Run ``move_player_to_empty_seat`` for every registered player, in dict order."""
		for _player_id, occupant in self._player_dict.items():
			self.move_player_to_empty_seat(occupant)

	def remove_player(self, seat_id):
		"""Remove a player from the environment seat.

		The seat is refilled with an empty placeholder whose position is
		``None`` and the seat counters are updated. Removal is best-effort: an
		id that is not registered (previously an uncaught ``KeyError``) or a
		registered player who is not found in any seat leaves the table
		unchanged.
		"""
		player_id = seat_id

		departing = self._player_dict.get(player_id)
		if departing is None:
			return

		try:
			idx = self._seats.index(departing)
		except ValueError:
			# Registered but not seated anywhere: nothing to vacate.
			return

		vacant = Player(0, stack=0, emptyplayer=True)
		vacant.position = None # Very important for when transitioning from 3 to 2 players.
		self._seats[idx] = vacant

		del self._player_dict[player_id]
		self.emptyseats += 1
		self.filled_seats -= 1
		Player.total_plrs -= 1

	def reset(self):
		self._reset_game()
		self._ready_players()
		self._number_of_hands = 1
		[self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[0]
		if (self.emptyseats < len(self._seats) - 1):
			players = [p for p in self._seats if p.playing_hand]
			self._new_round()
			self._round = 0
			self._current_player = self._first_to_act(players, "post_blinds")
			self._post_smallblind(self._current_player)
			self._current_player = self._next(players, self._current_player)
			self._post_bigblind(self._current_player)
			self._current_player = self._next(players, self._current_player)
			self._tocall = self._bigblind
			self._round = 0
			self._deal_next_round()
			self.organise_evaluations()
			
			self._folded_players = []
		return self._get_current_reset_returns()


	def organise_evaluations(self):
		"""Attach a fresh preflop ``HandEvaluation`` to every registered player.

		Each player gets their own evaluation object (stored as ``player.he``),
		it is evaluated for the 'Preflop' event, and the result is pushed into
		the player's hand rank. ``None`` entries in the player dict are skipped.
		"""
		for seat_key, occupant in self._player_dict.items():
			if occupant is None:
				continue
			evaluation = HandHoldem.HandEvaluation(occupant.hand, seat_key, "Preflop")
			occupant.he = evaluation  # unique to this player instance
			evaluation.evaluate(event='Preflop')
			occupant.set_handrank(evaluation.evaluation)
		

	def assume_unique_cards(self, players):
		"""Return True when no card appears twice across the hands and the board.

		Every player's ``hand`` is inspected, plus the first non-``None``
		``player.he.board`` found among the players. Returns ``False`` as soon
		as a repeated card is seen and ``True`` otherwise (including when there
		are no cards at all).

		The previous implementation returned from inside its final loop on the
		first iteration, so only a single card was ever checked, and it
		returned ``None`` for empty input.
		"""
		seen = set()
		board = None

		for player in players:
			for card in player.hand:
				if card in seen:
					return False
				seen.add(card)
			# Remember the first board any player's evaluation carries.
			if board is None and player.he is not None and player.he.board is not None:
				board = player.he.board

		if board is not None:
			for card in board:
				if card in seen:
					return False
				seen.add(card)

		return True

	def step(self, actions):
		"""Advance the hand by one action of the current player.

		``actions`` is indexed by the current player's ``player_id``; that entry
		is passed to the player's ``player_move``, which yields a
		``(name, amount)`` move whose name is 'call', 'check', 'raise' or
		'fold'. The move is applied through ``_player_bet`` (or by marking the
		player as folded), the action passes to the next player, and a letter
		('C', 'c', 'R', 'F') is appended to ``last_seq_move``.

		Once every remaining player with chips has played this round the round
		is resolved via ``_resolve``. The hand is terminal when ``_round``
		reaches 4 or only one player is still in the hand.

		Returns whatever ``_get_current_step_returns`` produces for the
		resulting state.
		"""
		
		players = [p for p in self._seats if p.playing_hand]
		assert self.assume_unique_cards(players) is True

		self._last_player = self._current_player
		# self._last_actions = actions
		

		# if self._last_player.count_r(self.last_seq_move) > 1:
		# 	if [3,0] in actions:
		# 		print("r")	

		# if current player did not play this round 
		if not self._current_player.playedthisround and len([p for p in players if not p.isallin]) >= 1:
			# An all-in player has no decision to make: skip straight to the next player.
			if self._current_player.isallin:
				self._current_player = self._next(players, self._current_player)
				return self._get_current_step_returns(False)

			move = self._current_player.player_move(self._output_state(self._current_player), actions[self._current_player.player_id], last_seq_move = self.last_seq_move, _round = self._round)
			if self.am_i_only_player_wmoney() and self.level_raises[self._current_player.get_seat()] >= self.highest_in_LR()[0]:
				move = ("check", 0) # Protects against player making bets without any other stacked/active players
			self._last_actions = move
			if move[0] == 'call':
				# NOTE(review): the argument is a constant, so this assert does not depend on the move.
				assert self.action_space.contains(0)
				self._player_bet(self._current_player, self._tocall, is_posting_blind=False, bet_type=move[0])
				if self._debug:
					print('Player', self._current_player.player_id, move)
				self._current_player = self._next(players, self._current_player)
				self.last_seq_move.append('C')
				# NOTE(review): this sets an attribute on the environment, not on a player.
				self.playedthisround = True
				# _current_player already refers to the NEXT player at this point.
				self._current_player.round['raises_i_owe'] = 0

			elif move[0] == 'check':
				# assert self.action_space.contains(0)
				self._player_bet(self._current_player, self._current_player.currentbet, is_posting_blind=False, bet_type=move[0])
				if self._debug:
					print('Player', self._current_player.player_id, move)
				self._current_player = self._next(players, self._current_player)
				self.last_seq_move.append('c')
				# NOTE(review): sets an attribute on the environment, not on a player.
				self.playedthisround = True

			elif move[0] == 'raise':
				# if self._current_player is self.learner_bot and self.level_raises == {0: 1, 1: 0, 2: 2} or self.level_raises == {0: 2, 1: 0, 2: 3} or self.level_raises == {0: 3, 1: 0, 2: 4} or self.level_raises == {0: 4, 1: 0, 2: 5} or self.level_raises == {0: 5, 1: 0, 2: 6} or self.level_raises == {0: 5, 1: 0, 2: 6} and 'R' in self.last_seq_move:
				# 	print("watch")
				assert self.action_space.contains(1)
				
				# The raise amount is added on top of what the player already has in this round.
				self._player_bet(self._current_player, move[1]+self._current_player.currentbet, is_posting_blind=False, bet_type="bet/raise")
				if self._debug:
					print('Player', self._current_player.player_id, move)
				# A raise reopens the action for everybody else.
				for p in players:
					if p != self._current_player:
						p.playedthisround = False
				self._current_player = self._next(players, self._current_player)
				
				self.last_seq_move.append('R')
				# _current_player already refers to the NEXT player at this point.
				self._current_player.round['raises_i_owe'] = 0
				
			elif move[0] == 'fold':
				# if self.highest_in_LR()[0] > 4:
				# 	print("watch")
				assert self.action_space.contains(2)
				self._current_player.playing_hand = False
				self._current_player.playedthisround = True
				if self._debug:
					print('Player', self._current_player.player_id, move)
				self._current_player = self._next(players, self._current_player)
				
				# NOTE(review): _current_player was advanced on the line above, so the
				# player appended here is the next player, not the one who folded
				# (the folder is still available as self._last_player) -- confirm intent.
				self._folded_players.append(self._current_player)
				self.last_seq_move.append('F')
				# break if a single player left
				# players = [p for p in self._seats if p.playing_hand]
				# if len(players) == 1:
				# 	self._resolve(players)

		# Recompute: a fold above may have taken a player out of the hand.
		players = [p for p in self._seats if p.playing_hand]

		# else:	## This will help eliminate infinite loop
		# 	self._current_player = self._next(players, self._current_player)
			
		# This will effectively dictate who will become dealer after flop	
		players_with_money = []
		for player in players:
			if(player.stack > 0):
				players_with_money.append(player)
		if all([player.playedthisround for player in players_with_money]):
			self._resolve(players)
			for player in self._player_dict.values():
				# NOTE(review): `==` makes this a comparison whose result is discarded;
				# nothing is assigned here (_new_round(), called from _resolve(), has
				# already reset player.round).
				player.round == {'moves_i_made_in_this_round_sofar': '', 'possible_moves': set([]), 'raises_owed_to_me': 0, "raises_i_owe": 0}
		

		terminal = False
		# Everyone is all-in: run the remaining streets out without further betting.
		if all([player.isallin for player in players]):
			while self._round < 4:
				self._deal_next_round()
				self._round += 1

		elif self.count_active_wmoney() == 1 and all([player.playedthisround for player in players]):
			# do something else here
			while self._round < 3:
				self._round += 1
				self._deal_next_round()
			

		if self._round == 4 or len(players) == 1:
			terminal = True
			self._resolve(players)
			self._resolve_round(players)


		# NOTE(review): `move` is only bound inside the first `if` block above; when
		# that block is skipped this line raises an unbound-local error.
		return self._get_current_step_returns(terminal, action=move)
		

	def am_i_only_player_wmoney(self):
		"""Return True when every registered player other than the current one has no chips."""
		broke_opponents = sum(
			1
			for other in self._player_dict.values()
			if other is not self._current_player and other.stack <= 0
		)
		return broke_opponents == len(self._player_dict) - 1

	def count_active_wmoney(self):
		"""Count the seats whose player is still in the hand and has chips left.

		Flags are tracked per seat number, as before, but the seat numbers are
		no longer limited to 0, 1 and 2 (any other seat used to raise
		``KeyError``).
		"""
		active_seats = set()
		funded_seats = set()
		for player in self._player_dict.values():
			if player.playing_hand:
				active_seats.add(player.get_seat())
			if player.stack > 0:
				funded_seats.add(player.get_seat())
		return len(active_seats & funded_seats)



	def render(self, mode='human', close=False, initial=False, delay=None):
		"""Print the last action, the pot, the board and every seat's hand and stack.

		Args:
			mode: accepted but not used by this method.
			close: accepted but not used by this method.
			initial: when ``True`` a blank line is printed instead of the last action.
			delay: optional number of seconds to sleep before printing.
		"""
		if delay:
			time.sleep(delay)

		if initial is True:
			print("\n")
		elif self._last_actions is not None and initial is False:
			# Only read, never used; kept so a missing last player fails here as before.
			acting_id = self._last_player.player_id
			#print('last action by player {}:'.format(pid))
			print(format_action(self._last_player, self._last_actions))

		print("\n\n")
		print('Total Pot: {}'.format(self._totalpot))

		player_states, community_states = self._get_current_state()
		_player_infos, player_hands = zip(*player_states)
		_community_infos, community_cards = community_states

		print('Board:')
		print('-' + hand_to_str(community_cards))
		print('Players:')
		for seat_idx, pocket in enumerate(player_hands):
			# Tag the acting player's line with "<" followed by their position.
			if self._current_player.get_seat() == seat_idx:
				self.current_player_notifier = "<" + str(self._current_player.position)

			print('{}{}stack: {} {}'.format(seat_idx, hand_to_str(pocket), self._seats[seat_idx].stack, self.current_player_notifier))
			self.current_player_notifier = ""

	def _resolve(self, players):
		
		self.signal_end_round = True
		self._current_player = self._first_to_act(players)
		self._resolve_sidepots(players + self._folded_players)
		self._new_round()
		self._deal_next_round()
		if self._debug:
			print('totalpot', self._totalpot)

	def _resolve_postflop(self, players):
		self._current_player = self._first_to_act(players)
		# print(self._current_player)

	def _deal_next_round(self):
		if self._round == 0:
			self._deal()
		elif self._round == 1:
			self._flop()
		elif self._round == 2:
			self._turn()
		elif self._round == 3:
			self._river()

	def _increment_blinds(self):
		self._blind_index = min(self._blind_index + 1, len(TexasHoldemEnv.BLIND_INCREMENTS) - 1)
		[self._smallblind, self._bigblind] = TexasHoldemEnv.BLIND_INCREMENTS[self._blind_index]

	def _post_smallblind(self, player):
		if self._debug:
			print('player ', player.player_id, 'small blind', self._smallblind)
		self._player_bet(player, self._smallblind, is_posting_blind=True)
		player.playedthisround = False

	def _post_bigblind(self, player):
		if self._debug:
			print('player ', player.player_id, 'big blind', self._bigblind)
		self._player_bet(player, self._bigblind, is_posting_blind=True)
		player.playedthisround = False
		self._lastraise = self._bigblind

	def highest_in_LR(self, specific=None, request_is_seq=None):
		"""Find the highest raise level and the seat holding it.

		Args:
			specific: mapping of seat -> raise level to inspect; defaults to
				``self.level_raises``.
			request_is_seq: when truthy, return a list of ``(level, seat)``
				tuples: the first seat reaching the maximum, followed by every
				other seat tied with it.

		Returns:
			``(level, seat)`` by default, or the list described above. Only
			levels greater than 0 count, so the result is ``(0, 0)`` when no
			seat has raised.
		"""
		table = self.level_raises if specific is None else specific

		top_level, top_seat = 0, 0
		for seat, level in table.items():
			if level > top_level:
				top_level, top_seat = level, seat

		if not request_is_seq:
			return top_level, top_seat

		tied = [
			(level, seat)
			for seat, level in table.items()
			if level == top_level and seat != top_seat
		]
		return [(top_level, top_seat)] + tied

	def is_level_raises_allzero(self):
		"""Return True when every seat's raise level is 0 (also for an empty mapping)."""
		return all(level == 0 for level in self.level_raises.values())

	def _player_bet(self, player, total_bet, **special_betting_type):
		# Case 1: New round, players have incosistent raises
		# Case 2: End of round, difference of raises is 2
		import operator
		sorted_lr = sorted(self.level_raises.items(), key=operator.itemgetter(1))
		
		# if (self.is_off_balance_LR() and self.is_new_r) or ( ((int(self.highest_in_LR()[0]) - int(sorted_lr[1][1])) == 2) and (self.is_new_r is False)):
		# 	print("raise")

		if "is_posting_blind" in special_betting_type and "bet_type" not in special_betting_type: # posting blind (not remainder to match preceding calls/raises)
			if special_betting_type["is_posting_blind"] is True:
				self.level_raises[player.get_seat()] = 0 

		elif "is_posting_blind" in special_betting_type and "bet_type" in special_betting_type: # Bet/Raise or call. Also accounts for checks preflop.
			highest_lr_value, highest_lr_bot = self.highest_in_LR()
			if special_betting_type["is_posting_blind"] is False:
				if special_betting_type["bet_type"] == "bet/raise":
					if self.level_raises[player.get_seat()] < highest_lr_value:
						player.action_type = "raise"
						self.level_raises[player.get_seat()] = highest_lr_value + 1
					elif self.level_raises[player.get_seat()] == highest_lr_value:
						player.action_type = "bet"
						self.level_raises[player.get_seat()] += 1

				elif special_betting_type["bet_type"] == "call":
					if self.level_raises[player.get_seat()] < highest_lr_value:
						player.action_type = "call"
						self.level_raises[player.get_seat()] = highest_lr_value

					elif self.is_level_raises_allzero():
						if player.position == 0:
							player.action_type = "call"
							self.level_raises[player.get_seat()] = 1


					elif player.position == 2:
						player.action_type = "call"
						self.level_raises[player.get_seat()] = highest_lr_value

				elif special_betting_type["bet_type"] == "check" and self._round is 0:	# BB checking preflop
					if player.position == 2:
						self.level_raises[player.get_seat()] = 1
					
		
		# relative_bet is how much _additional_ money is the player betting this turn,
		# on top of what they have already contributed
		# total_bet is the total contribution by player to pot in this round
		relative_bet = min(player.stack, total_bet - player.currentbet)
		player.bet(relative_bet + player.currentbet)

		self._totalpot += relative_bet
		self._tocall = max(self._tocall, total_bet)
		if self._tocall > 0:
			self._tocall = max(self._tocall, self._bigblind)
		self._lastraise = max(self._lastraise, relative_bet  - self._lastraise)
		self.is_new_r = False

	def _first_to_act(self, players, my_event="Postflop"):
		# if self._round == 0 and len(players) == 2:
		# 	return self._next(sorted(
		# 		players + [self._seats[self._button]], key=lambda x:x.get_seat()),
		# 		self._seats[self._button])
		
		first_to_act = None

		if self.filled_seats == 2:
			if my_event is "Preflop" or my_event is "post_blinds":
				first_to_act = self.assign_next_to_act(players, [0,2])

			elif my_event is "Postflop" or my_event is "sidepot":
				first_to_act = self.assign_next_to_act(players, [2,0])

		elif self.filled_seats == 3:
			if my_event is "Preflop":
				first_to_act = self.assign_next_to_act(players, [0,1,2])

			elif my_event is "Postflop" or my_event is "post_blinds" or my_event is "sidepot":
				first_to_act = self.assign_next_to_act(players, [1,2,0])

		# else: 
		# 	my_return = [player for player in players if player.get_seat() > self._button][0]
			
		#assert first_to_act is not None and not(first_to_act.emptyplayer) and not(first_to_act.stack <= 0)

		if len(players) == 1:
			first_to_act = self._record_players[0]

		return first_to_act

	def assign_next_to_act(self, players, precedence_positions):
		"""Return the first eligible player, trying positions in the given order.

		A player is eligible when they hold the wanted position, are not an
		empty placeholder, are still playing the hand and have chips left.
		Returns ``None`` when nobody qualifies.
		"""
		for wanted_position in precedence_positions:
			for candidate in players:
				eligible = (
					candidate.position == wanted_position
					and not candidate.emptyplayer
					and candidate.playing_hand
					and candidate.stack > 0
				)
				if eligible:
					return candidate
		return None

	def _next(self, players, current_player):
		i = 1
		current_player_seat = players.index(current_player)
		
		while(players[(current_player_seat+i) % len(players)].stack <= 0):
			i+=1
			if i > 10: 
				break
				# In this case of inifinte loop, self._current_player is assigned to _next but will be irrelevant anyway so okay.
		assert players[(current_player_seat+i) % len(players)] is not None
		return players[(current_player_seat+i) % len(players)]

	def _deal(self):
		for player in self._seats:
			if player.playing_hand and player.stack > 0:
				player.hand = self._deck.draw(2)
				
				

	def _flop(self):
		self._discard.append(self._deck.draw(1)) #burn
		this_flop = self._deck.draw(3)
		self.flop_cards = this_flop
		self.community = this_flop

	def _turn(self):
		self._discard.append(self._deck.draw(1)) #burn
		self.turn_card = self._deck.draw(1)
		self.community.append(self.turn_card)
		# .append(self.community)

	def _river(self):
		self._discard.append(self._deck.draw(1)) #burn
		self.river_card = self._deck.draw(1)
		self.community.append(self.river_card)

	def _ready_players(self):
		for p in self._seats:
			if not p.emptyplayer and p.sitting_out:
				p.sitting_out = False
				p.playing_hand = True
		
		

	def _resolve_sidepots(self, players_playing):
		players = [p for p in players_playing if p.currentbet]
		if self._debug:
			print('current bets: ', [p.currentbet for p in players])
			print('playing hand: ', [p.playing_hand for p in players])
		if not players:
			return
		try:
			smallest_bet = min([p.currentbet for p in players if p.playing_hand])
		except ValueError:
			for p in players:
				self._side_pots[self._current_sidepot] += p.currentbet
				p.currentbet = 0
			return

		smallest_players_allin = [p for p, bet in zip(players, [p.currentbet for p in players]) if bet == smallest_bet and p.isallin]

		for p in players:
			self._side_pots[self._current_sidepot] += min(smallest_bet, p.currentbet)
			p.currentbet -= min(smallest_bet, p.currentbet)
			p.lastsidepot = self._current_sidepot

		if smallest_players_allin:
			self._current_sidepot += 1
			self._resolve_sidepots(players)
		if self._debug:
			print('sidepots: ', self._side_pots)

	def _new_round(self):
		for player in self._player_dict.values():
			player.currentbet = 0
			player.playedthisround = False
			player.round = {'moves_i_made_in_this_round_sofar': '', 'possible_moves': set([]), 'raises_owed_to_me': 0, "raises_i_owe": 0}
			player.round_track_stack =  player.stack

		self.is_new_r = True
		self._round += 1
		self._tocall = 0
		self._lastraise = 0
		self.last_seq_move = []
		# if self.is_off_balance_LR():
		# 	if self._last_actions[0] != 'fold':
		# 		raise error.Error()
		
	def is_off_balance_LR(self):
		"""Return True when the highest raise level is not matched by any other seat.

		The seat holding the highest level is removed from a copy of
		``level_raises`` and the highest level among the remaining seats is
		compared with it.
		"""
		top_level, top_seat = self.highest_in_LR()

		others = dict(self.level_raises)
		del others[top_seat]
		runner_up_level, _runner_up_seat = self.highest_in_LR(specific=others)

		return top_level != runner_up_level

		
	def _resolve_round(self, players):
		# if len(players) == 1:
		# 	if (self._round == 1 or self._round == 2) and self._last_player.get_seat() == 0 and self._last_actions[0] == 'fold':
		# 		if self._last_player.count_r(self.last_seq_move) < 1:
		# 			if self.learner_bot.position == 0:
		# 				players[0].refund(self._bigblind + self._smallblind)
		# 				self._totalpot = 0
		# 				self.winning_players = players[0]
		# 			else:
		# 				players[0].refund(self._bigblind + self._smallblind + 40)
		# 				self._totalpot = 0
		# 				self.winning_players = players[0]
		# 	else:
		# 		players[0].refund(sum(self._side_pots))
		# 		self._totalpot = 0
		# 		self.winning_players = players[0]
		if len(players) == 1:
			winner, loser = None, None # Heads-Up
			for p in self._record_players:
				if p == players[0]:
					winner = p
				else:
					loser = p
			winner_investment = winner.stack_start_game - winner.stack
			loser_loss = loser.stack_start_game - loser.stack

			if loser.stack_start_game < 15 and loser.position == 0:
				players[0].refund((self.starting_stack_size - winner.stack) )
			elif loser.stack_start_game < 25 and loser.position == 2:
				players[0].refund((self.starting_stack_size - winner.stack) )
		
			else:
				players[0].refund(winner_investment + loser_loss)
				
			self._totalpot = 0
			self.winning_players = players[0]

		else:
			# compute hand ranks
			for player in players:
				# assert (len(self.community) <= 5) is True
				player.handrank = self._evaluator.evaluate(player.hand, self.community)

			# trim side_pots to only include the non-empty side pots
			temp_pots = [pot for pot in self._side_pots if pot > 0]

			# compute who wins each side pot and pay winners
			for pot_idx,_ in enumerate(temp_pots):
				# find players involved in given side_pot, compute the winner(s)
				pot_contributors = [p for p in players if p.lastsidepot >= pot_idx]
				winning_rank = min([p.handrank for p in pot_contributors])
				winning_players = [p for p in pot_contributors if p.handrank == winning_rank]
				self.winning_players = winning_players[0]
				for player in winning_players:
					split_amount = int(self._side_pots[pot_idx]/len(winning_players))
					if self._debug:
						print('Player', player.player_id, 'wins side pot (', int(self._side_pots[pot_idx]/len(winning_players)), ')')
					player.refund(split_amount)
					self._side_pots[pot_idx] -= split_amount

				# any remaining chips after splitting go to the winner in the earliest position
				if self._side_pots[pot_idx]:
					earliest = self._first_to_act([player for player in winning_players], "sidepot")
					earliest.refund(self._side_pots[pot_idx])

			# for player in players: ## THIS IS AT THE END OF THE GAME. NOT DURING. (safe)
			# 	if(player.stack == 0):
			# 		self.remove_player(player.get_seat())
		self.game_resolved = True

		# assert(self._player_dict[0].stack + self._player_dict[2].stack + self._totalpot == 2*self.starting_stack_size)
		
	def report_game(self, requested_attributes, specific_player=None):
		"""Report requested attributes of the registered players.

		Only 'stack' is supported.

		Args:
			requested_attributes: container of attribute names.
			specific_player: optional player id; when given, only that
				player's stack is returned.

		Returns:
			A dict of player id -> stack, the single stack for
			``specific_player``, or ``None`` when 'stack' was not requested.

		Raises:
			KeyError: if ``specific_player`` is not a registered player id.

		The ``specific_player`` path used to reference an undefined name and
		could never succeed.
		"""
		if "stack" not in requested_attributes:
			return None

		player_stacks = {key: player.stack for key, player in self._player_dict.items()}
		if specific_player is None:
			return player_stacks
		return player_stacks[specific_player]
				 
			
		
		

		


		

	def _reset_game(self):
		
		playing = 0

		# if self._player_dict[0].stack is not None and self._player_dict[2].stack is not None:
		# 	assert(self._player_dict[0].stack + self._player_dict[2].stack == 2*self.starting_stack_size)

		
		for player in self._seats:
			if not player.emptyplayer and not player.sitting_out:
				player.stack_start_game = player.stack
				player.reset_hand()
				playing += 1
		self.community = []
		self._current_sidepot = 0
		self._totalpot = 0
		self._side_pots = [0] * len(self._seats)
		self._deck.shuffle()
		self.level_raises = {0:0, 1:0, 2:0}
		self.winning_players = None
		self.game_resolved = False


		if playing:
			self._button = (self._button + 1) % len(self._seats)
			while not self._seats[self._button].playing_hand:
				self._button = (self._button + 1) % len(self._seats)

	def _output_state(self, current_player):
		return {
		'players': [player.player_state() for player in self._seats],
		'community': self.community,
		'my_seat': current_player.get_seat(),
		'pocket_cards': current_player.hand,
		'pot': self._totalpot,
		'button': self._button,
		'tocall': (self._tocall - current_player.currentbet),
		'stack': current_player.stack,
		'bigblind': self._bigblind,
		'player_id': current_player.player_id,
		'lastraise': self._lastraise,
		'minraise': max(self._bigblind, self._lastraise + self._tocall),
		}

	def _pad(self, l, n, v):
		if (not l) or (l is None):
			l = []
		return l + [v] * (n - len(l))

	def _get_current_state(self):
		player_states = []
		for player in self._seats:
			player_features = [
				int(player.stack),
				int(player.handrank),
				int(player.playedthisround),
				int(player.betting),
				int(player.lastsidepot),
			]
			player_states.append((player_features, self._pad(player.hand, 2, -1)))
		community_states = ([
			int(self.learner_bot.position),
			int(self._totalpot),
			int(self._lastraise),
			int(self._current_player.get_seat()),
			int(max(self._bigblind, self._lastraise + self._tocall)),
			int(self._tocall - self._current_player.currentbet),
		], self._pad(self.community, 5, -1))
		# if sum(self.level_raises.values()) > 6:
		# 	print("")
		return (tuple(player_states), community_states)

	def _get_current_reset_returns(self):
		return self._get_current_state()

	def distribute_rewards_given_endgame(self):
		"""Set the learner's end-of-game reward.

		If the learner is the recorded winner the reward is ``compute_reward()``
		plus the total pot; otherwise it is the learner's ``round_track_stack``.
		"""
		learner = self.learner_bot
		if learner is not self.winning_players:
			learner.reward = learner.round_track_stack
		else:
			learner.reward = self.compute_reward() + self._totalpot


	def _get_current_step_returns(self, terminal, action=None):

		observations = self._get_current_state()
		stacks = [player.stack for player in self._seats]
		reward = None
			
		if(action is None):
			return observations, reward, terminal, [] # TODO, return some info?

		else: 	 # Focus on this. At end of step, when player has already decided his action. 
			respective_evaluations = [player.he.evaluation if player.he is not None else None for player in self._seats]
			evaluations_opposing_players = [x for i,x in enumerate(respective_evaluations) if i!= self._last_player.get_seat() and x!=None]
			
			if (self._last_player is self.learner_bot): 					# Learner bot step return

				if(self.signal_end_round == True):
					self.signal_end_round = False
			
				self.learner_bot.reward = self.compute_reward()		# Most common entry point (Learner Checks or raises)

			else:  		
																	# Artifical agent step return
				self.learner_bot.reward = 0

				if(self.signal_end_round == True):
					if(action == ('fold', 0)): # Opponent folded
						self.learner_bot.reward = self._totalpot
					
			# if action is ('fold', 0) or action is ('check', 0) or action[0] is 'call' or action[0] is 'raise':
			# 	regret = self.compute_regret_given_action(action, respective_evaluations, evaluations_opposing_players)
			
			

			return observations, action, reward, terminal, [] # TODO, return some info?


	def compute_reward(self): #only gets called when last player is learner
		"""Score the last action by comparing expected values (EV).

		EV = (size of pot x probability of winning) - cost of entering it.
		An EV is computed for each of the three options that were available
		(call/check, raise/bet, fold). The reward is the EV of the option that
		was actually taken minus the mean EV of all three, i.e. how much the
		chosen action deviates from the average alternative.

		Returns:
			EV of the action taken minus the mean of the three EVs.
		"""
		learner_equity, opp_equity = self.equity()

		if self._round == 0 and self._last_player.position == 0:
			# Only works for heads up: workaround for a bug with the to-call amount.
			to_call = 15
			total_pot = self._totalpot - to_call
		else:
			to_call = self._last_actions[1]
			if self._last_player is self.learner_bot:
				# Exclude the amount the learner just put in.
				total_pot = self._totalpot - self._last_actions[1]
			else:
				total_pot = self._totalpot

		# Index into the EV list: 0 = call/check, 1 = raise/bet, 2 = fold.
		last_move = self._last_actions[0]
		if last_move in ('call', 'check'):
			action_taken = 0
		elif last_move in ('raise', 'bet'):
			action_taken = 1
		else:
			action_taken = 2

		# Call/check: win our share of the pot, risk the amount to call.
		pot_share = total_pot * learner_equity
		call_risk = to_call * opp_equity
		call_ev = pot_share - call_risk

		# Fold: modelled as the exact mirror image of calling.
		fold_ev = call_risk - pot_share

		# Raise/bet: stays 0 when raising was not possible.
		raise_ev = 0
		if self.learner_bot.raise_possible_tba:
			# Implied raise of 25: how much more we stand to win, weighted by
			# how certain the villain is to call it.
			call_chance = self.villain.certainty_to_call
			stand_to_win = (((total_pot + 25) * learner_equity) * call_chance) + (total_pot * learner_equity) * (1 - call_chance)
			stand_to_lose = (to_call + 25) * opp_equity
			raise_ev = stand_to_win - stand_to_lose

		expected_values_order = [call_ev, raise_ev, fold_ev]
		return expected_values_order[action_taken] - mean(expected_values_order)

	def compute_reward_end_round_fold(self, respective_evaluations, evaluations_opposing_players):
		"""Compare the last player's evaluation with the opponents' average.

		Args:
			respective_evaluations: Evaluations indexed by seat number.
			evaluations_opposing_players: Evaluations of the opposing players.

		Returns:
			The last player's evaluation minus the mean opposing evaluation,
			divided by ``self.weighting_coefficient_round_resolve``.
		"""
		seat = self._last_player.get_seat()
		own_evaluation = respective_evaluations[seat]
		opposing_average = mean(list(evaluations_opposing_players))
		gap = own_evaluation - opposing_average
		return gap / self.weighting_coefficient_round_resolve

	def compute_regret_given_action(self, my_action, respective_evaluations, evaluations_opposing_players):
		"""Delegate the regret computation to ``compare_evaluations_players``.

		The delegate is expected to store the regret on the player instance;
		nothing is returned from here.
		"""
		self.compare_evaluations_players(
			my_action,
			respective_evaluations,
			evaluations_opposing_players,
		)
		return None



	


	def equity(self):
		"""Return ``(learner_equity, opponent_equity)`` for the current hand.

		Equity is a percentage (e.g. 70%): how much of the pot "belongs" to a
		player, i.e. how often they expect to win the hand on average from
		this point onwards.

		When ``signal_end_round`` is set, the previous round is used. Rounds
		1-3 are resolved with ``compute_winner_simulation``; any other round
		falls back to averaging each player's hand strength with the
		complement of the other player's hand strength.
		"""
		if self.signal_end_round is True:
			street = self._round - 1
		else:
			street = self._round

		if street in (1, 2, 3):
			learner_share, opp_share = self.compute_winner_simulation(street)
			return learner_share, opp_share

		learner_strength = self.learner_bot.he.hand_strength
		villain_strength = self.villain.he.hand_strength
		learner_equity = (learner_strength + (1 - villain_strength)) / 2
		villain_equity = (villain_strength + (1 - learner_strength)) / 2
		return learner_equity, villain_equity


	def compute_winner_simulation(self, _round):
		"""Estimate both players' pot shares by enumerating the remaining board.

		For round 1 (three community cards known) every unordered turn/river
		pair of unrevealed cards is evaluated; for round 2 (four known) every
		possible river card. The lower evaluation wins a run-out; a tie credits
		both players. For round 3 no simulation is run and the result depends
		only on ``self.winning_players``.

		Args:
			_round: Betting round to simulate from (1, 2 or 3).

		Returns:
			``(learner_share, opponent_share)``: each player's credited wins
			divided by the total credited wins, or ``(1.0, 0.0)`` /
			``(0.0, 1.0)`` for round 3.

		Raises:
			ValueError: If ``_round`` is not 1, 2 or 3, or if there are no
				unrevealed cards to simulate with.
		"""
		if _round == 3:
			# NOTE(review): this is an identity check against `winning_players`;
			# if that attribute holds a collection of players rather than a
			# single player this can never be true -- confirm with the code
			# that sets it.
			if self.learner_bot is self.winning_players:
				return 1.0, 0.0
			return 0.0, 1.0

		if _round == 1:
			known_board_cards = 3
		elif _round == 2:
			known_board_cards = 4
		else:
			raise ValueError("cannot simulate winner for round %r (expected 1, 2 or 3)" % (_round,))

		# Indexing (rather than slicing) keeps the IndexError when the board
		# holds fewer cards than the round implies.
		community = [self.community[i] for i in range(known_board_cards)]
		learner_cards = self.learner_bot.hand
		villain_cards = self.villain.hand
		unrevealed_cards = sorted(
			card for card in self._deck.cards
			if card not in community and card not in learner_cards and card not in villain_cards
		)

		if _round == 1:
			# Every unordered (turn, river) pair of distinct cards.
			runouts = (
				[turn_card, river_card]
				for idx, turn_card in enumerate(unrevealed_cards)
				for river_card in unrevealed_cards[idx + 1:]
				if turn_card != river_card
			)
		else:
			runouts = ([river_card] for river_card in unrevealed_cards)

		evaluate = self._evaluator.evaluate
		learner_wins = 0
		opp_wins = 0
		for runout in runouts:
			board = community + runout
			learner_eval = evaluate(learner_cards, board)
			opp_eval = evaluate(villain_cards, board)
			# Lower evaluation is the better hand; on a tie both are credited.
			if learner_eval <= opp_eval:
				learner_wins += 1
			if opp_eval <= learner_eval:
				opp_wins += 1

		total_wins = learner_wins + opp_wins
		if total_wins == 0:
			raise ValueError("cannot compute equity: no run-outs were simulated")
		return (learner_wins / total_wins, opp_wins / total_wins)





	# Uses the evaluation here. It might be better to use player.handstrength.
	def compare_evaluations_players(self, my_action, respective_evaluations, evaluations_opposing_players):
		"""Placeholder for the per-action regret computation.

		Currently does nothing and returns None; the arguments are ignored.
		The disabled draft of the intended logic is kept below for reference.
		"""
		# --- Disabled draft (kept for reference) ---
		# NOTE(review): before re-enabling, replace the `is` comparisons against
		# tuples/strings with `==`, and note that `raiser_evaluation` in the
		# 'raise' branch is never defined (`raiser_strength` is what is assigned).

		# expected_value = self.expected_value()
		
		# if my_action is ('fold', 0):
		# 	# calculate how good my cards are compared to raisers cards
		# 	_, raiser_bot = self.highest_in_LR()
		# 	raiser_strength = raiser_bot.he.evaluation
		# 	regret = (raiser_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_fold
		# 	# Remember: Higher evaluation means worse cards, lower means better cards.
		# 	# e.g. If my evaluation was 5400, and my opponents evaluation was 7500, I would have positive regret ( I would regret having folded)
		# 	self._current_player.regret.update({'fold': regret})
		# elif my_action is ('check', 0):
		# 	# calculate how good my cards are compared to other players, and thus compute how much I regret not having raised
		# 	# If my evaluation is lower (better cards) than my opponents relatively high evaluation (worse cards), I would have positive regret
		# 	_, opposing_bot = self.current_player() # We can assign opposing as current_player (2-players heads-up) because we already rotated the table position
		# 	opposing_bot_strength = opposing_bot.he.evaluation
		# 	regret = (opposing_bot_strength - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check
		# 	self._current_player.regret.update({'check': regret})
		# elif my_action[0] is 'call':
		# 	# Now we must compute the regret based on how much we would have been better off taking another action: Here, unlike other times, we have
		# 	# 2 possible alternatives : Raise or fold. If we take a call action, we must compute the expected value for the other alternatives. 
		# 	pass

		# elif my_action[0] is 'raise':
		# 	_, raiser_bot = self.highest_in_LR()
		# 	raiser_strength = raiser_bot.he.evaluation
		# 	regret = (raiser_evaluation - respective_evaluations[self._current_player.get_seat()]) / self.weighting_coefficient_regret_check
		# 	self._current_player.regret.update({'check': regret})