Exemple #1
0
        emulator.set_game_rule(player_num=n_players,
                               max_round=max_round,
                               small_blind_amount=20,
                               ante_amount=0)
        players_info = {
            "uuid-1": {
                "name": "player1",
                "stack": 1000
            },
            #"uuid-2": { "name": "player2", "stack": 1000 },
            quuid: {
                "name": "q-learning",
                "stack": 1000
            }
        }
        initial_state = emulator.generate_initial_game_state(players_info)
        game_state, events = emulator.start_new_round(initial_state)

        game_finish_state, events = emulator.run_until_game_finish(game_state)
        qlearner_player.clear_stack()
        #[{'uuid': 'uuid-3', 'stack': 789}]
        win_ratio_dict = {"uuid-1": 0, "uuid-2": 0, "uuid-3": 0, quuid: 0}
        final_stack_dict = {"uuid-1": 0, "uuid-2": 0, "uuid-3": 0, quuid: 0}
        rounds_occ = 0
        for event in events:
            if (event['type'] == "event_round_finish"):
                rounds_occ += 1
                for winner in event['winners']:
                    win_ratio_dict[winner['uuid']] += 1
        for player in events[-1]['players']:
            final_stack_dict[player['uuid']] = player['stack']
class EmulatorTest(BaseUnitTest):

    def setUp(self):
        self.emu = Emulator()

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule["player_num"])
        self.eq(8, self.emu.game_rule["max_round"])
        self.eq(5, self.emu.game_rule["sb_amount"])
        self.eq(3, self.emu.game_rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player("uuid", "hoge")

    def test_blind_structure(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: { "ante": 5, "small_blind": 60 } })
        p1 = TestPlayer([("fold", 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ('fold', 0)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state["table"].seats.players[0].stack)
        self.eq(135, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
                3: { "ante": 5, "small_blind": 10 },
                5: {"ante": 10, "small_blind": 20 }
                }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
                "uuid-1": { "name": "hoge", "stack": 100 },
                "uuid-2": { "name": "fuga", "stack": 100 }
                }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])

    def test_apply_action(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(TwoPlayerSample.p1_action_histories, \
                game_state["table"].seats.players[0].round_action_histories[Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 4, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, game_state["table"].seats.players[0].stack)
        self.eq(70, game_state["table"].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.emu.apply_action(game_state, "fold")


    def test_run_until_round_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        uuids = ["tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["CA", "D2"], ["C8", "H5"]]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = reduce(lambda a,e: attach_hole_card(a, e[0], e[1]), zip(uuids, holecards), game_state)
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    def test_run_until_game_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 1)
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, game_state["table"].seats.players[0].stack)
        self.eq(86, game_state["table"].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["C2","C3"],["HA","CA"],["D5","H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(lambda state, item: attach_hole_card(state, item[0], item[1]), zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold",0), ("call", 10), ('call', 0), ('call', 10), ("fold",0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        [self.emu.register_player(uuid, player) for uuid, player in zip(uuids, players)]
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(292, game_state["table"].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["C2","C3"],["HA","CA"],["D5","H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(lambda state, item: attach_hole_card(state, item[0], item[1]), zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(265, game_state["table"].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state, "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        p1, p2 = FoldMan(), FoldMan()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        # run until round finish
        game_state, event = self.emu.apply_action(game_state, "call", 15)
        game_state, event = self.emu.apply_action(game_state, "call", 0)
        game_state, event = self.emu.apply_action(game_state, "call", 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq("tojrbxmkuzrarnniosuhct", events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1]-sb_amount-ante, game_state["table"].seats.players[1].stack)
        self.eq(stacks[2]-sb_amount*2-ante, game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED, game_state["table"].seats.players[0].pay_info.status)
        self.eq(sb_amount*3 + ante*2, GameEvaluator.create_pot(game_state["table"].seats.players)[0]["amount"])

        # case2: third player cannot pay big blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0]-sb_amount-ante, game_state["table"].seats.players[0].stack)
        self.eq(stacks[2]-sb_amount*2-ante, game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED, game_state["table"].seats.players[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END, game_state["table"].seats.players[0].pay_info.status)
        self.eq(sb_amount*3 + ante*2, GameEvaluator.create_pot(game_state["table"].seats.players)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 6
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])


    def test_start_new_round_game_finish_judge(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 11
        finish_state["table"].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()

        players_info = OrderedDict()
        players_info["uuid-1"] = { "name": "hoge", "stack": 100 }
        players_info["uuid-2"] = { "name": "fuga", "stack": 100 }
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, events = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions, self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions, self.emu.generate_possible_actions(state2))
Exemple #3
0
class PushFoldEmulator:
    def __init__(self, starting_stack, small_blind):
        self.pok = PokerUtils()
        self.starting_stack = starting_stack
        self.small_blind = 10
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)

        self.hole_cards = {}
        self.players_info = {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.players_cards = [np.zeros(13), np.zeros(13)]
        self.suited = [0, 0]
        self.street = 'preflop'
        self.events = []
        self.game_state = []

    def is_round_finished(self):
        for e in self.events:
            if (e['type'] == 'event_round_finish'):
                return True
        return False

    def new_street(self):

        for e in self.events:
            if (e['type'] == 'event_new_street'):
                self.street = e['street']
                return self.street
        return False

    def save_cards(self):
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                card.__str__() for card in player.hole_card
            ]

    def get_hand_feature(self):
        if (self.street == 'preflop'):
            self.save_cards()
            for i in range(2):
                self.players_cards[i][self.pok.get_card_value_index(
                    self.game_state['table'].seats.players[i].hole_card[0].
                    __str__())] = 1
                self.players_cards[i][self.pok.get_card_value_index(
                    self.game_state['table'].seats.players[i].hole_card[1].
                    __str__())] = 1

                if self.pok.is_suited([
                        self.game_state['table'].seats.players[i].hole_card[0].
                        __str__(), self.game_state['table'].seats.players[i].
                        hole_card[1].__str__()
                ]):
                    self.suited[i] = 1
                else:
                    self.suited[i] = 0

    def get_all_in_amount(self):
        for e in self.events:
            if (e['type'] == 'event_ask_player'):
                return e['valid_actions'][2]['amount']['max']

    def get_call_amount(self):
        for e in self.events:
            if (e['type'] == 'event_ask_player'):
                return e['valid_actions'][1]['amount']

    def get_sb_reward(self):
        for e in self.events:
            if (e['type'] == 'event_round_finish'):
                if (e['winners'][0]['uuid'] == 'sb_player'):
                    return (e['winners'][0]['stack'] - self.starting_stack)
                else:
                    return -(e['winners'][0]['stack'] - self.starting_stack)

    def play_action(self, action):

        if (action == 0):
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'fold', 0)
        elif (action == 1):
            if self.get_all_in_amount() == -1:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'call', self.get_call_amount())
            else:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'raise', self.get_all_in_amount())

    def new_hand(self, starting_stack):
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.street = 'preflop'
        self.hole_cards = {}
        self.starting_stack = starting_stack
        self.players_info = {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)

        self.players_cards = [np.zeros(13), np.zeros(13)]

        self.get_hand_feature()

    def get_action_histories_text(self, hole_cards=False):
        if (hole_cards == True):
            print(self.hole_cards)

        histo = self.events[0]['round_state']['action_histories']
        hand_text = ""
        for k, v in histo.items():
            if (len(v) > 0):
                hand_text += k + '\n'
                for a in v:
                    if (a['action'] == 'RAISE'):
                        hand_text += a['uuid'] + \
                            ' raises to ' + str(a['amount']) + '\n'
                    elif (a['action'] == 'FOLD'):
                        hand_text += a['uuid'] + ' folds\n'
                    elif (a['action'] == 'CALL' and a['amount'] == 0):
                        hand_text += a['uuid'] + ' checks\n'
                    else:
                        hand_text += a['uuid'] + ' ' + \
                            a['action'] + ' ' + str(a['amount']) + '\n'

        return hand_text
class EmulatorTest(BaseUnitTest):
    def setUp(self):
        self.emu = Emulator()

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule["player_num"])
        self.eq(8, self.emu.game_rule["max_round"])
        self.eq(5, self.emu.game_rule["sb_amount"])
        self.eq(3, self.emu.game_rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player("uuid", "hoge")

    def test_blind_structure(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {"ante": 5, "small_blind": 60}})
        p1 = TestPlayer([("fold", 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ('fold', 0)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state["table"].seats.players[0].stack)
        self.eq(135, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
            3: {
                "ante": 5,
                "small_blind": 10
            },
            5: {
                "ante": 10,
                "small_blind": 20
            }
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            "uuid-1": {
                "name": "hoge",
                "stack": 100
            },
            "uuid-2": {
                "name": "fuga",
                "stack": 100
            }
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])

    def test_apply_action(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(TwoPlayerSample.p1_action_histories, \
                game_state["table"].seats.players[0].round_action_histories[Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 4, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, game_state["table"].seats.players[0].stack)
        self.eq(70, game_state["table"].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.emu.apply_action(game_state, "fold")

    def test_run_until_round_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        uuids = ["tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa"]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["CA", "D2"], ["C8", "H5"]]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = reduce(lambda a, e: attach_hole_card(a, e[0], e[1]),
                            zip(uuids, holecards), game_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    def test_run_until_game_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 1)
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, game_state["table"].seats.players[0].stack)
        self.eq(86, game_state["table"].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        uuids = [
            "ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
            "uxrdiwvctvilasinweqven"
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["C2", "C3"], ["HA", "CA"], ["D5", "H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold", 0), ("call", 10), ('call', 0), ('call', 10),
                   ("fold", 0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        [
            self.emu.register_player(uuid, player)
            for uuid, player in zip(uuids, players)
        ]
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(292, game_state["table"].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        uuids = [
            "ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
            "uxrdiwvctvilasinweqven"
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["C2", "C3"], ["HA", "CA"], ["D5", "H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(265, game_state["table"].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                "tojrbxmkuzrarnniosuhct")
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        p1, p2 = FoldMan(), FoldMan()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.register_player("tojrbxmkuzrarnniosuhct", FoldMan())
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", FoldMan())

        # run until round finish
        game_state, event = self.emu.apply_action(game_state, "call", 15)
        game_state, event = self.emu.apply_action(game_state, "call", 0)
        game_state, event = self.emu.apply_action(game_state, "call", 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq("tojrbxmkuzrarnniosuhct", events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        uuids = [
            "ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
            "uxrdiwvctvilasinweqven"
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1] - sb_amount - ante,
                game_state["table"].seats.players[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

        # case2: third player cannot pay big blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0] - sb_amount - ante,
                game_state["table"].seats.players[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        uuids = [
            "ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
            "uxrdiwvctvilasinweqven"
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 6
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])

    def test_start_new_round_game_finish_judge(self):
        uuids = [
            "ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
            "uxrdiwvctvilasinweqven"
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 11
        finish_state["table"].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()

        players_info = OrderedDict()
        players_info["uuid-1"] = {"name": "hoge", "stack": 100}
        players_info["uuid-2"] = {"name": "fuga", "stack": 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, events = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
class EmulatorTest(BaseUnitTest):
    def setUp(self):
        self.emu = Emulator()

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule['player_num'])
        self.eq(8, self.emu.game_rule['max_round'])
        self.eq(5, self.emu.game_rule['sb_amount'])
        self.eq(3, self.emu.game_rule['ante'])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('uuid-1', p1)
        self.emu.register_player('uuid-2', p2)
        self.eq(p1, self.emu.fetch_player('uuid-1'))
        self.eq(p2, self.emu.fetch_player('uuid-2'))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player('uuid', 'hoge')

    def test_blind_structure(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {'ante': 5, 'small_blind': 60}})
        p1 = TestPlayer([('fold', 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([('call', 15), ('call', 55), ('fold', 0)])
        self.emu.register_player('tojrbxmkuzrarnniosuhct', p1)
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state['table'].seats.players[0].stack)
        self.eq(135, game_state['table'].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state['table'].seats.players[0].stack)
        self.eq(80, game_state['table'].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq('event_game_finish', events[0]['type'])
        self.eq(0, game_state['table'].seats.players[0].stack)
        self.eq(80, game_state['table'].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('uuid-1', p1)
        self.emu.register_player('uuid-2', p2)
        blind_structure = {
            3: {
                'ante': 5,
                'small_blind': 10
            },
            5: {
                'ante': 10,
                'small_blind': 20
            }
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            'uuid-1': {
                'name': 'hoge',
                'stack': 100
            },
            'uuid-2': {
                'name': 'fuga',
                'stack': 100
            }
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state['small_blind_amount'])
        state, _ = self.emu.start_new_round(state)
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(5, state['small_blind_amount'])
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(5, state['small_blind_amount'])
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(10, state['small_blind_amount'])
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(10, state['small_blind_amount'])
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(20, state['small_blind_amount'])
        state, _ = self.emu.apply_action(state, 'fold')
        self.eq(20, state['small_blind_amount'])

    def test_apply_action(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'call', 15)
        self.eq(Const.Street.RIVER, game_state['street'])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            game_state['table'].seats.players[0].round_action_histories[
                Const.Street.TURN])
        self.eq(2, len(events))
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_ask_player', events[0]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_round_finish', events[0]['type'])

    def test_apply_action_game_finish_detect(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.eq('event_game_finish', events[-1]['type'])

    def test_apply_action_start_next_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 4, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.eq(120, game_state['table'].seats.players[0].stack)
        self.eq(80, game_state['table'].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, 'raise', 20)
        self.eq('event_ask_player', events[-1]['type'])
        self.eq(100, game_state['table'].seats.players[0].stack)
        self.eq(70, game_state['table'].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 3, 5, 0)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.emu.apply_action(game_state, 'fold')

    def test_run_until_round_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('fold', 0)])
        p2 = TestPlayer([('call', 15)])
        self.emu.register_player('tojrbxmkuzrarnniosuhct', p1)
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('event_round_finish', events[2]['type'])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('fold', 0)])
        p2 = TestPlayer([('call', 15)])
        self.emu.register_player('tojrbxmkuzrarnniosuhct', p1)
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [['Ac', '2d'], ['8c', '5h']]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = reduce(lambda a, e: attach_hole_card(a, e[0], e[1]),
                            zip(uuids, holecards), game_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('raise', 65)])
        p2 = TestPlayer([('call', 15), ('call', 65)])
        self.emu.register_player('tojrbxmkuzrarnniosuhct', p1)
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', p2)
        game_state['table'].deck.deck.append(Card.from_str('7c'))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('event_ask_player', events[2]['type'])
        self.eq('event_round_finish', events[3]['type'])
        self.eq('event_game_finish', events[4]['type'])
        self.eq(0, events[4]['players'][0]['stack'])
        self.eq(200, events[4]['players'][1]['stack'])

    def test_run_until_game_finish(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 1)
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(114, game_state['table'].seats.players[0].stack)
        self.eq(86, game_state['table'].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [('fold', 0), ('call', 10), ('call', 0), ('call', 10),
                   ('fold', 0)]
        p2_acts = []
        p3_acts = [('raise', 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        [
            self.emu.register_player(uuid, player)
            for uuid, player in zip(uuids, players)
        ]
        game_state['table'].deck.deck.append(Card.from_str('7c'))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(0, game_state['table'].seats.players[0].stack)
        self.eq(0, game_state['table'].seats.players[1].stack)
        self.eq(292, game_state['table'].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]
        game_state['table'].deck.deck.append(Card.from_str('7c'))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(10, game_state['round_count'])
        self.eq(35, game_state['table'].seats.players[0].stack)
        self.eq(0, game_state['table'].seats.players[1].stack)
        self.eq(265, game_state['table'].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state['street'] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state['round_count'] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state['table'].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = attach_hole_card_from_deck(game_state,
                                                'tojrbxmkuzrarnniosuhct')
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        p1, p2 = FoldMan(), FoldMan()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.register_player('tojrbxmkuzrarnniosuhct', FoldMan())
        self.emu.register_player('pwtwlmfciymjdoljkhagxa', FoldMan())

        # run until round finish
        game_state, event = self.emu.apply_action(game_state, 'call', 15)
        game_state, event = self.emu.apply_action(game_state, 'call', 0)
        game_state, event = self.emu.apply_action(game_state, 'call', 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state['round_count'])
        self.eq(1, game_state['table'].dealer_btn)
        self.eq(0, game_state['street'])
        self.eq(0, game_state['next_player'])
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('preflop', events[0]['street'])
        self.eq('tojrbxmkuzrarnniosuhct', events[1]['uuid'])

    def test_start_new_round_exclude_no_money_players(self):
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state['table'].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(1, game_state['next_player'])
        self.eq(stacks[1] - sb_amount - ante,
                game_state['table'].seats.players[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state['table'].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state['table'].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state['table'].seats.players)[0]['amount'])

        # case2: third player cannot pay big blind
        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state['table'].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(0, game_state['next_player'])
        self.eq(stacks[0] - sb_amount - ante,
                game_state['table'].seats.players[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state['table'].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state['table'].seats.players[1].pay_info.status)
        self.eq(PayInfo.PLAY_TILL_END,
                game_state['table'].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state['table'].seats.players)[0]['amount'])

    def test_start_new_round_exclude_no_money_players2(self):
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        # case1: second player cannot pay small blind
        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[2].stack = 6
        stacks = [p.stack for p in finish_state['table'].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state['table'].dealer_btn)
        self.eq(1, game_state['table'].sb_pos())
        self.eq(1, game_state['next_player'])

    def test_start_new_round_game_finish_judge(self):
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        [self.emu.register_player(uuid, FoldMan()) for uuid in uuids]

        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[2].stack = 11
        finish_state['table'].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq('event_game_finish', events[0]['type'])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()

        players_info = OrderedDict()
        players_info['uuid-1'] = {'name': 'hoge', 'stack': 100}
        players_info['uuid-2'] = {'name': 'fuga', 'stack': 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state['table']
        self.eq(0, state['round_count'])
        self.eq(5, state['small_blind_amount'])
        self.eq(100, table.seats.players[0].stack)
        self.eq('uuid-1', table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq('uuid-2', table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state['table'].dealer_btn)
        self.eq(1, state['table'].sb_pos())
        self.eq(0, state['table'].bb_pos())
        self.eq(1, state['next_player'])
        state, events = self.emu.apply_action(state, 'call', 10)
        self.eq(1, state['next_player'])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
Exemple #6
0
def run_episode(agents):
    emulator = Emulator()
    temp_final_state = {}
    winner_counts = [0] * N_AGENTS
    n_games_played = 0
    for game in range(GAMES_PER_EPISODE):
        wrappers = []
        player_info = {}
        for i, agent in enumerate(agents):
            if PERSISTENT_STACKS:
                if temp_final_state:
                    for seat in temp_final_state:
                        player_info[seat.uuid] = {'name': 'Player ' + str(seat.uuid),
                                                  'stack': seat.stack if seat.stack else STACK_SIZE}
                else:
                    player_info[i] = {'name': 'Player ' + str(i), 'stack': STACK_SIZE}
            else:
                player_info[i] = {'name': 'Player ' + str(i), 'stack': STACK_SIZE}

            wrappers.append(DQNAgentWrapper(agent, STACK_SIZE))
            emulator.register_player(uuid=i, player=wrappers[-1])
        emulator.set_game_rule(N_AGENTS, 2, BB_SIZE / 2, 0)
        initial_game_state = emulator.generate_initial_game_state(player_info)

        game_state, events = emulator.start_new_round(initial_game_state)
        game_finish_state, events = emulator.run_until_round_finish(game_state)

        # import json
        # if game == 0 or game == 1:
        #     print('dumping')
        #     with open('event_dump_' + str(game), 'w') as f:
        #         json.dump(events, f, indent=2)
        if 'winners' not in events[-1]:
            events.pop()

        winner = events[-1]['winners'][0]['uuid']
        winner_counts[winner] += 1
        n_games_played += 1

        for i in range(N_AGENTS):
            new_stack_size = game_finish_state['table'].seats.players[i].stack
            reward = (new_stack_size - wrappers[i].prev_stack_size) / BB_SIZE
            #print('Remembering {} reward for {} action'.format(reward, wrappers[i].prev_action))
            wrappers[i].agent.remember(wrappers[i].prev_state, wrappers[i].prev_action, reward, None, 1)

        temp_final_state = game_finish_state['table'].seats.players

        # print('====')
        print('\rGame:{}, epsilon:{}'.format(game, wrappers[0].agent.epsilon), end='')
        # print(game_finish_state)
        # print('\n')
        # print(events[-5:])
        # print('====')

        if (game % REPLAY_EVERY_N_GAMES == 0) or (game == GAMES_PER_EPISODE - 1):
            # replay memory for every agent
            # for agent in agents:
            #     agent.replay(BATCH_SIZE)
            agents[0].replay(BATCH_SIZE)

        for i in range(N_AGENTS):
            agents[i].model.reset_states()

    clear_memory()
    return agents[0], temp_final_state, winner_counts, n_games_played
Exemple #7
0
class CFR:
    def __init__(self):
        self.game_states_ = dict()  # maps history to node
        self.emulator = Emulator()

    @staticmethod
    def get_higher_rank(card1, card2):
        if card1.rank > card2.rank:
            return card1
        return card2

    @staticmethod
    def get_higher_suit(card1, card2):
        if card1.suit > card2.suit:
            return card1
        elif card1.suit == card2.suit:
            return 0
        return card2

    @staticmethod
    def simplify_hand(hand, community_cards):
        """ Takes a hand (array of size two) and compresses the hand into simpler representation
            Also puts higher card in front
            i.e. Th 9h becomes T9s as both cards share the same suit
                Th 9s becomes T9o as both cards do not share the same suit (off-suit)
                Th Ts becomes TT (pair of tens)
        """
        generated_hand = gen_cards(hand)
        card1 = generated_hand[0]
        card2 = generated_hand[1]

        # pair
        if card1.rank == card2.rank:
            # print "Pair %s" % str(card1)[1] + str(card2)[1]
            return str(card1)[1] + str(card2)[1] # return the rank 2-9, J-A instead of all ints

        hand = str(CFR.get_higher_rank(card1, card2))[1]
        # print "Higher rank card %s" % hand
        hand += str(card2)[1] if hand == str(card1)[1] else str(card1)[1]
        hand += str("s") if str(card1)[0] == str(card2)[0] else str("o")
        # print "final hand %s" % hand

        if len(community_cards) >= 3:
          strength = HandEvaluator.gen_hand_rank_info(generated_hand, gen_cards(community_cards))
          hand += "_%s" %strength.get("hand")["strength"]

        return hand


    def train(self, iterations, ante=1.0, bet1=2.0, bet2=8.0, print_interval=1000, save_interval=1000):
        """ Do ficticious self-play to find optimal strategy"""
        util = 0.0

        self.ante = ante
        self.bet1 = bet1
        self.bet2 = bet2

        # print "Ante: %f   Bet-1: %f   Bet-2: %f" % (ante, bet1, bet2)

        for i in range(iterations):
            if i % print_interval == 0 and i != 0:
                print("P1 expected value after %i iterations: %f" % (i, util / i))
                # for j in range(-1, 17):
                #     try:
                #         print j, strats["_" + str(j) + "G"]
                #     except:
                #         pass

            self.emulator.set_game_rule(2, 5, 10, 5)
            init_state = self.emulator.generate_initial_game_state(
                {"0": {"stack": 10000, "name": "0"}, "1": {"stack": 10000, "name": "1"}})
            round_start, events = self.emulator.start_new_round(init_state)
            player_one_cards = list(map(str, round_start["table"].seats.players[0].hole_card))
            player_two_cards = list(map(str, round_start["table"].seats.players[1].hole_card))
            cards = [player_one_cards, player_two_cards]
            history = list()
            util += self.cfr(cards, history, round_start, events, 1, 1)
            # strats = self.get_strategy()

            # """
            # if i%save_interval == 0:
            #     print "saving..."
            #     pickle.dump(self.get_strategy(), open("full_game_"+str(i)+"_avg_diff.strat", "wb"))
            # """
        return util / iterations

    def get_strategy(self):
        result = dict()
        for state, node in self.game_states_.items():
            #print(state, node.strategy_)
            result[state] = node.get_average_strategy()
        return result

    # @cards - the cards the players have, with index 0 being the card that player one has
    # and index 1 being the card that player two has
    # @history - a list of moves used to reach this game state
    # @probability1 - the probability of reaching this game state for player 1
    # @probability2 - the probability of reaching this game state for player 2
    # @game_state - PyPokerEngine's game state.
    def cfr(self, cards, history, game_state, events, probability1, probability2):
        player = 1-int(game_state["next_player"])
        opponent = 1 - player
        player_hand = cards[player]
        # print "=========== PLAYER " + str(player) + " TURN ==============="
        # print "history: " + str(history)
        # print "player_hand: %s %s" % (str(player_hand[0]), str(player_hand[1]))
        # print "opp_hand: %s %s" % (str(opponent_hand[0]), str(opponent_hand[1]))

        probability_weight = probability1 if player == 0 else probability2

        # print "probability_weight: " + str(probability_weight)event["round_state"]
        # print "num_moves: " + str(num_moves)

        # BEGIN TERMINAL STATES
        for event in events:
            if event["type"] == "event_round_finish":
                dct = {}
                dct[game_state["table"].seats.players[0].uuid] = game_state["table"].seats.players[0].stack
                dct[game_state["table"].seats.players[1].uuid] = game_state["table"].seats.players[1].stack
                #print (game_state, event)
                #print "player", player
                return dct[str(player)] - dct[str(opponent)]

        community_card = []
        for i in range(len(game_state["table"].get_community_card())):
            community_card.append(game_state["table"].get_community_card()[i].__str__())

        # state = str(player_hand)
        p_hand = self.simplify_hand(player_hand, community_card)
        state = str(p_hand)

        for action in history:
            state += action

        # print "state: %s" % str(state)
        # CREATE NEW ACTIONS

        if state in self.game_states_:
            node = self.game_states_[state]  # Get our node if it already exists
            possible_actions = node.actions_
        else:
            # Create new Node with possible actions we can perform
            possible_actions = [ACTION_TO_HISTORY_MAPPING[i["action"]] for i in
                                self.emulator.generate_possible_actions(game_state)]
            node = Node(possible_actions)
            self.game_states_[state] = node

        strategy = node.get_strategy(probability_weight)

        # print "possible_actions for this round: " + str(possible_actions)
        # print "strategy: " + str(strategy)

        util = dict()
        node_util = 0
        # for each of our possible actions, compute the utility of it
        # thus, finding the overall utility of this current state
        for action in possible_actions:
            next_history = list(history)  # copy
            next_history.append(action)
            new_game_state, new_event = self.emulator.apply_action(game_state, HISTORY_TO_ACTION_MAPPING[action])

            if player == 0:
                util[action] = -self.cfr(cards, next_history, new_game_state, new_event,
                                         probability1 * strategy[action], probability2)
            else:
                util[action] = -self.cfr(cards, next_history, new_game_state, new_event, probability1,
                                         probability2 * strategy[action])
            #print action, util[action]
            node_util += strategy[action] * util[action]

        # compute regret and update Game State for the node based on utility of all actions
        for action in possible_actions:
            regret = util[action] - node_util
            if player == 0:
                node.regret_sum_[action] += regret * probability2
            else:
                node.regret_sum_[action] += regret * probability1

        #print "node_util: "+ str(action) + " " + str(node_util)

        return node_util
class CustomEmulator:
    def __init__(self, starting_stack, small_blind):
        self.pok = PokerUtils()
        self.starting_stack = 500
        self.small_blind = 10
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)

        self.hole_cards = {}
        self.players_info = {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.street = 'preflop'
        self.events = []
        self.game_state = []

        self.players_cards = [np.zeros(52), np.zeros(52)]
        self.cards_feature = [np.zeros(52), np.zeros(52), np.zeros(52)]

        self.actions_feature = [
            np.zeros(6), np.zeros(6),
            np.zeros(6), np.zeros(6)
        ]

    def is_round_finished(self):
        for e in self.events:
            if (e['type'] == 'event_round_finish'):
                return True
        return False

    def new_street(self):

        for e in self.events:
            if (e['type'] == 'event_new_street'):
                self.street = e['street']
                return self.street
        return False

    def save_cards(self):
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                card.__str__() for card in player.hole_card
            ]

    def make_cards_feature(self):
        if (self.street == 'preflop'):
            self.save_cards()
            for i in range(2):
                self.players_cards[i][self.pok.get_card_total_index(
                    self.game_state['table'].seats.players[i].hole_card[0].
                    __str__())] = 1
                self.players_cards[i][self.pok.get_card_total_index(
                    self.game_state['table'].seats.players[i].hole_card[1].
                    __str__())] = 1
        elif (self.street == 'flop'):
            for card in self.events[0]['round_state']['community_card']:
                self.cards_feature[0][self.pok.get_card_total_index(card)] = 1
        elif (self.street == 'turn'):
            self.cards_feature[1][self.pok.get_card_total_index(
                self.events[0]['round_state']['community_card'][3])] = 1
        elif (self.street == 'river'):
            self.cards_feature[2][self.pok.get_card_total_index(
                self.events[0]['round_state']['community_card'][4])] = 1

    def make_engineered_cards_feature(self):
        if (self.street == 'preflop'):
            self.save_cards()
            for i in range(2):
                self.players_cards[i][self.pok.get_card_total_index(
                    self.game_state['table'].seats.players[i].hole_card[0].
                    __str__())] = 1
                self.players_cards[i][self.pok.get_card_total_index(
                    self.game_state['table'].seats.players[i].hole_card[1].
                    __str__())] = 1
        elif (self.street == 'flop'):
            for card in self.events[0]['round_state']['community_card']:
                self.cards_feature[0][self.pok.get_card_total_index(card)] = 1
        elif (self.street == 'turn'):
            self.cards_feature[1][self.pok.get_card_total_index(
                self.events[0]['round_state']['community_card'][3])] = 1
        elif (self.street == 'river'):
            self.cards_feature[2][self.pok.get_card_total_index(
                self.events[0]['round_state']['community_card'][4])] = 1

    def make_actions_feature(self):
        if (self.street == 'preflop'):
            self.actions_feature[0] = self.pok.get_street_actions(
                self.starting_stack,
                self.events[0]['round_state']['action_histories']['preflop'])
        elif (self.street == 'flop'):
            self.actions_feature[1] = self.pok.get_street_actions(
                self.starting_stack,
                self.events[0]['round_state']['action_histories']['flop'])
        elif (self.street == 'turn'):
            self.actions_feature[2] = self.pok.get_street_actions(
                self.starting_stack,
                self.events[0]['round_state']['action_histories']['turn'])
        elif (self.street == 'river'):
            self.actions_feature[3] = self.pok.get_street_actions(
                self.starting_stack,
                self.events[0]['round_state']['action_histories']['river'])

    def make_features(self):
        # actions are made every street
        self.make_actions_feature()
        if (self.new_street != False):
            self.make_cards_feature()

    def get_minraise_amount(self):
        for e in self.events:
            if (e['type'] == 'event_ask_player'):
                return e['valid_actions'][2]['amount']['min']

    def get_all_in_amount(self):
        for e in self.events:
            if (e['type'] == 'event_ask_player'):
                return e['valid_actions'][2]['amount']['max']

    def get_call_amount(self):
        for e in self.events:
            if (e['type'] == 'event_ask_player'):
                return e['valid_actions'][1]['amount']

    def get_sb_reward(self):
        for e in self.events:
            if (e['type'] == 'event_round_finish'):
                if (e['winners'][0]['uuid'] == 'sb_player'):
                    return (e['winners'][0]['stack'] - self.starting_stack)
                else:
                    return -(e['winners'][0]['stack'] - self.starting_stack)

    def get_spr(self):
        pot = self.events[0]['round_state']['pot']['main']['amount']
        stack = self.starting_stack - int(pot / 2)
        spr = stack / pot
        return spr

    def play_action(self, action):
        if (action == 0):
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'fold', 0)
        elif (action == 1):
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'call', self.get_call_amount())
        elif (action == 2):
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'raise', self.get_minraise_amount())
        elif (action == 3):
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'raise', self.get_all_in_amount())

    def new_hand(self):
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.street = 'preflop'
        self.hole_cards = {}
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)

        self.players_cards = [np.zeros(52), np.zeros(52)]
        self.cards_feature = [np.zeros(52), np.zeros(52), np.zeros(52)]

        self.actions_feature = [
            np.zeros(6), np.zeros(6),
            np.zeros(6), np.zeros(6)
        ]

        self.make_features()

    def get_action_histories_text(self, hole_cards=False):
        if (hole_cards == True):
            print(self.hole_cards)

        histo = self.events[0]['round_state']['action_histories']
        hand_text = ""
        for k, v in histo.items():
            if (len(v) > 0):
                hand_text += k + '\n'
                for a in v:
                    if (a['action'] == 'RAISE'):
                        hand_text += a['uuid'] + \
                            ' raises to ' + str(a['amount']) + '\n'
                    elif (a['action'] == 'FOLD'):
                        hand_text += a['uuid'] + ' folds\n'
                    elif (a['action'] == 'CALL' and a['amount'] == 0):
                        hand_text += a['uuid'] + ' checks\n'
                    else:
                        hand_text += a['uuid'] + ' ' + \
                            a['action'] + ' ' + str(a['amount']) + '\n'

        return hand_text
Exemple #9
0
class CFRBase:

    def __init__(self):
        self.emulator = Emulator()
        #try:
        self._sigma = pickle.load(open("strategy.pickle", "rb"))
        #except (OSError, IOError, EOFError) as e:
            #self._sigma = {}
            #pickle.dump(self._sigma, open("strategy.pickle", "wb"))

        self.cumulative_regrets = {}
        self.cumulative_sigma = {}


    def get_tree(self,game_state):
        if game_state[1][-1]['type'] != 'event_game_finish':
            hist = game_state[1][-1]['round_state']['action_histories']
            game_string = "#"
            for i in hist['preflop']:
                game_string = game_string + i['action'][0]
            if "flop" in hist and len(hist['flop']) > 0:
                for i in hist['flop']:
                   game_string = game_string + i['action'][0]
            if "turn" in hist and len(hist['turn']) > 0:
                for i in hist['turn']:
                    game_string = game_string + i['action'][0]
            if "river" in hist and len(hist['river']) > 0:
                for i in hist['river']:
                    game_string = game_string + i['action'][0]
        else:
            hist = game_state[1][0]['round_state']['action_histories']
            game_string = "#"
            for i in hist['preflop']:
                game_string = game_string + i['action'][0]
            if "flop" in hist and len(hist['flop']) > 0:
                for i in hist['flop']:
                   game_string = game_string + i['action'][0]
            if "turn" in hist and len(hist['turn']) > 0:
                for i in hist['turn']:
                    game_string = game_string + i['action'][0]
            if "river" in hist and len(hist['river']) > 0:
                for i in hist['river']:
                    game_string = game_string + i['action'][0]
        return game_string
        
    def is_terminal(self,game_state):
        return game_state[1][-1]['type'] == 'event_game_finish'

    def state_evaluation(self,game_state):
        return game_state[1][1]['players'][1]['stack'] - 1000

    def _available_actions(self,game_state):
        actions = []
        for i in game_state[1][-1]['valid_actions']:
            actions.append(i['action'])
        return actions

    def load_data(self):
        with open('strategy.pickle','rb') as handle:
            self._sigma = pickle.load(handle)

    def store_data(self):
        with open('strategy.pickle', 'wb') as handle:
            pickle.dump(self._sigma, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def getsigma(self, game_state, percentile, action):
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        if tree not in self._sigma:
            self._sigma[tree] = {percentile:{}}
            for i in available_actions:
                self._sigma[tree][percentile][i] = 1./len(available_actions)
        elif percentile not in self._sigma[tree]:
            self._sigma[tree][percentile] = {}
            for i in available_actions:
                self._sigma[tree][percentile][i] = 1./len(available_actions)
        return self._sigma[tree][percentile][action]

    def _statetomove(self,game_state):
        next_player_pos = game_state[0]["next_player"]
        if next_player_pos == 1:
            return 1
        else:
            return -1
        
    def _getcards(self,game_state):
        hole_cards = []
        next_player_pos = game_state[0]["next_player"]
        hole_cards.append(game_state[0]['table'].seats.players[next_player_pos].hole_card[0].__str__())
        hole_cards.append(game_state[0]['table'].seats.players[next_player_pos].hole_card[1].__str__())
        return hole_cards

    def _getboard(self,game_state):
        return game_state[0]['table']._community_card

    def _myround(self,x, base=10):
        return int(base * round(float(x)/base))

    def evaluate_hs(self,game_state):
        #print('evaluating hs')
        hole_cards = gen_cards(self._getcards(game_state))
        #print('hole cards = ' + str(self._getcards(game_state)))
        community_cards = self._getboard(game_state)
        #print('community = ' + str(self._getboard(game_state)))
        hs = estimate_hole_card_win_rate(100,2, hole_cards,community_cards)
        #print('hs = ' + str(hs))
        percentile = self._myround(hs*100)
        return percentile

    def _state_play(self, game_state, action):
        new_game_state = self.emulator.apply_action(game_state[0], action, 0)
        return new_game_state

    def utility_recursive(self):
        final_value = self._utility_recursive(self.game_state,1,1)
        #print("##################CUMULATIVE REGRETS#####################")
        #pprint.pprint(self.cumulative_regrets)
        #print("##################CUMULATIVE SIGMA#####################")
        #pprint.pprint(self.cumulative_sigma)
        return final_value

    def _cumulate_cfr_regret(self, game_state, percentile, action, regret):
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        if tree not in self.cumulative_regrets:
            self.cumulative_regrets[tree] = {percentile:{}}
            for i in available_actions:
                self.cumulative_regrets[tree][percentile][i] = 0
        elif percentile not in self.cumulative_regrets[tree]:
            self.cumulative_regrets[tree][percentile] = {}
            for i in available_actions:
                self.cumulative_regrets[tree][percentile][i] = 0      
        self.cumulative_regrets[tree][percentile][action] += regret

    def _cumulate_sigma(self, game_state, percentile, action, prob):
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        if tree not in self.cumulative_sigma:
            self.cumulative_sigma[tree] = {percentile:{}}
            for i in available_actions:
                self.cumulative_sigma[tree][percentile][i] = 0
        elif percentile not in self.cumulative_sigma[tree]:
            self.cumulative_sigma[tree][percentile] = {}
            for i in available_actions:
                self.cumulative_sigma[tree][percentile][i] = 0      
        self.cumulative_sigma[tree][percentile][action] += prob

    def __update_sigma_recursively(self, game_state):
        # stop traversal at terminal node
        if self.is_terminal(game_state):
            return
        percentile = self.evaluate_hs(game_state)
        self._update_sigma(game_state,percentile)
        # go to subtrees
        for action in self._available_actions(game_state):
            self.__update_sigma_recursively(self._state_play(game_state,action))

    def _update_sigma(self, game_state, percentile):
        tree = self.get_tree(game_state)
        #print("Current State--------")
        #print(self.get_tree(game_state))
        #print(percentile)
        #if percentile not in self._sigma[tree]:
        #    percentile = (min(100, percentile + 5)) if (min(100, percentile + 5) in self._sigma[tree]) else (max(0, percentile - 5))
        if percentile not in self.cumulative_regrets[tree]:
            #print('Enter unable to find percentile')
            if (min(100, percentile + 5) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 5))
            elif (max(0, percentile - 5) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 5))
            elif (min(100, percentile + 10) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 10))
            elif (max(0, percentile - 10) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 10))
            elif (min(100, percentile + 20) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 20))
            elif (max(0, percentile - 20) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 20))
            elif (min(100, percentile + 30) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 30))
            elif (max(0, percentile - 30) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 30))
            elif (min(100, percentile + 40) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 40))
            elif (max(0, percentile - 40) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 40))
            elif (min(100, percentile + 50) in self.cumulative_regrets[tree]):
                percentile = (min(100, percentile + 50))
            elif (max(0, percentile - 50) in self.cumulative_regrets[tree]):
                percentile = (max(0, percentile - 50))
        #print('New Percentile = ' + str(percentile))
        rgrt_sum = sum(filter(lambda x : x > 0, self.cumulative_regrets[tree][percentile].values()))
        nr_of_actions = len(self.cumulative_regrets[tree][percentile].keys())
        for a in self.cumulative_regrets[tree][percentile]:        
            self._sigma[tree][percentile][a] = max(self.cumulative_regrets[tree][percentile][a], 0.) / rgrt_sum if rgrt_sum > 0 else 1. / nr_of_actions        
        
    def _utility_recursive(self,game_state, reach_sb, reach_bb):


        children_states_utilities = {}
        if self.is_terminal(game_state):
            return self.state_evaluation(game_state)
        percentile = self.evaluate_hs(game_state)
        #print("Current State--------")
        #print(self.get_tree(game_state))
        #print(percentile)
        #pprint.pprint(game_state[1][-1]['round_state']['community_card'])
        value = 0.

        for action in self._available_actions(game_state):
            probability = self.getsigma(game_state,percentile,action)
            child_reach_sb = reach_sb * ( probability if self._statetomove(game_state) == 1 else 1)
            child_reach_bb = reach_bb * ( probability if self._statetomove(game_state) == -1 else 1)

            child_state_utility = self._utility_recursive(self._state_play(game_state,action),reach_sb,reach_bb)

            value +=  (probability * child_state_utility)
            
            children_states_utilities[action] = child_state_utility
            
        (cfr_reach, reach) = (reach_bb, reach_sb) if self._statetomove(game_state) == 1 else (reach_sb, reach_bb)

        for action in self._available_actions(game_state):

            action_cfr_regret = self._statetomove(game_state) * cfr_reach * (children_states_utilities[action] - value)

            self._cumulate_cfr_regret(game_state, percentile, action, action_cfr_regret)
            self._cumulate_sigma(game_state, percentile, action, reach * self.getsigma(game_state,percentile,action))
        return value

    def run(self, iterations = 1):
        print(datetime.datetime.now())
        for myiter in range(0, iterations):
            #initialize new game state with new hole cards
            self.emulator.set_game_rule(player_num=2, max_round=1, small_blind_amount=10, ante_amount=0)
            players_info = {
                "uuid-1": { "name": "player1", "stack": 1000 },
                "uuid-2": { "name": "player2", "stack": 1000 }
            }
            player1 = RandomPlayer()
            player2 = RandomPlayer()
            self.emulator.register_player('uuid-1',player1)
            self.emulator.register_player('uuid-2',player2)
            initial_state = self.emulator.generate_initial_game_state(players_info)
            self.game_state = self.emulator.start_new_round(initial_state)
            #goto later gamestate
            #self.game_state = self.emulator.apply_action(self.game_state[0], 'call', 0)



            
            self._utility_recursive(self.game_state, 1, 1)
            self.__update_sigma_recursively(self.game_state)

            
            self.store_data()
            print('data stored,  i = ' + str(myiter) )
            print(datetime.datetime.now())
        print(datetime.datetime.now())
Exemple #10
0
class TexasHoldemTask(BaseTask):
    def __init__(self,
                 final_round=max_round,
                 scale_reward=False,
                 lose_penalty=False,
                 shuffle_position=False,
                 action_record=False):
        self.final_round = final_round
        self.scale_reward = scale_reward
        self.lose_penalty = lose_penalty
        self.shuffle_position = shuffle_position
        self.action_record = action_record
        self.emulator = Emulator()
        self.emulator.set_game_rule(nb_player, final_round, sb_amount, ante)
        self.emulator.set_blind_structure(blind_structure)
        self.opponent_value_functions = {}
        if shuffle_position:
            print "Warning: shuffle_position is set True. Are you sure?"

        for uuid in players_info:
            self.emulator.register_player(uuid, DummyPlayer())
            if uuid != my_uuid: self.opponent_value_functions[uuid] = None

    def set_opponent_value_functions(self, value_functions):
        assert len(value_functions) == 9
        opponent_uuids = [
            uuid for uuid in self.opponent_value_functions if uuid != my_uuid
        ]
        for uuid, value_function in zip(opponent_uuids, value_functions):
            self.opponent_value_functions[uuid] = value_function

    def generate_initial_state(self):
        return self.generate_initial_state_without_action_record() if not self.action_record\
                else self.generate_initial_state_with_action_record()

    def generate_initial_state_without_action_record(self):
        p_info = _get_shuffled_players_info(
        ) if self.shuffle_position else players_info
        clear_state = self.emulator.generate_initial_game_state(p_info)
        state, _events = self.emulator.start_new_round(clear_state)
        while not self._check_my_turn(state):
            action, amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(state, action, amount)
            if state[
                    "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                        state):
                state, _events = self.emulator.start_new_round(state)
        return state if not self.is_terminal_state(
            state) else self.generate_initial_state()

    def generate_initial_state_with_action_record(self):
        p_info = _get_shuffled_players_info(
        ) if self.shuffle_position else players_info
        clear_state = self.emulator.generate_initial_game_state(p_info)
        p_act_record = {
            p.uuid: [[], [], [], []]
            for p in clear_state["table"].seats.players
        }
        state, _events = self.emulator.start_new_round(clear_state)
        while not self._check_my_turn(state):
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(state, p_act_record,
                                                      opponent_uuid,
                                                      action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            if state[
                    "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                        state):
                state, _events = self.emulator.start_new_round(state)
        state[ACTION_RECORD_KEY] = p_act_record
        return state if not self.is_terminal_state(
            state) else self.generate_initial_state()

    def is_terminal_state(self, state):
        me = pick_me(state)
        round_finished = state["street"] == Const.Street.FINISHED
        active_players = [
            p for p in state["table"].seats.players if p.stack > 0
        ]
        short_of_players = len(active_players) <= table_break_threshold
        i_am_loser = me.stack == 0
        is_final_round = state["round_count"] >= self.final_round
        return round_finished and (short_of_players or i_am_loser
                                   or is_final_round)

    def transit_state(self, state, action):
        return self.transit_state_without_action_record(state, action) if not self.action_record\
                else self.transit_state_with_action_record(state, action)

    def transit_state_without_action_record(self, state, action):
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        action, amount = action["action"], action["amount"]
        state, _events = self.emulator.apply_action(state, action, amount)
        if state[
                "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                    state):
            state, _events = self.emulator.start_new_round(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            action, amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(state, action, amount)
            if state[
                    "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                        state):
                state, _events = self.emulator.start_new_round(state)
        return state

    def transit_state_with_action_record(self, state, action_info):
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        assert state.has_key(ACTION_RECORD_KEY)
        p_act_record = _deepcopy_action_record(state)
        p_act_record = self._update_action_record(state, p_act_record, my_uuid,
                                                  action_info)
        action, amount = action_info["action"], action_info["amount"]
        state, _events = self.emulator.apply_action(state, action, amount)
        state[ACTION_RECORD_KEY] = p_act_record
        if state[
                "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                    state):
            state, _events = self.emulator.start_new_round(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(state, p_act_record,
                                                      opponent_uuid,
                                                      action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            if state[
                    "street"] == Const.Street.FINISHED and not self.is_terminal_state(
                        state):
                state, _events = self.emulator.start_new_round(state)
        state[ACTION_RECORD_KEY] = p_act_record
        return state

    def _check_my_turn(self, state):
        players = state["table"].seats.players
        return state["next_player"] != "not_found" and my_uuid == players[
            state["next_player"]].uuid

    def _choose_opponent_action(self, state, detail_info=False):
        players = state["table"].seats.players
        opponent_uuid = players[state["next_player"]].uuid
        value_function = self.opponent_value_functions[opponent_uuid]
        action_info = choose_best_action(self, value_function, state)
        return (opponent_uuid,
                action_info) if detail_info else (action_info["action"],
                                                  action_info["amount"])

    def generate_possible_actions(self, state):
        action_info = self.emulator.generate_possible_actions(state)
        min_raise_amount = action_info[2]["amount"]["min"]
        max_raise_amount = action_info[2]["amount"]["max"]
        player = state["table"].seats.players[state["next_player"]]

        actions = [
            gen_fold_action(),
            gen_call_action(action_info[1]["amount"])
        ]
        if min_raise_amount != -1:
            actions.append(gen_min_raise_action(min_raise_amount))
        if min_raise_amount != -1 and min_raise_amount * 2 < max_raise_amount:
            actions.append(gen_double_raise_action(min_raise_amount * 2))
        if min_raise_amount != -1 and min_raise_amount * 3 < max_raise_amount:
            actions.append(gen_triple_raise_action(min_raise_amount * 3))
        if max_raise_amount != -1:
            actions.append(gen_max_raise_action(max_raise_amount))
        return actions

    def calculate_reward(self, state):
        if self.is_terminal_state(state):
            if pick_me(state).stack == 0 and self.lose_penalty:
                return -1
            if self.scale_reward:
                return 1.0 * pick_me(state).stack / (nb_player * initial_stack)
            else:
                return pick_me(state).stack
        else:
            return 0

    def _update_action_record(self, state, action_record, uuid, action_info):
        action, amount = action_info["action"], action_info["amount"]
        if 'fold' == action: idx = 0
        elif 'call' == action: idx = 1
        elif 'raise' == action: idx = 2
        else: raise Exception("unexpected action [ %s ] received" % action)

        # allin check. the idx of allin is 3.
        action_player = [
            player for player in state["table"].seats.players
            if player.uuid == uuid
        ]
        assert len(action_player) == 1
        if amount >= action_player[0].stack and 'fold' != action: idx = 3
        action_record[uuid][idx].append(amount)
        return action_record