class GenPlayer(BasePokerPlayer):
    """Poker player that consults an ``Emulator`` simulation before acting."""

    def receive_game_start_message(self, game_info):
        """Set up the emulator from the game information sent at game start.

        Reads ``player_num``, the ``rule`` entries (``max_round``,
        ``small_blind_amount``, ``ante``, ``blind_structure``) and the seated
        players from ``game_info``, and registers a ``RandomPlayer`` for every
        seat so that simulations have an algorithm for each player.
        """
        rule = game_info["rule"]

        self.emulator = Emulator()
        self.emulator.set_game_rule(
            game_info["player_num"],
            rule["max_round"],
            rule["small_blind_amount"],
            rule["ante"],
        )
        self.emulator.set_blind_structure(rule["blind_structure"])

        # Register the algorithm each player uses inside the simulation.
        for player_info in game_info["seats"]["players"]:
            self.emulator.register_player(player_info["uuid"], RandomPlayer())

    def declare_action(self, valid_actions, hole_card, round_state):
        """Choose an action based on a one-step simulation.

        Returns:
            An ``(action, amount)`` pair: the ``call`` entry of
            ``valid_actions`` when the simulated outcome is judged good,
            otherwise ``("fold", 0)``.
        """
        game_state = restore_game_state(round_state)

        # Decide using a simulation result. The emulator also offers
        # ``run_until_round_finish`` / ``run_until_game_finish`` for deeper
        # simulations than this single applied action.
        updated_state, _events = self.emulator.apply_action(game_state, "fold")

        # NOTE(review): ``is_good_simulation_result`` is not defined in this
        # class as shown; presumably a subclass or mixin supplies it - confirm.
        if self.is_good_simulation_result(updated_state):
            # Previously this branch returned ``None``; return a concrete
            # (action, amount) pair like the fold branch does.
            call = next(
                (entry for entry in valid_actions if entry["action"] == "call"),
                None,
            )
            if call is not None:
                return call["action"], call["amount"]
        return "fold", 0
class Game:
    """Expose an emulated two-player poker game through a game-search API.

    The methods (``actions``, ``result``, ``utility``, ``terminal_test``,
    ``to_move``) delegate to an ``Emulator`` configured in ``__init__``.
    """

    def __init__(self, game_state, hole_card, player):
        self.emulator = Emulator()
        self.emulator.set_game_rule(2, 10, 10, 0)
        self.initial_state = game_state
        self.hole_card = hole_card
        self.player = player

    def actions(self, state):
        """Return the names of the moves the emulator allows in ``state``."""
        possible = self.emulator.generate_possible_actions(state)
        return [entry['action'] for entry in possible]

    def result(self, state, move):
        """Return the state reached by applying ``move`` to ``state``."""
        outcome = self.emulator.apply_action(state, move)
        # Only the first element (the new state) is of interest here.
        return outcome[0]

    def utility(self, state, player):
        """Return the value of ``state`` from ``player``'s point of view.

        The value is the estimated win rate of ``self.hole_card`` against the
        community cards in ``state``; it is negated when ``player`` is not
        the player this game was built for.
        """
        # presumably (simulation count, player count, ...) - verify against
        # estimate_hole_card_win_rate's signature
        win_rate = estimate_hole_card_win_rate(
            100, 2, self.hole_card, state['table']._community_card)
        return win_rate if self.player == player else -win_rate

    def terminal_test(self, state):
        """Return whether the emulator considers ``state`` the last round."""
        rule = self.emulator.game_rule
        return self.emulator._is_last_round(state, rule)

    def to_move(self, state):
        """Return the player who acts next in ``state``."""
        return state['next_player']

    def display(self, state):
        """Print ``state``."""
        print(state)

    def __repr__(self):
        return '<{}>'.format(type(self).__name__)

    def play_game(self, *players):
        """Let ``players`` move in turn until a terminal state is reached.

        Each player is a callable taking ``(game, state)`` and returning a
        move. Returns the utility of the final state for the player who was
        to move in the initial state.
        """
        current = self.initial_state
        while True:
            for choose_move in players:
                current = self.result(current, choose_move(self, current))
                if not self.terminal_test(current):
                    continue
                self.display(current)
                first_mover = self.to_move(self.initial_state)
                return self.utility(current, first_mover)
class PushFoldEmulator:
    """Two-player push/fold environment driven by an ``Emulator``.

    The two seats are named ``bb_player`` and ``sb_player``. ``play_action``
    accepts ``0`` (fold) or ``1`` (raise to the maximum, or call when no
    raise is available). The events returned by the last emulator call are
    kept in ``self.events`` and queried by the ``get_*`` helpers.
    """

    def __init__(self, starting_stack, small_blind):
        self.pok = PokerUtils()
        self.starting_stack = starting_stack
        # Was hard-coded to 10, silently disagreeing with the value passed
        # to the emulator below.
        self.small_blind = small_blind
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)
        self.hole_cards = {}
        self.players_info = self._build_players_info(starting_stack)
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        # One 13-slot indicator vector per player, filled by get_hand_feature.
        self.players_cards = [np.zeros(13), np.zeros(13)]
        self.suited = [0, 0]
        self.street = 'preflop'
        self.events = []
        self.game_state = []

    @staticmethod
    def _build_players_info(starting_stack):
        """Return the players-info mapping for both seats."""
        return {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

    def _first_event(self, event_type):
        """Return the first stored event of ``event_type``, or ``None``."""
        return next(
            (e for e in self.events if e['type'] == event_type), None)

    def is_round_finished(self):
        """Return True if the last events contain ``event_round_finish``."""
        return self._first_event('event_round_finish') is not None

    def new_street(self):
        """Record and return the street of the first ``event_new_street``.

        Returns ``False`` (leaving ``self.street`` untouched) when the last
        events contain no such event.
        """
        event = self._first_event('event_new_street')
        if event is None:
            return False
        self.street = event['street']
        return self.street

    def save_cards(self):
        """Store every player's hole cards, as strings, keyed by uuid."""
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                str(card) for card in player.hole_card
            ]

    def get_hand_feature(self):
        """Fill ``players_cards`` and ``suited`` from the current hole cards.

        Does nothing unless the current street is ``'preflop'``.
        """
        if self.street != 'preflop':
            return
        self.save_cards()
        players = self.game_state['table'].seats.players
        for i in range(2):
            first = str(players[i].hole_card[0])
            second = str(players[i].hole_card[1])
            self.players_cards[i][self.pok.get_card_value_index(first)] = 1
            self.players_cards[i][self.pok.get_card_value_index(second)] = 1
            self.suited[i] = 1 if self.pok.is_suited([first, second]) else 0

    def get_all_in_amount(self):
        """Return the maximum raise from the first ``event_ask_player``.

        Returns ``None`` when there is no such event.
        """
        event = self._first_event('event_ask_player')
        if event is None:
            return None
        # The third valid action is read as the raise entry.
        return event['valid_actions'][2]['amount']['max']

    def get_call_amount(self):
        """Return the call amount from the first ``event_ask_player``.

        Returns ``None`` when there is no such event.
        """
        event = self._first_event('event_ask_player')
        if event is None:
            return None
        # The second valid action is read as the call entry.
        return event['valid_actions'][1]['amount']

    def get_sb_reward(self):
        """Return the small blind's stack change for the finished round.

        Computed from the first listed winner of the first
        ``event_round_finish``: the winner's gain over ``starting_stack``
        if that winner is ``sb_player``, otherwise its negation. Returns
        ``None`` when the round has not finished.
        """
        event = self._first_event('event_round_finish')
        if event is None:
            return None
        winner = event['winners'][0]
        gain = winner['stack'] - self.starting_stack
        return gain if winner['uuid'] == 'sb_player' else -gain

    def play_action(self, action):
        """Apply ``action`` (0 = fold, 1 = push) to the current state.

        A push raises to the maximum amount; when that amount is ``-1`` the
        action falls back to a call. Any other ``action`` value is ignored.
        """
        if action == 0:
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'fold', 0)
        elif action == 1:
            all_in = self.get_all_in_amount()
            if all_in == -1:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'call', self.get_call_amount())
            else:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'raise', all_in)

    def new_hand(self, starting_stack):
        """Reset both stacks to ``starting_stack`` and start a new round."""
        self.street = 'preflop'
        self.hole_cards = {}
        self.starting_stack = starting_stack
        self.players_info = self._build_players_info(starting_stack)
        # The initial state is built once, from the fresh players_info; an
        # earlier build from the stale info was immediately overwritten.
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)
        self.players_cards = [np.zeros(13), np.zeros(13)]
        self.get_hand_feature()

    def get_action_histories_text(self, hole_cards=False):
        """Return a text rendering of the action histories of the hand.

        Streets with no actions are omitted. When ``hole_cards`` is truthy
        the saved hole cards are printed first.
        """
        if hole_cards:
            print(self.hole_cards)
        histo = self.events[0]['round_state']['action_histories']
        parts = []
        for street, actions in histo.items():
            if not actions:
                continue
            parts.append(street + '\n')
            for a in actions:
                if a['action'] == 'RAISE':
                    parts.append(
                        a['uuid'] + ' raises to ' + str(a['amount']) + '\n')
                elif a['action'] == 'FOLD':
                    parts.append(a['uuid'] + ' folds\n')
                elif a['action'] == 'CALL' and a['amount'] == 0:
                    parts.append(a['uuid'] + ' checks\n')
                else:
                    parts.append(a['uuid'] + ' ' + a['action'] + ' ' +
                                 str(a['amount']) + '\n')
        return "".join(parts)
class EmulatorTest(BaseUnitTest):
    """Tests for ``Emulator``: rule setup, player registration, applying
    actions, running rounds/games to completion and starting new rounds."""

    # uuids of the seats in the two sample round states used below.
    P1_UUID = "tojrbxmkuzrarnniosuhct"
    P2_UUID = "pwtwlmfciymjdoljkhagxa"
    THREE_PLAYER_UUIDS = (
        "ruypwwoqwuwdnauiwpefsw",
        "sqmfwdkpcoagzqxpxnmxwm",
        "uxrdiwvctvilasinweqven",
    )
    # Fixed hole cards, one pair per entry of THREE_PLAYER_UUIDS.
    THREE_PLAYER_HOLE_CARDS = (("C2", "C3"), ("HA", "CA"), ("D5", "H6"))

    def setUp(self):
        self.emu = Emulator()

    # ----- fixture helpers -------------------------------------------------

    def _two_player_state(self):
        """Restore the two-player sample and deal both players from the deck."""
        state = restore_game_state(TwoPlayerSample.round_state)
        state = attach_hole_card_from_deck(state, self.P1_UUID)
        return attach_hole_card_from_deck(state, self.P2_UUID)

    def _three_player_state_from_deck(self):
        """Restore the three-player sample and deal everyone from the deck."""
        state = restore_game_state(ThreePlayerGameStateSample.round_state)
        for uuid in self.THREE_PLAYER_UUIDS:
            state = attach_hole_card_from_deck(state, uuid)
        return state

    def _three_player_state_with_fixed_cards(self):
        """Restore the three-player sample with the fixed hole cards attached."""
        state = restore_game_state(ThreePlayerGameStateSample.round_state)
        pairs = zip(self.THREE_PLAYER_UUIDS, self.THREE_PLAYER_HOLE_CARDS)
        for uuid, labels in pairs:
            cards = [Card.from_str(label) for label in labels]
            state = attach_hole_card(state, uuid, cards)
        return state

    def _register_players(self, uuids, players):
        """Register ``players`` under ``uuids``, pairwise and in order."""
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)

    def _register_fold_men(self, uuids):
        """Register a fresh ``FoldMan`` for every uuid."""
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

    def _register_two_players(self, p1, p2):
        """Register ``p1``/``p2`` for the two-player sample seats."""
        self._register_players((self.P1_UUID, self.P2_UUID), (p1, p2))

    # ----- tests -----------------------------------------------------------

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule["player_num"])
        self.eq(8, self.emu.game_rule["max_round"])
        self.eq(5, self.emu.game_rule["sb_amount"])
        self.eq(3, self.emu.game_rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player("uuid", "hoge")

    def test_blind_structure(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {"ante": 5, "small_blind": 60}})
        p1 = TestPlayer([("fold", 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ('fold', 0)])
        self._register_two_players(p1, p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state["table"].seats.players[0].stack)
        self.eq(135, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
            3: {"ante": 5, "small_blind": 10},
            5: {"ante": 10, "small_blind": 20},
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            "uuid-1": {"name": "hoge", "stack": 100},
            "uuid-2": {"name": "fuga", "stack": 100},
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        # Small blind expected in the state after each successive fold.
        for expected_sb in (5, 5, 10, 10, 20, 20):
            state, _ = self.emu.apply_action(state, "fold")
            self.eq(expected_sb, state["small_blind_amount"])

    def test_apply_action(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            game_state["table"].seats.players[0]
            .round_action_histories[Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))
        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 4, 5, 0)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, game_state["table"].seats.players[0].stack)
        self.eq(70, game_state["table"].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))
        game_state, events = self.emu.apply_action(game_state, "fold")
        # Applying another action to the resulting state must raise.
        self.emu.apply_action(game_state, "fold")

    def test_run_until_round_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_two_players(p1, p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_two_players(p1, p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        uuids = [self.P1_UUID, self.P2_UUID]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["CA", "D2"], ["C8", "H5"]]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid, cards in zip(uuids, holecards):
            game_state = attach_hole_card(game_state, uuid, cards)
        game_state = attach_hole_card_from_deck(game_state, self.P2_UUID)
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self._register_two_players(p1, p2)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    def test_run_until_game_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 1)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, game_state["table"].seats.players[0].stack)
        self.eq(86, game_state["table"].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        game_state = self._three_player_state_with_fixed_cards()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold", 0), ("call", 10), ('call', 0), ('call', 10),
                   ("fold", 0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        self._register_players(self.THREE_PLAYER_UUIDS, players)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(292, game_state["table"].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        game_state = self._three_player_state_with_fixed_cards()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(265, game_state["table"].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men((self.P1_UUID, self.P2_UUID))

        # Run until the round finishes.
        game_state, event = self.emu.apply_action(game_state, "call", 15)
        game_state, event = self.emu.apply_action(game_state, "call", 0)
        game_state, event = self.emu.apply_action(game_state, "call", 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq(self.P1_UUID, events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # Case 1: seat 0 is left with a stack of 11 and is expected to be
        # marked FOLDED in the new round.
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1] - sb_amount - ante,
                game_state["table"].seats.players[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

        # Case 2: seat 1 is left with a stack of 16 and is expected to be
        # marked FOLDED in the new round.
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0] - sb_amount - ante,
                game_state["table"].seats.players[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # Seat 2 is left with a stack of 6 before the new round starts.
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 6
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])

    def test_start_new_round_game_finish_judge(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 11
        finish_state["table"].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)

        players_info = OrderedDict()
        players_info["uuid-1"] = {"name": "hoge", "stack": 100}
        players_info["uuid-2"] = {"name": "fuga", "stack": 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, events = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
class Game:
    """Depth-limited minimax search over emulated two-player poker states.

    States are advanced with an ``Emulator``; leaf states are scored by
    ``eval_heuristics`` as a weighted sum of three features.
    """

    def __init__(self, hole_card, player, state, num_rounds, valid_actions,
                 round_state, weights):
        self.hole_card = hole_card
        self.player = player
        self.init_state = state
        self.emulator = Emulator()
        self.num_rounds = num_rounds
        self.valid_actions = valid_actions
        self.weights = weights
        self.round_state = round_state
        self.emulator.set_game_rule(2, self.num_rounds, 10, 0)

    def terminal_test(self, state):
        """Return whether the emulator considers ``state`` the last round."""
        return self.emulator._is_last_round(state, self.emulator.game_rule)

    def actions(self, state):
        """Return the names of the legal moves in ``state``."""
        return [
            entry['action']
            for entry in self.emulator.generate_possible_actions(state)
        ]

    def eval_heuristics(self, player, state, win_rate):
        """Score ``state`` as ``dot(weights, [win_rate, pot, EHS])``.

        ``player`` is accepted for interface compatibility but not used.
        When both debug flags are set, the time spent is printed.
        """
        if isDebug:
            print("Evaluating heuristics")
            if isHeuristicTimed:
                start = time.time()
                print(time.time())

        # NOTE(review): 8.8 and the (x + 10) / 20 * 100 rescaling look like
        # normalisation constants chosen for the weights - confirm.
        amount_in_pot = float(self.round_state['pot']['main']['amount']) / 8.8
        EHS = (EffectiveHandStrength(
            self.hole_card, state['table']._community_card) + 10) / 20 * 100

        if isDebug:
            print("=======Got heuristics")
            if isHeuristicTimed:
                # Previously ``end = start = time.time()`` reset ``start``,
                # so the reported duration was always zero.
                end = time.time()
                print("==========Got heuristics in time: " +
                      str(end - start) + " secs")

        heuristics = [win_rate, amount_in_pot, EHS]
        return np.dot(self.weights, heuristics)

    def future_move(self, state):
        """Return the player who acts next in ``state``."""
        return state['next_player']

    def project(self, curr_state, move):
        """Return the state reached by applying ``move`` to ``curr_state``."""
        return self.emulator.apply_action(curr_state, move)[0]

    def minimax(self, newState, max_depth, hole_cards, community_cards):
        """Pick an action for ``newState`` by depth-limited alpha-beta search.

        Returns the best-scoring action name, except that ``'fold'`` is
        replaced by ``'call'``. Returns ``None`` if no action improves on
        negative infinity (e.g. there are no legal actions).
        """
        player = self.future_move(newState)
        inf = float('inf')

        # No community cards means the hand is still pre-flop.
        if not community_cards:
            win_rate = eval_pre_flop.eval_pre_flop(hole_cards) / 100 * 50 + 50
        else:
            win_rate = eval_post_flop.eval_post_flop_rank(
                hole_cards, community_cards) / 100 * 50 + 50

        def min_value(node, alpha, beta, depth):
            """Value of ``node`` for the minimising player, depth-limited."""
            if isDebug:
                print("In MIN")
            if depth == max_depth or self.terminal_test(node):
                return self.eval_heuristics(player, node, win_rate)
            v = inf
            for a in self.actions(node):
                v = min(
                    max_value(self.project(node, a), alpha, beta, depth + 1),
                    v)
                if v <= alpha:
                    return v  # prune: the maximiser already has better
                beta = min(beta, v)
            return v

        def max_value(node, alpha, beta, depth):
            """Value of ``node`` for the maximising player, depth-limited."""
            if isDebug:
                print("In MAX")
            if depth == max_depth or self.terminal_test(node):
                return self.eval_heuristics(player, node, win_rate)
            v = -inf
            for a in self.actions(node):
                v = max(
                    min_value(self.project(node, a), alpha, beta, depth + 1),
                    v)
                if v >= beta:
                    return v  # prune: the minimiser already has better
                alpha = max(alpha, v)
            return v

        # Root of the search: score every action and keep the best one; the
        # running best score doubles as alpha for the subtrees.
        best_score = -inf
        beta = inf
        best_action = None
        for a in self.actions(newState):
            v = min_value(self.project(newState, a), best_score, beta, 0)
            if v > best_score:
                best_score = v
                best_action = a

        # TODO: Since our player folds a lot. Please update/remove as required
        if best_action == 'fold':
            return 'call'
        return best_action
class EmulatorTest(BaseUnitTest):
    """Tests for ``Emulator``: rule setup, player registration, applying
    actions, running rounds/games to completion and starting new rounds."""

    # Player uuids of the two- and three-player sample fixtures.
    TWO_PLAYER_UUIDS = ('tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa')
    THREE_PLAYER_UUIDS = (
        'ruypwwoqwuwdnauiwpefsw',
        'sqmfwdkpcoagzqxpxnmxwm',
        'uxrdiwvctvilasinweqven',
    )

    def setUp(self):
        self.emu = Emulator()

    # ------------------------------------------------------------------
    # Fixture helpers (not collected as tests: names do not start with test)
    # ------------------------------------------------------------------

    @staticmethod
    def _seats(state):
        """Return the player objects seated at the table of ``state``."""
        return state['table'].seats.players

    @staticmethod
    def _state_dealt_from_deck(round_state, uuids):
        """Restore ``round_state`` and deal each uuid hole cards from the deck."""
        state = restore_game_state(round_state)
        for uuid in uuids:
            state = attach_hole_card_from_deck(state, uuid)
        return state

    @staticmethod
    def _state_with_hole_cards(round_state, uuids, card_strs):
        """Restore ``round_state`` and give each uuid the listed hole cards."""
        holecards = [[Card.from_str(s) for s in strs] for strs in card_strs]
        state = restore_game_state(round_state)
        for uuid, cards in zip(uuids, holecards):
            state = attach_hole_card(state, uuid, cards)
        return state

    def _two_player_state(self):
        """Two-player sample state with both hands dealt from the deck."""
        return self._state_dealt_from_deck(TwoPlayerSample.round_state,
                                           self.TWO_PLAYER_UUIDS)

    def _three_player_state(self):
        """Three-player sample state with all hands dealt from the deck."""
        return self._state_dealt_from_deck(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS)

    def _register_players(self, uuids, players):
        """Register ``players`` on the emulator under the matching uuids."""
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)

    def _register_fold_men(self, uuids):
        """Register a fresh ``FoldMan`` for every uuid."""
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

    # ------------------------------------------------------------------
    # Rule / registration
    # ------------------------------------------------------------------

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        rule = self.emu.game_rule
        self.eq(2, rule['player_num'])
        self.eq(8, rule['max_round'])
        self.eq(5, rule['sb_amount'])
        self.eq(3, rule['ante'])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('uuid-1', p1)
        self.emu.register_player('uuid-2', p2)
        self.eq(p1, self.emu.fetch_player('uuid-1'))
        self.eq(p2, self.emu.fetch_player('uuid-2'))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player('uuid', 'hoge')

    # ------------------------------------------------------------------
    # Blind structure
    # ------------------------------------------------------------------

    def test_blind_structure(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {'ante': 5, 'small_blind': 60}})
        p1 = TestPlayer([('fold', 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([('call', 15), ('call', 55), ('fold', 0)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))

        game_state, _ = self.emu.run_until_round_finish(game_state)
        self.eq(65, self._seats(game_state)[0].stack)
        self.eq(135, self._seats(game_state)[1].stack)

        game_state, _ = self.emu.start_new_round(game_state)
        game_state, _ = self.emu.run_until_round_finish(game_state)
        self.eq(120, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq('event_game_finish', events[0]['type'])
        self.eq(0, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player('uuid-1', p1)
        self.emu.register_player('uuid-2', p2)
        blind_structure = {
            3: {'ante': 5, 'small_blind': 10},
            5: {'ante': 10, 'small_blind': 20},
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            'uuid-1': {'name': 'hoge', 'stack': 100},
            'uuid-2': {'name': 'fuga', 'stack': 100},
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state['small_blind_amount'])
        state, _ = self.emu.start_new_round(state)
        # The small blind is re-checked after every fold.
        for expected_sb in (5, 5, 10, 10, 20, 20):
            state, _ = self.emu.apply_action(state, 'fold')
            self.eq(expected_sb, state['small_blind_amount'])

    # ------------------------------------------------------------------
    # apply_action
    # ------------------------------------------------------------------

    def test_apply_action(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, 'call', 15)
        self.eq(Const.Street.RIVER, game_state['street'])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            self._seats(game_state)[0].round_action_histories[
                Const.Street.TURN])
        self.eq(2, len(events))
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_ask_player', events[0]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_round_finish', events[0]['type'])

    def test_apply_action_game_finish_detect(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.eq('event_game_finish', events[-1]['type'])

    def test_apply_action_start_next_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 4, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, _ = self.emu.apply_action(game_state, 'fold')
        self.eq(120, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

        game_state, events = self.emu.apply_action(game_state, 'raise', 20)
        self.eq('event_ask_player', events[-1]['type'])
        self.eq(100, self._seats(game_state)[0].stack)
        self.eq(70, self._seats(game_state)[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, _ = self.emu.apply_action(game_state, 'fold')
        # Second action on the finished game is the call expected to raise.
        self.emu.apply_action(game_state, 'fold')

    # ------------------------------------------------------------------
    # run_until_round_finish
    # ------------------------------------------------------------------

    def test_run_until_round_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('fold', 0)])
        p2 = TestPlayer([('call', 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('event_round_finish', events[2]['type'])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('fold', 0)])
        p2 = TestPlayer([('call', 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state, _ = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        game_state = self._state_with_hole_cards(
            TwoPlayerSample.round_state, self.TWO_PLAYER_UUIDS,
            [['Ac', '2d'], ['8c', '5h']])
        # Kept from the original test: the second player is dealt from the
        # deck again after the explicit assignment above.
        # NOTE(review): presumably this replaces the explicit hand -- confirm.
        game_state = attach_hole_card_from_deck(game_state,
                                                'pwtwlmfciymjdoljkhagxa')
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([('raise', 65)])
        p2 = TestPlayer([('call', 15), ('call', 65)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state['table'].deck.deck.append(Card.from_str('7c'))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('event_ask_player', events[2]['type'])
        self.eq('event_round_finish', events[3]['type'])
        self.eq('event_game_finish', events[4]['type'])
        self.eq(0, events[4]['players'][0]['stack'])
        self.eq(200, events[4]['players'][1]['stack'])

    # ------------------------------------------------------------------
    # run_until_game_finish
    # ------------------------------------------------------------------

    def test_run_until_game_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 1)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(114, self._seats(game_state)[0].stack)
        self.eq(86, self._seats(game_state)[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        game_state = self._state_with_hole_cards(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS,
            [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']])
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [('fold', 0), ('call', 10), ('call', 0), ('call', 10),
                   ('fold', 0)]
        p2_acts = []
        p3_acts = [('raise', 10)]
        players = [TestPlayer(acts) for acts in (p1_acts, p2_acts, p3_acts)]
        self._register_players(self.THREE_PLAYER_UUIDS, players)
        game_state['table'].deck.deck.append(Card.from_str('7c'))

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(0, self._seats(game_state)[0].stack)
        self.eq(0, self._seats(game_state)[1].stack)
        self.eq(292, self._seats(game_state)[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        game_state = self._state_with_hole_cards(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS,
            [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']])
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)
        game_state['table'].deck.deck.append(Card.from_str('7c'))

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(10, game_state['round_count'])
        self.eq(35, self._seats(game_state)[0].stack)
        self.eq(0, self._seats(game_state)[1].stack)
        self.eq(265, self._seats(game_state)[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        rule = self.emu.game_rule
        self.false(self.emu._is_last_round(game_state, rule))
        game_state['street'] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, rule))
        game_state['round_count'] = 2
        self.false(self.emu._is_last_round(game_state, rule))
        game_state['table'].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, rule))

    # ------------------------------------------------------------------
    # start_new_round
    # ------------------------------------------------------------------

    def test_start_new_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        # run until round finish
        for amount in (15, 0, 0):
            game_state, _ = self.emu.apply_action(game_state, 'call', amount)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state['round_count'])
        self.eq(1, game_state['table'].dealer_btn)
        self.eq(0, game_state['street'])
        self.eq(0, game_state['next_player'])
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('preflop', events[0]['street'])
        self.eq('tojrbxmkuzrarnniosuhct', events[1]['uuid'])

    def test_start_new_round_exclude_no_money_players(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # case 1: seat 0 holds 11 chips, below small blind + ante (12)
        finish_state, _ = self.emu.apply_action(original, 'fold')
        self._seats(finish_state)[0].stack = 11
        stacks = [p.stack for p in self._seats(finish_state)]
        game_state, _ = self.emu.start_new_round(finish_state)
        seats = self._seats(game_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(1, game_state['next_player'])
        self.eq(stacks[1] - sb_amount - ante, seats[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante, seats[2].stack)
        self.eq(PayInfo.FOLDED, seats[0].pay_info.status)
        self.eq(sb_amount * 3 + ante * 2,
                GameEvaluator.create_pot(seats)[0]['amount'])

        # case 2: seat 1 holds 16 chips, below big blind + ante (17)
        finish_state, _ = self.emu.apply_action(original, 'fold')
        self._seats(finish_state)[1].stack = 16
        stacks = [p.stack for p in self._seats(finish_state)]
        game_state, _ = self.emu.start_new_round(finish_state)
        seats = self._seats(game_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(0, game_state['next_player'])
        self.eq(stacks[0] - sb_amount - ante, seats[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante, seats[2].stack)
        self.eq(PayInfo.FOLDED, seats[1].pay_info.status)
        self.eq(PayInfo.PLAY_TILL_END, seats[0].pay_info.status)
        self.eq(sb_amount * 3 + ante * 2,
                GameEvaluator.create_pot(seats)[0]['amount'])

    def test_start_new_round_exclude_no_money_players2(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # seat 2 holds 6 chips, below small blind + ante (12)
        finish_state, _ = self.emu.apply_action(original, 'fold')
        self._seats(finish_state)[2].stack = 6
        game_state, _ = self.emu.start_new_round(finish_state)
        self.eq(0, game_state['table'].dealer_btn)
        self.eq(1, game_state['table'].sb_pos())
        self.eq(1, game_state['next_player'])

    def test_start_new_round_game_finish_judge(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        finish_state, _ = self.emu.apply_action(original, 'fold')
        self._seats(finish_state)[2].stack = 11
        self._seats(finish_state)[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq('event_game_finish', events[0]['type'])

    # ------------------------------------------------------------------
    # State generation
    # ------------------------------------------------------------------

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        players_info = OrderedDict()
        players_info['uuid-1'] = {'name': 'hoge', 'stack': 100}
        players_info['uuid-2'] = {'name': 'fuga', 'stack': 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state['table']
        self.eq(0, state['round_count'])
        self.eq(5, state['small_blind_amount'])
        self.eq(100, table.seats.players[0].stack)
        self.eq('uuid-1', table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq('uuid-2', table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, _ = self.emu.start_new_round(state)
        self.eq(0, state['table'].dealer_btn)
        self.eq(1, state['table'].sb_pos())
        self.eq(0, state['table'].bb_pos())
        self.eq(1, state['next_player'])
        state, _ = self.emu.apply_action(state, 'call', 10)
        self.eq(1, state['next_player'])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
class EmulatorTest(BaseUnitTest):
    """Tests for ``Emulator``: rule setup, player registration, applying
    actions, running rounds/games to completion and starting new rounds."""

    # Player uuids of the two- and three-player sample fixtures.
    TWO_PLAYER_UUIDS = ("tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa")
    THREE_PLAYER_UUIDS = (
        "ruypwwoqwuwdnauiwpefsw",
        "sqmfwdkpcoagzqxpxnmxwm",
        "uxrdiwvctvilasinweqven",
    )

    def setUp(self):
        self.emu = Emulator()

    # ------------------------------------------------------------------
    # Fixture helpers (not collected as tests: names do not start with test)
    # ------------------------------------------------------------------

    @staticmethod
    def _seats(state):
        """Return the player objects seated at the table of ``state``."""
        return state["table"].seats.players

    @staticmethod
    def _state_dealt_from_deck(round_state, uuids):
        """Restore ``round_state`` and deal each uuid hole cards from the deck."""
        state = restore_game_state(round_state)
        for uuid in uuids:
            state = attach_hole_card_from_deck(state, uuid)
        return state

    @staticmethod
    def _state_with_hole_cards(round_state, uuids, card_strs):
        """Restore ``round_state`` and give each uuid the listed hole cards."""
        holecards = [[Card.from_str(s) for s in strs] for strs in card_strs]
        state = restore_game_state(round_state)
        for uuid, cards in zip(uuids, holecards):
            state = attach_hole_card(state, uuid, cards)
        return state

    def _two_player_state(self):
        """Two-player sample state with both hands dealt from the deck."""
        return self._state_dealt_from_deck(TwoPlayerSample.round_state,
                                           self.TWO_PLAYER_UUIDS)

    def _three_player_state(self):
        """Three-player sample state with all hands dealt from the deck."""
        return self._state_dealt_from_deck(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS)

    def _register_players(self, uuids, players):
        """Register ``players`` on the emulator under the matching uuids."""
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)

    def _register_fold_men(self, uuids):
        """Register a fresh ``FoldMan`` for every uuid."""
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

    # ------------------------------------------------------------------
    # Rule / registration
    # ------------------------------------------------------------------

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        rule = self.emu.game_rule
        self.eq(2, rule["player_num"])
        self.eq(8, rule["max_round"])
        self.eq(5, rule["sb_amount"])
        self.eq(3, rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player("uuid", "hoge")

    # ------------------------------------------------------------------
    # Blind structure
    # ------------------------------------------------------------------

    def test_blind_structure(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {"ante": 5, "small_blind": 60}})
        p1 = TestPlayer([("fold", 0), ("raise", 55), ("call", 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ("fold", 0)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))

        game_state, _ = self.emu.run_until_round_finish(game_state)
        self.eq(65, self._seats(game_state)[0].stack)
        self.eq(135, self._seats(game_state)[1].stack)

        game_state, _ = self.emu.start_new_round(game_state)
        game_state, _ = self.emu.run_until_round_finish(game_state)
        self.eq(120, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
            3: {"ante": 5, "small_blind": 10},
            5: {"ante": 10, "small_blind": 20},
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            "uuid-1": {"name": "hoge", "stack": 100},
            "uuid-2": {"name": "fuga", "stack": 100},
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        # The small blind is re-checked after every fold.
        for expected_sb in (5, 5, 10, 10, 20, 20):
            state, _ = self.emu.apply_action(state, "fold")
            self.eq(expected_sb, state["small_blind_amount"])

    # ------------------------------------------------------------------
    # apply_action
    # ------------------------------------------------------------------

    def test_apply_action(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            self._seats(game_state)[0].round_action_histories[
                Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 4, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, _ = self.emu.apply_action(game_state, "fold")
        self.eq(120, self._seats(game_state)[0].stack)
        self.eq(80, self._seats(game_state)[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, self._seats(game_state)[0].stack)
        self.eq(70, self._seats(game_state)[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, _ = self.emu.apply_action(game_state, "fold")
        # Second action on the finished game is the call expected to raise.
        self.emu.apply_action(game_state, "fold")

    # ------------------------------------------------------------------
    # run_until_round_finish
    # ------------------------------------------------------------------

    def test_run_until_round_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state, _ = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        game_state = self._state_with_hole_cards(
            TwoPlayerSample.round_state, self.TWO_PLAYER_UUIDS,
            [["CA", "D2"], ["C8", "H5"]])
        # Kept from the original test: the second player is dealt from the
        # deck again after the explicit assignment above.
        # NOTE(review): presumably this replaces the explicit hand -- confirm.
        game_state = attach_hole_card_from_deck(game_state,
                                                "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self._register_players(self.TWO_PLAYER_UUIDS, (p1, p2))
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    # ------------------------------------------------------------------
    # run_until_game_finish
    # ------------------------------------------------------------------

    def test_run_until_game_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 1)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, self._seats(game_state)[0].stack)
        self.eq(86, self._seats(game_state)[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        game_state = self._state_with_hole_cards(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS,
            [["C2", "C3"], ["HA", "CA"], ["D5", "H6"]])
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold", 0), ("call", 10), ("call", 0), ("call", 10),
                   ("fold", 0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in (p1_acts, p2_acts, p3_acts)]
        self._register_players(self.THREE_PLAYER_UUIDS, players)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, self._seats(game_state)[0].stack)
        self.eq(0, self._seats(game_state)[1].stack)
        self.eq(292, self._seats(game_state)[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        game_state = self._state_with_hole_cards(
            ThreePlayerGameStateSample.round_state, self.THREE_PLAYER_UUIDS,
            [["C2", "C3"], ["HA", "CA"], ["D5", "H6"]])
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, self._seats(game_state)[0].stack)
        self.eq(0, self._seats(game_state)[1].stack)
        self.eq(265, self._seats(game_state)[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        rule = self.emu.game_rule
        self.false(self.emu._is_last_round(game_state, rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, rule))
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, rule))

    # ------------------------------------------------------------------
    # start_new_round
    # ------------------------------------------------------------------

    def test_start_new_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        # run until round finish
        for amount in (15, 0, 0):
            game_state, _ = self.emu.apply_action(game_state, "call", amount)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq("tojrbxmkuzrarnniosuhct", events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # case 1: seat 0 holds 11 chips, below small blind + ante (12)
        finish_state, _ = self.emu.apply_action(original, "fold")
        self._seats(finish_state)[0].stack = 11
        stacks = [p.stack for p in self._seats(finish_state)]
        game_state, _ = self.emu.start_new_round(finish_state)
        seats = self._seats(game_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1] - sb_amount - ante, seats[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante, seats[2].stack)
        self.eq(PayInfo.FOLDED, seats[0].pay_info.status)
        self.eq(sb_amount * 3 + ante * 2,
                GameEvaluator.create_pot(seats)[0]["amount"])

        # case 2: seat 1 holds 16 chips, below big blind + ante (17)
        finish_state, _ = self.emu.apply_action(original, "fold")
        self._seats(finish_state)[1].stack = 16
        stacks = [p.stack for p in self._seats(finish_state)]
        game_state, _ = self.emu.start_new_round(finish_state)
        seats = self._seats(game_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0] - sb_amount - ante, seats[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante, seats[2].stack)
        self.eq(PayInfo.FOLDED, seats[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END, seats[0].pay_info.status)
        self.eq(sb_amount * 3 + ante * 2,
                GameEvaluator.create_pot(seats)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # seat 2 holds 6 chips, below small blind + ante (12)
        finish_state, _ = self.emu.apply_action(original, "fold")
        self._seats(finish_state)[2].stack = 6
        game_state, _ = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])

    def test_start_new_round_game_finish_judge(self):
        original = self._three_player_state()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        finish_state, _ = self.emu.apply_action(original, "fold")
        self._seats(finish_state)[2].stack = 11
        self._seats(finish_state)[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    # ------------------------------------------------------------------
    # State generation
    # ------------------------------------------------------------------

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        players_info = OrderedDict()
        players_info["uuid-1"] = {"name": "hoge", "stack": 100}
        players_info["uuid-2"] = {"name": "fuga", "stack": 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, _ = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, _ = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
class CFR:
    """Counterfactual-regret trainer for heads-up hands dealt by an Emulator.

    Every information set is keyed by the acting player's simplified hand
    followed by the action history, and is stored as a ``Node`` in
    ``game_states_``.
    """

    def __init__(self):
        self.game_states_ = dict()  # maps information-set key -> Node
        self.emulator = Emulator()

    @staticmethod
    def get_higher_rank(card1, card2):
        """Return the card with the larger ``rank``; ``card2`` on a tie."""
        if card1.rank > card2.rank:
            return card1
        return card2

    @staticmethod
    def get_higher_suit(card1, card2):
        """Return the card with the larger ``suit``, or ``0`` if suits match."""
        if card1.suit > card2.suit:
            return card1
        elif card1.suit == card2.suit:
            return 0
        return card2

    @staticmethod
    def simplify_hand(hand, community_cards):
        """Compress a two-card hand into a short information-set label.

        The higher-ranked card is written first:

        * ``Th 9h`` becomes ``T9s`` (both cards share a suit),
        * ``Th 9s`` becomes ``T9o`` (off-suit),
        * ``Th Ts`` becomes ``TT`` (pair).

        When at least three community cards are supplied, non-pair labels
        get a ``_<strength>`` suffix taken from
        ``HandEvaluator.gen_hand_rank_info``. Pairs return before that step,
        so they never carry the suffix.

        Args:
            hand: the two hole cards as card strings.
            community_cards: the board as card strings (may be empty).

        Returns:
            The label string described above.
        """
        generated_hand = gen_cards(hand)
        card1, card2 = generated_hand[0], generated_hand[1]
        first, second = str(card1), str(card2)

        # Pair: the two rank characters are enough.
        if card1.rank == card2.rank:
            return first[1] + second[1]

        # Rank character of the higher card first, then the other one.
        label = str(CFR.get_higher_rank(card1, card2))[1]
        label += second[1] if label == first[1] else first[1]
        # Index 0 of the card string is compared to decide suited/off-suit.
        label += "s" if first[0] == second[0] else "o"

        if len(community_cards) >= 3:
            strength = HandEvaluator.gen_hand_rank_info(
                generated_hand, gen_cards(community_cards))
            label += "_%s" % strength.get("hand")["strength"]
        return label

    def train(self, iterations, ante=1.0, bet1=2.0, bet2=8.0,
              print_interval=1000, save_interval=1000):
        """Run fictitious self-play and return player one's average utility.

        Args:
            iterations: number of hands to deal and traverse.
            ante, bet1, bet2: stored on the instance only; the emulator rule
                used for dealing is fixed at ``set_game_rule(2, 5, 10, 5)``.
            print_interval: progress is printed every this many iterations.
            save_interval: accepted for interface compatibility; unused.

        Returns:
            ``util / iterations`` where ``util`` sums the root value of
            every traversal.
        """
        util = 0.0
        self.ante = ante
        self.bet1 = bet1
        self.bet2 = bet2
        for i in range(iterations):
            if i % print_interval == 0 and i != 0:
                print("P1 expected value after %i iterations: %f" % (i, util / i))
            self.emulator.set_game_rule(2, 5, 10, 5)
            init_state = self.emulator.generate_initial_game_state({
                "0": {"stack": 10000, "name": "0"},
                "1": {"stack": 10000, "name": "1"},
            })
            round_start, events = self.emulator.start_new_round(init_state)
            seats = round_start["table"].seats.players
            cards = [
                [str(card) for card in seats[0].hole_card],
                [str(card) for card in seats[1].hole_card],
            ]
            util += self.cfr(cards, [], round_start, events, 1, 1)
        return util / iterations

    def get_strategy(self):
        """Return ``{information-set key: average strategy}`` for all nodes."""
        return {
            state: node.get_average_strategy()
            for state, node in self.game_states_.items()
        }

    def cfr(self, cards, history, game_state, events, probability1, probability2):
        """Traverse one game-tree node and accumulate regrets.

        Args:
            cards: hole cards, index 0 for player one and 1 for player two.
            history: list of moves used to reach this game state.
            game_state: PyPokerEngine game state of this node.
            events: events returned by the emulator call that produced
                ``game_state``.
            probability1: probability of reaching this state for player 1.
            probability2: probability of reaching this state for player 2.

        Returns:
            The utility of this node for ``player`` (see below).
        """
        # NOTE(review): ``player`` is derived as ``1 - next_player``; confirm
        # this matches the seat whose hand/reach is meant to be used here.
        player = 1 - int(game_state["next_player"])
        opponent = 1 - player
        player_hand = cards[player]
        probability_weight = probability1 if player == 0 else probability2

        # Terminal state: the round is over, utility is the stack difference.
        if any(event["type"] == "event_round_finish" for event in events):
            seats = game_state["table"].seats.players
            stacks = {
                seats[0].uuid: seats[0].stack,
                seats[1].uuid: seats[1].stack,
            }
            return stacks[str(player)] - stacks[str(opponent)]

        community_card = [
            str(card) for card in game_state["table"].get_community_card()
        ]
        # Information-set key: simplified hand followed by the move history.
        state = str(self.simplify_hand(player_hand, community_card))
        state += "".join(history)

        if state in self.game_states_:
            node = self.game_states_[state]  # reuse the existing node
            possible_actions = node.actions_
        else:
            # First visit: create a node for the actions legal right now.
            possible_actions = [
                ACTION_TO_HISTORY_MAPPING[i["action"]]
                for i in self.emulator.generate_possible_actions(game_state)
            ]
            node = Node(possible_actions)
            self.game_states_[state] = node

        strategy = node.get_strategy(probability_weight)
        util = dict()
        node_util = 0
        # Utility of every action, and from those the utility of this node.
        for action in possible_actions:
            next_history = list(history)  # copy
            next_history.append(action)
            new_game_state, new_event = self.emulator.apply_action(
                game_state, HISTORY_TO_ACTION_MAPPING[action])
            if player == 0:
                util[action] = -self.cfr(
                    cards, next_history, new_game_state, new_event,
                    probability1 * strategy[action], probability2)
            else:
                util[action] = -self.cfr(
                    cards, next_history, new_game_state, new_event,
                    probability1, probability2 * strategy[action])
            node_util += strategy[action] * util[action]

        # Regret of each action, weighted by the other player's reach.
        for action in possible_actions:
            regret = util[action] - node_util
            if player == 0:
                node.regret_sum_[action] += regret * probability2
            else:
                node.regret_sum_[action] += regret * probability1
        return node_util
class RLPokerPlayer(BasePokerPlayer):
    """Poker player that scores (state, move) pairs with a learned model.

    In study mode the player also records its decisions, replays them
    through an ``Emulator`` and fits the model on the observed rewards.
    """

    # Street markers treated as "the round is over" when reading a state.
    _TERMINAL_STREETS = ('showdown', 'finished', 4, 5)

    def __init__(
            self,
            study_mode=False,
            model_file=None,
            new_model_file=None,
            gammaReward=0.1,
            alphaUpdateNet=0.1,
            epsilonRandom=0.05,
            decayRandom=0.5,
            players=None,
            max_history_len=1000,
    ):
        """Create the player.

        Args:
            study_mode: when true, keep a history and train the model.
            model_file: file to ``load`` the model from; a fresh
                ``commons.model1()`` is built when ``None``.
            new_model_file: where the model is saved during study mode.
            gammaReward: weight of the simulated follow-up reward.
            alphaUpdateNet: per-round probability of a training step.
            epsilonRandom: probability of picking a random move.
            decayRandom: factor applied to ``epsilonRandom``.
            players: table line-up for study mode; the ``None`` entry marks
                this player's own seat. Defaults to eight seats sharing one
                ``RandomPlayer`` followed by ``None``.
            max_history_len: size at which the oldest history entry is
                dropped.
        """
        # The original signature listed ``new_model_file`` twice (a
        # SyntaxError); it is declared once here, at its first position.
        self.stats = commons.PlayerStats()
        self.new_model_file = new_model_file
        self.gammaReward = gammaReward
        self.alphaUpdateNet = alphaUpdateNet
        self.decayRandom = decayRandom
        self.epsilonRandom = epsilonRandom
        self.study_mode = study_mode
        self.max_history_len = max_history_len
        # NOTE(review): ``my_seat`` is only assigned in study mode, yet
        # ``declare_action`` reads it unconditionally - confirm that
        # non-study callers set it themselves.
        if self.study_mode:
            if players is None:
                # Built per instance instead of once at definition time.
                players = [RandomPlayer()] * 8 + [None]
            self.players = players
            for seat, occupant in enumerate(self.players):
                if occupant is None:
                    self.my_seat = seat
        self.history = deque()
        if model_file is not None:
            self.model = load(model_file)
        else:
            self.model = commons.model1()

    def good_moves(self, valid_actions, my_stack):
        """Return the candidate moves to score.

        Always contains fold (type 0) and call (type 1). Unless the raise
        minimum is ``-1``, three raises are added: two small blinds
        (type 2), half the stack (type 3) and the full stack (type 4),
        each clamped into the legal raise range.
        """
        moves = [
            {'action': 'fold', 'amount': 0, 'type': 0},
            {'action': 'call', 'amount': valid_actions[1]['amount'], 'type': 1},
        ]
        raise_range = valid_actions[2]['amount']
        if raise_range['min'] == -1:
            return moves
        raise_min, raise_max = raise_range['min'], raise_range['max']

        def clamp(amount):
            return max(min(amount, raise_max), raise_min)

        moves.append({
            'action': 'raise',
            'amount': clamp(self.small_blind_amount * 2),
            'type': 2,
        })
        moves.append({'action': 'raise', 'amount': clamp(my_stack // 2), 'type': 3})
        moves.append({'action': 'raise', 'amount': clamp(my_stack), 'type': 4})
        return moves

    def turn2vec(self, state_vec, move):
        """Append the move's type and amount to ``state_vec`` as one row."""
        features = np.concatenate(
            (np.array(state_vec), np.array([move['type'], move['amount']])))
        return features.reshape((1, -1))

    def _predict_reward(self, cur_state_vec, move):
        """Return the model's scalar prediction for one (state, move) pair."""
        return float(
            self.model.predict(self.turn2vec(cur_state_vec, move), batch_size=1))

    def find_best_strat(self, valid_actions, cur_state_vec, my_stack):
        """Return ``(move, predicted_reward)`` for the current state.

        With probability ``epsilonRandom`` a uniformly random candidate is
        returned; otherwise the candidate with the highest prediction.
        """
        best_move, best_reward = None, -1500 * 9
        candidates = self.good_moves(valid_actions, my_stack)
        if sps.bernoulli.rvs(p=self.epsilonRandom):
            ind = sps.randint.rvs(low=0, high=len(candidates))
            move = candidates[ind]
            return move, self._predict_reward(cur_state_vec, move)
        for move in candidates:
            cur_reward = self._predict_reward(cur_state_vec, move)
            if cur_reward > best_reward:
                best_move = move
                best_reward = cur_reward
        return best_move, best_reward

    def declare_action(self, valid_actions, hole_card, round_state):
        """Pick a move, log it, and record it when in study mode."""
        self.stats.update(hole_card, round_state)
        round_state_vec = self.stats.calc_fine_params(hole_card, round_state)
        my_stack = round_state['seats'][self.my_seat]['stack']
        best_move, best_reward = self.find_best_strat(
            valid_actions, round_state_vec, my_stack)
        action, amount = best_move['action'], best_move['amount']
        print('action {}, amount {}'.format(action, amount))
        if self.study_mode:
            self.update_history(round_state_vec, best_move, best_reward,
                                round_state)
        return action, amount

    def update_history(self, round_state_vec, best_move, best_reward,
                       round_state):
        """Append one decision record; drop the oldest at the size limit."""
        self.history.append({
            'states_vec': round_state_vec,
            'states_original': round_state,
            'moves': best_move,
            'rewards': best_reward,
            'stats': deepcopy(self.stats),
        })
        if len(self.history) == self.max_history_len:
            self.history.popleft()

    def receive_game_start_message(self, game_info):
        """Store the game rules and build the emulator used for look-ahead."""
        if self.study_mode:
            self.uuid = game_info["seats"][self.my_seat]['uuid']
        self.stats.init_player_names(game_info)
        self.player_num = game_info["player_num"]
        self.max_round = game_info["rule"]["max_round"]
        self.small_blind_amount = game_info["rule"]["small_blind_amount"]
        self.ante_amount = game_info["rule"]["ante"]
        self.emulator = Emulator()
        self.emulator.set_game_rule(self.player_num, self.max_round,
                                    self.small_blind_amount, self.ante_amount)
        if self.study_mode:
            # Register the algorithm of each seat for the simulation; the
            # ``None`` placeholder stands for this player.
            for seat in range(self.player_num):
                occupant = self.players[seat]
                self.emulator.register_player(
                    uuid=game_info["seats"][seat]["uuid"],
                    player=occupant if occupant is not None else self)

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Remember the stack at round start to compute the round reward."""
        self.start_round_stack = seats[self.my_seat]['stack']

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        """Save the model after every update when studying to a target file."""
        if self.new_model_file is not None and self.study_mode:
            self.model.save(self.new_model_file)

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Record the round outcome and occasionally train on the history."""
        if not self.study_mode:
            return
        best_move = {'action': 'finish', 'amount': 0, 'type': 5}
        round_state_vec = -np.ones(commons.FEATURES_LEN - 2)
        best_reward = (round_state['seats'][self.my_seat]['stack']
                       - self.start_round_stack)
        self.update_history(round_state_vec, best_move, best_reward,
                            round_state)
        if len(self.history) > 10 and sps.bernoulli.rvs(p=self.alphaUpdateNet):
            ind = sps.randint.rvs(low=0, high=len(self.history) - 2)
            self.learn_with_states(self.history[ind], self.history[ind + 1])
            # NOTE(review): the source's indentation was lost; the decay is
            # applied per training step here - confirm it was not per round.
            self.epsilonRandom *= self.decayRandom

    def do_best_simulation(self, next_state, next_state_vec, next_state_stats):
        """Return the best reward reachable one action after ``next_state``.

        Every candidate move is applied in the emulator. A finished round
        contributes the stack change since round start; otherwise the
        model's best prediction for the following state is used.
        """
        game_state = restore_game_state(next_state)
        possible_actions = self.emulator.generate_possible_actions(game_state)
        good_actions = self.good_moves(
            possible_actions, next_state['seats'][self.my_seat]['stack'])
        best_reward = -1500 * 9
        for action in good_actions:
            try:
                outcome = self.emulator.apply_action(
                    game_state, action['action'], action['amount'])
                next_next_state = outcome[1][-1]['round_state']
                if next_next_state['street'] in self._TERMINAL_STREETS:
                    best_reward = max(
                        best_reward,
                        next_next_state['seats'][self.my_seat]['stack']
                        - self.start_round_stack)
                else:
                    next_next_game_state = restore_game_state(next_next_state)
                    hole_card = attach_hole_card_from_deck(
                        next_next_game_state, self.uuid)
                    next_state_stats.update(hole_card, next_next_state)
                    # NOTE(review): features come from ``self.stats`` although
                    # ``next_state_stats`` was just updated - confirm.
                    next_next_state_vec = self.stats.calc_fine_params(
                        hole_card, next_next_state)
                    next_next_actions = self.emulator.generate_possible_actions(
                        next_next_game_state)
                    best_reward = max(
                        best_reward,
                        self.find_best_strat(
                            next_next_actions,
                            next_next_state_vec,
                            next_next_state['seats'][self.my_seat]['stack'],
                        )[1])
            except Exception:
                # Best effort: a move the emulator cannot simulate is skipped.
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                continue
        return best_reward

    def learn_with_states(self, state, next_state):
        """Fit the model on one transition taken from the history."""
        if next_state['states_original']['street'] in self._TERMINAL_STREETS:
            reward = next_state['rewards']
        else:
            reward = next_state['rewards'] + \
                self.gammaReward * self.do_best_simulation(
                    next_state['states_original'],
                    next_state['states_vec'],
                    next_state['stats'],
                )
        X = self.turn2vec(state['states_vec'], state['moves'])
        y = np.array([reward])
        self.model.fit(x=X, y=y, epochs=1, verbose=0, batch_size=1)
class CustomEmulator:
    """Heads-up wrapper around ``Emulator`` that also builds feature arrays.

    The two seats are named ``bb_player`` and ``sb_player``. After each
    emulator call the latest events are kept in ``self.events`` and the
    resulting state in ``self.game_state``.
    """

    # Index of each street inside ``actions_feature``.
    _STREET_SLOTS = {'preflop': 0, 'flop': 1, 'turn': 2, 'river': 3}

    def __init__(self, starting_stack, small_blind):
        """Configure the emulator and reset all feature buffers.

        Args:
            starting_stack: chips each player starts every hand with.
            small_blind: small blind amount passed to the game rule.
        """
        self.pok = PokerUtils()
        # Keep the values actually passed in; they were previously
        # overwritten with 500 / 10 regardless of the arguments.
        self.starting_stack = starting_stack
        self.small_blind = small_blind
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)
        self.hole_cards = {}
        self.players_info = {
            "bb_player": {"name": "bb_player", "stack": starting_stack},
            "sb_player": {"name": "sb_player", "stack": starting_stack},
        }
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        self.street = 'preflop'
        self.events = []
        self.game_state = []
        self.players_cards = [np.zeros(52), np.zeros(52)]
        self.cards_feature = [np.zeros(52), np.zeros(52), np.zeros(52)]
        self.actions_feature = [
            np.zeros(6), np.zeros(6), np.zeros(6), np.zeros(6)
        ]

    def _first_event(self, event_type):
        """Return the first pending event of ``event_type``, or ``None``."""
        for event in self.events:
            if event['type'] == event_type:
                return event
        return None

    def is_round_finished(self):
        """Return True when the pending events contain a round finish."""
        return self._first_event('event_round_finish') is not None

    def new_street(self):
        """Advance ``self.street`` if a new street started.

        Returns:
            The new street name, or ``False`` when no street started.
        """
        event = self._first_event('event_new_street')
        if event is None:
            return False
        self.street = event['street']
        return self.street

    def save_cards(self):
        """Store every player's hole cards as strings, keyed by uuid."""
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                str(card) for card in player.hole_card
            ]

    def make_cards_feature(self):
        """Mark the cards revealed on the current street in the 52-slot arrays."""
        card_index = self.pok.get_card_total_index
        if self.street == 'preflop':
            self.save_cards()
            players = self.game_state['table'].seats.players
            for i in range(2):
                self.players_cards[i][card_index(str(players[i].hole_card[0]))] = 1
                self.players_cards[i][card_index(str(players[i].hole_card[1]))] = 1
        elif self.street == 'flop':
            for card in self.events[0]['round_state']['community_card']:
                self.cards_feature[0][card_index(card)] = 1
        elif self.street == 'turn':
            board = self.events[0]['round_state']['community_card']
            self.cards_feature[1][card_index(board[3])] = 1
        elif self.street == 'river':
            board = self.events[0]['round_state']['community_card']
            self.cards_feature[2][card_index(board[4])] = 1

    def make_engineered_cards_feature(self):
        """Same encoding as ``make_cards_feature`` (was a verbatim copy)."""
        self.make_cards_feature()

    def make_actions_feature(self):
        """Encode the current street's action history into its slot."""
        slot = self._STREET_SLOTS.get(self.street)
        if slot is None:
            return
        histories = self.events[0]['round_state']['action_histories']
        self.actions_feature[slot] = self.pok.get_street_actions(
            self.starting_stack, histories[self.street])

    def make_features(self):
        """Refresh the action features and, on a new street, the card features."""
        # Actions are recorded on every call.
        self.make_actions_feature()
        # Call ``new_street``: comparing the bound method itself to False
        # was always true and never advanced ``self.street``.
        if self.new_street() is not False:
            self.make_cards_feature()

    def get_minraise_amount(self):
        """Return the minimum raise of the pending ask event, if any."""
        event = self._first_event('event_ask_player')
        if event is not None:
            return event['valid_actions'][2]['amount']['min']

    def get_all_in_amount(self):
        """Return the maximum raise of the pending ask event, if any."""
        event = self._first_event('event_ask_player')
        if event is not None:
            return event['valid_actions'][2]['amount']['max']

    def get_call_amount(self):
        """Return the call amount of the pending ask event, if any."""
        event = self._first_event('event_ask_player')
        if event is not None:
            return event['valid_actions'][1]['amount']

    def get_sb_reward(self):
        """Return the small blind's chip result for a finished round.

        The first listed winner's gain over ``starting_stack`` is used,
        negated when that winner is not ``sb_player``. Returns ``None``
        while the round is still running.
        """
        event = self._first_event('event_round_finish')
        if event is None:
            return None
        winner = event['winners'][0]
        gain = winner['stack'] - self.starting_stack
        return gain if winner['uuid'] == 'sb_player' else -gain

    def get_spr(self):
        """Return the stack-to-pot ratio, assuming both players paid half the pot."""
        pot = self.events[0]['round_state']['pot']['main']['amount']
        stack = self.starting_stack - int(pot / 2)
        return stack / pot

    def play_action(self, action):
        """Apply an encoded action: 0 fold, 1 call, 2 min-raise, 3 all-in.

        Any other code leaves the state untouched.
        """
        if action == 0:
            move, amount = 'fold', 0
        elif action == 1:
            move, amount = 'call', self.get_call_amount()
        elif action == 2:
            move, amount = 'raise', self.get_minraise_amount()
        elif action == 3:
            move, amount = 'raise', self.get_all_in_amount()
        else:
            return
        self.game_state, self.events = self.emulator.apply_action(
            self.game_state, move, amount)

    def new_hand(self):
        """Deal a fresh hand from full stacks and rebuild all features."""
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        self.street = 'preflop'
        self.hole_cards = {}
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)
        self.players_cards = [np.zeros(52), np.zeros(52)]
        self.cards_feature = [np.zeros(52), np.zeros(52), np.zeros(52)]
        self.actions_feature = [
            np.zeros(6), np.zeros(6), np.zeros(6), np.zeros(6)
        ]
        self.make_features()

    def get_action_histories_text(self, hole_cards=False):
        """Return the action history of the hand as readable text.

        Args:
            hole_cards: when true, also print the saved hole cards.
        """
        if hole_cards:
            print(self.hole_cards)
        histo = self.events[0]['round_state']['action_histories']
        lines = []
        for street, actions in histo.items():
            if len(actions) == 0:
                continue
            lines.append(street)
            for a in actions:
                if a['action'] == 'RAISE':
                    lines.append(a['uuid'] + ' raises to ' + str(a['amount']))
                elif a['action'] == 'FOLD':
                    lines.append(a['uuid'] + ' folds')
                elif a['action'] == 'CALL' and a['amount'] == 0:
                    lines.append(a['uuid'] + ' checks')
                else:
                    lines.append(
                        a['uuid'] + ' ' + a['action'] + ' ' + str(a['amount']))
        return ''.join(line + '\n' for line in lines)
class CFRBase:
    """Counterfactual-regret solver over hands simulated by an ``Emulator``.

    A "game state" throughout this class is the ``(state, events)`` pair
    returned by the emulator. Strategies and regrets are stored per
    action-history string ("tree") and per hand-strength percentile.
    """

    _POSTFLOP_STREETS = ('flop', 'turn', 'river')
    # Distances tried, nearest first, when a percentile has no regrets yet.
    _PERCENTILE_FALLBACK_OFFSETS = (5, 10, 20, 30, 40, 50)

    def __init__(self):
        """Load the stored strategy; raises if ``strategy.pickle`` is missing."""
        self.emulator = Emulator()
        # SECURITY: pickle executes code on load - only use a trusted file.
        with open("strategy.pickle", "rb") as handle:  # closes the handle
            self._sigma = pickle.load(handle)
        self.cumulative_regrets = {}
        self.cumulative_sigma = {}

    def get_tree(self, game_state):
        """Return ``"#"`` plus the first letter of every action so far.

        The history is read from the last event, or from the first event
        when the last one is the game-finish event.
        """
        events = game_state[1]
        if events[-1]['type'] != 'event_game_finish':
            source = events[-1]
        else:
            source = events[0]
        hist = source['round_state']['action_histories']
        letters = [entry['action'][0] for entry in hist['preflop']]
        for street in self._POSTFLOP_STREETS:
            letters.extend(entry['action'][0] for entry in hist.get(street, ()))
        return "#" + "".join(letters)

    def is_terminal(self, game_state):
        """Return True when the last event is the game-finish event."""
        return game_state[1][-1]['type'] == 'event_game_finish'

    def state_evaluation(self, game_state):
        """Return the second listed player's final stack minus 1000."""
        return game_state[1][1]['players'][1]['stack'] - 1000

    def _available_actions(self, game_state):
        """Return the action names offered by the last event."""
        return [entry['action'] for entry in game_state[1][-1]['valid_actions']]

    def load_data(self):
        """Reload the strategy from ``strategy.pickle``."""
        with open('strategy.pickle', 'rb') as handle:
            self._sigma = pickle.load(handle)

    def store_data(self):
        """Write the strategy to ``strategy.pickle``."""
        with open('strategy.pickle', 'wb') as handle:
            pickle.dump(self._sigma, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def getsigma(self, game_state, percentile, action):
        """Return the probability of ``action``; unseen buckets start uniform."""
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        buckets = self._sigma.setdefault(tree, {})
        if percentile not in buckets:
            buckets[percentile] = {
                name: 1. / len(available_actions) for name in available_actions
            }
        return buckets[percentile][action]

    def _statetomove(self, game_state):
        """Return 1 when seat 1 is next to act, otherwise -1."""
        return 1 if game_state[0]["next_player"] == 1 else -1

    def _getcards(self, game_state):
        """Return the two hole cards of the next player as strings."""
        next_player_pos = game_state[0]["next_player"]
        hole = game_state[0]['table'].seats.players[next_player_pos].hole_card
        return [str(hole[0]), str(hole[1])]

    def _getboard(self, game_state):
        """Return the community cards of the table."""
        return game_state[0]['table']._community_card

    def _myround(self, x, base=10):
        """Round ``x`` to the nearest multiple of ``base``."""
        return int(base * round(float(x) / base))

    def evaluate_hs(self, game_state):
        """Return the acting hand's estimated win rate as a percentile bucket."""
        hole_cards = gen_cards(self._getcards(game_state))
        community_cards = self._getboard(game_state)
        hs = estimate_hole_card_win_rate(100, 2, hole_cards, community_cards)
        return self._myround(hs * 100)

    def _state_play(self, game_state, action):
        """Apply ``action`` (amount 0) and return the new ``(state, events)``."""
        return self.emulator.apply_action(game_state[0], action, 0)

    def utility_recursive(self):
        """Run one traversal from ``self.game_state`` with unit reaches."""
        return self._utility_recursive(self.game_state, 1, 1)

    def _accumulate(self, table, game_state, percentile, action, amount):
        """Add ``amount`` to ``table[tree][percentile][action]``.

        A bucket seen for the first time is initialised to 0 for every
        available action.
        """
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        buckets = table.setdefault(tree, {})
        if percentile not in buckets:
            buckets[percentile] = {name: 0 for name in available_actions}
        buckets[percentile][action] += amount

    def _cumulate_cfr_regret(self, game_state, percentile, action, regret):
        """Accumulate counterfactual regret for one action."""
        self._accumulate(self.cumulative_regrets, game_state, percentile,
                         action, regret)

    def _cumulate_sigma(self, game_state, percentile, action, prob):
        """Accumulate reach-weighted strategy mass for one action."""
        self._accumulate(self.cumulative_sigma, game_state, percentile,
                         action, prob)

    def __update_sigma_recursively(self, game_state):
        """Refresh the strategy at this node and at every descendant."""
        # Stop traversal at terminal node.
        if self.is_terminal(game_state):
            return
        percentile = self.evaluate_hs(game_state)
        self._update_sigma(game_state, percentile)
        # Go to subtrees.
        for action in self._available_actions(game_state):
            self.__update_sigma_recursively(self._state_play(game_state, action))

    def _update_sigma(self, game_state, percentile):
        """Regret-match the strategy for this node's bucket.

        When ``percentile`` has no accumulated regrets, the nearest known
        bucket is used instead, trying +offset before -offset for each
        distance in ``_PERCENTILE_FALLBACK_OFFSETS`` (clamped to 0..100).
        """
        tree = self.get_tree(game_state)
        known = self.cumulative_regrets[tree]
        if percentile not in known:
            wanted = percentile
            for offset in self._PERCENTILE_FALLBACK_OFFSETS:
                upper = min(100, wanted + offset)
                lower = max(0, wanted - offset)
                if upper in known:
                    percentile = upper
                    break
                if lower in known:
                    percentile = lower
                    break
        regrets = known[percentile]
        rgrt_sum = sum(value for value in regrets.values() if value > 0)
        nr_of_actions = len(regrets)
        for a in regrets:
            if rgrt_sum > 0:
                self._sigma[tree][percentile][a] = max(regrets[a], 0.) / rgrt_sum
            else:
                self._sigma[tree][percentile][a] = 1. / nr_of_actions

    def _utility_recursive(self, game_state, reach_sb, reach_bb):
        """Return this node's expected value and accumulate regrets.

        Args:
            game_state: ``(state, events)`` pair of the node.
            reach_sb: reach probability contributed by the player for whom
                ``_statetomove`` returns 1.
            reach_bb: reach probability contributed by the other player.
        """
        if self.is_terminal(game_state):
            return self.state_evaluation(game_state)
        children_states_utilities = {}
        percentile = self.evaluate_hs(game_state)
        to_move = self._statetomove(game_state)
        actions = self._available_actions(game_state)
        value = 0.
        for action in actions:
            probability = self.getsigma(game_state, percentile, action)
            child_reach_sb = reach_sb * (probability if to_move == 1 else 1)
            child_reach_bb = reach_bb * (probability if to_move == -1 else 1)
            # Descend with the CHILD reaches. Passing the parent's reaches
            # (as before) left every reach probability at its root value.
            child_state_utility = self._utility_recursive(
                self._state_play(game_state, action),
                child_reach_sb, child_reach_bb)
            value += probability * child_state_utility
            children_states_utilities[action] = child_state_utility
        # Regret is weighted by the opponent's reach, strategy by our own.
        if to_move == 1:
            cfr_reach, reach = reach_bb, reach_sb
        else:
            cfr_reach, reach = reach_sb, reach_bb
        for action in actions:
            action_cfr_regret = to_move * cfr_reach * (
                children_states_utilities[action] - value)
            self._cumulate_cfr_regret(game_state, percentile, action,
                                      action_cfr_regret)
            self._cumulate_sigma(
                game_state, percentile, action,
                reach * self.getsigma(game_state, percentile, action))
        return value

    def run(self, iterations=1):
        """Deal ``iterations`` hands, solving and saving after each one."""
        print(datetime.datetime.now())
        for myiter in range(0, iterations):
            # Initialize a new game state with new hole cards.
            self.emulator.set_game_rule(player_num=2,
                                        max_round=1,
                                        small_blind_amount=10,
                                        ante_amount=0)
            players_info = {
                "uuid-1": {"name": "player1", "stack": 1000},
                "uuid-2": {"name": "player2", "stack": 1000},
            }
            self.emulator.register_player('uuid-1', RandomPlayer())
            self.emulator.register_player('uuid-2', RandomPlayer())
            initial_state = self.emulator.generate_initial_game_state(
                players_info)
            self.game_state = self.emulator.start_new_round(initial_state)
            self._utility_recursive(self.game_state, 1, 1)
            self.__update_sigma_recursively(self.game_state)
            self.store_data()
            print('data stored, i = ' + str(myiter))
            print(datetime.datetime.now())
        print(datetime.datetime.now())
class MyPlayer(BasePokerPlayer):
    """Player that scores strategies against a belief over opponent hands."""

    def __init__(self):
        self.table = {}   # strategy string -> accumulated expected gain
        self.table1 = {}
        self.belief = {}  # 'Cards': [(hand, strength)], 'Probability': array
        self.uuid = None
        self.opponent = None
        self.emulator = Emulator()
        self.emulator.set_game_rule(2, 1, 10, 0)
        self.current_cost = 10
        self.random_game_state = None

    def declare_action(self, valid_actions, hole_card, round_state):
        """Choose the action whose strategy has the highest table value.

        For every opponent hand in the belief a ``Node`` tree is built and
        asked to fill ``self.table``; the first word of the best strategy
        is then returned.

        Returns:
            The chosen action name only (no amount).
        """
        self.round_state = round_state
        self.table = {}
        self.my_uuid = round_state['seats'][round_state['next_player']]['uuid']
        self.my_cards = gen_cards(hole_card)
        self.community_card = gen_cards(round_state['community_card'])
        self.random_game_state = None
        big_blind_seat = round_state['seats'][round_state['big_blind_pos']]
        if big_blind_seat['uuid'] == self.my_uuid:
            self.current_cost = 20
        # NOTE(review): the belief is only built while empty, so it keeps
        # the cards seen on the first call - confirm this is intended.
        if not self.belief:
            self.initialize_belief()
        possible_opponent_cards = [entry[0] for entry in self.belief['Cards']]
        opponent_card_prob = self.belief['Probability']
        pp = pprint.PrettyPrinter(indent=2)
        for opponent_cards, prob in zip(possible_opponent_cards,
                                        opponent_card_prob):
            game_state = setup_game_state(round_state, self.my_cards,
                                          opponent_cards, self.my_uuid)
            # Should not be self.belief, fix later
            self.opponent = ReasonablePlayer(opponent_cards, round_state,
                                             self.belief)
            root = Node(self.my_cards, opponent_cards, self.community_card,
                        game_state, round_state, round_state['street'], prob,
                        [], self, self.opponent, self.my_uuid, self.emulator,
                        self.current_cost)
            root.update_expected_table()
            # Keep one simulated state to price the chosen action below.
            if not self.random_game_state:
                self.random_game_state = game_state
        pp.pprint(self.table)
        strategies = table_to_strategies(self.table)
        pp.pprint(strategies)
        strategy = max(strategies, key=self.table.get)
        action = strategy.split()[0]
        histories = self.emulator.apply_action(
            self.random_game_state,
            action)[1][0]['round_state']['action_histories']
        if histories.get(round_state['street']):
            action_result = histories[round_state['street']][-1]
            if action_result['action'] != 'FOLD':
                self.current_cost += action_result['paid']
        return action

    def initialize_belief(self):
        """Build a uniform belief over every opponent hand still possible.

        Hands are drawn from card ids 1..52 not used by the own hand or the
        board, and are stored sorted by ascending ``hand_strength``.
        """
        used = [card.to_id() for card in self.my_cards + self.community_card]
        unused = [card_id for card_id in range(1, 53) if card_id not in used]
        Cards = [[Card.from_id(card1), Card.from_id(card2)]
                 for card1, card2 in itertools.combinations(unused, 2)]
        start = time()
        Cards_Strength = [
            hand_strength(hand, self.community_card, self.my_cards)
            for hand in Cards
        ]
        print('my', time() - start)
        self.belief['Cards'] = list(zip(Cards, Cards_Strength))
        self.belief['Probability'] = np.ones(len(Cards)) / len(Cards)
        self.belief['Cards'].sort(key=lambda x: x[1])

    def update_belief(self):
        """Shift probability between strong and weak hands after a raise.

        Both update sizes are currently 0, so only the renormalisation has
        an effect.
        """
        # NOTE(review): ``self.game_update`` and ``self.game_state`` are not
        # assigned anywhere in this class - confirm who sets them.
        if self.game_state[-1]['player_uuid'] == self.my_uuid:
            previous_action = self.game_update[-1]['action']
        else:
            previous_action = self.game_update[-2]['action']
        strong_update = 0
        weak_update = 0
        if previous_action == 'raise':
            count = 0
            # Count the leading hands stronger than 0.65. The original
            # compared ``belief['Cards'][1]`` (a whole tuple) to the float.
            for _hand, strength in self.belief['Cards']:
                if strength > 0.65:
                    count += 1
                else:
                    break
            self.belief['Probability'][:count] += strong_update
            self.belief['Probability'][count:] -= weak_update
            self.belief['Probability'] /= sum(self.belief['Probability'])

    def update_table(self, my_strategy, gain):
        """Add ``gain`` to the table entry of the space-joined strategy."""
        key = ' '.join(my_strategy)
        if self.table.get(key):
            self.table[key] += gain
        else:
            self.table[key] = gain

    def receive_game_start_message(self, game_info):
        pass

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        """Remember the most recent action announced by the engine."""
        self.action = action

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass
class TexasHoldemTask(BaseTask):
    """Texas Hold'em task driven by an ``Emulator``.

    States are emulator game states.  Between two decisions of the learning
    player (``my_uuid``) every opponent action is chosen with
    ``choose_best_action`` using the value function registered for that
    opponent.  When ``action_record`` is enabled, each state additionally
    carries per-player action records under ``ACTION_RECORD_KEY``.
    """

    def __init__(self,
                 final_round=max_round,
                 scale_reward=False,
                 lose_penalty=False,
                 shuffle_position=False,
                 action_record=False):
        """Configure the emulator and register a dummy player per seat.

        Args:
            final_round: round count at which a finished round is terminal.
            scale_reward: if true, the terminal reward is the stack divided
                by ``nb_player * initial_stack``.
            lose_penalty: if true, finishing with an empty stack yields -1.
            shuffle_position: if true, initial states use
                ``_get_shuffled_players_info()`` instead of ``players_info``.
            action_record: if true, states carry action records.
        """
        self.final_round = final_round
        self.scale_reward = scale_reward
        self.lose_penalty = lose_penalty
        self.shuffle_position = shuffle_position
        self.action_record = action_record
        self.emulator = Emulator()
        self.emulator.set_game_rule(nb_player, final_round, sb_amount, ante)
        self.emulator.set_blind_structure(blind_structure)
        self.opponent_value_functions = {}
        if shuffle_position:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print("Warning: shuffle_position is set True. Are you sure?")
        for uuid in players_info:
            self.emulator.register_player(uuid, DummyPlayer())
            if uuid != my_uuid:
                self.opponent_value_functions[uuid] = None

    def set_opponent_value_functions(self, value_functions):
        """Assign one value function to each opponent, in iteration order.

        Args:
            value_functions: sequence of exactly 9 value functions.
        """
        assert len(value_functions) == 9
        opponent_uuids = [
            uuid for uuid in self.opponent_value_functions if uuid != my_uuid
        ]
        for uuid, value_function in zip(opponent_uuids, value_functions):
            self.opponent_value_functions[uuid] = value_function

    def generate_initial_state(self):
        """Return a non-terminal state in which it is my turn to act."""
        if self.action_record:
            return self.generate_initial_state_with_action_record()
        return self.generate_initial_state_without_action_record()

    def generate_initial_state_without_action_record(self):
        """Start a game and play opponents until my first decision."""
        p_info = (_get_shuffled_players_info()
                  if self.shuffle_position else players_info)
        clear_state = self.emulator.generate_initial_game_state(p_info)
        state, _events = self.emulator.start_new_round(clear_state)
        while not self._check_my_turn(state):
            action, amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_finished(state)
        # The game may have ended before I ever acted; try again if so.
        if self.is_terminal_state(state):
            return self.generate_initial_state()
        return state

    def generate_initial_state_with_action_record(self):
        """Like the variant above, but also tracks action records."""
        p_info = (_get_shuffled_players_info()
                  if self.shuffle_position else players_info)
        clear_state = self.emulator.generate_initial_game_state(p_info)
        # Four buckets per player; see _update_action_record for the indices.
        p_act_record = {
            p.uuid: [[], [], [], []]
            for p in clear_state["table"].seats.players
        }
        state, _events = self.emulator.start_new_round(clear_state)
        while not self._check_my_turn(state):
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(
                state, p_act_record, opponent_uuid, action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_finished(state)
        state[ACTION_RECORD_KEY] = p_act_record
        if self.is_terminal_state(state):
            return self.generate_initial_state()
        return state

    def is_terminal_state(self, state):
        """Return True when the round is finished and the game is over.

        The game is over when at most ``table_break_threshold`` players still
        have chips, when my stack is empty, or when ``final_round`` has been
        reached.
        """
        me = pick_me(state)
        round_finished = state["street"] == Const.Street.FINISHED
        active_players = [
            p for p in state["table"].seats.players if p.stack > 0
        ]
        short_of_players = len(active_players) <= table_break_threshold
        i_am_loser = me.stack == 0
        is_final_round = state["round_count"] >= self.final_round
        return round_finished and (short_of_players or i_am_loser
                                   or is_final_round)

    def transit_state(self, state, action):
        """Apply my action and advance until my next turn or the game end."""
        if self.action_record:
            return self.transit_state_with_action_record(state, action)
        return self.transit_state_without_action_record(state, action)

    def transit_state_without_action_record(self, state, action):
        """Apply my action, then let opponents act until it is my turn.

        Args:
            state: non-terminal state in which I am the next player.
            action: dict with "action" and "amount" keys.
        """
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        action, amount = action["action"], action["amount"]
        state, _events = self.emulator.apply_action(state, action, amount)
        state = self._start_next_round_if_finished(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            action, amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_finished(state)
        return state

    def transit_state_with_action_record(self, state, action_info):
        """Same transition as above while maintaining the action records.

        Args:
            state: non-terminal state in which I am the next player; it must
                contain ``ACTION_RECORD_KEY``.
            action_info: dict with "action" and "amount" keys.
        """
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        assert ACTION_RECORD_KEY in state
        p_act_record = _deepcopy_action_record(state)
        p_act_record = self._update_action_record(state, p_act_record,
                                                  my_uuid, action_info)
        action, amount = action_info["action"], action_info["amount"]
        state, _events = self.emulator.apply_action(state, action, amount)
        state[ACTION_RECORD_KEY] = p_act_record
        state = self._start_next_round_if_finished(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(
                state, p_act_record, opponent_uuid, action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_finished(state)
        state[ACTION_RECORD_KEY] = p_act_record
        return state

    def _start_next_round_if_finished(self, state):
        """Start a new round when this one finished and the game goes on."""
        if (state["street"] == Const.Street.FINISHED
                and not self.is_terminal_state(state)):
            state, _events = self.emulator.start_new_round(state)
        return state

    def _check_my_turn(self, state):
        """Return True when the next player to act is me."""
        players = state["table"].seats.players
        return state["next_player"] != "not_found" and my_uuid == players[
            state["next_player"]].uuid

    def _choose_opponent_action(self, state, detail_info=False):
        """Pick the next opponent's action with its value function.

        Returns:
            ``(opponent_uuid, action_info)`` when ``detail_info`` is true,
            otherwise ``(action, amount)``.
        """
        players = state["table"].seats.players
        opponent_uuid = players[state["next_player"]].uuid
        value_function = self.opponent_value_functions[opponent_uuid]
        action_info = choose_best_action(self, value_function, state)
        if detail_info:
            return opponent_uuid, action_info
        return action_info["action"], action_info["amount"]

    def generate_possible_actions(self, state):
        """List the discretised actions available in ``state``.

        Always contains fold and call.  Raise variants (min, double, triple,
        max) are added depending on the raise limits reported by the
        emulator, where -1 marks an unavailable limit.
        """
        action_info = self.emulator.generate_possible_actions(state)
        min_raise_amount = action_info[2]["amount"]["min"]
        max_raise_amount = action_info[2]["amount"]["max"]
        actions = [
            gen_fold_action(),
            gen_call_action(action_info[1]["amount"]),
        ]
        can_raise = min_raise_amount != -1
        if can_raise:
            actions.append(gen_min_raise_action(min_raise_amount))
        if can_raise and min_raise_amount * 2 < max_raise_amount:
            actions.append(gen_double_raise_action(min_raise_amount * 2))
        if can_raise and min_raise_amount * 3 < max_raise_amount:
            actions.append(gen_triple_raise_action(min_raise_amount * 3))
        if max_raise_amount != -1:
            actions.append(gen_max_raise_action(max_raise_amount))
        return actions

    def calculate_reward(self, state):
        """Return my reward for ``state``; 0 unless the state is terminal."""
        if not self.is_terminal_state(state):
            return 0
        my_stack = pick_me(state).stack
        if my_stack == 0 and self.lose_penalty:
            return -1
        if self.scale_reward:
            # The 1.0 factor forces float division on Python 2 as well.
            return 1.0 * my_stack / (nb_player * initial_stack)
        return my_stack

    def _update_action_record(self, state, action_record, uuid, action_info):
        """Append the action's amount to the acting player's record.

        Bucket index: 0 fold, 1 call, 2 raise, 3 all-in.

        Raises:
            Exception: if the action is not fold, call or raise.
        """
        action, amount = action_info["action"], action_info["amount"]
        if 'fold' == action:
            idx = 0
        elif 'call' == action:
            idx = 1
        elif 'raise' == action:
            idx = 2
        else:
            raise Exception("unexpected action [ %s ] received" % action)
        # All-in check: paying at least the whole stack goes to bucket 3.
        action_player = [
            player for player in state["table"].seats.players
            if player.uuid == uuid
        ]
        assert len(action_player) == 1
        if amount >= action_player[0].stack and 'fold' != action:
            idx = 3
        action_record[uuid][idx].append(amount)
        return action_record
game_state = attach_hole_card(game_state, player.uuid, hole_card) else: game_state = attach_hole_card_from_deck( game_state, player.uuid) while game_state["street"] != Const.Street.FINISHED: print("action hitories: ", events[0]["round_state"]['action_histories']) print("community cards: ", events[0]["round_state"]["community_card"]) cur_street = game_state["street"] print("valid actions: ", events[-1]['valid_actions']) if events[0]['round_state']['next_player'] == 1: action = input("Enter a valid action > ") amount = int(input("Enter a valid amount > ")) game_state, events = emul.apply_action(game_state, action, amount) else: action, amount = bot.declare_action( events[-1]['valid_actions'], bot.hole_card_obj, events[0]['round_state']) game_state, events = emul.apply_action(game_state, action, amount) print("action hitories: ", events[0]["round_state"]['action_histories']) if game_state["street"] != cur_street: if events[0]["round_state"]["street"] == "flop": flop1 = input("Enter first flop > ") flop2 = input("Enter second flop > ") flop3 = input("Enter third flop > ") game_state = replace_community_card( game_state, gen_cards([flop1, flop2, flop3]))