コード例 #1
0
class GenPlayer(BasePokerPlayer):
    """Poker player that runs an ``Emulator`` simulation before acting.

    The emulator is configured once, when the game starts, from the rules
    announced in ``game_info``.  ``is_good_simulation_result`` is called by
    ``declare_action`` but is not defined in this class as shown; it must be
    supplied elsewhere (for example by a subclass).
    """

    def receive_game_start_message(self, game_info):
        """Set up the emulator by registering the announced game information.

        Args:
            game_info: Game description.  This method reads ``player_num``,
                the ``rule`` entries ``max_round``, ``small_blind_amount``,
                ``ante`` and ``blind_structure``, and the player list under
                ``seats``.
        """
        player_num = game_info["player_num"]
        rule = game_info["rule"]
        max_round = rule["max_round"]
        small_blind_amount = rule["small_blind_amount"]
        ante_amount = rule["ante"]
        blind_structure = rule["blind_structure"]

        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num, max_round, small_blind_amount,
                                    ante_amount)
        self.emulator.set_blind_structure(blind_structure)

        # Register the algorithm used for each player in the simulation:
        # every seat is played by a RandomPlayer.
        for player_info in game_info["seats"]["players"]:
            self.emulator.register_player(player_info["uuid"], RandomPlayer())

    def declare_action(self, valid_actions, hole_card, round_state):
        """Choose an action based on a one-step simulation.

        The current state is restored from ``round_state`` and a "fold" is
        applied in the emulator.  ``run_until_round_finish`` or
        ``run_until_game_finish`` could be used instead for a deeper
        simulation.

        Args:
            valid_actions: Actions currently allowed; index 1 is used as the
                call entry (a dict with ``action`` and ``amount``).
            hole_card: This player's hole cards (unused here).
            round_state: Public round state used to rebuild the game state.

        Returns:
            An ``(action, amount)`` pair: the call action when the simulation
            result is judged good, otherwise ``("fold", 0)``.
        """
        game_state = restore_game_state(round_state)
        updated_state, _events = self.emulator.apply_action(game_state, "fold")
        if self.is_good_simulation_result(updated_state):
            # Previously this branch returned None, which is not a valid
            # (action, amount) declaration; declare CALL instead.
            call_action = valid_actions[1]
            return call_action["action"], call_action["amount"]
        return "fold", 0
コード例 #2
0
class Game:
    """Search-game adapter over a two-player poker ``Emulator``.

    Exposes the usual game-search interface (``actions``, ``result``,
    ``utility``, ``terminal_test``, ``to_move``) on top of emulator states.
    """

    def __init__(self, game_state, hole_card, player):
        """Store the root state and configure the emulator.

        The emulator rule is fixed to the positional values (2, 10, 10, 0).
        """
        self.player = player
        self.hole_card = hole_card
        self.initial_state = game_state
        self.emulator = Emulator()
        self.emulator.set_game_rule(2, 10, 10, 0)

    def actions(self, state):
        """Return the names of the moves allowed in ``state``."""
        possible = self.emulator.generate_possible_actions(state)
        return [entry['action'] for entry in possible]

    def result(self, state, move):
        """Return the state reached by applying ``move`` to ``state``."""
        outcome = self.emulator.apply_action(state, move)
        return outcome[0]

    def utility(self, state, player):
        """Return the value of ``state`` from ``player``'s point of view.

        The value is the estimated win rate of the stored hole cards against
        the community cards in ``state``; it is negated for any player other
        than the one this game was built for.
        """
        community = state['table']._community_card
        win_rate = estimate_hole_card_win_rate(100, 2, self.hole_card,
                                               community)
        if self.player != player:
            return -win_rate
        return win_rate

    def terminal_test(self, state):
        """Return True when the emulator reports ``state`` as the last round."""
        rule = self.emulator.game_rule
        return self.emulator._is_last_round(state, rule)

    def to_move(self, state):
        """Return the ``next_player`` entry of ``state``."""
        return state['next_player']

    def display(self, state):
        """Print ``state``."""
        print(state)

    def __repr__(self):
        return '<' + self.__class__.__name__ + '>'

    def play_game(self, *players):
        """Let ``players`` alternate moves until a terminal state is reached.

        Each player is a callable taking ``(game, state)`` and returning a
        move.  The terminal state is displayed and its utility for the player
        to move in the initial state is returned.
        """
        current = self.initial_state
        while True:
            for choose_move in players:
                current = self.result(current, choose_move(self, current))
                if not self.terminal_test(current):
                    continue
                self.display(current)
                first_mover = self.to_move(self.initial_state)
                return self.utility(current, first_mover)
コード例 #3
0
def emulate(hole_card, round_state):
    """Build an emulator and a game state restored from ``round_state``.

    The emulator rule is set with the positional values
    (2, 10, <small blind from round_state>, 1), i.e. number of players,
    max rounds, small blind amount and ante.  Seat 0 receives ``hole_card``;
    seat 1 is dealt cards from the deck.

    Returns:
        A ``(game_state, emulator)`` tuple.  No action is applied here.
    """
    # 1. Configure the emulator.
    emulator = Emulator()
    emulator.set_game_rule(2, 10, round_state['small_blind_amount'], 1)

    # 2. Rebuild the game state and attach hole cards to both seats.
    restored = restore_game_state(round_state)
    seats = round_state['seats']
    with_my_cards = attach_hole_card(restored, seats[0]['uuid'],
                                     gen_cards(hole_card))
    game_state = attach_hole_card_from_deck(with_my_cards, seats[1]['uuid'])

    return game_state, emulator
コード例 #4
0
def run_episode(agents):
    """Play ``GAMES_PER_EPISODE`` emulated games and feed results to the agents.

    For every game each agent is wrapped in a ``DQNAgentWrapper`` and
    registered under its index, one round is run to completion, the winner is
    counted, and each wrapped agent is given a reward equal to its stack
    change divided by ``BB_SIZE``.  Only ``agents[0]`` replays its memory.

    Returns:
        ``(agents[0], final_seats, winner_counts, n_games_played)`` where
        ``final_seats`` is the seat list of the last game (``{}`` if no game
        was played).
    """
    emulator = Emulator()
    temp_final_state = {}
    winner_counts = [0] * N_AGENTS
    n_games_played = 0

    for game in range(GAMES_PER_EPISODE):
        wrappers = []
        player_info = {}
        for idx, agent in enumerate(agents):
            if PERSISTENT_STACKS and temp_final_state:
                # Carry stacks over from the previous game; an empty stack is
                # topped back up to STACK_SIZE.
                for seat in temp_final_state:
                    player_info[seat.uuid] = {
                        'name': 'Player ' + str(seat.uuid),
                        'stack': seat.stack if seat.stack else STACK_SIZE,
                    }
            else:
                player_info[idx] = {'name': 'Player ' + str(idx),
                                    'stack': STACK_SIZE}

            wrapper = DQNAgentWrapper(agent, STACK_SIZE)
            wrappers.append(wrapper)
            emulator.register_player(uuid=idx, player=wrapper)

        emulator.set_game_rule(N_AGENTS, 2, BB_SIZE / 2, 0)
        initial_game_state = emulator.generate_initial_game_state(player_info)
        game_state, events = emulator.start_new_round(initial_game_state)
        game_finish_state, events = emulator.run_until_round_finish(game_state)

        # The winners are read from the last event; if that event has no
        # 'winners' entry it is dropped and the one before it is used.
        if 'winners' not in events[-1]:
            events.pop()
        winner = events[-1]['winners'][0]['uuid']
        winner_counts[winner] += 1
        n_games_played += 1

        final_seats = game_finish_state['table'].seats.players
        for idx in range(N_AGENTS):
            wrapper = wrappers[idx]
            reward = (final_seats[idx].stack - wrapper.prev_stack_size) / BB_SIZE
            wrapper.agent.remember(wrapper.prev_state, wrapper.prev_action,
                                   reward, None, 1)
        temp_final_state = final_seats

        print('\rGame:{}, epsilon:{}'.format(game, wrappers[0].agent.epsilon),
              end='')

        is_last_game = game == GAMES_PER_EPISODE - 1
        if game % REPLAY_EVERY_N_GAMES == 0 or is_last_game:
            # Only the first agent replays its memory.
            agents[0].replay(BATCH_SIZE)

        for idx in range(N_AGENTS):
            agents[idx].model.reset_states()

    clear_memory()
    return agents[0], temp_final_state, winner_counts, n_games_played
コード例 #5
0
ファイル: task.py プロジェクト: ishikota/PyPokerAI
class TexasHoldemTask(BaseTask):
    """Texas Hold'em task that plays the opponents' turns inside an ``Emulator``.

    States handed back to the caller are game-state dicts in which either the
    player identified by the module-level ``my_uuid`` is next to act, or the
    game is terminal.  Game settings (``nb_player``, ``sb_amount``, ``ante``,
    ``blind_structure``, ``players_info``, ``table_break_threshold``,
    ``initial_stack``) are read from module-level names.
    """

    def __init__(self,
                 final_round=max_round,
                 scale_reward=False,
                 lose_penalty=False,
                 shuffle_position=False,
                 action_record=False):
        """Configure the emulator and register a dummy player per seat.

        Args:
            final_round: Round count at which a finished round is terminal.
            scale_reward: Divide the terminal reward by the total chips
                (``nb_player * initial_stack``).
            lose_penalty: Return -1 as reward when my stack is 0 at the end.
            shuffle_position: Use ``_get_shuffled_players_info()`` instead of
                ``players_info`` when creating a game.
            action_record: Keep a per-player action record in the state under
                ``ACTION_RECORD_KEY``.
        """
        self.final_round = final_round
        self.scale_reward = scale_reward
        self.lose_penalty = lose_penalty
        self.shuffle_position = shuffle_position
        self.action_record = action_record
        self.emulator = Emulator()
        self.emulator.set_game_rule(nb_player, final_round, sb_amount, ante)
        self.emulator.set_blind_structure(blind_structure)
        self.opponent_value_functions = {}
        if shuffle_position:
            # Single-argument call form prints the same text on Python 2 and
            # Python 3 (the former print statement was Python 2 only).
            print("Warning: shuffle_position is set True. Are you sure?")

        for uuid in players_info:
            self.emulator.register_player(uuid, DummyPlayer())
            if uuid != my_uuid:
                self.opponent_value_functions[uuid] = None

    def set_opponent_value_functions(self, value_functions):
        """Assign one value function to each opponent, in dict order.

        Args:
            value_functions: Exactly 9 value functions.
        """
        assert len(value_functions) == 9
        opponent_uuids = [
            uuid for uuid in self.opponent_value_functions if uuid != my_uuid
        ]
        for uuid, value_function in zip(opponent_uuids, value_functions):
            self.opponent_value_functions[uuid] = value_function

    def generate_initial_state(self):
        """Return a fresh state in which it is my turn to act."""
        if self.action_record:
            return self.generate_initial_state_with_action_record()
        return self.generate_initial_state_without_action_record()

    def generate_initial_state_without_action_record(self):
        """Start a new game and play opponents until my first decision.

        If the game becomes terminal before I get to act, a new game is
        generated instead.
        """
        p_info = (_get_shuffled_players_info()
                  if self.shuffle_position else players_info)
        clear_state = self.emulator.generate_initial_game_state(p_info)
        state, _events = self.emulator.start_new_round(clear_state)
        # The terminal guard mirrors transit_state_*: without it a terminal
        # state in which it is not my turn would be fed to the opponents.
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            action, amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_needed(state)
        if self.is_terminal_state(state):
            return self.generate_initial_state()
        return state

    def generate_initial_state_with_action_record(self):
        """Same as the record-less variant, but tracks every opponent action.

        The returned state carries the record under ``ACTION_RECORD_KEY``:
        per player uuid, four lists of amounts (fold, call, raise, all-in).
        """
        p_info = (_get_shuffled_players_info()
                  if self.shuffle_position else players_info)
        clear_state = self.emulator.generate_initial_game_state(p_info)
        p_act_record = {
            p.uuid: [[], [], [], []]
            for p in clear_state["table"].seats.players
        }
        state, _events = self.emulator.start_new_round(clear_state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(state, p_act_record,
                                                      opponent_uuid,
                                                      action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_needed(state)
        state[ACTION_RECORD_KEY] = p_act_record
        if self.is_terminal_state(state):
            return self.generate_initial_state()
        return state

    def is_terminal_state(self, state):
        """Return True when the round is finished and the game is over.

        The game is over when at most ``table_break_threshold`` players still
        have chips, when my stack is 0, or when ``round_count`` has reached
        ``final_round``.
        """
        me = pick_me(state)
        round_finished = state["street"] == Const.Street.FINISHED
        active_players = [
            p for p in state["table"].seats.players if p.stack > 0
        ]
        short_of_players = len(active_players) <= table_break_threshold
        i_am_loser = me.stack == 0
        is_final_round = state["round_count"] >= self.final_round
        return round_finished and (short_of_players or i_am_loser
                                   or is_final_round)

    def transit_state(self, state, action):
        """Apply my ``action`` and advance to my next decision or the end."""
        if self.action_record:
            return self.transit_state_with_action_record(state, action)
        return self.transit_state_without_action_record(state, action)

    def transit_state_without_action_record(self, state, action):
        """Apply my action, then let opponents act until it is my turn again.

        Args:
            state: Non-terminal state in which it is my turn.
            action: Dict with ``action`` and ``amount``.

        Returns:
            The next state in which I act, or a terminal state.
        """
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        state, _events = self.emulator.apply_action(state, action["action"],
                                                    action["amount"])
        state = self._start_next_round_if_needed(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            opp_action, opp_amount = self._choose_opponent_action(state)
            state, _events = self.emulator.apply_action(
                state, opp_action, opp_amount)
            state = self._start_next_round_if_needed(state)
        return state

    def transit_state_with_action_record(self, state, action_info):
        """Record-keeping variant of ``transit_state_without_action_record``.

        ``state`` must already carry ``ACTION_RECORD_KEY``.  The record is
        copied, updated with my action and every following opponent action,
        and stored on the returned state.
        """
        assert self._check_my_turn(state)
        assert not self.is_terminal_state(state)
        # `in` replaces dict.has_key, which no longer exists in Python 3.
        assert ACTION_RECORD_KEY in state
        p_act_record = _deepcopy_action_record(state)
        p_act_record = self._update_action_record(state, p_act_record, my_uuid,
                                                  action_info)
        action, amount = action_info["action"], action_info["amount"]
        state, _events = self.emulator.apply_action(state, action, amount)
        state[ACTION_RECORD_KEY] = p_act_record
        state = self._start_next_round_if_needed(state)
        while not self._check_my_turn(state) and not self.is_terminal_state(
                state):
            # Opponent policies read the record from the state they are given.
            state[ACTION_RECORD_KEY] = p_act_record
            opponent_uuid, action_info = self._choose_opponent_action(
                state, detail_info=True)
            p_act_record = self._update_action_record(state, p_act_record,
                                                      opponent_uuid,
                                                      action_info)
            action, amount = action_info["action"], action_info["amount"]
            state, _events = self.emulator.apply_action(state, action, amount)
            state = self._start_next_round_if_needed(state)
        state[ACTION_RECORD_KEY] = p_act_record
        return state

    def _start_next_round_if_needed(self, state):
        """Start the next round when the current one ended but the game did not."""
        if (state["street"] == Const.Street.FINISHED
                and not self.is_terminal_state(state)):
            state, _events = self.emulator.start_new_round(state)
        return state

    def _check_my_turn(self, state):
        """Return True when the next player to act is ``my_uuid``."""
        players = state["table"].seats.players
        return state["next_player"] != "not_found" and my_uuid == players[
            state["next_player"]].uuid

    def _choose_opponent_action(self, state, detail_info=False):
        """Pick the next opponent's action with its own value function.

        Returns:
            ``(action, amount)`` by default, or ``(opponent_uuid,
            action_info)`` when ``detail_info`` is True.
        """
        players = state["table"].seats.players
        opponent_uuid = players[state["next_player"]].uuid
        value_function = self.opponent_value_functions[opponent_uuid]
        action_info = choose_best_action(self, value_function, state)
        if detail_info:
            return opponent_uuid, action_info
        return action_info["action"], action_info["amount"]

    def generate_possible_actions(self, state):
        """Return the discretised action set for ``state``.

        Always fold and call; min raise when raising is allowed (min != -1);
        double / triple of the min raise when strictly below the max raise;
        max raise when max != -1.
        """
        action_info = self.emulator.generate_possible_actions(state)
        raise_range = action_info[2]["amount"]
        min_raise_amount = raise_range["min"]
        max_raise_amount = raise_range["max"]

        actions = [
            gen_fold_action(),
            gen_call_action(action_info[1]["amount"])
        ]
        if min_raise_amount != -1:
            actions.append(gen_min_raise_action(min_raise_amount))
            if min_raise_amount * 2 < max_raise_amount:
                actions.append(gen_double_raise_action(min_raise_amount * 2))
            if min_raise_amount * 3 < max_raise_amount:
                actions.append(gen_triple_raise_action(min_raise_amount * 3))
        if max_raise_amount != -1:
            actions.append(gen_max_raise_action(max_raise_amount))
        return actions

    def calculate_reward(self, state):
        """Return the reward for ``state`` (0 unless the state is terminal).

        At a terminal state the reward is my stack, optionally scaled by the
        total chips, or -1 when I am bust and ``lose_penalty`` is set.
        """
        if not self.is_terminal_state(state):
            return 0
        my_stack = pick_me(state).stack
        if my_stack == 0 and self.lose_penalty:
            return -1
        if self.scale_reward:
            return 1.0 * my_stack / (nb_player * initial_stack)
        return my_stack

    def _update_action_record(self, state, action_record, uuid, action_info):
        """Append the action's amount to the acting player's record.

        Slot 0 is fold, 1 call, 2 raise and 3 all-in (a non-fold action whose
        amount is at least the player's stack).  ``action_record`` is
        modified in place and returned.

        Raises:
            Exception: If the action is not fold, call or raise.
        """
        action, amount = action_info["action"], action_info["amount"]
        slot_by_action = {'fold': 0, 'call': 1, 'raise': 2}
        if action not in slot_by_action:
            raise Exception("unexpected action [ %s ] received" % action)
        idx = slot_by_action[action]

        # All-in check: the index of all-in is 3.
        action_player = [
            player for player in state["table"].seats.players
            if player.uuid == uuid
        ]
        assert len(action_player) == 1
        if amount >= action_player[0].stack and 'fold' != action:
            idx = 3
        action_record[uuid][idx].append(amount)
        return action_record
コード例 #6
0
class PushFoldEmulator:
    """Heads-up push-or-fold environment built on ``Emulator``.

    Two players, ``bb_player`` and ``sb_player``, start each hand with the
    same stack.  The latest game state and the events produced by the last
    emulator call are kept on the instance and queried by the helper methods.
    """

    def __init__(self, starting_stack, small_blind):
        """Configure the emulator for a two-player game.

        Args:
            starting_stack: Stack given to both players.
            small_blind: Small blind amount passed to the emulator.
        """
        self.pok = PokerUtils()
        self.starting_stack = starting_stack
        # Store the argument itself; this attribute used to be hard-coded to
        # 10 and could disagree with the emulator's small blind.
        self.small_blind = small_blind
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)

        self.hole_cards = {}
        self.players_info = self._build_players_info(starting_stack)
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        # One 13-slot card-value indicator vector per player.
        self.players_cards = [np.zeros(13), np.zeros(13)]
        self.suited = [0, 0]
        self.street = 'preflop'
        self.events = []
        self.game_state = []

    @staticmethod
    def _build_players_info(starting_stack):
        """Return the players-info dict for both seats (bb_player first)."""
        return {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

    def _first_event(self, event_type):
        """Return the first stored event of ``event_type``, or None."""
        for event in self.events:
            if event['type'] == event_type:
                return event
        return None

    def is_round_finished(self):
        """Return True if the stored events contain a round-finish event."""
        return any(e['type'] == 'event_round_finish' for e in self.events)

    def new_street(self):
        """Update and return the street from the first new-street event.

        Returns False (leaving ``self.street`` untouched) when the stored
        events contain no new-street event.
        """
        event = self._first_event('event_new_street')
        if event is None:
            return False
        self.street = event['street']
        return self.street

    def save_cards(self):
        """Store every player's hole cards as strings, keyed by uuid."""
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                str(card) for card in player.hole_card
            ]

    def get_hand_feature(self):
        """On the preflop street, fill the card-value and suited features.

        For each of the two players the value indices of both hole cards are
        set to 1 in ``players_cards`` and ``suited`` is set to 1 or 0.
        Nothing happens on any other street.
        """
        if self.street != 'preflop':
            return
        self.save_cards()
        for i in range(2):
            hole = self.game_state['table'].seats.players[i].hole_card
            first, second = str(hole[0]), str(hole[1])
            self.players_cards[i][self.pok.get_card_value_index(first)] = 1
            self.players_cards[i][self.pok.get_card_value_index(second)] = 1
            self.suited[i] = 1 if self.pok.is_suited([first, second]) else 0

    def get_all_in_amount(self):
        """Return the max raise amount from the first ask-player event.

        Returns None when there is no such event.
        """
        ask = self._first_event('event_ask_player')
        if ask is None:
            return None
        return ask['valid_actions'][2]['amount']['max']

    def get_call_amount(self):
        """Return the call amount from the first ask-player event.

        Returns None when there is no such event.
        """
        ask = self._first_event('event_ask_player')
        if ask is None:
            return None
        return ask['valid_actions'][1]['amount']

    def get_sb_reward(self):
        """Return the small blind's reward from the first round-finish event.

        The reward is the first winner's stack minus the starting stack,
        negated when that winner is not ``sb_player``.  Returns None when
        the round has not finished.
        """
        finish = self._first_event('event_round_finish')
        if finish is None:
            return None
        winner = finish['winners'][0]
        gain = winner['stack'] - self.starting_stack
        return gain if winner['uuid'] == 'sb_player' else -gain

    def play_action(self, action):
        """Apply a push/fold decision to the current game state.

        Args:
            action: 0 folds.  1 pushes: raise to the max amount, or call when
                raising is unavailable (max amount is -1).  Any other value
                is ignored.
        """
        if action == 0:
            self.game_state, self.events = self.emulator.apply_action(
                self.game_state, 'fold', 0)
        elif action == 1:
            all_in_amount = self.get_all_in_amount()
            if all_in_amount == -1:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'call', self.get_call_amount())
            else:
                self.game_state, self.events = self.emulator.apply_action(
                    self.game_state, 'raise', all_in_amount)

    def new_hand(self, starting_stack):
        """Reset per-hand state and start a new round with fresh stacks.

        Args:
            starting_stack: Stack given to both players for this hand.
        """
        self.street = 'preflop'
        self.hole_cards = {}
        self.starting_stack = starting_stack
        self.players_info = self._build_players_info(starting_stack)

        # A single initial state is built here, from the new stacks; the
        # earlier extra build from the stale players_info was overwritten
        # before use and has been dropped.
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)

        self.players_cards = [np.zeros(13), np.zeros(13)]

        self.get_hand_feature()

    def get_action_histories_text(self, hole_cards=False):
        """Return a readable transcript of the current action histories.

        The histories are read from the first stored event's ``round_state``.
        Streets with no actions are skipped.

        Args:
            hole_cards: When true, also print the saved hole cards.
        """
        if hole_cards:
            print(self.hole_cards)

        histo = self.events[0]['round_state']['action_histories']
        lines = []
        for street, street_actions in histo.items():
            if not street_actions:
                continue
            lines.append(street)
            for a in street_actions:
                if a['action'] == 'RAISE':
                    lines.append(a['uuid'] + ' raises to ' + str(a['amount']))
                elif a['action'] == 'FOLD':
                    lines.append(a['uuid'] + ' folds')
                elif a['action'] == 'CALL' and a['amount'] == 0:
                    lines.append(a['uuid'] + ' checks')
                else:
                    lines.append(a['uuid'] + ' ' + a['action'] + ' ' +
                                 str(a['amount']))

        return ''.join(line + '\n' for line in lines)
コード例 #7
0
# Experiment script: for each pair in monte_carlos_tries, play two games
# between one "high-iq" RiskyPlayer and three "low-iq" RiskyPlayers.
# `emulator`, `n_players` and `pd` are not defined in this excerpt and are
# expected to come from earlier in the file.
p_uuid=["high-iq","low-iq","low-iq2","low-iq3"]
#qlearner_player = RLPlayer(n_players, quuid)
# Each entry is a pair: element 1 is passed to the "high-iq" player and
# element 0 to the three others (presumably a Monte Carlo simulation count,
# going by the variable name; confirm against RiskyPlayer).
monte_carlos_tries=[[10,10],[10,20],[10,30],[10,50],[10,100],[10,200],[10,400],[10,800],[10,1000],[10,1500]]
for tryM in monte_carlos_tries:
	df = pd.DataFrame(columns = ['uuid', 'stack', 'game'])
	for i in range(0,2):
		# NOTE(review): `j` is not defined anywhere in the visible code; this
		# raises NameError unless it is set elsewhere. Possibly `tryM` was
		# intended; confirm.
		print("starting game " + str(i) + " from try " + str(j))
		# One empty per-player frame; they are not used in the visible part
		# of the script.
		df1 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df2 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df3 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		df4 = pd.DataFrame(columns = ['uuid', 'stack', 'round'])
		# 1. Register the four players; only the first one gets tryM[1].
		emulator.register_player(uuid=p_uuid[0], player=RiskyPlayer(n_players,tryM[1],0.7))
		emulator.register_player(uuid=p_uuid[1], player=RiskyPlayer(n_players,tryM[0],0.7))
		emulator.register_player(uuid=p_uuid[2], player=RiskyPlayer(n_players,tryM[0],0.7))
		emulator.register_player(uuid=p_uuid[3], player=RiskyPlayer(n_players,tryM[0],0.7))
		emulator.set_game_rule(player_num=4, max_round=1000, small_blind_amount=20, ante_amount=0)
		# 2. Setup GameState object
		players_info = {
			p_uuid[0]: { "name": "player1", "stack": 10000 },
			p_uuid[1]: { "name": "player2", "stack": 10000 },
			p_uuid[2]: { "name": "player3", "stack": 10000 },
			p_uuid[3]: { "name": "player4", "stack": 10000 },
		}
		initial_state = emulator.generate_initial_game_state(players_info)
		game_state, events = emulator.start_new_round(initial_state)
		# 3. Run simulation and get updated GameState object
		game_finish_state, events = emulator.run_until_game_finish(game_state)
		#qlearner_player.clear_stack()
		# Walk the seats of every finished round. The body of the inner loop
		# is not visible in this excerpt.
		for event in events:
			if(event['type']=="event_round_finish"):
				for player in event['round_state']['seats']:
コード例 #8
0
ファイル: Group48.py プロジェクト: lqtrung1998/CS3243-Project
from pypokerengine.utils.card_utils import estimate_hole_card_win_rate, gen_cards
from pypokerengine.engine.poker_constants import PokerConstants as Const
from pypokerengine.engine.hand_evaluator import HandEvaluator
# from My_utils import hand_strength_with_belief, hand_strength, win_rate
import itertools
import numpy as np
from time import time, sleep
import pprint
import copy
from CardProbability import CardDeck

# In[ ]:
# Pre-flop hand-strength table. `.item()` unwraps a 0-d array holding a single
# Python object, which NumPy stores pickled; NumPy >= 1.16.3 refuses to load
# such files unless allow_pickle=True is passed.
# NOTE(review): unpickling executes code from the file, so only load a
# 'preflop_hand_strength.npy' that comes from a trusted source.
PREFLOP_HAND_STRENGTH = np.load('preflop_hand_strength.npy',
                                allow_pickle=True).item()

# Shared emulator; positional rule arguments are
# (player count, max rounds, small blind amount, ante).
emulator = Emulator()
emulator.set_game_rule(2, 1, 20, 0)

# In[115]:

# heuristic_weights for[HC,LP,MP,HP,2P,3C,Straight,flush,FH,4C,SF]
# (one weight per hand category, 11 in total).
# Alternative weight set kept for reference:
# heuristic_weights =  np.array([1,1.5,2.5,4,10,20,40,80,160,320,640])
heuristic_weights = np.array([0.1, 1, 2, 3, 7, 14, 28, 56, 112, 224, 448])
# Optional normalisation, currently disabled:
# heuristic_weights = heuristic_weights/np.sum(heuristic_weights)
# print(heuristic_weights)
#quantiles = [1,8,20]
QUANTILE = [0.4, 0.6, 0.75]


def heuristic1(hole, community_card, heuristic_weights=heuristic_weights):
    """Heuristic over the hole cards combined with the community cards.

    NOTE(review): only the first two statements of the body are visible in
    this excerpt; the scoring logic and the return value are not shown.

    Args:
        hole: The player's hole cards (assumed to be a list; TODO confirm).
        community_card: The community cards (assumed to be a list).
        heuristic_weights: Weights to apply; defaults to the module-level
            ``heuristic_weights`` as bound when the function was defined.
    """
    # All cards available to the player.
    cards = hole + community_card
    # 11 slots; presumably one per entry of the 11-element weight vector.
    score_vector = np.zeros(11)
コード例 #9
0
class Game:
    """Depth-limited alpha-beta minimax over emulated poker states.

    Reads the module-level flags ``isDebug`` and ``isHeuristicTimed`` to
    control debug output.
    """

    def __init__(self, hole_card, player, state, num_rounds, valid_actions,
                 round_state, weights):
        """Store the search inputs and configure a two-player emulator.

        Args:
            hole_card: Hole cards used by the heuristic.
            player: The player this search is run for.
            state: Initial game state.
            num_rounds: Max round count passed to the emulator rule.
            valid_actions: Actions allowed at the root.
            round_state: Round state; the main pot amount is read from it.
            weights: Weights for [win_rate, amount_in_pot, EHS].
        """
        self.hole_card = hole_card
        self.player = player
        self.init_state = state
        self.emulator = Emulator()
        self.num_rounds = num_rounds
        self.valid_actions = valid_actions
        self.weights = weights
        self.round_state = round_state
        self.emulator.set_game_rule(2, self.num_rounds, 10, 0)

    def terminal_test(self, state):
        """Check whether the game tree ends at ``state``."""
        return self.emulator._is_last_round(state, self.emulator.game_rule)

    def actions(self, state):
        """Return the names of the legal moves in ``state``."""
        return [
            entry['action']
            for entry in self.emulator.generate_possible_actions(state)
        ]

    def eval_heuristics(self, player, state, win_rate):
        """Return the weighted heuristic score of ``state``.

        The score is the dot product of ``self.weights`` with
        ``[win_rate, amount_in_pot, EHS]``.

        Args:
            player: Unused; kept for interface compatibility.
            state: Game state whose community cards feed the EHS term.
            win_rate: Pre-computed win-rate term.
        """
        if isDebug:
            print("Evaluating heuristics")
        if isHeuristicTimed:
            start = time.time()
            print(start)

        amount_in_pot = float(self.round_state['pot']['main']['amount']) / 8.8
        EHS = (EffectiveHandStrength(
            self.hole_card, state['table']._community_card) + 10) / 20 * 100

        if isDebug:
            print("=======Got heuristics")
        if isHeuristicTimed:
            # Keep `start` intact: the former `end = start = time.time()`
            # overwrote it and always reported 0 seconds.
            end = time.time()
            print("==========Got heuristics in time: " + str(end - start) +
                  " secs")

        heuristics = [win_rate, amount_in_pot, EHS]
        return np.dot(self.weights, heuristics)

    def future_move(self, state):
        """Return the ``next_player`` entry of ``state``."""
        return state['next_player']

    def project(self, curr_state, move):
        """Return the state reached by making ``move`` from ``curr_state``."""
        return self.emulator.apply_action(curr_state, move)[0]

    def minimax(self, newState, max_depth, hole_cards, community_cards):
        """Pick an action with depth-limited alpha-beta minimax.

        The win-rate term is computed once, from the pre-flop or post-flop
        evaluator depending on whether ``community_cards`` is empty, and is
        reused for every leaf evaluation.

        Args:
            newState: State to search from.
            max_depth: Depth at which leaves are evaluated (children of the
                root are at depth 0).
            hole_cards: Hole cards for the win-rate evaluators.
            community_cards: Community cards; ``[]`` means pre-flop.

        Returns:
            The best action name, with 'fold' replaced by 'call'; None when
            no action is available.
        """
        player = self.future_move(newState)
        inf = float('inf')

        if community_cards == []:  # Preflop
            win_rate = eval_pre_flop.eval_pre_flop(hole_cards) / 100 * 50 + 50
        else:
            win_rate = eval_post_flop.eval_post_flop_rank(
                hole_cards, community_cards) / 100 * 50 + 50

        def min_value(newState, alpha, beta, depth):
            """Value of ``newState`` for the Min player, limited by max depth."""
            if isDebug:
                print("In MIN")

            if depth == max_depth or self.terminal_test(newState):
                return self.eval_heuristics(player, newState, win_rate)

            v = inf
            for a in self.actions(newState):
                v = min(
                    max_value(self.project(newState, a), alpha, beta,
                              depth + 1), v)
                if v <= alpha:
                    return v  # alpha cut-off
                beta = min(beta, v)
            return v

        def max_value(newState, alpha, beta, depth):
            """Value of ``newState`` for the Max player, limited by max depth."""
            if isDebug:
                print("In MAX")

            if depth == max_depth or self.terminal_test(newState):
                return self.eval_heuristics(player, newState, win_rate)

            v = -inf
            for a in self.actions(newState):
                v = max(
                    min_value(self.project(newState, a), alpha, beta,
                              depth + 1), v)
                if v >= beta:
                    return v  # beta cut-off
                alpha = max(alpha, v)
            return v

        # Root of the alpha-beta search: the best score found so far is
        # passed down as alpha so later subtrees can be pruned.
        best_score = -inf
        beta = inf
        best_action = None
        for a in self.actions(newState):
            v = min_value(self.project(newState, a), best_score, beta, 0)
            if v > best_score:
                best_score = v
                best_action = a

        # TODO: Since our player folds a lot, 'fold' is replaced by 'call'.
        # Please update/remove as required.
        if best_action == 'fold':
            return 'call'
        return best_action
コード例 #10
0
class RLPokerPlayer(BasePokerPlayer):
    """Poker player that ranks candidate moves with a learned reward model.

    For every decision a small set of candidate moves is built
    (``good_moves``); each one is appended to the round's feature vector and
    scored with ``self.model.predict``.  The best-scoring move is played, or
    a random candidate with probability ``epsilonRandom``.  With
    ``study_mode`` enabled the player also records its decisions in
    ``self.history`` and occasionally fits the model on a recorded
    transition.
    """

    def __init__(
        self,
        study_mode=False,
        model_file=None,
        new_model_file=None,
        gammaReward=0.1,
        alphaUpdateNet=0.1,
        epsilonRandom=0.05,
        decayRandom=0.5,
        players=None,
        max_history_len=1000,
    ):
        """Store the hyper-parameters and load or create the reward model.

        Args:
            study_mode: when true, decisions are recorded and used to fit
                the model, and ``players`` describes the simulated table.
            model_file: path handed to ``load``; when ``None`` a fresh
                ``commons.model1()`` is created instead.
            new_model_file: path the model is saved to on game updates
                (study mode only); ``None`` disables saving.
            gammaReward: weight of the simulated follow-up reward in
                ``learn_with_states``.
            alphaUpdateNet: per-round probability of running a fit step.
            epsilonRandom: probability of playing a random candidate move.
            decayRandom: factor applied to ``epsilonRandom`` after each
                round in study mode.
            players: table layout for study mode; the ``None`` entry marks
                this player's own seat.  Defaults to eight seats sharing one
                ``RandomPlayer`` followed by ``None``.
            max_history_len: size at which the oldest history entry is
                dropped.
        """
        # NOTE: the original signature declared ``new_model_file`` twice,
        # which is a SyntaxError; the first (positional) slot is kept.
        self.stats = commons.PlayerStats()
        self.new_model_file = new_model_file
        self.gammaReward = gammaReward
        self.alphaUpdateNet = alphaUpdateNet
        self.decayRandom = decayRandom
        self.epsilonRandom = epsilonRandom
        self.study_mode = study_mode
        self.max_history_len = max_history_len

        # Seat index of this player.  Known up front only in study mode;
        # otherwise it is resolved in receive_game_start_message.
        self.my_seat = None
        if self.study_mode:
            if players is None:
                # Built per instance instead of once at definition time.
                players = [RandomPlayer()] * 8 + [None]
            self.players = players
            for seat_index, seat_player in enumerate(self.players):
                if seat_player is None:
                    self.my_seat = seat_index

        self.history = deque()

        if model_file is not None:
            self.model = load(model_file)
        else:
            self.model = commons.model1()

    def good_moves(self, valid_actions, my_stack):
        """Return the candidate moves considered for one decision.

        Always contains fold (type 0) and call (type 1).  When raising is
        allowed (the raise minimum is not ``-1``) three raises are added:
        twice the small blind (type 2), half the stack (type 3) and the full
        stack (type 4), each clamped into the legal raise range.

        Args:
            valid_actions: sequence whose entry 1 is the call action and
                entry 2 the raise action with an ``{'min', 'max'}`` amount.
            my_stack: this player's current stack.

        Returns:
            List of ``{'action', 'amount', 'type'}`` dicts.
        """
        moves = [
            {'action': 'fold', 'amount': 0, 'type': 0},
            {'action': 'call', 'amount': valid_actions[1]['amount'],
             'type': 1},
        ]

        raise_range = valid_actions[2]['amount']
        if raise_range['min'] == -1:
            return moves

        raise_min, raise_max = raise_range['min'], raise_range['max']

        def clamp(amount):
            # Keep the amount inside [raise_min, raise_max].
            return max(min(amount, raise_max), raise_min)

        raise_candidates = (
            (2, self.small_blind_amount * 2),
            (3, my_stack // 2),
            (4, my_stack),
        )
        for move_type, amount in raise_candidates:
            moves.append({
                'action': 'raise',
                'amount': clamp(amount),
                'type': move_type,
            })

        return moves

    def turn2vec(self, state_vec, move):
        """Append the move's type and amount to ``state_vec`` as one row.

        Returns:
            A 2-D array of shape ``(1, len(state_vec) + 2)``.
        """
        features = np.concatenate(
            (np.array(state_vec), np.array([move['type'], move['amount']])))
        return features.reshape((1, -1))

    def find_best_strat(self, valid_actions, cur_state_vec, my_stack):
        """Pick a move for the given state and return it with its score.

        With probability ``self.epsilonRandom`` a uniformly random candidate
        is returned; otherwise the candidate with the highest predicted
        reward.

        Returns:
            ``(move, predicted_reward)``.  ``move`` is ``None`` when no
            candidate scores above the ``-1500 * 9`` floor.
        """
        best_move, best_reward = None, -1500 * 9
        good_moves = self.good_moves(valid_actions, my_stack)

        if sps.bernoulli.rvs(p=self.epsilonRandom):
            # Exploration step: random candidate, still scored by the model.
            ind = sps.randint.rvs(low=0, high=len(good_moves))
            move = good_moves[ind]
            reward = float(
                self.model.predict(self.turn2vec(cur_state_vec, move),
                                   batch_size=1))
            return move, reward

        for move in good_moves:
            cur_reward = float(
                self.model.predict(self.turn2vec(cur_state_vec, move),
                                   batch_size=1))
            if cur_reward > best_reward:
                best_move = move
                best_reward = cur_reward

        return best_move, best_reward

    def declare_action(self, valid_actions, hole_card, round_state):
        """Choose the action for this turn and return ``(action, amount)``.

        Updates the running statistics, scores the candidate moves and, in
        study mode, records the decision in the history.
        """
        self.stats.update(hole_card, round_state)

        round_state_vec = self.stats.calc_fine_params(hole_card, round_state)

        my_stack = round_state['seats'][self.my_seat]['stack']

        best_move, best_reward = self.find_best_strat(valid_actions,
                                                      round_state_vec,
                                                      my_stack)

        action, amount = best_move['action'], best_move['amount']

        print('action {}, amount {}'.format(action, amount))

        if self.study_mode:
            self.update_history(round_state_vec, best_move, best_reward,
                                round_state)
        return action, amount

    def update_history(self, round_state_vec, best_move, best_reward,
                       round_state):
        """Append one decision record and drop the oldest when full.

        The record keeps the feature vector, the raw round state, the move,
        its reward and a deep copy of the current statistics.
        """
        cur_data_dict = {
            'states_vec': round_state_vec,
            'states_original': round_state,
            'moves': best_move,
            'rewards': best_reward,
            'stats': deepcopy(self.stats),
        }

        self.history.append(cur_data_dict)
        if len(self.history) == self.max_history_len:
            self.history.popleft()

    def receive_game_start_message(self, game_info):
        """Cache the game rules and set up the emulator used for look-ahead.

        In study mode every seat is also registered with the emulator, using
        ``self.players`` and this object for the ``None`` seat.
        """
        if self.my_seat is None:
            # Outside study mode the seat is not known from the constructor.
            # presumably the engine assigns ``self.uuid`` when the player is
            # registered -- TODO confirm; without it the seat stays unset.
            own_uuid = getattr(self, 'uuid', None)
            if own_uuid is not None:
                for seat_index, seat in enumerate(game_info["seats"]):
                    if seat['uuid'] == own_uuid:
                        self.my_seat = seat_index
                        break

        if self.study_mode:
            self.uuid = game_info["seats"][self.my_seat]['uuid']
        self.stats.init_player_names(game_info)
        self.player_num = game_info["player_num"]
        self.max_round = game_info["rule"]["max_round"]
        self.small_blind_amount = game_info["rule"]["small_blind_amount"]
        self.ante_amount = game_info["rule"]["ante"]

        self.emulator = Emulator()
        self.emulator.set_game_rule(self.player_num, self.max_round,
                                    self.small_blind_amount, self.ante_amount)

        if self.study_mode:
            # Register the algorithm of each player used in the simulation.
            for i in range(self.player_num):
                self.emulator.register_player(
                    uuid=game_info["seats"][i]["uuid"],
                    player=self.players[i]
                    if self.players[i] is not None else self)

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Remember the stack at round start; rewards are measured from it."""
        self.start_round_stack = seats[self.my_seat]['stack']

    def receive_street_start_message(self, street, round_state):
        """No per-street bookkeeping is needed."""
        pass

    def receive_game_update_message(self, action, round_state):
        """Save the model to ``new_model_file`` on every update (study mode)."""
        if self.new_model_file is not None and self.study_mode:
            self.model.save(self.new_model_file)

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Record the round outcome and maybe run one training step.

        Study mode only: a synthetic 'finish' record carrying the stack
        change of the round is appended, a fit step on a random recorded
        transition is run with probability ``alphaUpdateNet`` once more than
        ten records exist, and ``epsilonRandom`` is decayed.
        """
        if self.study_mode:
            best_move = {'action': 'finish', 'amount': 0, 'type': 5}
            # Placeholder feature vector for the terminal record.
            round_state_vec = -np.ones(commons.FEATURES_LEN - 2)
            best_reward = round_state['seats'][
                self.my_seat]['stack'] - self.start_round_stack

            self.update_history(round_state_vec, best_move, best_reward,
                                round_state)

            if len(self.history) > 10 and sps.bernoulli.rvs(
                    p=self.alphaUpdateNet):
                ind = sps.randint.rvs(low=0, high=len(self.history) - 2)
                self.learn_with_states(self.history[ind],
                                       self.history[ind + 1])

            self.epsilonRandom *= self.decayRandom

    def do_best_simulation(self, next_state, next_state_vec, next_state_stats):
        """Return the best reward reachable one action after ``next_state``.

        Every candidate move is applied in the emulator.  If the resulting
        street is terminal the realised stack change is used; otherwise the
        model's best predicted reward for the resulting state.  Candidates
        whose simulation raises are skipped.

        Args:
            next_state: raw round state to continue from.
            next_state_vec: unused; kept for interface compatibility.
            next_state_stats: statistics object updated with each simulated
                state.

        Returns:
            The maximum reward found, or ``-1500 * 9`` if none succeeded.
        """
        game_state = restore_game_state(next_state)
        possible_actions = self.emulator.generate_possible_actions(game_state)
        good_actions = self.good_moves(
            possible_actions, next_state['seats'][self.my_seat]['stack'])
        best_reward = -1500 * 9
        for action in good_actions:
            try:
                next_next_game_state = self.emulator.apply_action(
                    game_state, action['action'], action['amount'])
                # apply_action returns (state, events); take the round state
                # attached to the last event.
                next_next_state = next_next_game_state[1][-1]['round_state']
                if next_next_state['street'] in ['showdown', 'finished', 4, 5]:
                    best_reward = max(
                        best_reward,
                        next_next_state['seats'][self.my_seat]['stack'] -
                        self.start_round_stack)
                else:
                    next_next_game_state = restore_game_state(next_next_state)
                    hole_card = attach_hole_card_from_deck(
                        next_next_game_state, self.uuid)
                    next_state_stats.update(hole_card, next_next_state)
                    # NOTE(review): features come from self.stats although
                    # next_state_stats was just updated -- confirm intended.
                    next_next_state_vec = self.stats.calc_fine_params(
                        hole_card, next_next_state)

                    next_next_actions = self.emulator.generate_possible_actions(
                        next_next_game_state)
                    best_reward = max(
                        best_reward,
                        self.find_best_strat(
                            next_next_actions,
                            next_next_state_vec,
                            next_next_state['seats'][self.my_seat]['stack'],
                        )[1])
            except Exception:
                # Best effort: a candidate that cannot be simulated is
                # skipped.  (Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt / SystemExit.)
                continue
        return best_reward

    def learn_with_states(self, state, next_state):
        """Fit the model on one recorded transition.

        The target is the reward stored in ``next_state``; when that state
        is not terminal, ``gammaReward`` times the best simulated follow-up
        reward is added.
        """
        if next_state['states_original']['street'] in [
                'showdown', 'finished', 4, 5
        ]:
            reward = next_state['rewards']
        else:
            reward = next_state['rewards'] + \
                self.gammaReward * self.do_best_simulation(
                    next_state['states_original'],
                    next_state['states_vec'],
                    next_state['stats'],
                )
        X = self.turn2vec(state['states_vec'], state['moves'])
        y = np.array([reward])
        self.model.fit(x=X, y=y, epochs=1, verbose=0, batch_size=1)
コード例 #11
0
class CustomEmulator:
    """Heads-up wrapper around ``Emulator`` that also builds feature arrays.

    The two seats are named ``bb_player`` and ``sb_player``.  After each
    emulator call the returned state and events are kept in
    ``self.game_state`` / ``self.events`` and summarised into:

    * ``players_cards``: two 52-slot arrays, one per seat (hole cards),
    * ``cards_feature``: three 52-slot arrays (flop, turn, river cards),
    * ``actions_feature``: four arrays, one per street.
    """

    # Streets in play order; the index selects the slot in actions_feature.
    _STREETS = ('preflop', 'flop', 'turn', 'river')

    def __init__(self, starting_stack, small_blind):
        """Configure a 2-player, 10-round, no-ante game.

        Args:
            starting_stack: stack given to both players; also the baseline
                for ``get_sb_reward`` and ``get_spr``.
            small_blind: small blind amount passed to the emulator.
        """
        self.pok = PokerUtils()
        # The original stored the literals 500 and 10 here while configuring
        # the emulator and the players from the arguments, so reward / SPR
        # maths disagreed with the game for any other values.
        self.starting_stack = starting_stack
        self.small_blind = small_blind
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num=2,
                                    max_round=10,
                                    small_blind_amount=small_blind,
                                    ante_amount=0)

        self.hole_cards = {}
        self.players_info = {
            "bb_player": {
                "name": "bb_player",
                "stack": starting_stack
            },
            "sb_player": {
                "name": "sb_player",
                "stack": starting_stack
            },
        }

        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.street = 'preflop'
        self.events = []
        self.game_state = []

        self._reset_features()

    def _reset_features(self):
        """Zero all card and action feature arrays."""
        self.players_cards = [np.zeros(52), np.zeros(52)]
        self.cards_feature = [np.zeros(52), np.zeros(52), np.zeros(52)]
        self.actions_feature = [
            np.zeros(6), np.zeros(6),
            np.zeros(6), np.zeros(6)
        ]

    def _first_event(self, event_type):
        """Return the first current event of ``event_type``, or ``None``."""
        for event in self.events:
            if event['type'] == event_type:
                return event
        return None

    def _ask_player_actions(self):
        """Return ``valid_actions`` of the first ask event, or ``None``."""
        event = self._first_event('event_ask_player')
        return None if event is None else event['valid_actions']

    def is_round_finished(self):
        """Return True if the current events contain a round-finish event."""
        return self._first_event('event_round_finish') is not None

    def new_street(self):
        """Update ``self.street`` from the first new-street event.

        Returns:
            The new street name, or ``False`` when no such event exists.
        """
        event = self._first_event('event_new_street')
        if event is None:
            return False
        self.street = event['street']
        return self.street

    def save_cards(self):
        """Store each player's hole cards, as strings, keyed by uuid."""
        for player in self.game_state['table'].seats.players:
            self.hole_cards[player.uuid] = [
                str(card) for card in player.hole_card
            ]

    def make_cards_feature(self):
        """Mark the cards revealed on the current street in the features.

        Preflop marks both hole cards of both seats (and saves them); flop
        marks every community card; turn and river mark the 4th / 5th
        community card respectively.
        """
        card_index = self.pok.get_card_total_index
        if self.street == 'preflop':
            self.save_cards()
            for i in range(2):
                for j in (0, 1):
                    card = self.game_state['table'].seats.players[i] \
                        .hole_card[j]
                    self.players_cards[i][card_index(str(card))] = 1
        elif self.street == 'flop':
            for card in self.events[0]['round_state']['community_card']:
                self.cards_feature[0][card_index(card)] = 1
        elif self.street == 'turn':
            self.cards_feature[1][card_index(
                self.events[0]['round_state']['community_card'][3])] = 1
        elif self.street == 'river':
            self.cards_feature[2][card_index(
                self.events[0]['round_state']['community_card'][4])] = 1

    def make_engineered_cards_feature(self):
        """Build the card features.

        The original body was an exact copy of ``make_cards_feature``, so it
        now delegates to it.
        """
        self.make_cards_feature()

    def make_actions_feature(self):
        """Refresh the action feature of the current street.

        Does nothing when ``self.street`` is not one of the four streets.
        """
        if self.street not in self._STREETS:
            return
        slot = self._STREETS.index(self.street)
        histories = self.events[0]['round_state']['action_histories']
        self.actions_feature[slot] = self.pok.get_street_actions(
            self.starting_stack, histories[self.street])

    def make_features(self):
        """Refresh the action features, then the card features."""
        # Actions are made every street.
        self.make_actions_feature()
        # NOTE(review): the original guard was `self.new_street != False`,
        # which compares the bound method itself (it is never called) and is
        # therefore always true, so card features were rebuilt on every
        # call.  That behaviour is kept; presumably `self.new_street()` was
        # intended -- TODO confirm before changing, since calling it also
        # updates `self.street`.
        self.make_cards_feature()

    def get_minraise_amount(self):
        """Return the minimum raise offered, or ``None`` without an ask."""
        actions = self._ask_player_actions()
        if actions is None:
            return None
        return actions[2]['amount']['min']

    def get_all_in_amount(self):
        """Return the maximum raise offered, or ``None`` without an ask."""
        actions = self._ask_player_actions()
        if actions is None:
            return None
        return actions[2]['amount']['max']

    def get_call_amount(self):
        """Return the call amount offered, or ``None`` without an ask."""
        actions = self._ask_player_actions()
        if actions is None:
            return None
        return actions[1]['amount']

    def get_sb_reward(self):
        """Return the round result from ``sb_player``'s point of view.

        Uses the first listed winner: its stack gain over the starting
        stack, negated when that winner is not ``sb_player``.  Returns
        ``None`` when the round has not finished.
        """
        event = self._first_event('event_round_finish')
        if event is None:
            return None
        winner = event['winners'][0]
        gain = winner['stack'] - self.starting_stack
        return gain if winner['uuid'] == 'sb_player' else -gain

    def get_spr(self):
        """Return the stack-to-pot ratio, assuming both put in half the pot."""
        pot = self.events[0]['round_state']['pot']['main']['amount']
        stack = self.starting_stack - int(pot / 2)
        return stack / pot

    def play_action(self, action):
        """Apply an action code to the emulator.

        Codes: 0 fold, 1 call, 2 minimum raise, 3 maximum raise.  Any other
        code is ignored.
        """
        if action == 0:
            name, amount = 'fold', 0
        elif action == 1:
            name, amount = 'call', self.get_call_amount()
        elif action == 2:
            name, amount = 'raise', self.get_minraise_amount()
        elif action == 3:
            name, amount = 'raise', self.get_all_in_amount()
        else:
            return
        self.game_state, self.events = self.emulator.apply_action(
            self.game_state, name, amount)

    def new_hand(self):
        """Start a fresh round from new initial stacks and reset features."""
        self.initial_game_state = self.emulator.generate_initial_game_state(
            self.players_info)

        self.street = 'preflop'
        self.hole_cards = {}
        self.game_state, self.events = self.emulator.start_new_round(
            self.initial_game_state)

        self._reset_features()

        self.make_features()

    def get_action_histories_text(self, hole_cards=False):
        """Return a readable, line-per-action transcript of the round.

        Args:
            hole_cards: when truthy, the saved hole cards are printed first.

        Returns:
            One line per street that has actions, followed by one line per
            action; every line ends with a newline.
        """
        if hole_cards:
            print(self.hole_cards)

        histories = self.events[0]['round_state']['action_histories']
        lines = []
        for street, actions in histories.items():
            if len(actions) == 0:
                continue
            lines.append(street)
            for entry in actions:
                name = entry['action']
                if name == 'RAISE':
                    lines.append(entry['uuid'] + ' raises to ' +
                                 str(entry['amount']))
                elif name == 'FOLD':
                    lines.append(entry['uuid'] + ' folds')
                elif name == 'CALL' and entry['amount'] == 0:
                    lines.append(entry['uuid'] + ' checks')
                else:
                    lines.append(entry['uuid'] + ' ' + name + ' ' +
                                 str(entry['amount']))

        return ''.join(line + '\n' for line in lines)
コード例 #12
0
class CFRBase:
    """Counterfactual-regret style trainer for a two-player emulator game.

    A game state is the ``(state, events)`` pair returned by the emulator.
    Strategy (``_sigma``), cumulative regrets and cumulative strategy
    weights are nested dicts indexed as ``table[tree][percentile][action]``,
    where ``tree`` is the action-history string from ``get_tree`` and
    ``percentile`` the hand-strength bucket from ``evaluate_hs``.
    """

    # Later streets appended to the tree string when present in the history.
    _LATER_STREETS = ('flop', 'turn', 'river')
    # Distances tried, in order (+offset first, then -offset), when the
    # evaluated percentile has no recorded regrets.
    _FALLBACK_OFFSETS = (5, 10, 20, 30, 40, 50)

    def __init__(self):
        """Create the emulator and load the strategy from strategy.pickle.

        Raises whatever ``open`` / ``pickle.load`` raise when the file is
        missing or unreadable.
        """
        self.emulator = Emulator()
        # Same load as before, but through load_data() so the file handle is
        # closed (the original left the result of ``open`` unclosed).
        self.load_data()

        self.cumulative_regrets = {}
        self.cumulative_sigma = {}

    def get_tree(self, game_state):
        """Return the action-history key of ``game_state``.

        The key is ``'#'`` followed by the first letter of every recorded
        action, street by street.  The history is read from the last event,
        or from the first event when the game has finished.
        """
        events = game_state[1]
        if events[-1]['type'] != 'event_game_finish':
            source = events[-1]
        else:
            source = events[0]
        hist = source['round_state']['action_histories']

        letters = ['#']
        letters.extend(entry['action'][0] for entry in hist['preflop'])
        for street in self._LATER_STREETS:
            if street in hist and len(hist[street]) > 0:
                letters.extend(entry['action'][0] for entry in hist[street])
        return ''.join(letters)

    def is_terminal(self, game_state):
        """Return True when the last event is the game-finish event."""
        return game_state[1][-1]['type'] == 'event_game_finish'

    def state_evaluation(self, game_state):
        """Return the stack of player index 1 minus 1000.

        1000 matches the starting stack configured in ``run``.
        """
        return game_state[1][1]['players'][1]['stack'] - 1000

    def _available_actions(self, game_state):
        """Return the action names offered by the last event."""
        return [
            entry['action'] for entry in game_state[1][-1]['valid_actions']
        ]

    def load_data(self):
        """Load ``self._sigma`` from strategy.pickle."""
        # NOTE(review): pickle executes code from the file on load; only use
        # a strategy.pickle that this program wrote itself.
        with open('strategy.pickle', 'rb') as handle:
            self._sigma = pickle.load(handle)

    def store_data(self):
        """Write ``self._sigma`` to strategy.pickle."""
        with open('strategy.pickle', 'wb') as handle:
            pickle.dump(self._sigma, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def _entry(self, table, game_state, percentile, uniform=False):
        """Return ``table[tree][percentile]``, creating it when missing.

        A new entry maps every available action to ``1 / n`` when
        ``uniform`` is true and to ``0`` otherwise.
        """
        tree = self.get_tree(game_state)
        available_actions = self._available_actions(game_state)
        per_tree = table.setdefault(tree, {})
        if percentile not in per_tree:
            per_tree[percentile] = {
                name: (1. / len(available_actions) if uniform else 0)
                for name in available_actions
            }
        return per_tree[percentile]

    def getsigma(self, game_state, percentile, action):
        """Return the strategy probability of ``action``.

        Unknown (tree, percentile) pairs start as a uniform strategy.
        """
        return self._entry(self._sigma, game_state, percentile,
                           uniform=True)[action]

    def _statetomove(self, game_state):
        """Return 1 when seat 1 is next to act, otherwise -1."""
        return 1 if game_state[0]["next_player"] == 1 else -1

    def _getcards(self, game_state):
        """Return the two hole cards of the player to act, as strings."""
        seat = game_state[0]["next_player"]
        player = game_state[0]['table'].seats.players[seat]
        return [str(player.hole_card[0]), str(player.hole_card[1])]

    def _getboard(self, game_state):
        """Return the table's community cards."""
        return game_state[0]['table']._community_card

    def _myround(self, x, base=10):
        """Round ``x`` to the nearest multiple of ``base`` (as an int)."""
        return int(base * round(float(x) / base))

    def evaluate_hs(self, game_state):
        """Return the hand-strength bucket of the player to act.

        The win rate is estimated with 100 simulations against one opponent
        and rounded to the nearest 10 percent.
        """
        hole_cards = gen_cards(self._getcards(game_state))
        community_cards = self._getboard(game_state)
        hs = estimate_hole_card_win_rate(100, 2, hole_cards, community_cards)
        return self._myround(hs * 100)

    def _state_play(self, game_state, action):
        """Apply ``action`` (with amount 0) and return the new game state."""
        return self.emulator.apply_action(game_state[0], action, 0)

    def utility_recursive(self):
        """Run one utility pass from ``self.game_state`` and return its value."""
        return self._utility_recursive(self.game_state, 1, 1)

    def _cumulate_cfr_regret(self, game_state, percentile, action, regret):
        """Add ``regret`` to the cumulative regret of ``action``."""
        self._entry(self.cumulative_regrets, game_state,
                    percentile)[action] += regret

    def _cumulate_sigma(self, game_state, percentile, action, prob):
        """Add ``prob`` to the cumulative strategy weight of ``action``."""
        self._entry(self.cumulative_sigma, game_state,
                    percentile)[action] += prob

    def __update_sigma_recursively(self, game_state):
        """Update the strategy at ``game_state`` and in every subtree."""
        # Stop traversal at terminal nodes.
        if self.is_terminal(game_state):
            return
        percentile = self.evaluate_hs(game_state)
        self._update_sigma(game_state, percentile)
        for action in self._available_actions(game_state):
            self.__update_sigma_recursively(
                self._state_play(game_state, action))

    def _nearest_recorded_percentile(self, recorded, percentile):
        """Return the closest percentile present in ``recorded``.

        Candidates are ``percentile +/- offset`` clamped to [0, 100], tried
        in the order of ``_FALLBACK_OFFSETS`` with the upper candidate
        first.  ``percentile`` is returned unchanged when none is present.
        """
        for offset in self._FALLBACK_OFFSETS:
            for candidate in (min(100, percentile + offset),
                              max(0, percentile - offset)):
                if candidate in recorded:
                    return candidate
        return percentile

    def _update_sigma(self, game_state, percentile):
        """Recompute the strategy of one node by regret matching.

        Each action gets its positive cumulative regret divided by the sum
        of positive regrets, or a uniform share when that sum is not
        positive.  When ``percentile`` has no recorded regrets the nearest
        recorded bucket is used instead.
        """
        tree = self.get_tree(game_state)
        recorded = self.cumulative_regrets[tree]
        if percentile not in recorded:
            percentile = self._nearest_recorded_percentile(
                recorded, percentile)

        regrets = recorded[percentile]
        rgrt_sum = sum(value for value in regrets.values() if value > 0)
        nr_of_actions = len(regrets)
        for name, value in regrets.items():
            if rgrt_sum > 0:
                share = max(value, 0.) / rgrt_sum
            else:
                share = 1. / nr_of_actions
            self._sigma[tree][percentile][name] = share

    def _utility_recursive(self, game_state, reach_sb, reach_bb):
        """Return the expected utility of ``game_state`` and record regrets.

        Args:
            game_state: ``(state, events)`` pair to evaluate.
            reach_sb: probability that the seat with ``_statetomove == 1``
                plays to this node.
            reach_bb: the same for the other seat.

        Returns:
            The terminal evaluation at a terminal node, otherwise the
            strategy-weighted average of the children's utilities.
        """
        if self.is_terminal(game_state):
            return self.state_evaluation(game_state)

        percentile = self.evaluate_hs(game_state)
        actions = self._available_actions(game_state)
        mover = self._statetomove(game_state)

        value = 0.
        children_states_utilities = {}
        for action in actions:
            probability = self.getsigma(game_state, percentile, action)
            # Only the acting seat's reach is scaled by its own choice.
            child_reach_sb = reach_sb * (probability if mover == 1 else 1)
            child_reach_bb = reach_bb * (probability if mover == -1 else 1)

            # The original recursed with the parent's reach_sb / reach_bb,
            # leaving the child reaches unused and every reach stuck at 1.
            child_state_utility = self._utility_recursive(
                self._state_play(game_state, action),
                child_reach_sb, child_reach_bb)

            value += probability * child_state_utility
            children_states_utilities[action] = child_state_utility

        # cfr_reach is the opponent's reach, reach the acting seat's own.
        if mover == 1:
            cfr_reach, reach = reach_bb, reach_sb
        else:
            cfr_reach, reach = reach_sb, reach_bb

        for action in actions:
            action_cfr_regret = mover * cfr_reach * (
                children_states_utilities[action] - value)
            self._cumulate_cfr_regret(game_state, percentile, action,
                                      action_cfr_regret)
            self._cumulate_sigma(
                game_state, percentile, action,
                reach * self.getsigma(game_state, percentile, action))
        return value

    def run(self, iterations=1):
        """Train for ``iterations`` rounds, saving the strategy after each.

        Every iteration deals a new heads-up round (stacks 1000, small blind
        10, no ante), runs one utility pass and one strategy update over the
        tree, then writes strategy.pickle.  Timestamps are printed at the
        start, after each iteration and at the end.
        """
        print(datetime.datetime.now())
        for myiter in range(iterations):
            # Initialize a new game state with new hole cards.
            self.emulator.set_game_rule(player_num=2, max_round=1,
                                        small_blind_amount=10, ante_amount=0)
            players_info = {
                "uuid-1": {"name": "player1", "stack": 1000},
                "uuid-2": {"name": "player2", "stack": 1000}
            }
            player1 = RandomPlayer()
            player2 = RandomPlayer()
            self.emulator.register_player('uuid-1', player1)
            self.emulator.register_player('uuid-2', player2)
            initial_state = self.emulator.generate_initial_game_state(
                players_info)
            self.game_state = self.emulator.start_new_round(initial_state)

            self._utility_recursive(self.game_state, 1, 1)
            self.__update_sigma_recursively(self.game_state)

            self.store_data()
            print('data stored,  i = ' + str(myiter))
            print(datetime.datetime.now())
        print(datetime.datetime.now())
コード例 #13
0
class MyPlayer(BasePokerPlayer):
    """Player that scores strategies against a belief over opponent hands.

    ``declare_action`` builds one search ``Node`` per opponent hand stored in
    ``self.belief`` and calls ``update_expected_table`` on it.  Each node is
    handed this player instance; presumably it reports gains back through
    ``update_table`` (``Node`` is defined elsewhere -- TODO confirm).  The
    first word of the best-scoring strategy key is returned as the action.
    """

    def __init__(self):
        self.table = {}  # strategy key ("a b c") -> summed gain, see update_table
        self.table1 = {}
        self.belief = {}  # filled lazily by initialize_belief
        # self.opponent_belief ={}
        self.uuid = None
        self.opponent = None
        self.emulator = Emulator()
        self.emulator.set_game_rule(2, 1, 10, 0)

        self.current_cost = 10
        self.random_game_state = None

        # Per-decision state, overwritten by every declare_action call.
        self.my_uuid = None
        self.my_cards = []
        self.community_card = []
        self.round_state = None

        # Latest action message and all messages received this round
        # (see receive_game_update_message / update_belief).
        self.action = None
        self.game_update = []

    def declare_action(self, valid_actions, hole_card, round_state):
        """Pick an action by evaluating every opponent hand in the belief.

        Args:
            valid_actions: Unused here.
            hole_card: This player's hole cards, converted with ``gen_cards``.
            round_state: Current round state dict; ``seats``, ``next_player``,
                ``big_blind_pos``, ``community_card`` and ``street`` are read.

        Returns:
            The action name only (first word of the chosen strategy key).
        """
        self.round_state = round_state
        self.table = {}

        self.my_uuid = round_state['seats'][round_state['next_player']]['uuid']
        self.my_cards = gen_cards(hole_card)
        self.community_card = gen_cards(round_state['community_card'])
        self.random_game_state = None

        big_blind_seat = round_state['seats'][round_state['big_blind_pos']]
        if big_blind_seat['uuid'] == self.my_uuid:
            self.current_cost = 20

        # NOTE(review): the belief is only built on the first decision and is
        # never rebuilt for later hole/community cards -- confirm intended.
        if not self.belief:
            self.initialize_belief()

        possible_opponent_cards = [entry[0] for entry in self.belief['Cards']]
        opponent_card_prob = self.belief['Probability']
        pp = pprint.PrettyPrinter(indent=2)

        for opponent_cards, prob in zip(possible_opponent_cards,
                                        opponent_card_prob):
            game_state = setup_game_state(round_state, self.my_cards,
                                          opponent_cards, self.my_uuid)
            # TODO: should not be self.belief, fix later
            self.opponent = ReasonablePlayer(opponent_cards, round_state,
                                             self.belief)

            root = Node(self.my_cards, opponent_cards, self.community_card,
                        game_state, round_state, round_state['street'], prob,
                        [], self, self.opponent, self.my_uuid, self.emulator,
                        self.current_cost)
            root.update_expected_table()

            # Keep the first simulated state; it is replayed below to find
            # out what the chosen action costs.
            if not self.random_game_state:
                self.random_game_state = game_state

        pp.pprint(self.table)
        strategies = table_to_strategies(self.table)
        pp.pprint(strategies)
        strategy = max(strategies, key=self.table.get)
        action = strategy.split()[0]

        # Apply the chosen action once in the emulator and add what it paid
        # (unless it was a fold) to the running cost.
        _, events = self.emulator.apply_action(self.random_game_state, action)
        histories = events[0]['round_state']['action_histories']
        street_history = histories.get(round_state['street'])
        if street_history:
            action_result = street_history[-1]
            if action_result['action'] != 'FOLD':
                self.current_cost += action_result['paid']

        # self.update_opp_belief(opponent_belief)
        return action

    # def update_opp_belief(self):

    def initialize_belief(self):
        """Build a uniform belief over every two-card hand not already seen.

        Fills ``self.belief['Cards']`` with ``(hand, strength)`` pairs sorted
        by ascending strength and ``self.belief['Probability']`` with equal
        weights.  Prints the time spent computing the hand strengths.
        """
        used = [card.to_id() for card in self.my_cards + self.community_card]
        unused = [card_id for card_id in range(1, 53) if card_id not in used]

        Cards = [[Card.from_id(card1),
                  Card.from_id(card2)]
                 for card1, card2 in itertools.combinations(unused, 2)]
        start = time()
        Cards_Strength = [
            hand_strength(hand, self.community_card, self.my_cards)
            for hand in Cards
        ]
        print('my', time() - start)

        self.belief['Cards'] = list(zip(Cards, Cards_Strength))
        self.belief['Probability'] = np.ones(len(Cards)) / len(Cards)

        self.belief['Cards'].sort(key=lambda x: x[1])

    def update_belief(self):
        """Re-weight the belief after a raise and re-normalise it.

        The action inspected is the latest recorded update message when it
        was made by this player, otherwise the one before it.  Does nothing
        when no belief or no usable update message is available.
        """
        if not self.belief or not self.game_update:
            return

        latest = self.game_update[-1]
        if latest['player_uuid'] == self.my_uuid:
            previous_action = latest['action']
        elif len(self.game_update) > 1:
            previous_action = self.game_update[-2]['action']
        else:
            return

        # Both adjustments are still zero, so apart from the normalisation
        # the re-weighting below currently changes nothing.
        strong_update = 0
        weak_update = 0

        if previous_action == 'raise':
            # Count the leading entries whose strength exceeds the threshold.
            # NOTE(review): the belief is sorted by ascending strength, so
            # this prefix is empty unless every hand is above 0.65 -- confirm
            # whether a descending sort was intended.
            count = 0
            for _cards, strength in self.belief['Cards']:
                if strength > 0.65:
                    count += 1
                else:
                    break
            self.belief['Probability'][:count] += strong_update
            self.belief['Probability'][count:] -= weak_update
            self.belief['Probability'] /= sum(self.belief['Probability'])

    def update_table(self, my_strategy, gain):
        """Add ``gain`` to the table entry keyed by the space-joined strategy."""
        key = ' '.join(my_strategy)
        self.table[key] = self.table.get(key, 0) + gain

    def receive_game_start_message(self, game_info):
        pass

    def receive_round_start_message(self, round_count, hole_card, seats):
        # Action messages are only meaningful within one round.
        self.game_update = []

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, action, round_state):
        """Remember the latest action message and append it to the round log."""
        self.action = action
        self.game_update.append(action)

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass
コード例 #14
0
class RLPlayer(BasePokerPlayer):
    """Epsilon-greedy player driven by the module-level ``QVALUE_MATRIX``.

    A state is the tuple (raise bucket, street index, win-rate bucket,
    pot bucket, stack bucket); ``STATES`` enumerates every combination and
    its position is the row index into ``QVALUE_MATRIX``.
    """

    # Street names in play order; the position doubles as the stage index.
    _STREETS = ("preflop", "flop", "turn", "river")

    def __init__(self, nplayers, uuid, alpha=0.7):
        """Store bucket boundaries and empty per-round histories.

        Args:
            nplayers: Number of players at the table.
            uuid: This player's uuid (may be replaced at game start).
            alpha: Learning rate used in ``update_qmatrix``.
        """
        self._probability_cutoff = [0.2, 0.62, 0.92]
        self._stack_cutoff = [5, 20, 32.5]
        self._raise_cutoff = [4, 10, 20]
        self._stages = [0, 1, 2, 3]
        self._pot_cutoff = [4, 15, 32.5]
        self.STATES = list(
            itertools.product(self._raise_cutoff, self._stages,
                              self._probability_cutoff, self._pot_cutoff,
                              self._stack_cutoff))
        self.ACTIONS = ['FOLD', 'RAISE', 'CALL']
        self._pot = []
        self._previous_state = []
        self._stack = []
        self._uuid = uuid
        self._probabilities = []
        self._state = []
        self._raise = []
        self.nb_player = nplayers
        self.ALPHA = alpha

    @staticmethod
    def _discretize(value, cutoffs):
        """Return the first cutoff strictly above ``value``, else the last one."""
        for cutoff in cutoffs[:-1]:
            if value < cutoff:
                return cutoff
        return cutoffs[-1]

    # Setup Emulator object by registering game information
    def receive_game_start_message(self, game_info):
        """Read the game rules, locate this player's seat, build the emulator.

        Also overwrites the module-level ``PLAYER_NUM``.
        """
        self.nb_player = game_info['player_num']
        global PLAYER_NUM
        PLAYER_NUM = game_info["player_num"]
        max_round = game_info["rule"]["max_round"]
        small_blind_amount = game_info["rule"]["small_blind_amount"]
        ante_amount = game_info["rule"]["ante"]
        blind_structure = game_info["rule"]["blind_structure"]

        # The seat is recognised by its fixed name.
        for seat in game_info['seats']:
            if seat['name'] == 'q-learning':
                self._uuid = seat['uuid']

        self.emulator = Emulator()
        self.emulator.set_game_rule(PLAYER_NUM, max_round, small_blind_amount,
                                    ante_amount)
        self.emulator.set_blind_structure(blind_structure)

    def declare_action(self, valid_actions, hole_card, round_state):
        """Choose an action: random with probability ``EPSILON``, else greedy.

        Args:
            valid_actions: Action list; index 1 is read as the call entry and
                index 2 as the raise entry with ``amount['min'/'max']``.
            hole_card: This player's hole cards.
            round_state: Current round state dict.

        Returns:
            ``(action, amount)`` tuple.
        """
        community_card = round_state['community_card']
        big_blind = round_state['small_blind_amount'] * 2

        win_rate = estimate_hole_card_win_rate(
            nb_simulation=NB_SIMULATION,
            nb_player=self.nb_player,
            hole_card=gen_cards(hole_card),
            community_card=gen_cards(community_card))

        # Exploration: pick a uniformly random action and skip all learning
        # bookkeeping for this decision.
        if np.random.rand() <= EPSILON:
            choice = np.random.choice(["fold", "call", "raise"], 1)[0]
            if choice == "raise":
                amount = np.random.randint(
                    valid_actions[2]['amount']["min"],
                    valid_actions[2]['amount']["max"] + 1, 1)[0]
                return 'raise', amount
            elif choice == "fold":
                return "fold", 0
            elif choice == "call":
                return "call", valid_actions[1]['amount']

        # Record the raw observations; they are bucketed in place below.
        self._probabilities.append([win_rate, round_state['street']])
        self._pot.append(round_state['pot']['main']['amount'] / big_blind)
        for seat in round_state['seats']:
            if seat['name'] == 'q-learning':
                self._stack.append(seat['stack'])

        # Collect this player's own actions (with their stage index) and sum
        # up what the other players raised.
        total_raised_amount = 0
        for stage, street in enumerate(self._STREETS):
            if street in round_state['action_histories']:
                for entry in round_state['action_histories'][street]:
                    if entry['uuid'] == self._uuid:
                        self._previous_state.append([entry['action'], stage])
                    if entry['action'] == 'RAISE' and entry['uuid'] != self._uuid:
                        total_raised_amount += entry['amount']

        total_raised_amount = (total_raised_amount /
                               self.nb_player) / big_blind

        # Map every observation onto its bucket.
        self._raise.append(
            self._discretize(total_raised_amount, self._raise_cutoff))
        self._pot[-1] = self._discretize(self._pot[-1], self._pot_cutoff)
        self._stack[-1] = self._discretize(self._stack[-1],
                                           self._stack_cutoff)
        self._probabilities[-1][0] = self._discretize(
            self._probabilities[-1][0], self._probability_cutoff)

        if len(self._stack) > 1:
            self.update_qmatrix()

        present_state = self.STATES.index(
            (self._raise[-1],
             self._STREETS.index(self._probabilities[-1][1]),
             self._probabilities[-1][0], self._pot[-1], self._stack[-1]))
        action_todo = self.ACTIONS[np.argmax(QVALUE_MATRIX[present_state])]
        if action_todo == 'FOLD':
            return "fold", 0
        elif action_todo == 'CALL':
            return "call", valid_actions[1]['amount']
        else:
            # Greedy raises always use the minimum raise amount.
            raise_amount = valid_actions[2]['amount']['min']
            return "raise", raise_amount

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, new_action, round_state):
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        pass

    def update_qmatrix(self):
        """Apply one Q-value update to the module-level ``QVALUE_MATRIX``.

        The reward is the difference between the last two bucketed stacks.
        """
        # NOTE(review): with i fixed at -1 the "next state" below is built
        # from the entries at index 0 and the first branch is only taken for
        # an empty history -- confirm this indexing is intended.
        i = -1
        reward = self._stack[-1] - self._stack[-2]
        index = self.STATES.index(
            (self._raise[i], self._previous_state[i][1],
             self._probabilities[i][0], self._pot[i], self._stack[i]))
        if i + 1 == len(self._probabilities):
            index_next_state = index
        else:
            index_next_state = self.STATES.index(
                (self._raise[i + 1], self._previous_state[i + 1][1],
                 self._probabilities[i + 1][0], self._pot[i + 1],
                 self._stack[i + 1]))
        # Blind postings are not in ACTIONS; they are rewritten as calls.
        if self._previous_state[i][0] in ("SMALLBLIND", "BIGBLIND"):
            self._previous_state[i][0] = "CALL"
        action = self.ACTIONS.index(self._previous_state[i][0])
        maxi = np.amax(QVALUE_MATRIX[index_next_state])
        QVALUE_MATRIX[index][
            action] = QVALUE_MATRIX[index][action] + self.ALPHA * (
                reward + GAMMA * maxi - QVALUE_MATRIX[index][action])

    def clear_stack(self):
        """Drop all recorded observation histories."""
        self._probabilities = []
        self._stack = []
        self._raise = []
        self._pot = []
        self._previous_state = []
コード例 #15
0
ファイル: emulator_test.py プロジェクト: neo-godlike/pokerbot
class EmulatorTest(BaseUnitTest):
    """Tests for ``Emulator``: rules, players, actions and round handling."""

    # Seat uuids the tests use with the TwoPlayerSample round state.
    TWO_PLAYER_UUIDS = ("tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa")
    # Seat uuids the tests use with the ThreePlayerGameStateSample round state.
    THREE_PLAYER_UUIDS = ("ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm",
                          "uxrdiwvctvilasinweqven")

    def setUp(self):
        self.emu = Emulator()

    def _two_player_state(self):
        """Restore the two-player sample and deal both seats from the deck."""
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in self.TWO_PLAYER_UUIDS:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        return game_state

    def _three_player_state_from_deck(self):
        """Restore the three-player sample and deal all seats from the deck."""
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        for uuid in self.THREE_PLAYER_UUIDS:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        return game_state

    def _three_player_state_with_cards(self):
        """Restore the three-player sample with fixed hole cards per seat."""
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["C2", "C3"], ["HA", "CA"], ["D5", "H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        for uuid, cards in zip(self.THREE_PLAYER_UUIDS, holecards):
            game_state = attach_hole_card(game_state, uuid, cards)
        return game_state

    def _register_players(self, uuids, players):
        """Register ``players`` on the emulator under the matching ``uuids``."""
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)

    def _register_fold_men(self, uuids):
        """Register a new ``FoldMan`` for every uuid."""
        self._register_players(uuids, [FoldMan() for _ in uuids])

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule["player_num"])
        self.eq(8, self.emu.game_rule["max_round"])
        self.eq(5, self.emu.game_rule["sb_amount"])
        self.eq(3, self.emu.game_rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        self.emu.register_player("uuid", "hoge")

    def test_blind_structure(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {"ante": 5, "small_blind": 60}})
        p1 = TestPlayer([("fold", 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ('fold', 0)])
        self._register_players(self.TWO_PLAYER_UUIDS, [p1, p2])

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state["table"].seats.players[0].stack)
        self.eq(135, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
            3: {
                "ante": 5,
                "small_blind": 10
            },
            5: {
                "ante": 10,
                "small_blind": 20
            }
        }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
            "uuid-1": {
                "name": "hoge",
                "stack": 100
            },
            "uuid-2": {
                "name": "fuga",
                "stack": 100
            }
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        # Small blind expected in the state after each successive fold.
        for expected_sb in (5, 5, 10, 10, 20, 20):
            state, _ = self.emu.apply_action(state, "fold")
            self.eq(expected_sb, state["small_blind_amount"])

    def test_apply_action(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            game_state["table"].seats.players[0].round_action_histories[
                Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 4, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, game_state["table"].seats.players[0].stack)
        self.eq(70, game_state["table"].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.emu.apply_action(game_state, "fold")

    def test_run_until_round_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, [p1, p2])

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self._register_players(self.TWO_PLAYER_UUIDS, [p1, p2])
        game_state, events = self.emu.run_until_round_finish(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [["CA", "D2"], ["C8", "H5"]]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid, cards in zip(self.TWO_PLAYER_UUIDS, holecards):
            game_state = attach_hole_card(game_state, uuid, cards)
        # The second seat is dealt again from the deck after the fixed cards
        # were attached; kept as in the original scenario.
        game_state = attach_hole_card_from_deck(game_state,
                                                self.TWO_PLAYER_UUIDS[1])
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self._register_players(self.TWO_PLAYER_UUIDS, [p1, p2])
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    def test_run_until_game_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 1)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, game_state["table"].seats.players[0].stack)
        self.eq(86, game_state["table"].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        game_state = self._three_player_state_with_cards()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold", 0), ("call", 10), ('call', 0), ('call', 10),
                   ("fold", 0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        self._register_players(self.THREE_PLAYER_UUIDS, players)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(292, game_state["table"].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        game_state = self._three_player_state_with_cards()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(265, game_state["table"].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self.TWO_PLAYER_UUIDS)

        # run until round finish
        game_state, event = self.emu.apply_action(game_state, "call", 15)
        game_state, event = self.emu.apply_action(game_state, "call", 0)
        game_state, event = self.emu.apply_action(game_state, "call", 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq("tojrbxmkuzrarnniosuhct", events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # case1: seat 0 is left with 11 chips and is expected to be FOLDED
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1] - sb_amount - ante,
                game_state["table"].seats.players[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

        # case2: seat 1 is left with 16 chips and is expected to be FOLDED
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0] - sb_amount - ante,
                game_state["table"].seats.players[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state["table"].seats.players[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END,
                game_state["table"].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state["table"].seats.players)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        # seat 2 is left with 6 chips before the next round starts
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 6
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])

    def test_start_new_round_game_finish_judge(self):
        original = self._three_player_state_from_deck()
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(self.THREE_PLAYER_UUIDS)

        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 11
        finish_state["table"].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)

        players_info = OrderedDict()
        players_info["uuid-1"] = {"name": "hoge", "stack": 100}
        players_info["uuid-2"] = {"name": "fuga", "stack": 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, events = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions,
                self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions,
                self.emu.generate_possible_actions(state2))
コード例 #16
0
class EmulatorTest(BaseUnitTest):
    def setUp(self):
        """Give every test its own freshly constructed Emulator."""
        fresh_emulator = Emulator()
        self.emu = fresh_emulator

    def test_set_game_rule(self):
        """``set_game_rule`` stores its four arguments in ``game_rule``."""
        self.emu.set_game_rule(2, 8, 5, 3)
        expected_rule = (
            ('player_num', 2),
            ('max_round', 8),
            ('sb_amount', 5),
            ('ante', 3),
        )
        for key, value in expected_rule:
            self.eq(value, self.emu.game_rule[key])

    def test_register_and_fetch_player(self):
        """A player registered under a uuid is returned by ``fetch_player``."""
        registry = [('uuid-1', FoldMan()), ('uuid-2', FoldMan())]
        for uuid, player in registry:
            self.emu.register_player(uuid, player)
        for uuid, player in registry:
            self.eq(player, self.emu.fetch_player(uuid))

    @raises(TypeError)
    def test_register_invalid_player(self):
        """Registering a plain string as a player must raise TypeError."""
        not_a_player = 'hoge'
        self.emu.register_player('uuid', not_a_player)

    def test_blind_structure(self):
        """Play two scripted rounds, then expect the game to finish.

        A blind structure entry keyed by 5 (ante 5, small blind 60) is
        registered.  After two rounds the third ``start_new_round`` must
        report ``event_game_finish`` with seat 0 left at a stack of 0.
        NOTE(review): presumably the key is the round count at which the new
        blinds apply and seat 0 can no longer afford them - confirm.
        """
        first_uuid = 'tojrbxmkuzrarnniosuhct'
        second_uuid = 'pwtwlmfciymjdoljkhagxa'

        state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in (first_uuid, second_uuid):
            state = attach_hole_card_from_deck(state, uuid)

        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: {'ante': 5, 'small_blind': 60}})
        first_script = [('fold', 0), ('raise', 55), ('call', 0)]
        second_script = [('call', 15), ('call', 55), ('fold', 0)]
        self.emu.register_player(first_uuid, TestPlayer(first_script))
        self.emu.register_player(second_uuid, TestPlayer(second_script))

        # Finish the round that the sample state is in the middle of.
        state, _ = self.emu.run_until_round_finish(state)
        seats = state['table'].seats.players
        self.eq(65, seats[0].stack)
        self.eq(135, seats[1].stack)

        # Play one more complete round.
        state, _ = self.emu.start_new_round(state)
        state, _ = self.emu.run_until_round_finish(state)
        seats = state['table'].seats.players
        self.eq(120, seats[0].stack)
        self.eq(80, seats[1].stack)

        # The following round cannot start: the game is reported finished.
        state, events = self.emu.start_new_round(state)
        self.eq('event_game_finish', events[0]['type'])
        seats = state['table'].seats.players
        self.eq(0, seats[0].stack)
        self.eq(80, seats[1].stack)

    def test_blind_structure_update(self):
        """The small blind in the state follows the registered blind structure.

        With levels at keys 3 (small blind 10) and 5 (small blind 20), six
        consecutive folds are expected to report small blinds of
        5, 5, 10, 10, 20 and 20.
        """
        self.emu.set_game_rule(2, 8, 5, 3)
        for uuid in ('uuid-1', 'uuid-2'):
            self.emu.register_player(uuid, FoldMan())
        self.emu.set_blind_structure({
            3: {'ante': 5, 'small_blind': 10},
            5: {'ante': 10, 'small_blind': 20},
        })

        players_info = {
            uuid: {'name': name, 'stack': 100}
            for uuid, name in (('uuid-1', 'hoge'), ('uuid-2', 'fuga'))
        }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state['small_blind_amount'])

        state, _ = self.emu.start_new_round(state)
        for expected_small_blind in (5, 5, 10, 10, 20, 20):
            state, _ = self.emu.apply_action(state, 'fold')
            self.eq(expected_small_blind, state['small_blind_amount'])

    def test_apply_action(self):
        """Step the two-player sample forward one action at a time.

        The first call must move the state to RIVER, leave seat 0's TURN
        history equal to the sample's and emit a new-street event followed
        by an ask event.  The next two calls must emit an ask event and then
        the round-finish event.
        """
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        self.emu.set_game_rule(2, 10, 5, 0)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'call', 15)
        self.eq(Const.Street.RIVER, game_state['street'])
        self.eq(
            TwoPlayerSample.p1_action_histories,
            game_state['table'].seats.players[0].round_action_histories[
                Const.Street.TURN])
        self.eq(2, len(events))
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_ask_player', events[0]['type'])

        game_state, events = self.emu.apply_action(game_state, 'call', 0)
        self.eq(1, len(events))
        self.eq('event_round_finish', events[0]['type'])

    def test_apply_action_game_finish_detect(self):
        """With ``max_round`` set to 3, a fold in the sample ends the game.

        The last event returned by ``apply_action`` must be
        ``event_game_finish``.
        """
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        self.emu.set_game_rule(2, 3, 5, 0)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.eq('event_game_finish', events[-1]['type'])

    def test_apply_action_start_next_round(self):
        """A fold that ends a round lets the next action play in a new round.

        With ``max_round`` set to 4 the game is not over after the fold, so
        the subsequent raise must be accepted and followed by an ask event,
        with both stacks reduced compared to the values right after the fold.
        """
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        self.emu.set_game_rule(2, 4, 5, 0)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.eq(120, game_state['table'].seats.players[0].stack)
        self.eq(80, game_state['table'].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, 'raise', 20)
        self.eq('event_ask_player', events[-1]['type'])
        self.eq(100, game_state['table'].seats.players[0].stack)
        self.eq(70, game_state['table'].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        """Applying another action after the game has finished must raise.

        The setup matches the game-finish-detect test (``max_round`` 3); the
        second ``apply_action`` call is the one expected to raise.
        """
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        self.emu.set_game_rule(2, 3, 5, 0)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        game_state, events = self.emu.apply_action(game_state, 'fold')
        self.emu.apply_action(game_state, 'fold')

    def test_run_until_round_finish(self):
        """Scripted players finish the sample round in three events.

        The expected event sequence is new street, ask player, round finish.
        """
        scripts = [
            ('tojrbxmkuzrarnniosuhct', [('fold', 0)]),
            ('pwtwlmfciymjdoljkhagxa', [('call', 15)]),
        ]
        state = restore_game_state(TwoPlayerSample.round_state)
        for uuid, _ in scripts:
            state = attach_hole_card_from_deck(state, uuid)
        self.emu.set_game_rule(2, 10, 5, 0)
        for uuid, actions in scripts:
            self.emu.register_player(uuid, TestPlayer(actions))

        state, events = self.emu.run_until_round_finish(state)
        expected_types = [
            'event_new_street', 'event_ask_player', 'event_round_finish'
        ]
        for position, event_type in enumerate(expected_types):
            self.eq(event_type, events[position]['type'])

    def test_run_until_round_finish_when_already_finished(self):
        """Running an already finished round again produces no events."""
        scripts = [
            ('tojrbxmkuzrarnniosuhct', [('fold', 0)]),
            ('pwtwlmfciymjdoljkhagxa', [('call', 15)]),
        ]
        state = restore_game_state(TwoPlayerSample.round_state)
        for uuid, _ in scripts:
            state = attach_hole_card_from_deck(state, uuid)
        self.emu.set_game_rule(2, 10, 5, 0)
        for uuid, actions in scripts:
            self.emu.register_player(uuid, TestPlayer(actions))

        # First run finishes the round; the second has nothing left to do.
        state, _ = self.emu.run_until_round_finish(state)
        state, second_run_events = self.emu.run_until_round_finish(state)
        self.eq(0, len(second_run_events))

    def test_run_until_round_finish_game_finish_detect(self):
        """A round that leaves seat 0 with no chips also ends the game.

        Expected events: new street, two asks, round finish, game finish;
        the game-finish event must report stacks of 0 and 200.
        """
        first_uuid = 'tojrbxmkuzrarnniosuhct'
        second_uuid = 'pwtwlmfciymjdoljkhagxa'
        fixed_hands = [(first_uuid, ['Ac', '2d']), (second_uuid, ['8c', '5h'])]

        state = restore_game_state(TwoPlayerSample.round_state)
        for uuid, labels in fixed_hands:
            cards = [Card.from_str(label) for label in labels]
            state = attach_hole_card(state, uuid, cards)
        # NOTE(review): the second player's fixed hand is attached again from
        # the deck right after being set - kept as in the original; confirm
        # whether this is intended.
        state = attach_hole_card_from_deck(state, second_uuid)

        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.register_player(first_uuid, TestPlayer([('raise', 65)]))
        self.emu.register_player(
            second_uuid, TestPlayer([('call', 15), ('call', 65)]))
        state['table'].deck.deck.append(Card.from_str('7c'))

        state, events = self.emu.run_until_round_finish(state)
        expected_types = [
            'event_new_street',
            'event_ask_player',
            'event_ask_player',
            'event_round_finish',
            'event_game_finish',
        ]
        for position, event_type in enumerate(expected_types):
            self.eq(event_type, events[position]['type'])
        final_players = events[4]['players']
        self.eq(0, final_players[0]['stack'])
        self.eq(200, final_players[1]['stack'])

    def test_run_until_game_finish(self):
        """Two always-folding players run the sample game to its end.

        The last event must be ``event_game_finish`` and the final stacks
        must be 114 and 86.
        """
        uuids = ('tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa')
        state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            state = attach_hole_card_from_deck(state, uuid)
        self.emu.set_game_rule(2, 10, 5, 1)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        state, events = self.emu.run_until_game_finish(state)
        self.eq('event_game_finish', events[-1]['type'])
        final_stacks = [seat.stack for seat in state['table'].seats.players]
        self.eq(114, final_stacks[0])
        self.eq(86, final_stacks[1])

    def test_run_until_game_finish_when_one_player_is_left(self):
        """The game ends once a single player holds all the chips.

        Three scripted players with fixed hole cards play the three-player
        sample until the game finishes; the final stacks must be 0, 0 and 292.
        """
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [('fold', 0), ('call', 10), ('call', 0), ('call', 10),
                   ('fold', 0)]
        p2_acts = []
        p3_acts = [('raise', 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        # Plain loop: registration is a side effect, not a list being built.
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)
        game_state['table'].deck.deck.append(Card.from_str('7c'))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(0, game_state['table'].seats.players[0].stack)
        self.eq(0, game_state['table'].seats.players[1].stack)
        self.eq(292, game_state['table'].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        """Always-folding players play until the round limit is reached.

        With ``max_round`` set to 10 the game must finish at round count 10
        with stacks of 35, 0 and 265.
        """
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        holecards = [[Card.from_str(s) for s in ss]
                     for ss in [['2c', '3c'], ['Ah', 'Ac'], ['5d', '6d']]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(
            lambda state, item: attach_hole_card(state, item[0], item[1]),
            zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        # Plain loop: registration is a side effect, not a list being built.
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())
        game_state['table'].deck.deck.append(Card.from_str('7c'))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq('event_game_finish', events[-1]['type'])
        self.eq(10, game_state['round_count'])
        self.eq(35, game_state['table'].seats.players[0].stack)
        self.eq(0, game_state['table'].seats.players[1].stack)
        self.eq(265, game_state['table'].seats.players[2].stack)

    def test_last_round_judge(self):
        """``_is_last_round`` reacts to street, round count and stacks.

        The expected answers, in order: False for the untouched sample, True
        once the street is FINISHED, False after the round count is set to 2,
        True again after seat 0's stack is set to 0.
        """
        state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        rule = self.emu.game_rule
        is_last_round = self.emu._is_last_round

        self.false(is_last_round(state, rule))

        state['street'] = Const.Street.FINISHED
        self.true(is_last_round(state, rule))

        state['round_count'] = 2
        self.false(is_last_round(state, rule))

        state['table'].seats.players[0].stack = 0
        self.true(is_last_round(state, rule))

    def test_start_new_round(self):
        """Starting a round after a finished one sets up the preflop street.

        The new state must have round count 4, dealer button 1, street 0 and
        next player 0; the events must be a preflop new-street event followed
        by an ask event addressed to the first player's uuid.
        """
        uuids = ['tojrbxmkuzrarnniosuhct', 'pwtwlmfciymjdoljkhagxa']
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in uuids:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        self.emu.set_game_rule(2, 10, 5, 0)
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        # run until round finish
        for amount in (15, 0, 0):
            game_state, _ = self.emu.apply_action(game_state, 'call', amount)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state['round_count'])
        self.eq(1, game_state['table'].dealer_btn)
        self.eq(0, game_state['street'])
        self.eq(0, game_state['next_player'])
        self.eq('event_new_street', events[0]['type'])
        self.eq('event_ask_player', events[1]['type'])
        self.eq('preflop', events[0]['street'])
        self.eq('tojrbxmkuzrarnniosuhct', events[1]['uuid'])

    def test_start_new_round_exclude_no_money_players(self):
        """Players who cannot afford blind plus ante are excluded.

        Both cases start from the same finished round, shrink one stack and
        then check the button, the next player, the blinds actually paid,
        the excluded player's FOLDED status and the resulting main pot.
        """
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        # Plain loop: registration is a side effect, not a list being built.
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        # case1: a player cannot pay the small blind.
        # Seat 0 is left with 11 chips, one short of sb + ante (5 + 7).
        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state['table'].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(1, game_state['next_player'])
        self.eq(stacks[1] - sb_amount - ante,
                game_state['table'].seats.players[1].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state['table'].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state['table'].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state['table'].seats.players)[0]['amount'])

        # case2: a player cannot pay the big blind.
        # Seat 1 is left with 16 chips, one short of bb + ante (10 + 7).
        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state['table'].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state['table'].dealer_btn)
        self.eq(0, game_state['next_player'])
        self.eq(stacks[0] - sb_amount - ante,
                game_state['table'].seats.players[0].stack)
        self.eq(stacks[2] - sb_amount * 2 - ante,
                game_state['table'].seats.players[2].stack)
        self.eq(PayInfo.FOLDED,
                game_state['table'].seats.players[1].pay_info.status)
        self.eq(PayInfo.PLAY_TILL_END,
                game_state['table'].seats.players[0].pay_info.status)
        self.eq(
            sb_amount * 3 + ante * 2,
            GameEvaluator.create_pot(
                game_state['table'].seats.players)[0]['amount'])

    def test_start_new_round_exclude_no_money_players2(self):
        """Button and blind positions skip a player who cannot pay the ante.

        Seat 2 is left with 6 chips (less than the ante of 7); the new round
        must then have dealer button 0, small blind position 1 and next
        player 1.
        """
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        # Plain loop: registration is a side effect, not a list being built.
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[2].stack = 6
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state['table'].dealer_btn)
        self.eq(1, game_state['table'].sb_pos())
        self.eq(1, game_state['next_player'])

    def test_start_new_round_game_finish_judge(self):
        """``start_new_round`` reports game finish when two players are short.

        Seats 2 and 1 are cut to 11 and 16 chips; the only event returned by
        ``start_new_round`` must then be ``event_game_finish``.
        """
        uuids = [
            'ruypwwoqwuwdnauiwpefsw', 'sqmfwdkpcoagzqxpxnmxwm',
            'uxrdiwvctvilasinweqven'
        ]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(
            lambda state, uuid: attach_hole_card_from_deck(state, uuid), uuids,
            game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        # Plain loop: registration is a side effect, not a list being built.
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

        finish_state, events = self.emu.apply_action(original, 'fold')
        finish_state['table'].seats.players[2].stack = 11
        finish_state['table'].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq('event_game_finish', events[0]['type'])

    def test_generate_initial_game_state(self):
        """Check the state produced by ``generate_initial_game_state``.

        Verifies the initial round count, small blind, seat order and stacks,
        then starts the first round and checks the button, the blind
        positions and whose turn it is before and after a call.
        """
        self.emu.set_game_rule(2, 8, 5, 3)

        # The assertions below expect seats in insertion order.
        players_info = OrderedDict()
        players_info['uuid-1'] = {'name': 'hoge', 'stack': 100}
        players_info['uuid-2'] = {'name': 'fuga', 'stack': 100}
        state = self.emu.generate_initial_game_state(players_info)
        table = state['table']
        self.eq(0, state['round_count'])
        self.eq(5, state['small_blind_amount'])
        self.eq(100, table.seats.players[0].stack)
        self.eq('uuid-1', table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq('uuid-2', table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state['table'].dealer_btn)
        self.eq(1, state['table'].sb_pos())
        self.eq(0, state['table'].bb_pos())
        self.eq(1, state['next_player'])
        state, events = self.emu.apply_action(state, 'call', 10)
        self.eq(1, state['next_player'])

    def test_generate_possible_actions(self):
        """Each sample's restored state must yield that sample's valid actions."""
        for sample in (TwoPlayerSample, ThreePlayerGameStateSample):
            restored = restore_game_state(sample.round_state)
            self.eq(sample.valid_actions,
                    self.emu.generate_possible_actions(restored))
コード例 #17
0
        model.load_weights(os.path.join(log_folder_weights, log_weights[-1]))

# Reload the saved memory file, but only when it already exists in
# data_folder.
if os.path.exists(os.path.join(data_folder, 'latest_memory.json')):
    memory.load_lt_memory(data_folder, 'latest_memory.json')

for game in range(starting_game, starting_game + num_game):

    print(game)

    nb_player = random.randint(2, config.game_param['MAX_PLAYER'])
    sb_amount = random.randint(1, 10)
    ante_amount = random.randint(0, 5)
    max_round = random.randint(10, 20)

    emulator.set_game_rule(player_num=nb_player,
                           max_round=max_round,
                           small_blind_amount=sb_amount,
                           ante_amount=ante_amount)
    players_info = {
        "uuid-" + str(i): {
            'name': 'player' + str(i),
            'stack': random.randint(80, 120)
        }
        for i in range(0, nb_player)
    }

    initial_state = emulator.generate_initial_game_state(players_info)
    [
        emulator.register_player(
            k,
            Alpha0Regret(
                100, model, 1, k,
コード例 #18
0
class ModelPlayer(BasePokerPlayer):

    # Table name substituted into the CREATE TABLE statement issued by
    # __init_database.
    table_name = "gameinfotbl"
    # Column list of the recorded row; not referenced by the live code
    # visible in this part of the class.
    standard_insert = "Player_name, win_rate, hole_cards, community_cards, action, stack, small_blind_amount"
    # Database file opened by __init_database and record_action.
    db_name = "game_information.db"
    # Street names; not referenced by the live code visible in this part of
    # the class.
    street = ['preflop', 'flop', 'turn', 'river', 'showdown']

    def __init__(self, name):
        """Initialise bookkeeping fields, the database table and the models.

        name -- label written as the first value of every recorded row.
        """
        self.name = name

        # Values filled in later by the receive_* callbacks.
        self.seat = None
        self.stack = None
        self.small_blind_amount = None
        self.hole_card = None
        self.community_card = None

        # Decision bookkeeping.
        self.raise_amount = 0
        self.win_rate = None
        self.action = None

        # Thresholds read by is_already_fold().
        self.opponent_raise_threshold = 0.7
        self.opponent_bluffing_ratio = 0.5

        self.__init_database()
        self.Opponent_Model = OpponentModel("Opponent", 0.5, 0.5)
        self.my_model = MyModel()

    def declare_action(self, valid_actions, hole_card, round_state):
        """Return the (action, amount) pair with the highest expected value.

        The win rate comes from estimate_win_rate_with_model_to_convergent()
        and is printed; ev_calculation() then scores fold, call and raise in
        that order and the first entry with the maximum score wins.
        valid_actions is indexed as [fold, call, raise]; a raise is always
        declared with the minimum raise amount, which is also stored in
        self.raise_amount before the scores are computed.
        """
        main_pot = round_state['pot']['main']['amount']
        call_option = valid_actions[1]
        raise_option = valid_actions[2]
        call_amount = call_option['amount']
        # NOTE(review): PyPokerEngine appears to report a minimum of -1 when
        # raising is not possible - confirm whether that case needs handling.
        self.raise_amount = raise_option['amount']['min']

        estimated_win_rate = self.estimate_win_rate_with_model_to_convergent(
            hole_card, round_state)
        print(estimated_win_rate)
        ev = self.ev_calculation(estimated_win_rate, main_pot, call_amount)

        best_index = ev.index(max(ev))
        if best_index == 0:
            fold_option = valid_actions[0]
            return fold_option['action'], fold_option['amount']
        if best_index == 1:
            return call_option['action'], call_option['amount']
        return raise_option['action'], raise_option['amount']['min']

    def receive_game_start_message(self, game_info):
        """Cache the game rules, find our seat and configure the emulator.

        Stores the player count and small blind, resets ``my_model``, records
        the index of the seat whose uuid equals ``self.uuid`` and registers a
        model for every seat on a fresh Emulator (``my_model`` for ourselves,
        ``Opponent_Model`` for everyone else).

        Raises:
            ValueError: if no entry of ``game_info['seats']`` has our uuid.
        """
        rule = game_info['rule']
        seats = game_info['seats']

        self.nb_player = game_info['player_num']
        self.small_blind_amount = rule['small_blind_amount']
        self.Opponent_Model.set_nb_player(self.nb_player)
        self.my_model = MyModel()
        max_round = rule['max_round']
        sb_amount = rule['small_blind_amount']
        ante_amount = rule['ante']

        # The else belongs to the for loop: it only runs when every seat was
        # inspected without finding our uuid.  Previously it was attached to
        # the if, so any player not sitting in seat 0 failed immediately.
        for index, player in enumerate(seats):
            if player['uuid'] == self.uuid:
                self.seat = index
                break
        else:
            raise ValueError('not participating!')

        self.emulator = Emulator()
        self.emulator.set_game_rule(self.nb_player, max_round, sb_amount,
                                    ante_amount)
        for player_info in seats:
            uuid = player_info['uuid']
            player_model = self.my_model if uuid == self.uuid else self.Opponent_Model
            self.emulator.register_player(uuid, player_model)

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Remember this round's hole cards and reset our running bet to 0."""
        self.hole_card = hole_card
        self.self_bet = 0

    def receive_street_start_message(self, street, round_state):
        """Remember the community cards reported for the new street."""
        board = round_state['community_card']
        self.community_card = board

    def receive_game_update_message(self, action, round_state):
        """Update our running bet, the opponent's bet and our stack.

        Our own actions add their amount to ``self.self_bet``.  The opponent
        bet passed to ``Opponent_Model.set_bet`` is the main pot minus our
        running bet, and our stack is read from our seat in ``round_state``.
        """
        acted_by_me = action['player_uuid'] == self.uuid
        if acted_by_me:
            self.self_bet += action['amount']

        main_pot = round_state['pot']['main']['amount']
        self.Opponent_Model.set_bet(main_pot - self.self_bet)

        my_seat = round_state['seats'][self.seat]
        self.stack = my_seat['stack']

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Round-result callback; this player does nothing with the result."""
        pass

    def record_action(self):
        """Insert the current decision snapshot as one row of ``gameinfotbl``.

        The row values come from ``__make_message``.  The connection is
        closed when the insert is done, whether or not it succeeded.
        """
        msg_record = self.__make_message()

        con = lite.connect(self.db_name)
        try:
            # Assuming `lite` is the sqlite3 module: `with con` only ends the
            # transaction (commit or rollback) and leaves the connection
            # open, hence the explicit close() below.
            with con:
                cur = con.cursor()
                cur.execute(
                    "INSERT INTO gameinfotbl(Player_name, win_rate, hole_cards, community_cards, action, stack, small_blind_amount) VALUES(?,?,?,?,?,?,?)",
                    msg_record)
        finally:
            con.close()

    #######################################################################

    def ev_calculation(self, win_rate, pot, call_amount):
        """Return the scores of fold, call and raise as a three-item list.

        Fold scores minus what we already bet.  Call and raise weigh the
        amount won (the pot minus our own bet, plus the raise for a raise)
        by ``win_rate`` against the amount put at risk by ``1 - win_rate``.
        """
        lose_rate = 1 - win_rate
        already_bet = self.self_bet
        raise_by = self.raise_amount

        fold_ev = -already_bet
        call_ev = (win_rate * (pot - already_bet)
                   - lose_rate * (already_bet + call_amount))
        raise_ev = (win_rate * (pot - already_bet + raise_by)
                    - lose_rate * (already_bet + raise_by + call_amount))
        return [fold_ev, call_ev, raise_ev]

    # def choose_action(self, win_rate, pot, valid_actions):
    #     r = rand.random()
    #     ev = self.ev_calculation(win_rate, pot, valid_actions[1]['amount'])
    #     if win_rate >= self.raise_threshold:
    #         return valid_actions[2]['action'], 2 * self.small_blind_amount
    #     elif r >= self.bluffing_ratio:
    #         return valid_actions[2]['action'], 2 * self.small_blind_amount
    #     elif ev[1] >= ev[0]:
    #         return valid_actions[1]['action'], valid_actions[1]['amount']
    #     else:
    #         return valid_actions[0]['action'], valid_actions[0]['amount']

    def _setup_game_state(self, round_state, my_hole_card):
        """Build a simulation state with hole cards attached for every seat.

        The deck of the restored state is shuffled.  Our own seat receives
        ``my_hole_card``; every other seat receives a hand from
        ``assuming_card`` that is redrawn until ``is_already_fold`` rejects
        the idea that this hand would have folded already.
        """
        state = restore_game_state(round_state)
        state['table'].deck.shuffle()
        board = round_state['community_card']
        street_now = round_state['street']

        for seat in round_state['seats']:
            uuid = seat['uuid']
            if uuid == self.uuid:
                # attach my holecard
                hand = gen_cards(my_hole_card)
            else:
                # attach opponents holecard at random
                hand = assuming_card(gen_cards(my_hole_card))
                while self.is_already_fold(hand, street_now, board):
                    hand = assuming_card(gen_cards(my_hole_card))
            state = attach_hole_card(state, uuid, hand)
        return state

    def __make_message(self):
        msg = []
        msg.append(self.name)
        msg.append(self.win_rate)
        msg.append(str(self.hole_card))
        msg.append(str(self.community_card))
        msg.append(self.action)
        msg.append(self.stack)
        msg.append(self.small_blind_amount)

        return msg

    def __init_database(self):
        """Create the recording table in ``db_name`` if it does not exist.

        The connection is closed when the statement is done, whether or not
        it succeeded.
        """
        con = lite.connect(self.db_name)
        try:
            # Assuming `lite` is the sqlite3 module: `with con` only ends the
            # transaction (commit or rollback) and leaves the connection
            # open, hence the explicit close() below.
            with con:
                cur = con.cursor()

                cur.execute(
                    "CREATE TABLE IF NOT EXISTS {tbl_name}(_Id INTEGER PRIMARY KEY, Player_name TEXT, win_rate REAL,  \
                                                                hole_cards TEXT, community_cards TEXT, action INT, stack INT, small_blind_amount INT)"
                    .format(tbl_name=self.table_name))
        finally:
            con.close()

    def _modify_round_strategy_list(self, action, street, round_strategy):
        """Record ``action`` as the entry for ``street`` in ``round_strategy``.

        Args:
            action: value to store.
            street: key under which the action is stored.
            round_strategy: mutable mapping updated in place.

        The previous implementation always wrote to the literal key
        ``'street'`` and ignored the ``street`` argument, so every call
        overwrote the same slot.
        NOTE(review): confirm no caller reads ``round_strategy['street']``.
        """
        round_strategy[street] = action

    def is_already_fold(self, opponent_card, current_street, community_card):
        """Decide whether a candidate opponent hand should be rejected.

        One random number is drawn for the whole call. The streets of
        ``street_table`` are then visited in order, up to and including
        ``current_street`` (or ``'river'``). For each street the hand's win
        rate is computed against the first ``street_table[street]`` community
        cards. The hand is rejected as soon as that win rate is below
        ``self.opponent_raise_threshold`` while the random draw is below
        ``self.opponent_bluffing_ratio``.

        Returns:
            True if the hand is rejected, False otherwise.
        """
        roll = rand.random()
        card_names = list(map(str, opponent_card))
        for street_name in street_table.keys():
            # Only the community cards visible on this street are considered.
            board_size = street_table[street_name]
            strength = get_broad_win_rate(card_names,
                                          community_card[0:board_size])
            is_weak = strength < self.opponent_raise_threshold
            if is_weak and roll < self.opponent_bluffing_ratio:
                return True
            if street_name == current_street or street_name == 'river':
                # Later streets have not been played yet.
                return False
        return False

    def simulate_one_time(self, round_state, hole_card):
        """Play the current round out once and report whether we won it.

        A game state is prepared with ``_setup_game_state`` and the emulator
        runs it until the round finishes.

        Returns:
            1 if the last entry of the final event's ``'winners'`` list has
            this player's uuid, otherwise 0. Also 0 when the final event is
            ``'event_game_finish'``.
        """
        start_state = self._setup_game_state(round_state, hole_card)
        outcome = self.emulator.run_until_round_finish(start_state)
        _final_state, event_log = outcome
        last_event = event_log[-1]
        if last_event['type'] == 'event_game_finish':
            # Per the original author: returning anything but 0 here makes the
            # last round report an error.
            return 0
        winner_uuid = last_event['winners'][-1]['uuid']
        if winner_uuid == self.uuid:
            return 1
        return 0

    def calculate_win_rate_with_model_serval_time(self, round_state, hole_card,
                                                  nb_simulation):
        """Return the fraction of ``nb_simulation`` simulated rounds we win.

        Each simulation is one call to ``simulate_one_time``, which yields 1
        for a win and 0 otherwise. ``nb_simulation`` must be non-zero.
        """
        wins = 0
        for _ in range(nb_simulation):
            wins += self.simulate_one_time(round_state, hole_card)
        return 1.0 * wins / nb_simulation

    def estimate_win_rate_with_model_to_convergent(self, hole_card,
                                                   round_state):
        """Estimate the win rate by simulating until the estimate settles.

        Batches of 50 simulations are run and folded into a running mean. The
        ten most recent running means are kept in a window that starts out as
        all zeros. The loop stops once the spread (max - min) of that window
        is below 0.1, and the latest running mean is returned.
        """
        batch_size = 50
        window = [.0] * 10
        batches_done = 0  # number of batches already folded into the mean
        while True:
            batch_rate = self.calculate_win_rate_with_model_serval_time(
                round_state, hole_card, batch_size)
            previous_mean = window[-1]
            running_mean = (previous_mean * batches_done +
                            batch_rate) / (batches_done + 1)
            # Slide the window: drop the oldest mean, append the newest.
            window = window[1:] + [running_mean]
            spread = np.max(window) - np.min(window)
            if spread < 0.1:
                return window[-1]
            batches_done += 1
コード例 #19
0
ファイル: main.py プロジェクト: markchen8717/DQL_Poker
def _make_hall_of_fame_player(index):
    """Build one DQL player named after ``index`` with its own agent."""
    player_name = str(index)
    network = DQN()
    replay = ReplayMemory(REPLAY_MEMORY_SIZE, REPLAY_MEMORY_BATCH_SIZE)
    agent = DQL_Agent(network, replay, EPSILON, EPSILON_DECAY, EPSILON_END,
                      DISCOUNT_FACTOR, J)
    return DQLPlayer(player_name, agent)


# Eight independent players, named "0" through "7".
hall_of_fame = [_make_hall_of_fame_player(i) for i in range(8)]

# To resume from saved weights, call
# player.agent.load_model("player_" + str(player.name) + ".h5")
# for each player in hall_of_fame at this point.

# Emulator configured for as many seats as there are players.
emulator = Emulator()
emulator.set_game_rule(
    player_num=len(hall_of_fame),
    max_round=2**32,
    small_blind_amount=SB,
    ante_amount=ANTE["initial"],
)

# Play a single real round so that each player is assigned a uuid.
config = setup_config(
    max_round=1,
    initial_stack=STARTING_STACK,
    small_blind_amount=SB,
)
for player in hall_of_fame:
    config.register_player(name=player.name, algorithm=player)
game_result = start_poker(config, verbose=0)

# Copy each uuid from the game result onto the player with the same name and
# register that player with the emulator under it.
for player in game_result['players']:
    for _player in hall_of_fame:
        if _player.name != player['name']:
            continue
        _player.uuid = player['uuid']
        emulator.register_player(_player.uuid, _player)
コード例 #20
0
def initialize_new_emulator(player_num, max_round, small_blind_amount, ante_amount):
    """Return a new ``Emulator`` with the given game rule applied.

    All four arguments are forwarded unchanged, by keyword, to
    ``Emulator.set_game_rule``.
    """
    rule = {
        "player_num": player_num,
        "max_round": max_round,
        "small_blind_amount": small_blind_amount,
        "ante_amount": ante_amount,
    }
    fresh_emulator = Emulator()
    fresh_emulator.set_game_rule(**rule)
    return fresh_emulator
コード例 #21
0
ファイル: CFR_emulator_B.py プロジェクト: darren96/PokerAI
class CFR:
    """Counterfactual-regret trainer for a two-player game run on an Emulator.

    Decision nodes are stored in ``game_states_``. Each key is the acting
    player's simplified hand followed by the action history that led to the
    node.
    """

    def __init__(self):
        self.game_states_ = dict()  # maps history to node
        self.emulator = Emulator()

    @staticmethod
    def get_higher_rank(card1, card2):
        """Return the card with the strictly higher ``rank``; ``card2`` on a tie."""
        if card1.rank > card2.rank:
            return card1
        return card2

    @staticmethod
    def get_higher_suit(card1, card2):
        """Return the card with the higher ``suit``, or 0 when the suits are equal."""
        if card1.suit > card2.suit:
            return card1
        elif card1.suit == card2.suit:
            return 0
        return card2

    @staticmethod
    def simplify_hand(hand, community_cards):
        """Compress a two-card hand into a short string key.

        The string form of a card is read as a suit character followed by a
        rank character. The result is built as follows:

        * a pair becomes the two rank characters, e.g. ``TT``;
        * otherwise the higher rank comes first, then the lower rank, then
          ``s`` when both cards share a suit or ``o`` (off-suit) when they do
          not, e.g. ``T9s`` / ``T9o``;
        * for non-pair hands with at least three community cards, ``_`` plus
          the hand strength reported by ``HandEvaluator`` is appended.

        Args:
            hand: two card strings accepted by ``gen_cards``.
            community_cards: list of community card strings (may be empty).

        Returns:
            The simplified hand string.
        """
        generated_hand = gen_cards(hand)
        card1 = generated_hand[0]
        card2 = generated_hand[1]

        # pair
        if card1.rank == card2.rank:
            # return the rank 2-9, J-A instead of all ints
            return str(card1)[1] + str(card2)[1]

        # Higher rank first, then the other card's rank.
        hand = str(CFR.get_higher_rank(card1, card2))[1]
        hand += str(card2)[1] if hand == str(card1)[1] else str(card1)[1]
        hand += "s" if str(card1)[0] == str(card2)[0] else "o"

        if len(community_cards) >= 3:
            strength = HandEvaluator.gen_hand_rank_info(generated_hand, gen_cards(community_cards))
            hand += "_%s" % strength.get("hand")["strength"]

        return hand

    def train(self, iterations, ante=1.0, bet1=2.0, bet2=8.0, print_interval=1000, save_interval=1000):
        """Run fictitious self-play and return the mean utility per iteration.

        Each iteration starts a fresh two-player round on the emulator (game
        rule ``2, 5, 10, 5``; both stacks 10000) and runs ``cfr`` on it.

        Args:
            iterations: number of rounds to play. With 0 nothing is played
                and 0.0 is returned instead of dividing by zero.
            ante, bet1, bet2: stored on the instance; the emulator rule used
                below does not read them.
            print_interval: a progress line is printed every this many
                iterations; 0 disables printing.
            save_interval: currently unused.

        Returns:
            Accumulated utility divided by ``iterations``.
        """
        util = 0.0

        self.ante = ante
        self.bet1 = bet1
        self.bet2 = bet2

        if iterations == 0:
            # No rounds played: there is nothing to average.
            return 0.0

        for i in range(iterations):
            # ``print_interval`` is checked first so that 0 cannot reach the
            # modulo below.
            if print_interval and i != 0 and i % print_interval == 0:
                print("P1 expected value after %i iterations: %f" % (i, util / i))

            self.emulator.set_game_rule(2, 5, 10, 5)
            init_state = self.emulator.generate_initial_game_state(
                {"0": {"stack": 10000, "name": "0"}, "1": {"stack": 10000, "name": "1"}})
            round_start, events = self.emulator.start_new_round(init_state)
            seats = round_start["table"].seats.players
            player_one_cards = list(map(str, seats[0].hole_card))
            player_two_cards = list(map(str, seats[1].hole_card))
            cards = [player_one_cards, player_two_cards]
            history = list()
            util += self.cfr(cards, history, round_start, events, 1, 1)
        return util / iterations

    def get_strategy(self):
        """Return a dict mapping every stored state key to its node's average strategy."""
        return {
            state: node.get_average_strategy()
            for state, node in self.game_states_.items()
        }

    def cfr(self, cards, history, game_state, events, probability1, probability2):
        """Recursively evaluate one game state and update its node's regrets.

        Args:
            cards: the players' hole cards; index 0 is player one's hand and
                index 1 is player two's.
            history: list of moves used to reach this game state.
            game_state: PyPokerEngine's game state.
            events: events returned by the emulator call that produced
                ``game_state``.
            probability1: probability of reaching this state for player 1.
            probability2: probability of reaching this state for player 2.

        Returns:
            For a state whose events include ``event_round_finish``: the
            stack stored under ``str(player)`` minus the stack stored under
            ``str(opponent)``. Otherwise: the strategy-weighted sum of the
            negated values returned by the recursive calls, one per action.
        """
        player = 1 - int(game_state["next_player"])
        opponent = 1 - player
        player_hand = cards[player]

        probability_weight = probability1 if player == 0 else probability2

        # BEGIN TERMINAL STATES
        for event in events:
            if event["type"] == "event_round_finish":
                seats = game_state["table"].seats.players
                stacks = {
                    seats[0].uuid: seats[0].stack,
                    seats[1].uuid: seats[1].stack,
                }
                return stacks[str(player)] - stacks[str(opponent)]

        community_card = [str(card) for card in game_state["table"].get_community_card()]

        # Node key: simplified hand followed by every action taken so far.
        p_hand = self.simplify_hand(player_hand, community_card)
        state = str(p_hand) + "".join(history)

        if state in self.game_states_:
            node = self.game_states_[state]  # Get our node if it already exists
            possible_actions = node.actions_
        else:
            # Create new Node with possible actions we can perform
            possible_actions = [ACTION_TO_HISTORY_MAPPING[i["action"]] for i in
                                self.emulator.generate_possible_actions(game_state)]
            node = Node(possible_actions)
            self.game_states_[state] = node

        strategy = node.get_strategy(probability_weight)

        util = dict()
        node_util = 0
        # for each of our possible actions, compute the utility of it
        # thus, finding the overall utility of this current state
        for action in possible_actions:
            next_history = list(history)  # copy
            next_history.append(action)
            new_game_state, new_event = self.emulator.apply_action(game_state, HISTORY_TO_ACTION_MAPPING[action])

            # Only the acting player's reach probability is scaled by the
            # chance of choosing this action.
            if player == 0:
                util[action] = -self.cfr(cards, next_history, new_game_state, new_event,
                                         probability1 * strategy[action], probability2)
            else:
                util[action] = -self.cfr(cards, next_history, new_game_state, new_event, probability1,
                                         probability2 * strategy[action])
            node_util += strategy[action] * util[action]

        # compute regret and update Game State for the node based on utility of all actions;
        # each regret is weighted by the other player's reach probability
        other_reach = probability2 if player == 0 else probability1
        for action in possible_actions:
            regret = util[action] - node_util
            node.regret_sum_[action] += regret * other_reach

        return node_util
コード例 #22
0
class EmulatorTest(BaseUnitTest):
    # Unit tests for Emulator: game-rule and blind-structure setup, player
    # registration, applying single actions, running a round or a whole game
    # to completion, and starting new rounds.
    #
    # Explanations are written as comments rather than docstrings so that test
    # runners keep reporting the method names.

    # Seat uuids used by TwoPlayerSample.round_state.
    _TWO_PLAYER_UUIDS = ("tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa")

    def setUp(self):
        # Every test starts from a fresh, unconfigured emulator.
        self.emu = Emulator()

    def _two_player_state(self):
        # Restore the two-player sample round and deal both seats their hole
        # cards from the deck, first seat first.
        game_state = restore_game_state(TwoPlayerSample.round_state)
        for uuid in self._TWO_PLAYER_UUIDS:
            game_state = attach_hole_card_from_deck(game_state, uuid)
        return game_state

    def _register_fold_men(self, uuids):
        # Register a fresh FoldMan under each uuid, in the given order.
        for uuid in uuids:
            self.emu.register_player(uuid, FoldMan())

    def test_set_game_rule(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        self.eq(2, self.emu.game_rule["player_num"])
        self.eq(8, self.emu.game_rule["max_round"])
        self.eq(5, self.emu.game_rule["sb_amount"])
        self.eq(3, self.emu.game_rule["ante"])

    def test_register_and_fetch_player(self):
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        self.eq(p1, self.emu.fetch_player("uuid-1"))
        self.eq(p2, self.emu.fetch_player("uuid-2"))

    @raises(TypeError)
    def test_register_invalid_player(self):
        # A plain string is not a player object and must be rejected.
        self.emu.register_player("uuid", "hoge")

    def test_blind_structure(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self.emu.set_blind_structure({5: { "ante": 5, "small_blind": 60 } })
        p1 = TestPlayer([("fold", 0), ('raise', 55), ('call', 0)])
        p2 = TestPlayer([("call", 15), ("call", 55), ('fold', 0)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(65, game_state["table"].seats.players[0].stack)
        self.eq(135, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.start_new_round(game_state)
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        # Starting the next round is expected to end the game instead.
        game_state, events = self.emu.start_new_round(game_state)
        self.eq("event_game_finish", events[0]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

    def test_blind_structure_update(self):
        self.emu.set_game_rule(2, 8, 5, 3)
        p1, p2 = FoldMan(), FoldMan()
        self.emu.register_player("uuid-1", p1)
        self.emu.register_player("uuid-2", p2)
        blind_structure = {
                3: { "ante": 5, "small_blind": 10 },
                5: {"ante": 10, "small_blind": 20 }
                }
        self.emu.set_blind_structure(blind_structure)
        players_info = {
                "uuid-1": { "name": "hoge", "stack": 100 },
                "uuid-2": { "name": "fuga", "stack": 100 }
                }
        state = self.emu.generate_initial_game_state(players_info)
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.start_new_round(state)
        # Six consecutive folds; the small blind reported by the state is
        # checked after each one.
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(5, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(10, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])
        state, _ = self.emu.apply_action(state, "fold")
        self.eq(20, state["small_blind_amount"])

    def test_apply_action(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "call", 15)
        self.eq(Const.Street.RIVER, game_state["street"])
        self.eq(TwoPlayerSample.p1_action_histories, \
                game_state["table"].seats.players[0].round_action_histories[Const.Street.TURN])
        self.eq(2, len(events))
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_ask_player", events[0]["type"])

        game_state, events = self.emu.apply_action(game_state, "call", 0)
        self.eq(1, len(events))
        self.eq("event_round_finish", events[0]["type"])

    def test_apply_action_game_finish_detect(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq("event_game_finish", events[-1]["type"])

    def test_apply_action_start_next_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 4, 5, 0)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        self.eq(120, game_state["table"].seats.players[0].stack)
        self.eq(80, game_state["table"].seats.players[1].stack)

        game_state, events = self.emu.apply_action(game_state, "raise", 20)
        self.eq("event_ask_player", events[-1]["type"])
        self.eq(100, game_state["table"].seats.players[0].stack)
        self.eq(70, game_state["table"].seats.players[1].stack)

    @raises(Exception)
    def test_apply_action_when_game_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 3, 5, 0)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        game_state, events = self.emu.apply_action(game_state, "fold")
        # The second action is the one expected to raise.
        self.emu.apply_action(game_state, "fold")

    def test_run_until_round_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_round_finish", events[2]["type"])

    def test_run_until_round_finish_when_already_finished(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("fold", 0)])
        p2 = TestPlayer([("call", 15)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state, events = self.emu.run_until_round_finish(game_state)
        # Running an already finished round again must produce no events.
        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq(0, len(events))

    def test_run_until_round_finish_game_finish_detect(self):
        uuids = ["tojrbxmkuzrarnniosuhct", "pwtwlmfciymjdoljkhagxa"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["CA", "D2"], ["C8", "H5"]]]
        game_state = restore_game_state(TwoPlayerSample.round_state)
        game_state = reduce(lambda a,e: attach_hole_card(a, e[0], e[1]), zip(uuids, holecards), game_state)
        # The second seat's hole cards are then re-attached from the deck.
        game_state = attach_hole_card_from_deck(game_state, "pwtwlmfciymjdoljkhagxa")
        self.emu.set_game_rule(2, 10, 5, 0)
        p1 = TestPlayer([("raise", 65)])
        p2 = TestPlayer([("call", 15), ("call", 65)])
        self.emu.register_player("tojrbxmkuzrarnniosuhct", p1)
        self.emu.register_player("pwtwlmfciymjdoljkhagxa", p2)
        game_state["table"].deck.deck.append(Card.from_str("C7"))

        game_state, events = self.emu.run_until_round_finish(game_state)
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("event_ask_player", events[2]["type"])
        self.eq("event_round_finish", events[3]["type"])
        self.eq("event_game_finish", events[4]["type"])
        self.eq(0, events[4]["players"][0]["stack"])
        self.eq(200, events[4]["players"][1]["stack"])

    def test_run_until_game_finish(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 1)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(114, game_state["table"].seats.players[0].stack)
        self.eq(86, game_state["table"].seats.players[1].stack)

    def test_run_until_game_finish_when_one_player_is_left(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["C2","C3"],["HA","CA"],["D5","H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(lambda state, item: attach_hole_card(state, item[0], item[1]), zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        p1_acts = [("fold",0), ("call", 10), ('call', 0), ('call', 10), ("fold",0)]
        p2_acts = []
        p3_acts = [("raise", 10)]
        players = [TestPlayer(acts) for acts in [p1_acts, p2_acts, p3_acts]]
        for uuid, player in zip(uuids, players):
            self.emu.register_player(uuid, player)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(0, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(292, game_state["table"].seats.players[2].stack)

    def test_run_until_game_finish_when_final_round(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        holecards = [[Card.from_str(s) for s in ss] for ss in [["C2","C3"],["HA","CA"],["D5","H6"]]]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        game_state = reduce(lambda state, item: attach_hole_card(state, item[0], item[1]), zip(uuids, holecards), game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(uuids)
        game_state["table"].deck.deck.append(Card.from_str("C7"))
        game_state, events = self.emu.run_until_game_finish(game_state)
        self.eq("event_game_finish", events[-1]["type"])
        self.eq(10, game_state["round_count"])
        self.eq(35, game_state["table"].seats.players[0].stack)
        self.eq(0, game_state["table"].seats.players[1].stack)
        self.eq(265, game_state["table"].seats.players[2].stack)

    def test_last_round_judge(self):
        game_state = restore_game_state(TwoPlayerSample.round_state)
        self.emu.set_game_rule(2, 3, 5, 0)
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["street"] = Const.Street.FINISHED
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))
        game_state["round_count"] = 2
        self.false(self.emu._is_last_round(game_state, self.emu.game_rule))
        # A seat with an empty stack makes it the last round again.
        game_state["table"].seats.players[0].stack = 0
        self.true(self.emu._is_last_round(game_state, self.emu.game_rule))

    def test_start_new_round(self):
        game_state = self._two_player_state()
        self.emu.set_game_rule(2, 10, 5, 0)
        self._register_fold_men(self._TWO_PLAYER_UUIDS)

        # run until round finish
        game_state, event = self.emu.apply_action(game_state, "call", 15)
        game_state, event = self.emu.apply_action(game_state, "call", 0)
        game_state, event = self.emu.apply_action(game_state, "call", 0)

        game_state, events = self.emu.start_new_round(game_state)
        self.eq(4, game_state["round_count"])
        self.eq(1, game_state["table"].dealer_btn)
        self.eq(0, game_state["street"])
        self.eq(0, game_state["next_player"])
        self.eq("event_new_street", events[0]["type"])
        self.eq("event_ask_player", events[1]["type"])
        self.eq("preflop", events[0]["street"])
        self.eq("tojrbxmkuzrarnniosuhct", events[1]["uuid"])

    def test_start_new_round_exclude_no_money_players(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(attach_hole_card_from_deck, uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(uuids)

        # case1: a player cannot pay the small blind
        # (seat 0 is left with 11 chips; small blind + ante is 12)
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[0].stack = 11
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(1, game_state["next_player"])
        self.eq(stacks[1]-sb_amount-ante, game_state["table"].seats.players[1].stack)
        self.eq(stacks[2]-sb_amount*2-ante, game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED, game_state["table"].seats.players[0].pay_info.status)
        self.eq(sb_amount*3 + ante*2, GameEvaluator.create_pot(game_state["table"].seats.players)[0]["amount"])

        # case2: a player cannot pay the big blind
        # (seat 1 is left with 16 chips; big blind + ante is 17)
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[1].stack = 16
        stacks = [p.stack for p in finish_state["table"].seats.players]
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(2, game_state["table"].dealer_btn)
        self.eq(0, game_state["next_player"])
        self.eq(stacks[0]-sb_amount-ante, game_state["table"].seats.players[0].stack)
        self.eq(stacks[2]-sb_amount*2-ante, game_state["table"].seats.players[2].stack)
        self.eq(PayInfo.FOLDED, game_state["table"].seats.players[1].pay_info.status)
        self.eq(PayInfo.PAY_TILL_END, game_state["table"].seats.players[0].pay_info.status)
        self.eq(sb_amount*3 + ante*2, GameEvaluator.create_pot(game_state["table"].seats.players)[0]["amount"])

    def test_start_new_round_exclude_no_money_players2(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(attach_hole_card_from_deck, uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(uuids)

        # Seat 2 is left with 6 chips, which is less than the ante of 7.
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 6
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(0, game_state["table"].dealer_btn)
        self.eq(1, game_state["table"].sb_pos())
        self.eq(1, game_state["next_player"])

    def test_start_new_round_game_finish_judge(self):
        uuids = ["ruypwwoqwuwdnauiwpefsw", "sqmfwdkpcoagzqxpxnmxwm", "uxrdiwvctvilasinweqven"]
        game_state = restore_game_state(ThreePlayerGameStateSample.round_state)
        original = reduce(attach_hole_card_from_deck, uuids, game_state)
        sb_amount, ante = 5, 7
        self.emu.set_game_rule(3, 10, sb_amount, ante)
        self._register_fold_men(uuids)

        # Seats 1 and 2 are both reduced to small stacks; starting the next
        # round must then yield a single game-finish event.
        finish_state, events = self.emu.apply_action(original, "fold")
        finish_state["table"].seats.players[2].stack = 11
        finish_state["table"].seats.players[1].stack = 16
        game_state, events = self.emu.start_new_round(finish_state)
        self.eq(1, len(events))
        self.eq("event_game_finish", events[0]["type"])

    def test_generate_initial_game_state(self):
        self.emu.set_game_rule(2, 8, 5, 3)

        # OrderedDict keeps the seat order explicit.
        players_info = OrderedDict()
        players_info["uuid-1"] = { "name": "hoge", "stack": 100 }
        players_info["uuid-2"] = { "name": "fuga", "stack": 100 }
        state = self.emu.generate_initial_game_state(players_info)
        table = state["table"]
        self.eq(0, state["round_count"])
        self.eq(5, state["small_blind_amount"])
        self.eq(100, table.seats.players[0].stack)
        self.eq("uuid-1", table.seats.players[0].uuid)
        self.eq(100, table.seats.players[1].stack)
        self.eq("uuid-2", table.seats.players[1].uuid)
        self.eq(1, table.dealer_btn)

        state, events = self.emu.start_new_round(state)
        self.eq(0, state["table"].dealer_btn)
        self.eq(1, state["table"].sb_pos())
        self.eq(0, state["table"].bb_pos())
        self.eq(1, state["next_player"])
        state, events = self.emu.apply_action(state, "call", 10)
        self.eq(1, state["next_player"])

    def test_generate_possible_actions(self):
        state1 = restore_game_state(TwoPlayerSample.round_state)
        self.eq(TwoPlayerSample.valid_actions, self.emu.generate_possible_actions(state1))
        state2 = restore_game_state(ThreePlayerGameStateSample.round_state)
        self.eq(ThreePlayerGameStateSample.valid_actions, self.emu.generate_possible_actions(state2))
コード例 #23
0
class Alpha0Regret(BasePokerPlayer):
    """Poker player that chooses its action from a model-guided MCCFR search.

    For every decision the player builds (or re-roots) an ``mc.MCCFR`` tree
    for the current state, runs ``MCCFR_simulations`` simulations and turns
    the statistics stored on the root's edges into an action distribution.
    """

    def __init__(self,
                 mccfr_simulations,
                 model,
                 exp=0,
                 uuid=None,
                 emulator=None,
                 memory=None):
        """Store the search configuration.

        Args:
            mccfr_simulations: number of simulations run per decision.
            model: object whose ``predict`` method evaluates leaf states.
            exp: ``0`` picks a highest-probability action (ties broken at
                random); any other value samples an action from the
                distribution.
            uuid: optional player uuid; only assigned when not ``None``.
            emulator: emulator handed to every ``GameState``. It is replaced
                by a fresh one in ``receive_game_start_message``.
            memory: optional store; when set, every decision is recorded
                through its ``commit_stmemory`` method.
        """
        super().__init__()
        self.MCCFR_simulations = mccfr_simulations
        self.model = model
        self.mccfr = None
        self.intial_round_stack = 0
        self.total_round_money = 0
        self.emulator = emulator
        self.memory = memory
        if uuid is not None:
            self.uuid = uuid
        self.exp = exp

    def declare_action(self, valid_actions, hole_card, round_state):
        """Run the search for the current state and return the chosen action.

        Returns:
            The values of the selected entry of ``state.get_action_list()``,
            as a list.
        """
        state = GameState(self.uuid, round_state, gen_cards(hole_card),
                          self.emulator)
        # Reuse the existing tree when it already contains this state;
        # otherwise start a new one rooted here.
        if self.mccfr is None or state.id not in self.mccfr.tree:
            self.buildMCCFR(state)
        else:
            self.changeRootMCCFR(state)

        for _ in range(self.MCCFR_simulations):
            self.simulate()

        pi = self.getAV()

        action = self.chooseAction(pi)
        if self.memory is not None:
            self.memory.commit_stmemory(self.uuid,
                                        state.convertStateToModelInput(), pi,
                                        np.zeros((1, )))

        return list(state.get_action_list()[action].values())

    def receive_game_start_message(self, game_info):
        """Create a fresh emulator configured with the announced game rules.

        This replaces any emulator passed to the constructor.
        """
        player_num = game_info["player_num"]
        max_round = game_info["rule"]["max_round"]
        small_blind_amount = game_info["rule"]["small_blind_amount"]
        ante_amount = game_info["rule"]["ante"]
        blind_structure = game_info["rule"]["blind_structure"]

        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num, max_round, small_blind_amount,
                                    ante_amount)
        self.emulator.set_blind_structure(blind_structure)

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Ignore the round-start notification."""
        pass

    def receive_street_start_message(self, street, round_state):
        """Ignore the street-start notification."""
        pass

    def receive_game_update_message(self, action, round_state):
        """Ignore the game-update notification."""
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Ignore the round-result notification."""
        pass

    def buildMCCFR(self, state):
        """Start a new search tree rooted at ``state``."""
        self.root = mc.Node(state)
        self.mccfr = mc.MCCFR(self.root, self.uuid)

    def changeRootMCCFR(self, state):
        """Move the root of the existing tree to the node stored for ``state``."""
        self.mccfr.root = self.mccfr.tree[state.id]

    def simulate(self):
        """Run one simulation: descend to a leaf, evaluate it, back-fill."""
        leaf, value, done, breadcrumbs = self.mccfr.moveToLeaf(self.model)
        value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)
        self.mccfr.backFill(value, breadcrumbs)

    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` when it is not terminal.

        When ``done == 0`` the model's value replaces ``value`` and one edge
        per allowed action is appended to ``leaf.edges``; child nodes are
        created or looked up in the tree by state id. Otherwise the inputs
        are passed through unchanged.

        Returns:
            The tuple ``(value, breadcrumbs)``.
        """
        if done == 0:
            value, probs, allowedActions = self.get_preds(leaf.state)
            # Keep only the probabilities of the allowed actions, in order.
            probs = probs[allowedActions]

            for idx, action in enumerate(allowedActions):
                newState, _, _ = leaf.state.takeAction(action)
                if newState.id not in self.mccfr.tree:
                    node = mc.Node(newState)
                    self.mccfr.addNode(node)
                else:
                    node = self.mccfr.tree[newState.id]
                newEdge = mc.Edge(leaf, node, probs[idx], action)
                leaf.edges.append((action, newEdge))

        return value, breadcrumbs

    def get_preds(self, state):
        """Evaluate ``state`` with the model.

        Returns:
            The tuple ``(value, probs, allowedActions)`` where ``probs`` is
            the softmax of the logits with every disallowed action masked
            to ``-inf``.
        """
        main_input, my_info, my_history, adv_info, adv_history = (
            state.convertStateToModelInput())
        preds = self.model.predict(
            [main_input, my_info, my_history, *adv_info, *adv_history])
        value_array = preds[0]
        logits_array = preds[1]
        value = value_array[0][0]
        logits = logits_array[0]

        allowedActions = state.allowed_action

        # Mask every index that is not an allowed action. Note that this
        # writes into the array returned by the model.
        mask = np.ones(logits.shape, dtype=bool)
        mask[allowedActions] = False
        logits[mask] = float('-inf')

        probs = stable_softmax(logits)

        return value, probs, allowedActions

    def getAV(self):
        """Build the action distribution from the root's edge statistics.

        Positive ``R`` statistics are used when any exists; otherwise the
        ``N`` statistics are used. The vector is normalised to sum to one.
        """
        edges = self.mccfr.root.edges
        pi = np.zeros(config.game_param['RAISE_PARTITION_NUM'] + 2,
                      dtype=np.float32)
        for action, edge in edges:
            pi[action] = max(0, edge.stats['R'])
        if np.sum(pi) == 0:
            for action, edge in edges:
                pi[action] = edge.stats['N']

        # NOTE(review): if the fallback also sums to zero (no edges, or every
        # N == 0) this division produces NaN entries -- confirm it cannot
        # happen after at least one simulation.
        pi = pi / np.sum(pi)
        return pi

    def chooseAction(self, pi):
        """Pick an action index from the distribution ``pi``.

        With ``exp == 0`` one of the maximal entries is chosen at random;
        otherwise a single multinomial draw from ``pi`` selects the action.
        """
        pi = np.asarray([truncate_float(x, 7) for x in pi])
        if self.exp == 0:
            actions = np.argwhere(pi == max(pi))
            action = random.choice(actions)[0]
        else:
            action_idx = np.random.multinomial(1, pi)
            action = np.where(action_idx == 1)[0][0]

        return action
コード例 #24
0
class EmulatorPlayer(BasePokerPlayer):
    """Player that picks its action by simulating each candidate with an Emulator."""

    def set_opponents_model(self, model_player):
        """Remember the player model used for every opponent in simulations."""
        self.opponents_model = model_player

    def receive_game_start_message(self, game_info):
        """Configure the emulator from the game rules and register one model per seat."""
        self.my_model = MyModel()
        player_count = game_info['player_num']
        rule = game_info['rule']
        round_limit = rule['max_round']
        small_blind = rule['small_blind_amount']
        ante = rule['ante']

        self.emulator = Emulator()
        self.emulator.set_game_rule(player_count, round_limit, small_blind, ante)
        for seat in game_info['seats']:
            seat_uuid = seat['uuid']
            # Our own seat plays the controllable model; everyone else plays
            # the opponents' model.
            if seat_uuid == self.uuid:
                model = self.my_model
            else:
                model = self.opponents_model
            self.emulator.register_player(seat_uuid, model)

    def declare_action(self, valid_actions, hole_card, round_state):
        """Simulate every candidate action and declare the one with the best average stack."""
        candidates = [MyModel.FOLD, MyModel.CALL, MyModel.MIN_RAISE, MyModel.MAX_RAISE]
        # Results are stored at the index equal to the action constant.
        averages = [0] * len(candidates)
        labels = {0: 'FOLD', 1: 'CALL', 2: 'MIN_RAISE', 3: 'MAX_RAISE'}

        log("hole_card of emulator player is %s" % hole_card)
        for candidate in candidates:
            self.my_model.set_action(candidate)
            final_stacks = [
                self._simulated_final_stack(round_state, hole_card)
                for _ in range(NB_SIMULATION)
            ]
            averages[candidate] = 1.0 * sum(final_stacks) / NB_SIMULATION
            log("average stack after simulation when declares %s : %s" % (
                labels[candidate], averages[candidate]))

        _, best_action = max(zip(averages, candidates))
        self.my_model.set_action(best_action)
        return self.my_model.declare_action(valid_actions, hole_card, round_state)

    def _simulated_final_stack(self, round_state, hole_card):
        """Play one simulated round to its end and return our stack afterwards."""
        start_state = self._setup_game_state(round_state, hole_card)
        end_state, _events = self.emulator.run_until_round_finish(start_state)
        mine = [seat for seat in end_state['table'].seats.players if seat.uuid == self.uuid]
        return mine[0].stack

    def _setup_game_state(self, round_state, my_hole_card):
        """Restore the round state, then deal our known cards and random cards to opponents."""
        state = restore_game_state(round_state)
        state['table'].deck.shuffle()
        seat_uuids = [seat['uuid'] for seat in round_state['seats']]
        for seat_uuid in seat_uuids:
            if seat_uuid != self.uuid:
                # Opponents' hole cards are unknown: draw them from the deck.
                state = attach_hole_card_from_deck(state, seat_uuid)
            else:
                state = attach_hole_card(state, seat_uuid, gen_cards(my_hole_card))
        return state

    def receive_round_start_message(self, round_count, hole_card, seats):
        """Ignore the round-start notification."""

    def receive_street_start_message(self, street, round_state):
        """Ignore the street-start notification."""

    def receive_game_update_message(self, new_action, round_state):
        """Ignore the game-update notification."""

    def receive_round_result_message(self, winners, hand_info, round_state):
        """Ignore the round-result notification."""
コード例 #25
0
class EmulatorPlayer(BasePokerPlayer):
    """Simulation-driven player: tries each action in an Emulator and keeps the best."""

    def set_opponents_model(self, model_player):
        """Set the model that stands in for all opponents during simulation."""
        self.opponents_model = model_player

    def receive_game_start_message(self, game_info):
        """Create the emulator with the announced rules and register every seat's model."""
        self.my_model = MyModel()
        n_players = game_info['player_num']
        rounds = game_info['rule']['max_round']
        sb = game_info['rule']['small_blind_amount']
        ante = game_info['rule']['ante']

        self.emulator = Emulator()
        self.emulator.set_game_rule(n_players, rounds, sb, ante)
        for info in game_info['seats']:
            pid = info['uuid']
            is_me = pid == self.uuid
            self.emulator.register_player(
                pid, self.my_model if is_me else self.opponents_model)

    def declare_action(self, valid_actions, hole_card, round_state):
        """Estimate the average final stack per action and declare the best one."""
        options = [MyModel.FOLD, MyModel.CALL, MyModel.MIN_RAISE, MyModel.MAX_RAISE]
        # One slot per option, addressed by the action constant itself.
        mean_stack = [0] * len(options)
        names = {0: 'FOLD', 1: 'CALL', 2: 'MIN_RAISE', 3: 'MAX_RAISE'}

        log("hole_card of emulator player is %s" % hole_card)
        for option in options:
            self.my_model.set_action(option)
            running_total = 0
            for _ in range(NB_SIMULATION):
                sim_state = self._setup_game_state(round_state, hole_card)
                finished, _events = self.emulator.run_until_round_finish(sim_state)
                own_seats = [p for p in finished['table'].seats.players if p.uuid == self.uuid]
                running_total = running_total + own_seats[0].stack
            mean_stack[option] = 1.0 * running_total / NB_SIMULATION
            message = "average stack after simulation when declares %s : %s" % (
                names[option], mean_stack[option])
            log(message)

        # Highest average wins; ties go to the larger action constant.
        winner = max(zip(mean_stack, options))[1]
        self.my_model.set_action(winner)
        return self.my_model.declare_action(valid_actions, hole_card, round_state)

    def _setup_game_state(self, round_state, my_hole_card):
        """Rebuild a playable state: shuffled deck, our cards fixed, opponents' cards random."""
        restored = restore_game_state(round_state)
        restored['table'].deck.shuffle()
        everyone = [entry['uuid'] for entry in round_state['seats']]
        for pid in everyone:
            if pid == self.uuid:
                # Our own hole cards are known, so attach them explicitly.
                own_cards = gen_cards(my_hole_card)
                restored = attach_hole_card(restored, pid, own_cards)
                continue
            restored = attach_hole_card_from_deck(restored, pid)
        return restored

    def receive_round_start_message(self, round_count, hole_card, seats):
        """No-op: round-start messages are not used."""
        return None

    def receive_street_start_message(self, street, round_state):
        """No-op: street-start messages are not used."""
        return None

    def receive_game_update_message(self, new_action, round_state):
        """No-op: game-update messages are not used."""
        return None

    def receive_round_result_message(self, winners, hand_info, round_state):
        """No-op: round-result messages are not used."""
        return None
コード例 #26
0
ファイル: emulator.py プロジェクト: Henrique97/AASMAPOKER
# Experiment driver: for each candidate value in `learning_rates`, set up a
# game between an HonestPlayer and an RLPlayer 150 times.
# `emulator`, `n_players` and `quuid` are defined outside this excerpt.

# NOTE(review): this instance is rebound at the top of the outer loop before
# it is used anywhere in the visible code.
qlearner_player = RLPlayer(n_players, quuid)
# Empty result tables; they are not filled in within this excerpt.
df1 = pd.DataFrame(columns=['uuid', 'win_ratio', 'stack-diff'])
df2 = pd.DataFrame(columns=['uuid', 'win_ratio', 'stack-diff'])
df3 = pd.DataFrame(columns=['uuid', 'win_ratio', 'stack-diff'])
df4 = pd.DataFrame(columns=['uuid', 'win_ratio', 'stack-diff'])
max_round = 50
# Values from 0.1 up to (but excluding) 1 in steps of 0.1.
learning_rates = np.arange(0.1, 1, 0.1)
#learning_rates = [0.3]
for j in learning_rates:
    # One RLPlayer per value of j, shared by all 150 inner iterations.
    # Presumably the third argument is the learning rate -- confirm against
    # RLPlayer's signature.
    qlearner_player = RLPlayer(n_players, quuid, j)
    for i in range(0, 150):
        # Players and rules are (re-)registered on the shared emulator on
        # every iteration.
        emulator.register_player(uuid="uuid-1", player=HonestPlayer(n_players))
        #emulator.register_player(uuid="uuid-2", player=RiskyPlayer(n_players))
        emulator.register_player(uuid=quuid, player=qlearner_player)
        emulator.set_game_rule(player_num=n_players,
                               max_round=max_round,
                               small_blind_amount=20,
                               ante_amount=0)
        # Both seats start with a stack of 1000.
        players_info = {
            "uuid-1": {
                "name": "player1",
                "stack": 1000
            },
            #"uuid-2": { "name": "player2", "stack": 1000 },
            quuid: {
                "name": "q-learning",
                "stack": 1000
            }
        }
        # Build the initial state and start the first round.
        initial_state = emulator.generate_initial_game_state(players_info)
        game_state, events = emulator.start_new_round(initial_state)
コード例 #27
0
from pypokerengine.api.emulator import Emulator
from bot import PGBot
from pypokerengine.utils.game_state_utils import attach_hole_card_from_deck, attach_hole_card, replace_community_card
from pypokerengine.utils.card_utils import gen_cards

# Path prefix of the pickled object handed to PGBot; the main block appends
# a numeric suffix to it before opening the file.
SAVE_PATH = 'saved_models/second_test/policy_net_after'

# Starting stack assigned to each player in players_info.
STACK = 1000
# The four values below are passed, in this order, to Emulator.set_game_rule.
N_PLAYER = 2
MAX_ROUND = 2
SMALL_BLIND_AMOUNT = 5
ANTE_AMOUNT = 1

if __name__ == '__main__':
    emul = Emulator()
    emul.set_game_rule(N_PLAYER, MAX_ROUND, SMALL_BLIND_AMOUNT, ANTE_AMOUNT)
    console = ConsolePlayer()
    bot = PGBot("bot", pickle.load(open(SAVE_PATH + "105980", "rb")))
    emul.register_player("target", bot)
    emul.register_player("baseline", console)
    players_info = {
        "target": {
            "name": "PGBot",
            "stack": STACK
        },
        "baseline": {
            "name": "console",
            "stack": STACK
        }
    }
    game_state = emul.generate_initial_game_state(players_info)