def _print_terminal_state(info_set: hulth.InfoSet, user: int) -> None:
    """Print the end-of-hand result from the user's point of view.

    Args:
        info_set: State queried for ``winner()`` and ``utility(user)``.
        user: Player identifier of the human user.
    """
    winner = info_set.winner()
    chips = info_set.utility(user)

    # Anything other than "the user is the winner" is reported as a loss;
    # the utility is negated so the displayed amount is the chips given up.
    if winner != user:
        _print_center("You lost {} chips!!! :(".format(-chips), Color.RED)
        return

    _print_center("You won {} chips!!!".format(chips), Color.GREEN)
def _generate_subgame_tree(self, info_set: InfoSet) -> None: if not info_set.is_chance(): encoding = info_set.encoding() self._tree.add(encoding) if info_set.is_terminal(): return for action in info_set.actions(): child = info_set.play(action) self._generate_subgame_tree(child)
def _play_as_user(info_set: hulth.InfoSet) -> hulth.InfoSet:
    """Prompt the user until a valid action is entered, then play it.

    The user's input is compared against ``action.value`` for each action
    offered by ``info_set.actions()``. On a mismatch an error message is
    shown, the function sleeps for ``USER_ERROR_WAIT_IN_SECONDS`` and the
    prompt is repeated.

    Retrying is done with a loop rather than by recursion, so an arbitrary
    number of invalid inputs cannot exhaust the interpreter's recursion
    limit. A matched action is played directly, so an action object that
    happens to be falsy is no longer mistaken for "no match".

    Args:
        info_set: State in which the user is to act.

    Returns:
        The state produced by ``info_set.play`` with the chosen action.
    """
    # The state does not change between retries, so the prompt is built once.
    prompt = "Pick your action ({}): ".format(
        _possible_actions_as_str(info_set))

    while True:
        user_action_as_str = _get_input(prompt)
        for action in info_set.actions():
            if action.value == user_action_as_str:
                return info_set.play(action)

        _print_center("You picked an invalid action")
        time.sleep(USER_ERROR_WAIT_IN_SECONDS)
def _chance_sampling_cfr(self, info_set: hulth.InfoSet, trainee: int,
                         small_blind_prob: float,
                         big_blind_prob: float) -> float:
    """Recursively evaluate ``info_set`` and update the trainee's tables.

    Terminal states return ``info_set.utility(trainee)``. Chance states are
    advanced by playing ``hulth.Action.CHANCE`` with both probabilities
    unchanged. At every other state each action is explored with the acting
    player's probability multiplied by the strategy's probability for that
    action, and the node value is the strategy-weighted sum of the child
    values.

    When the acting player is ``trainee``, the mappings returned by
    ``self._info_set_cum_regret`` and ``self._info_set_cum_strategy`` are
    updated in place: regrets are weighted by the *other* player's
    probability, strategy contributions by the acting player's own.

    Args:
        info_set: State to evaluate.
        trainee: Player whose utility is returned and whose tables are
            updated.
        small_blind_prob: Probability weight carried for the small blind;
            scaled by the action probability whenever the small blind acts.
        big_blind_prob: Same, for the other player.

    Returns:
        The value of ``info_set`` for ``trainee`` under the current strategy.
    """
    if info_set.is_terminal():
        return info_set.utility(trainee)

    if info_set.is_chance():
        after_chance = info_set.play(hulth.Action.CHANCE)
        return self._chance_sampling_cfr(
            after_chance, trainee, small_blind_prob, big_blind_prob)

    encoding = info_set.encoding()
    actions = info_set.actions()
    strategy = self._get_strategy(encoding, actions)
    curr_player = info_set.curr_player()
    small_blind_acts = curr_player == hulth.SMALL_BLIND

    node_value = 0.0
    action_utilities = {}
    for action in actions:
        child = info_set.play(action)
        action_prob = strategy[action]
        # Only the acting player's probability is scaled by this action.
        if small_blind_acts:
            next_probs = (action_prob * small_blind_prob, big_blind_prob)
        else:
            next_probs = (small_blind_prob, action_prob * big_blind_prob)
        action_utilities[action] = self._chance_sampling_cfr(
            child, trainee, *next_probs)
        node_value += action_utilities[action] * action_prob

    # Tables are only touched at states where the trainee is to act.
    if curr_player != trainee:
        return node_value

    cum_regret = self._info_set_cum_regret(encoding, actions)
    cum_strategy = self._info_set_cum_strategy(encoding, actions)
    if small_blind_acts:
        own_reach, opponent_reach = small_blind_prob, big_blind_prob
    else:
        own_reach, opponent_reach = big_blind_prob, small_blind_prob

    for action in actions:
        cum_regret[action] += opponent_reach * (
            action_utilities[action] - node_value)
        cum_strategy[action] += own_reach * strategy[action]

    return node_value
def _avg_strategy(self, info_set: hulth.InfoSet) -> Dict: actions = info_set.actions() encoding = info_set.encoding() avg_strategy = dict() normalising_sum = 0.0 cum_strategy = self._info_set_cum_strategy(encoding, actions) for action in actions: normalising_sum += cum_strategy[action] for action in actions: if normalising_sum > 0.0: avg_strategy[action] = cum_strategy[action] / normalising_sum else: avg_strategy[action] = 1 / len(actions) return avg_strategy
def _possible_actions_as_str(info_set: hulth.InfoSet) -> str:
    """Describe the actions available in ``info_set`` as one line of text.

    Each action is rendered as ``"<value> - <name>"``, where the name is
    looked up in ``hulth.ACTION_NAMES``, and the entries are separated by
    ``", "``.

    Args:
        info_set: State whose ``actions()`` are listed.

    Returns:
        The comma-separated description; empty when there are no actions.
    """
    return ", ".join(
        "{} - {}".format(action.value, hulth.ACTION_NAMES[action])
        for action in info_set.actions())
def _print_stats(bundle: hulth.CardBundle, info_set: hulth.InfoSet,
                 user: int) -> None:
    """Print a one-line summary of the user's hand, both stacks and the pot.

    Args:
        bundle: Source of the user's hand via ``player_hand(user)``.
        info_set: State queried for the players' available money and the
            pot money.
        user: Player identifier of the human user; the opponent is obtained
            from ``hulth.get_opponent(user)``.
    """
    segments = []

    hand_as_str = _cards_as_pretty_strings(bundle.player_hand(user))
    segments.append("[Hand - {}] ".format(hand_as_str))

    available_money = info_set.available_money_of_players()
    user_money = available_money[user]
    bot_money = available_money[hulth.get_opponent(user)]
    segments.append("[Our Money - {}] [Opponent Money - {}] ".format(
        user_money, bot_money))

    segments.append("[Pot Money - {}]".format(info_set.pot_money()))

    _print_center("".join(segments), Color.YELLOW)
def play(self, info_set: hulth.InfoSet) -> hulth.InfoSet:
    """Sample an action from the average strategy and play it.

    The action is drawn with ``random.choices`` using the probabilities
    returned by ``self._avg_strategy(info_set)`` as weights. Compared with a
    hand-rolled cumulative-sum scan this:

    * always yields an action, so no retry is needed when rounding leaves
      the cumulative sum slightly below the random draw;
    * never selects an action whose probability is zero;
    * does not rely on the truthiness of the action object, so a falsy
      action cannot be mistaken for "nothing sampled".

    Args:
        info_set: State in which to act.

    Returns:
        The state produced by ``info_set.play`` with the sampled action.

    Raises:
        IndexError: If the strategy contains no actions.
        ValueError: If the probabilities do not have a positive total.
    """
    strategy = self._avg_strategy(info_set)
    actions = list(strategy)
    weights = [strategy[action] for action in actions]
    play_action = random.choices(actions, weights=weights, k=1)[0]
    return info_set.play(play_action)