Exemple #1
0
def _print_terminal_state(info_set: hulth.InfoSet, user: int) -> None:
    """Announce the result of a finished hand from the user's point of view.

    A win is shown in green with the user's utility as the chip count. Any
    other outcome is shown in red as a loss, with the utility negated so the
    amount is printed as the number of chips lost.

    Args:
        info_set: State queried for the winner and for the user's utility.
        user: Identifier of the player the message is addressed to.
    """
    winner = info_set.winner()
    chips = info_set.utility(user)

    if winner == user:
        text, color = "You won {} chips!!!".format(chips), Color.GREEN
    else:
        text, color = "You lost {} chips!!! :(".format(-chips), Color.RED

    _print_center(text, color)
    def _generate_subgame_tree(self, info_set: InfoSet) -> None:
        """Add the encoding of every non-chance state under info_set.

        The walk is depth-first: a state that is not a chance state has its
        encoding added to ``self._tree`` (terminal states included), and
        every non-terminal state is expanded through each of its actions.

        Args:
            info_set: Root of the subtree to traverse.
        """
        if not info_set.is_chance():
            self._tree.add(info_set.encoding())

        # Terminal states have nothing below them to expand.
        if info_set.is_terminal():
            return

        for move in info_set.actions():
            self._generate_subgame_tree(info_set.play(move))
Exemple #3
0
def _play_as_user(info_set: hulth.InfoSet) -> hulth.InfoSet:
    """Prompt the user until a valid action is entered, then play it.

    The input is matched against the ``value`` of each action available at
    ``info_set``. On a mismatch an error message is shown, the function
    waits ``USER_ERROR_WAIT_IN_SECONDS`` seconds and asks again.

    Args:
        info_set: State at which the user has to act.

    Returns:
        The state produced by playing the chosen action.
    """
    # The available actions do not change between attempts, so the prompt
    # is built once.
    prompt = "Pick your action ({}): ".format(
        _possible_actions_as_str(info_set))

    # Retry in a loop rather than by recursion so that a long run of invalid
    # inputs cannot exhaust the interpreter's recursion limit.
    while True:
        user_action_as_str = _get_input(prompt)

        user_action = None
        for action in info_set.actions():
            if action.value == user_action_as_str:
                user_action = action

        # Compare against None explicitly: an action object may itself be
        # falsy, and that must not be mistaken for "no match".
        if user_action is not None:
            return info_set.play(user_action)

        _print_center("You picked an invalid action")
        time.sleep(USER_ERROR_WAIT_IN_SECONDS)
Exemple #4
0
    def _chance_sampling_cfr(self, info_set: hulth.InfoSet, trainee: int,
                             small_blind_prob: float,
                             big_blind_prob: float) -> float:
        """Run one recursive CFR pass below info_set and return its value.

        Terminal states yield the trainee's utility. Chance states are
        advanced with ``hulth.Action.CHANCE`` and leave both reach
        probabilities untouched. At any other state every action is explored,
        with the acting player's reach probability scaled by the probability
        the current strategy assigns to that action. When the acting player
        is the trainee, the cumulative regret and cumulative strategy tables
        of this state are updated.

        Args:
            info_set: State to evaluate.
            trainee: Player whose utility is computed and whose tables are
                updated.
            small_blind_prob: Product of the small blind's action
                probabilities on the path to this state.
            big_blind_prob: The same product for the big blind.

        Returns:
            The strategy-weighted utility of this state for the trainee.
        """
        if info_set.is_terminal():
            return info_set.utility(trainee)

        if info_set.is_chance():
            after_chance = info_set.play(hulth.Action.CHANCE)
            return self._chance_sampling_cfr(
                after_chance, trainee, small_blind_prob, big_blind_prob)

        encoding = info_set.encoding()
        actions = info_set.actions()
        strategy = self._get_strategy(encoding, actions)
        acting_player = info_set.curr_player()
        small_blind_acts = acting_player == hulth.SMALL_BLIND

        utilities = {}
        node_value = 0.0

        for action in actions:
            child = info_set.play(action)
            prob = strategy[action]

            # Only the acting player's reach probability absorbs the action
            # probability; the other player's is passed through unchanged.
            if small_blind_acts:
                child_reach = (prob * small_blind_prob, big_blind_prob)
            else:
                child_reach = (small_blind_prob, prob * big_blind_prob)

            utilities[action] = self._chance_sampling_cfr(
                child, trainee, *child_reach)
            node_value += utilities[action] * prob

        # The tables are only touched at states where the trainee acts.
        if acting_player != trainee:
            return node_value

        cum_regret = self._info_set_cum_regret(encoding, actions)
        cum_strategy = self._info_set_cum_strategy(encoding, actions)

        if small_blind_acts:
            own_reach, opponent_reach = small_blind_prob, big_blind_prob
        else:
            own_reach, opponent_reach = big_blind_prob, small_blind_prob

        for action in actions:
            # Regret is weighted by the opponent's reach probability, the
            # strategy sum by the acting player's own.
            regret = utilities[action] - node_value
            cum_regret[action] += opponent_reach * regret
            cum_strategy[action] += own_reach * strategy[action]

        return node_value
Exemple #5
0
    def _avg_strategy(self, info_set: hulth.InfoSet) -> Dict:
        """Return the normalised cumulative strategy for info_set.

        Each action's cumulative strategy weight is divided by the total over
        all actions. When that total is not positive, every action receives
        the same probability instead.

        Args:
            info_set: State whose actions and encoding select the weights.

        Returns:
            A dict mapping each available action to its probability.
        """
        actions = info_set.actions()
        encoding = info_set.encoding()
        weights = self._info_set_cum_strategy(encoding, actions)

        total = 0.0
        for action in actions:
            total += weights[action]

        if total > 0.0:
            return {action: weights[action] / total for action in actions}

        # No positive weight to normalise by: use a uniform distribution.
        return {action: 1 / len(actions) for action in actions}
Exemple #6
0
def _possible_actions_as_str(info_set: hulth.InfoSet) -> str:
    """Describe the actions available at info_set as one line of text.

    Each action is rendered as "<value> - <name>", with the name looked up
    in ``hulth.ACTION_NAMES``, and the entries are separated by ", ".

    Args:
        info_set: State whose available actions are listed.

    Returns:
        The comma-separated description; empty when there are no actions.
    """
    return ", ".join(
        "{} - {}".format(action.value, hulth.ACTION_NAMES[action])
        for action in info_set.actions())
Exemple #7
0
def _print_stats(bundle: hulth.CardBundle, info_set: hulth.InfoSet,
                 user: int) -> None:
    """Print the user's hand, both players' money and the pot on one line.

    Args:
        bundle: Source of the user's hand.
        info_set: State queried for the players' available money and for
            the pot.
        user: Identifier of the player the line is written for; the opponent
            is obtained from ``hulth.get_opponent``.
    """
    pretty_hand = _cards_as_pretty_strings(bundle.player_hand(user))
    segments = ["[Hand - {}] ".format(pretty_hand)]

    money = info_set.available_money_of_players()
    own_money = money[user]
    opponent_money = money[hulth.get_opponent(user)]
    segments.append("[Our Money - {}] [Opponent Money - {}] ".format(
        own_money, opponent_money))

    segments.append("[Pot Money - {}]".format(info_set.pot_money()))

    _print_center("".join(segments), Color.YELLOW)
Exemple #8
0
    def play(self, info_set: hulth.InfoSet) -> hulth.InfoSet:
        """Sample an action from the average strategy and play it.

        A number is drawn uniformly from [0.0, 1.0] and the actions'
        probabilities are accumulated in the strategy's iteration order; the
        first action whose cumulative probability reaches the drawn number is
        played.

        Args:
            info_set: State at which an action has to be chosen.

        Returns:
            The state produced by playing the sampled action.

        Raises:
            ValueError: If the average strategy contains no action.
        """
        strategy = self._avg_strategy(info_set)

        # Without this guard an empty strategy would leave nothing to play.
        if not strategy:
            raise ValueError("no actions available to play")

        rand_float = random.uniform(0.0, 1.0)
        prob_sum = 0.0

        play_action = None

        for action, action_prob in strategy.items():
            # Track the latest action: if floating point rounding leaves the
            # cumulative sum just short of rand_float, the final action is
            # used instead of resampling recursively. This also avoids
            # testing the action's truthiness, which would misfire on an
            # action object that happens to be falsy.
            play_action = action
            prob_sum += action_prob

            if prob_sum >= rand_float:
                break

        return info_set.play(play_action)