예제 #1
0
파일: action.py 프로젝트: hobbit19/Pokermon
def make_last_actions(game: GameView) -> List[LastAction]:
    """
    Build the "previous action" record for each state of ``game``.

    The first record is a placeholder whose fields are all -1. Every later
    record describes the action taken at the preceding state, so the action
    of the final state yielded by ``iter_game_states`` is never included.

    The two ratio fields divide by the acting player's stack and by the pot
    size reported by the view; neither divisor is checked for zero.
    """
    unset = -1

    def describe(view):
        # Summarise the action that is about to be taken from this view.
        taken: Action = view.next_action()

        stacks = view.current_stack_sizes()
        actor_stack = stacks[view.current_player()]
        pot = view.pot_size()
        bet_to_match = view.current_bet_amount()
        raised_by = taken.total_bet - bet_to_match

        return LastAction(
            move=taken.move.value,
            action_encoded=encode_action(taken, view),
            amount_added=taken.amount_added,
            amount_added_percent_of_remaining=taken.amount_added / actor_stack,
            amount_raised=raised_by,
            amount_raised_percent_of_pot=raised_by / pot,
        )

    # Nothing precedes the first voluntary action, so it gets a placeholder.
    history = [
        LastAction(
            move=unset,
            action_encoded=unset,
            amount_added=unset,
            amount_added_percent_of_remaining=unset,
            amount_raised=unset,
            amount_raised_percent_of_pot=unset,
        )
    ]

    # Skip the last state: record k + 1 is the action taken at state k.
    state_indices = list(iter_game_states(game))
    for state_index in state_indices[:-1]:
        history.append(describe(game.view(state_index)))

    return history
예제 #2
0
def make_rewards(game: GameView, result: Result):
    """
    Build one Reward per state yielded by ``iter_game_states(game)``.

    The states are walked from last to first while keeping, per player, a
    running total that starts at ``result.earned_from_pot`` and is reduced by
    the ``amount_added`` of each of that player's actions. For every state:

    * ``cumulative_reward`` is the acting player's running total after
      subtracting this action's ``amount_added``.
    * ``instant_reward`` equals ``cumulative_reward`` for the player's final
      action in the hand, and ``-amount_added`` for every earlier action.
    * ``is_players_last_action`` marks that final action.
    * ``won_hand`` is copied from ``result.won_hand`` for the acting player.

    The returned list is in the same (forward) order as the game states.
    ``result`` is not modified.

    Raises AssertionError if the hand is not over.
    """

    # This only makes sense at the end of the game
    assert game.street() == Street.HAND_OVER

    rewards = []

    # Profits between now and the end of the hand. Work on a copy: the totals
    # are decremented in place below, and writing through to
    # result.earned_from_pot would corrupt the caller's Result (and make a
    # second call on the same Result produce different rewards).
    cumulative_rewards: List[int] = list(result.earned_from_pot)

    # Walking backwards, the first action seen for a player is their last one.
    is_last_action: List[bool] = [True] * game.num_players()

    # Iterate in reverse order
    for i in reversed(list(iter_game_states(game))):

        a: Action = game.view(i).next_action()
        player = a.player_index

        # Subtract the amount put in by this action, which is a part of the
        # future cumulative winnings / losses as seen from this state.
        cumulative_rewards[player] -= a.amount_added

        if is_last_action[player]:
            instant_reward = cumulative_rewards[player]
        else:
            instant_reward = -1 * a.amount_added

        rewards.append(
            Reward(
                is_players_last_action=is_last_action[player],
                cumulative_reward=cumulative_rewards[player],
                instant_reward=instant_reward,
                won_hand=result.won_hand[player],
            ))

        is_last_action[player] = False

    # Rewards were collected back-to-front; restore chronological order.
    rewards.reverse()
    return rewards
예제 #3
0
파일: action.py 프로젝트: hobbit19/Pokermon
def make_next_actions(game: GameView) -> List[NextAction]:
    """
    Build one NextAction per state yielded by ``iter_game_states(game)``.

    Each record describes the action about to be taken from that state:
    the move, its encoding, the amount added, the resulting total bet, and
    how far that total exceeds the bet amount current before the action.
    """

    def describe(view):
        upcoming: Action = view.next_action()

        bet_to_match = view.current_bet_amount()
        raised_by = upcoming.total_bet - bet_to_match

        return NextAction(
            move=upcoming.move.value,
            action_encoded=encode_action(upcoming, view),
            amount_added=upcoming.amount_added,
            new_total_bet=upcoming.total_bet,
            amount_raised=raised_by,
        )

    return [
        describe(game.view(state_index))
        for state_index in iter_game_states(game)
    ]
예제 #4
0
def make_player_states(player_index: int, game: GameView,
                       hole_cards: HoleCards,
                       board: Board) -> List[PlayerState]:
    """
    Build one PlayerState per state yielded by ``iter_game_states(game)``,
    from the point of view of ``player_index``.

    States where another player is to act only record that fact and the
    offset between the acting player and ``player_index``. States where
    ``player_index`` is to act carry hand-strength features, except preflop,
    where only the two "current player" fields are set. The same PlayerState
    object is reused for every such state on a given street.
    """
    # Attributes copied verbatim from the pyholdthem features onto PlayerState.
    feature_names = (
        "frac_better_hands",
        "frac_tied_hands",
        "frac_worse_hands",
        "win_odds",
        "tie_odds",
        "lose_odds",
        "win_odds_vs_better",
        "tie_odds_vs_better",
        "lose_odds_vs_better",
        "win_odds_vs_tied",
        "tie_odds_vs_tied",
        "lose_odds_vs_tied",
        "win_odds_vs_worse",
        "tie_odds_vs_worse",
        "lose_odds_vs_worse",
    )

    states = []
    per_street: Dict[Street, PlayerState] = {}

    for state_index in iter_game_states(game):
        view = game.view(state_index)

        if view.current_player() != player_index:
            # Someone else is acting: only the turn information is filled in.
            states.append(
                PlayerState(
                    is_current_player=False,
                    current_player_offset=(view.current_player() -
                                           player_index),
                ))
            continue

        street = view.street()

        # The features below are computed once per street and then reused.
        if street in per_street:
            states.append(per_street[street])
            continue

        if view.street() == Street.PREFLOP:
            state = PlayerState(is_current_player=True,
                                current_player_offset=0)
        else:
            visible_board = board.at_street(view.street())
            evaluation = evaluate_hand(hole_cards, visible_board)
            board_indices = [card.index() for card in visible_board.cards()]
            features = pyholdthem.make_hand_features_from_indices(
                hole_cards.index(), board_indices, 1000)

            state = PlayerState(
                is_current_player=True,
                current_player_offset=0,
                current_hand_type=evaluation.hand_type.value,
                **{name: getattr(features, name) for name in feature_names},
            )

        per_street[street] = state
        states.append(state)

    return states
예제 #5
0
def make_public_states(game: GameView, board: Optional[Board]):
    """
    Build one PublicState per state yielded by ``iter_game_states(game)``.

    Each record holds the information the view exposes about the table (pot,
    street, stacks, fold / all-in masks, amount to call, minimum raise, a
    one-hot mask of the acting player) plus the rank and suit of each board
    card. A card's fields are None when the street has not reached that card
    or when the board does not have it; with ``board`` set to None every card
    field is None. Flop cards are listed in ``card_order`` order.
    """
    snapshots = []

    for state_index in iter_game_states(game):
        view = game.view(state_index)

        if board is not None:
            visible_board = board.at_street(view.street())
        else:
            visible_board = Board(flop=None, turn=None, river=None)

        # One-hot marker for the player whose turn it is.
        acting_mask = [0] * view.num_players()
        acting_mask[view.current_player()] = 1

        # Card fields default to None and are filled in only once the street
        # has been reached and the card is actually present.
        flop_0_rank = flop_0_suit = None
        flop_1_rank = flop_1_suit = None
        flop_2_rank = flop_2_suit = None
        if (view.street() >= Street.FLOP
                and visible_board.flop is not None):
            first, second, third = sorted(visible_board.flop, key=card_order)
            flop_0_rank, flop_0_suit = first.rank.value, first.suit.value
            flop_1_rank, flop_1_suit = second.rank.value, second.suit.value
            flop_2_rank, flop_2_suit = third.rank.value, third.suit.value

        turn_rank = turn_suit = None
        if (view.street() >= Street.TURN
                and visible_board.turn is not None):
            turn_card = visible_board.turn
            turn_rank, turn_suit = turn_card.rank.value, turn_card.suit.value

        river_rank = river_suit = None
        if (view.street() >= Street.RIVER
                and visible_board.river is not None):
            river_card = visible_board.river
            river_rank, river_suit = (river_card.rank.value,
                                      river_card.suit.value)

        snapshot = PublicState(
            num_players_remaining=sum(view.is_in_hand()),
            pot_size=view.pot_size(),
            street=view.street().value,
            current_player_mask=acting_mask,
            folded_player_mask=view.is_folded(),
            all_in_player_mask=view.is_all_in(),
            stack_sizes=view.current_stack_sizes(),
            amount_to_call=view.amount_to_call(),
            min_raise_amount=view.min_bet_amount(),
            flop_0_rank=flop_0_rank,
            flop_0_suit=flop_0_suit,
            flop_1_rank=flop_1_rank,
            flop_1_suit=flop_1_suit,
            flop_2_rank=flop_2_rank,
            flop_2_suit=flop_2_suit,
            turn_rank=turn_rank,
            turn_suit=turn_suit,
            river_rank=river_rank,
            river_suit=river_suit,
        )
        snapshots.append(snapshot)

    return snapshots