def make_last_actions(game: GameView) -> List[LastAction]:
    """Describe, for every game state, the action that led into it.

    The returned list starts with a placeholder row (every field set to -1)
    because no action precedes the first state.  It is followed by one row
    for each state produced by ``iter_game_states`` except the final one, so
    the row at position ``k + 1`` describes the action taken from the k-th
    yielded state.
    """

    def _describe(state_index) -> LastAction:
        # Summarise the action that is taken from the state at state_index.
        view = game.view(state_index)
        action: Action = view.next_action()
        stacks = view.current_stack_sizes()
        actor_stack = stacks[view.current_player()]
        pot = view.pot_size()
        bet_to_match = view.current_bet_amount()
        raised_by = action.total_bet - bet_to_match
        # NOTE(review): both ratios divide by live game values; a zero stack
        # or an empty pot would raise ZeroDivisionError here -- confirm that
        # neither can occur for the states being iterated.
        return LastAction(
            move=action.move.value,
            action_encoded=encode_action(action, view),
            amount_added=action.amount_added,
            amount_added_percent_of_remaining=action.amount_added / actor_stack,
            amount_raised=raised_by,
            amount_raised_percent_of_pot=raised_by / pot,
        )

    # Placeholder row standing in for "no previous action".
    history: List[LastAction] = [
        LastAction(
            move=-1,
            action_encoded=-1,
            amount_added=-1,
            amount_added_percent_of_remaining=-1,
            amount_raised=-1,
            amount_raised_percent_of_pot=-1,
        )
    ]

    # The states are shifted by one: the last state's action has no
    # following state to be attached to, so it is dropped.
    state_indices = list(iter_game_states(game))
    history.extend(_describe(idx) for idx in state_indices[:-1])
    return history
def make_rewards(game: GameView, result: Result) -> List[Reward]:
    """Generate one ``Reward`` per action yielded by ``iter_game_states``.

    The states are walked from last to first while keeping, per player, a
    running total that starts at ``result.earned_from_pot`` and has the
    ``amount_added`` of every action visited so far subtracted from it.
    For each action the emitted ``Reward`` holds:

    * ``cumulative_reward`` -- the player's running total after subtracting
      this action's ``amount_added``;
    * ``instant_reward`` -- the same running total if this is the player's
      final action of the hand, otherwise ``-amount_added``;
    * ``is_players_last_action`` and ``won_hand`` flags.

    The returned list is in the same order as ``iter_game_states``.

    ``result`` is not modified: the running totals are kept in a private
    copy of ``result.earned_from_pot``.

    Raises:
        AssertionError: if the game is not at ``Street.HAND_OVER``.
    """
    # This only makes sense at the end of the game
    assert game.street() == Street.HAND_OVER

    # Copy before mutating.  Binding the list directly would alias
    # result.earned_from_pot, so the in-place subtraction below would corrupt
    # the caller's Result and make repeated calls return different rewards.
    cumulative_rewards: List[int] = list(result.earned_from_pot)
    is_last_action: List[bool] = [True] * game.num_players()

    rewards: List[Reward] = []

    # Iterate in reverse order so the first time a player is seen is their
    # final action of the hand.
    for i in reversed(list(iter_game_states(game))):
        a: Action = game.view(i).next_action()
        player = a.player_index
        won_hand = result.won_hand[player]

        # Subtract the amount put in by this action; it is part of the
        # future cumulative winnings / losses as seen from this point.
        cumulative_rewards[player] -= a.amount_added

        if is_last_action[player]:
            instant_reward = cumulative_rewards[player]
        else:
            instant_reward = -1 * a.amount_added

        rewards.append(
            Reward(
                is_players_last_action=is_last_action[player],
                cumulative_reward=cumulative_rewards[player],
                instant_reward=instant_reward,
                won_hand=won_hand,
            ))

        is_last_action[player] = False

    # Rewards were collected last-to-first; restore chronological order.
    rewards.reverse()
    return rewards
def make_next_actions(game: GameView) -> List[NextAction]:
    """Return the action taken from each game state, as ``NextAction`` rows.

    One row is produced per state yielded by ``iter_game_states``, in the
    same order.  ``amount_raised`` is the action's ``total_bet`` minus the
    bet amount that was current in that state.
    """

    def _to_next_action(state_index) -> NextAction:
        # Look at the game as of this state and read off the upcoming action.
        view = game.view(state_index)
        upcoming: Action = view.next_action()
        bet_to_match = view.current_bet_amount()
        return NextAction(
            move=upcoming.move.value,
            action_encoded=encode_action(upcoming, view),
            amount_added=upcoming.amount_added,
            new_total_bet=upcoming.total_bet,
            amount_raised=upcoming.total_bet - bet_to_match,
        )

    return [_to_next_action(idx) for idx in iter_game_states(game)]
def make_player_states(player_index: int, game: GameView,
                       hole_cards: HoleCards,
                       board: Board) -> List[PlayerState]:
    """Build one ``PlayerState`` per game state from one player's viewpoint.

    For states where a different player is to act, the entry only records
    ``is_current_player=False`` and the seat offset of the acting player
    relative to ``player_index``.  For states where ``player_index`` is to
    act, the entry carries hand-strength features for the current street
    (none are set pre-flop).  Those features are computed once per street
    and the same ``PlayerState`` object is reused for later states on that
    street.
    """

    def _evaluate_postflop(street) -> PlayerState:
        # Evaluate the hole cards against the board as dealt up to `street`.
        visible_board = board.at_street(street)
        evaluation = evaluate_hand(hole_cards, visible_board)
        # NOTE(review): the trailing 1000 is passed through untouched;
        # presumably a sample/iteration count -- confirm against pyholdthem.
        feats = pyholdthem.make_hand_features_from_indices(
            hole_cards.index(),
            [card.index() for card in visible_board.cards()],
            1000,
        )
        return PlayerState(
            is_current_player=True,
            current_player_offset=0,
            current_hand_type=evaluation.hand_type.value,
            frac_better_hands=feats.frac_better_hands,
            frac_tied_hands=feats.frac_tied_hands,
            frac_worse_hands=feats.frac_worse_hands,
            win_odds=feats.win_odds,
            tie_odds=feats.tie_odds,
            lose_odds=feats.lose_odds,
            win_odds_vs_better=feats.win_odds_vs_better,
            tie_odds_vs_better=feats.tie_odds_vs_better,
            lose_odds_vs_better=feats.lose_odds_vs_better,
            win_odds_vs_tied=feats.win_odds_vs_tied,
            tie_odds_vs_tied=feats.tie_odds_vs_tied,
            lose_odds_vs_tied=feats.lose_odds_vs_tied,
            win_odds_vs_worse=feats.win_odds_vs_worse,
            tie_odds_vs_worse=feats.tie_odds_vs_worse,
            lose_odds_vs_worse=feats.lose_odds_vs_worse,
        )

    states: List[PlayerState] = []
    # The player's own state is identical for every decision on a given
    # street, so it is computed once per street and looked up afterwards.
    by_street: Dict[Street, PlayerState] = {}

    for state_index in iter_game_states(game):
        view = game.view(state_index)
        acting_player = view.current_player()

        # Someone else is to act: only the offset is filled in.
        if acting_player != player_index:
            states.append(
                PlayerState(
                    is_current_player=False,
                    current_player_offset=(acting_player - player_index),
                ))
            continue

        street = view.street()
        if street not in by_street:
            if street == Street.PREFLOP:
                by_street[street] = PlayerState(is_current_player=True,
                                                current_player_offset=0)
            else:
                by_street[street] = _evaluate_postflop(street)
        states.append(by_street[street])

    return states
def make_public_states(game: GameView, board: Optional[Board]):
    """Build one ``PublicState`` per game state yielded by ``iter_game_states``.

    Each entry records pot, stacks, betting amounts, player masks and the
    rank/suit values of the community cards.  A card's rank and suit are
    ``None`` unless the state's street has reached that card and the card is
    present on the board.  When ``board`` is ``None`` an empty ``Board`` is
    used, so every card field is ``None``.  The three flop cards are emitted
    in ``card_order`` order.
    """

    def _rank_suit(card):
        # Split a card into the (rank value, suit value) pair that is stored.
        return card.rank.value, card.suit.value

    rows = []
    for state_index in iter_game_states(game):
        view = game.view(state_index)
        street = view.street()

        if board is not None:
            visible = board.at_street(street)
        else:
            visible = Board(flop=None, turn=None, river=None)

        # One-hot marker of the player to act.
        acting_mask = [0] * view.num_players()
        acting_mask[view.current_player()] = 1

        # Default every card field to "not shown", then fill in what is dealt.
        flop_0_rank = flop_0_suit = None
        flop_1_rank = flop_1_suit = None
        flop_2_rank = flop_2_suit = None
        turn_rank = turn_suit = None
        river_rank = river_suit = None

        if street >= Street.FLOP and visible.flop is not None:
            first, second, third = sorted(visible.flop, key=card_order)
            flop_0_rank, flop_0_suit = _rank_suit(first)
            flop_1_rank, flop_1_suit = _rank_suit(second)
            flop_2_rank, flop_2_suit = _rank_suit(third)

        if street >= Street.TURN and visible.turn is not None:
            turn_rank, turn_suit = _rank_suit(visible.turn)

        if street >= Street.RIVER and visible.river is not None:
            river_rank, river_suit = _rank_suit(visible.river)

        rows.append(
            PublicState(
                num_players_remaining=sum(view.is_in_hand()),
                pot_size=view.pot_size(),
                street=street.value,
                current_player_mask=acting_mask,
                folded_player_mask=view.is_folded(),
                all_in_player_mask=view.is_all_in(),
                stack_sizes=view.current_stack_sizes(),
                amount_to_call=view.amount_to_call(),
                min_raise_amount=view.min_bet_amount(),
                flop_0_rank=flop_0_rank,
                flop_0_suit=flop_0_suit,
                flop_1_rank=flop_1_rank,
                flop_1_suit=flop_1_suit,
                flop_2_rank=flop_2_rank,
                flop_2_suit=flop_2_suit,
                turn_rank=turn_rank,
                turn_suit=turn_suit,
                river_rank=river_rank,
                river_suit=river_suit,
            ))

    return rows