コード例 #1
0
    def traverse_ESMCCFR(self, state, player):
        """Recursively traverse the game tree below ``state`` for ``player``.

        The name suggests external-sampling Monte Carlo CFR: at nodes where
        the traversing ``player`` acts, every available bet is explored; at
        nodes where the other player acts, a single bet is sampled.

        Args:
            state: Mutable game state. It is advanced with ``state.update``
                before each recursive call and restored with
                ``state.reverse_update`` afterwards, so on return (or on an
                exception raised by the recursion) it is back at the node it
                was at on entry.
            player: Id of the traversing player. The opponent id is computed
                as ``3 - player`` (presumably ids are 1 and 2 -- confirm
                against the caller).

        Returns:
            ``state.get_utility(player)`` at a terminal state. At the
            traversing player's node, the sum over bets of
            ``player_strategy[i] * child_value[i]``. At the opponent's node,
            the value of the single sampled child.

        Raises:
            Exception: if the player to act is neither ``player`` nor
                ``3 - player``.

        Side effects:
            * Registers a new ``Strategy`` in ``self.infoset_strategy_map``
              for an infoset that has not been seen before.
            * Adds ``child_value[i] - node_value`` to ``strategy.regret_sum[i]``
              at the traversing player's nodes.
            * Increments ``strategy.count[i]`` for the sampled bet at the
              opponent's nodes.
        """
        if state.is_terminal():
            return state.get_utility(player)

        # The two player ids sum to 3, so this yields "the other one".
        other_player = 3 - player
        player_turn = state.get_players_turn()
        possible_bets = self.available_bets.get_bets_as_numbers(
            state._my_contrib(player_turn), state._other_contrib(player_turn),
            self.abstracted)

        # Look up (or lazily create) the strategy attached to this infoset.
        infoset = state.get_infoset(player_turn)
        if infoset in self.infoset_strategy_map:
            strategy = self.infoset_strategy_map[infoset]
        else:
            strategy = Strategy(len(possible_bets))
            self.infoset_strategy_map[infoset] = strategy

        player_strategy = strategy.calculate_strategy()

        if player_turn == player:
            # Value of this node: what `player` expects to obtain from here
            # when bets are weighted by the current strategy.
            value = 0
            value_bet = []
            for bet_index, bet in enumerate(possible_bets):
                memento = state.update(bet)
                try:
                    va = self.traverse_ESMCCFR(state, player)
                finally:
                    # Always undo the bet, even if the recursion raised, so
                    # the shared state is never left at a descendant node.
                    state.reverse_update(memento)

                value_bet.append(va)
                value += player_strategy[bet_index] * va

            # Accumulate the regret of each bet relative to the node value.
            for bet_index, bet_value in enumerate(value_bet):
                strategy.regret_sum[bet_index] += bet_value - value

            return value

        if player_turn == other_player:
            # Sample one bet (presumably drawn according to player_strategy
            # -- see get_random_bet) and record that it was chosen.
            bet_index = self.get_random_bet(player_strategy)
            bet = possible_bets[bet_index]
            strategy.count[bet_index] += 1

            memento = state.update(bet)
            try:
                return self.traverse_ESMCCFR(state, player)
            finally:
                # Restore the state on both the normal and the error path.
                state.reverse_update(memento)

        raise Exception('How did we get here? There are no other players')