def get_nash_strategies(self):
  """Returns the Nash meta-strategy distribution over the meta-game matrix.

  Even when another meta-strategy method is in play, the Nash strategy is
  still needed for evaluation.
  """
  if (self._meta_strategy_method_name in {'general_nash_strategy', 'nash_strategy'}
      or self._num_players > 2):
    return self.get_meta_strategies()
  meta_strategy_probabilities = meta_strategies.general_nash_strategy(
      self, checkpoint_dir=self.checkpoint_dir)
  return [np.copy(a) for a in meta_strategy_probabilities]
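For reference, here is a minimal standalone sketch (not part of the solver above; meta_games and nash_marginals are hypothetical stand-ins) of how per-player Nash marginals like the ones returned by get_nash_strategies can be used to evaluate a 2-player meta-game:

import numpy as np

# Hypothetical per-player payoff matrices of the meta-game.
meta_games = [np.array([[1.0, 0.0], [0.0, 1.0]]),
              np.array([[1.0, 0.0], [0.0, 1.0]])]
# Hypothetical per-player Nash marginals over the meta-game strategies.
nash_marginals = [np.array([0.5, 0.5]), np.array([0.5, 0.5])]

# Joint profile probabilities are the outer product of the marginals.
joint = np.outer(nash_marginals[0], nash_marginals[1])
# Expected payoff of each player under the Nash meta-strategy.
expected_payoffs = [float(np.sum(joint * m)) for m in meta_games]
print(expected_payoffs)  # [0.5, 0.5]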
Example #2
    def inner_loop(self):
        """Finds an equilibrium in the incomplete self._meta_games.

        Iteratively augments the maximum complete subgame by sampling.
        A symmetric game can have an asymmetric Nash equilibrium, so
        self._game_num_players is used instead of self._num_players.

        Returns:
            Equilibrium support, non-marginalized profile probabilities.
        """
        found_confirmed_eq = False
        NE_solver = 'replicator' if self._num_players > 2 else 'gambit'
        while not found_confirmed_eq:
            maximum_subgame = self.get_complete_meta_game
            ne_subgame = meta_strategies.general_nash_strategy(
                solver=self,
                return_joint=False,
                NE_solver=NE_solver,
                game=maximum_subgame,
                checkpoint_dir=self.checkpoint_dir)
            # ne_support_num: list of lists with the full-game indices of each
            # player's equilibrium support, e.g. [[0, 1], [2]].
            # cum_sum maps full-game indices into ne_subgame, which is indexed
            # only over the complete subgame given by self._complete_ind.
            cum_sum = [np.cumsum(ele) for ele in self._complete_ind]
            ne_support_num = []
            for i in range(self._game_num_players):
                ne_support_num_p = []
                for j in range(len(self._complete_ind[i])):
                    if self._complete_ind[i][j] == 1 and ne_subgame[i][
                            cum_sum[i][j] - 1] != 0:
                        ne_support_num_p.append(j)
                ne_support_num.append(ne_support_num_p)
            # ne_subgame_nonzero: non-zero equilibrium probabilities per player,
            # e.g. [[0.1, 0.5, 0.4], [0.2, 0.4, 0.4]].
            ne_subgame_nonzero = [np.array(ele) for ele in ne_subgame]
            ne_subgame_nonzero = [ele[ele != 0] for ele in ne_subgame_nonzero]
            # get players' payoffs in nash equilibrium
            ne_payoffs = self.get_mixed_payoff(ne_support_num,
                                               ne_subgame_nonzero)
            # Schedule all possible deviations: dev_pol[i] holds candidate
            # deviation policy indices for player i and dev_payoffs[i] the
            # corresponding deviation payoffs.
            dev_pol, dev_payoffs = self.schedule_deviation(
                ne_support_num, ne_subgame_nonzero)
            # Check each player's best deviation; if it is beneficial, add it to
            # the subgame and sample the newly required profiles.
            dev = []
            maximum_subgame_index = [
                list(np.where(np.array(ele) == 1)[0])
                for ele in self._complete_ind
            ]
            for i in range(self._game_num_players):
                if len(dev_payoffs[i]) != 0 and max(
                        dev_payoffs[i]) > ne_payoffs[i]:
                    pol = dev_pol[i][np.argmax(dev_payoffs[i])]
                    new_subgame_sample_ind = copy.deepcopy(
                        maximum_subgame_index)
                    maximum_subgame_index[i].append(pol)
                    new_subgame_sample_ind[i] = [pol]
                    # add best deviation into subgame and sample it
                    for pol in itertools.product(*new_subgame_sample_ind):
                        self.sample_pure_policy_to_empirical_game(pol)
                    dev.append(i)
                    # All other players' policies have to be sampled against the
                    # previous players' best deviations.
            found_confirmed_eq = (len(dev) == 0)
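            # No beneficial deviation for any player means the subgame
            # equilibrium is confirmed on the full empirical game, so the
            # loop can stop.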
            # debug: check maximum subgame remains the same
            # debug: check maximum game reached

        # return confirmed nash equilibrium
        eq = []
        policy_len = [
            len(self._policies) for _ in range(self._game_num_players)
        ] if self.symmetric_game else [len(ele) for ele in self._policies]
        for p in range(self._game_num_players):
            eq_p = np.zeros([policy_len[p]], dtype=float)
            np.put(eq_p, ne_support_num[p], ne_subgame_nonzero[p])
            eq.append(eq_p)
        non_marginalized_probabilities = meta_strategies.get_joint_strategy_from_marginals(
            eq)
        return eq, non_marginalized_probabilities
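To make the index bookkeeping in inner_loop concrete, here is a standalone sketch with made-up numbers of the cum_sum / _complete_ind mapping that recovers full-game support indices from the subgame equilibrium; complete_ind and ne_subgame below are hypothetical example values:

import numpy as np

# Which full-game policies are present in the maximum complete subgame.
complete_ind = [[1, 0, 1, 1], [1, 1, 0, 1]]
# Subgame equilibrium, one probability per policy in the complete subgame.
ne_subgame = [[0.7, 0.0, 0.3], [0.5, 0.5, 0.0]]

cum_sum = [np.cumsum(ele) for ele in complete_ind]
ne_support_num = []
for i in range(len(complete_ind)):
    support_p = []
    for j in range(len(complete_ind[i])):
        # cum_sum[i][j] - 1 is the position of full-game policy j inside
        # ne_subgame[i]; keep j only if it is in the subgame and has
        # non-zero equilibrium probability.
        if complete_ind[i][j] == 1 and ne_subgame[i][cum_sum[i][j] - 1] != 0:
            support_p.append(j)
    ne_support_num.append(support_p)

print(ne_support_num)  # [[0, 3], [0, 1]]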