Exemple #1
0
 def _pfsp_branch(self):
     """Sample one Historical player as the opponent.

     Collects every ``Historical`` instance from ``self._payoff.players``,
     looks up this player's win rates against them, and draws one of them
     with probabilities ``pfsp(win_rates, weighting="squared")``.

     Returns:
         A ``(opponent, True)`` tuple. The meaning of the boolean flag is
         defined by the caller and is not visible from this method.
     """
     past_players = []
     for candidate in self._payoff.players:
         if isinstance(candidate, Historical):
             past_players.append(candidate)

     rates = self._payoff[self, past_players]
     sampling_probs = pfsp(rates, weighting="squared")
     chosen = np.random.choice(past_players, p=sampling_probs)
     return chosen, True
Exemple #2
0
    def get_match(self):
        """Choose an opponent among all Historical players.

        Every ``Historical`` instance in ``self._payoff.players`` is a
        candidate; one is drawn at random using the probabilities returned
        by ``pfsp(win_rates, weighting="linear_capped")``.

        Returns:
            A ``(opponent, True)`` tuple. The meaning of the boolean flag
            is defined by the caller and is not visible from this method.
        """
        candidates = list(
            filter(lambda p: isinstance(p, Historical), self._payoff.players)
        )
        rates = self._payoff[self, candidates]
        probabilities = pfsp(rates, weighting="linear_capped")
        picked = np.random.choice(candidates, p=probabilities)
        return picked, True
Exemple #3
0
    def _verification_branch(self, opponent):
        """Look for a historical opponent that this player should re-verify.

        Two checks are made, in order:

        1. Exploitation: among Historical players whose ``parent`` is a
           ``MainExploiter``, if the lowest win rate is below 0.3, one of
           them is sampled.
        2. Forgetting: among Historical players whose ``parent`` is
           ``opponent``, the trailing run of non-increasing win rates is
           dropped; if the lowest remaining win rate is below 0.7, one of
           the remaining players is sampled.

        Sampling uses ``pfsp(win_rates, weighting="squared")`` as the
        probability vector in both cases.

        Args:
            opponent: The player whose historical checkpoints are examined
                in the forgetting check.

        Returns:
            ``(historical_player, True)`` if either check fires, otherwise
            ``None``.
        """

        def remove_monotonic_suffix(win_rates, players):
            """Drop the trailing run where win rates never increase.

            Scans from the end for the last position ``i`` with
            ``win_rates[i - 1] < win_rates[i]`` and keeps everything up to
            and including ``i``. If no such rise exists (including the
            single-element case), an empty result is returned.
            """
            # Explicit length check: ``not win_rates`` raises ValueError for
            # a NumPy array holding more than one element.
            if len(win_rates) == 0:
                return win_rates, players

            for i in range(len(win_rates) - 1, 0, -1):
                if win_rates[i - 1] < win_rates[i]:
                    return win_rates[:i + 1], players[:i + 1]

            return np.array([]), []

        # Check exploitation
        exploiters = {
            player for player in self._payoff.players
            if isinstance(player, MainExploiter)
        }
        # ``player.parent`` is a property of Historical players only, so the
        # isinstance test must come first in the condition below.
        exp_historical = [
            player for player in self._payoff.players
            if isinstance(player, Historical) and player.parent in exploiters
        ]
        win_rates = self._payoff[self, exp_historical]
        if len(win_rates) and win_rates.min() < 0.3:
            return np.random.choice(
                exp_historical, p=pfsp(win_rates, weighting="squared")), True

        # Check forgetting
        historical = [
            player for player in self._payoff.players
            if isinstance(player, Historical) and player.parent == opponent
        ]
        win_rates = self._payoff[self, historical]
        win_rates, historical = remove_monotonic_suffix(win_rates, historical)
        if len(win_rates) and win_rates.min() < 0.7:
            return np.random.choice(
                historical, p=pfsp(win_rates, weighting="squared")), True

        return None
Exemple #4
0
    def _selfplay_branch(self, opponent):
        """Play ``opponent`` directly, or fall back to one of its checkpoints.

        If this player's win rate against ``opponent`` exceeds 0.3, the
        opponent itself is returned. Otherwise a Historical player whose
        ``parent`` is ``opponent`` is sampled with probabilities
        ``pfsp(win_rates, weighting="variance")``.

        Args:
            opponent: The player proposed for a self-play match.

        Returns:
            ``(opponent, False)`` for the direct match, or
            ``(historical_player, True)`` for the checkpoint fallback.
        """
        direct_win_rate = self._payoff[self, opponent]
        if direct_win_rate > 0.3:
            # Opponent is beatable often enough: play it directly.
            return opponent, False

        # Opponent is too strong: use one of its checkpoints as curriculum.
        checkpoints = []
        for candidate in self._payoff.players:
            if isinstance(candidate, Historical) and candidate.parent == opponent:
                checkpoints.append(candidate)

        checkpoint_rates = self._payoff[self, checkpoints]
        probabilities = pfsp(checkpoint_rates, weighting="variance")
        return np.random.choice(checkpoints, p=probabilities), True
Exemple #5
0
    def get_match(self):
        """Pick a random MainPlayer, or one of its checkpoints, to play.

        A ``MainPlayer`` is drawn uniformly from ``self._payoff.players``.
        If this player's win rate against it exceeds 0.1, it is returned
        as the opponent. Otherwise a Historical player whose ``parent`` is
        that main player is sampled with probabilities
        ``pfsp(win_rates, weighting="variance")``.

        Returns:
            An ``(opponent, True)`` tuple in both cases. The meaning of the
            boolean flag is defined by the caller and is not visible here.
        """
        mains = []
        for candidate in self._payoff.players:
            if isinstance(candidate, MainPlayer):
                mains.append(candidate)
        target = np.random.choice(mains)

        win_rate_vs_target = self._payoff[self, target]
        if win_rate_vs_target > 0.1:
            return target, True

        # Win rate is too low: fall back to the target's checkpoints.
        checkpoints = [
            p for p in self._payoff.players
            if isinstance(p, Historical) and p.parent == target
        ]
        checkpoint_rates = self._payoff[self, checkpoints]
        probabilities = pfsp(checkpoint_rates, weighting="variance")
        return np.random.choice(checkpoints, p=probabilities), True