Example #1
0
class SimplePlayer(Player):
    """League player that samples its opponents via PFSP, prioritized by win rate."""

    def __init__(self, pid: int, communication: Tuple[int, Tuple[Queue, Queue]], teams: List[Team],
                 payoff: Tensor):
        """
        :param pid: unique player id used to index the payoff matrix
        :param communication: (id, (in_queue, out_queue)) pair for league messaging
        :param teams: teams available to this player
        :param payoff: shared payoff tensor holding win/loss statistics
        """
        super().__init__(pid, communication, teams, payoff)
        self._pfsp = PFSPSampling()

    def is_main_player(self):
        # A SimplePlayer always participates as a main player in the league.
        return True

    def get_match(self, team=None) -> Union[None, Tuple[int, Team, OrderedDict]]:
        """
        Sample a SimplePlayer opponent using PFSP with win rates as prioritization.

        :param team: unused; kept for interface compatibility with other players
        :return: (opponent instance index, opponent team, opponent agent state)
        """
        agents = self.get_agents()
        # TODO move instance mapping to payoff where it is needed
        opponents_idxs: List[int] = [self._tid_to_instance[tid] for tid in agents]
        win_rates = self.payoff.win_rates(self.pid, opponents_idxs)
        chosen_idx = self._pfsp.sample(opponents_idxs, prio_measure=win_rates, weighting="squared")
        chosen_tid = self._instance_to_tid[chosen_idx]
        chosen_team = self.get_team(tid=chosen_tid)
        return chosen_idx, chosen_team, agents[chosen_tid]

    def ready_to_checkpoint(self) -> bool:
        """
        Checkpoint Logic - checkpoint the agent once more than 2e9 training
        steps passed since the last checkpoint.

        :return: True when enough training steps have passed
        """
        # NOTE(review): other player types read self.agent.trained_steps here;
        # confirm self.trained_steps is the intended counter for SimplePlayer.
        steps_passed = self.trained_steps - self._checkpoint_step
        return steps_passed >= 2e9
Example #2
0
class MainExploiter(Player):
    """Exploiter that targets the league's MainPlayers (and their checkpoints)."""

    def __init__(self, pid: int, payoff, team):
        """
        :param pid: unique player id used to index the payoff matrix
        :param payoff: shared payoff structure holding win/loss statistics
        :param team: team controlled by this player
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0  # trained-step count at the last checkpoint
        self._pfsp = PFSPSampling()

    def get_match(self) -> Tuple[Player, bool]:
        """
        Pick an opponent: a randomly chosen MainPlayer while we beat it often
        enough, otherwise a PFSP-sampled historical checkpoint of that player
        as a curriculum.

        :return: (opponent, True)
        """
        # Local import to avoid a circular dependency at module load time.
        from league.rolebased.alphastar.main_player import MainPlayer
        main_agents = [
            player for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        opponent = np.random.choice(main_agents)

        # Win rate above threshold: keep exploiting the live main player.
        if self.payoff[self.tid, opponent.tid] > 0.1:
            return opponent, True

        from league.rolebased.players import HistoricalPlayer
        historical = [
            player.tid for player in self.payoff.players if
            isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]

        # Guard BEFORE indexing the payoff: the opponent may have no
        # checkpoints yet.
        if len(historical) == 0:  # TODO
            return opponent, True

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical,
                                   prio_measure=win_rates,
                                   weighting="variance")
        return self.payoff.players[chosen], True

    def checkpoint(self):
        """
        Record the current trained-step count and create a checkpoint.

        :return: the checkpoint created by self._create_checkpoint()
        """
        self._checkpoint_step = self.agent.trained_steps
        return self._create_checkpoint()

    def ready_to_checkpoint(self):
        """
        Checkpoint once at least 2e9 steps passed AND either every MainPlayer
        is beaten with > 0.7 win rate or more than 4e9 steps passed in total.

        :return: bool
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:
            return False
        from league.rolebased.alphastar.main_player import MainPlayer
        main_agents = [
            player.tid for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        # NOTE(review): assumes at least one MainPlayer exists in the league;
        # win_rates.min() would fail on an empty selection — confirm.
        win_rates = self.payoff[self.tid, main_agents]
        return win_rates.min() > 0.7 or steps_passed > 4e9
Example #3
0
class LeagueExploiter(Player):
    """Exploiter that plays against the whole pool of historical checkpoints."""

    def __init__(self, pid: int, payoff, team):
        """
        :param pid: unique player id used to index the payoff matrix
        :param payoff: shared payoff structure holding win/loss statistics
        :param team: team controlled by this player
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0  # trained-step count at the last checkpoint
        self._pfsp = PFSPSampling()

    def get_match(self) -> Tuple[Player, bool]:
        """
        Sample a historical checkpoint from the entire league via PFSP.

        :return: (historical opponent, True), or (None, None) when the league
            holds no historical players yet
        """
        from league.rolebased.players import HistoricalPlayer

        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]

        # Guard BEFORE indexing the payoff with an empty selection.
        # NOTE(review): (None, None) contradicts the declared return type;
        # callers must handle it. TODO
        if len(historical) == 0:
            return None, None

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical,
                                   prio_measure=win_rates,
                                   weighting="linear_capped")
        return self.payoff.players[chosen], True

    def checkpoint(self):
        """
        Record the current trained-step count and create a checkpoint.

        :return: the checkpoint created by self._create_checkpoint()
        """
        # if np.random.random() < 0.25: TODO: reset at random to initial weights (when are they initial?)
        #     self.learner.set_weights(self._initial_weights)
        self._checkpoint_step = self.agent.trained_steps
        return self._create_checkpoint()

    def ready_to_checkpoint(self):
        """
        Checkpoint once at least 2e9 steps passed AND either every historical
        player is beaten with > 0.7 win rate or more than 4e9 steps passed.

        :return: bool
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:
            return False
        from league.rolebased.players import HistoricalPlayer
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]
        # Guard: with no historical players win_rates.min() would raise;
        # there is nothing left to beat, so consider the agent ready.
        if len(historical) == 0:
            return True
        win_rates = self.payoff[self.tid, historical]
        return win_rates.min() > 0.7 or steps_passed > 4e9
Example #4
0
class PFSPMatchmaking(Matchmaker):
    """Matchmaker that picks opponents via Prioritized Fictitious Self-Play."""

    def __init__(self, communication: Tuple[int, Tuple[Queue, Queue]],
                 teams: List[Team], payoff: Tensor):
        """
        :param communication: (id, (in_queue, out_queue)) pair for messaging
        :param teams: teams managed by this matchmaker
        :param payoff: shared payoff tensor holding win/loss statistics
        """
        super().__init__(communication, teams, payoff)

        self._sampling_strategy = PFSPSampling()

    def get_match(
            self,
            home_team: Team) -> Union[None, Tuple[int, Team, OrderedDict]]:
        """
        Sample an opponent for home_team and register the pairing in the payoff.

        :param home_team: team requesting a match
        :return: (opponent instance index, opponent team, opponent agent state)
        """
        home_instance = self.get_instance_id(home_team)
        candidates = self.get_agents()
        rates = self.payoff.win_rates(home_instance)
        picked_tid: int = self._sampling_strategy.sample(
            opponents=list(candidates), prio_measure=rates)
        picked_instance = self._tid_to_instance[picked_tid]
        picked_team = self.get_team(tid=picked_tid)
        # Record the upcoming match in the shared payoff bookkeeping.
        self.payoff.match(home_instance, picked_instance)
        return picked_instance, picked_team, candidates[picked_tid]
Example #5
0
 def __init__(self, pid: int, communication: Tuple[int, Tuple[Queue, Queue]], teams: List[Team],
              payoff: Tensor):
     """
     Initialize the player and its PFSP opponent-sampling strategy.

     :param pid: unique player id used to index the payoff matrix
     :param communication: (id, (in_queue, out_queue)) pair for messaging
     :param teams: teams available to this player
     :param payoff: shared payoff tensor holding win/loss statistics
     """
     super().__init__(pid, communication, teams, payoff)
     self._pfsp = PFSPSampling()
Example #6
0
    def __init__(self, communication: Tuple[int, Tuple[Queue, Queue]],
                 teams: List[Team], payoff: Tensor):
        """
        Initialize the matchmaker with a PFSP sampling strategy.

        :param communication: (id, (in_queue, out_queue)) pair for messaging
        :param teams: teams managed by this matchmaker
        :param payoff: shared payoff tensor holding win/loss statistics
        """
        super().__init__(communication, teams, payoff)

        self._sampling_strategy = PFSPSampling()
Example #7
0
 def __init__(self, pid: int, payoff, team):
     """
     Initialize the player, its checkpoint counter and PFSP sampler.

     :param pid: unique player id used to index the payoff matrix
     :param payoff: shared payoff structure holding win/loss statistics
     :param team: team controlled by this player
     """
     super().__init__(pid, payoff, team)
     # Trained-step count at the last checkpoint; 0 means never checkpointed.
     self._checkpoint_step = 0
     self._pfsp = PFSPSampling()
Example #8
0
class MainPlayer(Player):
    """AlphaStar-style main player: mixes PFSP against the league, self-play
    against other MainPlayers, and a verification branch against exploiters."""

    def __init__(self, pid: int, payoff, team):
        """
        :param pid: unique player id used to index the payoff matrix
        :param payoff: shared payoff structure holding win/loss statistics
        :param team: team controlled by this player
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0  # trained-step count at the last checkpoint
        self._pfsp = PFSPSampling()

    def get_match(self,
                  team=None) -> Union[Tuple[Any, bool], Tuple[Player, bool]]:
        """
        Samples an HistoricalPlayer opponent using PFSP with probability 0.5.
        In other cases play against MainPlayers using SP (p = 0.35) or verify
        that no player was omitted (p = 0.15).

        :param team: unused; kept for interface compatibility
        :return: (opponent, flag) pair produced by the chosen branch
        """
        coin_toss = np.random.random()

        # Make sure you can beat the League via PFSP
        if coin_toss < 0.5:
            return self._pfsp_branch()

        main_agents = [
            player for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        opponent = np.random.choice(main_agents)

        # Verify if there are some rare players we omitted
        if coin_toss < 0.5 + 0.15:
            request = self._verification_branch(opponent)
            # NOTE(review): _verification_branch may return (None, None),
            # which is NOT None and is therefore returned here instead of
            # falling through to self-play — confirm this is intended.
            if request is not None:
                return request

        # Else play against yourself (MainPlayer)
        return self._selfplay_branch(opponent)

    def _pfsp_branch(self) -> Union[Tuple[Player, bool], Tuple[None, bool]]:
        """
        PFSP against historical players, prioritized by squared win rate.

        :return: (historical opponent, True), or (None, False) when the league
            holds no historical players yet
        """
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]

        if len(historical) == 0:  # no historical opponents found yet # TODO
            return None, False

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical,
                                   prio_measure=win_rates,
                                   weighting="squared")
        return self.payoff.players[chosen], True

    def _selfplay_branch(self, opponent: Player) -> Tuple[Player, bool]:
        """
        SP against main players, with exceptions if the opponent is too strong.

        :param opponent: the MainPlayer drawn for self-play
        :return: (opponent, flag); the flag mirrors the original branch logic
        """
        # Play self-play match while the win rate is high enough
        if self.payoff[self.tid, opponent.tid] > 0.3:
            return opponent, False

        # Opponent too strong -> use checkpoint of the opponent as curriculum
        historical = [
            player.tid for player in self.payoff.players if
            isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]

        if len(historical) == 0:  # opponent has no checkpoints yet # TODO
            return opponent, False

        # PFSP on checkpoints of opponent, focused on close matchups
        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical,
                                   prio_measure=win_rates,
                                   weighting="variance")
        return self.payoff.players[chosen], True

    def _verification_branch(
            self, opponent) -> Union[Tuple[None, None], Tuple[Player, bool]]:
        """
        Check whether we are being exploited or forgetting old opponents; if
        so, train against the problematic checkpoint via PFSP.

        :param opponent: MainPlayer whose checkpoints are checked for forgetting
        :return: (checkpoint, True) when a weakness is found, else (None, None)
        """
        # Check exploitation
        from league.rolebased.alphastar.exploiters import MainExploiter

        exploiters = {  # Get all exploiters
            player for player in self.payoff.players
            if isinstance(player, MainExploiter)
        }
        exp_historical = [  # Get all historical players which originate from exploiters
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
            and player.parent in exploiters
        ]
        # If historical exploiters min. win rate is smaller threshold -> PFSP
        win_rates = self.payoff[self.tid, exp_historical]
        if len(win_rates) and win_rates.min() < 0.3:
            chosen = self._pfsp.sample(exp_historical,
                                       prio_measure=win_rates,
                                       weighting="squared")
            return self.payoff.players[chosen], True

        # Check forgetting against the opponent's own checkpoints
        historical = [
            player.tid for player in self.payoff.players if
            isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]
        win_rates = self.payoff[self.tid, historical]
        # Drop the trailing run of monotonically improving results: those
        # checkpoints are not being forgotten.
        win_rates, historical = remove_monotonic_suffix(win_rates, historical)
        if len(win_rates) and win_rates.min() < 0.7:
            chosen = self._pfsp.sample(historical,
                                       prio_measure=win_rates,
                                       weighting="squared")
            return self.payoff.players[chosen], True

        # TODO: when and why do we get here?
        return None, None

    def ready_to_checkpoint(self) -> bool:
        """
        Checkpoint Logic - AlphaStar's checkpointing logic: snapshot after 2e9
        steps once every historical player is beaten with > 0.7 win rate, or
        unconditionally after 4e9 steps.

        :return: True when the agent should be checkpointed
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:  # TODO make constant
            return False

        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]
        # Guard: with no historical players win_rates.min() would raise;
        # there is nothing to compare against, so checkpoint right away.
        if len(historical) == 0:
            return True

        win_rates = self.payoff[self.tid, historical]
        return win_rates.min() > 0.7 or steps_passed > 4e9  # TODO make constant