class SimplePlayer(Player):
    """Player that selects its opponents with PFSP sampling over win rates."""

    def __init__(self, pid: int,
                 communication: Tuple[int, Tuple[Queue, Queue]],
                 teams: List[Team], payoff: Tensor):
        """
        Forward all arguments to ``Player`` and create the PFSP sampler.

        :param pid: identifier of this player
        :param communication: passed through unchanged to the base class
        :param teams: passed through unchanged to the base class
        :param payoff: passed through unchanged to the base class
        """
        super().__init__(pid, communication, teams, payoff)
        self._pfsp = PFSPSampling()

    def is_main_player(self):
        """
        :return: always ``True``
        """
        return True

    def get_match(self, team=None) -> Union[None, Tuple[int, Team, OrderedDict]]:
        """
        Sample an opponent using PFSP with win rates as prioritization
        ("squared" weighting).

        :param team: unused by this implementation
        :return: tuple of (chosen instance id, chosen team, entry of the
            agents mapping that belongs to the chosen team id)
        """
        candidates = self.get_agents()
        # NOTE: candidates are not filtered by player type here.
        # TODO move instance mapping to payoff where its needed
        instance_ids: List[int] = [
            self._tid_to_instance[tid] for tid in candidates.keys()
        ]
        priorities = self.payoff.win_rates(self.pid, instance_ids)
        picked_instance = self._pfsp.sample(
            instance_ids, prio_measure=priorities, weighting="squared")
        # Translate the sampled instance id back to its team id.
        picked_tid = self._instance_to_tid[picked_instance]
        picked_team = self.get_team(tid=picked_tid)
        return picked_instance, picked_team, candidates[picked_tid]

    def ready_to_checkpoint(self) -> bool:
        """
        Checkpoint logic: ready once at least 2e9 training steps have passed
        since ``self._checkpoint_step``.

        :return: ``False`` while fewer than 2e9 steps passed, else ``True``
        """
        elapsed = self.trained_steps - self._checkpoint_step
        return not (elapsed < 2e9)
class MainExploiter(Player):
    """Exploiter player that is matched against main players and their checkpoints."""

    def __init__(self, pid: int, payoff, team):
        """
        Forward all arguments to ``Player`` and set up checkpoint bookkeeping.

        :param pid: identifier of this player
        :param payoff: passed through unchanged to the base class
        :param team: passed through unchanged to the base class
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0
        self._pfsp = PFSPSampling()

    def get_match(self) -> Tuple[Player, bool]:
        """
        Pick a random main player. Play it directly if the payoff entry
        against it exceeds 0.1; otherwise sample one of its historical
        checkpoints via PFSP ("variance" weighting).

        :return: tuple of (opponent, flag); the flag is always ``True`` here
        """
        # Imported locally, as in the original (presumably to avoid a
        # circular import between the player modules - TODO confirm).
        from league.rolebased.alphastar.main_player import MainPlayer
        main_agents = [
            player for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        opponent = np.random.choice(main_agents)

        if self.payoff[self.tid, opponent.tid] > 0.1:
            return opponent, True

        from league.rolebased.players import HistoricalPlayer
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]
        # Guard before touching the payoff: an empty selection carries no
        # win rates to prioritise, so fall back to the main player itself.
        if not historical:  # TODO
            return opponent, True

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical, prio_measure=win_rates,
                                   weighting="variance")
        return self.payoff.players[chosen], True

    def checkpoint(self):
        """
        Record the current training step and create a checkpoint.

        :return: result of ``self._create_checkpoint()``
        """
        self._checkpoint_step = self.agent.trained_steps
        return self._create_checkpoint()

    def ready_to_checkpoint(self):
        """
        Checkpoint logic: never before 2e9 steps since the last checkpoint;
        afterwards as soon as the minimum win rate against all main players
        exceeds 0.7, or once more than 4e9 steps have passed.

        :return: truthy if a checkpoint should be created
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:
            return False

        from league.rolebased.alphastar.main_player import MainPlayer
        main_agents = [
            player.tid for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        # Without any main player there are no win rates to take a minimum
        # of; only the step timeout can trigger a checkpoint then.
        if not main_agents:
            return steps_passed > 4e9

        win_rates = self.payoff[self.tid, main_agents]
        return win_rates.min() > 0.7 or steps_passed > 4e9
class LeagueExploiter(Player):
    """Exploiter player that is matched against all historical players."""

    def __init__(self, pid: int, payoff, team):
        """
        Forward all arguments to ``Player`` and set up checkpoint bookkeeping.

        :param pid: identifier of this player
        :param payoff: passed through unchanged to the base class
        :param team: passed through unchanged to the base class
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0
        self._pfsp = PFSPSampling()

    def _historical_tids(self):
        """Collect the ``tid`` of every historical player known to the payoff."""
        # Imported locally, as in the original (presumably to avoid a
        # circular import between the player modules - TODO confirm).
        from league.rolebased.players import HistoricalPlayer
        return [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]

    def get_match(self) -> Tuple[Player, bool]:
        """
        Sample a historical player via PFSP ("linear_capped" weighting).

        :return: tuple of (opponent, ``True``); ``(None, None)`` if the
            payoff holds no historical player yet
        """
        historical = self._historical_tids()
        # Guard before touching the payoff: with no historical players there
        # is nothing to look up or sample from.
        if not historical:  # TODO
            return None, None

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical, prio_measure=win_rates,
                                   weighting="linear_capped")
        return self.payoff.players[chosen], True

    def checkpoint(self):
        """
        Record the current training step and create a checkpoint.

        :return: result of ``self._create_checkpoint()``
        """
        # if np.random.random() < 0.25: TODO: reset at random to initial weights (when are they initial?)
        #     self.learner.set_weights(self._initial_weights)
        self._checkpoint_step = self.agent.trained_steps
        return self._create_checkpoint()

    def ready_to_checkpoint(self):
        """
        Checkpoint logic: never before 2e9 steps since the last checkpoint;
        afterwards as soon as the minimum win rate against all historical
        players exceeds 0.7, or once more than 4e9 steps have passed.

        :return: truthy if a checkpoint should be created
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:
            return False

        historical = self._historical_tids()
        # get_match already expects an empty league; taking a minimum over
        # an empty selection would fail, so only the timeout applies then.
        if not historical:
            return steps_passed > 4e9

        win_rates = self.payoff[self.tid, historical]
        return win_rates.min() > 0.7 or steps_passed > 4e9
class PFSPMatchmaking(Matchmaker):
    """Matchmaker that selects the opponent of a team via PFSP sampling."""

    def __init__(self, communication: Tuple[int, Tuple[Queue, Queue]],
                 teams: List[Team], payoff: Tensor):
        """
        Forward all arguments to ``Matchmaker`` and create the sampler.

        :param communication: passed through unchanged to the base class
        :param teams: passed through unchanged to the base class
        :param payoff: passed through unchanged to the base class
        """
        super().__init__(communication, teams, payoff)
        self._sampling_strategy = PFSPSampling()

    def get_match(self, home_team: Team) -> Union[None, Tuple[int, Team, OrderedDict]]:
        """
        Sample an opponent for ``home_team`` prioritized by its win rates and
        report the pairing to the payoff via ``payoff.match``.

        :param home_team: team that requests a match
        :return: tuple of (chosen instance id, chosen team, entry of the
            agents mapping that belongs to the chosen team id)
        """
        home_id = self.get_instance_id(home_team)
        candidates = self.get_agents()
        priorities = self.payoff.win_rates(home_id)

        picked_tid: int = self._sampling_strategy.sample(
            opponents=list(candidates.keys()),
            prio_measure=priorities,
        )
        # Translate the sampled team id to its instance id.
        picked_instance = self._tid_to_instance[picked_tid]
        picked_team = self.get_team(tid=picked_tid)

        self.payoff.match(home_id, picked_instance)
        return picked_instance, picked_team, candidates[picked_tid]
def __init__(
        self,
        pid: int,
        communication: Tuple[int, Tuple[Queue, Queue]],
        teams: List[Team],
        payoff: Tensor,
):
    """
    Forward all arguments to the parent constructor and create the PFSP
    sampler stored in ``self._pfsp``.

    :param pid: identifier of the player
    :param communication: passed through unchanged to the parent
    :param teams: passed through unchanged to the parent
    :param payoff: passed through unchanged to the parent
    """
    super().__init__(pid, communication, teams, payoff)
    self._pfsp = PFSPSampling()
def __init__(
        self,
        communication: Tuple[int, Tuple[Queue, Queue]],
        teams: List[Team],
        payoff: Tensor,
):
    """
    Forward all arguments to the parent constructor and create the PFSP
    sampler stored in ``self._sampling_strategy``.

    :param communication: passed through unchanged to the parent
    :param teams: passed through unchanged to the parent
    :param payoff: passed through unchanged to the parent
    """
    super().__init__(communication, teams, payoff)
    self._sampling_strategy = PFSPSampling()
def __init__(self, pid: int, payoff, team):
    """
    Forward all arguments to the parent constructor, start the checkpoint
    step counter at zero and create the PFSP sampler.

    :param pid: identifier of the player
    :param payoff: passed through unchanged to the parent
    :param team: passed through unchanged to the parent
    """
    super().__init__(pid, payoff, team)
    # Training step at which the last checkpoint was taken.
    self._checkpoint_step = 0
    self._pfsp = PFSPSampling()
class MainPlayer(Player):
    """Main league player: PFSP against history, verification and self-play."""

    def __init__(self, pid: int, payoff, team):
        """
        Forward all arguments to ``Player`` and set up checkpoint bookkeeping.

        :param pid: identifier of this player
        :param payoff: passed through unchanged to the base class
        :param team: passed through unchanged to the base class
        """
        super().__init__(pid, payoff, team)
        self._checkpoint_step = 0
        self._pfsp = PFSPSampling()

    def get_match(self, team=None) -> Union[Tuple[Any, bool], Tuple[Player, bool]]:
        """
        Samples an HistoricalPlayer opponent using PFSP if the coin toss is
        below 0.5. Otherwise a random MainPlayer is drawn; for a coin toss
        below 0.65 the verification branch is tried first, and self-play
        against the drawn MainPlayer is the fallback.

        :param team: unused by this implementation
        :return: tuple of (opponent, flag) as produced by the chosen branch
        """
        coin_toss = np.random.random()

        # Make sure you can beat the League via PFSP
        if coin_toss < 0.5:
            return self._pfsp_branch()

        main_agents = [
            player for player in self.payoff.players
            if isinstance(player, MainPlayer)
        ]
        opponent = np.random.choice(main_agents)

        # Verify if there are some rare players we omitted
        if coin_toss < 0.5 + 0.15:
            request = self._verification_branch(opponent)
            # The verification branch reports "nothing to verify" as the
            # tuple (None, None), which is never ``None`` itself. Inspect
            # the opponent slot so the self-play fallback stays reachable.
            if request is not None and request[0] is not None:
                return request

        # Else play against yourself (MainPlayer)
        return self._selfplay_branch(opponent)

    def _pfsp_branch(self) -> Union[Tuple[Player, bool], Tuple[None, bool]]:
        """
        PFSP against historical players ("squared" weighting).

        :return: (opponent, ``True``), or (``None``, ``False``) if the payoff
            holds no historical player
        """
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]
        if not historical:  # no new historical opponents found # TODO
            return None, False

        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical, prio_measure=win_rates,
                                   weighting="squared")
        return self.payoff.players[chosen], True

    def _selfplay_branch(self, opponent: Player) -> Tuple[Player, bool]:
        """
        SP against main players, with exceptions if the opponent is too strong.

        :param opponent: main player drawn for self-play
        :return: (opponent, ``False``) for a plain self-play match, or
            (checkpoint of the opponent, ``True``) when PFSP is used
        """
        # Play self-play match
        if self.payoff[self.tid, opponent.tid] > 0.3:
            return opponent, False

        # Opponent too strong -> use checkpoint of the opponent as curriculum
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]
        if not historical:  # no new historical opponents found # TODO
            return opponent, False

        # PFSP on checkpoints of opponent
        win_rates = self.payoff[self.tid, historical]
        chosen = self._pfsp.sample(historical, prio_measure=win_rates,
                                   weighting="variance")
        return self.payoff.players[chosen], True

    def _verification_branch(
            self, opponent) -> Union[Tuple[None, None], Tuple[Player, bool]]:
        """
        Check for exploitation by exploiter checkpoints and for forgetting
        of the opponent's own checkpoints.

        :param opponent: main player drawn by ``get_match``
        :return: (historical player, ``True``) if one of the checks selects a
            match, otherwise ``(None, None)``
        """
        # Check exploitation
        # Imported locally, as in the original (presumably to avoid a
        # circular import between the player modules - TODO confirm).
        from league.rolebased.alphastar.exploiters import MainExploiter
        exploiters = {  # Get all exploiters
            player for player in self.payoff.players
            if isinstance(player, MainExploiter)
        }
        exp_historical = [  # Historical players which originate from exploiters
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer) and player.parent in exploiters
        ]
        # If historical exploiters min. win rate is smaller threshold -> PFSP
        if exp_historical:
            win_rates = self.payoff[self.tid, exp_historical]
            if len(win_rates) and win_rates.min() < 0.3:
                chosen = self._pfsp.sample(exp_historical,
                                           prio_measure=win_rates,
                                           weighting="squared")
                return self.payoff.players[chosen], True

        # Check forgetting
        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer) and player.parent == opponent
        ]
        if historical:
            win_rates = self.payoff[self.tid, historical]
            win_rates, historical = remove_monotonic_suffix(win_rates, historical)
            if len(win_rates) and win_rates.min() < 0.7:
                chosen = self._pfsp.sample(historical,
                                           prio_measure=win_rates,
                                           weighting="squared")
                return self.payoff.players[chosen], True

        # Neither check selected an opponent; get_match falls back to
        # self-play in that case.
        return None, None

    def ready_to_checkpoint(self) -> bool:
        """
        Checkpoint logic: never before 2e9 steps since the last checkpoint;
        afterwards as soon as the minimum win rate against all historical
        players exceeds 0.7, or once more than 4e9 steps have passed.

        :return: truthy if a checkpoint should be created
        """
        steps_passed = self.agent.trained_steps - self._checkpoint_step
        if steps_passed < 2e9:  # TODO make constant
            return False

        historical = [
            player.tid for player in self.payoff.players
            if isinstance(player, HistoricalPlayer)
        ]
        # The branches above already expect an empty league; taking a
        # minimum over an empty selection would fail, so only the timeout
        # applies then.
        if not historical:
            return steps_passed > 4e9  # TODO make constant

        win_rates = self.payoff[self.tid, historical]
        return win_rates.min() > 0.7 or steps_passed > 4e9  # TODO make constant