async def main():
    """Evaluate a max-damage player against a random player.

    Both players use the ``gen7randombattle`` format and
    ``LocalhostServerConfiguration``. They are cross-evaluated over 100
    challenges, then the max-damage player's result against the random
    player (scaled by 100) is printed together with the elapsed time.
    """
    began_at = time.time()

    # Keyword arguments common to both players.
    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    random_cfg = PlayerConfiguration("Random player", None)
    max_damage_cfg = PlayerConfiguration("Max damage player", None)

    random_player = RandomPlayer(
        player_configuration=random_cfg, **shared_kwargs
    )
    max_damage_player = MaxDamagePlayer(
        player_configuration=max_damage_cfg, **shared_kwargs
    )

    results = await cross_evaluate(
        [random_player, max_damage_player], n_challenges=100
    )

    # Results are indexed as results[player][opponent].
    score = results[max_damage_player.username][random_player.username]
    print(
        "Max damage player won %d / 100 battles [this took %f seconds]"
        % (score * 100, time.time() - began_at)
    )
async def main():
    """Cross-evaluate ``RandomAgent`` against ``RandomPlayer`` and print a table.

    The two players meet over 10 challenges in ``gen7randombattle`` using
    ``LocalhostServerConfiguration``; the result matrix returned by
    ``cross_evaluate`` is rendered with ``tabulate``.
    """
    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    manual_cfg = PlayerConfiguration("Manual Random", None)
    library_cfg = PlayerConfiguration("Library Random", None)

    players = [
        RandomAgent(player_configuration=manual_cfg, **shared_kwargs),
        RandomPlayer(player_configuration=library_cfg, **shared_kwargs),
    ]

    cross_evaluation = await cross_evaluate(players, n_challenges=10)

    # Header row first, then one row per player with that player's results.
    header = ["-"] + [participant.username for participant in players]
    body = [
        [name] + list(row.values())
        for name, row in cross_evaluation.items()
    ]
    print(tabulate([header] + body))
async def main():
    """Evaluate ``TrueMaxPlayer`` against ``SuperEffectivePlayer``.

    Both players use ``gen7randombattle`` and ``LocalhostServerConfiguration``.
    They are cross-evaluated over 5000 challenges; the true-max player's
    result against the super-effective player (scaled by 5000) is printed
    together with the elapsed time.
    """
    began_at = time.time()

    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    super_effective_cfg = PlayerConfiguration("Super Effective Player", None)
    true_max_cfg = PlayerConfiguration("True Max Player", None)

    super_effective_player = SuperEffectivePlayer(
        player_configuration=super_effective_cfg, **shared_kwargs
    )
    true_max_player = TrueMaxPlayer(
        player_configuration=true_max_cfg, **shared_kwargs
    )

    results = await cross_evaluate(
        [super_effective_player, true_max_player], n_challenges=5000
    )

    # Results are indexed as results[player][opponent].
    score = results[true_max_player.username][super_effective_player.username]
    print(
        "True Max Player won %d / 5000 battles [this took %f seconds]"
        % (score * 5000, time.time() - began_at)
    )
async def main():
    """Cross-evaluate three ``RandomPlayer`` instances and print the results.

    Each player is created for ``gen7randombattle`` with
    ``LocalhostServerConfiguration`` and ``max_concurrent_battles=10``.
    ``cross_evaluate`` is called with ``n_challenges=20`` and its result is
    rendered as a table with ``tabulate``.
    """
    configurations = [
        PlayerConfiguration(name, None)
        for name in ("Player 1", "Player 2", "Player 3")
    ]

    players = []
    for configuration in configurations:
        players.append(
            RandomPlayer(
                player_configuration=configuration,
                battle_format="gen7randombattle",
                server_configuration=LocalhostServerConfiguration,
                max_concurrent_battles=10,
            )
        )

    cross_evaluation = await cross_evaluate(players, n_challenges=20)

    # Header row first, then one row per player with that player's results.
    header = ["-"] + [participant.username for participant in players]
    body = [
        [name] + list(row.values())
        for name, row in cross_evaluation.items()
    ]
    print(tabulate([header] + body))
async def main():
    """Cross-evaluate a ``RandomPlayer`` against a ``CheaterPlayer``.

    The random player is configured with the name "Type Damage Player" and
    the cheater with "Cheater Player". Both use ``gen7randombattle`` and
    ``LocalhostServerConfiguration``. ``cross_evaluate`` is called with
    ``n_challenges=20`` and its result is printed as a table.

    ``max_concurrent_battles`` is not defined in this function; it is read
    from an enclosing scope.
    """
    # Only the configurations that are actually used are created; the
    # earlier unused ones and the commented-out player block were removed.
    random_cfg = PlayerConfiguration("Type Damage Player", None)
    cheater_cfg = PlayerConfiguration("Cheater Player", None)

    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
        max_concurrent_battles=max_concurrent_battles,
    )

    players = [
        RandomPlayer(player_configuration=random_cfg, **shared_kwargs),
        CheaterPlayer(player_configuration=cheater_cfg, **shared_kwargs),
    ]

    cross_evaluation = await cross_evaluate(players, n_challenges=20)

    # Header row first, then one row per player with that player's results.
    table = [["-"] + [p.username for p in players]]
    for p_1, results in cross_evaluation.items():
        table.append([p_1] + [results[p_2] for p_2 in results])

    print(tabulate(table))
async def main():
    """Spar a ``TabularQPlayer`` against a ``RandomPlayer`` and print progress.

    Runs 10 spars of 100 single-challenge cross-evaluations each. After each
    spar the accumulated result of the tabular-Q player against the random
    player is stored as a percentage in ``q_wins`` alongside the cumulative
    game count. Both lists and the elapsed time are printed at the end.
    """
    began_at = time.time()

    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    random_cfg = PlayerConfiguration("Random Player", None)
    tabular_q_cfg = PlayerConfiguration("Tabular Q Player", None)

    random_player = RandomPlayer(
        player_configuration=random_cfg, **shared_kwargs
    )
    tabular_q_player = TabularQPlayer(
        player_configuration=tabular_q_cfg, **shared_kwargs
    )

    # Training protocol.
    spars = 10
    battles = 100
    n_challenges = 1

    q_wins, game_count = [], []
    for spar_index in range(spars):
        print('spar ' + str(spar_index))
        battle_wins = 0
        for _ in range(battles):
            outcome = await cross_evaluate(
                [random_player, tabular_q_player], n_challenges
            )
            battle_wins += outcome[tabular_q_player.username][
                random_player.username
            ]

        # Per-spar data points, intended for plotting elsewhere.
        q_wins.append((battle_wins / battles) * 100.0)
        game_count.append((spar_index + 1) * battles)

    print('done')
    print(q_wins)
    print(game_count)
    print(time.time() - began_at)
def serialize_memory(memory, writepath):
    """Write ``memory`` as one record to a JSON Lines file at ``writepath``.

    :param memory: object passed to the jsonlines writer's ``write``.
    :param writepath: destination file path, opened in write mode.
    """
    with jsonlines.open(writepath, mode='w') as writer:
        writer.write(memory)


if __name__ == "__main__":
    hyperparameter_defaults = dict(
        experiment_name="BigBoy",
        memory_size=10000,  # How many S,A,S',R transitions we keep in memory
    )

    wandb.init(config=hyperparameter_defaults)
    config = wandb.config

    writepath = "ReplayBuffers/{}.jsonl".format(config.experiment_name)
    # Create only the parent directory. Creating a directory at ``writepath``
    # itself would make the final jsonlines.open(writepath, 'w') fail.
    os.makedirs(os.path.dirname(writepath), exist_ok=True)

    custom_builder = RandomTeamFromPool([team_starters])
    custom_builder2 = RandomTeamFromPool([team_starters])

    player_one = MaxDamagePlayer(
        player_configuration=PlayerConfiguration("Max player one", None),
        battle_format="gen8ou",
        team=custom_builder,
        server_configuration=LocalhostServerConfiguration,
    )
    player_two = MaxDamagePlayer(
        player_configuration=PlayerConfiguration("Max player two", None),
        battle_format="gen8ou",
        team=custom_builder2,
        server_configuration=LocalhostServerConfiguration,
    )

    # presumably filled by get_transitions_for_replay_memory via this
    # module-level name; verify against that function.
    memory = ReplayMemory(config.memory_size)

    player_one.play_against(
        env_algorithm=get_transitions_for_replay_memory,
        opponent=player_two,
        env_algorithm_kwargs={"nb_steps": 100000000},
    )

    # NOTE(review): ``env_player`` is not defined in this block; confirm it
    # exists at module level or whether ``player_one`` was intended.
    env_player.close()

    serialize_memory(memory, writepath)
async def main_ladder(model):
    '''
    Challenge a fixed opponent on the Showdown server with a trained model.

    A ``TrainedRLPlayer`` wrapping ``model`` sends one challenge to
    "UW_Brock"; afterwards the rating and opponent rating of every battle
    the player holds are printed.

    :param model: model to evaluate
    :return: None
    '''
    # NOTE(review): account credentials are hard-coded here.
    account = PlayerConfiguration("mynameisgillian", "beanscool")
    player = TrainedRLPlayer(
        model,
        player_configuration=account,
        server_configuration=ShowdownServerConfiguration,
    )

    await player.send_challenges("UW_Brock", n_challenges=1)

    for finished_battle in player.battles.values():
        print(finished_battle.rating, finished_battle.opponent_rating)
def get_opponent(battle_format="gen8randombattle", cycle_count=0) -> Player:
    """Return the opponent selected by the "Opponent" entry of ``ai_config``.

    The setting is compared case-insensitively:

    * ``"ladder"`` returns ``None`` (no opponent object);
    * ``"random"``, ``"default"``, ``"max"``, ``"heuristics"`` return the
      entry of ``opponents`` with that key;
    * ``"cycle"`` picks one of those four, in that order, using
      ``cycle_count % len(opponents)``;
    * ``"self"`` builds a new ``RLPlayer`` loaded from a checkpoint.

    :param battle_format: format passed to the ``RLPlayer`` in "self" mode.
    :param cycle_count: counter used for "cycle" mode and for the name of
        the "self" player.
    :raises AttributeError: if the setting is not recognised, or if "cycle"
        lands on an index with no associated opponent.
    """
    opponent_string = ai_config.get("Opponent", "Opponent").lower()

    if opponent_string == "ladder":
        return None

    # Order matters: it defines which opponent each cycle index selects.
    cycle_order = ("random", "default", "max", "heuristics")

    if opponent_string in cycle_order:
        return opponents[opponent_string]

    if opponent_string == "cycle":
        # The modulo is only computed when cycling, so an empty ``opponents``
        # mapping no longer breaks the other modes.
        opponent_num = cycle_count % len(opponents)
        if opponent_num < len(cycle_order):
            return opponents[cycle_order[opponent_num]]
        raise AttributeError(
            "No opponent is associated with cycle index %d" % opponent_num
        )

    if opponent_string == "self":
        # Imported here rather than at module level, as in the original.
        from src.geniusect.player.reinforcement_learning_player import RLPlayer

        return RLPlayer(
            train=False,
            validate=False,
            load_from_checkpoint=True,
            battle_format=battle_format,
            player_configuration=PlayerConfiguration(
                "RL Player " + str(cycle_count), ""
            ),
        )

    raise AttributeError("Unknown opponent setting: %r" % opponent_string)
def test_random_gym_player_gen8():
    """Run ``play_function`` for 5 battles: RandomEnvPlayer vs RandomPlayer.

    Both players use ``gen8randombattle`` and
    ``LocalhostServerConfiguration``.
    """
    shared_kwargs = dict(
        battle_format="gen8randombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    env_player = RandomEnvPlayer(
        player_configuration=PlayerConfiguration("EnvPlayerGen8", None),
        **shared_kwargs
    )
    random_player = RandomPlayer(
        player_configuration=PlayerConfiguration("RandomPlayerGen8", None),
        **shared_kwargs
    )

    env_player.play_against(
        env_algorithm=play_function,
        opponent=random_player,
        env_algorithm_kwargs={"n_battles": 5},
    )
async def main():
    """Create a ``RandomPlayer`` on the Showdown server and send one challenge.

    The challenge is sent to the user "your_username".
    """
    account = PlayerConfiguration("bot_username", "bot_password")
    player = RandomPlayer(
        player_configuration=account,
        server_configuration=ShowdownServerConfiguration,
    )

    await player.send_challenges("your_username", n_challenges=1)
async def main():
    """Play batches of random-vs-random battles sized from the command line.

    Arguments read from ``sys.argv``:

    1. total number of battles,
    2. log level given to both players,
    3. maximum concurrent battles per player, also used as the batch size.

    A first batch of ``total % batch_size`` battles is played, followed by
    ``total // batch_size`` full batches under a ``tqdm`` progress bar.

    :raises IndexError: if fewer than three arguments are given.
    :raises ValueError: if an argument is not an integer.
    :raises ZeroDivisionError: if the batch size is 0.
    """
    # Parse the arguments once, before any player is created, so malformed
    # input fails early instead of midway through the run.
    batch_size = int(sys.argv[3])
    log_level = int(sys.argv[2])
    total_battles = int(sys.argv[1])
    leftover = total_battles % batch_size
    full_batches = total_battles // batch_size

    player_1_configuration = PlayerConfiguration("Player 1", None)
    player_2_configuration = PlayerConfiguration("Player 2", None)

    p1 = RandomPlayer(
        player_configuration=player_1_configuration,
        battle_format="gen8nationaldexag",
        server_configuration=LocalhostServerConfiguration,
        team=RandomTeambuilder(),
        max_concurrent_battles=batch_size,
        log_level=log_level,
    )
    p2 = RandomPlayer(
        player_configuration=player_2_configuration,
        battle_format="gen8nationaldexag",
        server_configuration=LocalhostServerConfiguration,
        team=RandomTeambuilder(),
        max_concurrent_battles=batch_size,
        log_level=log_level,
    )

    async def play_batch(n_challenges):
        # p1 challenges p2 while p2 accepts the same number of challenges.
        await asyncio.gather(
            p1.send_challenges(
                opponent=to_id_str(p2.username),
                n_challenges=n_challenges,
                to_wait=p2.logged_in,
            ),
            p2.accept_challenges(
                opponent=to_id_str(p1.username),
                n_challenges=n_challenges,
            ),
        )

    # The leftover batch runs first (possibly with 0 challenges), as before.
    await play_batch(leftover)

    for _ in tqdm(range(full_batches)):
        await play_batch(batch_size)
def choose_move(self, battle):
    """Choose the move whose model prediction is highest for ``battle``.

    A ``SimpleRLPlayer`` helper embeds the battle into a state, the state is
    reshaped to ``(1, 1, -1)`` and given to ``self.model.predict``, and the
    index of the largest prediction is converted back into a move by the
    helper.

    The helper is created on first use and cached on ``self``. Previously
    two new ``SimpleRLPlayer`` objects were constructed on every call.

    :param battle: battle object to choose a move for.
    :return: the value of ``SimpleRLPlayer._action_to_move`` for the
        selected action.
    """
    helper = getattr(self, "_rl_helper", None)
    if helper is None:
        helper = SimpleRLPlayer(
            player_configuration=PlayerConfiguration(
                LOGIN_USERNAME, LOGIN_PASSWORD
            ),
            server_configuration=ShowdownServerConfiguration,
        )
        self._rl_helper = helper

    state = helper.embed_battle(battle=battle)
    state = np.array(state).reshape((1, 1, -1))
    predictions = self.model.predict([state])[0]
    action = np.argmax(predictions)
    return helper._action_to_move(action, battle)
async def simple_cross_evaluation(n_battles, finished):
    """Cross-evaluate three ``RandomPlayer`` instances, then stop them.

    :param n_battles: used both as ``max_concurrent_battles`` for each
        player and as ``n_challenges`` for ``cross_evaluate``.
    :param finished: accepted but not used by this function.
    """
    configurations = [
        PlayerConfiguration(name, None)
        for name in ("Player 1", "Player 2", "Player 3")
    ]

    players = []
    for configuration in configurations:
        players.append(
            RandomPlayer(
                player_configuration=configuration,
                battle_format="gen7randombattle",
                server_configuration=LocalhostServerConfiguration,
                max_concurrent_battles=n_battles,
            )
        )

    await cross_evaluate(players, n_challenges=n_battles)

    # Stop the players one after another.
    for participant in players:
        await participant.stop_listening()
async def simple_cross_evaluation(n_battles, format_, i=0):
    """Cross-evaluate three ``RandomPlayer`` instances in ``format_``.

    :param n_battles: used both as ``max_concurrent_battles`` for each
        player and as ``n_challenges`` for ``cross_evaluate``.
    :param format_: battle format given to every player.
    :param i: offset for the player names, which are "Player i+1" to
        "Player i+3".
    """
    configurations = [
        PlayerConfiguration("Player %d" % (i + offset), None)
        for offset in (1, 2, 3)
    ]

    players = []
    for configuration in configurations:
        players.append(
            RandomPlayer(
                player_configuration=configuration,
                battle_format=format_,
                server_configuration=LocalhostServerConfiguration,
                max_concurrent_battles=n_battles,
            )
        )

    await cross_evaluate(players, n_challenges=n_battles)

    # Stop the players one after another.
    for participant in players:
        await participant.stop_listening()
async def training(future, child, opponent):
    """Train a ``PokeAgent`` for 800 single-battle episodes.

    After each episode the agent's epsilon is decayed, never dropping below
    ``min_epsilon``. After episode 1 and after every 100th episode a
    ``[-3, episode_count]`` message is sent on the agent's connection. At
    the end ``future`` receives its result and ``[-1]`` is sent.

    :param future: object whose ``set_result`` is called when training ends.
    :param child: connection handed to the agent as ``conn``.
    :param opponent: ``"random"`` selects a ``RandomPlayer``; anything else
        selects an ``AggressivePlayer``.
    """
    total_episodes = 800
    shared_kwargs = dict(
        battle_format="gen7letsgorandombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    agent_cfg = PlayerConfiguration("Agent player", None)
    enemy_cfg = PlayerConfiguration("Enemy player", None)

    agent_player = PokeAgent(
        player_configuration=agent_cfg, conn=child, **shared_kwargs
    )

    enemy_class = RandomPlayer if opponent == "random" else AggressivePlayer
    enemy_player = enemy_class(
        player_configuration=enemy_cfg, **shared_kwargs
    )

    for completed in range(1, total_episodes + 1):
        await agent_player.train_against(enemy_player, 1)

        if agent_player.epsilon > agent_player.min_epsilon:
            decayed = agent_player.epsilon * agent_player.epsilon_decay
            agent_player.epsilon = max(decayed, agent_player.min_epsilon)

        # Checkpoint after the first episode and after every 100th.
        if completed == 1 or completed % 100 == 0:
            print("Fiz " + str(completed) + " batalhas - SAVING MODEL")
            agent_player.conn.send([-3, completed])

    print("Terminei")
    future.set_result("I'm done!")
    agent_player.conn.send([-1])
async def evaluating(future, child, opponent):
    """Evaluate a non-training ``PokeAgent`` over 100 single-challenge rounds.

    The agent's result against the opponent is accumulated across the
    rounds and printed. Afterwards ``future`` receives its result and
    ``[-1]`` is sent on the agent's connection.

    :param future: object whose ``set_result`` is called when done.
    :param child: connection handed to the agent as ``conn``.
    :param opponent: ``"random"`` selects a ``RandomPlayer``; anything else
        selects an ``AggressivePlayer``.
    """
    shared_kwargs = dict(
        battle_format="gen7letsgorandombattle",
        server_configuration=LocalhostServerConfiguration,
    )

    agent_cfg = PlayerConfiguration("Agent player", None)
    enemy_cfg = PlayerConfiguration("Enemy player", None)

    agent_player = PokeAgent(
        player_configuration=agent_cfg,
        conn=child,
        train=False,
        **shared_kwargs
    )

    enemy_class = RandomPlayer if opponent == "random" else AggressivePlayer
    enemy_player = enemy_class(
        player_configuration=enemy_cfg, **shared_kwargs
    )

    won_battles = 0
    for _ in range(100):
        outcome = await cross_evaluate(
            [agent_player, enemy_player], n_challenges=1
        )
        won_battles += outcome[agent_player.username][enemy_player.username]

    print("Agent won {} / 100 battles".format(won_battles))
    future.set_result("I'm done!")
    agent_player.conn.send([-1])
async def cross_evaluation(n_battles, format_, teams):
    """Cross-evaluate one ``RandomPlayer`` per team, then stop them all.

    :param n_battles: used both as ``max_concurrent_battles`` for each
        player and as ``n_challenges`` for ``cross_evaluate``.
    :param format_: battle format given to every player.
    :param teams: iterable of teams; the player at position ``k`` is named
        "Player k" and receives ``teams[k]``.
    """
    players = []
    for index, team in enumerate(teams):
        configuration = PlayerConfiguration("Player %d" % index, None)
        players.append(
            RandomPlayer(
                player_configuration=configuration,
                battle_format=format_,
                server_configuration=LocalhostServerConfiguration,
                max_concurrent_battles=n_battles,
                team=team,
            )
        )

    await cross_evaluate(players, n_challenges=n_battles)

    # Stop the players one after another.
    for participant in players:
        await participant.stop_listening()
async def simple_cross_evaluation(n_battles, n_concurrent_battles):
    """Cross-evaluate a random player against a move-tracking random player.

    :param n_battles: ``n_challenges`` given to ``cross_evaluate``.
    :param n_concurrent_battles: ``max_concurrent_battles`` for both players.
    :return: ``move_history`` of the ``MoveCallTrackingRandomPlayer``.
    """
    shared_kwargs = dict(
        battle_format="gen7randombattle",
        server_configuration=LocalhostServerConfiguration,
        max_concurrent_battles=n_concurrent_battles,
    )

    plain_cfg = PlayerConfiguration("Player 1", None)
    tracking_cfg = PlayerConfiguration("Player 2", None)

    plain_player = RandomPlayer(
        player_configuration=plain_cfg, **shared_kwargs
    )
    tracking_player = MoveCallTrackingRandomPlayer(
        player_configuration=tracking_cfg, **shared_kwargs
    )
    players = [plain_player, tracking_player]

    await cross_evaluate(players, n_challenges=n_battles)

    # Stop the players one after another.
    for participant in players:
        await participant.stop_listening()

    return tracking_player.move_history
def choose_move(self, battle):
    '''
    Determine the best move given the battle and model.

    A ``SimpleRLPlayer`` helper embeds the battle into a state, the state is
    reshaped to ``(1, 1, -1)`` and given to ``self.model.predict``, and the
    index of the largest prediction is converted back into a move by the
    helper.

    The helper is created on first use and cached on ``self``. Previously
    two new ``SimpleRLPlayer`` objects were constructed on every call.

    :param battle: Pokemon Showdown battle object
    :return: the value of ``SimpleRLPlayer._action_to_move`` for the
        selected action
    '''
    helper = getattr(self, "_rl_helper", None)
    if helper is None:
        # NOTE(review): account credentials are hard-coded here.
        helper = SimpleRLPlayer(
            player_configuration=PlayerConfiguration(
                "mynameisgillian", "beanscool"
            ),
            server_configuration=ShowdownServerConfiguration,
        )
        self._rl_helper = helper

    state = helper.embed_battle(battle=battle)
    state = np.array(state).reshape((1, 1, -1))
    predictions = self.model.predict([state])[0]
    action = np.argmax(predictions)
    return helper._action_to_move(action, battle)
async def main():
    """Play 40 ladder games with a ``MaxDamagePlayer`` and print the ratings.

    The player is configured with ``LOGIN_USERNAME`` / ``LOGIN_PASSWORD``
    and ``ShowdownServerConfiguration``. After the ladder run, the rating
    and opponent rating of every battle the player holds are printed.
    """
    account = PlayerConfiguration(LOGIN_USERNAME, LOGIN_PASSWORD)
    player = MaxDamagePlayer(
        player_configuration=account,
        server_configuration=ShowdownServerConfiguration,
    )

    await player.ladder(40)

    for finished_battle in player.battles.values():
        print(finished_battle.rating, finished_battle.opponent_rating)
def test_two_successive_calls_gen8():
    """Call ``play_against`` twice in a row on the same pair of players.

    Each call runs ``play_function`` with ``n_battles=2``. Both players use
    ``gen8randombattle``, ``LocalhostServerConfiguration`` and
    ``log_level=20``.
    """
    shared_kwargs = dict(
        battle_format="gen8randombattle",
        server_configuration=LocalhostServerConfiguration,
        log_level=20,
    )

    env_player = RandomEnvPlayer(
        player_configuration=PlayerConfiguration("EnvPlayerGen8-2", None),
        **shared_kwargs
    )
    random_player = RandomPlayer(
        player_configuration=PlayerConfiguration("RandomPlayerGen8-2", None),
        **shared_kwargs
    )

    # Two identical, successive runs against the same opponent.
    for _ in range(2):
        env_player.play_against(
            env_algorithm=play_function,
            opponent=random_player,
            env_algorithm_kwargs={"n_battles": 2},
        )
async def battle_human(future, child):
    """Have a non-training ``PokeAgent`` accept one challenge.

    ``accept_challenges(None, 1)`` is awaited, then ``future`` receives its
    result and ``[-1]`` is sent on the agent's connection.

    :param future: object whose ``set_result`` is called when done.
    :param child: connection handed to the agent as ``conn``.
    """
    agent_cfg = PlayerConfiguration("Agent player", None)
    agent_player = PokeAgent(
        player_configuration=agent_cfg,
        battle_format="gen7letsgorandombattle",
        server_configuration=LocalhostServerConfiguration,
        conn=child,
        train=False,
    )

    await agent_player.accept_challenges(None, 1)

    future.set_result("I'm done!")
    agent_player.conn.send([-1])
def __init__(self, name, shortname, team, battle_format="gen8ou", log_level=0,
             server_configuration=None, save_replays=False):
    """Initialise the player by forwarding every argument to the base class.

    ``name`` is additionally stored on the instance as ``self.name``.

    :param name: player name, forwarded and kept as ``self.name``.
    :param shortname: forwarded to the base class.
    :param team: forwarded to the base class.
    :param battle_format: forwarded to the base class.
    :param log_level: forwarded to the base class.
    :param server_configuration: forwarded to the base class.
    :param save_replays: forwarded to the base class.
    """
    # NOTE(review): a PlayerConfiguration(name, None) used to be built here
    # but was never passed on, so it has been removed. Confirm the base
    # class does not expect a ``player_configuration`` argument.
    super().__init__(
        team=team,
        name=name,
        shortname=shortname,
        battle_format=battle_format,
        log_level=log_level,
        server_configuration=server_configuration,
        save_replays=save_replays,
    )
    self.name = name
async def main():
    """Play 5 ladder games with a ``MaxDamagePlayer`` on a custom server.

    The server configuration is built from the two literal endpoint strings
    below.
    """
    # NOTE(review): account credentials are hard-coded here.
    account = PlayerConfiguration("USCPokebot", "uscpokebot")
    server = ServerConfiguration(
        "sim.smogon.com:8000",
        "https://play.pokemonshowdown.com/action.php?",
    )

    player = MaxDamagePlayer(
        player_configuration=account,
        server_configuration=server,
    )

    await player.ladder(5)
async def main():
    """Run a ``RandomPlayer`` through the challenge and ladder entry points.

    In order: send one challenge to "your_username", accept one challenge
    from any user, accept three challenges from "your_username", play five
    ladder games, then print the rating and opponent rating of every battle
    the player holds.
    """
    # We create a random player
    player = RandomPlayer(
        # The comma after this argument was missing (a syntax error).
        player_configuration=PlayerConfiguration("bot_username", "bot_password"),
        server_configuration=ShowdownServerConfiguration,
    )

    # Sending challenges to 'your_username'
    await player.send_challenges("your_username", n_challenges=1)

    # Accepting one challenge from any user
    await player.accept_challenges(None, 1)

    # Accepting three challenges from 'your_username'
    await player.accept_challenges('your_username', 3)

    # Playing 5 games on the ladder
    await player.ladder(5)

    # Print the rating of the player and its opponent after each battle
    for battle in player.battles.values():
        print(battle.rating, battle.opponent_rating)
async def train(hparams, fp):
    """Build player, model and trainer from ``hparams``, train, evaluate, save.

    The state engine, model and trainer classes are looked up by name in
    ``pokebot.bots.state_engine``, ``pokebot.models`` and
    ``pokebot.trainers.trainer``. After training, the trainer is evaluated
    against three ``gen8randombattle`` opponents and the agent's weights are
    saved.

    :param hparams: hyperparameters; this function reads ``policy``,
        ``agent``, ``state_engine``, ``se_params``, ``model``,
        ``model_params``, ``trainer`` and ``trainer_params``.
    :param fp: destination passed to ``trainer.agent.save_weights``.
    """
    wandb.config.update(hparams)

    policy_kwargs = hparams.policy
    agent_kwargs = hparams.agent

    # State engine, resolved by class name; falsy params become {}.
    engine_module = importlib.import_module('pokebot.bots.state_engine')
    engine_class = getattr(engine_module, hparams.state_engine)
    engine_kwargs = hparams.se_params or {}
    player = BotPlayer(
        player_configuration=PlayerConfiguration("test", None),
        state_engine=engine_class(**engine_kwargs),
    )

    # Model, resolved by class name.
    model_module = importlib.import_module('pokebot.models')
    model_class = getattr(model_module, hparams.model)
    model_kwargs = hparams.model_params or {}
    model = model_class(player, **model_kwargs)

    # Trainer, resolved by class name.
    trainer_module = importlib.import_module('pokebot.trainers.trainer')
    trainer_class = getattr(trainer_module, hparams.trainer)
    trainer_kwargs = hparams.trainer_params or {}
    trainer = trainer_class(
        player, model, policy_kwargs, agent_kwargs, **trainer_kwargs
    )

    await trainer.train()

    opponent_classes = (RandomPlayer, MaxBasePowerPlayer, SimpleHeuristicsPlayer)
    opponents = [
        opponent_class(battle_format="gen8randombattle")
        for opponent_class in opponent_classes
    ]

    print("Beginning Eval")
    await trainer.evaluate(opponents)
    trainer.agent.save_weights(fp)
import asyncio import time import numpy as np from poke_env.player.player import Player from poke_env.player.random_player import RandomPlayer from poke_env.player.utils import cross_evaluate from poke_env.player_configuration import PlayerConfiguration from poke_env.server_configuration import ServerConfiguration from poke_env.server_configuration import ShowdownServerConfiguration my_player_config = PlayerConfiguration("A80VE", "123456") # If your server is accessible at my.custom.host:5432, and your authentication # endpoint is authentication-endpoint.com/action.php? my_server_config = ServerConfiguration( "https://china.psim.us/", "authentication-endpoint.com/action.php?" ) class MaxDamagePlayer(Player): def switch(self, battle): if (len(battle.available_switches) == 0): return self.choose_random_move(battle) max_type_adv = -np.inf switch = None for pokemon in battle.available_switches: curr_type_adv = teampreview_performance( pokemon, battle.opponent_active_pokemon)
dqn.fit(player, nb_steps=nb_steps) player.complete_current_battle() def dqn_evaluation(player, dqn, nb_episodes): # Reset battle statistics player.reset_battles() dqn.test(player, nb_episodes=nb_episodes, visualize=False, verbose=False) print("DQN Evaluation: %d victories out of %d episodes" % (player.n_won_battles, nb_episodes)) if __name__ == "__main__": env_player = SimpleRLPlayer( player_configuration=PlayerConfiguration("RL Player", None), battle_format="gen7randombattle", server_configuration=LocalhostServerConfiguration, ) opponent = RandomPlayer( player_configuration=PlayerConfiguration("Random player", None), battle_format="gen7randombattle", server_configuration=LocalhostServerConfiguration, ) second_opponent = MaxDamagePlayer( player_configuration=PlayerConfiguration("Max damage player", None), battle_format="gen7randombattle", server_configuration=LocalhostServerConfiguration, )
parser = argparse.ArgumentParser() parser.add_argument("--config", type=str, default="filepath") parser.add_argument("--saved_model", type=str, default="filepath") args = parser.parse_args() hyperparameter_defaults = create_config(args.config) hyperparameter_defaults["saved_model"] = args.saved_model wandb.init(config=hyperparameter_defaults) config = wandb.config file_time = str(time.time()) custom_builder = RandomTeamFromPool([teams[config.our_team_name]]) custom_builder2 = RandomTeamFromPool([teams[config.opponent_team_name]]) env_player = BigBoyRLPlayer( player_configuration=PlayerConfiguration("SimpleRLPlayer", None), battle_format="gen8ou", team=custom_builder, server_configuration=LocalhostServerConfiguration, ) opponent = RandomPlayer( player_configuration=PlayerConfiguration("Random player", None), battle_format="gen8ou", team=custom_builder2, server_configuration=LocalhostServerConfiguration, ) second_opponent = MaxDamagePlayer( player_configuration=PlayerConfiguration("Max damage player", None), battle_format="gen8ou",