def evaluate(num_games, players, idx=0, debug=False):
    """Evaluate the player at `idx` over `num_games` games; returns the
    win rate (as a percentage) and the average reward per game."""
    print("Starting evaluation...")
    # players[idx].reset(idx)
    wins = 0
    toss = None
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize(toss)
        winners = game.play_game()
        if idx in winners:
            wins += 1
        # toss = winners[0]
    # wins, _ = players[idx].reset(idx)
    avg_reward = players[idx].total_reward / num_games
    print(wins, wins / num_games, avg_reward)
    return wins / num_games * 100, avg_reward
def evaluate(num_games, players, idx=0, debug=False):
    """Evaluate the agent at index `idx` by counting its wins over
    `num_games` games; the winner of each game gets the next toss."""
    print("Starting evaluation...")
    wins = 0
    toss = None
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize(toss)
        winners = game.play_game()
        if idx in winners:
            wins += 1
        toss = winners[0]
    avg_reward = 0  # this variant does not track rewards
    print(wins, wins / num_games, avg_reward)
    return wins / num_games * 100, avg_reward
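# Usage sketch (an assumption, not one of the repo's entry points): evaluate a
# trained DQN agent at seat 0 against three RandomAgents, reusing the
# `DQNAgent` setup from `test()` below. `example_evaluate` is a hypothetical name.
def example_evaluate():
    agent = DQNAgent()
    agent.load_model()
    agent.eval = True
    players = [agent, RandomAgent(1), RandomAgent(2), RandomAgent(3)]
    win_rate, avg_reward = evaluate(500, players, idx=0)
    print("Win rate: {}%, avg reward: {}".format(win_rate, avg_reward))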
def test():
    """Play a single game between four copies of a trained DQN agent."""
    agent = DQNAgent()
    agent.load_model()
    agent.eval = True
    players = [agent, agent, agent, agent]
    game = Game(players)
    game.initialize()
    game.play_game()
def experiments():
    """Play 10,000 games between random agents and return the average
    number of steps per game."""
    games = 10000
    steps = 0
    players = [RandomAgent(0), RandomAgent(1), RandomAgent(2), RandomAgent(3)]
    for i in range(games):
        game = Game(players)
        game.initialize()
        game.play_game()
        steps += players[0].get_steps()
    return steps / games
def evaluate(num_games, players, idx=0, debug=False):
    """Evaluate the player at `idx` over `num_games` games and return its
    win rate as a percentage; `reset(idx)` clears and reads the win count."""
    print("Starting evaluation...")
    players[idx].reset(idx)
    for i in range(num_games):
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()
    wins, _ = players[idx].reset(idx)
    print(wins, wins / num_games)
    return wins / num_games * 100
def train_dqn(num_games, debug=False):
    """Train a DQN agent via self-play, periodically evaluating it against
    a frozen "weak" snapshot of itself and against a random baseline."""
    win_rate_radiant = []
    win_rate_dire = []
    games = []
    weak_agent = DQNAgent(False, True)
    weak_agent.eval = True
    print("Starting training")
    agent = DQNAgent(True, True)  # first flag indicates that we want to train the agent
    agent.save_model("weak")
    players = [agent, agent, agent, agent]
    plt.plot(games, win_rate_radiant, win_rate_dire)
    i = 0
    games_i = 0
    while i < num_games:  # originally `while 1`; honor the num_games argument
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()
        agent.optimize_model()
        if i % 250 == 0:
            agent.mirror_models()  # presumably syncs the target network
        if i % 300 == 0:
            print("Total Games: {}".format(i))
        if i % 10000 == 0 and i != 0:
            players[0].save_model("final")
            agent.eval = True
            agent.train = False
            weak_agent.load_model("weak")
            players3 = [agent, weak_agent, agent, weak_agent]
            win_rate_r, _ = evaluate(500, players3, 0)
            players2 = [agent, RandomAgent(), agent, RandomAgent()]
            win_rate_d, _ = evaluate(500, players2, 0)
            win_rate_radiant.append(win_rate_r / 100)
            win_rate_dire.append(win_rate_d / 100)
            games.append(games_i)
            plt.plot(games, win_rate_radiant, win_rate_dire)
            plt.savefig()
            agent.eval = False
            agent.train = True
            if win_rate_r < 50:  # if the previous agent beats you, train against it
                strategy_collapse(players3, agent)
                games_i += 2500  # strategy_collapse plays 2500 extra games
            agent.save_model("weak")
        i += 1
        games_i += 1
def evaluate(num_games, players, idx=0, debug=False):
    """Evaluate the NFSP player at `idx`, resampling each NFSP agent's
    episode policy before every game."""
    print("Starting evaluation...")
    players[idx].reset()
    for i in range(num_games):
        for player in players:
            if isinstance(player, NFSPAgent):
                player.sample_episode_policy()
        game = Game(players, debug, debug)
        game.initialize()
        game.play_game()
    wins = players[idx].reset()
    print(wins, wins / num_games)
    return wins / num_games * 100
def strategy_collapse(players, agent):
    """To prevent strategy collapse, we occasionally train against a former
    version of ourselves that beat us in evaluation."""
    for i in range(2500):
        game = Game(players)
        game.initialize()
        game.play_game()
        agent.optimize_model()
        if i % 250 == 0:
            agent.mirror_models()
def train_nfsp(num_games, debug=False):
    """Train four NFSP agents via self-play, periodically evaluating each
    team's seats against random opponents."""
    players = [NFSPAgent(), NFSPAgent(), NFSPAgent(), NFSPAgent()]
    win_rate_radiant = []
    win_rate_dire = []
    games = []
    for i in range(num_games):
        for player in players:
            player.sample_episode_policy()
        game = Game(players, False, False)
        game.initialize()
        game.play_game()
        # for player in players:
        #     player.optimize_model()
        if i % 100 == 0:
            print("Total Games: {}".format(i))
        # if i % 1000 == 0:
        #     for player in players:
        #         player.mirror_models()
        if i % 5000 == 0:
            players[0].save_model("final")
            print("Steps done: {}".format(players[0].steps))
        if i % 5000 == 0 and i != 0:
            temp_players_radiant = [
                players[0], RandomAgent(1), players[2], RandomAgent(3)
            ]
            temp_players_dire = [
                RandomAgent(0), players[1], RandomAgent(2), players[3]
            ]
            for player in players:
                player.eval = True
            win_rate_r = evaluate(100, temp_players_radiant, 0)
            win_rate_d = evaluate(100, temp_players_dire, 1)
            games.append(i)
            win_rate_radiant.append(win_rate_r)
            win_rate_dire.append(win_rate_d)
            plt.plot(games, win_rate_radiant, win_rate_dire)
            plt.savefig()
            for player in players:
                player.eval = False
    plt.savefig()
    players[0].save_model("final")
    print("Steps done: {}".format(players[0].steps))
def strategy_collapse(agent, weak_agent, num_games):
    """PPO variant: train against a frozen weak snapshot of ourselves,
    gathering experience and optimizing every CONCURRENT_GAMES games."""
    print("Strategy Collapse...")
    for i in range(num_games):
        players = [
            agent.get_player(),
            weak_agent.get_player(False),
            agent.get_player(),
            weak_agent.get_player(False)
        ]
        game = Game(players)
        game.initialize()
        game.play_game()
        if (i % CONCURRENT_GAMES) == 0:
            agent.gather_experience()
            agent.optimize_model()
    agent.save_model("weak")
def run_game(players):
    """Play one full game between the given players."""
    game = Game(players)
    game.initialize()
    game.play_game()
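# A sketch of fanning `run_game` out across worker processes; this mirrors the
# commented-out multiprocessing block in the PPO variant of `train_a2c` below
# and is an untested assumption, not the repo's exercised path. Note that
# learning agents would not share state across processes without extra
# plumbing (hence the RungEnv worker further down).
import multiprocessing as mp

def run_games_parallel(num_games):
    processes = []
    for _ in range(num_games):
        # Each process plays one independent game between four RandomAgents.
        players = [RandomAgent(0), RandomAgent(1), RandomAgent(2), RandomAgent(3)]
        p = mp.Process(target=run_game, args=(players,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()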
def train_a2c(num_games, debug=False):
    """Train an A2C agent via self-play, optimizing every 6 games and
    periodically evaluating against a random baseline and a trained DQN agent."""
    agent = A2CAgent()
    dqn_agent = DQNAgent(True)
    dqn_agent.eval = True
    players = [agent, agent, agent, agent]
    win_rate_radiant = []
    win_rate_dire = []
    games = []
    rewards = []
    wins = 0
    win_rate = []
    avg_rewards = []
    for i in range(num_games):
        game = Game(players, False, False)
        game.initialize()
        game.play_game()
        if i % 6 == 0:
            agent.optimize_model()
        # (disabled) after each game: play a test game against RandomAgents,
        # tracking wins and per-game rewards via agent.reset(0)
        if i % 100 == 0:
            print("Total Games: {}".format(i))
        # (disabled) every 250 games: plot mean reward and win rate with
        # plt.plot_reward(games, avg_rewards, win_rate)
        if i % 5000 == 0:
            players[0].save_model("final")
            print("Steps done: {}".format(players[0].steps))
        if i % 5000 == 0 and i != 0:
            temp_players_radiant = [
                players[0], RandomAgent(), players[2], RandomAgent()
            ]
            temp_players_dire = [dqn_agent, players[1], dqn_agent, players[3]]
            win_rate_r = evaluate(100, temp_players_radiant, 0)
            win_rate_d = evaluate(100, temp_players_dire, 1)
            games.append(i)
            win_rate_radiant.append(win_rate_r)
            win_rate_dire.append(win_rate_d)
            plt.plot(games, win_rate_dire, win_rate_radiant)
            plt.savefig()
            agent.clear_trajectory()
    plt.savefig()
    players[0].save_model("final")
    print("Steps done: {}".format(players[0].steps))
def train_a2c(num_games, debug=False):
    """PPO variant of the self-play training loop (the name is historical;
    it trains a PPOAgent): play CONCURRENT_GAMES games per update and
    periodically evaluate against a weak snapshot and a random baseline."""
    agent = PPOAgent()
    weak_agent = PPOAgent()
    agent.save_model(0)
    weak_agent.load_model(0)
    # dqn_agent = DQNAgent(True)
    # dqn_agent.eval = True
    win_rate_radiant = []
    win_rate_dire = []
    win_rate_random = []
    games = []
    for i in range(num_games):
        then = time.time()
        for _ in range(CONCURRENT_GAMES):  # play a batch of games per update
            players = [
                agent.get_player(),
                agent.get_player(),
                agent.get_player(),
                agent.get_player()
            ]
            game = Game(players)
            game.initialize()
            game.play_game()
            agent.gather_experience()
        agent.optimize_model()
        print("Time elapsed for {} games: {}".format(CONCURRENT_GAMES,
                                                     time.time() - then))
        # (disabled) fan the batch out across processes instead:
        # processes = []
        # for rank in range(CONCURRENT_GAMES):
        #     players = [RandomAgent(0), RandomAgent(1), RandomAgent(2), RandomAgent(3)]
        #     p = mp.Process(target=play_game, args=(players,))
        #     p.start()
        #     processes.append(p)
        # for p in processes:
        #     p.join()
        if (i % 20) == 0 and i != 0:  # evaluation time
            temp_players = [
                weak_agent.get_player(False),
                agent.get_player(False),
                weak_agent.get_player(False),
                agent.get_player(False)
            ]
            temp_players_random = [
                agent.get_player(False),
                RandomAgent(),
                agent.get_player(False),
                RandomAgent()
            ]
            win_rate_self, _ = evaluate(100, temp_players, 1)
            win_rate_rand, _ = evaluate(100, temp_players_random, 0)
            games.append(i)
            win_rate_radiant.append(win_rate_self / 100)
            win_rate_random.append(win_rate_rand / 100)
            plt.plot(games, win_rate_radiant, win_rate_dire, win_rate_random)
            plt.savefig()
            if win_rate_self < 50:  # the past self was better, train against it
                strategy_collapse(agent, weak_agent, CONCURRENT_GAMES * 5)
            agent.clear_experience()
            agent.save_model(i)
            weak_agent.load_model(i)
        agent.save_model()
        if (i % 20) == 0:
            agent.save_model(i)
            print("Total Games:{}, Total Updates:{}".format(
                i * CONCURRENT_GAMES, i))
        # (disabled) every 100 updates: evaluate against RandomAgent and
        # dqn_agent seats and plot average rewards via plt.plot_reward
    plt.savefig()
def play_game():
    """Play one interactive game: a human at seat 1 against three random agents."""
    players = [RandomAgent(0), HumanAgent(1), RandomAgent(2), RandomAgent(3)]
    game = Game(players, True, True)
    game.initialize()
    game.play_game()
from multiprocessing import Process


class RungEnv(Process):
    """A worker process that repeatedly plays games with the latest model
    parameters and streams the collected experience back over a pipe."""

    def __init__(self, pipe) -> None:
        super(RungEnv, self).__init__()
        self.pipe = pipe  # one end of a multiprocessing.Pipe()
        self.actor = None
        self.critic = None
        self.game = None
        self.agent = PPOAgent()

    def get_params(self):
        """
        Gets the parameters of the latest model from the parent process
        and loads them into the agent
        """
        self.actor = self.pipe.recv()
        self.critic = self.pipe.recv()
        self.agent.load_params(self.actor, self.critic)

    def prepare_game(self):
        """
        Prepares a new game with four players drawn from the current agent
        """
        self.players = [
            self.agent.get_player(),
            self.agent.get_player(),
            self.agent.get_player(),
            self.agent.get_player()
        ]
        self.game = Game(self.players)

    def run(self):
        while True:
            msg = self.pipe.recv()
            if msg == "REFRESH":
                # Load new parameters, then wait for the next message.
                self.get_params()
                continue
            elif msg == "RESET":
                # Fall through and play another game.
                pass
            elif msg == "TERMINATE":
                print("Terminate the environment instance")
                break
            self.prepare_game()
            self.game.initialize()
            self.game.play_game()
            self.agent.gather_experience()
            self.send_data()

    def send_data(self):
        # Signal the end of a game, then send the four experience batches.
        self.pipe.send("END")
        self.pipe.send(self.agent.state_batch)
        self.pipe.send(self.agent.action_batch)
        self.pipe.send(self.agent.reward_batch)
        self.pipe.send(self.agent.log_probs_batch)
        self.agent.clear_experience()
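# A minimal sketch of the parent side of RungEnv's pipe protocol, written to
# match run()/send_data() above: "REFRESH" followed by the actor and critic
# parameters, "RESET" to trigger one game, then "END" plus the four experience
# batches coming back. The function name and the actor_params/critic_params
# arguments are assumptions; the trainer would pass whatever
# PPOAgent.load_params() expects.
from multiprocessing import Pipe

def collect_batches(num_workers, actor_params, critic_params):
    pipes, envs = [], []
    for _ in range(num_workers):
        parent_end, child_end = Pipe()
        env = RungEnv(child_end)
        env.start()
        pipes.append(parent_end)
        envs.append(env)
    for pipe in pipes:
        pipe.send("REFRESH")  # the worker loads the new parameters...
        pipe.send(actor_params)
        pipe.send(critic_params)
        pipe.send("RESET")    # ...then plays one game
    batches = []
    for pipe in pipes:
        assert pipe.recv() == "END"
        # state, action, reward, log_probs batches, in send_data() order
        batches.append((pipe.recv(), pipe.recv(), pipe.recv(), pipe.recv()))
    for pipe in pipes:
        pipe.send("TERMINATE")
    for env in envs:
        env.join()
    return batches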
def __init__(self, players):
    self.players = players
    self.game = Game(players)
    self.rung_chosen = False
    self.current_player = None