from collections import deque

# EnvironmentSelector, World, shift_list, serialize and deserialize are
# project utilities assumed to be imported elsewhere in this module.


def evaluate(agent_profile, agent_new_path, agent_old_path, games_num,
             experience_path=None, acceptance_rate=0.6,
             verbose=True, debug=False, max_steps=None,
             self_play_examples_deque=None):
    # Avoid a mutable default argument: a shared deque would leak
    # examples between calls.
    if self_play_examples_deque is None:
        self_play_examples_deque = deque()

    print("Evaluating model with games_num %d and acceptance_rate %f"
          % (games_num, acceptance_rate))

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(agent_profile)
    agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(agent_profile)
    game = env_selector.get_game(agent_profile.game)

    # Fill every seat with the old agent, then put the new agent in seat 0.
    agents = []
    for idx in range(game.get_players_num()):
        old_agent = agent.clone()
        old_agent.load(agent_old_path)
        agents.append(old_agent)

    agent.load(agent_new_path)
    agents[0] = agent

    arena_games_results = [0] * len(agents)
    arena_examples = []
    arena_games_n = int(games_num / game.get_players_num())

    world = World()

    # Rotate the seating so the new agent plays every position equally often.
    for jdx in range(game.get_players_num()):
        playing_agents = shift_list(agents, jdx)

        sess_arena_examples, games_results = world.execute_games(
            playing_agents, game, arena_games_n,
            max_game_steps_n=max_steps,
            verbose=verbose, show_every_turn=debug)

        # Undo the rotation so results line up with the original seat order.
        games_results = shift_list(games_results, -jdx)

        for index in range(len(arena_games_results)):
            arena_games_results[index] += games_results[index]

        arena_examples.extend(sess_arena_examples)

    self_play_examples_deque += arena_examples

    if experience_path:
        serialize(self_play_examples_deque, experience_path)

    cur_rewards = arena_games_results[0]
    other_rewards = sum(arena_games_results) - cur_rewards

    print("Current agent got rewards: %d\n"
          "Total reward across all other agents: %d"
          % (cur_rewards, other_rewards))

    # The original expression compared a boolean to acceptance_rate, which
    # silently reduced the check to a simple majority. Here acceptance_rate
    # is interpreted as the share of the total reward the new agent must
    # collect to be accepted (an assumption about the intended semantics).
    total_rewards = cur_rewards + other_rewards
    updated = total_rewards > 0 and (cur_rewards / total_rewards) >= acceptance_rate

    return updated
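# shift_list() is a project utility that is not shown in this file. A minimal
# sketch of the rotation it is assumed to perform, based on how evaluate()
# uses it (shift by jdx to seat the agents, shift by -jdx to restore order):
def shift_list_sketch(items, n):
    # Rotate left by n positions; shift_list_sketch(x, k) followed by
    # shift_list_sketch(x, -k) restores the original order.
    if not items:
        return items
    n %= len(items)
    return items[n:] + items[:n]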
def train(agent_profile, agent_path, out_agent_path,
          memory_path=None, game_memory=None,
          train_distributed=False, train_distributed_native=False,
          epochs=1):
    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(agent_profile,
                                   native_multi_gpu_enabled=train_distributed_native)

    if agent_path:
        agent.load(agent_path)

    if not game_memory:
        if not memory_path:
            raise ValueError("You must specify either game memory or memory path!")
        print("Deserializing memory from the memory path...")
        game_memory = deserialize(memory_path)
        print("%d steps loaded from memory" % len(game_memory))

    print("Initiate training...")
    agent.train(game_memory, epochs=epochs)
    print("Training finished!")

    if train_distributed:
        import horovod.tensorflow as hvd
        if hvd.rank() == 0:
            # Save only on the main server.
            agent.save(out_agent_path)
    else:
        agent.save(out_agent_path)

    print("Model saved!")
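# Usage sketch (not in the original sources): retrain from a serialized
# experience file. The profile name and file paths below are hypothetical
# placeholders. When train_distributed is set, the process is expected to be
# launched under Horovod (e.g. "horovodrun -np 4 python train.py ..."), so
# that only rank 0 writes the checkpoint.
def example_train_step():
    train("example_profile",
          agent_path="checkpoints/best.h5",      # hypothetical path
          out_agent_path="checkpoints/new.h5",   # hypothetical path
          memory_path="experience/latest.pkl",   # hypothetical path
          epochs=10)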
def generate_self_play(opt_agent_profile, agent_path, games_num,
                       experience_path, max_steps,
                       verbose, debug, exploration_decay_steps,
                       optimize_for_inference=False,
                       self_play_examples_deque=None):
    # Avoid a mutable default argument (same pitfall as in evaluate()).
    if self_play_examples_deque is None:
        self_play_examples_deque = deque()

    world = World()

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(opt_agent_profile)
    agent.load(agent_path)

    agent_profile = env_selector.get_profile(opt_agent_profile)
    game = env_selector.get_game(agent_profile.game)

    if optimize_for_inference:
        agent.disable_training_capability()

    self_play_examples = world.generate_self_play(
        agent, game, games_num,
        max_game_steps_n=max_steps,
        verbose=verbose, show_every_turn=debug,
        exploration_decay_steps=exploration_decay_steps)

    self_play_examples_deque += self_play_examples

    serialize(self_play_examples_deque, experience_path)
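# Pipeline sketch (not in the original sources): one full iteration of the
# self-play / train / evaluate loop that these three functions implement.
# The profile name, paths, and hyperparameters are hypothetical placeholders.
def example_iteration():
    profile = "example_profile"
    best, candidate = "checkpoints/best.h5", "checkpoints/candidate.h5"
    experience = "experience/latest.pkl"

    # 1. Generate fresh self-play games with the current best model.
    generate_self_play(profile, best, games_num=100,
                       experience_path=experience, max_steps=None,
                       verbose=False, debug=False,
                       exploration_decay_steps=None,
                       optimize_for_inference=True)

    # 2. Train a candidate checkpoint on the accumulated experience.
    train(profile, best, candidate, memory_path=experience, epochs=10)

    # 3. Promote the candidate only if it clears the acceptance threshold.
    if evaluate(profile, candidate, best, games_num=40, acceptance_rate=0.6):
        import shutil
        shutil.copy(candidate, best)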
parser.add_argument("--games_num", dest="games_num", default=100, type=int, help="Number of games to play. ") options = parser.parse_args() if not options.folder: parser.error('Folder must be selected') if not options.agent: parser.error('Agent profile must be selected') if not options.random_agent: parser.error('Random agent profile must be selected') env_selector = EnvironmentSelector() agent = env_selector.get_agent(options.agent) print("Pit with agent ", agent.name) agent.set_exploration_enabled(False) random_agent = env_selector.get_agent(options.random_agent) print("Pit with agent ", random_agent.name) random_agent.set_exploration_enabled(False) agent_profile = env_selector.get_profile(options.agent) game = env_selector.get_game(agent_profile.game) world = World() agents = [agent, random_agent]
                    action='store_true',
                    help="Optimize for inference in self-play and evaluation phases")
parser.set_defaults(optimize_for_inference=False)

options = parser.parse_args()

if not options.agent_profile_new:
    parser.error('New agent profile must be selected')

if not options.agent_profile_old:
    parser.error('Old agent profile must be selected')

if not options.games_num:
    parser.error('Number of games must be selected')

env_selector = EnvironmentSelector()

agent_first = env_selector.get_agent(options.agent_profile_new)
print("Pit with agent ", agent_first.name)
agent_first.set_exploration_enabled(False)

agent_second = env_selector.get_agent(options.agent_profile_old)
print("Pit with agent ", agent_second.name)
agent_second.set_exploration_enabled(False)

agent_profile = env_selector.get_profile(options.agent_profile_new)
game = env_selector.get_game(agent_profile.game)

agents = [agent_first, agent_second]

if options.agent_new_path:
    agent_first.load(options.agent_new_path)
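# Likely continuation (sketch, not from the original file): load the old
# checkpoint if one was supplied, then pit the two agents. The
# options.agent_old_path flag is an assumption mirroring agent_new_path.
if options.agent_old_path:
    agent_second.load(options.agent_old_path)

world = World()
_, results = world.execute_games(agents, game, options.games_num,
                                 verbose=True, show_every_turn=False)
print("Results (new agent first): ", results)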
def init_nnet(self):
    env_selector = EnvironmentSelector()
    agent_nnet = env_selector.get_agent("watten_agent_nnet")
    self.nnet = agent_nnet