def tournament(model_dir, local_pop_dir, game_path, base_port, num_envs, num_trials,
               worker_idx, total_workers, reuse_ports=True, level_path=None):
    # Keep references to the real stdout/stderr so they can be restored after logging
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    results = []
    for i, p in enumerate(my_pop):
        print("Worker", worker_idx, "is starting evaluation of", p, "for", num_trials,
              "trials per competitor", flush=True)
        # Redirect stdout/stderr to a per-agent tournament log
        sys.stdout = open(model_dir + p + "/tourn_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port + (num_envs * i * 2)
        j = 0
        last_error = None
        # Retry on higher port ranges when connections to the game fail
        while p_base_port + (j * num_envs * 2) < 60000:
            try:
                p_results = eval.evaluate_agent(model_dir, local_pop_dir, p, game_path,
                                                p_base_port + (j * num_envs * 2),
                                                num_envs, num_trials, level_path=level_path)
                break
            except ConnectionError as e:
                print("ConnectionError detected during tournament, trying a higher port range")
                j += 1
                last_error = e
            except ConnectionResetError as e2:
                print("ConnectionResetError detected during tournament, trying a higher port range")
                j += 1
                last_error = e2
            except EOFError as e3:
                print("EOFError detected during tournament, trying a higher port range")
                j += 1
                last_error = e3
            except json.decoder.JSONDecodeError as e4:
                print("JSONDecodeError detected during tournament, trying a higher port range")
                j += 1
                last_error = e4
        # Restore the original stdout/stderr
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = org_stdout
        sys.stderr = org_stderr
        if p_base_port + (j * num_envs * 2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError("Exhausted port range below 60000 without a recorded error")
        results.append((p, p_results))
        print("Worker", worker_idx, "has completed the evaluation of", p, flush=True)
    return results
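# --- Illustrative sketch (not part of the original source) ---
# `subset_pop` is called above (and in train_multiple_agents and the data-collection
# script) but is defined elsewhere. Assuming it simply partitions the population so
# each worker handles a disjoint slice, a minimal round-robin version might look like
# the following; the 1-based `worker_idx` mirrors the argparse default of 1 used in
# this repo, and the name `subset_pop_sketch` is hypothetical.
def subset_pop_sketch(pop, worker_idx, total_workers):
    # Give worker k (1-based) every total_workers-th agent, starting at offset k-1
    return pop[worker_idx - 1::total_workers]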
def get_pop_elos_agents_stats(model_dir, flags):
    pop = load_pop(model_dir)
    main_pop = []
    elos = []
    agents = []
    stats = []
    for p in pop:
        if "nemesis" in p:
            flags["has_nemesis"] = True
            continue
        elif "survivor" in p:
            flags["has_survivor"] = True
            continue
        main_pop.append(p)
        stat = load_stats(model_dir, p)
        elos.append(last_elo(stat))
        agents.append(last_model_path(model_dir, p, stat))
        stats.append(stat)
    return list(zip(main_pop, elos, agents, stats))
        else:
            #print(i_stats)
            i_stats["elo"][str(i_stats["last_eval_steps"])] = last_elo(i_stats) + elo_changes[i]
            i_stats["last_elo_change_steps"] = i_stats["num_steps"]

if __name__ == "__main__":
    # Setup command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("model_dir", type=str, help="Base directory for agent models")
    args = parser.parse_args()
    print(args, flush=True)

    if not (args.model_dir[-1] == '/' or args.model_dir[-1] == '\\'):
        args.model_dir = args.model_dir + "/"
    if not os.path.isdir(args.model_dir):
        raise FileNotFoundError("Base directory for agent models is not a folder")

    pop = train.load_pop(args.model_dir)
    print("Consolidating training population:", pop, flush=True)
    all_stats = []
    for p in pop:
        all_stats.append(train.load_stats(args.model_dir, p))

    consolidate_results(pop, all_stats)
    make_elo_changes(pop, all_stats)

    for p, s in zip(pop, all_stats):
        train.save_stats(args.model_dir, p, s)
def train_multiple_agents(model_dir, local_pop_dir, game_path, base_port, num_envs,
                          num_steps, worker_idx, total_workers, reuse_ports=True,
                          level_path=None, time_reward=0.):
    # Keep references to the real stdout/stderr so they can be restored after logging
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    for i, p in enumerate(my_pop):
        print("Worker", worker_idx, "is starting training of", p, "for", num_steps,
              "steps", flush=True)
        # Redirect stdout/stderr to a per-agent training log
        sys.stdout = open(model_dir + p + "/train_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port + (num_envs * i * 2)
        j = 0
        last_error = None
        # Retry on higher port ranges when connections to the game fail
        while p_base_port + (j * num_envs * 2) < 60000:
            try:
                train.train_agent(model_dir, local_pop_dir, p, game_path,
                                  p_base_port + (j * num_envs * 2), num_envs, num_steps,
                                  level_path=level_path, time_reward=time_reward)
                break
            except ConnectionError as e:
                print("ConnectionError detected during training, trying a higher port range")
                j += 1
                last_error = e
            except ConnectionResetError as e2:
                print("ConnectionResetError detected during training, trying a higher port range")
                j += 1
                last_error = e2
            except EOFError as e3:
                print("EOFError detected during training, trying a higher port range")
                j += 1
                last_error = e3
            except json.decoder.JSONDecodeError as e4:
                print("JSONDecodeError detected during training, trying a higher port range")
                j += 1
                last_error = e4
        # Restore the original stdout/stderr
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = org_stdout
        sys.stderr = org_stderr
        if p_base_port + (j * num_envs * 2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError("Exhausted port range below 60000 without a recorded error")
        print("Worker", worker_idx, "has completed training of", p, "for", num_steps,
              "steps", flush=True)
def ai_matchmaking_eval(args):
    PLAYER_1 = 0
    PLAYER_2 = 1

    # Ensure the model directory path ends with a trailing slash
    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = list(all_stats.keys())
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)

    # An agent ID ending in .txt is a name file that holds the actual agent ID
    if ".txt" in args.agent_id:
        with open(args.agent_dir + args.agent_id, 'r') as name_file:
            args.agent_id = name_file.readlines()[0]

    agent_stats = load_stats(args.agent_dir, args.agent_id)
    agent = PPO.load(curr_model_path(args.agent_dir, args.agent_id, agent_stats))
    image_based = agent_stats["image_based"]
    env_p = agent_stats["env_p"]

    args.num_envs = min(args.num_envs, len(all_opps))
    env_idx_to_opp_idx = [0 for _ in range(args.num_envs)]
    try:
        env_stack = make_ai_matchmaker_eval_stack(args.game_path, args.base_port,
                                                  image_based, args.level_path,
                                                  env_p, args.num_envs)
        for n in range(args.N_games):
            print("Agent Elo before iteration", n, ':', agent_stats["elo"]["value"][-1], flush=True)
            elo_delta = 0.
            # Load the first batch of opponents, one per environment
            for i in range(args.num_envs):
                env_stack.env_method("load_new_opp", 0, opp_fp(args.model_dir, all_opps[i]), 0, indices=[i])
                env_idx_to_opp_idx[i] = i
            del i
            states = env_stack.reset()
            envs_done = []
            next_i = args.num_envs
            prog_bar = tqdm(range(len(all_opps)), file=sys.stdout)
            # Keep playing until every environment has exhausted its opponents
            while not all([tmp == -1 for tmp in env_idx_to_opp_idx]):
                #print("Worker", args.seed, "current opps:", env_idx_to_opp_idx, flush=True)
                reset_states = env_stack.env_method("reset", indices=envs_done)
                for state, env_idx in zip(reset_states, envs_done):
                    states[env_idx] = state
                envs_done = []
                while len(envs_done) < 1:
                    actions, _ = agent.predict(states)
                    states, _, dones, infos = env_stack.step(actions)
                    if any(dones):
                        for j, done in enumerate(dones):
                            if done:
                                # Record elo change if needed
                                if env_idx_to_opp_idx[j] != -1:
                                    win_rate = .5
                                    if "winner" in infos[j]:
                                        if infos[j]["winner"] == PLAYER_1:
                                            win_rate = 1.
                                        elif infos[j]["winner"] == PLAYER_2:
                                            win_rate = 0.
                                    elo_delta += elo_change(agent_stats["elo"]["value"][-1],
                                                            all_elos[env_idx_to_opp_idx[j]],
                                                            args.K, win_rate)[0]
                                    prog_bar.update()
                                # Load next opponent if needed
                                if next_i < len(all_opps):
                                    env_stack.env_method("load_new_opp", 0,
                                                         opp_fp(args.model_dir, all_opps[next_i]),
                                                         0, indices=[j])
                                    env_idx_to_opp_idx[j] = next_i
                                    next_i += 1
                                else:
                                    env_idx_to_opp_idx[j] = -1
                                envs_done.append(j)
            prog_bar.close()
            agent_stats["elo"]["value"][-1] += int(elo_delta)

        print("Final agent Elo:", agent_stats["elo"]["value"][-1], flush=True)
        save_stats(args.agent_dir, args.agent_id, agent_stats)
    finally:
        env_stack.close()
def ai_matchmaking(args):
    set_random_seed(args.seed)

    # Ensure the model directory path ends with a trailing slash
    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)

    # Either resume an existing agent (by name file or ID) or initialize a new one
    load_agent = True
    if args.agent_id:
        if ".txt" in args.agent_id and os.path.exists(args.agent_dir + args.agent_id):
            with open(args.agent_dir + args.agent_id, 'r') as name_file:
                agent_name = name_file.readlines()[0]
            agent_stats = load_stats(args.agent_dir, agent_name)
        elif not ".txt" in args.agent_id:
            agent_name = args.agent_id
            agent_stats = load_stats(args.agent_dir, agent_name)
        else:
            agent_name, agent_stats = init_agent(args)
            load_agent = False
    else:
        agent_name, agent_stats = init_agent(args)
        load_agent = False

    try:
        env_stack = make_ai_matchmaker_stack(all_stats, all_opps, all_elos,
                                             args.game_path, args.model_dir,
                                             base_port=args.base_port,
                                             image_based=args.image_based,
                                             level_path=args.level_path,
                                             env_p=args.env_p,
                                             time_reward=args.time_reward,
                                             num_envs=args.num_envs,
                                             matchmaking_mode=args.mm,
                                             win_loss_ratio=args.win_loss_ratio)

        v = 1 if args.verbose else 0
        if load_agent:
            agent = PPO.load(args.agent_dir + agent_name + '/' + agent_name + '_' +
                             str(agent_stats["num_steps"]), env=env_stack)
        else:
            agent = save_new_model(agent_name, env_stack, args.num_envs, args.agent_dir,
                                   image_based=args.image_based,
                                   image_pretrain=args.image_pretrain, verbose=v, w=args.w)

        agent.learn(total_timesteps=args.num_steps)
        agent_stats["num_steps"] += args.num_steps

        agent_base = args.agent_dir + agent_name + '/'
        agent_save_path = agent_base + agent_name + '_' + str(agent_stats["num_steps"])
        # Archive older checkpoints before writing the new one
        os.system("zip " + agent_base + "archive.zip " + agent_base + "*_*.zip")
        os.system("rm " + agent_base + "*_*.zip")
        agent.save(agent_save_path)

        # Record the agent's average Elo across all environments
        agent_avg_elo = env_stack.env_method("get_agent_elo")
        agent_avg_elo = sum(agent_avg_elo) / len(agent_avg_elo)
        agent_stats["elo"]["steps"].append(agent_stats["num_steps"])
        agent_stats["elo"]["value"].append(agent_avg_elo)
        save_stats(args.agent_dir, agent_name, agent_stats)

        # For newly created agents, persist the generated name for later runs
        if not load_agent:
            with open(args.agent_dir + "name_" + str(args.seed) + ".txt", 'w') as name_file:
                name_file.write(agent_name)
    finally:
        env_stack.close()
parser.add_argument("--save_name", type=str, default="traj_dataset.npz", help="Name for file to save a single model's data to") parser.add_argument("--max_len", type=int, default=300, help="Max length of any trajectory.") parser.add_argument("--worker_idx", type=int, default=1, help="Index of worker (for parallel training)") parser.add_argument("--total_workers", type=int, default=1, help="Total number of workers (for parallel training)") parser.add_argument("--from_right", action="store_true", help="Indicates that data will be collected where player 1 starts on the right") args = parser.parse_args() print(args) if not (args.model_dir[-1] == '/' or args.model_dir[-1] == '\\'): args.model_dir = args.model_dir + "/" if not (args.local_pop_dir[-1] == '/' or args.local_pop_dir[-1] == '\\'): args.local_pop_dir = args.local_pop_dir + "/" print("Worker", args.worker_idx, "got here", 1, flush=True) pop = load_pop(args.local_pop_dir) pop_stats = [] for opp in pop: pop_stats.append(load_stats(args.local_pop_dir, opp)) my_pop = subset_pop(pop, args.worker_idx, args.total_workers) print("Worker", args.worker_idx, "got here", 2, flush=True) for port,p in enumerate(my_pop): p_idx = pop.index(p) p_model = PPO.load(curr_model_path(args.local_pop_dir, p, pop_stats[pop.index(p)])) traj_set = np.full((len(pop), args.N, args.max_len+1, 12*pop_stats[p_idx]["env_p"], 20*pop_stats[p_idx]["env_p"], 3), 255, dtype=np.uint8) info_set = np.full((len(pop), args.N), -1, dtype=np.int16) print("Worker", args.worker_idx, "got here", 3, flush=True)
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    # Start the human against the median-rated opponent
    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(human_stats["elo"]) > 0 else current_opp_elo

    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir, current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)
        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            human_win_rate = ((score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K, human_win_rate)
            human_elo += int(human_elo_change)
            human_stats["elo"].append(human_elo)

            # Track per-opponent win/loss/game counts for the human
            if not current_opp in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            # Pick the next opponent based on the human's updated Elo
            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp), current_opp_elo)

        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")
    finally:
        env.close()
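# --- Illustrative sketch (not part of the original source) ---
# `elo_change` is used in both human_matchmaking and ai_matchmaking_eval but defined
# elsewhere. Assuming it follows the standard Elo update and returns the deltas for
# both players (which matches the two-value unpacking and `[0]` indexing above),
# a minimal version could look like this; `elo_change_sketch` is a hypothetical name.
def elo_change_sketch(elo_a, elo_b, k, score_a):
    # Expected score of player A against player B under the logistic Elo model
    expected_a = 1.0 / (1.0 + 10.0 ** ((elo_b - elo_a) / 400.0))
    delta_a = k * (score_a - expected_a)
    # Assumed zero-sum: player B's delta is the negative of player A's
    return delta_a, -delta_a

# Example: with K=16, a 50% score against an equally rated opponent yields a delta of 0,
# which is consistent with how human_matchmaking applies int(human_elo_change) above.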