Example #1
def get_pop_elos_agents_stats(model_dir, flags):
    # Collect (name, Elo, latest checkpoint path, stats) for each agent in the
    # main population; nemesis/survivor agents are skipped but noted in flags.
    pop = load_pop(model_dir)
    main_pop = []
    elos = []
    agents = []
    stats = []
    for p in pop:
        if "nemesis" in p:
            flags["has_nemesis"] = True
            continue
        elif "survivor" in p:
            flags["has_survivor"] = True
            continue
        main_pop.append(p)
        stat = load_stats(model_dir, p)
        elos.append(last_elo(stat))
        agents.append(last_model_path(model_dir, p, stat))
        stats.append(stat)
    return list(zip(main_pop, elos, agents, stats))
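For reference, here are minimal sketches of the helpers Example #1 relies on. None of them are shown in this collection, so the directory-per-agent layout and the stats.json location are assumptions. The step-keyed "elo" dictionary is inferred from Example #2, which writes i_stats["elo"][str(steps)]; note that the per-agent stats in Examples #3 and #4 instead use parallel "steps"/"value" lists.

import json
import os

def load_pop(model_dir):
    # Assumption: each agent in the population is a subdirectory of model_dir.
    return [d for d in os.listdir(model_dir)
            if os.path.isdir(os.path.join(model_dir, d))]

def load_stats(model_dir, agent_id):
    # Assumption: stats live at <model_dir>/<agent_id>/stats.json
    with open(os.path.join(model_dir, agent_id, "stats.json")) as f:
        return json.load(f)

def last_elo(stats):
    # Under the step-keyed layout, the most recent Elo is the entry with the
    # highest step count.
    return stats["elo"][str(max(int(k) for k in stats["elo"]))]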
Example #2
        else:
            i_stats["elo"][str(i_stats["last_eval_steps"])] = (
                last_elo(i_stats) + elo_changes[i])
        i_stats["last_elo_change_steps"] = i_stats["num_steps"]


if __name__ == "__main__":
    # Setup command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("model_dir",
                        type=str,
                        help="Base directory for agent models")
    args = parser.parse_args()
    print(args, flush=True)

    if args.model_dir[-1] not in ('/', '\\'):
        args.model_dir += "/"
    if not os.path.isdir(args.model_dir):
        raise FileNotFoundError(
            "Base directory for agent models is not a folder")

    pop = train.load_pop(args.model_dir)
    print("Consolidating training population:", pop, flush=True)
    all_stats = []
    for p in pop:
        all_stats.append(train.load_stats(args.model_dir, p))
    consolidate_results(pop, all_stats)
    make_elo_changes(pop, all_stats)
    for p, s in zip(pop, all_stats):
        train.save_stats(args.model_dir, p, s)
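Examples #3 and #4 below resolve model checkpoints with curr_model_path and average recent Elo with avg_elo; neither helper is included in this collection, so the sketches below are assumptions. The path convention mirrors the save path Example #4 builds explicitly (agent_dir + name + '/' + name + '_' + str(num_steps)), and the averaging follows the step-keyed layout sketched above.

def curr_model_path(model_dir, agent_id, stats):
    # Mirrors the checkpoint path Example #4 constructs when saving:
    # <model_dir><agent_id>/<agent_id>_<num_steps>
    return model_dir + agent_id + '/' + agent_id + '_' + str(stats["num_steps"])

def avg_elo(stats, avg_len=1):
    # Assumption: mean of the most recent avg_len entries in the step-keyed
    # Elo dictionary.
    recent = sorted(stats["elo"], key=int)[-avg_len:]
    return sum(stats["elo"][k] for k in recent) / len(recent)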
Example #3
def ai_matchmaking_eval(args):
    # Evaluate one agent against every opponent in the population, updating
    # the agent's Elo after each full pass.
    PLAYER_1 = 0
    PLAYER_2 = 1

    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
        
    all_opps = list(all_stats.keys())
    
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))
          
    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)
        
    if ".txt" in args.agent_id:
        with open(args.agent_dir + args.agent_id, 'r') as name_file:
            args.agent_id = name_file.readlines()[0]
    
    agent_stats = load_stats(args.agent_dir, args.agent_id)
    agent = PPO.load(curr_model_path(args.agent_dir, args.agent_id, agent_stats))
    image_based = agent_stats["image_based"]
    env_p = agent_stats["env_p"]
    
    args.num_envs = min(args.num_envs, len(all_opps))
    # env_idx_to_opp_idx[i] is the opponent index loaded in env i (-1 = done).
    env_idx_to_opp_idx = [0 for _ in range(args.num_envs)]
        
    env_stack = None
    try:
        env_stack = make_ai_matchmaker_eval_stack(args.game_path, args.base_port,
                                                  image_based, args.level_path,
                                                  env_p, args.num_envs)
        
        for n in range(args.N_games):
            print("Agent Elo before iteration", n, ':', agent_stats["elo"]["value"][-1], flush=True)
            elo_delta = 0.
            for i in range(args.num_envs):
                env_stack.env_method("load_new_opp", 0, opp_fp(args.model_dir, all_opps[i]), 0, indices=[i])
                env_idx_to_opp_idx[i] = i
            del i
            
            states = env_stack.reset()
            envs_done = []
            next_i = args.num_envs
            prog_bar = tqdm(range(len(all_opps)), file=sys.stdout)
            
            while any(opp != -1 for opp in env_idx_to_opp_idx):
                reset_states = env_stack.env_method("reset", indices=envs_done)
                for state, env_idx in zip(reset_states, envs_done):
                    states[env_idx] = state
                envs_done = []
                while not envs_done:
                    actions, _ = agent.predict(states)
                    states, _, dones, infos = env_stack.step(actions)
                    if any(dones):
                        for j, done in enumerate(dones):
                            if done:
                                # Record elo change if needed
                                if env_idx_to_opp_idx[j] != -1:
                                    win_rate = .5
                                    if "winner" in infos[j]:
                                        if infos[j]["winner"] == PLAYER_1:
                                            win_rate = 1.
                                        elif infos[j]["winner"] == PLAYER_2:
                                            win_rate = 0.
                                    elo_delta += elo_change(agent_stats["elo"]["value"][-1], all_elos[env_idx_to_opp_idx[j]], args.K, win_rate)[0]
                                    prog_bar.update()
                                # Load next opponent if needed
                                if next_i < len(all_opps):
                                    env_stack.env_method("load_new_opp", 0, opp_fp(args.model_dir, all_opps[next_i]), 0, indices=[j])
                                    env_idx_to_opp_idx[j] = next_i
                                    next_i += 1
                                else:
                                    env_idx_to_opp_idx[j] = -1
                                envs_done.append(j)
                                
            prog_bar.close()
            # Apply the accumulated Elo change from this pass over the population.
            agent_stats["elo"]["value"][-1] += int(elo_delta)
        print("Final agent Elo:", agent_stats["elo"]["value"][-1], flush=True)
        save_stats(args.agent_dir, args.agent_id, agent_stats)
            
    finally:
        if env_stack is not None:
            env_stack.close()
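Both ai_matchmaking_eval above and human_matchmaking below call elo_change(player_elo, opp_elo, K, win_rate) and take its first return value as the player's rating delta. The implementation is not included in this collection; a minimal sketch of the standard Elo update it presumably performs:

def elo_change(player_elo, opp_elo, K, win_rate):
    # Standard Elo update: the expected score comes from the logistic curve,
    # the actual score is the observed win rate (draws count as 0.5).
    expected = 1. / (1. + 10. ** ((opp_elo - player_elo) / 400.))
    delta = K * (win_rate - expected)
    # Returns (player's change, opponent's change); callers use index 0 or
    # unpack the pair.
    return delta, -delta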
Example #4
def record_results(model_dir, agent_id, results):
    # Store evaluation results keyed by the agent's current step count.
    agent_stats = train.load_stats(model_dir, agent_id)
    agent_stats["last_eval_steps"] = agent_stats["num_steps"]
    agent_stats["performance"][str(agent_stats["num_steps"])] = results
    train.save_stats(model_dir, agent_id, agent_stats)


def ai_matchmaking(args):
    set_random_seed(args.seed)

    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)

    all_opps = sorted_keys(all_stats)
    all_opps.reverse()

    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)

    load_agent = True
    if args.agent_id:
        if ".txt" in args.agent_id and os.path.exists(args.agent_dir +
                                                      args.agent_id):
            with open(args.agent_dir + args.agent_id, 'r') as name_file:
                agent_name = name_file.readlines()[0]
            agent_stats = load_stats(args.agent_dir, agent_name)
        elif not ".txt" in args.agent_id:
            agent_name = args.agent_id
            agent_stats = load_stats(args.agent_dir, agent_name)
        else:
            agent_name, agent_stats = init_agent(args)
            load_agent = False
    else:
        agent_name, agent_stats = init_agent(args)
        load_agent = False

    env_stack = None
    try:
        env_stack = make_ai_matchmaker_stack(
            all_stats,
            all_opps,
            all_elos,
            args.game_path,
            args.model_dir,
            base_port=args.base_port,
            image_based=args.image_based,
            level_path=args.level_path,
            env_p=args.env_p,
            time_reward=args.time_reward,
            num_envs=args.num_envs,
            matchmaking_mode=args.mm,
            win_loss_ratio=args.win_loss_ratio)

        v = 1 if args.verbose else 0
        if load_agent:
            agent = PPO.load(args.agent_dir + agent_name + '/' + agent_name +
                             '_' + str(agent_stats["num_steps"]),
                             env=env_stack)
        else:
            agent = save_new_model(agent_name,
                                   env_stack,
                                   args.num_envs,
                                   args.agent_dir,
                                   image_based=args.image_based,
                                   image_pretrain=args.image_pretrain,
                                   verbose=v,
                                   w=args.w)
        agent.learn(total_timesteps=args.num_steps)

        agent_stats["num_steps"] += args.num_steps
        agent_base = args.agent_dir + agent_name + '/'
        agent_save_path = agent_base + agent_name + '_' + str(
            agent_stats["num_steps"])
        os.system("zip " + agent_base + "archive.zip " + agent_base +
                  "*_*.zip")
        os.system("rm " + agent_base + "*_*.zip")
        agent.save(agent_save_path)

        # Average the agent's Elo estimate across all parallel envs.
        agent_elos = env_stack.env_method("get_agent_elo")
        agent_avg_elo = sum(agent_elos) / len(agent_elos)

        agent_stats["elo"]["steps"].append(agent_stats["num_steps"])
        agent_stats["elo"]["value"].append(agent_avg_elo)

        save_stats(args.agent_dir, agent_name, agent_stats)
        if not load_agent:
            with open(args.agent_dir + "name_" + str(args.seed) + ".txt",
                      'w') as name_file:
                name_file.write(agent_name)

    finally:
        if env_stack is not None:
            env_stack.close()


def record_results(model_dir, agent_id, results):
    # Variant keyed by iteration number rather than step count.
    agent_stats = train.load_stats(model_dir, agent_id)
    agent_stats["win_rates"][str(agent_stats["curr_iter"])] = results
    agent_stats["curr_iter"] += 1
    train.save_stats(model_dir, agent_id, agent_stats)
    parser.add_argument("--total_workers", type=int, default=1, help="Total number of workers (for parallel training)")
    parser.add_argument("--from_right", action="store_true", help="Indicates that data will be collected where player 1 starts on the right")
    args = parser.parse_args()
    print(args)

    if args.model_dir[-1] not in ('/', '\\'):
        args.model_dir += "/"
    if args.local_pop_dir[-1] not in ('/', '\\'):
        args.local_pop_dir += "/"
        
    print("Worker", args.worker_idx, "got here", 1, flush=True)

    pop = load_pop(args.local_pop_dir)
    pop_stats = []
    for opp in pop:
        pop_stats.append(load_stats(args.local_pop_dir, opp))
    my_pop = subset_pop(pop, args.worker_idx, args.total_workers)
    
    print("Worker", args.worker_idx, "got here", 2, flush=True)

    for port, p in enumerate(my_pop):
        p_idx = pop.index(p)
        p_model = PPO.load(curr_model_path(args.local_pop_dir, p, pop_stats[p_idx]))
        # One buffer slot per (opponent, episode, frame); frames are RGB images
        # whose resolution scales with this agent's env_p setting.
        traj_set = np.full((len(pop), args.N, args.max_len + 1,
                            12 * pop_stats[p_idx]["env_p"],
                            20 * pop_stats[p_idx]["env_p"], 3),
                           255, dtype=np.uint8)
        info_set = np.full((len(pop), args.N), -1, dtype=np.int16)
        
        print("Worker", args.worker_idx, "got here", 3, flush=True)
        
        try:
            env = TankEnv(args.game_path, 
                opp_fp_and_elo=[], 
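The data-collection excerpt above breaks off mid-call, but it depends on subset_pop(pop, worker_idx, total_workers) to divide the population across parallel workers. That helper is not shown anywhere in this collection; a plausible round-robin sketch, offered purely as an assumption:

def subset_pop(pop, worker_idx, total_workers):
    # Hypothetical round-robin split: worker i handles every total_workers-th
    # agent starting at index i, so the load is balanced across workers.
    return [p for i, p in enumerate(pop) if i % total_workers == worker_idx]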
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)

    all_opps = sorted_keys(all_stats)
    all_opps.reverse()

    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = (human_stats["elo"][-1]
                 if human_stats["elo"] else current_opp_elo)

    env = None
    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir,
                                              current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)

        print("Starting matchmaking")
        # Keep playing until the human's Elo exceeds the last Elo in the list.
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            # Map (wins - losses) / games from [-1, 1] to a [0, 1] score, so
            # draws count as 0.5.
            human_win_rate = (
                (score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K,
                                             human_win_rate)
            human_elo += int(human_elo_change)

            human_stats["elo"].append(human_elo)
            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp),
                             current_opp_elo)

        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")

    finally:
        if env is not None:
            env.close()
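human_matchmaking picks the next opponent with elo_based_choice(all_elos, human_elo, D), which is also not shown in this collection. A hypothetical sketch of a proximity-weighted choice, where D controls how sharply the preference for similar Elo falls off; the exact weighting is an assumption:

import random

def elo_based_choice(all_elos, target_elo, D):
    # Hypothetical: weight each opponent by how close its Elo is to the
    # target; with D = 5, a 500-point gap cuts the weight by a factor of 10.
    weights = [10. ** (-abs(e - target_elo) / (D * 100.)) for e in all_elos]
    return random.choices(range(len(all_elos)), weights=weights, k=1)[0]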