Example #1
def tournament(model_dir, local_pop_dir, game_path, base_port, num_envs, num_trials, worker_idx, total_workers, reuse_ports=True, level_path=None):
    """Evaluate this worker's slice of the population.

    Logs to each agent's tourn_log.txt, retries on a higher port range when
    connection errors occur, and returns a list of (agent_name, results) tuples.
    """
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    results = []
    for i,p in enumerate(my_pop):
        print("Worker", worker_idx, "is starting evaluation of", p, "for", num_trials, "trials per competitor", flush=True)
        sys.stdout = open(model_dir+p+"/tourn_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port+(num_envs*i*2)
        j = 0
        last_error = None
        while p_base_port+(j*num_envs*2) < 60000:
            try:
                p_results = eval.evaluate_agent(model_dir, local_pop_dir, p, game_path, p_base_port+(j*num_envs*2), num_envs, num_trials, level_path=level_path)
                break
            except (ConnectionError, EOFError, json.decoder.JSONDecodeError) as e:
                # ConnectionResetError is a subclass of ConnectionError, so it is caught here too
                print(type(e).__name__, "detected during tournament, trying a higher port range")
                j += 1
                last_error = e
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = org_stdout
        sys.stderr = org_stderr
        if p_base_port+(j*num_envs*2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError("So there's no last_error, but we got here...?")
        results.append((p,p_results))
        print("Worker", worker_idx, "has completed the evaluation of", p, flush=True)
    return results
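
A minimal usage sketch for tournament() above, with hypothetical paths, ports, and worker counts; the real entry point that parses these arguments is not shown here.

# Hypothetical invocation: worker 0 of 4 evaluates its slice of the population,
# running 8 parallel environments and 10 trials per competitor.
results = tournament(
    model_dir="models/",                # assumed layout: models/<agent_name>/...
    local_pop_dir="local_pop/",         # assumed local copy of opponent models
    game_path="builds/tanks.x86_64",    # hypothetical game binary
    base_port=50000,
    num_envs=8,
    num_trials=10,
    worker_idx=0,
    total_workers=4,
    reuse_ports=False,                  # give each agent its own port block
)
for agent_name, agent_results in results:
    print(agent_name, agent_results)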
Example #2
def get_pop_elos_agents_stats(model_dir, flags):
    """Return (name, elo, model_path, stats) tuples for the main population.

    Nemesis and survivor agents are skipped, and the corresponding
    flags["has_nemesis"] / flags["has_survivor"] entries are set instead.
    """
    pop = load_pop(model_dir)
    main_pop = []
    elos = []
    agents = []
    stats = []
    for p in pop:
        if "nemesis" in p:
            flags["has_nemesis"] = True
            continue
        elif "survivor" in p:
            flags["has_survivor"] = True
            continue
        main_pop.append(p)
        stat = load_stats(model_dir, p)
        elos.append(last_elo(stat))
        agents.append(last_model_path(model_dir, p, stat))
        stats.append(stat)
    return list(zip(main_pop, elos, agents, stats))
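
A short sketch of how the returned tuples and the mutated flags dict might be consumed; the directory name is hypothetical, and load_pop/load_stats/last_elo/last_model_path are assumed to behave as referenced above.

# Hypothetical caller: collect Elo ratings and latest model paths for the main population.
flags = {"has_nemesis": False, "has_survivor": False}
for name, elo, model_path, stats in get_pop_elos_agents_stats("models/", flags):
    print(name, "elo:", elo, "latest model:", model_path)
print("Population includes a nemesis agent:", flags["has_nemesis"])
print("Population includes a survivor agent:", flags["has_survivor"])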
Example #3
        else:
            #print(i_stats)
            i_stats["elo"][str(i_stats["last_eval_steps"]
                               )] = last_elo(i_stats) + elo_changes[i]
        i_stats["last_elo_change_steps"] = i_stats["num_steps"]


if __name__ == "__main__":
    # Setup command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("model_dir",
                        type=str,
                        help="Base directory for agent models")
    args = parser.parse_args()
    print(args, flush=True)

    if not (args.model_dir[-1] == '/' or args.model_dir[-1] == '\\'):
        args.model_dir = args.model_dir + "/"
    if not os.path.isdir(args.model_dir):
        raise FileNotFoundError(
            "Base directory for agent models is not a folder")

    pop = train.load_pop(args.model_dir)
    print("Consolidating training population:", pop, flush=True)
    all_stats = []
    for p in pop:
        all_stats.append(train.load_stats(args.model_dir, p))
    consolidate_results(pop, all_stats)
    make_elo_changes(pop, all_stats)
    for p, s in zip(pop, all_stats):
        train.save_stats(args.model_dir, p, s)
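
The consolidation step above keys each agent's Elo history by the step count of its last evaluation. A small illustrative record with made-up numbers, assuming last_elo() returns the most recently stored rating; the real schema lives in train.load_stats/train.save_stats, which are not shown.

# Made-up stats record for one agent, showing only the fields touched above.
i_stats = {
    "num_steps": 300000,        # total training steps so far
    "last_eval_steps": 300000,  # step count of the most recent evaluation
    "elo": {"100000": 1000, "200000": 1032},  # Elo history keyed by str(steps)
}
elo_change_for_agent = 15
# Mirrors the update in the fragment: add a new entry keyed by last_eval_steps.
i_stats["elo"][str(i_stats["last_eval_steps"])] = 1032 + elo_change_for_agent
i_stats["last_elo_change_steps"] = i_stats["num_steps"]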
Example #4
def train_multiple_agents(model_dir,
                          local_pop_dir,
                          game_path,
                          base_port,
                          num_envs,
                          num_steps,
                          worker_idx,
                          total_workers,
                          reuse_ports=True,
                          level_path=None,
                          time_reward=0.):
    """Train this worker's slice of the population.

    Logs to each agent's train_log.txt and retries on a higher port range when
    connection errors occur.
    """
    org_stdout = sys.stdout
    org_stderr = sys.stderr
    my_pop = subset_pop(train.load_pop(model_dir), worker_idx, total_workers)
    for i, p in enumerate(my_pop):
        print("Worker",
              worker_idx,
              "is starting training of",
              p,
              "for",
              num_steps,
              "steps",
              flush=True)
        sys.stdout = open(model_dir + p + "/train_log.txt", 'a')
        sys.stderr = sys.stdout
        p_base_port = base_port if reuse_ports else base_port + (num_envs * i *
                                                                 2)
        j = 0
        last_error = None
        while p_base_port + (j * num_envs * 2) < 60000:
            try:
                train.train_agent(model_dir,
                                  local_pop_dir,
                                  p,
                                  game_path,
                                  p_base_port + (j * num_envs * 2),
                                  num_envs,
                                  num_steps,
                                  level_path=level_path,
                                  time_reward=time_reward)
                break
            except (ConnectionError, EOFError,
                    json.decoder.JSONDecodeError) as e:
                # ConnectionResetError is a subclass of ConnectionError, so it
                # is caught here too
                print(
                    type(e).__name__,
                    "detected during training, trying a higher port range")
                j += 1
                last_error = e
        sys.stdout.close()
        sys.stderr.close()
        sys.stdout = org_stdout
        sys.stderr = org_stderr
        if p_base_port + (j * num_envs * 2) >= 60000:
            if last_error:
                raise last_error
            else:
                raise ValueError(
                    "Port range exhausted without a recorded error")
        print("Worker",
              worker_idx,
              "has completed training of",
              p,
              "for",
              num_steps,
              "steps",
              flush=True)
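
Both train_multiple_agents() and tournament() in Example #1 carve out non-overlapping port ranges: with reuse_ports=False, agent i starts at base_port + num_envs*i*2, and each retry after a connection failure shifts the range up by another num_envs*2 ports. A tiny sketch of that arithmetic with hypothetical numbers (the factor of 2 presumably means two ports per environment, but that is an assumption):

# Hypothetical values: 4 parallel environments per agent.
base_port, num_envs = 50000, 4

def port_for(agent_idx, retry):
    # Mirrors base_port + (num_envs * i * 2) + (j * num_envs * 2) from the code above.
    return base_port + num_envs * agent_idx * 2 + retry * num_envs * 2

print(port_for(0, 0))  # 50000: agent 0, first attempt
print(port_for(1, 0))  # 50008: agent 1 gets the next block of 8 ports
print(port_for(1, 2))  # 50024: agent 1 after two failed attempts
# Retries stop once the computed port reaches 60000, matching the while condition above.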
Example #5
def ai_matchmaking_eval(args):
    PLAYER_1 = 0
    PLAYER_2 = 1

    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
        
    all_opps = list(all_stats.keys())
    
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))
          
    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)
        
    if ".txt" in args.agent_id:
        with open(args.agent_dir + args.agent_id, 'r') as name_file:
            args.agent_id = name_file.readlines()[0]
    
    agent_stats = load_stats(args.agent_dir, args.agent_id)
    agent = PPO.load(curr_model_path(args.agent_dir, args.agent_id, agent_stats))
    image_based = agent_stats["image_based"]
    env_p = agent_stats["env_p"]
    
    args.num_envs = min(args.num_envs, len(all_opps))
    env_idx_to_opp_idx = [0 for _ in range(args.num_envs)]
        
    env_stack = None
    try:
        env_stack = make_ai_matchmaker_eval_stack(args.game_path, args.base_port, image_based, args.level_path, env_p, args.num_envs)
        
        for n in range(args.N_games):
            print("Agent Elo before iteration", n, ':', agent_stats["elo"]["value"][-1], flush=True)
            elo_delta = 0.
            for i in range(args.num_envs):
                env_stack.env_method("load_new_opp", 0, opp_fp(args.model_dir, all_opps[i]), 0, indices=[i])
                env_idx_to_opp_idx[i] = i
            del i
            
            states = env_stack.reset()
            envs_done = []
            next_i = args.num_envs
            prog_bar = tqdm(range(len(all_opps)), file=sys.stdout)
            
            while not all([tmp == -1 for tmp in env_idx_to_opp_idx]):
                #print("Worker", args.seed, "current opps:", env_idx_to_opp_idx, flush=True)
                reset_states = env_stack.env_method("reset", indices = envs_done)
                for state,env_idx in zip(reset_states, envs_done):
                    states[env_idx] = state
                envs_done = []
                while len(envs_done) < 1:
                    actions, _ = agent.predict(states)
                    states, _, dones, infos = env_stack.step(actions)
                    if any(dones):
                        for j,done in enumerate(dones):
                            if done:
                                # Record elo change if needed
                                if env_idx_to_opp_idx[j] != -1:
                                    win_rate = .5
                                    if "winner" in infos[j]:
                                        if infos[j]["winner"] == PLAYER_1:
                                            win_rate = 1.
                                        elif infos[j]["winner"] == PLAYER_2:
                                            win_rate = 0.
                                    elo_delta += elo_change(agent_stats["elo"]["value"][-1], all_elos[env_idx_to_opp_idx[j]], args.K, win_rate)[0]
                                    prog_bar.update()
                                # Load next opponent if needed
                                if next_i < len(all_opps):
                                    env_stack.env_method("load_new_opp", 0, opp_fp(args.model_dir, all_opps[next_i]), 0, indices=[j])
                                    env_idx_to_opp_idx[j] = next_i
                                    next_i += 1
                                else:
                                    env_idx_to_opp_idx[j] = -1
                                envs_done.append(j)
                                
            prog_bar.close()
            agent_stats["elo"]["value"][-1] += int(elo_delta)
        print("Final agent Elo:", agent_stats["elo"]["value"][-1], flush=True)
        save_stats(args.agent_dir, args.agent_id, agent_stats)
            
    finally:
        if env_stack is not None:
            env_stack.close()


def ai_matchmaking(args):
    set_random_seed(args.seed)

    if args.model_dir[-1] != '/':
        args.model_dir += '/'
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)

    all_opps = sorted_keys(all_stats)
    all_opps.reverse()

    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    if args.agent_dir[-1] != '/':
        args.agent_dir += '/'
    if not os.path.exists(args.agent_dir):
        os.mkdir(args.agent_dir)

    load_agent = True
    if args.agent_id:
        if ".txt" in args.agent_id and os.path.exists(args.agent_dir +
                                                      args.agent_id):
            with open(args.agent_dir + args.agent_id, 'r') as name_file:
                agent_name = name_file.readlines()[0]
            agent_stats = load_stats(args.agent_dir, agent_name)
        elif not ".txt" in args.agent_id:
            agent_name = args.agent_id
            agent_stats = load_stats(args.agent_dir, agent_name)
        else:
            agent_name, agent_stats = init_agent(args)
            load_agent = False
    else:
        agent_name, agent_stats = init_agent(args)
        load_agent = False

    env_stack = None
    try:
        env_stack = make_ai_matchmaker_stack(
            all_stats,
            all_opps,
            all_elos,
            args.game_path,
            args.model_dir,
            base_port=args.base_port,
            image_based=args.image_based,
            level_path=args.level_path,
            env_p=args.env_p,
            time_reward=args.time_reward,
            num_envs=args.num_envs,
            matchmaking_mode=args.mm,
            win_loss_ratio=args.win_loss_ratio)

        v = 1 if args.verbose else 0
        if load_agent:
            agent = PPO.load(args.agent_dir + agent_name + '/' + agent_name +
                             '_' + str(agent_stats["num_steps"]),
                             env=env_stack)
        else:
            agent = save_new_model(agent_name,
                                   env_stack,
                                   args.num_envs,
                                   args.agent_dir,
                                   image_based=args.image_based,
                                   image_pretrain=args.image_pretrain,
                                   verbose=v,
                                   w=args.w)
        agent.learn(total_timesteps=args.num_steps)

        agent_stats["num_steps"] += args.num_steps
        agent_base = args.agent_dir + agent_name + '/'
        agent_save_path = agent_base + agent_name + '_' + str(
            agent_stats["num_steps"])
        os.system("zip " + agent_base + "archive.zip " + agent_base +
                  "*_*.zip")
        os.system("rm " + agent_base + "*_*.zip")
        agent.save(agent_save_path)

        agent_avg_elo = env_stack.env_method("get_agent_elo")
        agent_avg_elo = sum(agent_avg_elo) / len(agent_avg_elo)

        agent_stats["elo"]["steps"].append(agent_stats["num_steps"])
        agent_stats["elo"]["value"].append(agent_avg_elo)

        save_stats(args.agent_dir, agent_name, agent_stats)
        if not load_agent:
            with open(args.agent_dir + "name_" + str(args.seed) + ".txt",
                      'w') as name_file:
                name_file.write(agent_name)

    finally:
        if env_stack is not None:
            env_stack.close()

    parser.add_argument("--save_name", type=str, default="traj_dataset.npz", help="Name for file to save a single model's data to")
    parser.add_argument("--max_len", type=int, default=300, help="Max length of any trajectory.")
    parser.add_argument("--worker_idx", type=int, default=1, help="Index of worker (for parallel training)")
    parser.add_argument("--total_workers", type=int, default=1, help="Total number of workers (for parallel training)")
    parser.add_argument("--from_right", action="store_true", help="Indicates that data will be collected where player 1 starts on the right")
    args = parser.parse_args()
    print(args)

    if not (args.model_dir[-1] == '/' or args.model_dir[-1] == '\\'):
        args.model_dir = args.model_dir + "/"
    if not (args.local_pop_dir[-1] == '/' or args.local_pop_dir[-1] == '\\'):
        args.local_pop_dir = args.local_pop_dir + "/"
        
    print("Worker", args.worker_idx, "got here", 1, flush=True)

    pop = load_pop(args.local_pop_dir)
    pop_stats = []
    for opp in pop:
        pop_stats.append(load_stats(args.local_pop_dir, opp))
    my_pop = subset_pop(pop, args.worker_idx, args.total_workers)
    
    print("Worker", args.worker_idx, "got here", 2, flush=True)

    for port,p in enumerate(my_pop):
        p_idx = pop.index(p)
        p_model = PPO.load(curr_model_path(args.local_pop_dir, p, pop_stats[p_idx]))
        traj_set = np.full((len(pop), args.N, args.max_len+1, 12*pop_stats[p_idx]["env_p"], 20*pop_stats[p_idx]["env_p"], 3), 255, dtype=np.uint8)
        info_set = np.full((len(pop), args.N), -1, dtype=np.int16)
        
        print("Worker", args.worker_idx, "got here", 3, flush=True)
        
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)

    all_opps = sorted_keys(all_stats)
    all_opps.reverse()

    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(
        human_stats["elo"]) > 0 else current_opp_elo

    env = None
    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir,
                                              current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)

        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            # Win rate in [0, 1]: wins count as 1, draws as 0.5, losses as 0
            human_win_rate = (
                (score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K,
                                             human_win_rate)
            human_elo += int(human_elo_change)

            human_stats["elo"].append(human_elo)
            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp),
                             current_opp_elo)

        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")

    finally:
        if env is not None:
            env.close()
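
Several of these functions call elo_change(rating, opp_rating, K, win_rate) and unpack two return values, but its body is not included in this dump. A plausible sketch, assuming it implements the standard Elo update; this is an assumption, not the repository's actual implementation.

def elo_change(rating_a, rating_b, k, win_rate):
    """Assumed standard Elo update: returns the rating deltas for both players."""
    expected_a = 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400.0))
    delta_a = k * (win_rate - expected_a)
    return delta_a, -delta_a

# Hypothetical numbers: a 1000-rated human wins 3 of 5 games against a 1100-rated opponent.
change, _ = elo_change(1000, 1100, 16, 3 / 5)
print(round(change, 1))  # roughly +3.8 with these made-up ratings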