Example #1
def make_env_stack(num_envs,
                   game_path,
                   base_port,
                   game_log_path,
                   opp_fp_and_elo,
                   trainee_elo,
                   elo_match=True,
                   survivor=False,
                   stdout_path=None,
                   level_path=None,
                   image_based=False,
                   time_reward=0.,
                   env_p=3):
    if num_envs >= 1:
        envs = []
        for i in range(num_envs):
            # Bind the per-environment values as lambda defaults so each
            # environment captures its own ports and log file names.
            envs.append(lambda path=game_path,
                               port=base_port + (i * 2),
                               log=game_log_path.replace(".txt", "-" + str(i) + ".txt"),
                               opps=opp_fp_and_elo,
                               match=elo_match,
                               center=trainee_elo,
                               surv=survivor,
                               out=stdout_path.replace(".txt", "-" + str(i) + ".txt"),
                               level=level_path,
                               img=image_based,
                               t_rew=time_reward: TankEnv(path,
                                                          game_port=port,
                                                          game_log_path=log,
                                                          opp_fp_and_elo=opps,
                                                          elo_match=match,
                                                          center_elo=center,
                                                          survivor=surv,
                                                          stdout_path=out,
                                                          verbose=True,
                                                          level_path=level,
                                                          image_based=img,
                                                          time_reward=t_rew,
                                                          p=env_p))
        if num_envs == 1:
            env_stack = SubprocVecEnv(envs, start_method="fork")
        else:
            env_stack = SubprocVecEnv(envs, start_method="forkserver")
        env_stack.reset()
        return env_stack
    else:
        env = TankEnv(game_path,
                      game_port=base_port,
                      game_log_path=game_log_path,
                      opp_fp_and_elo=opp_fp_and_elo,
                      elo_match=elo_match,
                      center_elo=trainee_elo,
                      survivor=survivor,
                      stdout_path=stdout_path,
                      level_path=level_path,
                      image_based=image_based,
                      time_reward=time_reward,
                      p=env_p)
        env.reset()
        return env
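A minimal usage sketch of make_env_stack; the path, ports, and ELO below are placeholder assumptions, not values taken from the example:

# Hypothetical call: four parallel environments on consecutive port pairs.
env_stack = make_env_stack(num_envs=4,
                           game_path="./build/TankGame.x86_64",   # placeholder executable path
                           base_port=50000,
                           game_log_path="gamelog.txt",           # expands to gamelog-0.txt, gamelog-1.txt, ...
                           opp_fp_and_elo=[],
                           trainee_elo=1000,
                           stdout_path="stdout.txt")              # expands to stdout-0.txt, ...

Note that stdout_path needs a real value here: the vectorized branch calls .replace() on it, so the None default is only safe in the fallback branch that builds a bare TankEnv.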
Example #3
def run_model(args):
    env = TankEnv(args.game_path,
                  opp_fp_and_elo=[(args.opp, 1000)],
                  game_port=args.base_port,
                  my_port=args.my_port,
                  image_based=args.image_based,
                  level_path=args.level_path,
                  rand_opp=args.rand_opp,
                  p=args.env_p,
                  opp_p=args.opp_env_p)
    model = None
    if args.p1:
        model = PPO.load(args.p1)
    elif args.p1same:
        model = PPO.load(args.opp)

    score = [0, 0, 0]
    print("Score: [Player1 Wins, Player2 Wins, Ties]")

    obs = env.reset()
    if args.image_based and (args.ai_view or args.rev_ai_view):
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()
    while True:
        if args.image_based and (args.ai_view or args.rev_ai_view):
            if not args.rev_ai_view:
                plt.imshow(obs, origin="lower")
            else:
                plt.imshow(env.opp_state, origin="lower")
            fig.canvas.draw()
        if model:
            action, _ = model.predict(obs)
        elif args.rand_p1:
            action = np.random.rand(5) * 2 - 1
        else:
            action = np.zeros(5, dtype=np.float32)
        obs, reward, done, info = env.step(action)
        if done:
            score[info["winner"]] += 1
            print("Score:", score)
            obs = env.reset()
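run_model expects an argparse namespace; a hedged sketch of the fields it reads, with placeholder values (the original script builds these with argparse):

from argparse import Namespace

args = Namespace(game_path="./build/TankGame.x86_64", opp="models/opponent/best.zip",
                 base_port=50000, my_port=50001, image_based=False, level_path=None,
                 rand_opp=False, env_p=3, opp_env_p=3,
                 p1=None, p1same=True, rand_p1=False,
                 ai_view=False, rev_ai_view=False)
run_model(args)  # runs until interrupted, printing the running score after each game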
Example #4
def make_ai_matchmaker_eval_stack(game_path, base_port, image_based, level_path, env_p, num_envs):
    envs = []
    for i in range(num_envs):
        envs.append(
            lambda a=game_path, b=base_port + (i * 2), c=base_port + (i * 2) + 1, d=image_based, e=level_path, f=env_p:
                TankEnv(a,
                        opp_fp_and_elo=[],
                        game_port=b,
                        my_port=c,
                        elo_match=False,
                        image_based=d,
                        level_path=e,
                        p=f))
    env_stack = SubprocVecEnv(envs, start_method="fork")
    return env_stack
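A possible invocation, with placeholder values; unlike make_env_stack above, this helper does not reset the stack for you:

# Hypothetical evaluation stack of 8 environments; all values are placeholders.
eval_stack = make_ai_matchmaker_eval_stack("./build/TankGame.x86_64", 52000,
                                           image_based=False, level_path=None,
                                           env_p=3, num_envs=8)
obs = eval_stack.reset()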
class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=[0, 0]):
        super(AIMatchmaker, self).__init__()

        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir

        self.agent_elo = starting_elo if starting_elo is not None else self.all_elos[0]
        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode

        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)

        self.started = False
        self.next_opp()

        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)
        if self.mm == 1:
            # ELO-based matchmaking: opponents with ELOs closer to the agent's
            # ELO are preferred (but not guaranteed)
            weights += np.array(
                [weight_func(elo - self.agent_elo, self.D) for elo in self.all_elos],
                dtype=np.float32)

        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1

            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero out weights for opponents whose ELO is less than or equal to the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Choose the opponent with the highest ELO if no opponent ELO exceeds the agent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose ELO is greater than the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Choose the opponent with the highest ELO if no opponent ELO exceeds the agent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero out weights for opponents whose ELO is greater than or equal to the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Choose the opponent with the lowest ELO if no opponent ELO is below the agent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose ELO is less than the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Choose the opponent with the lowest ELO if no opponent ELO is below the agent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1

        self.current_opp_idx = choice_with_normalization(
            [i for i in range(len(self.all_elos))], weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:", self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp),
                              self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5

            agent_elo_change, _ = elo_change(self.agent_elo,
                                             self.current_opp_elo, self.K,
                                             win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:", self.agent_elo, flush=True)
        else:
            self.started = True

        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
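Because AIMatchmaker exposes the wrapped TankEnv's action_space and observation_space and implements reset/step, it can be passed straight to a Stable-Baselines3 algorithm. A minimal sketch, assuming all_stats, all_opps, and all_elos were loaded elsewhere and using placeholder paths:

from stable_baselines3 import PPO

# Hypothetical training run against the opponent pool; values are placeholders.
env = AIMatchmaker(all_stats, all_opps, all_elos,
                   game_path="./build/TankGame.x86_64",
                   model_dir="./models",
                   base_port=50000,
                   my_port=50001,
                   matchmaking_mode=1)   # prefer opponents with ELOs near the agent's
model = PPO("MlpPolicy", env, n_steps=64)
model.learn(total_timesteps=100000)
print("Final agent ELO:", env.get_agent_elo())
env.close()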
Example #6
    if not os.path.isdir(args.model_dir):
        raise FileNotFoundError("Base directory for agent models is not a folder")
    if not os.path.exists(args.noun_file_path):
        raise FileNotFoundError("Inputted path does not lead to noun file")
    if not os.path.exists(args.adj_file_path):
        raise FileNotFoundError("Inputted path does not lead to adjective file")

    if args.num_envs > 1:
        envs = []
        for i in range(args.num_envs):
            envs.append(
                lambda game_path=args.game_path, b=args.base_port+(i*2), c="gamelog-"+str(i)+".txt", d=args.level_path, e=args.image_based, f=args.env_p: 
                        TankEnv(game_path,
                                game_port=b,
                                game_log_path=c,
                                level_path=d,
                                image_based=e,
                                p=f
                        )
            )
        env_stack = DummyVecEnv(envs)
    else:
        env_stack = TankEnv(args.game_path, game_port=args.base_port, game_log_path="gamelog.txt", level_path=args.level_path, image_based=args.image_based, p=args.env_p)

    try:
        population = []
        for i in range(args.start):
            agent_name, agent = gen_agent(env_stack, args.num_envs, args.model_dir, args.noun_file_path, args.adj_file_path, 
                batch_size=args.batch_size, image_based=args.image_based, image_pretrain=args.image_pretrain, env_p=args.env_p)
            population.append(agent_name)
            if args.nem:
Example #7
parser.add_argument("game_path", type=str, default=None, help="File path of game executable")
parser.add_argument("--base_port", type=int, default=50000, help="Base port to be used for game environment")
parser.add_argument("--my_port", type=int, default=50500, help="Port to be used on Python side of network socket connection")
parser.add_argument("--image_based", action="store_true", help="Indicates that env observation space is image based, and will show those states using matplotlib")
parser.add_argument("--level_path", type=str, default=None, help="Path to level file")
parser.add_argument("--ai_view", action="store_true", help="Indicates that AI version of game state should be rendered")
parser.add_argument("--train", action="store_true", help="Indicates that test should try training model (as opposed to just running eval)")
parser.add_argument("--num_steps", type=int, default=128, help="Number of steps to run for")
parser.add_argument("--env_p", type=int, default=3, help="p^2 pixels will represent one in-game grid square")
args = parser.parse_args()
print(args)

env = TankEnv(args.game_path, 
    opp_fp_and_elo=[], 
    game_port=args.base_port, 
    my_port=args.my_port, 
    image_based=args.image_based,
    level_path=args.level_path,
    rand_opp=True,
    p=args.env_p)
if args.image_based:
    model = PPO("CnnPolicy", env, n_steps=64)
else:
    model = PPO("MlpPolicy", env, n_steps=64)
    
print(model.policy)
  
try:
    if args.train:
        model.learn(total_timesteps=args.num_steps)
    else:
        obs = env.reset()
    print("Worker", args.worker_idx, "got here", 2, flush=True)

    for port,p in enumerate(my_pop):
        p_idx = pop.index(p)
        p_model = PPO.load(curr_model_path(args.local_pop_dir, p, pop_stats[pop.index(p)]))
        traj_set = np.full((len(pop), args.N, args.max_len+1, 12*pop_stats[p_idx]["env_p"], 20*pop_stats[p_idx]["env_p"], 3), 255, dtype=np.uint8)
        info_set = np.full((len(pop), args.N), -1, dtype=np.int16)
        
        print("Worker", args.worker_idx, "got here", 3, flush=True)
        
        try:
            env = TankEnv(args.game_path, 
                opp_fp_and_elo=[], 
                game_port=args.base_port+port, 
                my_port=args.base_port+port+1,
                level_path=args.level_path,
                image_based=pop_stats[p_idx]["image_based"],
                p=pop_stats[p_idx]["env_p"],
                verbose=True
                )
                
            print("Worker", args.worker_idx, "got here", 4, flush=True)
                
            for i,opp in enumerate(tqdm(pop, file=sys.stdout)):
                env.load_new_opp(0, curr_model_path(args.local_pop_dir, opp, pop_stats[pop.index(opp)]), 0)
                for j in range(args.N):
                    obs = env.reset()
                    side = -1 if args.from_right else 1
                    while env.raw_state[0] * side > 0:
                        obs = env.reset()
                        
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)

    all_opps = sorted_keys(all_stats)
    all_opps.reverse()

    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(
        human_stats["elo"]) > 0 else current_opp_elo

    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir,
                                              current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)

        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            human_win_rate = (
                (score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K,
                                             human_win_rate)
            human_elo += int(human_elo_change)

            human_stats["elo"].append(human_elo)
            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp),
                             current_opp_elo)

        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")

    finally:
        env.close()
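elo_change itself is not shown in these excerpts; it presumably implements the standard Elo update. A sketch of that formula for reference (not the project's actual helper):

def elo_change_sketch(my_elo, opp_elo, K, win_rate):
    # Expected score from the rating gap, then K-scaled difference from the actual result.
    expected = 1.0 / (1.0 + 10.0 ** ((opp_elo - my_elo) / 400.0))
    delta = K * (win_rate - expected)
    return delta, -delta

# A 60% win rate against an equally rated opponent with K=16 gives roughly +1.6 ELO.
print(elo_change_sketch(1000, 1000, 16, 0.6))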
parser.add_argument(
    "--env_p",
    type=int,
    default=3,
    help="Image-based environment will draw one in-game grid square as p^2 pixels")
args = parser.parse_args()
print(args)

obs_set = np.zeros((args.num_obs, 52), dtype=np.float32)
img_set = np.zeros((args.num_obs, 12 * args.env_p, 20 * args.env_p, 3),
                   dtype=np.uint8)
try:
    env = TankEnv(args.game_path,
                  opp_fp_and_elo=[],
                  game_port=args.base_port,
                  my_port=args.my_port,
                  rand_opp=True)

    if not args.canvas_game_path:
        args.canvas_game_path = args.game_path
    canvas = TankEnv(args.canvas_game_path,
                     opp_fp_and_elo=[],
                     game_port=args.base_port + 1,
                     my_port=args.my_port + 1,
                     image_based=True,
                     level_path=args.level_path,
                     rand_opp=True,
                     p=args.env_p)

    obs = env.reset()