class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=[0, 0]):
        super(AIMatchmaker, self).__init__()

        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir
        self.agent_elo = starting_elo if starting_elo is not None else self.all_elos[0]

        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode
        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)

        self.started = False
        self.next_opp()

        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)

        if self.mm == 1:
            # ELO-based matchmaking: opponent ELOs closer to the agent's ELO are preferred (but not guaranteed)
            weights += np.array([weight_func(elo - self.agent_elo, self.D) for elo in self.all_elos],
                                dtype=np.float32)

        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1
            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero the weights of opponents whose ELO is <= the agent's ELO
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose ELO is > the agent's ELO
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero the weights of opponents whose ELO is >= the agent's ELO
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose ELO is < the agent's ELO
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1

        self.current_opp_idx = choice_with_normalization([i for i in range(len(self.all_elos))], weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:", self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp), self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            # Update the agent's ELO based on the outcome of the game that just finished
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5
            agent_elo_change, _ = elo_change(self.agent_elo, self.current_opp_elo, self.K, win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:", self.agent_elo, flush=True)
        else:
            self.started = True
        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
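
# The matchmaker above depends on a few helpers defined elsewhere in the repo
# (weight_func, choice_with_normalization, opp_fp, elo_change). The sketch below
# only illustrates what choice_with_normalization plausibly does -- weighted
# sampling with a uniform fallback when every weight is zero (e.g. when
# matchmaking_mode=0 and no win/loss ratio is active) -- and is not the
# repository's actual implementation.
def choice_with_normalization_sketch(options, weights):
    total = float(np.sum(weights))
    if total <= 0:
        # All-zero weights: fall back to a uniform choice over all options
        return np.random.choice(options)
    return np.random.choice(options, p=np.asarray(weights, dtype=np.float64) / total)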
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    # Start the human against the middle of the opponent pool
    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(human_stats["elo"]) > 0 else current_opp_elo

    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir, current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)
        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            # Map the win/loss differential onto a [0, 1] score; any drawn games
            # counted in score contribute half a point each
            human_win_rate = ((score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K, human_win_rate)
            human_elo += int(human_elo_change)

            human_stats["elo"].append(human_elo)
            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp), current_opp_elo)
        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")
    finally:
        env.close()
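
# Both AIMatchmaker.reset and human_matchmaking update ratings through an
# elo_change helper defined elsewhere in the repo. A minimal sketch, assuming
# the standard logistic Elo model with a 400-point scale, is given below; the
# repo's actual helper may differ (e.g. in its scale constant).
def elo_change_sketch(elo_a, elo_b, k, score_a):
    # Expected score for player A against player B
    expected_a = 1.0 / (1.0 + 10 ** ((elo_b - elo_a) / 400.0))
    # score_a is 1 for a win, 0.5 for a draw, 0 for a loss; A gains what B loses
    delta_a = k * (score_a - expected_a)
    return delta_a, -delta_a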
        model = PPO("CnnPolicy", env, n_steps=64)
    else:
        model = PPO("MlpPolicy", env, n_steps=64)
    print(model.policy)

    try:
        if args.train:
            model.learn(total_timesteps=args.num_steps)
        else:
            # Evaluation loop: step the environment without learning
            obs = env.reset()
            if args.image_based and args.ai_view:
                fig = plt.gcf()
                fig.show()
                fig.canvas.draw()
            for _ in tqdm(range(args.num_steps)):
                if args.image_based and args.ai_view:
                    # Display the agent's image-based observation
                    plt.imshow(obs, origin="lower", interpolation='none')
                    fig.canvas.draw()
                if model:
                    action, _ = model.predict(obs)
                elif args.rand_p1:
                    action = np.random.rand(5) * 2 - 1
                else:
                    action = np.zeros(5, dtype=np.float32)
                obs, reward, done, info = env.step(action)
                if done:
                    obs = env.reset()
    finally:
        env.close()
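
# Hypothetical usage sketch (not part of the repo's scripts): the AIMatchmaker
# wrapper defined above can be handed to PPO exactly like the plain env used in
# this script. The paths below are placeholders, and all_stats/all_opps/all_elos
# are assumed to be built the same way human_matchmaking builds them
# (load_stats, sorted_keys, avg_elo).
def train_against_pool_sketch(all_stats, all_opps, all_elos):
    env = AIMatchmaker(all_stats, all_opps, all_elos,
                       game_path="path/to/game_build",  # placeholder path
                       model_dir="path/to/model_dir",   # placeholder path
                       matchmaking_mode=1)              # prefer similarly rated opponents
    try:
        model = PPO("MlpPolicy", env, n_steps=64)
        model.learn(total_timesteps=100000)
        print("final agent ELO:", env.get_agent_elo())
    finally:
        env.close()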
                      rand_opp=True)
        if not args.canvas_game_path:
            args.canvas_game_path = args.game_path
        canvas = TankEnv(args.canvas_game_path,
                         opp_fp_and_elo=[],
                         game_port=args.base_port + 1,
                         my_port=args.my_port + 1,
                         image_based=True,
                         level_path=args.level_path,
                         rand_opp=True,
                         p=args.env_p)

        obs = env.reset()
        for i in tqdm(range(args.num_obs)):
            if i % (args.num_obs // 10) == 0:
                print(i * 100 // args.num_obs, "% complete", sep="", flush=True)
            # Save states
            obs_set[i] = obs.copy()
            canvas.draw_state(obs)
            img_set[i] = canvas.state.copy()
            # Generate next observation
            action = np.random.rand(5) * 2 - 1
            obs, _, done, _ = env.step(action)
            if done:
                obs = env.reset()

        savez_compressed(args.save_loc, obs=obs_set, img=img_set)
    finally:
        env.close()
        canvas.close()
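
# Hypothetical sketch of reading the dataset written above: savez_compressed
# stores the two arrays under the "obs" and "img" keys (numpy appends .npz to
# the file name if it is missing).
def load_obs_dataset_sketch(save_loc):
    data = np.load(save_loc if save_loc.endswith(".npz") else save_loc + ".npz")
    return data["obs"], data["img"]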