try:
    env = TankEnv(args.game_path,
                  opp_fp_and_elo=[],
                  game_port=args.base_port + port,
                  my_port=args.base_port + port + 1,
                  level_path=args.level_path,
                  image_based=pop_stats[p_idx]["image_based"],
                  p=pop_stats[p_idx]["env_p"],
                  verbose=True)
    print("Worker", args.worker_idx, "got here", 4, flush=True)
    # Collect args.N trajectories against every opponent in the population
    for i, opp in enumerate(tqdm(pop, file=sys.stdout)):
        env.load_new_opp(0, curr_model_path(args.local_pop_dir, opp, pop_stats[pop.index(opp)]), 0)
        for j in range(args.N):
            obs = env.reset()
            # Re-sample initial states until the agent starts on the requested side
            side = -1 if args.from_right else 1
            while env.raw_state[0] * side > 0:
                obs = env.reset()
            done = False
            for k in range(args.max_len + 1):
                traj_set[i, j, k, :, :, :] = obs
                if done or k == args.max_len:
                    # Record the index of the last frame captured for this episode
                    info_set[i, j] = k
                    break
                else:
                    action, _ = p_model.predict(obs)
                    obs, _, done, _ = env.step(action)
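# --- Sketch (not from the original source): one plausible allocation of the
# trajectory buffers indexed above.  The ranks follow directly from how
# traj_set[i, j, k, :, :, :] and info_set[i, j] are indexed; the observation
# shape (H, W, C) and the dtypes are assumptions and would normally be taken
# from env.observation_space.
import numpy as np

H, W, C = 64, 64, 3  # hypothetical image-observation shape

# One frame per (opponent, episode, timestep) triple.
traj_set = np.zeros((len(pop), args.N, args.max_len + 1, H, W, C), dtype=np.float32)

# Index of the last recorded frame for each (opponent, episode) pair.
info_set = np.zeros((len(pop), args.N), dtype=np.int64)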
class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=[0, 0]):
        super(AIMatchmaker, self).__init__()
        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir
        self.agent_elo = starting_elo if starting_elo is not None else self.all_elos[0]
        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode
        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)
        self.started = False
        self.next_opp()
        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)
        if self.mm == 1:
            # ELO-based matchmaking: opponents with ELOs closer to the agent's ELO
            # are preferred (but not guaranteed)
            weights += np.array([
                weight_func(elo - self.agent_elo, self.D)
                for elo in self.all_elos
            ], dtype=np.float32)
        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1
            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero out weights for opponents rated at or below the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents rated above the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero out weights for opponents rated at or above the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents rated below the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
        self.current_opp_idx = choice_with_normalization(
            [i for i in range(len(self.all_elos))], weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:", self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp), self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5
            agent_elo_change, _ = elo_change(self.agent_elo, self.current_opp_elo, self.K, win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:", self.agent_elo, flush=True)
        else:
            self.started = True
        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
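# --- Sketch (not the repository's actual implementation): AIMatchmaker relies on
# elo_change and choice_with_normalization, which are defined elsewhere.  The
# versions below assume elo_change follows the standard Elo update (expected
# score from a logistic curve, change scaled by the K-factor) and that
# choice_with_normalization falls back to a uniform draw when every weight is
# zero; the real helpers may differ.
import numpy as np

def elo_change(elo_a, elo_b, K, score_a):
    # Expected score of player A against player B under the standard Elo model.
    expected_a = 1.0 / (1.0 + 10.0 ** ((elo_b - elo_a) / 400.0))
    change_a = K * (score_a - expected_a)
    # Return the change for both players; AIMatchmaker only uses the first value.
    return change_a, -change_a

def choice_with_normalization(items, weights):
    # Weighted random choice over items; uniform when all weights are zero.
    weights = np.asarray(weights, dtype=np.float64)
    if weights.sum() == 0:
        weights = np.ones_like(weights)
    return int(np.random.choice(items, p=weights / weights.sum()))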
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(human_stats["elo"]) > 0 else current_opp_elo

    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir, current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)
        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            human_win_rate = ((score[WINS] - score[LOSSES]) / sum(score) + 1) / 2

            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K, human_win_rate)
            human_elo += int(human_elo_change)
            human_stats["elo"].append(human_elo)

            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp), current_opp_elo)
        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")
    finally:
        env.close()
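# --- Example usage sketch (not from the original source): the field names below
# are exactly those read by human_matchmaking; every value is a placeholder and
# the paths are hypothetical.
if __name__ == "__main__":
    from argparse import Namespace

    example_args = Namespace(
        game_path="path/to/game_build",       # hypothetical path to the game executable
        model_dir="path/to/model_dir",        # hypothetical directory of saved opponent models
        human_db="path/to/human_stats.json",  # hypothetical human-stats file
        avg_len=10,
        base_port=50000,
        my_port=50001,
        image_based=False,
        level_path=None,
        env_p=3,
        num_games=5,
    )
    human_matchmaking(example_args)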