Ejemplo n.º 1
0
def _indexed_path(path, index):
    """Return *path* with ``-<index>`` inserted before every ``.txt``.

    ``None`` is passed through unchanged so optional log paths can stay unset.
    """
    if path is None:
        return None
    return path.replace(".txt", "-" + str(index) + ".txt")


def make_env_stack(num_envs,
                   game_path,
                   base_port,
                   game_log_path,
                   opp_fp_and_elo,
                   trainee_elo,
                   elo_match=True,
                   survivor=False,
                   stdout_path=None,
                   level_path=None,
                   image_based=False,
                   time_reward=0.,
                   env_p=3):
    """Build and reset either a ``SubprocVecEnv`` of ``TankEnv``s or a single ``TankEnv``.

    Args:
        num_envs: Number of sub-environments. Any value ``>= 1`` yields a
            ``SubprocVecEnv``; anything smaller yields one plain ``TankEnv``.
        game_path: Forwarded to ``TankEnv`` as its first positional argument.
        base_port: Game port of the first environment. Sub-environment ``i``
            uses ``base_port + 2 * i``.
        game_log_path: Game log path. In the vectorised case every ``.txt`` in
            it is replaced by ``-<i>.txt`` so each sub-environment gets its
            own file.
        opp_fp_and_elo: Forwarded to ``TankEnv`` unchanged.
        trainee_elo: Forwarded to ``TankEnv`` as ``center_elo``.
        elo_match: Forwarded to ``TankEnv``.
        survivor: Forwarded to ``TankEnv``.
        stdout_path: Optional stdout path, suffixed per sub-environment the
            same way as ``game_log_path``. ``None`` is forwarded as ``None``.
        level_path: Forwarded to ``TankEnv``.
        image_based: Forwarded to ``TankEnv``.
        time_reward: Forwarded to ``TankEnv``.
        env_p: Forwarded to ``TankEnv`` as ``p``.

    Returns:
        The environment (vectorised or single) after ``reset()`` has been
        called on it once.
    """
    if num_envs >= 1:

        def make_factory(index):
            # Resolve every per-index value now, so each factory keeps its own
            # port and paths instead of sharing the loop variable.
            port = base_port + (index * 2)
            log_path = _indexed_path(game_log_path, index)
            out_path = _indexed_path(stdout_path, index)

            def factory():
                # Only the vectorised environments are created verbose.
                return TankEnv(game_path,
                               game_port=port,
                               game_log_path=log_path,
                               opp_fp_and_elo=opp_fp_and_elo,
                               elo_match=elo_match,
                               center_elo=trainee_elo,
                               survivor=survivor,
                               stdout_path=out_path,
                               verbose=True,
                               level_path=level_path,
                               image_based=image_based,
                               time_reward=time_reward,
                               p=env_p)

            return factory

        envs = [make_factory(index) for index in range(num_envs)]
        # A single worker is forked directly; several go through forkserver.
        start_method = "fork" if num_envs == 1 else "forkserver"
        env_stack = SubprocVecEnv(envs, start_method=start_method)
        env_stack.reset()
        return env_stack

    env = TankEnv(game_path,
                  game_port=base_port,
                  game_log_path=game_log_path,
                  opp_fp_and_elo=opp_fp_and_elo,
                  elo_match=elo_match,
                  center_elo=trainee_elo,
                  survivor=survivor,
                  stdout_path=stdout_path,
                  level_path=level_path,
                  image_based=image_based,
                  time_reward=time_reward,
                  p=env_p)
    env.reset()
    return env
Ejemplo n.º 2
0
def run_model(args):
    """Play matches in a ``TankEnv`` forever, printing the running score.

    Player 1 is driven by a loaded PPO model when ``args.p1`` is set (or by
    the opponent's model when ``args.p1same`` is set), otherwise by random
    actions when ``args.rand_p1`` is set, otherwise by all-zero actions.
    When ``args.image_based`` and one of the view flags are set, the current
    observation (or ``env.opp_state`` for the reversed view) is drawn with
    matplotlib before every step.

    The loop has no exit condition; the function never returns.
    """
    env = TankEnv(
        args.game_path,
        opp_fp_and_elo=[(args.opp, 1000)],
        game_port=args.base_port,
        my_port=args.my_port,
        image_based=args.image_based,
        level_path=args.level_path,
        rand_opp=args.rand_opp,
        p=args.env_p,
        opp_p=args.opp_env_p,
    )

    if args.p1:
        model = PPO.load(args.p1)
    elif args.p1same:
        model = PPO.load(args.opp)
    else:
        model = None

    def view_enabled():
        # Re-evaluated on every use, exactly as the flags are read each frame.
        return args.image_based and (args.ai_view or args.rev_ai_view)

    # Indexed by info["winner"]; the layout is spelled out in the banner below.
    score = [0, 0, 0]
    print("Score: [Player1 Wins, Player2 Wins, Ties]")

    obs = env.reset()
    if view_enabled():
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()

    while True:
        if view_enabled():
            frame = env.opp_state if args.rev_ai_view else obs
            plt.imshow(frame, origin="lower")
            fig.canvas.draw()

        if model:
            action, _ = model.predict(obs)
        elif args.rand_p1:
            # Uniform random action in [-1, 1) for each of the 5 components.
            action = np.random.rand(5) * 2 - 1
        else:
            action = np.zeros(5, dtype=np.float32)

        obs, _reward, done, info = env.step(action)
        if not done:
            continue

        score[info["winner"]] += 1
        print("Score:", score)
        obs = env.reset()
class AIMatchmaker(gym.Env):
    """Gym environment that wraps a ``TankEnv`` and picks a new opponent on every reset.

    The wrapper tracks an ELO rating for the agent, updates it from the
    result of the previous game each time ``reset`` is called, and then
    samples the next opponent from ``all_opps``. Sampling can be weighted by
    ELO distance (``matchmaking_mode == 1``) and can additionally be biased
    towards stronger or weaker opponents to steer the agent's win/loss
    counts towards ``win_loss_ratio``.
    """
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=(0, 0)):
        """Create the wrapped ``TankEnv`` and load the first opponent.

        Args:
            all_stats: Passed through ``combine_winrates`` and stored.
            all_opps: Opponent identifiers; each is resolved to a file with
                ``opp_fp(model_dir, opp)``.
            all_elos: List of opponent ELOs, parallel to ``all_opps``. Must
                support ``.index`` (i.e. be a list).
            game_path: Forwarded to ``TankEnv``.
            model_dir: Directory handed to ``opp_fp`` when loading opponents.
            base_port: Forwarded to ``TankEnv`` as ``game_port``.
            my_port: Forwarded to ``TankEnv``.
            image_based: Forwarded to ``TankEnv``.
            level_path: Forwarded to ``TankEnv``.
            env_p: Forwarded to ``TankEnv`` as ``p``.
            starting_elo: Initial agent ELO; ``None`` means ``all_elos[0]``.
            K: Forwarded to ``elo_change`` (presumably the ELO K-factor).
            D: Forwarded to ``weight_func`` together with the ELO difference.
            time_reward: Forwarded to ``TankEnv``.
            matchmaking_mode: ``1`` enables ELO-distance weighting; any other
                value leaves the base weights at zero.
            elo_log_interval: The agent ELO is appended to ``elo_log`` every
                this many calls to ``step``.
            win_loss_ratio: Two-element ``(wins, losses)`` target. All zeros
                disables win/loss balancing.
        """
        super().__init__()

        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir

        self.agent_elo = (starting_elo
                          if starting_elo is not None else self.all_elos[0])
        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode

        # [wins, losses] not yet folded into a complete win/loss set.
        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        # Number of complete win_loss_ratio sets played so far.
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)

        # False until the first reset(); there is no previous game to score.
        self.started = False
        self.next_opp()

        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def _bias_weights(self, weights, need_losses):
        """Restrict *weights* in place to stronger (or weaker) opponents.

        With ``need_losses`` true only opponents whose ELO is strictly above
        the agent's stay eligible; otherwise only those strictly below. In
        ELO matchmaking mode the ineligible weights are zeroed and the rest
        keep their value; in the other modes the eligible ones are set to 1.
        If nothing ends up eligible, the single highest-rated (respectively
        lowest-rated) opponent is selected.
        """
        for idx, elo in enumerate(self.all_elos):
            if need_losses:
                eligible = elo > self.agent_elo
            else:
                eligible = elo < self.agent_elo
            if self.mm == 1:
                if not eligible:
                    weights[idx] = 0
            elif eligible:
                weights[idx] = 1

        if sum(weights) == 0:
            # No opponent on the wanted side of the agent's ELO: fall back to
            # the most extreme opponent available.
            extreme = max(self.all_elos) if need_losses else min(
                self.all_elos)
            weights[self.all_elos.index(extreme)] = 1

    def next_opp(self):
        """Sample the next opponent and load it into the wrapped environment."""
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)
        if self.mm == 1:
            # ELO-based matchmaking: opponents with an ELO closer to the
            # agent's are preferred (but not guaranteed).
            weights += np.array([
                weight_func(elo - self.agent_elo, self.D)
                for elo in self.all_elos
            ],
                                dtype=np.float32)

        if any(self.win_loss_ratio):
            # Fold every complete (wins, losses) set out of the running tally.
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1

            quota_met = self.uncounted_games >= self.win_loss_ratio
            if quota_met[0] and not quota_met[1]:
                # Win quota reached, loss quota not: need more losses.
                self._bias_weights(weights, need_losses=True)
            elif not quota_met[0] and quota_met[1]:
                # Loss quota reached, win quota not: need more wins.
                self._bias_weights(weights, need_losses=False)

        self.current_opp_idx = choice_with_normalization(
            list(range(len(self.all_elos))), weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp),
                              self.current_opp_elo)

    def get_agent_elo(self):
        """Return the agent's current ELO."""
        return self.agent_elo

    def reset(self):
        """Score the previous game, pick a new opponent and reset the game.

        On every call after the first, ``env.last_winner`` is read: ``0``
        counts as an agent win, ``1`` as a loss and anything else as a draw.
        The agent ELO is then updated via ``elo_change``.

        Returns:
            Whatever the wrapped environment's ``reset`` returns.
        """
        if self.started:
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                # Draws do not count towards the win/loss tally.
                win_rate = .5

            agent_elo_change, _ = elo_change(self.agent_elo,
                                             self.current_opp_elo, self.K,
                                             win_rate)
            # The change is truncated towards zero before being applied.
            self.agent_elo += int(agent_elo_change)
        else:
            self.started = True

        self.next_opp()
        return self.env.reset()

    def step(self, action):
        """Forward *action* to the wrapped environment.

        The agent ELO is appended to ``elo_log`` whenever the step counter is
        a multiple of ``elo_log_interval`` (including the very first step).
        """
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        """Rendering is not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def close(self):
        """Close the wrapped environment."""
        self.env.close()
Ejemplo n.º 4
0
    image_based=args.image_based,
    level_path=args.level_path,
    rand_opp=True,
    p=args.env_p)
# Image observations get the "CnnPolicy"; everything else uses "MlpPolicy".
model = PPO("CnnPolicy" if args.image_based else "MlpPolicy",
            env,
            n_steps=64)

# Print the policy object of the freshly built model.
print(model.policy)
  
try:
    if args.train:
        model.learn(total_timesteps=args.num_steps)
    else:
        obs = env.reset()
        if args.image_based and args.ai_view:
            fig = plt.gcf()
            fig.show()
            fig.canvas.draw()
        for _ in tqdm(range(args.num_steps)):
            if args.image_based and args.ai_view:
                plt.imshow(obs, origin="lower", interpolation='none')
                fig.canvas.draw()
            if model:
                action, _ = model.predict(obs)
            elif args.rand_p1:
                action = np.random.rand(5) * 2 - 1
            else:
                action = np.zeros(5, dtype=np.float32)
            obs, reward, done, info = env.step(action)