try:
    env = TankEnv(args.game_path,
                  opp_fp_and_elo=[],
                  game_port=args.base_port + port,
                  my_port=args.base_port + port + 1,
                  level_path=args.level_path,
                  image_based=pop_stats[p_idx]["image_based"],
                  p=pop_stats[p_idx]["env_p"],
                  verbose=True)
    print("Worker", args.worker_idx, "got here", 4, flush=True)
    # Collect args.N trajectories against every opponent in the population
    for i, opp in enumerate(tqdm(pop, file=sys.stdout)):
        env.load_new_opp(0, curr_model_path(args.local_pop_dir, opp, pop_stats[pop.index(opp)]), 0)
        for j in range(args.N):
            obs = env.reset()
            # Re-sample initial states until the agent starts on the requested side
            side = -1 if args.from_right else 1
            while env.raw_state[0] * side > 0:
                obs = env.reset()
            done = False
            for k in range(args.max_len + 1):
                traj_set[i, j, k, :, :, :] = obs
                if done or k == args.max_len:
                    # Record the index of the last frame captured for this episode
                    info_set[i, j] = k
                    break
                else:
                    action, _ = p_model.predict(obs)
                    obs, _, done, _ = env.step(action)
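# --- Sketch (not from the original source): one plausible allocation of the
# trajectory buffers indexed above.  The ranks follow directly from how
# traj_set[i, j, k, :, :, :] and info_set[i, j] are indexed; the observation
# shape (H, W, C) and the dtypes are assumptions and would normally be taken
# from env.observation_space.
import numpy as np

H, W, C = 64, 64, 3  # hypothetical image-observation shape

# One frame per (opponent, episode, timestep) triple.
traj_set = np.zeros((len(pop), args.N, args.max_len + 1, H, W, C), dtype=np.float32)

# Index of the last recorded frame for each (opponent, episode) pair.
info_set = np.zeros((len(pop), args.N), dtype=np.int64)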
class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=[0, 0]):
        super(AIMatchmaker, self).__init__()
        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir
        self.agent_elo = starting_elo if starting_elo is not None else self.all_elos[0]
        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode
        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)
        self.started = False
        self.next_opp()
        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)
        if self.mm == 1:
            # ELO-based matchmaking: opponents with ELOs closer to the agent's ELO
            # are preferred (but not guaranteed)
            weights += np.array([
                weight_func(elo - self.agent_elo, self.D)
                for elo in self.all_elos
            ], dtype=np.float32)
        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1
            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero out weights for opponents rated at or below the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents rated above the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Fall back to the highest-rated opponent if none is rated above the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero out weights for opponents rated at or above the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents rated below the agent
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Fall back to the lowest-rated opponent if none is rated below the agent
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
        self.current_opp_idx = choice_with_normalization(
            [i for i in range(len(self.all_elos))], weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:", self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp), self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5
            agent_elo_change, _ = elo_change(self.agent_elo, self.current_opp_elo, self.K, win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:", self.agent_elo, flush=True)
        else:
            self.started = True
        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
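# --- Sketch (not the repository's actual implementation): AIMatchmaker relies on
# elo_change and choice_with_normalization, which are defined elsewhere.  The
# versions below assume elo_change follows the standard Elo update (expected
# score from a logistic curve, change scaled by the K-factor) and that
# choice_with_normalization falls back to a uniform draw when every weight is
# zero; the real helpers may differ.
import numpy as np

def elo_change(elo_a, elo_b, K, score_a):
    # Expected score of player A against player B under the standard Elo model.
    expected_a = 1.0 / (1.0 + 10.0 ** ((elo_b - elo_a) / 400.0))
    change_a = K * (score_a - expected_a)
    # Return the change for both players; AIMatchmaker only uses the first value.
    return change_a, -change_a

def choice_with_normalization(items, weights):
    # Weighted random choice over items; uniform when all weights are zero.
    weights = np.asarray(weights, dtype=np.float64)
    if weights.sum() == 0:
        weights = np.ones_like(weights)
    return int(np.random.choice(items, p=weights / weights.sum()))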
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2

    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))

    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = human_stats["elo"][-1] if len(human_stats["elo"]) > 0 else current_opp_elo

    try:
        env = TankEnv(args.game_path,
                      opp_fp_and_elo=[(opp_fp(args.model_dir, current_opp), current_opp_elo)],
                      game_port=args.base_port,
                      my_port=args.my_port,
                      image_based=args.image_based,
                      level_path=args.level_path,
                      p=args.env_p)
        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)

            score = play_match(env, args.num_games)
            human_win_rate = ((score[WINS] - score[LOSSES]) / sum(score) + 1) / 2

            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K, human_win_rate)
            human_elo += int(human_elo_change)
            human_stats["elo"].append(human_elo)

            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)

            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp), current_opp_elo)
        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")
    finally:
        env.close()
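# --- Example usage sketch (not from the original source): the field names below
# are exactly those read by human_matchmaking; every value is a placeholder and
# the paths are hypothetical.
if __name__ == "__main__":
    from argparse import Namespace

    example_args = Namespace(
        game_path="path/to/game_build",       # hypothetical path to the game executable
        model_dir="path/to/model_dir",        # hypothetical directory of saved opponent models
        human_db="path/to/human_stats.json",  # hypothetical human-stats file
        avg_len=10,
        base_port=50000,
        my_port=50001,
        image_based=False,
        level_path=None,
        env_p=3,
        num_games=5,
    )
    human_matchmaking(example_args)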