Code example #1
    def run(self):
        with open(Config.RESULTS_FILENAME,
                  'a') as results_logger, open(Config.EPISDOES_LOG_FILENAME,
                                               'a') as eval_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
            episode_scores = []

            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length = self.episode_log_q.get()  # get episode log from queue
                results_logger.write(
                    '%s, %d, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward,
                     length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length))

                episode_scores.append(reward)
                if len(episode_scores) >= 1000:
                    eval_logger.write("{} {} {} {}\n".format(
                        self.episode_count.value,
                        np.max(episode_scores[-100:]),
                        np.mean(episode_scores[-100:]),
                        np.min(episode_scores[-100:])))
                    eval_logger.flush()
                    episode_scores = []

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f RPPS: %5d] '
                          '[PPS: %5d TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d]' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value, reward, rolling_reward /
                           results_q.qsize(), rolling_frame_count /
                           (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(), self.trainer_count.value,
                           self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
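
Note: most of the examples on this page share the same rolling-statistics idiom: a bounded queue.Queue (Python 3, imported via "import queue") acts as a fixed-size window over recent episodes; once the window is full, the oldest entry is popped and its reward and length are subtracted from the running sums before the new entry is pushed, so the rolling mean is updated in constant time. Below is a minimal, self-contained sketch of that idiom; the RollingStats name and its fields are illustrative and not taken from any of the projects above.

import queue

class RollingStats:
    """Illustrative sketch of the rolling-window bookkeeping used in the examples."""

    def __init__(self, window_size):
        self.window = queue.Queue(maxsize=window_size)
        self.rolling_reward = 0.0
        self.rolling_length = 0

    def add(self, reward, length):
        # Once the window is full, evict the oldest episode and subtract
        # its contribution, mirroring the results_q handling above.
        if self.window.full():
            old_reward, old_length = self.window.get()
            self.rolling_reward -= old_reward
            self.rolling_length -= old_length
        self.window.put((reward, length))
        self.rolling_reward += reward
        self.rolling_length += length

    def mean_reward(self):
        # Mean reward over the episodes currently in the window.
        return self.rolling_reward / self.window.qsize()
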
Code example #2
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            rolling_distance = 0
            rolling_success_rate = 0
            max_success_rate = -1
            
            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                distance, reward, length = self.episode_log_q.get()
                results_logger.write('%s, %d, %d\n' % (distance, reward, length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                rolling_distance += distance
                success_rate = 1 if distance <= 1 else 0
                rolling_success_rate += success_rate

                if results_q.full():
                    old_distance, old_reward, old_length, old_success_rate = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_distance  # note: old_distance is a float, not a timestamp; first_time is unused below (RPPS is hard-coded)

                    rolling_distance -= old_distance
                    rolling_success_rate -= old_success_rate

                results_q.put((distance, reward, length, success_rate))

                self.mode.value = 0
                if rolling_success_rate / results_q.qsize() > max_success_rate:
                    max_success_rate = rolling_success_rate / results_q.qsize()
                    self.mode.value = 1

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print(
                        '[Time: %8d] '
                        '[Episode: %8d Score: %10.4f] '
                        '[RScore: %10.4f RPPS: %5d] '
                        '[PPS: %5d TPS: %5d] '
                        '[NT: %2d NP: %2d NA: %2d]'
                        % (int(time.time()-self.start_time),
                           self.episode_count.value, reward,
                           rolling_reward / results_q.qsize(),
                           2313, # rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(),
                           self.trainer_count.value, self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
Code example #3
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length = self.episode_log_q.get()
                results_logger.write(
                    '%s, %d, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward,
                     length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                # COPYPASTA FROM Server.py TODO: Refactor! #
                step = min(self.episode_count.value,
                           Config.ANNEALING_EPISODE_COUNT - 1)
                beta_multiplier = (Config.BETA_END - Config.BETA_START
                                   ) / Config.ANNEALING_EPISODE_COUNT
                beta = Config.BETA_START + beta_multiplier * step
                ###

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f RPPS: %5d] '
                          '[PPS: %5d TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d] '
                          '[Beta: %5.4f] ' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value, reward, rolling_reward /
                           results_q.qsize(), rolling_frame_count /
                           (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(), self.trainer_count.value,
                           self.predictor_count.value, self.agent_count.value,
                           beta))
                    sys.stdout.flush()
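
In example #3, beta is annealed linearly from Config.BETA_START to Config.BETA_END over Config.ANNEALING_EPISODE_COUNT episodes, with the step clamped so the value stops changing once annealing is complete. A standalone sketch of that schedule, with illustrative default values standing in for the project's Config:

def annealed_beta(episode_count, beta_start=0.01, beta_end=0.001,
                  annealing_episodes=400000):
    # Clamp the step so beta stops changing after the annealing period.
    step = min(episode_count, annealing_episodes - 1)
    return beta_start + (beta_end - beta_start) / annealing_episodes * step
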
Code example #4
    def run(self):
        #  try:
        with open(os.path.join(Config.LOGDIR, Config.RESULTS_FILENAME), 'a') as results_logger:
            # Init parameters
            rolling_frame_count = 0
            rolling_reward      = 0
            results_q           = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
            self.start_time     = time.time()
            first_time          = datetime.now()

            while True:
                episode_time, reward, length = self.episode_log_q.get()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                # Append episode_time, reward, length to results_q
                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time
                results_q.put((episode_time, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                # Print result to table
                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f RPPS: %5d] '
                          '[PPS: %5d TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d]'
                        % (int(time.time()-self.start_time),
                           self.episode_count.value,
                           reward,
                           rolling_reward / results_q.qsize(),
                           rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                           self.FPS(),
                           self.TPS(),
                           self.trainer_count.value,
                           self.predictor_count.value,
                           self.agent_count.value))
                    self.reward_log.value = reward
                    self.roll_reward_log.value = rolling_reward/results_q.qsize()

                    sys.stdout.flush()

                # Results_logger (results.txt)
                # Log date, rolling reward, length
                results_logger.write('%s, %10.4f, %d\n' % (episode_time.strftime("%Y-%m-%d %H:%M:%S"), rolling_reward / results_q.qsize(), length))
                results_logger.flush()
Code example #5
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length, iou, name, r0_, p_mask_ = self.episode_log_q.get()
                results_logger.write(
                    '%s, %d, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward,
                     length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward, old_length, old_iou, old_name = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length, iou, name))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    if iou >= 0.95 and reward < 0:
                        print(str(r0_) + '\n')
                        print(str(p_mask_) + '\n')
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f RPPS: %5d] '
                          '[PPS: %5d TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d]'
                          '[IOU: %10.2f]'
                          '[last: %s]' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value, reward, rolling_reward /
                           results_q.qsize(), rolling_frame_count /
                           (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(), self.trainer_count.value,
                           self.predictor_count.value, self.agent_count.value,
                           iou, name))
                    #print(str(r0_) + '\n')
                    sys.stdout.flush()
Code example #6
    def run(self):
        self.log_writer = tf.summary.FileWriter(Config.RESULT_DIR)
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length = self.episode_log_q.get()
                results_logger.write(
                    '%s, %d, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward,
                     length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1
                self.add_summary(self.episode_count.value, 'reward_vs_episode',
                                 reward, self.log_writer)

                rolling_frame_count += length
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f RPPS: %5d] '
                          '[PPS: %5d TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d]' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value, reward, rolling_reward /
                           results_q.qsize(), rolling_frame_count /
                           (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(), self.trainer_count.value,
                           self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
Code example #7
    def run(self):
        with open("/home/lsy/Desktop/results/%s" % Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
            
            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length, steps = self.episode_log_q.get()
                results_logger.write('%d\n' % (reward))
                print("reward:"+str(reward))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print(
                        '[Time: %8d] '
                        '[Steps: %8d] '
                        '[Episode: %8d Score: %10.4f] '
                        '[RScore: %10.4f RPPS: %5d] '
                        '[PPS: %5d TPS: %5d] '
                        '[NT: %2d NP: %2d NA: %2d]'
                        % (int(time.time()-self.start_time),
                           steps,
                           self.episode_count.value, reward,
                           rolling_reward / results_q.qsize(),
                           rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(),
                           self.trainer_count.value, self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
Code example #8
File: ProcessStats.py  Project: superjax/NNOA
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
            
            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward, length = self.episode_log_q.get()
                results_logger.write('%s, %d, %d\n' % (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward, length))
                results_logger.flush()

                self.total_frame_count += length
                self.episode_count.value += 1

                rolling_frame_count += length
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_frame_count -= old_length
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print(
                        '[Time: %8d] '
                        '[Episode: %8d Score: %10.4f] '
                        '[RScore: %10.4f RPPS: %5d] '
                        '[PPS: %5d TPS: %5d] '
                        '[NT: %2d NP: %2d NA: %2d]'
                        % (int(time.time()-self.start_time),
                           self.episode_count.value, reward,
                           rolling_reward / results_q.qsize(),
                           rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                           self.FPS(), self.TPS(),
                           self.trainer_count.value, self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
Code example #9
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:
            rolling_frame_count = 0
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            self.start_time = time.time()
            first_time = datetime.now()
            while True:
                episode_time, reward = self.episode_log_q.get()
                results_logger.write(
                    '%s, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), reward))
                results_logger.flush()

                self.episode_count.value += 1

                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_reward = results_q.get()
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, reward))

                if self.episode_count.value % GlobalConfig.CHECKPOINT_MARK == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d] '
                          '[Episode: %8d Score: %10.4f] '
                          '[RScore: %10.4f] '
                          '[TPS: %5d] '
                          '[NT: %2d NP: %2d NA: %2d]' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value,
                           reward, rolling_reward / results_q.qsize(),
                           self.TPS(), self.trainer_count.value,
                           self.predictor_count.value, self.agent_count.value))
                    sys.stdout.flush()
Code example #10
    def run(self):
        with open(Config.RESULTS_FILENAME, 'a') as results_logger:

            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

            self.start_time = time.time()
            first_time = datetime.now()

            while not (self.exit_flag.value and self.episode_log_q.empty()):

                episode_time, player, pid, reward, length = self.episode_log_q.get()
                results_logger.write(
                    '%s, %s, %s, %d, %d\n' %
                    (episode_time.strftime("%Y-%m-%d %H:%M:%S"), player, pid,
                     reward, length))
                results_logger.flush()

                self.episode_count.value += 1
                rolling_reward += reward

                if results_q.full():
                    old_episode_time, old_player, old_pid, old_reward, old_length = results_q.get()
                    rolling_reward -= old_reward
                    first_time = old_episode_time

                results_q.put((episode_time, player, pid, reward, length))

                if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                    self.should_save_model.value = 1

                if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d Episode: %8d] '
                          '[%s %s\'s Reward: %10.4f RRward: %10.4f] ' %
                          (int(time.time() - self.start_time),
                           self.episode_count.value, player, pid, reward,
                           rolling_reward / results_q.qsize()))
                    sys.stdout.flush()
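
Unlike the while True loops in the other examples, example #10 terminates gracefully: it keeps consuming episode logs until shutdown has been requested (exit_flag) and the queue has been drained, so episodes logged just before shutdown are not lost. A minimal sketch of that loop condition, assuming exit_flag is a multiprocessing.Value flag, episode_log_q is a multiprocessing.Queue, and handle is an illustrative callback:

def drain_loop(exit_flag, episode_log_q, handle):
    # Exit only once shutdown was requested AND every queued episode has been
    # consumed; until then, get() blocks waiting for the next entry.
    while not (exit_flag.value and episode_log_q.empty()):
        handle(episode_log_q.get())
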
Code example #11
    def __init__(self):
        self.stats = ProcessStats()

        self.combining_q = Queue(maxsize=Config.MAX_QUEUE_SIZE)
        self.training_q = queue.Queue(maxsize=Config.MAX_QUEUE_SIZE)
        self.prediction_q = Queue(maxsize=Config.MAX_QUEUE_SIZE)

        self.state2batch = {}

        self.model = NetworkVP(Config.DEVICE, Config.NETWORK_NAME,
                               Environment().get_num_actions())
        if Config.LOAD_CHECKPOINT:
            self.stats.episode_count.value = self.model.load()

        self.training_step = 0
        self.frame_counter = 0

        self.agents = []
        self.predictors = []
        self.trainers = []
        self.dynamic_adjustment = ThreadDynamicAdjustment(self)
        self.combiner = ThreadCombiner(self)
Code example #12
    def run(self):

        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

        self.start_time = time.time()
        while self.exit_flag.value == 0:
            reward, length = self.episode_log_q.get()

            #self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward

            if results_q.full():
                old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward

            results_q.put((reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print(
                    '[Time: %8d] '
                    '[Episode: %8d Score: %1d] '
                    '[RScore: %10.4f] '
                    % (int(time.time()-self.start_time),
                       self.episode_count.value, reward,
                       rolling_reward / results_q.qsize()))
                sys.stdout.flush()
        print('Exit stats')
Code example #13
    def run(self):
        # randomly sleep up to 1 second. helps agents boot smoothly.
        time.sleep(np.random.rand())
        np.random.seed(np.int32(time.time() % 1 * 1000 + self.id * 10))

        with open(Config.RESULTS_FILENAME + self.role + str(self.id) + '.txt',
                  'a') as results_logger:
            # results logger
            rolling_reward = 0
            results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
            self.start_time = time.time()
            first_time = datetime.now()

            while self.exit_flag.value == 0:

                if self.state.done:
                    continue
                if self.env.lock.value:
                    continue

                print('learning')
                total_reward = 0
                total_length = 0

                if self.local_episode_count % 10 == 0:
                    results_logger.write('episode %f\n' %
                                         (self.local_episode_count))
                    results_logger.flush()
                for x_, a_, r_, reward_sum in self.run_episode():
                    total_reward += reward_sum
                    total_length += len(r_) + 1  # +1 for last frame that we drop
                    self.training_q.put((x_, a_, r_))
                    if self.local_episode_count % 10 == 0:
                        results_logger.write(
                            '%.2f %.2f %.2f %.2f %.2f %.2f\n' %
                            (x_[0][0], x_[0][1], x_[0][3], x_[0][4], x_[0][6],
                             x_[0][7]))
                        results_logger.flush()

                self.local_episode_count += 1
                with self.lock:
                    self.global_episode_count.value = self.local_episode_count
                rolling_reward += total_reward

                if results_q.full():
                    old_episode_time, old_reward, old_length = results_q.get()
                    rolling_reward -= old_reward
                    first_time = old_episode_time
                results_q.put((datetime.now(), total_reward, total_length))

                # if self.local_episode_count % Config.SAVE_FREQUENCY == 0:
                #     self.model.save(self.local_episode_count)
                #     print('running loc 3')

                if self.local_episode_count % Config.PRINT_STATS_FREQUENCY == 0:
                    print('[Time: %8d Episode: %8d] '
                          '[%s %s\'s Reward: %10.4f RRward: %10.4f] ' %
                          (int(time.time() - self.start_time),
                           self.local_episode_count, self.role, self.id,
                           total_reward, rolling_reward / results_q.qsize()))
                    sys.stdout.flush()
Code example #14
File: test.py  Project: hsuyunyu/a3c_modelB
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = Environment(Config.SHOW_MODE)  # SHOW_MODE: True or False
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    num_actions = env.get_num_actions()

    player.model = A3Clstm(Config.STACKED_FRAMES, num_actions)

    player.state, available = player.env.reset()
    # player.eps_len += 1
    player.state = torch.from_numpy(player.state).float()
    player.available = torch.from_numpy(available).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            player.available = player.available.cuda()
    flag = True
    max_score = 0
    results_logger = open(Config.RESULTS_FILENAME, 'a')
    rolling_frame_count = 0
    rolling_reward = 0
    results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
    while True:
        if flag:  # first load state
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()  # switch the model to evaluation mode
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state, available = player.env.reset()
            # player.eps_len += 1
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            rolling_frame_count += player.eps_len
            rolling_reward += reward_sum

            if results_q.full():
                old_length, old_reward = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
            results_q.put((player.eps_len, reward_sum))

            episode_time = int(
                time.time() - start_time
            )  # time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))
            log['{}_log'.format(args.env)].info(
                "Time {0:10d}, episode {1}, reward {2}, Step {3}, reward mean {4:.4f}, Rstep {5:.4f}, Rreward {6:.4f}"
                .format(episode_time, num_tests, reward_sum, player.eps_len,
                        reward_mean, (rolling_frame_count / results_q.qsize()),
                        (rolling_reward / results_q.qsize())))
            results_logger.write(
                '%d, %d, %10.4f, %d, %10.4f, %10.4f\n' %
                (episode_time, num_tests, reward_sum, player.eps_len,
                 player.envs_mean, player.envs_std))
            results_logger.flush()

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state, available = player.env.reset()
            # player.eps_len += 1
            time.sleep(1)
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
    results_logger.close()