def run(self):
    # NOTE: `queueQueue` throughout this file is assumed to be queue.Queue,
    # aliased at module level via `from queue import Queue as queueQueue`
    # (as in GA3C's ProcessStats).
    with open(Config.RESULTS_FILENAME, 'a') as results_logger, \
            open(Config.EPISODES_LOG_FILENAME, 'a') as eval_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        episode_scores = []
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            # Block until a worker reports a finished episode.
            episode_time, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward
            if results_q.full():
                # Evict the oldest episode from the rolling window.
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length))

            episode_scores.append(reward)
            if len(episode_scores) >= 1000:
                # Every 1000 episodes, log max/mean/min over the last 100 scores.
                eval_logger.write("{} {} {} {}\n".format(
                    self.episode_count.value,
                    np.max(episode_scores[-100:]),
                    np.mean(episode_scores[-100:]),
                    np.min(episode_scores[-100:])))
                eval_logger.flush()
                episode_scores = []

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                sys.stdout.flush()
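# --- Sketch: the rolling-window statistic shared by the run() variants in
# this file. Each keeps a bounded queue of the last STAT_ROLLING_MEAN_WINDOW
# episodes plus running sums, subtracting the evicted entry once the window
# is full, so the rolling mean costs O(1) per episode. A minimal
# self-contained version (the function name and window size here are
# illustrative, not part of the original code):
from queue import Queue

def demo_rolling_mean(rewards, window=5):
    results_q = Queue(maxsize=window)
    rolling_reward = 0.0
    means = []
    for reward in rewards:
        rolling_reward += reward
        if results_q.full():
            rolling_reward -= results_q.get()  # drop the oldest reward
        results_q.put(reward)
        means.append(rolling_reward / results_q.qsize())
    return means

# demo_rolling_mean(range(10)) yields means over at most the last 5 rewards.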
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        rolling_distance = 0
        rolling_success_rate = 0
        max_success_rate = -1
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            distance, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n' % (distance, reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward
            rolling_distance += distance
            # An episode counts as a success when it ends within distance 1.
            success_rate = 1 if distance <= 1 else 0
            rolling_success_rate += success_rate
            if results_q.full():
                old_distance, old_reward, old_length, old_success_rate = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                rolling_distance -= old_distance
                rolling_success_rate -= old_success_rate
            results_q.put((distance, reward, length, success_rate))

            # Flag a new rolling-success-rate maximum via the shared mode value.
            self.mode.value = 0
            if rolling_success_rate / results_q.qsize() > max_success_rate:
                max_success_rate = rolling_success_rate / results_q.qsize()
                self.mode.value = 1

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         # RPPS placeholder; the real rate would be
                         # rolling_frame_count / (datetime.now() - first_time).total_seconds()
                         2313,
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                sys.stdout.flush()
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward

            # COPYPASTA FROM Server.py TODO: Refactor!
            # Anneal beta linearly from BETA_START toward BETA_END over
            # ANNEALING_EPISODE_COUNT episodes, clamping at the final step.
            step = min(self.episode_count.value, Config.ANNEALING_EPISODE_COUNT - 1)
            beta_multiplier = (Config.BETA_END - Config.BETA_START) / Config.ANNEALING_EPISODE_COUNT
            beta = Config.BETA_START + beta_multiplier * step
            ###

            if results_q.full():
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d] '
                      '[Beta: %5.4f] '
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value,
                         beta))
                sys.stdout.flush()
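# --- Sketch: the annealing schedule computed inline above, as a standalone
# function. Semantics taken from the code itself: beta moves linearly from
# BETA_START toward BETA_END and is clamped at the last annealing step. The
# default constants below are illustrative, not GA3C's configuration.
def annealed_beta(episode_count, beta_start=0.01, beta_end=0.001,
                  annealing_episode_count=400000):
    step = min(episode_count, annealing_episode_count - 1)
    beta_multiplier = (beta_end - beta_start) / annealing_episode_count
    return beta_start + beta_multiplier * step

assert annealed_beta(0) == 0.01                 # starts at beta_start
assert annealed_beta(10**9) < annealed_beta(0)  # decays toward beta_end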
def run(self):
    with open(os.path.join(Config.LOGDIR, Config.RESULTS_FILENAME), 'a') as results_logger:
        # Init parameters
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward, length = self.episode_log_q.get()
            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward
            # Append (episode_time, reward, length) to results_q
            if results_q.full():
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            # Print result to table
            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                self.reward_log.value = reward
                self.roll_reward_log.value = rolling_reward / results_q.qsize()
                sys.stdout.flush()

            # results_logger (results.txt): log date, rolling reward, length
            results_logger.write('%s, %10.4f, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    rolling_reward / results_q.qsize(),
                                    length))
            results_logger.flush()
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward, length, iou, name, r0_, p_mask_ = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward
            if results_q.full():
                old_episode_time, old_reward, old_length, old_iou, old_name = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length, iou, name))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                if iou >= 0.95 and reward < 0:
                    print(str(r0_) + '\n')
                    print(str(p_mask_) + '\n')
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      '[IOU: %10.2f]'
                      '[last: %s]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value,
                         iou,
                         name))
                # print(str(r0_) + '\n')
                sys.stdout.flush()
def run(self):
    self.log_writer = tf.summary.FileWriter(Config.RESULT_DIR)
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1
            self.add_summary(self.episode_count.value, 'reward_vs_episode',
                             reward, self.log_writer)

            rolling_frame_count += length
            rolling_reward += reward
            if results_q.full():
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                sys.stdout.flush()
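# --- Sketch: a plausible implementation of the add_summary helper called
# above; the helper itself is not shown in this section, so its body here is
# an assumption inferred from the call site. With the TF1 API, a scalar is
# logged by building a Summary proto and handing it to the FileWriter with a
# step index:
import tensorflow as tf

def add_summary(step, tag, value, writer):
    summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
    writer.add_summary(summary, step)
    writer.flush()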
def run(self): with open("/home/lsy/Desktop/results/%s" % Config.RESULTS_FILENAME, 'a') as results_logger: rolling_frame_count = 0 rolling_reward = 0 results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW) self.start_time = time.time() first_time = datetime.now() while True: episode_time, reward, length, steps = self.episode_log_q.get() results_logger.write('%d\n' % (reward)) print("reward:"+str(reward)) results_logger.flush() self.total_frame_count += length self.episode_count.value += 1 rolling_frame_count += length rolling_reward += reward if results_q.full(): old_episode_time, old_reward, old_length = results_q.get() rolling_frame_count -= old_length rolling_reward -= old_reward first_time = old_episode_time results_q.put((episode_time, reward, length)) if self.episode_count.value % Config.SAVE_FREQUENCY == 0: self.should_save_model.value = 1 if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0: print( '[Time: %8d] ' '[Steps: %8d] ' '[Episode: %8d Score: %10.4f] ' '[RScore: %10.4f RPPS: %5d] ' '[PPS: %5d TPS: %5d] ' '[NT: %2d NP: %2d NA: %2d]' % (int(time.time()-self.start_time), steps, self.episode_count.value, reward, rolling_reward / results_q.qsize(), rolling_frame_count / (datetime.now() - first_time).total_seconds(), self.FPS(), self.TPS(), self.trainer_count.value, self.predictor_count.value, self.agent_count.value)) sys.stdout.flush()
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward, length))
            results_logger.flush()

            self.total_frame_count += length
            self.episode_count.value += 1

            rolling_frame_count += length
            rolling_reward += reward
            if results_q.full():
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f RPPS: %5d] '
                      '[PPS: %5d TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         rolling_frame_count / (datetime.now() - first_time).total_seconds(),
                         self.FPS(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                sys.stdout.flush()
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_frame_count = 0
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while True:
            episode_time, reward = self.episode_log_q.get()
            results_logger.write('%s, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    reward))
            results_logger.flush()

            self.episode_count.value += 1

            rolling_reward += reward
            if results_q.full():
                old_episode_time, old_reward = results_q.get()
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, reward))

            if self.episode_count.value % GlobalConfig.CHECKPOINT_MARK == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d] '
                      '[Episode: %8d Score: %10.4f] '
                      '[RScore: %10.4f] '
                      '[TPS: %5d] '
                      '[NT: %2d NP: %2d NA: %2d]'
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         reward,
                         rolling_reward / results_q.qsize(),
                         self.TPS(),
                         self.trainer_count.value,
                         self.predictor_count.value,
                         self.agent_count.value))
                sys.stdout.flush()
def run(self):
    with open(Config.RESULTS_FILENAME, 'a') as results_logger:
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        # Keep running until the exit flag is set AND the log queue is drained.
        while not (self.exit_flag.value and self.episode_log_q.empty()):
            episode_time, player, pid, reward, length = self.episode_log_q.get()
            results_logger.write('%s, %s, %s, %d, %d\n'
                                 % (episode_time.strftime("%Y-%m-%d %H:%M:%S"),
                                    player, pid, reward, length))
            results_logger.flush()

            self.episode_count.value += 1

            rolling_reward += reward
            if results_q.full():
                old_episode_time, old_player, old_pid, old_reward, old_length = results_q.get()
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((episode_time, player, pid, reward, length))

            if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
                self.should_save_model.value = 1

            if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d Episode: %8d] '
                      '[%s %s\'s Reward: %10.4f RRward: %10.4f] '
                      % (int(time.time() - self.start_time),
                         self.episode_count.value,
                         player, pid, reward,
                         rolling_reward / results_q.qsize()))
                sys.stdout.flush()
def __init__(self):
    self.stats = ProcessStats()

    # Presumably (as in GA3C's Server) Queue is multiprocessing.Queue for
    # cross-process traffic, while queueQueue is queue.Queue for the
    # in-process trainer threads.
    self.combining_q = Queue(maxsize=Config.MAX_QUEUE_SIZE)
    self.training_q = queueQueue(maxsize=Config.MAX_QUEUE_SIZE)
    self.prediction_q = Queue(maxsize=Config.MAX_QUEUE_SIZE)
    self.state2batch = {}

    self.model = NetworkVP(Config.DEVICE, Config.NETWORK_NAME,
                           Environment().get_num_actions())
    if Config.LOAD_CHECKPOINT:
        self.stats.episode_count.value = self.model.load()

    self.training_step = 0
    self.frame_counter = 0

    self.agents = []
    self.predictors = []
    self.trainers = []
    self.dynamic_adjustment = ThreadDynamicAdjustment(self)
    self.combiner = ThreadCombiner(self)
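# --- Sketch: the distinction behind the two queue types in __init__ above,
# assuming (as in GA3C-style servers) that Queue is multiprocessing.Queue and
# queueQueue is queue.Queue. The former pickles items across process
# boundaries; the latter is a cheaper thread-safe queue that only works
# inside one process. All names below are illustrative:
import threading
from multiprocessing import Process, Queue as MPQueue
from queue import Queue as ThreadQueue

def _producer(q):
    q.put('from another process')

if __name__ == '__main__':
    mp_q = MPQueue()
    p = Process(target=_producer, args=(mp_q,))
    p.start()
    print(mp_q.get())      # items are serialized across the process boundary
    p.join()

    thread_q = ThreadQueue()
    t = threading.Thread(target=lambda: thread_q.put('from a thread'))
    t.start()
    print(thread_q.get())  # shared memory; same process only, no serialization
    t.join()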
def run(self):
    rolling_frame_count = 0
    rolling_reward = 0
    results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
    self.start_time = time.time()
    while self.exit_flag.value == 0:
        reward, length = self.episode_log_q.get()
        # self.total_frame_count += length
        self.episode_count.value += 1

        rolling_frame_count += length
        rolling_reward += reward
        if results_q.full():
            old_reward, old_length = results_q.get()
            rolling_frame_count -= old_length
            rolling_reward -= old_reward
        results_q.put((reward, length))

        if self.episode_count.value % Config.SAVE_FREQUENCY == 0:
            self.should_save_model.value = 1

        if self.episode_count.value % Config.PRINT_STATS_FREQUENCY == 0:
            print('[Time: %8d] '
                  '[Episode: %8d Score: %1d] '
                  '[RScore: %10.4f] '
                  % (int(time.time() - self.start_time),
                     self.episode_count.value,
                     reward,
                     rolling_reward / results_q.qsize()))
            sys.stdout.flush()
    print('Exit stats')
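# --- Sketch: the shutdown protocol used by the exit-flag variants above.
# The parent flips a shared multiprocessing.Value; the loop either stops as
# soon as the flag is set (`while self.exit_flag.value == 0`) or first
# drains the queue (`while not (exit_flag and q.empty())`). Minimal version
# with assumed names; the timeout avoids blocking forever on an empty queue:
import time
from multiprocessing import Process, Queue, Value
from queue import Empty

def stats_loop(exit_flag, episode_log_q):
    while not (exit_flag.value and episode_log_q.empty()):
        try:
            reward = episode_log_q.get(timeout=0.1)
        except Empty:
            continue
        print('episode reward:', reward)

if __name__ == '__main__':
    exit_flag = Value('i', 0)
    q = Queue()
    p = Process(target=stats_loop, args=(exit_flag, q))
    p.start()
    for r in (1.0, 2.5, -0.5):
        q.put(r)
    time.sleep(0.5)
    exit_flag.value = 1  # signal shutdown; the worker drains, then exits
    p.join()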
def run(self):
    # Sleep randomly for up to 1 second; staggers agent boot.
    time.sleep(np.random.rand())
    np.random.seed(np.int32(time.time() % 1 * 1000 + self.id * 10))

    with open(Config.RESULTS_FILENAME + self.role + str(self.id) + '.txt', 'a') as results_logger:
        rolling_reward = 0
        results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
        self.start_time = time.time()
        first_time = datetime.now()
        while self.exit_flag.value == 0:
            if self.state.done:
                continue
            if self.env.lock.value:
                continue
            print('learning')

            total_reward = 0
            total_length = 0
            if self.local_episode_count % 10 == 0:
                results_logger.write('episode %f\n' % (self.local_episode_count))
                results_logger.flush()
            for x_, a_, r_, reward_sum in self.run_episode():
                total_reward += reward_sum
                total_length += len(r_) + 1  # +1 for the last frame, which we drop
                self.training_q.put((x_, a_, r_))
                if self.local_episode_count % 10 == 0:
                    results_logger.write('%.2f %.2f %.2f %.2f %.2f %.2f\n'
                                         % (x_[0][0], x_[0][1], x_[0][3],
                                            x_[0][4], x_[0][6], x_[0][7]))
                    results_logger.flush()

            self.local_episode_count += 1
            with self.lock:
                self.global_episode_count.value = self.local_episode_count

            rolling_reward += total_reward
            if results_q.full():
                old_episode_time, old_reward, old_length = results_q.get()
                rolling_reward -= old_reward
                first_time = old_episode_time
            results_q.put((datetime.now(), total_reward, total_length))

            # if self.local_episode_count % Config.SAVE_FREQUENCY == 0:
            #     self.model.save(self.local_episode_count)
            # print('running loc 3')

            if self.local_episode_count % Config.PRINT_STATS_FREQUENCY == 0:
                print('[Time: %8d Episode: %8d] '
                      '[%s %s\'s Reward: %10.4f RRward: %10.4f] '
                      % (int(time.time() - self.start_time),
                         self.local_episode_count,
                         self.role, self.id,
                         total_reward,
                         rolling_reward / results_q.qsize()))
                sys.stdout.flush()
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = Environment(Config.SHOW_MODE)  # True or False
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    num_actions = env.get_num_actions()
    player.model = A3Clstm(Config.STACKED_FRAMES, num_actions)
    player.state, available = player.env.reset()
    # player.eps_len += 1
    player.state = torch.from_numpy(player.state).float()
    player.available = torch.from_numpy(available).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            player.available = player.available.cuda()
    flag = True
    max_score = 0

    results_logger = open(Config.RESULTS_FILENAME, 'a')
    rolling_frame_count = 0
    rolling_reward = 0
    results_q = queueQueue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)

    while True:
        if flag:
            # Reload the latest shared weights before each evaluation run.
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()  # switch the model to evaluation mode
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state, available = player.env.reset()
            # player.eps_len += 1
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            rolling_frame_count += player.eps_len
            rolling_reward += reward_sum
            if results_q.full():
                old_length, old_reward = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
            results_q.put((player.eps_len, reward_sum))

            episode_time = int(time.time() - start_time)  # time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))
            log['{}_log'.format(args.env)].info(
                "Time {0:10d}, episode {1}, reward {2}, Step {3}, reward mean {4:.4f}, Rstep {5:.4f}, Rreward {6:.4f}"
                .format(episode_time, num_tests, reward_sum, player.eps_len,
                        reward_mean,
                        rolling_frame_count / results_q.qsize(),
                        rolling_reward / results_q.qsize()))
            results_logger.write('%d, %d, %10.4f, %d, %10.4f, %10.4f\n'
                                 % (episode_time, num_tests, reward_sum,
                                    player.eps_len, player.envs_mean, player.envs_std))
            results_logger.flush()

            if args.save_max and reward_sum >= max_score:
                # Checkpoint whenever the evaluation score reaches a new maximum.
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(state_to_save,
                                   '{0}{1}.dat'.format(args.save_model_dir, args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save,
                               '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state, available = player.env.reset()
            # player.eps_len += 1
            time.sleep(1)
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()

    results_logger.close()