def test(args, shared_model, env_conf):
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_{2}workers_log'.format(args.log_dir, args.env,
                                                 args.workers))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    env = atari_env(args.env, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.eval()
    while True:
        if player.done:
            # Sync the local evaluation model with the shared model.
            player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            player.current_life = 0
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if reward_sum > args.save_score_level:
                player.model.load_state_dict(shared_model.state_dict())
                state_to_save = player.model.state_dict()
                torch.save(state_to_save,
                           '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
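# A minimal launcher sketch (not part of the original file): test() workers
# like the one above are typically spawned once alongside the training workers
# via torch.multiprocessing. `train` and the argparse fields used here are
# assumptions for illustration, not this repo's verified entry point.
if __name__ == '__main__':
    import torch.multiprocessing as mp

    env = atari_env(args.env, env_conf)
    shared_model = A3Clstm(env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()  # expose weights to all child processes

    processes = []
    p = mp.Process(target=test, args=(args, shared_model, env_conf))
    p.start()
    processes.append(p)
    for rank in range(args.workers):
        p = mp.Process(target=train, args=(rank, args, shared_model, env_conf))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()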
def test(self, iteration, show='none', save_max=False):
    env = create_env(self.args)
    player = Agent(None, env, self.args, None)
    player.gpu_id = self.gpu_id
    if self.args.model == 'MLP':
        player.model = A3C_MLP(player.env.observation_space.shape[0],
                               player.env.action_space,
                               self.args.stack_frames)
    if self.args.model == 'CONV':
        player.model = A3C_CONV(self.args.stack_frames,
                                player.env.action_space)

    # Load the current shared weights into the evaluation copy.
    if self.gpu_id >= 0:
        with torch.cuda.device(self.gpu_id):
            player.model.load_state_dict(self.shared_model.state_dict())
    else:
        player.model.load_state_dict(self.shared_model.state_dict())

    player.state = player.env.reset(self.args)
    player.state = torch.from_numpy(player.state).float()
    if self.gpu_id >= 0:
        with torch.cuda.device(self.gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    while True:
        player.action_test()
        if self.args.show != 'none' or show != 'none':
            player.env.render()
        self.reward_sum += player.reward

        if player.done:
            self.num_tests += 1
            self.reward_total_sum += self.reward_sum
            reward_mean = self.reward_total_sum / self.num_tests
            self.reward_sum = 0
            player.eps_len = 0
            state = player.env.reset(self.args)
            player.state = torch.from_numpy(state).float()
            if self.gpu_id >= 0:
                with torch.cuda.device(self.gpu_id):
                    player.state = player.state.cuda()
            if self.args.show != 'none' or show != 'none':
                player.env.close()
            break

    return self.reward_total_sum
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    print("test proc:")
    env = AllowBacktracking(
        make_local_env(env_conf['game'], env_conf['level'],
                       stack=False, scale_rew=False))
    print("test got env:", env.observation_space)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        # Disabled: mid-episode reset while lives remain.
        # if player.done and player.info['ale.lives'] > 0 and not player.max_length:
        #     state = player.env.reset()
        #     player.eps_len += 2
        #     player.state = torch.from_numpy(state).float()
        #     if gpu_id >= 0:
        #         with torch.cuda.device(gpu_id):
        #             player.state = player.state.cuda()
        if player.done or player.max_length:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(state_to_save,
                                   '{0}{1}.dat'.format(args.save_model_dir,
                                                       args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save,
                               '{0}{1}.dat'.format(args.save_model_dir,
                                                   args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_model, env_conf, shared_counter):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    device = torch.device('cuda:{}'.format(gpu_id) if gpu_id >= 0 else 'cpu')
    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        os.path.join(args.log_dir,
                     '{}-{}_log'.format(args.env, args.exp_name)))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None, gpu_id=gpu_id)
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.model.apply(weights_init)
    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).to(torch.float32)
    player.model = player.model.to(device)
    player.state = player.state.to(device)

    flag = True
    max_score = 0
    while True:
        if flag:
            player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            # Lost a life but the episode is not over: reset and keep counting.
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).to(torch.float32)
            player.state = player.state.to(device)
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}, alpha {4:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean,
                    player.model.log_alpha.exp().detach().item()))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                torch.save(
                    player.model.state_dict(),
                    os.path.join(args.save_model_dir,
                                 '{}-{}.dat'.format(args.env, args.exp_name)))

            with shared_counter.get_lock():
                shared_counter.value += player.eps_len
                if shared_counter.value > args.interact_steps:
                    break

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).to(torch.float32)
            player.state = player.state.to(device)
def test(args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = create_env(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == 'MLP':
        player.model = A3C_MLP(player.env.observation_space.shape[0],
                               player.env.action_space, args.stack_frames)
    if args.model == 'CONV':
        player.model = A3C_CONV(args.stack_frames, player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    while True:
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if reward_sum > args.save_score_level:
                player.model.load_state_dict(shared_model.state_dict())
                state_to_save = player.model.state_dict()
                torch.save(state_to_save,
                           '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
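# setup_logger() is called by every variant above but defined elsewhere.
# A sketch of the helper as it commonly appears in these A3C codebases
# (an assumption, not verbatim from this repo): bind a named logger to a file
# so each process can log independently.
def setup_logger(logger_name, log_file, level=logging.INFO):
    l = logging.getLogger(logger_name)
    formatter = logging.Formatter('%(asctime)s : %(message)s')
    file_handler = logging.FileHandler(log_file, mode='w')
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    l.setLevel(level)
    l.addHandler(file_handler)
    l.addHandler(stream_handler)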
def test(rank, args, shared_model):
    writer = SummaryWriter('8_27_test')
    model_buffer = Model_Buffer(args)
    test_episodes = args.test_episodes
    ptitle('Test Agent')
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    print("logfile check", r'{0} {1}_log'.format(args.log_dir, args.env))
    print("logs in test", args.log_dir)
    # Store the logger in a dict keyed by environment name.
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    # vars() returns the object's attributes as a dict.
    d_args = vars(args)
    for k in d_args.keys():
        # Log every run argument.
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
    # for i in range(100):
    #     log['{}_log'.format(args.env)].info('{0}'.format(i))
    # print('we prefix seed = -1 when testing')
    # args.seed = -1
    torch.manual_seed(args.seed)
    env = create_env(args.env, args.seed)
    # env = gym.make(args.env)
    # env.seed(args.seed)
    start_time = time.time()
    num_tests = 0  # number of episodes played so far
    player = Agent(None, env, args, None, rank)
    player.model = A3C_MLP(player.env.observation_space,
                           player.env.action_space,
                           args.stack_frames)  # build the model
    player.state = player.env.reset()  # initial state
    player.state = torch.from_numpy(player.state).float()
    player.done = True
    player.model.eval()  # switch to evaluation mode

    is_model_empty = True
    is_testing = False
    while True:
        model_buffer.put(shared_model)
        # A full batch of test episodes has finished; re-initialize.
        if player.done and np.mod(num_tests,
                                  test_episodes) == 0 and not is_testing:
            reward_episode = 0
            success_rate = 0
            load_model = model_buffer.get()  # fetch the shared model
            model_queue_size = model_buffer.qsize()
            if load_model:
                is_testing = True
                is_model_empty = False
                training_steps = load_model[1]
                training_episodes = load_model[2]
                # load_model[0] holds the shared model's parameters.
                player.model.load_state_dict(load_model[0])
            else:
                is_model_empty = True  # no model available yet
                time.sleep(10)

        if not is_model_empty:
            player.action_test()
            # log['{}_log'.format(args.env)].info("test steps {}".format(1))
            reward_episode += player.reward
            if 'is_success' in player.info.keys():
                # The episode ended because the agent succeeded.
                success_rate += 1

            if player.done:
                # done: goal reached, crashed, or drifted too far away.
                # print("crash detected")
                # eps_len_temp = player.eps_len
                num_tests += 1  # one more test episode finished
                player.eps_len = 0  # reset the per-episode step counter
                state = player.env.reset()
                player.state = torch.from_numpy(state).float()
                if np.mod(num_tests, test_episodes) == 0:
                    # Batch of test episodes done; aggregate the statistics.
                    is_testing = False
                    reward_episode = reward_episode / test_episodes
                    writer.add_scalar('success_num/Test', success_rate,
                                      training_steps)
                    success_rate = success_rate / test_episodes
                    log['{}_log'.format(args.env)].info(
                        "Time {0}, training episodes {1}, training steps {2}, reward episode {3}, success_rate {4}, "
                        "model cached {5}".format(
                            time.strftime(
                                "%Hh %Mm %Ss",
                                time.gmtime(time.time() - start_time)),
                            training_episodes, training_steps,
                            reward_episode, success_rate, model_queue_size))
                    writer.add_scalar('success_rate/Test', success_rate,
                                      training_steps)
                    # Save the model.
                    state_to_save = player.model.state_dict()
                    # torch.save(state_to_save, '{0}{1}.dat'.format(args.save_model_dir, args.env))
                    # torch.save(state_to_save, '{0}{1}_pre.dat'.format(args.save_model_dir, args.env))
                    torch.save(state_to_save,
                               '{0}{1}.dat'.format(args.log_dir, args.env))
                    torch.save(
                        state_to_save,
                        '{0}{1}_pre.dat'.format(args.log_dir, args.env))
                    if training_steps > args.training_steps:
                        break
def test(rank, args, shared_model, train_modes, n_iters, device):
    writer = SummaryWriter(
        os.path.join(args.log_dir, 'Test Agent:{}'.format(rank)))
    ptitle('Test Agent: {}'.format(rank))
    torch.manual_seed(args.seed + rank)
    n_iter = 0
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    env = create_env(args.env, args)
    start_time = time.time()
    num_tests = 1
    n_step = 0
    player = Agent(None, env, args, None, None, device)
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)
    player.state = player.env.reset()
    if 'Unreal' in args.env:
        player.cam_pos = player.env.env.env.env.cam_pose
        player.collect_state = player.env.env.env.env.current_states
        player.set_cam_info()
    player.state = torch.from_numpy(player.state).float().to(device)
    player.model.eval()

    max_score = -100
    reward_sum = np.zeros(player.num_agents)
    reward_total_sum = np.zeros(player.num_agents)
    reward_sum_ep = np.zeros(player.num_agents)
    success_rate_sum_ep = np.zeros(player.num_agents)
    fps_counter = 0
    t0 = time.time()
    cross_entropy_loss = nn.CrossEntropyLoss()
    len_sum = 0
    seed = args.seed
    count_eps = 0
    eps_length = 0
    rate = 0
    rates = [0, 0]
    step_rates = [0, 0]
    mean_rates = [0, 0]
    visible_steps = 0
    while True:
        if player.done:
            count_eps += 1
            t0 = time.time()
            eps_length = 0
            player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        eps_length += 1
        n_step += 1
        fps_counter += 1
        reward_sum_ep += player.reward
        success_rate_sum_ep += player.success_rate

        # Gate-prediction accuracy bookkeeping.
        gate_ids, gate_probs, gt_gates = [], [], []
        for k1 in range(len(player.rewards)):
            for k2 in range(player.num_agents):
                _, max_id = torch.max(player.gates[k1][k2], 0)
                gate_probs.append(player.gates[k1][k2])
                gate_ids.append(max_id)
                gt_gates.append(player.gate_gts[k1][k2])
        gate_probs = torch.cat(gate_probs).view(-1, 2).to(device)
        gate_gt_ids = torch.Tensor(gt_gates).view(
            1, -1).squeeze().long().to(device)
        gate_loss = cross_entropy_loss(gate_probs, gate_gt_ids)
        visible_steps += sum(np.array(gt_gates).squeeze()) / 4
        gate_ids = np.array(
            [gate_ids[i].cpu().detach().numpy() for i in range(4)])
        gt_gates = np.array(
            [gt_gates[i].cpu().detach().numpy() for i in range(4)])
        one_step_rate = sum(gate_ids == gt_gates) / player.num_agents
        rate += one_step_rate
        for id in range(2):
            right_num = sum(gate_ids[i] == gt_gates[i] == id
                            for i in range(4))
            num = sum(gt_gates[i] == id for i in range(4))
            step_rate = right_num / num if num != 0 else 0
            if step_rate > 0:
                rates[id] += step_rate
                step_rates[id] += 1
            mean_rates[id] = rates[id] / step_rates[id]
        mean_rate = rate / n_step

        if player.done:
            player.state = player.env.reset()
            player.state = torch.from_numpy(player.state).float().to(device)
            player.set_cam_info()
            reward_sum += reward_sum_ep
            len_sum += player.eps_len
            fps = fps_counter / (time.time() - t0)
            n_iter = 0
            for n in n_iters:
                n_iter += n
            for i in range(player.num_agents):
                writer.add_scalar('test/reward' + str(i), reward_sum_ep[i],
                                  n_iter)
            writer.add_scalar('test/fps', fps, n_iter)
            writer.add_scalar('test/eps_len', player.eps_len, n_iter)
            writer.add_scalar('test/unvisible_acc', mean_rates[0], n_iter)
            writer.add_scalar('test/visible_acc', mean_rates[1], n_iter)
            writer.add_scalar('test/mean_acc', mean_rate, n_iter)
            writer.add_scalar('test/gate_loss', gate_loss, n_iter)
            player.eps_len = 0
            fps_counter = 0
            reward_sum_ep = np.zeros(player.num_agents)
            t0 = time.time()
            count_eps += 1
            if count_eps % args.test_eps == 0:
                player.max_length = True
            else:
                player.max_length = False

        if player.done and not player.max_length:
            seed += 1
            player.env.seed(seed)
            player.state = player.env.reset()
            player.set_cam_info()
            player.state = torch.from_numpy(player.state).float().to(device)
            player.eps_len += 2
        elif player.done and player.max_length:
            ave_reward_sum = reward_sum / args.test_eps
            reward_total_sum += ave_reward_sum
            reward_mean = reward_total_sum / num_tests
            len_mean = len_sum / args.test_eps
            reward_step = reward_sum / len_sum
            log['{}_log'.format(args.env)].info(
                "Time {0}, ave eps reward {1}, ave eps length {2}, reward mean {3}, reward step {4}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    ave_reward_sum, len_mean, reward_mean, reward_step))

            if ave_reward_sum.mean() >= max_score:
                print('save best! in %d iters' % n_step)
                max_score = ave_reward_sum.mean()
                model_dir = os.path.join(
                    args.log_dir,
                    '{0}-gate-all-model-best-{1}.dat'.format(args.env,
                                                             n_step))
            else:
                model_dir = os.path.join(args.log_dir,
                                         '{0}-new.dat'.format(args.env))
            if args.gpu_ids[-1] >= 0:
                with torch.cuda.device(args.gpu_ids[-1]):
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save, model_dir)
            else:
                state_to_save = player.model.state_dict()
                torch.save(state_to_save, model_dir)

            num_tests += 1
            reward_sum = 0
            len_sum = 0
            seed += 1
            player.env.seed(seed)
            player.state = player.env.reset()
            if 'Unreal' in args.env:
                player.cam_pos = player.env.env.env.env.cam_pose
                player.collect_state = player.env.env.env.env.current_states
            player.set_cam_info()
            player.state = torch.from_numpy(player.state).float().to(device)
            player.input_actions = torch.Tensor(
                np.zeros((player.num_agents, 9)))
            time.sleep(args.sleep_time)

        if n_iter > args.max_step:
            env.close()
            for id in range(0, args.workers):
                train_modes[id] = -100
            break

        player.clear_actions()
    player.model.eval()
    for i_episode in range(args.num_episodes):
        player.state = player.env.reset()
        player.state = torch.from_numpy(player.state).float()
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.state = player.state.cuda()
        player.eps_len += 2
        reward_sum = 0
        while True:
            if args.render:
                if i_episode % args.render_freq == 0:
                    player.env.render()
            player.action_test()
            reward_sum += player.reward

            if player.done and not player.info:
                state = player.env.reset()
                player.eps_len += 2
                player.state = torch.from_numpy(state).float()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        player.state = player.state.cuda()
            elif player.info:
                num_tests += 1
                reward_total_sum += reward_sum
                reward_mean = reward_total_sum / num_tests
                log['{}_mon_log'.format(args.env)].info(
                    "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
def test(args, shared_model, env_conf):
    ptitle('Valid agent')
    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu
    env_conf["env_gpu"] = gpu_id
    log = {}
    logger = Logger(args.log_dir)

    # Snapshot the sources next to the logs for reproducibility.
    create_dir(args.log_dir + "models/")
    os.system("cp *.sh " + args.log_dir)
    os.system("cp *.py " + args.log_dir)
    os.system("cp models/models.py " + args.log_dir + "models/")
    os.system("cp models/basic_modules.py " + args.log_dir + "models/")

    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    env_conf_log = env_conf
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
    for k in env_conf_log.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(
            k, env_conf_log[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = database_env(env_conf, seed=0, dstype="test")
    env.max_step = 900
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None, gpu_id)
    player.gpu_id = gpu_id
    player.model = get_model(args,
                             args.model,
                             env_conf["observation_shape"],
                             args.features,
                             env_conf["num_actions"],
                             gpu_id=0,
                             lstm_feats=args.lstm_feats)
    with torch.cuda.device(gpu_id):
        player.model = player.model.cuda()
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    create_dir(args.save_model_dir)
    recent_episode_scores = ScalaTracker(100)
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()

        if player.done:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))
            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print(args.env)
                print("Log test #:", num_tests)
                print("sum rewards: ", player.env.sum_reward)
                print("action_history\n", player.env.action_his)
                print()
                print("------------------------------------------------")
                log_info = {
                    'mean_reward': reward_mean,
                    '100_mean_reward': recent_episode_scores.mean()
                }
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            reward_sum = 0
            player.eps_len = 0
            player.clear_actions()
            state = player.env.reset()
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_models, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(env, args, gpu_id)
    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()

    flag = True
    max_score = 0
    prev_reward = 0
    while True:
        if flag:
            # Sync both the early and late models from their shared copies.
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.models[0].load_state_dict(
                        shared_models[0].state_dict())
                    player.models[1].load_state_dict(
                        shared_models[1].state_dict())
            else:
                player.models[0].load_state_dict(
                    shared_models[0].state_dict())
                player.models[1].load_state_dict(
                    shared_models[1].state_dict())
            player.models[0].eval()
            player.models[1].eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            with open('./results', 'a') as f:
                line = f"{reward_total_sum - prev_reward}\n"
                f.write(line)
            prev_reward = reward_total_sum
            player.episodic_reward = 0

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.models[0].state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_early.dat'.format(args.save_model_dir,
                                                      args.env))
                        state_to_save = player.models[1].state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_late.dat'.format(args.save_model_dir,
                                                     args.env))
                else:
                    state_to_save = player.models[0].state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_early.dat'.format(args.save_model_dir,
                                                  args.env))
                    state_to_save = player.models[1].state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_late.dat'.format(args.save_model_dir,
                                                 args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test_func(args, shared_model, env_conf, datasets):
    ptitle('Valid agent')
    gpu_id = args.gpu_ids[-1]
    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        logger = Logger(args.log_dir)
        saved_src_dir = args.log_dir + "/src/"
        create_dir(saved_src_dir)
        os.system("cp *.py " + saved_src_dir)
        os.system("cp -r Models " + saved_src_dir)
        os.system("cp -r run_scripts " + saved_src_dir)
        os.system("cp -r Utils " + saved_src_dir)

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    env = Debug_env(datasets, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id

    nChan = 3
    if args.is3D:
        nChan = 4
    if args.alpha_only:
        nChan = 1
    if not args.is3D:
        player.model = get_model(args, "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif not args.obs3D:
        player.model = get_model(args, "ENet",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)
    elif args.obs3D:
        player.model = get_model(args, "Net3D",
                                 input_shape=env_conf["obs_shape"],
                                 num_actions=args.num_actions * nChan)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)
    recent_episode_scores = ScalaTracker(100)
    recent_rand_i = ScalaTracker(100)
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    if args.deploy:
        deploy(args, shared_model, player, gpu_id)
        exit()

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            print(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))
            recent_episode_scores.push(reward_sum)

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                print("----------------------VALID SET--------------------------")
                print(args.env)
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                log_rewards = [
                    int(rew * 100) for rew in player.env.sum_rewards
                ]
                print("rewards:", log_rewards)
                print("action: ", player.env.actions)
                print("reward history: ", player.env.rewards)
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist, 0)
                log_info = {"valid_sample": log_img}
                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                    }
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

                if args.save_sample:
                    deploy_list = player.env.deploy
                    print(len(deploy_list))
                    for stepi, (vol, ref_img, lut,
                                _) in enumerate(deploy_list):
                        io.imsave(
                            args.log_dir + "/" + str(num_tests) + "_vol_" +
                            str(stepi) + ".tif", vol)
                        io.imsave(
                            args.log_dir + "/" + str(num_tests) + "_ref_" +
                            str(stepi) + ".tif", ref_img)
                        # Plot the four transfer-function channels (BGR + alpha).
                        plt.figure(figsize=(10, 10))
                        plt.plot(range(256), lut[..., 2], 'b')
                        plt.plot(range(256), lut[..., 1], 'g')
                        plt.plot(range(256), lut[..., 0], 'r')
                        plt.plot(range(256), lut[..., 3], 'gray')
                        plt.ylabel('Mapping value')
                        plt.xlabel('Voxel intensity')
                        plt.title("Transfer function visualization")
                        plt.savefig("Ref_LUT" + "_" + str(num_tests) + "_" +
                                    str(stepi) + ".png")

            renderlist = []
            reward_sum = 0
            player.eps_len = 0
            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_model, train_modes, n_iters):
    ptitle('Test Agent')
    n_iter = 0
    writer = SummaryWriter(os.path.join(args.log_dir, 'Test'))
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device('cpu')

    if args.env_base is None:
        env = create_env(args.env, args)
    else:
        env = create_env(args.env_base, args)
    env.seed(args.seed)
    start_time = time.time()
    count_eps = 0
    player = Agent(None, env, args, None, device)
    player.gpu_id = gpu_id
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)
    player.model.eval()

    max_score = -100
    seed = args.seed
    last_iter = 0
    iter_th = args.init_step
    while True:
        reward_sum = np.zeros(2)
        len_sum = 0
        for i_episode in range(args.test_eps):
            player.model.load_state_dict(shared_model.state_dict())
            player.env.seed(seed)
            # seed += 1
            player.reset()
            reward_sum_ep = np.zeros(player.num_agents)
            fps_counter = 0
            t0 = time.time()
            count_eps += 1
            while True:
                if args.render:
                    if 'Unreal' in args.env:
                        cv2_show(env, False)
                    else:
                        env.render()
                player.action_test()
                fps_counter += 1
                reward_sum_ep += player.reward
                if player.done:
                    reward_sum += reward_sum_ep[:2]
                    len_sum += player.eps_len
                    fps = fps_counter / (time.time() - t0)
                    n_iter = 0
                    for n in n_iters:
                        n_iter += n
                    # Switch each worker's training mode based on the global
                    # step count.
                    for rank in range(len(n_iters)):
                        if n_iter < args.init_step:
                            train_modes[rank] = 0
                        elif args.train_mode == 2 and n_iter - last_iter > iter_th:
                            train_modes[rank] = 1 - train_modes[rank]
                            last_iter = n_iter
                            iter_th = args.init_step if train_modes[
                                rank] == 0 else args.adv_step
                        else:
                            train_modes[rank] = args.train_mode
                    for i, r_i in enumerate(reward_sum_ep):
                        writer.add_scalar('test/reward' + str(i), r_i, n_iter)
                    writer.add_scalar('test/fps', fps, n_iter)
                    writer.add_scalar('test/eps_len', player.eps_len, n_iter)
                    break

        ave_reward_sum = reward_sum / args.test_eps
        len_mean = len_sum / args.test_eps
        reward_step = reward_sum / len_sum
        log['{}_log'.format(args.env)].info(
            "Time {0}, ave eps reward {1}, ave eps length {2}, reward step {3}"
            .format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                ave_reward_sum, len_mean, reward_step))

        # Save the model: best run so far under one name, latest under another.
        if ave_reward_sum[0] >= max_score:
            print('Save best!')
            max_score = ave_reward_sum[0]
            model_dir = os.path.join(args.log_dir,
                                     'all-best-{0}.dat'.format(n_iter))
            tracker_model_dir = os.path.join(args.log_dir,
                                             'tracker-best.dat')
            target_model_dir = os.path.join(args.log_dir, 'target-best.dat')
        else:
            model_dir = os.path.join(args.log_dir, 'all-new.dat')
            tracker_model_dir = os.path.join(args.log_dir, 'tracker-new.dat')
            target_model_dir = os.path.join(args.log_dir, 'target-new.dat')
        torch.save(player.model.state_dict(), model_dir)
        if args.split:
            torch.save(player.model.player0.state_dict(), tracker_model_dir)
            if not args.single:
                torch.save(player.model.player1.state_dict(),
                           target_model_dir)

        time.sleep(args.sleep_time)
        if n_iter > args.max_step:
            env.close()
            for id in range(0, args.workers):
                train_modes[id] = -100
            break
def test(rank, args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    writer = SummaryWriter(log_dir=args.log_dir + 'tb_test')
    log = {}
    setup_logger('{}_log'.format('Test_' + str(rank)),
                 r'{0}{1}_log'.format(args.log_dir, 'Test_' + str(rank)))
    log['{}_log'.format('Test_' + str(rank))] = logging.getLogger(
        '{}_log'.format('Test_' + str(rank)))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format('Test_' + str(rank))].info('{0}: {1}'.format(
            k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(env_id=rank, args=args, type='train')
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    num_inside_target_room = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[2],
                           player.env.action_space.n)
    player.state = player.env.reset()
    player.state = normalize_rgb_obs(player.state)
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    action_times = 0
    while True:
        action_times += 1
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        # Dump every frame so finished episodes can be stitched into a video.
        if not os.path.exists(args.log_dir + "video/" + str(rank) + "_" +
                              str(num_tests)):
            os.makedirs(args.log_dir + "video/" + str(rank) + "_" +
                        str(num_tests))
        cv2.imwrite(
            args.log_dir + "video/" + str(rank) + "_" + str(num_tests) +
            "/" + str(action_times) + ".png",
            player.env.get_rgb())  # (90, 120, 3)

        if player.done:
            frame_to_video(
                fileloc=args.log_dir + "video/" + str(rank) + "_" +
                str(num_tests) + "/%d.png",
                t_w=120,
                t_h=90,
                destination=args.log_dir + "video/" + str(rank) + "_" +
                str(num_tests) + ".mp4")
            shutil.rmtree(args.log_dir + "video/" + str(rank) + "_" +
                          str(num_tests))
            action_times = 0
            num_tests += 1
            num_inside_target_room += player.env.inside_target_room
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            success_rate = num_inside_target_room / num_tests
            log['{}_log'.format('Test_' + str(rank))].info(
                "Time {0}, Tester {1}, test counter {2}, episode reward {3}, episode length {4}, reward mean {5:.4f}, success rate {6}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    rank, num_tests, reward_sum, player.eps_len, reward_mean,
                    success_rate))

            # TensorBoard
            writer.add_scalar("data/episode_reward", reward_sum, num_tests)
            writer.add_scalar("data/episode_length", player.eps_len,
                              num_tests)
            writer.add_scalar("data/reward_mean", reward_mean, num_tests)
            writer.add_scalar("data/success_rate", success_rate, num_tests)

            if reward_sum > args.save_score_level:
                # player.model.load_state_dict(shared_model.state_dict())
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                    'Test_' + str(rank),
                                                    reward_sum))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                'Test_' + str(rank),
                                                reward_sum))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(10)
            state = normalize_rgb_obs(state)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = Environment(Config.SHOW_MODE)  # True or False
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    num_actions = env.get_num_actions()
    player.model = A3Clstm(Config.STACKED_FRAMES, num_actions)
    player.state, available = player.env.reset()
    # player.eps_len += 1
    player.state = torch.from_numpy(player.state).float()
    player.available = torch.from_numpy(available).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            player.available = player.available.cuda()

    flag = True
    max_score = 0
    results_logger = open(Config.RESULTS_FILENAME, 'a')
    rolling_frame_count = 0
    rolling_reward = 0
    results_q = queue.Queue(maxsize=Config.STAT_ROLLING_MEAN_WINDOW)
    while True:
        if flag:
            # First, load the shared weights.
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()  # switch the model to evaluation mode
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state, available = player.env.reset()
            # player.eps_len += 1
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            # Maintain rolling statistics over the last N episodes.
            rolling_frame_count += player.eps_len
            rolling_reward += reward_sum
            if results_q.full():
                old_length, old_reward = results_q.get()
                rolling_frame_count -= old_length
                rolling_reward -= old_reward
            results_q.put((player.eps_len, reward_sum))

            episode_time = int(time.time() - start_time)
            # time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))
            log['{}_log'.format(args.env)].info(
                "Time {0:10d}, episode {1}, reward {2}, Step {3}, reward mean {4:.4f}, Rstep {5:.4f}, Rreward {6:.4f}"
                .format(episode_time, num_tests, reward_sum, player.eps_len,
                        reward_mean,
                        (rolling_frame_count / results_q.qsize()),
                        (rolling_reward / results_q.qsize())))
            results_logger.write('%d, %d, %10.4f, %d, %10.4f, %10.4f\n' %
                                 (episode_time, num_tests, reward_sum,
                                  player.eps_len, player.envs_mean,
                                  player.envs_std))
            results_logger.flush()

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state, available = player.env.reset()
            # player.eps_len += 1
            time.sleep(1)
            player.state = torch.from_numpy(state).float()
            player.available = torch.from_numpy(available).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
                    player.available = player.available.cuda()

    results_logger.close()
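# The rolling statistics above pair a bounded queue.Queue with running sums so
# the mean over the last N episodes costs O(1) per update. A self-contained
# sketch of the same pattern:
import queue

window = queue.Queue(maxsize=3)
running = 0
for x in [5, 7, 9, 11]:
    if window.full():
        running -= window.get()  # evict the oldest sample from the sum
    window.put(x)
    running += x
    print(running / window.qsize())  # rolling mean over the last <= 3 samples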
def test(args, shared_model, env_conf, datasets):
    ptitle('Test agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    logger = Logger(args.log_dir)
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw, gt_lbl = datasets
    env = EM_env(raw, gt_lbl, env_conf)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    # player.model = A3Clstm(env.observation_space.shape,
    #                        env_conf["num_action"], args.hidden_feat)
    player.model = SimpleCNN(env.observation_space.shape,
                             env_conf["num_action"])
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    create_dir(args.save_model_dir)
    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            # Keep a sliding window of the last 200 episode scores.
            recent_episode_scores += [reward_sum]
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                print("------------------------------------------------")
                print("Log test #:", num_tests)
                print("Prob: ")
                for i in range(player.env.agent_out_shape[1]):
                    for j in range(player.env.agent_out_shape[2]):
                        print("{:.3f}\t".format(player.prob_cpu[0, i, j]),
                              end='')
                    print()
                print("Actions :", player.actions)
                print("Actions transformed: ")
                print(player.actions_explained)
                print("rewards: ", player.rewards)
                print("sum rewards: ", reward_sum)
                print("------------------------------------------------")
                log_img = np.concatenate(renderlist, 0)
                log_info = {"test: training_sample": log_img}
                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)
                log_info = {'test: mean_reward': reward_mean}
                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0
            time.sleep(30)
            player.clear_actions()
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def train_rep(args, shared_model, env_conf):
    batch_size = 16
    train_times = args.rep_train_time
    trace = []
    # Temporal-distance classes: each tuple is a [low, high) range of frame gaps.
    td_class = [(0, 1), (1, 2), (2, 3), (3, 5), (5, 7), (7, 9)]
    loss_fn = nn.CrossEntropyLoss()
    optimizer_r = Adam(shared_model.r_net.parameters(), lr=args.rl_r)
    optimizer_c = Adam(shared_model.c_net.parameters(), lr=args.rl_r)
    ptitle('Train rep')
    gpu_id = args.gpu_ids[-1]
    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
            # player.model.r_net = player.model.r_net.cuda()
            # player.model.c_net = player.model.c_net.cuda()

    flag = True
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.train()
            flag = False

        player.action_test()
        trace.append(player.state)

        if len(trace) > args.trace_length:
            # Train for several hundred iterations on the collected trace.
            for _ in range(train_times):
                range_c = np.random.randint(0, len(td_class))
                TD = np.random.randint(td_class[range_c][0],
                                       td_class[range_c][1])
                begin = np.random.randint(0, len(trace) - TD - batch_size)
                former = torch.stack(trace[begin:begin + batch_size], dim=0)
                latter = torch.stack(
                    trace[begin + TD:begin + TD + batch_size], dim=0)
                target = torch.zeros(batch_size, dtype=torch.long) + range_c
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        former = former.cuda()
                        latter = latter.cuda()
                        target = target.cuda()
                rep_f, rep_l = player.model.r_net(former), player.model.r_net(
                    latter)
                output = player.model.c_net(rep_f, rep_l, False)
                loss = loss_fn(output, target)
                optimizer_r.zero_grad()
                optimizer_c.zero_grad()
                loss.backward()
                ensure_shared_grads(player.model.r_net, shared_model.r_net,
                                    gpu=gpu_id >= 0)
                ensure_shared_grads(player.model.c_net, shared_model.c_net,
                                    gpu=gpu_id >= 0)
                optimizer_r.step()
                optimizer_c.step()
            trace = []

        if player.done and not player.info:
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            state = player.env.reset()
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
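# ensure_shared_grads() above copies the local model's gradients onto the
# shared model so the shared optimizers can step. A sketch of the helper as it
# commonly appears in A3C repos (an assumption, not verbatim from this one):
def ensure_shared_grads(model, shared_model, gpu=False):
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None and not gpu:
            return  # CPU case: grads already point at shared memory
        elif not gpu:
            shared_param._grad = param.grad
        else:
            shared_param._grad = param.grad.cpu()  # move off the GPU first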
def test(args, shared_model, env_conf):
    # print('IN TEST')
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    setup_logger('{}_map_log'.format(args.env),
                 r'{0}{1}_map_log'.format(args.log_dir, args.env))
    log['{}_map_log'.format(args.env)] = logging.getLogger(
        '{}_map_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    if 'micropolis' in args.env.lower():
        import gym_micropolis
        env = micropolis_env(args.env, env_conf, args)
    else:
        # print('using atari env for test')
        env = atari_env(args.env, env_conf, args)

    reward_sum = 0
    entropy_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if 'micropolis' in args.env.lower():
        modelInit = getattr(model, args.design_head)
        player.model = modelInit(player.env.observation_space.shape[0],
                                 player.env.action_space,
                                 player.env.env.env.MAP_X)
        player.lstm_sizes = player.model.getMemorySizes()
        if not 'arcade' in args.env.lower():
            player.lstm_size = (1, 16, player.env.env.env.MAP_X,
                                env.env.env.MAP_Y)
    else:
        player.model = A3Clstm(player.env.observation_space.shape[0],
                               player.env.action_space)

    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    max_score = 0
    i = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward
        entropy_sum += player.entropy.data.item()

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1:1.5e}, entropy {4:1.5e} episode length {2}, reward mean {3:1.5e}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, entropy_sum))

            # Log an ASCII rendering of the final zone map.
            import numpy as np
            np.set_printoptions(threshold=400)
            log['{}_map_log'.format(args.env)].info('\n{}'.format(
                np.array2string(
                    np.add(
                        player.env.env.env.micro.map.zoneMap[-1],
                        np.full((player.env.env.env.MAP_X,
                                 player.env.env.env.MAP_Y), 2))).replace(
                                     '\n ', '').replace('][', ']\n[').replace(
                                         '[[', '[').replace(']]', ']')))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}best_{1}.dat'.format(args.save_model_dir,
                                                     args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}best_{1}.dat'.format(args.save_model_dir,
                                                 args.env))

            if i % 10 == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}latest_{1}.dat'.format(args.save_model_dir,
                                                       args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}latest_{1}.dat'.format(args.save_model_dir,
                                                   args.env))

            reward_sum = 0
            entropy_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            i += 1
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_model, env_conf, datasets=None, hasLbl=True):
    if hasLbl:
        ptitle('Valid agent')
    else:
        ptitle("Test agent")
    gpu_id = args.gpu_ids[-1]
    env_conf["env_gpu"] = gpu_id
    log = {}
    logger = Logger(args.log_dir)
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    if hasLbl:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    if "EM_env" in args.env:
        raw_list, gt_lbl_list = datasets
        env = EM_env(raw_list, env_conf, type="train",
                     gt_lbl_list=gt_lbl_list)
    else:
        env = Voronoi_env(env_conf)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == "UNet":
        player.model = UNet(env.observation_space.shape[0], args.features, 2)
    elif args.model == "FusionNetLstm":
        player.model = FusionNetLstm(env.observation_space.shape,
                                     args.features, 2, args.hidden_feat)
    elif args.model == "FusionNet":
        player.model = FusionNet(env.observation_space.shape[0],
                                 args.features, 2)
    elif args.model == "UNetLstm":
        player.model = UNetLstm(env.observation_space.shape, args.features,
                                2, args.hidden_feat)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    create_dir(args.save_model_dir)
    recent_episode_scores = []
    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            if hasLbl:
                log['{}_log'.format(args.env)].info(
                    "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                    .format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time)),
                        reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores += [reward_sum]
            if len(recent_episode_scores) > 200:
                recent_episode_scores.pop(0)

            if args.save_max and np.mean(recent_episode_scores) >= max_score:
                max_score = np.mean(recent_episode_scores)
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(
                                args.save_model_dir,
                                args.env + '_' + str(num_tests)))

            if num_tests % args.log_period == 0:
                if hasLbl:
                    print("----------------------VALID SET--------------------------")
                    print("Log test #:", num_tests)
                    print("rewards: ", player.reward.mean())
                    print("sum rewards: ", reward_sum)
                    print("------------------------------------------------")
                log_img = np.concatenate(renderlist, 0)
                if hasLbl:
                    log_info = {"valid_sample": log_img}
                else:
                    log_info = {"test_sample": log_img}
                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)
                if hasLbl:
                    log_info = {'mean_valid_reward': reward_mean}
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0
            player.clear_actions()
            state = player.env.reset()
            renderlist.append(player.env.render())
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(args, shared_model, env_conf, lock, counter):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger(
        '{}_log'.format(args.env),
        r'{0}{1}-{2}_log'.format(args.log_dir, args.env, args.log_target))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = atari_env(args.env, env_conf, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = A3Clstm(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.state = player.env.reset()
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            with lock:
                counter.value += 1
            log['{}_log'.format(args.env)].info(
                "UpdateStep {0} Time {1}, episode reward {2}, episode length {3}, reward mean {4:.4f}"
                .format(
                    counter.value,
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                    args.env,
                                                    args.log_target))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}_{2}.dat'.format(args.save_model_dir,
                                                args.env, args.log_target))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
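# The (lock, counter) pair consumed above is typically a shared
# multiprocessing value created in the launcher; a minimal sketch with
# illustrative names (not this repo's verified launcher code):
import torch.multiprocessing as mp

counter = mp.Value('i', 0)  # global episode/update count across processes
lock = mp.Lock()            # serializes increments from many workers
# mp.Process(target=test,
#            args=(args, shared_model, env_conf, lock, counter)).start()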
def test(args, shared_model, optimizer, train_modes, n_iters):
    ptitle('Test Agent')
    n_iter = 0
    writer = SummaryWriter(os.path.join(args.log_dir, 'Test'))
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}/logger'.format(args.log_dir))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
        device = torch.device('cuda:' + str(gpu_id))
    else:
        device = torch.device('cpu')

    env = create_env(args.env, args)
    env.seed(args.seed)
    start_time = time.time()
    count_eps = 0

    player = Agent(None, env, args, None, device)
    player.gpu_id = gpu_id
    player.model = build_model(player.env.observation_space,
                               player.env.action_space, args,
                               device).to(device)
    player.model.eval()
    max_score = -100

    while True:
        AG = 0
        reward_sum = np.zeros(player.num_agents)
        reward_sum_list = []
        len_sum = 0
        fps_all = []  # collected across all test episodes of this round
        for i_episode in range(args.test_eps):
            player.model.load_state_dict(shared_model.state_dict())
            player.reset()
            reward_sum_ep = np.zeros(player.num_agents)
            rotation_sum_ep = 0
            fps_counter = 0
            t0 = time.time()
            count_eps += 1
            while True:
                player.action_test()
                fps_counter += 1
                reward_sum_ep += player.reward
                rotation_sum_ep += player.rotation
                if player.done:
                    AG += reward_sum_ep[0] / rotation_sum_ep * player.num_agents
                    reward_sum += reward_sum_ep
                    reward_sum_list.append(reward_sum_ep[0])
                    len_sum += player.eps_len
                    fps = fps_counter / (time.time() - t0)
                    # Global training progress: sum of per-worker step counts.
                    n_iter = 0
                    for n in n_iters:
                        n_iter += n
                    for i, r_i in enumerate(reward_sum_ep):
                        writer.add_scalar('test/reward' + str(i), r_i, n_iter)
                    fps_all.append(fps)
                    writer.add_scalar('test/fps', fps, n_iter)
                    writer.add_scalar('test/eps_len', player.eps_len, n_iter)
                    break

        ave_AG = AG / args.test_eps
        ave_reward_sum = reward_sum / args.test_eps
        len_mean = len_sum / args.test_eps
        reward_step = reward_sum / len_sum
        mean_reward = np.mean(reward_sum_list)
        std_reward = np.std(reward_sum_list)
        log['{}_log'.format(args.env)].info(
            "Time {0}, ave eps reward {1}, ave eps length {2}, reward step {3}, FPS {4}, "
            "mean reward {5}, std reward {6}, AG {7}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                np.around(ave_reward_sum, decimals=2),
                np.around(len_mean, decimals=2),
                np.around(reward_step, decimals=2),
                np.around(np.mean(fps_all), decimals=2), mean_reward,
                std_reward, np.around(ave_AG, decimals=2)))

        # Save the model: best.pth on a new high score, new.pth otherwise.
        if ave_reward_sum[0] >= max_score:
            print('save best!')
            max_score = ave_reward_sum[0]
            model_dir = os.path.join(args.log_dir, 'best.pth')
        else:
            model_dir = os.path.join(args.log_dir, 'new.pth')
        state_to_save = {
            "model": player.model.state_dict(),
            "optimizer": optimizer.state_dict()
        }
        torch.save(state_to_save, model_dir)
        time.sleep(args.sleep_time)

        if n_iter > args.max_step:
            env.close()
            for id in range(0, args.workers):
                train_modes[id] = -100  # signal every worker to exit
            break
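# --- Usage sketch (not part of the original source) -------------------------
# `n_iters` is iterated as plain numbers and `train_modes[id] = -100` is the
# shutdown signal, so one plausible wiring (an assumption; the real one is not
# shown in this file) is a Manager list each training worker updates with its
# own step count:
import torch.multiprocessing as mp

manager = mp.Manager()
train_modes = manager.list([0] * args.workers)  # -100 => worker should exit
n_iters = manager.list([0] * args.workers)      # per-worker step counts
p = mp.Process(target=test,
               args=(args, shared_model, optimizer, train_modes, n_iters))
p.start()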
def test_func(args, shared_model, env_conf, datasets=None, tests=None,
              shared_dict=None):
    ptitle('Valid agent')
    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu
    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}
        logger = Logger(args.log_dir)
        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")
        # Snapshot the source tree next to the logs for reproducibility.
        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")
        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
            args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0], env_conf, type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args, args.model,
                             env_conf["observation_shape"], args.features,
                             atrous_rates=args.atr_rate, num_actions=2,
                             split=args.data_channel, gpu_id=gpu_id,
                             multi=args.multi)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()
    flag = True

    if not args.deploy:
        create_dir(args.save_model_dir)

    # Running means over the last 100 episodes / evaluations.
    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)
    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)
    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------- Deploy / Inference -----------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))
        return
    # --------------------- End Deploy / Inference ---------------------

    merge_ratios = []
    split_ratios = []

    if args.wctrl == "s2m":
        # Split-to-merge weight control: linearly anneal the merge and split
        # weights toward each other over the milestones in the schedule.
        schedule = args.wctrl_schedule
        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (
            2 * len(schedule))
        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)
        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0:
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = \
                        evaluate(args, player.env)
                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)
                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)
                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print("----------------------VALID SET--------------------------")
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice,
                      "diffFG:", diffFG, "MWCov:", MWCov, "MUCov:", MUCov,
                      "AvgFP:", AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if "3D" not in args.data:
                    # Pad the per-step probability maps to a fixed episode
                    # length so they tile next to the rendered frames.
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while len(player.probs) - 3 < args.max_episode_length:
                        player.probs.append(np.zeros_like(player.probs[0]))
                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}
                print(log_img.shape)
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                    player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

                if not args.deploy:
                    log_info = {
                        'mean_valid_reward': reward_mean,
                        '100_mean_reward': recent_episode_scores.mean(),
                        'split_ratio': player.env.split_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                        'merge_ratio': player.env.merge_ratio_sum.sum() /
                                       np.count_nonzero(player.env.gt_lbl),
                    }
                    if args.wctrl == 's2m':
                        log_info.update({
                            'mer_w': mer_w_scheduler.value(),
                            'spl_w': spl_w_scheduler.value() * len(args.out_radius),
                        })
                    merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    split_ratios.append(player.env.split_ratio_sum.sum() /
                                        np.count_nonzero(player.env.gt_lbl))
                    print("split ratio: ", np.max(player.env.split_ratio_sum),
                          np.min(player.env.split_ratio_sum))
                    print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                          np.min(player.env.merge_ratio_sum))
                    print("merge ratio: ", merge_ratios)
                    print("split ratio: ", split_ratios)
                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                # Advance the weight schedule and publish the new weights to
                # the training workers through shared_dict.
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())
            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
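# --- Helper sketches (not part of the original source) ----------------------
# ScalaTracker and Scheduler are used above but defined elsewhere. Minimal
# stand-ins consistent with their call sites; the milestone semantics of
# Scheduler are an assumption inferred from how next()/value() are called,
# not taken from this file.
from collections import deque

class ScalaTracker:
    """Running mean over the last `size` pushed scalars (push/mean as above)."""
    def __init__(self, size):
        self.buf = deque(maxlen=size)

    def push(self, x):
        self.buf.append(x)

    def mean(self):
        return sum(self.buf) / len(self.buf) if self.buf else 0.0

class Scheduler:
    """value() reads the current weight without advancing; next() counts one
    episode and steps the value by `delta` whenever a milestone in
    `schedule` is crossed (assumed behavior)."""
    def __init__(self, init_value, schedule, delta):
        self.val = init_value
        self.milestones = sorted(schedule)
        self.delta = delta
        self.calls = 0

    def value(self):
        return self.val

    def next(self):
        self.calls += 1
        if self.milestones and self.calls >= self.milestones[0]:
            self.milestones.pop(0)
            self.val += self.delta
        return self.val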
def test(args, shared_model, env_conf):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    # Note: the environment is hard-coded to Pong-v4 here, independent of
    # the args.env value logged above.
    env = GymEnvironment(env_name='Pong-v4')
    action_size = env.get_action_size('Pong-v4')
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(model=None, env=env, action_size=action_size, args=args,
                   state=None)
    player.gpu_id = gpu_id
    player.model = UNREALModule(3, action_size=action_size,
                                enable_pixel_control=True)
    player.state = player.env.last_state
    player.eps_len += 2
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()

    flag = True
    max_score = 0
    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward

        if player.done and not player.info:
            player.env.reset()
            state = player.env.last_state
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            flag = True
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                args.env))
                else:
                    state_to_save = player.model.state_dict()
                    torch.save(
                        state_to_save,
                        '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            player.env.reset()
            state = player.env.last_state
            player.eps_len += 2
            time.sleep(10)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
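# --- Usage sketch (not part of the original source) -------------------------
# Every variant above checkpoints with torch.save(model.state_dict(), path),
# so restoring a checkpoint for offline evaluation is the standard inverse
# (assumes the same args/action_size used when the file was written):
model = UNREALModule(3, action_size=action_size, enable_pixel_control=True)
model.load_state_dict(
    torch.load('{0}{1}.dat'.format(args.save_model_dir, args.env),
               map_location='cpu'))
model.eval()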