# Assumed imports for this script: the standard library and third-party
# modules used below, plus the project-local Model, policy_fn_name, EnvVec,
# and make_env helpers defined elsewhere in this repo. scipy.misc.imsave is
# deprecated in recent SciPy releases; imageio.imwrite is the usual drop-in
# substitute.
import logging
import os
import shutil
import statistics
from collections import deque

import gym
import numpy as np
from baselines import bench, logger
from gym import spaces
from scipy.misc import imsave
from skimage.transform import resize


def test(env_id, policy_name, seed, nstack=1, numAgents=2):
    iters = 100
    env = gym.make(env_id)
    env.seed(seed)
    print("logger dir: ", logger.get_dir())
    env = bench.Monitor(env, logger.get_dir())

    # Must be defined before the Pendulum-specific branch below reads it.
    continuous_actions = False
    if env_id == 'Pendulum-v0':
        if continuous_actions:
            env.action_space.n = env.action_space.shape[0]
        else:
            env.action_space.n = 10
    gym.logger.setLevel(logging.WARN)

    img_shape = (84, 84, 3)
    ob_space = spaces.Box(low=0, high=255, shape=img_shape)
    ac_space = env.action_space

    # def get_img(env):
    #     ax, img = env.get_img()
    #     return ax, img

    # def process_img(img):
    #     img = rgb2grey(copy.deepcopy(img))
    #     img = resize(img, img_shape)
    #     return img

    policy_fn = policy_fn_name(policy_name)

    # A2C hyperparameters; only needed to reconstruct the model graph,
    # since no training happens in this evaluation loop.
    nsteps = 5
    total_timesteps = int(80e6)
    vf_coef = 0.5
    ent_coef = 0.01
    max_grad_norm = 0.5
    lr = 7e-4
    lrschedule = 'linear'
    epsilon = 1e-5
    alpha = 0.99
    debug = False

    if numAgents == 1:
        # NOTE: the rollout below indexes model per agent, so it effectively
        # assumes numAgents > 1.
        model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space,
                      nenvs=1, nsteps=nsteps, nstack=nstack, num_procs=1,
                      ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha,
                      epsilon=epsilon, total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions, debug=debug)
        m_name = 'test_model_Mar7_1mil.pkl'
        model.load(m_name)
    else:
        # One model per agent; itr=i keeps their variable scopes distinct.
        model = []
        for i in range(numAgents):
            model.append(
                Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space,
                      nenvs=1, nsteps=nsteps, nstack=nstack, num_procs=1,
                      ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha,
                      epsilon=epsilon, total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions, debug=debug,
                      itr=i))
        for i in range(numAgents):
            m_name = 'test_model_' + str(i) + '_300k.pkl'
            model[i].load(m_name)
            print('---------------------------------------------')
            print("Successfully Loaded: ", m_name)
            print('---------------------------------------------')

    env.env, img = env.reset()
    rwd = [[] for _ in range(numAgents)]
    percent_exp = [[] for _ in range(numAgents)]
    for i in range(1, iters + 1):
        # Report running statistics every 10 episodes.
        if i % 10 == 0:
            for j in range(numAgents):
                print('-----------------------------------')
                print('Agent ' + str(j))
                print('Iteration: ', i)
                avg_rwd = sum(rwd[j]) / i
                avg_pct_exp = sum(percent_exp[j]) / i
                med_pct_exp = statistics.median(percent_exp[j])
                print('Average Reward: ', avg_rwd)
                print('Average Percent Explored: ', avg_pct_exp, '%')
                print('Median Percent Explored: ', med_pct_exp)
                print('-----------------------------------')

        # Fresh frame directory and observation history for each agent.
        img_hist = []
        frames_dir = []
        for j in range(numAgents):
            frames_dir.append('exp_frames' + str(j * 100 + i + 200))
            if os.path.exists(frames_dir[j]):
                shutil.rmtree(frames_dir[j])
            os.makedirs(frames_dir[j])
            # Seed the history with copies of the current frame; maxlen=nstack
            # keeps only the most recent frame(s).
            img_hist.append(deque([img[j] for _ in range(4)], maxlen=nstack))

        action = 0
        total_rewards = [0] * numAgents
        nstack = 1  # forces single-frame input below, whatever was passed in
        for tidx in range(1000):
            for j in range(numAgents):
                if tidx > 0:
                    input_imgs = np.expand_dims(
                        np.squeeze(np.stack(img_hist, -1)), 0)
                    if input_imgs.shape == (1, 84, 84, 3):
                        actions, values, states = model[j].step_model.step(
                            input_imgs)
                    else:
                        actions, values, states = model[j].step_model.step(
                            input_imgs[:, :, :, :, 0])
                    action = actions[0]
                    value = values[0]
                img, reward, done, _ = env.step(action, j)
                total_rewards[j] += reward
                img_hist[j].append(img[j])
                imsave(
                    os.path.join(frames_dir[j],
                                 'frame_{:04d}.png'.format(tidx)),
                    resize(img[j], (img_shape[0], img_shape[1], 3)))
            if done:
                for j in range(numAgents):
                    rwd[j].append(total_rewards[j])
                    percent_exp[j].append(env.env.percent_explored[j])
                # env.env, img = env.reset()
                break

    # Final summary over all test episodes.
    for i in range(numAgents):
        print('-----------------------------------')
        print('Agent ' + str(i))
        print('Iteration: ', iters)
        avg_rwd = sum(rwd[i]) / iters
        avg_pct_exp = sum(percent_exp[i]) / iters
        med_pct_exp = statistics.median(percent_exp[i])
        print('Average Reward: ', avg_rwd)
        print('Average Percent Explored: ', avg_pct_exp, '%')
        print('Median Percent Explored: ', med_pct_exp)
        print('-----------------------------------')
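
# A minimal sketch of how this image-based variant might be invoked. The env
# id and policy name here are placeholders (whatever this repo actually
# registers applies), and the checkpoint filenames are hard-coded inside
# test() above:
#
#     test('GridExplore-v0', 'cnn', seed=0, nstack=1, numAgents=2)
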
def test(env_id, policy_name, seed, nstack=1, numAgents=2, benchmark=False):
    iters = 100
    # A single vectorized particle environment in test mode.
    env = EnvVec([
        make_env(env_id, benchmark=benchmark, rank=idx, seed=seed)
        for idx in range(1)
    ], particleEnv=True, test=True)
    # print("logger dir: ", logger.get_dir())
    # env = bench.Monitor(env, logger.get_dir())

    # Must be defined before the Pendulum-specific branch below reads it.
    continuous_actions = False
    if env_id == 'Pendulum-v0':
        if continuous_actions:
            env.action_space.n = env.action_space.shape[0]
        else:
            env.action_space.n = 10
    gym.logger.setLevel(logging.WARN)

    ob_space = env.observation_space
    ac_space = env.action_space

    policy_fn = policy_fn_name(policy_name)

    # A2C hyperparameters; only needed to reconstruct the model graph,
    # since no training happens in this evaluation loop.
    nsteps = 5
    total_timesteps = int(80e6)
    vf_coef = 0.9
    ent_coef = 0.01
    max_grad_norm = 0.5
    lr = 7e-4
    lrschedule = 'linear'
    epsilon = 1e-5
    alpha = 0.99
    debug = False

    if numAgents == 1:
        model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space,
                      nenvs=1, nsteps=nsteps, nstack=nstack, num_procs=1,
                      ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha,
                      epsilon=epsilon, total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions, debug=debug)
        m_name = 'test_model_Mar7_1mil.pkl'
        model.load(m_name)
    else:
        # One model per agent; itr=i keeps their variable scopes distinct.
        model = []
        for i in range(numAgents):
            model.append(
                Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space,
                      nenvs=1, nsteps=nsteps, nstack=nstack, num_procs=1,
                      ent_coef=ent_coef, vf_coef=vf_coef,
                      max_grad_norm=max_grad_norm, lr=lr, alpha=alpha,
                      epsilon=epsilon, total_timesteps=total_timesteps,
                      lrschedule=lrschedule,
                      continuous_actions=continuous_actions, debug=debug,
                      itr=i, particleEnv=True))
        for i in range(numAgents):
            m_name = 'partEnv_model_' + str(i) + '.pkl'
            model[i].load(m_name)
            print('---------------------------------------------')
            print("Successfully Loaded: ", m_name)
            print('---------------------------------------------')

    obs = env.reset()
    states = [[] for _ in range(numAgents)]
    dones = [False] * numAgents
    rwd = [[] for _ in range(numAgents)]
    percent_exp = [[] for _ in range(numAgents)]

    # Per-agent observation sizes are constant, so compute them once.
    ob_shape = np.asarray(
        [env.observation_space[k].shape for k in range(env.n)]).flatten()
    print(ob_shape)

    for i in range(1, iters + 1):
        # Report running statistics every episode.
        for j in range(numAgents):
            print('-----------------------------------')
            print('Agent ' + str(j))
            print('Iteration: ', i)
            avg_rwd = sum(rwd[j]) / i
            # avg_pct_exp = sum(percent_exp[j]) / i
            # med_pct_exp = statistics.median(percent_exp[j])
            print('Average Reward: ', avg_rwd)
            # print('Average Percent Explored: ', avg_pct_exp, '%')
            # print('Median Percent Explored: ', med_pct_exp)
            print('-----------------------------------')

        actions = [[] for _ in range(numAgents)]
        values = [[] for _ in range(numAgents)]
        total_rewards = np.zeros(numAgents)
        for tidx in range(1000):
            # Query each agent's policy, then step all agents jointly.
            for j in range(numAgents):
                actions[j], values[j], states[j] = model[j].step(
                    obs[:, j].reshape(1, ob_shape[j]), states[j], dones[j])
            obs, rewards, dones, _ = env.step(actions)
            dones = dones.flatten()
            # Accumulate per-agent rewards; assumes the single environment
            # returns one reward entry per agent.
            total_rewards += np.asarray(rewards).flatten()
            print(total_rewards)
            # print(dones)
            if dones.any():
                for j in range(numAgents):
                    rwd[j].append(total_rewards[j])
                    # percent_exp[j].append(env.env.percent_explored[j])
                obs = env.reset()
                break

    # Final summary over all test episodes.
    for i in range(numAgents):
        print('-----------------------------------')
        print('Agent ' + str(i))
        print('Iteration: ', iters)
        avg_rwd = sum(rwd[i]) / iters
        # avg_pct_exp = sum(percent_exp[i]) / iters
        # med_pct_exp = statistics.median(percent_exp[i])
        print('Average Reward: ', avg_rwd)
        # print('Average Percent Explored: ', avg_pct_exp, '%')
        # print('Median Percent Explored: ', med_pct_exp)
        print('-----------------------------------')
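

# A minimal sketch of a command-line entry point, assuming this module is run
# directly. The scenario and policy names below are placeholders; whatever
# make_env and policy_fn_name accept in this repo is what actually applies.
if __name__ == '__main__':
    test('simple_spread', 'mlp', seed=0, nstack=1, numAgents=2,
         benchmark=False)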