def play(policy_file, seed, n_test_rollouts, render):
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Load params
    with open(PARAMS_FILE) as json_file:
        params = json.load(json_file)
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    structure = params['structure']
    task_selection = params['task_selection']
    goal_selection = params['goal_selection']

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'use_demo_states': False,
        'compute_Q': True,
        'T': params['T'],
        'structure': structure,
        'task_selection': task_selection,
        'goal_selection': goal_selection,
        'queue_length': params['queue_length'],
        'eval': True,
        'render': render
    }

    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
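# --- Illustrative only: a hypothetical PARAMS_FILE payload covering the keys that the
# variant above reads. The key names come from the code; every value below is a
# placeholder, not the original configuration.
EXAMPLE_PARAMS = {
    'structure': 'modular',        # copied into eval_params['structure']
    'task_selection': 'random',    # copied into eval_params['task_selection']
    'goal_selection': 'random',    # copied into eval_params['goal_selection']
    'queue_length': 100,           # copied into eval_params['queue_length']
    'test_with_polyak': False,     # whether evaluation uses the target network
    'T': 50,                       # episode horizon
    'gamma': 0.98,
    'noise_eps': 0.2,
    'random_eps': 0.3,
}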
def main(policy_file, seed, n_test_rollouts, render):
    """
    run HER from a saved policy

    :param policy_file: (str) pickle path to a saved policy
    :param seed: (int) initial seed
    :param n_test_rollouts: (int) the number of test rollouts
    :param render: (bool) if rendering should be done
    """
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as file_handler:
        policy = pickle.load(file_handler)
    env_name = policy.info['env_name']

    # Prepare params.
    params = config.DEFAULT_PARAMS
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger_input=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_q': True,
        'rollout_batch_size': 1,
        'render': bool(render),
    }

    for name in ['time_horizon', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
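# --- Illustrative only: a minimal command-line entry point for the variant above.
# The original scripts ship their own argument parsing; this argparse sketch merely
# shows how main(policy_file, seed, n_test_rollouts, render) might be invoked, e.g.
#   python play.py /tmp/policy_best.pkl --seed 0 --n_test_rollouts 10 --render 1
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Replay a saved HER policy.')
    parser.add_argument('policy_file', type=str, help='path to the pickled policy')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--n_test_rollouts', type=int, default=10)
    parser.add_argument('--render', type=int, default=1)
    args = parser.parse_args()
    main(args.policy_file, args.seed, args.n_test_rollouts, args.render)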
def main(policy_file, seed, n_test_rollouts, render, record):
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    params = config.DEFAULT_PARAMS
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_Q': True,
        'rollout_batch_size': 1,
        'render': bool(render),
    }

    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    if record:
        make_env = params['make_env']

        def video_callable(episode_id):
            return True

        def make_record_env():
            env = make_env()
            return gym.wrappers.Monitor(env, '../../../results/video/' + env_name,
                                        force=True, video_callable=video_callable)

        params['make_env'] = make_record_env

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
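# --- Note on the recording branch above: gym.wrappers.Monitor has been removed from
# recent gym releases. Assuming a newer gym is installed, the same per-episode recording
# could be reproduced with RecordVideo; the wrapper below is an illustrative sketch only.
import gym

def make_record_env_recent(make_env, video_dir):
    env = make_env()
    # episode_trigger plays the role of video_callable above: record every episode.
    return gym.wrappers.RecordVideo(env, video_folder=video_dir,
                                    episode_trigger=lambda episode_id: True)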
def main(policy_file, seed, n_test_rollouts, render, with_forces):
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    params = config.DEFAULT_PARAMS
    params['with_forces'] = with_forces
    params['plot_forces'] = False
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_Q': True,
        'rollout_batch_size': 1,
        'with_forces': with_forces,
        'plot_forces': False,
        'render': bool(render),
    }

    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
def main(policy_file, seed, n_test_rollouts, render):
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    params = config.DEFAULT_PARAMS
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': True,
        'use_target_net': params['test_with_polyak'],
        'compute_Q': True,
        'rollout_batch_size': 1,
        'render': bool(render),
    }

    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()
    for _ in range(n_test_rollouts):
        evaluator.generate_rollouts()

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
def main(policy_file, seed, n_test_rollouts, render, exploit, compute_q, collect_data,
         goal_generation, note):
    set_global_seeds(seed)

    # Load policy.
    with open(policy_file, 'rb') as f:
        policy = pickle.load(f)
    env_name = policy.info['env_name']

    # Prepare params.
    params = config.DEFAULT_PARAMS
    params['note'] = note or params['note']
    if note:
        with open('params/' + env_name + '/' + note + '.json', 'r') as file:
            override_params = json.loads(file.read())
            params.update(**override_params)
    if env_name in config.DEFAULT_ENV_PARAMS:
        params.update(config.DEFAULT_ENV_PARAMS[env_name])  # merge env-specific parameters in
    params['env_name'] = env_name
    goal_generation = params['goal_generation']
    params = config.prepare_params(params)
    config.log_params(params, logger=logger)

    dims = config.configure_dims(params)

    eval_params = {
        'exploit': exploit,  # eval: True, train: False
        'use_target_net': params['test_with_polyak'],  # eval/train: False
        'compute_Q': compute_q,  # eval: True, train: False
        'rollout_batch_size': 1,
        'render': render,
    }

    for name in ['T', 'gamma', 'noise_eps', 'random_eps']:
        eval_params[name] = params[name]

    evaluator = RolloutWorker(params['make_env'], policy, dims, logger, **eval_params)
    evaluator.seed(seed)

    # Run evaluation.
    evaluator.clear_history()

    num_skills = params['num_skills']

    if goal_generation == 'Zero':
        generated_goal = np.zeros(evaluator.g.shape)
    else:
        generated_goal = False

    for z in range(num_skills):
        assert (evaluator.rollout_batch_size == 1)
        z_s_onehot = np.zeros([evaluator.rollout_batch_size, num_skills])
        z_s_onehot[0, z] = 1

        base = os.path.splitext(policy_file)[0]

        for i_test_rollouts in range(n_test_rollouts):
            if render == 'rgb_array' or render == 'human':
                imgs, episode = evaluator.generate_rollouts(
                    generated_goal=generated_goal, z_s_onehot=z_s_onehot)
                end = '_test_{:02d}_exploit_{}_compute_q_{}_skill_{}.avi'.format(
                    i_test_rollouts, exploit, compute_q, z)
                test_filename = base + end
                save_video(imgs[0], test_filename, lib='cv2')
            else:
                episode = evaluator.generate_rollouts(
                    generated_goal=generated_goal, z_s_onehot=z_s_onehot)

            if collect_data:
                end = '_test_{:02d}_exploit_{}_compute_q_{}_skill_{}.txt'.format(
                    i_test_rollouts, exploit, compute_q, z)
                test_filename = base + end
                with open(test_filename, 'w') as file:
                    file.write(json.dumps(episode['o'].tolist()))

    # record logs
    for key, val in evaluator.logs('test'):
        logger.record_tabular(key, np.mean(val))
    logger.dump_tabular()
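# --- Illustrative only: save_video(frames, filename, lib='cv2') is called above but not
# defined in this listing. The helper below is an assumed implementation based on
# standard OpenCV usage, not the original code.
import cv2
import numpy as np

def save_video(frames, filename, lib='cv2', fps=30):
    """Write a sequence of HxWx3 RGB frames to a video file."""
    assert lib == 'cv2', 'only the cv2 backend is sketched here'
    frames = np.asarray(frames)
    height, width = frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    writer = cv2.VideoWriter(filename, fourcc, fps, (width, height))
    for frame in frames:
        # OpenCV expects BGR channel order; rendered frames are usually RGB.
        writer.write(cv2.cvtColor(frame.astype(np.uint8), cv2.COLOR_RGB2BGR))
    writer.release()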