def main(args):
    """Run ``args['trial_num']`` training trials and store each trial's results.

    For every trial this opens a fresh TensorFlow session, seeds NumPy,
    TensorFlow and both Gym environments, builds an ``adInfoHRLTD3`` agent,
    calls ``train`` and writes the array it returns to a text file.

    Args:
        args: Mapping of run options. Keys read here: ``trial_num``,
            ``change_seed``, ``random_seed``, ``env``, ``minibatch_size``,
            ``tau``, ``actor_lr``, ``critic_lr``, ``option_lr``, ``gamma``,
            ``hidden_dim``, ``lambda``, ``c_reg``, ``option_num``,
            ``vat_noise``, ``c_ent``, ``use_gym_monitor``, ``render_env``,
            ``monitor_dir``, ``result_file``, ``temporal_num``,
            ``trial_idx`` and ``overwrite_result``. The mapping is also
            passed through to ``train`` unchanged.

    Returns:
        None. Results are written to disk as a side effect.
    """
    # Local import: only needed for the "does the result file exist" check.
    import os

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        with tf.Session() as sess:
            # Shift the base seed per trial only when asked to; otherwise
            # every trial runs with the same seed.
            rand_seed = 10 * ite if args['change_seed'] else 0
            seed = int(args['random_seed']) + int(rand_seed)

            env = gym.make(args['env'])
            np.random.seed(seed)
            tf.set_random_seed(seed)
            env.seed(seed)

            env_test = gym.make(args['env'])
            env_test.seed(seed)

            state_dim = env.observation_space.shape[0]
            action_dim = env.action_space.shape[0]
            print('action_space.shape', env.action_space.shape)
            print('observation_space.shape', env.observation_space.shape)
            action_bound = env.action_space.high

            # The bound is assumed symmetric; only the first action
            # dimension is actually checked here.
            assert (env.action_space.high[0] == -env.action_space.low[0])

            agent = adInfoHRLTD3(
                sess, env, state_dim, action_dim, action_bound,
                int(args['minibatch_size']),
                tau=float(args['tau']),
                actor_lr=float(args['actor_lr']),
                critic_lr=float(args['critic_lr']),
                option_lr=float(args['option_lr']),
                gamma=float(args['gamma']),
                hidden_dim=np.asarray(args['hidden_dim']),
                entropy_coeff=float(args['lambda']),
                c_reg=float(args['c_reg']),
                option_num=int(args['option_num']),
                vat_noise=float(args['vat_noise']),
                c_ent=float(args['c_ent']))

            if args['use_gym_monitor']:
                if not args['render_env']:
                    # Statistics only, no video recording.
                    env = wrappers.Monitor(
                        env, args['monitor_dir'],
                        video_callable=False, force=True)
                else:
                    # Record a video on every 50th episode.
                    env = wrappers.Monitor(
                        env, args['monitor_dir'],
                        video_callable=lambda episode_id: episode_id % 50 == 0,
                        force=True)

            step_R_i = train(sess, env, env_test, args, agent)

            # NOTE(review): this directory is created but `result_filename`
            # below is not built from it; presumably args['result_file']
            # already carries the directory prefix -- confirm.
            result_path = "./results/trials/separate/"
            try:
                import pathlib
                pathlib.Path(result_path).mkdir(parents=True, exist_ok=True)
            except (ImportError, OSError):
                # Best effort: a missing directory must not abort the run.
                print(
                    "A result directory does not exist and cannot be created. The trial results are not saved"
                )

            result_filename = (
                args['result_file'] + '_' + args['env']
                + '_lambda_' + str(float(args['lambda']))
                + '_c_reg_' + str(float(args['c_reg']))
                + '_vat_noise_' + str(float(args['vat_noise']))
                + '_c_ent_' + str(float(args['c_ent']))
                + '_option_' + str(float(args['option_num']))
                + '_temporal_' + str(float(args['temporal_num']))
                + '_trial_idx_' + str(int(args['trial_idx']))
                + '.txt'
            )

            trial_results = np.asarray(step_R_i)
            overwrite = args['overwrite_result'] and ite == 0
            if overwrite or not os.path.isfile(result_filename):
                # Start a fresh file. This also covers appending when no
                # earlier results exist, which would otherwise crash in
                # np.loadtxt after the whole trial has been trained.
                np.savetxt(result_filename, trial_results)
            else:
                # Stack this trial's results under the ones already stored.
                previous = np.loadtxt(result_filename, dtype=float)
                np.savetxt(result_filename,
                           np.vstack((previous, trial_results)))

            if args['use_gym_monitor']:
                # `env.monitor` is the pre-wrapper Gym API; a Monitor-wrapped
                # env is shut down through close().
                env.close()
def main(args):
    """Load a saved adInfoHRL-TD3 model and evaluate it ``args['trial_num']`` times.

    For every trial this opens a TensorFlow session limited to a fraction of
    GPU memory, seeds NumPy, TensorFlow and the Gym environment, optionally
    wraps the environment in a video-recording Monitor, restores the agent
    from ``./Model/adInfoHRL/<env>/`` and calls ``test``.

    Args:
        args: Mapping of run options. Keys read here: ``env``, ``lambda``,
            ``c_reg``, ``vat_noise``, ``c_ent``, ``option_num``,
            ``temporal_num``, ``trial_idx``, ``trial_num``, ``change_seed``,
            ``random_seed``, ``save_video``, ``method_name`` and
            ``load_model_iter``. The mapping is also passed through to
            ``test`` unchanged.

    Returns:
        None.

    Raises:
        ValueError: If ``args['method_name']`` is not ``'adInfoHRLTD3'``.
    """
    # Experiment name; handed to load_model as `expname` and to test().
    result_name = (
        'adInfoHRLTD3_' + args['env']
        + '_lambda_' + str(float(args['lambda']))
        + '_c_reg_' + str(float(args['c_reg']))
        + '_vat_noise_' + str(float(args['vat_noise']))
        + '_c_ent_' + str(float(args['c_ent']))
        + '_option_' + str(float(args['option_num']))
        + '_temporal_' + str(float(args['temporal_num']))
        + '_trial_idx_' + str(int(args['trial_idx']))
    )

    for ite in range(int(args['trial_num'])):
        print('Trial Number:', ite)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)

        with tf.Session(config=config) as sess:
            # Shift the base seed per trial only when asked to.
            rand_seed = 10 * ite if args['change_seed'] else 0
            seed = int(args['random_seed']) + int(rand_seed)

            np.random.seed(seed)
            tf.set_random_seed(seed)

            env = gym.make(args['env'])
            env.seed(seed)

            try:
                if args['save_video']:
                    try:
                        import pathlib
                        pathlib.Path("./Video/" + args['env']).mkdir(
                            parents=True, exist_ok=True)
                        video_relative_path = "./Video/" + args['env'] + "/"

                        # Record a video of the first episode only.
                        env = gym.wrappers.Monitor(
                            env, video_relative_path,
                            video_callable=lambda episode_id: episode_id == 0,
                            force=True)
                    except Exception:
                        # Best effort: evaluation continues without video.
                        print(
                            "Cannot create video directories. Video will not be saved."
                        )

                state_dim = env.observation_space.shape[0]
                action_dim = env.action_space.shape[0]
                action_bound = env.action_space.high

                # Ensure action bound is symmetric (only the first action
                # dimension is checked).
                assert (env.action_space.high[0] == -env.action_space.low[0])

                if args['method_name'] == 'adInfoHRLTD3':
                    from adInfoHRL_agent import adInfoHRLTD3

                    # Only option_num is forwarded; the remaining
                    # hyperparameters are left to the constructor's own
                    # defaults (presumably irrelevant once the weights are
                    # restored -- confirm).
                    agent = adInfoHRLTD3(
                        sess, env, state_dim, action_dim, action_bound,
                        option_num=int(args['option_num']))
                else:
                    # Otherwise `agent` would be unbound and the load below
                    # would fail with an opaque NameError.
                    raise ValueError(
                        "Unsupported method_name: %r" % (args['method_name'],))

                model_path = "./Model/adInfoHRL/" + args['env'] + '/'
                agent.load_model(iteration=int(args['load_model_iter']),
                                 expname=result_name,
                                 model_path=model_path)

                test(sess, env, args, agent, result_name)
            finally:
                # Always close the env so a Monitor attached for save_video
                # finalizes its recording; closing was previously tied to
                # the unrelated 'use_gym_monitor' option.
                env.close()