import gym
import tensorflow as tf
from spinup import trpo


def run_algo(output_dir, seed):
    # Note: it's best to run the algorithm in a function to allow for multiple runs
    # in the same IPython console. I believe this avoids contamination of TensorFlow
    # objects, which can cause the reloaded networks to lose their trained parameters.
    def env_fn():
        try:
            import env.tightWellEnvironment
            return gym.make('TightWellEnv-v0')
        except Exception:
            # A stale registration from a previous run is still in the gym registry;
            # remove it so the environment can be re-registered and created again.
            env_dict = gym.envs.registration.registry.env_specs.copy()
            for env_name in env_dict:
                if 'TightWellEnv-v0' in env_name:
                    print('Remove {} from registry'.format(env_name))
                    del gym.envs.registration.registry.env_specs[env_name]
            import env.tightWellEnvironment
            return gym.make('TightWellEnv-v0')
        print('gym environment already registered.')  # unreachable: both branches above return

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    logger_kwargs = dict(output_dir=output_dir)
    ac_kwargs = dict(hidden_sizes=(32,))
    with tf.Session(graph=tf.Graph()):
        # ppo or trpo or vpg
        trpo(env_fn, gamma=1., steps_per_epoch=600, epochs=500, seed=seed,
             ac_kwargs=ac_kwargs, logger_kwargs=logger_kwargs)
    return
import gym
import tensorflow as tf
from spinup import trpo


def run_algo(output_dir, seed):
    # Note: it's best to run the algorithm in a function to allow for multiple runs
    # in the same IPython console. I believe this avoids contamination of TensorFlow
    # objects, which can cause the reloaded networks to lose their trained parameters.
    # Needed to use the custom environment:
    def env_fn():
        import env.smallVaringGoalRep1Environment
        return gym.make('SmallVaringGoalRep1Env-v0')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    logger_kwargs = dict(output_dir=output_dir)
    ac_kwargs = dict(hidden_sizes=(32,))
    with tf.Session(graph=tf.Graph()):
        # ppo or trpo or vpg
        trpo(env_fn, gamma=1., steps_per_epoch=1200, epochs=5000, seed=seed,
             ac_kwargs=ac_kwargs, logger_kwargs=logger_kwargs)
    return
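# As the comments above note, wrapping training in a function makes repeated runs
# in one IPython session safe. A usage sketch of run_algo; the output directories
# and seeds below are illustrative, not taken from the original script:
for seed in (0, 10, 20):
    run_algo(output_dir='data/smallVaringGoalRep1/seed{}'.format(seed), seed=seed)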
import os
import tensorflow as tf
from spinup import ppo, ddpg, trpo, td3, sac
# SawyerReachEnv and SawyerGraspEnv are the project's custom environments;
# SAVE_PATH and EXP_NAME are module-level constants defined elsewhere.


def train(alg, task):
    if task == 'reach':
        env_fn = lambda: SawyerReachEnv(n_substeps=25, reward_type='dense')
    elif task == 'grasp':
        env_fn = lambda: SawyerGraspEnv(n_substeps=5, reward_type='dense')

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
    save_path = os.path.join(SAVE_PATH, task, alg)

    if alg == 'ppo':
        # mpi_fork(2)
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        ppo(env_fn=env_fn, steps_per_epoch=4000, epochs=20000,
            logger_kwargs=logger_kwargs, max_ep_len=1000)
    elif alg == 'ddpg':
        logger_kwargs = dict(output_dir=SAVE_PATH + '/ddpg_suite', exp_name=EXP_NAME)
        ddpg(env_fn=env_fn, steps_per_epoch=5000, batch_size=256, epochs=2000,
             logger_kwargs=logger_kwargs, max_ep_len=200)
    elif alg == 'trpo':
        logger_kwargs = dict(output_dir=SAVE_PATH + '/trpo_suite', exp_name=EXP_NAME)
        trpo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=2000,
             logger_kwargs=logger_kwargs, max_ep_len=200)
    elif alg == 'td3':
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        td3(env_fn=env_fn, start_steps=100000, steps_per_epoch=5000, epochs=2000,
            logger_kwargs=logger_kwargs, max_ep_len=1000)
    elif alg == 'sac':
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        sac(env_fn=env_fn, start_steps=100000, steps_per_epoch=5000, epochs=2000,
            logger_kwargs=logger_kwargs, max_ep_len=200)
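# The entry point that dispatches to train() is not shown. A minimal, hypothetical
# command-line wrapper; the argument names and choices are assumptions based on
# the branches above, not part of the original script:
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('alg', choices=['ppo', 'ddpg', 'trpo', 'td3', 'sac'])
    parser.add_argument('task', choices=['reach', 'grasp'])
    args = parser.parse_args()
    train(args.alg, args.task)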
    # ... continuation of the ppo(...) call from the preceding 'ppo' branch
    max_ep_len=1000, gamma=0.99, seed=seed, steps_per_epoch=steps_per_epoch,
    pi_lr=0.005, vf_lr=0.005, epochs=epochs, logger_kwargs=logger_kwargs,
    clip_ratio=float(clip_ratio), target_kl=float(target_kl))

# train with TRPO
if algorithm == 'trpo':
    delta = sys.argv[2]
    backtrack_coef = sys.argv[3]
    exp_name = 'll_trpo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_delta' + delta + '_bc' + backtrack_coef
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    trpo(env_fn=env_fn, ac_kwargs=ac_kwargs, max_ep_len=1000, gamma=0.99,
         seed=seed, steps_per_epoch=steps_per_epoch, vf_lr=0.005, epochs=epochs,
         logger_kwargs=logger_kwargs, backtrack_coeff=float(backtrack_coef),
         delta=float(delta))
import pandas as pd
import matplotlib.pyplot as plt
from spinup import trpo
# env_fn is the environment constructor defined earlier in the script.

# output_dir = 'logging/awake/PPO/'
# logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')
# agent = ppo(env_fn=env_fn, epochs=100, steps_per_epoch=5000,
#             logger_kwargs=logger_kwargs, seed=123, save_freq=100)

# output_dir = 'logging/awake/SAC/'
# logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')
# agent = sac(env_fn=env_fn, epochs=25, steps_per_epoch=1000,
#             logger_kwargs=logger_kwargs, start_steps=1000)

output_dir = 'logging/awake/TRPO/'
logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')

# agent = td3(env_fn=env_fn, epochs=25, steps_per_epoch=1000,
#             logger_kwargs=logger_kwargs, start_steps=500)
agent = trpo(env_fn=env_fn, epochs=50, steps_per_epoch=1000,
             logger_kwargs=logger_kwargs, delta=0.5)

# Plot training statistics from the progress log written by the spinup logger.
plot_name = 'Stats'
name = plot_name
data = pd.read_csv(output_dir + '/progress.txt', sep="\t")
data.index = data['TotalEnvInteracts']
data_plot = data[['EpLen', 'MinEpRet', 'AverageEpRet']]
data_plot.plot(secondary_y=['MinEpRet', 'AverageEpRet'])
plt.title(name)
# plt.savefig(name + '.pdf')
plt.show()
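# Spinning Up's logger also saves the trained policy under output_dir. A sketch of
# loading and rolling it out with the test_policy utilities; this assumes a
# Spinning Up version that exposes load_policy_and_env/run_policy (older releases
# name the loader load_policy), and the episode settings below are illustrative.
from spinup.utils.test_policy import load_policy_and_env, run_policy

env, get_action = load_policy_and_env(output_dir)  # restores the saved policy
run_policy(env, get_action, max_ep_len=200, num_episodes=5, render=False)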
from spinup import ppo, trpo
import tensorflow as tf
import gym
from mycart import MyCartPoleEnv

# env_fn = lambda: gym.make('LunarLander-v2')
env_fn = lambda: MyCartPoleEnv()

ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='./logsDDPG', exp_name='pend')

trpo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200,
     logger_kwargs=logger_kwargs)
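# MyCartPoleEnv itself is not shown. Below is a minimal, hypothetical sketch of the
# gym.Env interface such a custom environment must expose (observation_space,
# action_space, reset, step) for the Spinning Up algorithms used above to drive it;
# the class name and dynamics are placeholders, not the real cart-pole implementation.
import numpy as np
from gym import Env, spaces


class MyCartPoleEnvSketch(Env):
    """Hypothetical skeleton of a custom environment compatible with spinup."""

    def __init__(self):
        # Continuous observation, discrete action, as in the classic CartPole task.
        high = np.array([4.8, np.inf, 0.42, np.inf], dtype=np.float32)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        self.action_space = spaces.Discrete(2)
        self.state = None

    def reset(self):
        # Start near the upright equilibrium.
        self.state = np.random.uniform(-0.05, 0.05, size=4).astype(np.float32)
        return self.state

    def step(self, action):
        # Placeholder dynamics; a real implementation would integrate the
        # cart-pole equations of motion using the chosen action.
        self.state = self.state + np.random.uniform(-0.01, 0.01, size=4).astype(np.float32)
        reward = 1.0
        done = bool(abs(self.state[2]) > 0.21)
        return self.state, reward, done, {}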