Example #1
import gym
import tensorflow as tf
from spinup import trpo


def run_algo(output_dir, seed):
    # Note: it's best to run the algorithm inside a function so that multiple
    # runs can be launched from the same IPython console. This avoids
    # contaminating TensorFlow objects across runs, which can otherwise cause
    # the reloaded networks to lose their trained parameters.

    def env_fn():
        try:
            import env.tightWellEnvironment
            return gym.make('TightWellEnv-v0')
        except gym.error.Error:
            # The environment id is already registered from a previous run:
            # remove the stale entry from the registry and make it again.
            print('gym environment already registered.')
            env_dict = gym.envs.registration.registry.env_specs.copy()
            for env_name in env_dict:
                if 'TightWellEnv-v0' in env_name:
                    print('Removing {} from registry'.format(env_name))
                    del gym.envs.registration.registry.env_specs[env_name]
            import env.tightWellEnvironment
            return gym.make('TightWellEnv-v0')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    logger_kwargs = dict(output_dir=output_dir)

    ac_kwargs = dict(hidden_sizes=(32,))
    with tf.Session(graph=tf.Graph()):
        # ppo or trpo or vpg
        trpo(env_fn, gamma=1., steps_per_epoch=600, epochs=500, seed=seed,
             ac_kwargs=ac_kwargs, logger_kwargs=logger_kwargs)
    return
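A minimal usage sketch of the pattern the comment describes, launching several independent runs from one console; the seed values and output directories below are illustrative only:

# Each call builds its networks inside a fresh tf.Graph, so repeated runs in
# the same console do not interfere with one another.
for seed in (0, 10, 20):  # illustrative seeds
    run_algo(output_dir='data/tight_well/seed{}'.format(seed), seed=seed)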
Example #2
import gym
import tensorflow as tf
from spinup import trpo


def run_algo(output_dir, seed):
    # Note: it's best to run the algorithm inside a function so that multiple
    # runs can be launched from the same IPython console. This avoids
    # contaminating TensorFlow objects across runs, which can otherwise cause
    # the reloaded networks to lose their trained parameters.

    # Needed to use the custom environment: importing the module registers it
    # with gym.
    def env_fn():
        import env.smallVaringGoalRep1Environment
        return gym.make('SmallVaringGoalRep1Env-v0')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    logger_kwargs = dict(output_dir=output_dir)

    ac_kwargs = dict(hidden_sizes=(32, ))
    with tf.Session(graph=tf.Graph()):
        #ppo or trpo or vpg
        trpo(env_fn,
             gamma=1.,
             steps_per_epoch=1200,
             epochs=5000,
             seed=seed,
             ac_kwargs=ac_kwargs,
             logger_kwargs=logger_kwargs)
    return
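The import above only works because the module registers the custom environment with gym when it is first imported. A rough sketch of what such a registration module might contain; the entry-point class name and episode limit are assumptions, not taken from the original code:

# env/smallVaringGoalRep1Environment.py (hypothetical contents)
from gym.envs.registration import register

# Running register() at import time is what makes
# gym.make('SmallVaringGoalRep1Env-v0') succeed after the import above.
register(
    id='SmallVaringGoalRep1Env-v0',
    entry_point='env.smallVaringGoalRep1Environment:SmallVaringGoalRep1Env',  # assumed class name
    max_episode_steps=200,  # assumed episode limit
)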
Example #3
def train(alg, task):
    if task == 'reach':
        env_fn = lambda: SawyerReachEnv(n_substeps=25, reward_type='dense')
    elif task == 'grasp':
        env_fn = lambda: SawyerGraspEnv(n_substeps=5, reward_type='dense')
    else:
        raise ValueError('Unknown task: {}'.format(task))

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
    save_path = os.path.join(SAVE_PATH, task, alg)
    if alg == 'ppo':
        # mpi_fork(2)

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        ppo(env_fn=env_fn,
            steps_per_epoch=4000,
            epochs=20000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'ddpg':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/ddpg_suite',
                             exp_name=EXP_NAME)
        ddpg(env_fn=env_fn,
             steps_per_epoch=5000,
             batch_size=256,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'trpo':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/trpo_suite',
                             exp_name=EXP_NAME)
        trpo(env_fn=env_fn,
             ac_kwargs=ac_kwargs,
             steps_per_epoch=5000,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'td3':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        td3(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'sac':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        sac(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=200)
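A minimal way to drive train() from the command line, assuming it lives in a script that is run directly; the script name and argument order below are assumptions for illustration, not taken from the original snippet:

if __name__ == '__main__':
    import sys
    # e.g. `python train_sawyer.py ppo reach` -- hypothetical script name and
    # argument order.
    train(alg=sys.argv[1], task=sys.argv[2])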
Example #4
        max_ep_len=1000,
        gamma=0.99,
        seed=seed,
        steps_per_epoch=steps_per_epoch,
        pi_lr=0.005,
        vf_lr=0.005,
        epochs=epochs,
        logger_kwargs=logger_kwargs,
        clip_ratio=float(clip_ratio),
        target_kl=float(target_kl))

# train with TRPO
if algorithm == 'trpo':
    delta = sys.argv[2]
    backtrack_coef = sys.argv[3]
    exp_name = 'll_trpo_seed' + str(seed) + '_epochs' + str(epochs)
    exp_name += '_delta' + delta + '_bc' + backtrack_coef
    logger_kwargs = dict(output_dir='data_spinning_up/' + exp_name + '/',
                         exp_name=exp_name)
    trpo(env_fn=env_fn,
         ac_kwargs=ac_kwargs,
         max_ep_len=1000,
         gamma=0.99,
         seed=seed,
         steps_per_epoch=steps_per_epoch,
         vf_lr=0.005,
         epochs=epochs,
         logger_kwargs=logger_kwargs,
         backtrack_coeff=float(backtrack_coef),
         delta=float(delta))
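Because delta and backtrack_coef arrive via sys.argv and are encoded into exp_name, each hyperparameter setting logs to its own directory under data_spinning_up/. A hedged sketch of a sweep driver; the script filename and the assumption that sys.argv[1] selects the algorithm are illustrative, not from the snippet:

import subprocess

# Hypothetical sweep over the TRPO hyperparameters read from sys.argv above;
# 'lunarlander_spinup.py' is a placeholder for the actual script name.
for delta in ('0.005', '0.01', '0.02'):
    for backtrack_coef in ('0.6', '0.8'):
        subprocess.run(
            ['python', 'lunarlander_spinup.py', 'trpo', delta, backtrack_coef],
            check=True)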
Example #5
# output_dir = 'logging/awake/PPO/'
# logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')
# agent = ppo(env_fn=env_fn, epochs=100, steps_per_epoch=5000, logger_kwargs=logger_kwargs, seed=123, save_freq=100)

# output_dir = 'logging/awake/SAC/'
# logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')
# agent = sac(env_fn=env_fn, epochs=25, steps_per_epoch=1000, logger_kwargs=logger_kwargs, start_steps=1000)

output_dir = 'logging/awake/TRPO/'
logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')
# agent = td3(env_fn=env_fn, epochs=25, steps_per_epoch=1000,
#             logger_kwargs=logger_kwargs, start_steps=500)

agent = trpo(env_fn=env_fn,
             epochs=50,
             steps_per_epoch=1000,
             logger_kwargs=logger_kwargs,
             delta=0.5)

plot_name = 'Stats'
name = plot_name
data = pd.read_csv(output_dir + '/progress.txt', sep="\t")

data.index = data['TotalEnvInteracts']
data_plot = data[['EpLen', 'MinEpRet', 'AverageEpRet']]
data_plot.plot(secondary_y=['MinEpRet', 'AverageEpRet'])

plt.title(name)
# plt.savefig(name + '.pdf')
plt.show()
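As a small follow-up, and using only columns the snippet already reads from progress.txt, the best average return and the interaction count at which it was logged can be reported:

# The DataFrame is indexed by 'TotalEnvInteracts' above, so idxmax() returns
# the interaction count at which the best average return was logged.
best_step = data['AverageEpRet'].idxmax()
print('Best AverageEpRet: {:.2f} at {} environment interactions'.format(
    data.loc[best_step, 'AverageEpRet'], best_step))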
Example #6
from spinup import ppo, trpo
import tensorflow as tf
import gym
from mycart import MyCartPoleEnv

# env_fn = lambda: gym.make('LunarLander-v2')
env_fn = lambda: MyCartPoleEnv()

ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)

logger_kwargs = dict(output_dir='./logsDDPG', exp_name='pend')

trpo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
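After training finishes, the saved policy under ./logsDDPG can be replayed. A minimal sketch using Spinning Up's test utilities; the helper name has varied between releases (load_policy in older TF1 versions, load_policy_and_env more recently), so check it against the installed version:

from spinup.utils.test_policy import load_policy_and_env, run_policy

# Load the most recently saved model from the logger output directory.
env, get_action = load_policy_and_env('./logsDDPG')
# Roll out a few episodes with the learned policy (renders by default).
run_policy(env, get_action, num_episodes=5)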