def train(alg, task):
    """Launch the requested Spinning Up algorithm on the chosen Sawyer task."""
    if task == 'reach':
        env_fn = lambda: SawyerReachEnv(n_substeps=25, reward_type='dense')
    elif task == 'grasp':
        env_fn = lambda: SawyerGraspEnv(n_substeps=5, reward_type='dense')

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)
    save_path = os.path.join(SAVE_PATH, task, alg)

    if alg == 'ppo':
        # mpi_fork(2)
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        ppo(env_fn=env_fn, steps_per_epoch=4000, epochs=20000,
            logger_kwargs=logger_kwargs, max_ep_len=1000)
    elif alg == 'ddpg':
        logger_kwargs = dict(output_dir=SAVE_PATH + '/ddpg_suite', exp_name=EXP_NAME)
        ddpg(env_fn=env_fn, steps_per_epoch=5000, batch_size=256, epochs=2000,
             logger_kwargs=logger_kwargs, max_ep_len=200)
    elif alg == 'trpo':
        logger_kwargs = dict(output_dir=SAVE_PATH + '/trpo_suite', exp_name=EXP_NAME)
        trpo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=2000,
             logger_kwargs=logger_kwargs, max_ep_len=200)
    elif alg == 'td3':
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        td3(env_fn=env_fn, start_steps=100000, steps_per_epoch=5000, epochs=2000,
            logger_kwargs=logger_kwargs, max_ep_len=1000)
    elif alg == 'sac':
        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        sac(env_fn=env_fn, start_steps=100000, steps_per_epoch=5000, epochs=2000,
            logger_kwargs=logger_kwargs, max_ep_len=200)
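# (Added sketch, not from the original file.) A hypothetical entry point for the train()
# helper above; SAVE_PATH, EXP_NAME, the Sawyer env classes, and the algorithm imports are
# assumed to be defined or imported elsewhere in the same module.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--alg', choices=['ppo', 'ddpg', 'trpo', 'td3', 'sac'], default='td3')
    parser.add_argument('--task', choices=['reach', 'grasp'], default='reach')
    cli_args = parser.parse_args()
    train(cli_args.alg, cli_args.task)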
def __call__(self, *args, **kwargs):
    ac_kwargs = dict(hidden_sizes=[400, 300, 200, 100], activation=torch.nn.ReLU)
    logger_kwargs = dict(output_dir=self.outdir, exp_name=self.expt_name)
    ddpg(env_fn=self.env, ac_kwargs=ac_kwargs, steps_per_epoch=250, epochs=400,
         logger_kwargs=logger_kwargs)
def ddpg_with_actor_critic(bugged, **kwargs):
    actor_critic = bugged_mlp_actor_critic if bugged else mlp_actor_critic
    return ddpg(actor_critic=actor_critic,
                ac_kwargs=dict(hidden_sizes=[args.h] * args.l),
                start_steps=5000, max_ep_len=150, batch_size=64, polyak=0.95,
                **kwargs)
def ddpg_with_actor_critic(bugged, **kwargs):
    from spinup.exercises.pytorch.problem_set_2.exercise2_2 import BuggedMLPActorCritic
    actor_critic = BuggedMLPActorCritic if bugged else MLPActorCritic
    return ddpg(actor_critic=actor_critic,
                ac_kwargs=dict(hidden_sizes=[args.h] * args.l),
                start_steps=5000, max_ep_len=150, batch_size=64, polyak=0.95,
                **kwargs)
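# (Added sketch, not from the original files.) One way the helper above could be used to
# compare the bugged and correct actor-critics side by side; the environment, seed, and
# output directories are illustrative assumptions, and the module-level args object
# (with fields h and l) used by the helper is assumed to be parsed already.
import gym

for bugged in (False, True):
    variant = 'bugged' if bugged else 'correct'
    ddpg_with_actor_critic(bugged,
                           env_fn=lambda: gym.make('HalfCheetah-v2'),
                           seed=0, epochs=10,
                           logger_kwargs=dict(output_dir='data/ex2-2_' + variant,
                                              exp_name='ex2-2_' + variant))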
# Commented-out cleanup of the NAF logging directory:
# directory_naf = "logging/awake/NAF"
# if not os.path.exists(directory_naf):
#     os.makedirs(directory_naf)
# else:
#     for f in os.listdir(directory_naf):
#         print('Deleting: ', directory_naf + '/' + f)
#         os.remove(directory_naf + '/' + f)
#     time.sleep(3)

output_dir = 'logging/awake/NAF/'
logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')

# Short DDPG run; note start_steps=1e6 exceeds the total interaction budget,
# so all actions stay uniform-random exploration.
agent = ddpg(env_fn=env_fn, epochs=10, steps_per_epoch=100, ac_kwargs=ac_kwargs,
             logger_kwargs=logger_kwargs, start_steps=1e6, seed=random_seed)

# Plot episode statistics from the Spinning Up logger's progress.txt.
plot_name = 'Stats'
name = plot_name
data = pd.read_csv(output_dir + '/progress.txt', sep="\t")
data.index = data['TotalEnvInteracts']
data_plot = data[['EpLen', 'MinEpRet', 'AverageEpRet']]
data_plot.plot(secondary_y=['MinEpRet', 'AverageEpRet'])

label = 'Classic DDPG on: ' + env.__name__
plt.title(label=label)
plt.ylim(-10, 0)
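# (Added sketch, not from the original file.) The snippet defines plot_name but the
# save/show step is not shown; a minimal completion, with the filename an assumption:
plt.savefig(output_dir + name + '.pdf')
plt.show()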
from spinup import ddpg
import tensorflow as tf
import gym
import gym_foo

env_fn = lambda: gym.make('pro-v0')
logger_kwargs = dict(output_dir='./output', exp_name='ddpg')

ddpg(env_fn=env_fn, seed=0, steps_per_epoch=5000, epochs=15, replay_size=int(1e6),
     gamma=0.99, polyak=0.995, pi_lr=0.001, q_lr=0.001, batch_size=100,
     start_steps=10000, act_noise=0.1, max_ep_len=1000,
     logger_kwargs=logger_kwargs, save_freq=1)

# ppo(env_fn=env_fn, seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2,
#     pi_lr=0.0003, vf_lr=0.001, train_pi_iters=80, train_v_iters=80, lam=0.97,
#     max_ep_len=1000, target_kl=0.01, logger_kwargs=logger_kwargs, save_freq=10)

# env = gym.make('foo-v0')
# ac_kwargs = dict(hidden_sizes=[40,40], activation=tf.nn.relu)
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=250, logger_kwargs=logger_kwargs)
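# (Added sketch, not from the original file.) Replaying the trained policy afterwards with
# Spinning Up's test utility; depending on the installed version the loader is
# load_policy_and_env (newer) or load_policy (older).
from spinup.utils.test_policy import load_policy_and_env, run_policy

env, get_action = load_policy_and_env('./output')
run_policy(env, get_action, max_ep_len=1000, num_episodes=5)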
.. _Pendulum environment: https://gym.openai.com/envs/Pendulum-v0/
"""
import gym
import argparse

from spinup import ddpg_pytorch as ddpg
# from spinup import ddpg_tf1 as ddpg_tf1
from spinup.utils.run_utils import setup_logger_kwargs


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Pendulum-v0')
    parser.add_argument('--hid', type=int, default=256)
    parser.add_argument('--l', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--seed', '-s', type=int, default=7)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--exp_name', type=str, default='ddpg')
    args = parser.parse_args()

    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    ddpg(lambda: gym.make(args.env),
         ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
         gamma=args.gamma, seed=args.seed, epochs=args.epochs,
         logger_kwargs=logger_kwargs)
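# (Added usage note, not from the original file; the script filename is hypothetical.)
# Example invocation, followed by plotting the run with Spinning Up's plot utility.
# setup_logger_kwargs writes under the package's default data directory, so the exact
# output path below is an assumption:
#   python ddpg_pendulum.py --env Pendulum-v0 --hid 256 --l 2 --epochs 50 --exp_name ddpg
#   python -m spinup.run plot data/ddpg/ddpg_s7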
# ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
# logger_kwargs = dict(output_dir='baseline_data/HalfCheetah/sac', exp_name='HalfCheetah_sac')
# sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# Ant-v2 baseline runs, left commented out:
# env_fn = lambda : gym.make('Ant-v2')
# logger_kwargs = dict(output_dir='baseline_data/Ant/ppo', exp_name='Ant_ppo')
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
# # env_fn = lambda : gym.make('Walker2d-v2')
# # ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
# logger_kwargs = dict(output_dir='baseline_data/Ant/ddpg', exp_name='Ant_ddpg')
# ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
# logger_kwargs = dict(output_dir='baseline_data/Ant/sac', exp_name='Ant_sac')
# sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# Active runs: DDPG and SAC baselines on Humanoid-v2.
env_fn = lambda: gym.make('Humanoid-v2')
# logger_kwargs = dict(output_dir='baseline_data/Humanoid/ppo', exp_name='Humanoid_ppo')
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
# env_fn = lambda : gym.make('Walker2d-v2')
# ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='baseline_data/Humanoid/ddpg', exp_name='Humanoid_ddpg')
ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

logger_kwargs = dict(output_dir='baseline_data/Humanoid/sac', exp_name='Humanoid_sac')
sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
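# (Added sketch, not from the original file.) Once both Humanoid runs finish, the baselines
# can be compared with Spinning Up's plotting helper, pointed at the output dirs configured above:
from spinup.utils.plot import make_plots

make_plots(['baseline_data/Humanoid/ddpg', 'baseline_data/Humanoid/sac'],
           legend=['ddpg', 'sac'], xaxis='TotalEnvInteracts', values=['AverageEpRet'])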