Example #1
import os

import tensorflow as tf

# Spinning Up algorithm entry points. SawyerReachEnv, SawyerGraspEnv,
# SAVE_PATH and EXP_NAME are assumed to be defined elsewhere in the
# original module.
from spinup import ppo, ddpg, trpo, td3, sac


def train(alg, task):
    if task == 'reach':
        env_fn = lambda: SawyerReachEnv(n_substeps=25, reward_type='dense')
    elif task == 'grasp':
        env_fn = lambda: SawyerGraspEnv(n_substeps=5, reward_type='dense')
    else:
        raise ValueError('unknown task: {}'.format(task))

    ac_kwargs = dict(hidden_sizes=[64, 64], activation=tf.nn.relu)  # only used by trpo below
    save_path = os.path.join(SAVE_PATH, task, alg)
    if alg == 'ppo':
        # mpi_fork(2)

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        ppo(env_fn=env_fn,
            steps_per_epoch=4000,
            epochs=20000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'ddpg':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/ddpg_suite',
                             exp_name=EXP_NAME)
        ddpg(env_fn=env_fn,
             steps_per_epoch=5000,
             batch_size=256,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'trpo':

        logger_kwargs = dict(output_dir=SAVE_PATH + '/trpo_suite',
                             exp_name=EXP_NAME)
        trpo(env_fn=env_fn,
             ac_kwargs=ac_kwargs,
             steps_per_epoch=5000,
             epochs=2000,
             logger_kwargs=logger_kwargs,
             max_ep_len=200)

    elif alg == 'td3':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        td3(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=1000)

    elif alg == 'sac':

        logger_kwargs = dict(output_dir=save_path, exp_name=EXP_NAME)
        sac(env_fn=env_fn,
            start_steps=100000,
            steps_per_epoch=5000,
            epochs=2000,
            logger_kwargs=logger_kwargs,
            max_ep_len=200)

    else:
        raise ValueError('unknown alg: {}'.format(alg))
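A minimal invocation sketch for the function above; the environment classes and the SAVE_PATH/EXP_NAME constants it references are assumed to be importable in the original module:

if __name__ == '__main__':
    # Hypothetical driver: train every algorithm on the reach task in turn.
    for alg in ('ppo', 'ddpg', 'trpo', 'td3', 'sac'):
        train(alg, task='reach')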
Example #2
    def __call__(self, *args, **kwargs):
        # Launch one DDPG run on the environment constructor stored on the
        # instance; extra positional/keyword arguments are ignored.

        ac_kwargs = dict(hidden_sizes=[400, 300, 200, 100],
                         activation=torch.nn.ReLU)

        logger_kwargs = dict(output_dir=self.outdir, exp_name=self.expt_name)

        ddpg(env_fn=self.env,
             ac_kwargs=ac_kwargs,
             steps_per_epoch=250,
             epochs=400,
             logger_kwargs=logger_kwargs)
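This method only makes sense on an instance; below is a self-contained sketch of a hypothetical enclosing class (DDPGExperiment is an assumed name, and ddpg is taken to be the PyTorch variant, given the torch.nn.ReLU activation):

import gym
import torch
from spinup import ddpg_pytorch as ddpg  # PyTorch DDPG, matching torch.nn.ReLU

class DDPGExperiment:
    """Hypothetical wrapper around the __call__ shown above."""

    def __init__(self, env_fn, outdir, expt_name):
        self.env = env_fn            # zero-argument environment constructor
        self.outdir = outdir         # logger output directory
        self.expt_name = expt_name   # experiment name for the logger

    def __call__(self, *args, **kwargs):
        ac_kwargs = dict(hidden_sizes=[400, 300, 200, 100],
                         activation=torch.nn.ReLU)
        logger_kwargs = dict(output_dir=self.outdir, exp_name=self.expt_name)
        ddpg(env_fn=self.env, ac_kwargs=ac_kwargs, steps_per_epoch=250,
             epochs=400, logger_kwargs=logger_kwargs)

experiment = DDPGExperiment(lambda: gym.make('Pendulum-v0'),
                            outdir='./output', expt_name='ddpg_pendulum')
experiment()  # launches training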
Example #3
def ddpg_with_actor_critic(bugged, **kwargs):
    # Pick the deliberately bugged actor-critic from the Spinning Up exercises
    # or the reference one; `args` (network sizes) is assumed to come from an
    # argparse block elsewhere in the script.
    actor_critic = bugged_mlp_actor_critic if bugged else mlp_actor_critic
    return ddpg(actor_critic=actor_critic,
                ac_kwargs=dict(hidden_sizes=[args.h] * args.l),
                start_steps=5000,
                max_ep_len=150,
                batch_size=64,
                polyak=0.95,
                **kwargs)
Example #4
def ddpg_with_actor_critic(bugged, **kwargs):
    # PyTorch counterpart of the previous example; the bugged actor-critic is
    # imported from the exercises package at call time.
    from spinup.exercises.pytorch.problem_set_2.exercise2_2 import BuggedMLPActorCritic
    actor_critic = BuggedMLPActorCritic if bugged else MLPActorCritic
    return ddpg(actor_critic=actor_critic,
                ac_kwargs=dict(hidden_sizes=[args.h] * args.l),
                start_steps=5000,
                max_ep_len=150,
                batch_size=64,
                polyak=0.95,
                **kwargs)
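A hypothetical call for either variant above, assuming an argparse namespace args with h and l set, as in the Spinning Up exercise scripts:

import gym

# Run the bugged and the reference actor-critic back to back for comparison.
for bugged in (True, False):
    ddpg_with_actor_critic(bugged,
                           env_fn=lambda: gym.make('HalfCheetah-v2'),
                           seed=0,
                           epochs=10)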
Example #5
# pandas (pd), matplotlib.pyplot (plt) and the env_fn, ac_kwargs, env and
# random_seed used below are assumed to be defined earlier in the original
# script.
# directory_naf = "logging/awake/NAF"
# if not os.path.exists(directory_naf):
#     os.makedirs(directory_naf)
# else:
#     for f in os.listdir(directory_naf):
#         print('Deleting: ', directory_naf + '/' + f)
#         os.remove(directory_naf + '/' + f)
#     time.sleep(3)
output_dir = 'logging/awake/NAF/'

logger_kwargs = dict(output_dir=output_dir, exp_name='transport_awake')

agent = ddpg(env_fn=env_fn,
             epochs=10,
             steps_per_epoch=100,
             ac_kwargs=ac_kwargs,
             logger_kwargs=logger_kwargs,
             start_steps=1e6,  # exceeds the 1,000 total steps of this run, so actions stay uniformly random
             seed=random_seed)

plot_name = 'Stats'
name = plot_name
data = pd.read_csv(output_dir + '/progress.txt', sep="\t")

data.index = data['TotalEnvInteracts']
data_plot = data[['EpLen', 'MinEpRet', 'AverageEpRet']]
data_plot.plot(secondary_y=['MinEpRet', 'AverageEpRet'])

label = 'Classic DDPG on: ' + env.__name__
plt.title(label=label)
plt.ylim(-10, 0)
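The name variable above is set but never used; a hedged completion that saves and shows the figure (the filename is an assumption):

plt.savefig(output_dir + name + '.png')  # hypothetical filename built from `name`
plt.show()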
Example #6
from spinup import ddpg
import tensorflow as tf
import gym
import gym_foo  # presumably registers the custom 'pro-v0' environment with gym

env_fn = lambda: gym.make('pro-v0')

logger_kwargs = dict(output_dir='./output', exp_name='ddpg')

ddpg(env_fn=env_fn,
     seed=0,
     steps_per_epoch=5000,
     epochs=15,
     replay_size=int(1e6),
     gamma=0.99,
     polyak=0.995,
     pi_lr=0.001,
     q_lr=0.001,
     batch_size=100,
     start_steps=10000,
     act_noise=0.1,
     max_ep_len=1000,
     logger_kwargs=logger_kwargs,
     save_freq=1)

# ppo(env_fn=env_fn, seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=0.0003, vf_lr=0.001,
#  train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, target_kl=0.01, logger_kwargs=logger_kwargs, save_freq=10)
# env = gym.make('foo-v0')
#ac_kwargs = dict(hidden_sizes=[40,40], activation=tf.nn.relu)
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=250, logger_kwargs=logger_kwargs)
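After a run like the one above finishes, the saved policy can be replayed with Spinning Up's built-in utility, pointed at the logger output directory:

python -m spinup.run test_policy ./output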
Example #7
.. _Pendulum environment:
  https://gym.openai.com/envs/Pendulum-v0/


"""

import gym
import argparse
from spinup import ddpg_pytorch as ddpg
# from spinup import ddpg_tf1 as ddpg_tf1
from spinup.utils.run_utils import setup_logger_kwargs


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default='Pendulum-v0')
    parser.add_argument('--hid', type=int, default=256)
    parser.add_argument('--l', type=int, default=2)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--seed', '-s', type=int, default=7)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--exp_name', type=str, default='ddpg')
    args = parser.parse_args()

    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

    ddpg(lambda: gym.make(args.env),
         ac_kwargs=dict(hidden_sizes=[args.hid] * args.l),
         gamma=args.gamma, seed=args.seed, epochs=args.epochs,
         logger_kwargs=logger_kwargs)
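A command-line usage sketch, assuming the script above is saved as ddpg_pendulum.py (a hypothetical filename):

python ddpg_pendulum.py --env Pendulum-v0 --hid 256 --l 2 --epochs 50 --seed 7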
Example #8
# gym, the Spinning Up algorithms (ppo, ddpg, sac) and ac_kwargs are assumed
# to be imported/defined in the elided top of the original script.
# ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# logger_kwargs = dict(output_dir='baseline_data/HalfCheetah/sac', exp_name='HalfCheetah_sac')
# sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# env_fn = lambda : gym.make('Ant-v2')
# logger_kwargs = dict(output_dir='baseline_data/Ant/ppo', exp_name='Ant_ppo')
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# # env_fn = lambda : gym.make('Walker2d-v2')
# # ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
# logger_kwargs = dict(output_dir='baseline_data/Ant/ddpg', exp_name='Ant_ddpg')
# ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# logger_kwargs = dict(output_dir='baseline_data/Ant/sac', exp_name='Ant_sac')
# sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)


env_fn = lambda: gym.make('Humanoid-v2')
# logger_kwargs = dict(output_dir='baseline_data/Humanoid/ppo', exp_name='Humanoid_ppo')
# ppo(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

# env_fn = lambda : gym.make('Walker2d-v2')
# ac_kwargs = dict(hidden_sizes=[64,64], activation=tf.nn.relu)
logger_kwargs = dict(output_dir='baseline_data/Humanoid/ddpg', exp_name='Humanoid_ddpg')
ddpg(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)

logger_kwargs = dict(output_dir='baseline_data/Humanoid/sac', exp_name='Humanoid_sac')
sac(env_fn=env_fn, ac_kwargs=ac_kwargs, steps_per_epoch=5000, epochs=200, logger_kwargs=logger_kwargs)
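Once these baselines finish, the learning curves can be compared with Spinning Up's plotting utility:

python -m spinup.run plot baseline_data/Humanoid/ddpg baseline_data/Humanoid/sac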