from tf2rl.algos.sac import SAC
from tf2rl.algos.td3 import TD3
from tf2rl.algos.ppo import PPO
from tf2rl.experiments.trainer import Trainer
from tf2rl.experiments.on_policy_trainer import OnPolicyTrainer


def main():
    # Collect the command-line arguments exposed by the trainers and by each algorithm
    parser = Trainer.get_argument()
    parser = OnPolicyTrainer.get_argument(parser)
    parser = SAC.get_argument(parser)
    parser = PPO.get_argument(parser)
    parser = TD3.get_argument(parser)
    # Flags selecting which algorithm to run
    parser.add_argument('--SAC', action='store_true')
    parser.add_argument('--PPO', action='store_true')
    parser.add_argument('--TD3', action='store_true')
    parser.add_argument('--DEBUG', action='store_true')
    parser.add_argument('--env', type=int, default=0)
    parser.set_defaults(batch_size=32)  # 100
    parser.set_defaults(n_warmup=10000)  # 10000
    parser.set_defaults(max_steps=2e6)
    parser.set_defaults(gpu=0)
    parser.set_defaults(test_interval=200 * 100)
    parser.set_defaults(test_episodes=3)
    args = parser.parse_args()
    print(vars(args))
    run(parser)  # run(...) is defined elsewhere; it builds the environment, policy and trainer


if __name__ == '__main__':
    main()
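The body of run(parser) is not shown in this listing. The sketch below is one way it could dispatch on the --SAC, --PPO and --TD3 flags using tf2rl's Trainer and OnPolicyTrainer; the gym.make("Pendulum-v0") environment and the hyperparameters passed to each policy are placeholders (the real script presumably maps the --env index to an environment), not the author's actual implementation.

# Minimal sketch of a possible run(parser); environment choice and hyperparameters are assumptions.
import gym
from tf2rl.algos.sac import SAC
from tf2rl.algos.td3 import TD3
from tf2rl.algos.ppo import PPO
from tf2rl.experiments.trainer import Trainer
from tf2rl.experiments.on_policy_trainer import OnPolicyTrainer
from tf2rl.envs.utils import is_discrete, get_act_dim


def run(parser):
    args = parser.parse_args()
    # Placeholder environments; the original script selects an environment via --env
    env = gym.make("Pendulum-v0")
    test_env = gym.make("Pendulum-v0")

    state_shape = env.observation_space.shape
    action_dim = get_act_dim(env.action_space)

    if args.PPO:
        # On-policy algorithm: pair PPO with the OnPolicyTrainer
        policy = PPO(state_shape=state_shape, action_dim=action_dim,
                     is_discrete=is_discrete(env.action_space),
                     max_action=None if is_discrete(env.action_space) else env.action_space.high[0],
                     batch_size=args.batch_size, gpu=args.gpu)
        trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
    elif args.TD3:
        # Off-policy algorithms use the standard (replay-buffer) Trainer
        policy = TD3(state_shape=state_shape, action_dim=action_dim,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size, n_warmup=args.n_warmup, gpu=args.gpu)
        trainer = Trainer(policy, env, args, test_env=test_env)
    else:  # default to SAC
        policy = SAC(state_shape=state_shape, action_dim=action_dim,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size, n_warmup=args.n_warmup, gpu=args.gpu)
        trainer = Trainer(policy, env, args, test_env=test_env)

    trainer()  # start the training loop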
from tf2rl.algos.ppo import PPO
from tf2rl.experiments.on_policy_trainer import OnPolicyTrainer
from tf2rl.envs.utils import is_discrete, get_act_dim
from arm_env import ArmEnvironment
import rospy
import numpy as np

normalise_obs = False
static_goal = True
testing = False
slow_step = True

rospy.init_node("RL_agent")

parser = OnPolicyTrainer.get_argument()
parser = PPO.get_argument(parser)

slow_suffix = "_slow" if slow_step else ""
# parser.set_defaults(max_steps=5)

if static_goal and normalise_obs:
    parser.set_defaults(model_dir='model_PPO_static_normed' + slow_suffix)
    parser.set_defaults(logdir='results/PPO_static_normed' + slow_suffix)
elif static_goal:
    parser.set_defaults(model_dir='model_PPO_static' + slow_suffix)
    parser.set_defaults(logdir='results/PPO_static' + slow_suffix)
elif normalise_obs:
    parser.set_defaults(model_dir='model_PPO_normed' + slow_suffix)
    parser.set_defaults(logdir='results/PPO_normed' + slow_suffix)
else:
    parser.set_defaults(model_dir='model_PPO' + slow_suffix)
    parser.set_defaults(logdir='results/PPO' + slow_suffix)

parser.set_defaults(normalise_obs=normalise_obs)
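The listing ends after the argument defaults. The continuation below is a sketch of how these pieces are typically wired together in tf2rl (mirroring the library's PPO example script); the ArmEnvironment keyword arguments (static_goal, normalise_obs, slow_step) are assumptions inferred from the flags above, not the environment's confirmed constructor signature.

# Hypothetical continuation; the ArmEnvironment constructor arguments are assumed.
args = parser.parse_args()

env = ArmEnvironment(static_goal=static_goal,
                     normalise_obs=normalise_obs,
                     slow_step=slow_step)
test_env = ArmEnvironment(static_goal=static_goal,
                          normalise_obs=normalise_obs,
                          slow_step=slow_step)

policy = PPO(
    state_shape=env.observation_space.shape,
    action_dim=get_act_dim(env.action_space),
    is_discrete=is_discrete(env.action_space),
    max_action=None if is_discrete(env.action_space) else env.action_space.high[0],
    gpu=args.gpu)

trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
trainer()  # start training with the model_dir/logdir defaults chosen above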