# Beispiel #1 (example snippet header from original source; "0" counter dropped)
def main():
    """Assemble the experiment CLI and launch run().

    Stacks the argument groups contributed by every trainer/algorithm,
    adds the algorithm-selection switches, applies this experiment's
    defaults, prints the parsed arguments, and hands the parser to run().
    """
    # Collect argument groups from each component in turn.
    cli = Trainer.get_argument()
    for contribute in (OnPolicyTrainer.get_argument,
                       SAC.get_argument,
                       PPO.get_argument,
                       TD3.get_argument):
        cli = contribute(cli)

    # Algorithm-selection / debug switches.
    for flag in ('--SAC', '--PPO', '--TD3', '--DEBUG'):
        cli.add_argument(flag, action='store_true')
    # Environment index selector.
    cli.add_argument('--env', type=int, default=0)

    # Defaults tuned for this experiment.
    cli.set_defaults(
        batch_size=32,          # was 100 in upstream example
        n_warmup=10000,
        max_steps=2e6,
        gpu=0,
        test_interval=200 * 100,
        test_episodes=3,
    )

    parsed = cli.parse_args()
    print(vars(parsed))
    # NOTE(review): run() receives the parser (not the parsed args),
    # mirroring the original code — confirm run()'s expected argument.
    run(cli)
from tf2rl.algos.ppo import PPO
from tf2rl.experiments.on_policy_trainer import OnPolicyTrainer
from tf2rl.envs.utils import is_discrete, get_act_dim
from arm_env import ArmEnvironment
import rospy
import numpy as np

# Experiment configuration flags.
normalise_obs = False   # whether observations are normalised
static_goal = True      # fixed vs. randomised goal position
testing = False         # evaluation-only mode flag
slow_step = True        # slower simulation stepping variant

rospy.init_node("RL_agent")

# Build the CLI parser from the trainer and algorithm argument groups.
parser = PPO.get_argument(OnPolicyTrainer.get_argument())

#parser.set_defaults(max_steps=5)

# Derive the run name from the active flags; this reproduces the original
# four-way branch: PPO[_static][_normed][_slow].
slow_suffix = "_slow" if slow_step else ""
run_name = "PPO"
if static_goal:
    run_name += "_static"
if normalise_obs:
    run_name += "_normed"
run_name += slow_suffix

parser.set_defaults(model_dir='model_' + run_name)
parser.set_defaults(logdir='results/' + run_name)
parser.set_defaults(normalise_obs=normalise_obs)