Example 1: constructing a SAC agent inside a unittest setUpClass fixture
@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.agent = SAC(state_shape=cls.continuous_env.observation_space.shape,
                    action_dim=cls.continuous_env.action_space.low.size,
                    batch_size=cls.batch_size,
                    auto_alpha=True,
                    gpu=-1)
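A fixture like this is usually paired with test methods that exercise the constructed agent. A minimal sketch of one such method, assuming tf2rl's get_action(state, test=...) interface; the method name and assertion are illustrative, and only cls.agent and cls.continuous_env come from the excerpt above:

def test_get_action(self):
    state = self.continuous_env.reset()
    # Ask the SAC actor for a deterministic action for a single observation.
    action = self.agent.get_action(state, test=True)
    # The action should match the environment's action-space shape.
    self.assertEqual(action.shape, self.continuous_env.action_space.shape)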
Example 2: restoring a trained SAC policy from a checkpoint directory and running test episodes on a ROS/UR3e environment
def main():
    """ Main function to be run. """
    parser = argparse.ArgumentParser(description='Test force control')
    parser.add_argument('poldir',
                        type=str,
                        default=None,
                        help='Policy directory')
    parser.add_argument('-c',
                        '--common-test',
                        action='store_true',
                        help='Use common test parameters')
    parser.add_argument('-n', type=int, help='number of tests', default=1)
    parser.add_argument('--custom-path',
                        action='store_true',
                        help='Execute custom path for successful tests')
    parser.add_argument('--training',
                        action='store_true',
                        help='Execute actions within random std')

    args = parser.parse_args()

    rospy.init_node('ur3e_tf2rl', anonymous=True, log_level=rospy.ERROR)

    clear_gym_params('ur_gym')

    policy_dir = os.path.join(os.getcwd(), args.poldir)
    assert os.path.exists(policy_dir)

    env = load_env(policy_dir, args.common_test)

    actor_class = rospy.get_param("ur_gym/actor_class", "default")

    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        actor_class=actor_class,
    )

    rospy.set_param('ur_gym/output_dir', policy_dir)

    checkpoint = tf.train.Checkpoint(policy=policy)
    _latest_path_ckpt = tf.train.latest_checkpoint(policy_dir)
    checkpoint.restore(_latest_path_ckpt).expect_partial()
    print("Restored {}".format(_latest_path_ckpt))

    test_policy(env, policy, args.n, args.custom_path, args.training)
Example 3: building a SAC agent and Trainer from parsed command-line defaults

parser.set_defaults(batch_size=100)
parser.set_defaults(n_warmup=0)
parser.set_defaults(max_steps=5e4)
parser.set_defaults(episode_max_steps=150)
parser.set_defaults(model_dir=args.model_path + args.model_name)

args = parser.parse_args()

env = gym.make(args.env_name)
test_env = gym.make(args.env_name)

policy_ = SAC(state_shape=env.observation_space.shape,
              action_dim=env.action_space.high.size,
              gpu=-1,
              memory_capacity=args.memory_capacity,
              max_action=env.action_space.high,
              batch_size=args.batch_size,
              n_warmup=args.n_warmup,
              alpha=args.alpha,
              auto_alpha=args.auto_alpha,
              lr=1e-5)

trainer = Trainer(policy_, env, args, test_env=test_env)

#trainer()

current_steps = 0
max_steps = 5
total_steps = 0
episode_max_steps = 10

while total_steps <= max_steps:
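The snippet ends at the loop header (trainer() is commented out above, so the interaction loop is written by hand). A minimal sketch of what such a loop might contain, assuming only policy_.get_action and the classic four-value gym step API; the body below is an assumption, not the original file's code:

obs = env.reset()
while total_steps <= max_steps:
    # Query the SAC policy; test=True requests the deterministic action.
    action = policy_.get_action(obs, test=True)
    obs, reward, done, _ = env.step(action)
    total_steps += 1
    current_steps += 1
    # Start a new episode when done, or when the per-episode budget is spent.
    if done or current_steps >= episode_max_steps:
        obs = env.reset()
        current_steps = 0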
Example 4: training SAC on a Roboschool environment with the tf2rl Trainer
import roboschool
import gym

from tf2rl.algos.sac import SAC
from tf2rl.experiments.trainer import Trainer


if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = SAC.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    parser.set_defaults(batch_size=100)
    parser.set_defaults(n_warmup=10000)
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=args.gpu,
        memory_capacity=args.memory_capacity,
        max_action=env.action_space.high[0],
        batch_size=args.batch_size,
        n_warmup=args.n_warmup)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()
Example 5: a minimal SAC training script (note it uses state_dim and tf2rl.trainer.trainer, unlike the other examples)
import roboschool, gym

from tf2rl.algos.sac import SAC
from tf2rl.trainer.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = SAC(state_dim=env.observation_space.high.size,
                 action_dim=env.action_space.high.size,
                 gpu=args.gpu,
                 batch_size=100)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()
Example 6: selecting between SAC, PPO, and TD3 for Assistive Gym environments, with Weights & Biases logging
def run(parser):

    args = parser.parse_args()

    if args.gpu < 0:
        tf.config.experimental.set_visible_devices([], 'GPU')
    else:
        physical_devices = tf.config.list_physical_devices('GPU')
        tf.config.set_visible_devices(physical_devices[args.gpu], 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[args.gpu], [
                tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=1024 * 3)
            ])

    if args.env == 200:
        envname = 'ScratchItchPR2X'
    elif args.env == 201:
        envname = 'DressingPR2X'
    elif args.env == 202:
        envname = 'BedBathingPR2X'

logdir = 'MFBox_Assistive'
    if args.SAC:
        wandb.init(config=vars(args),
                   project="Assistive Gym",
                   name=f'SAC on {envname}')
    elif args.PPO:
        wandb.init(config=vars(args),
                   project="Assistive Gym",
                   name=f'PPO on {envname}')
    elif args.TD3:
        wandb.init(config=vars(args),
                   project="Assistive Gym",
                   name=f'TD3 on {envname}')
    elif args.DEBUG:
        logdir = 'DEBUG_Assistive'
        wandb.init(config=vars(args),
                   project="Assistive Gym",
                   name=f'DEBUG on {envname}')
    else:
        print('PLEASE INDICATE THE ALGORITHM !!')

    if not os.path.exists(logdir):
        os.makedirs(logdir)
    parser.set_defaults(logdir=logdir)
    args = parser.parse_args()

    env = gym.make(f'{envname}-v0')
    #test_env = Monitor(env,logdir,force=True)
    test_env = gym.make(f'{envname}-v0')

    if args.SAC:

        policy = SAC(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=args.n_warmup,
                     alpha=args.alpha,
                     auto_alpha=args.auto_alpha)
        trainer = Trainer(policy, env, args, test_env=test_env)

    elif args.PPO:
        policy = PPO(state_shape=env.observation_space.shape,
                     action_dim=get_act_dim(env.action_space),
                     is_discrete=is_discrete(env.action_space),
                     max_action=None if is_discrete(env.action_space) else
                     env.action_space.high[0],
                     batch_size=args.batch_size,
                     actor_units=(64, 64),
                     critic_units=(64, 64),
                     n_epoch=10,
                     lr_actor=3e-4,
                     lr_critic=3e-4,
                     hidden_activation_actor="tanh",
                     hidden_activation_critic="tanh",
                     discount=0.99,
                     lam=0.95,
                     entropy_coef=0.,
                     horizon=args.horizon,
                     normalize_adv=args.normalize_adv,
                     enable_gae=args.enable_gae,
                     gpu=args.gpu)
        trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)

    elif args.TD3:
        policy = TD3(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=args.n_warmup)
        trainer = Trainer(policy, env, args, test_env=test_env)

    elif args.DEBUG:

        policy = SAC(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=100,
                     alpha=args.alpha,
                     auto_alpha=args.auto_alpha)
        parser.set_defaults(test_interval=200)
        args = parser.parse_args()

        trainer = Trainer(policy, env, args, test_env=None)

    trainer()
Example 7: training SAC on a ROS-based UR environment with hyperparameters loaded from ROS parameters
    ros_param_path = load_ros_params(rospackage_name="ur_rl",
                                     rel_path_from_package_to_file="config",
                                     yaml_file_name=param_file)

    args.episode_max_steps = rospy.get_param("ur_gym/steps_per_episode", 200)

    env = load_environment(
            rospy.get_param('ur_gym/env_id'),
            max_episode_steps=args.episode_max_steps)
    actor_class = rospy.get_param("ur_gym/actor_class", "default")

    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        actor_class=actor_class,
        batch_size=args.batch_size,
        n_warmup=args.n_warmup,
        auto_alpha=args.auto_alpha,
        lr=args.lr
        )
    trainer = Trainer(policy, env, args, test_env=None)
    outdir = trainer._output_dir
    rospy.set_param('ur_gym/output_dir', outdir)
    log_ros_params(outdir)
    copyfile(ros_param_path, outdir + "/ros_gym_env_params.yaml")
    trainer()

    print("duration", (timeit.default_timer() - start_time)/60.,"min")

# rosrun ur3e_rl tf2rl_sac.py --env-id=0