@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.agent = SAC(
        state_shape=cls.continuous_env.observation_space.shape,
        action_dim=cls.continuous_env.action_space.low.size,
        batch_size=cls.batch_size,
        auto_alpha=True,
        gpu=-1)
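# The setUpClass above assumes a base TestCase that provides
# `continuous_env` and `batch_size`. A minimal sketch of such a base
# fixture (the class name and values here are assumptions, not the
# original test code):
import unittest
import gym
from tf2rl.algos.sac import SAC

class CommonAlgosTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # A simple continuous-control env is enough to exercise SAC
        cls.continuous_env = gym.make("Pendulum-v0")
        cls.batch_size = 32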
def main():
    """Main function to be run."""
    parser = argparse.ArgumentParser(description='Test force control')
    parser.add_argument('poldir', type=str, help='Policy directory')
    parser.add_argument('-c', '--common-test', action='store_true',
                        help='Use common test parameters')
    parser.add_argument('-n', type=int, default=1, help='Number of tests')
    parser.add_argument('--custom-path', action='store_true',
                        help='Execute custom path for successful tests')
    parser.add_argument('--training', action='store_true',
                        help='Execute actions within random std')
    args = parser.parse_args()

    rospy.init_node('ur3e_tf2rl', anonymous=True, log_level=rospy.ERROR)
    clear_gym_params('ur_gym')

    policy_dir = os.path.join(os.getcwd(), args.poldir)
    assert os.path.exists(policy_dir)

    env = load_env(policy_dir, args.common_test)
    actor_class = rospy.get_param("ur_gym/actor_class", "default")
    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        actor_class=actor_class,
    )
    rospy.set_param('ur_gym/output_dir', policy_dir)

    # Restore the latest checkpoint from the policy directory
    checkpoint = tf.train.Checkpoint(policy=policy)
    _latest_path_ckpt = tf.train.latest_checkpoint(policy_dir)
    checkpoint.restore(_latest_path_ckpt).expect_partial()
    print("Restored {}".format(_latest_path_ckpt))

    test_policy(env, policy, args.n, args.custom_path, args.training)
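# `load_env` and `test_policy` are project-specific helpers not shown
# here. A minimal sketch of what a `test_policy` evaluation loop could
# look like (a hypothetical implementation under assumed semantics of
# the flags, not the original helper):
def test_policy(env, policy, n_tests, custom_path=False, training=False):
    # custom_path handling is omitted in this sketch
    for _ in range(n_tests):
        obs = env.reset()
        done = False
        episode_return = 0.0
        while not done:
            # tf2rl policies expose get_action(state, test=...);
            # test=True returns the deterministic (mean) action
            action = policy.get_action(obs, test=not training)
            obs, reward, done, _ = env.step(action)
            episode_return += reward
        print("episode return:", episode_return)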
# `args` is assumed to have been parsed once already, so that
# `args.model_path` and `args.model_name` are available here.
parser.set_defaults(batch_size=100)
parser.set_defaults(n_warmup=0)
parser.set_defaults(max_steps=5e4)
parser.set_defaults(episode_max_steps=150)
parser.set_defaults(model_dir=args.model_path + args.model_name)
args = parser.parse_args()

env = gym.make(args.env_name)
test_env = gym.make(args.env_name)
policy_ = SAC(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    gpu=-1,
    memory_capacity=args.memory_capacity,
    max_action=env.action_space.high,
    batch_size=args.batch_size,
    n_warmup=args.n_warmup,
    alpha=args.alpha,
    auto_alpha=args.auto_alpha,
    lr=1e-5)
trainer = Trainer(policy_, env, args, test_env=test_env)
# trainer()  # the Trainer loop is bypassed in favor of the manual loop below

# Note: these locals shadow the parser defaults set above.
current_steps = 0
max_steps = 5
total_steps = 0
episode_max_steps = 10
while total_steps <= max_steps:
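    # The loop body is cut off in the original. A minimal hand-rolled
    # episode loop consistent with the counters above (an assumed
    # sketch, not the original code):
    obs = env.reset()
    for _ in range(episode_max_steps):
        action = policy_.get_action(obs)
        next_obs, reward, done, _ = env.step(action)
        total_steps += 1
        current_steps += 1
        obs = next_obs
        if done:
            break
    current_steps = 0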
import roboschool
import gym

from tf2rl.algos.sac import SAC
from tf2rl.experiments.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = SAC.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    parser.set_defaults(batch_size=100)
    parser.set_defaults(n_warmup=10000)
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=args.gpu,
        memory_capacity=args.memory_capacity,
        max_action=env.action_space.high[0],
        batch_size=args.batch_size,
        n_warmup=args.n_warmup)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()
import roboschool
import gym

from tf2rl.algos.sac import SAC
from tf2rl.experiments.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    # Note: the original used the outdated tf2rl API
    # (tf2rl.trainer.trainer and SAC(state_dim=...)); updated here to
    # the current module path and state_shape keyword.
    policy = SAC(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=args.gpu,
        batch_size=100)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()
def run(parser):
    args = parser.parse_args()

    # GPU selection: hide all GPUs for CPU runs, otherwise pin the
    # requested device and cap its memory at 3 GB.
    if args.gpu < 0:
        tf.config.experimental.set_visible_devices([], 'GPU')
    else:
        physical_devices = tf.config.list_physical_devices('GPU')
        tf.config.set_visible_devices(physical_devices[args.gpu], 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[args.gpu],
            [tf.config.experimental.VirtualDeviceConfiguration(
                memory_limit=1024 * 3)])

    if args.env == 200:
        envname = 'ScratchItchPR2X'
    elif args.env == 201:
        envname = 'DressingPR2X'
    elif args.env == 202:
        envname = 'BedBathingPR2X'

    logdir = 'MFBox_Assistive'
    if args.SAC:
        wandb.init(config=vars(args), project="Assistive Gym",
                   name=f'SAC on {envname}')
    elif args.PPO:
        wandb.init(config=vars(args), project="Assistive Gym",
                   name=f'PPO on {envname}')
    elif args.TD3:
        wandb.init(config=vars(args), project="Assistive Gym",
                   name=f'TD3 on {envname}')
    elif args.DEBUG:
        logdir = 'DEBUG_Assistive'
        wandb.init(config=vars(args), project="Assistive Gym",
                   name=f'DEBUG on {envname}')
    else:
        print('PLEASE INDICATE THE ALGORITHM!')
        return  # no algorithm selected, so no trainer can be built

    if not os.path.exists(logdir):
        os.makedirs(logdir)
    parser.set_defaults(logdir=logdir)
    args = parser.parse_args()

    env = gym.make(f'{envname}-v0')
    # test_env = Monitor(env, logdir, force=True)
    test_env = gym.make(f'{envname}-v0')

    if args.SAC:
        policy = SAC(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=args.n_warmup,
                     alpha=args.alpha,
                     auto_alpha=args.auto_alpha)
        trainer = Trainer(policy, env, args, test_env=test_env)
    elif args.PPO:
        policy = PPO(state_shape=env.observation_space.shape,
                     action_dim=get_act_dim(env.action_space),
                     is_discrete=is_discrete(env.action_space),
                     max_action=None if is_discrete(env.action_space)
                     else env.action_space.high[0],
                     batch_size=args.batch_size,
                     actor_units=(64, 64),
                     critic_units=(64, 64),
                     n_epoch=10,
                     lr_actor=3e-4,
                     lr_critic=3e-4,
                     hidden_activation_actor="tanh",
                     hidden_activation_critic="tanh",
                     discount=0.99,
                     lam=0.95,
                     entropy_coef=0.,
                     horizon=args.horizon,
                     normalize_adv=args.normalize_adv,
                     enable_gae=args.enable_gae,
                     gpu=args.gpu)
        trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
    elif args.TD3:
        policy = TD3(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=args.n_warmup)
        trainer = Trainer(policy, env, args, test_env=test_env)
    elif args.DEBUG:
        policy = SAC(state_shape=env.observation_space.shape,
                     action_dim=env.action_space.high.size,
                     gpu=args.gpu,
                     memory_capacity=args.memory_capacity,
                     max_action=env.action_space.high[0],
                     batch_size=args.batch_size,
                     n_warmup=100,
                     alpha=args.alpha,
                     auto_alpha=args.auto_alpha)
        parser.set_defaults(test_interval=200)
        args = parser.parse_args()
        trainer = Trainer(policy, env, args, test_env=None)

    trainer()
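# `run` expects the parser to already define everything it reads
# (`--gpu`, `--env`, the algorithm flags, `--horizon`, ...). A sketch of
# a plausible entry point, with flag names inferred from the code above
# rather than taken from the original script; the PPO branch would
# additionally need --normalize-adv / --enable-gae (e.g. via
# OnPolicyTrainer.get_argument):
if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = SAC.get_argument(parser)
    parser.add_argument('--env', type=int, default=200)
    parser.add_argument('--SAC', action='store_true')
    parser.add_argument('--PPO', action='store_true')
    parser.add_argument('--TD3', action='store_true')
    parser.add_argument('--DEBUG', action='store_true')
    parser.add_argument('--horizon', type=int, default=2048)
    run(parser)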
ros_param_path = load_ros_params(rospackage_name="ur_rl",
                                 rel_path_from_package_to_file="config",
                                 yaml_file_name=param_file)

args.episode_max_steps = rospy.get_param("ur_gym/steps_per_episode", 200)
env = load_environment(rospy.get_param('ur_gym/env_id'),
                       max_episode_steps=args.episode_max_steps)

actor_class = rospy.get_param("ur_gym/actor_class", "default")
policy = SAC(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    max_action=env.action_space.high[0],
    actor_class=actor_class,
    batch_size=args.batch_size,
    n_warmup=args.n_warmup,
    auto_alpha=args.auto_alpha,
    lr=args.lr)
trainer = Trainer(policy, env, args, test_env=None)

# Persist the ROS parameters next to the training output for later replay
outdir = trainer._output_dir
rospy.set_param('ur_gym/output_dir', outdir)
log_ros_params(outdir)
copyfile(ros_param_path, outdir + "/ros_gym_env_params.yaml")

trainer()
print("duration", (timeit.default_timer() - start_time) / 60., "min")

# rosrun ur3e_rl tf2rl_sac.py --env-id=0
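# This fragment relies on names defined earlier in the script
# (`param_file`, `args`, `start_time`). A plausible preamble, shown
# purely as an assumption about the surrounding file, not the original:
import timeit
from shutil import copyfile

import rospy
from tf2rl.algos.sac import SAC
from tf2rl.experiments.trainer import Trainer

start_time = timeit.default_timer()
parser = Trainer.get_argument()
parser = SAC.get_argument(parser)
parser.add_argument('--env-id', type=int, default=0)
parser.add_argument('--lr', type=float, default=3e-4)  # hypothetical flag
args = parser.parse_args()
param_file = "simulation.yaml"  # hypothetical YAML name in ur_rl/config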