def test_run_continuous(self):
    from tf2rl.algos.ddpg import DDPG
    parser = DDPG.get_argument(self.parser)
    parser.set_defaults(n_warmup=1)
    args, _ = parser.parse_known_args()

    def env_fn():
        return gym.make('Pendulum-v0')

    def policy_fn(env, name, memory_capacity=int(1e6),
                  gpu=-1, *args, **kwargs):
        return DDPG(
            state_shape=env.observation_space.shape,
            action_dim=env.action_space.high.size,
            n_warmup=500,
            gpu=-1)

    def get_weights_fn(policy):
        return [policy.actor.weights,
                policy.critic.weights,
                policy.critic_target.weights]

    def set_weights_fn(policy, weights):
        actor_weights, critic_weights, critic_target_weights = weights
        update_target_variables(
            policy.actor.weights, actor_weights, tau=1.)
        update_target_variables(
            policy.critic.weights, critic_weights, tau=1.)
        update_target_variables(
            policy.critic_target.weights, critic_target_weights, tau=1.)

    run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn)
def policy_fn_continuous(env, name, memory_capacity=int(1e6),
                         gpu=-1, *args, **kwargs):
    from tf2rl.algos.ddpg import DDPG
    return DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        n_warmup=500,
        gpu=-1)
@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.agent = DDPG(
        state_shape=cls.continuous_env.observation_space.shape,
        action_dim=cls.continuous_env.action_space.low.size,
        batch_size=cls.batch_size,
        gpu=-1)
def _test_run_continuous(parser):
    from tf2rl.algos.ddpg import DDPG
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()

    def env_fn():
        return gym.make('Pendulum-v0')

    sample_env = env_fn()

    def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1):
        return DDPG(
            state_shape=env.observation_space.shape,
            action_dim=env.action_space.high.size,
            gpu=-1)

    def get_weights_fn(policy):
        return [policy.actor.weights,
                policy.critic.weights,
                policy.critic_target.weights]

    def set_weights_fn(policy, weights):
        actor_weights, critic_weights, critic_target_weights = weights
        update_target_variables(policy.actor.weights, actor_weights, tau=1.)
        update_target_variables(policy.critic.weights, critic_weights, tau=1.)
        update_target_variables(policy.critic_target.weights,
                                critic_target_weights, tau=1.)

    run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn)
def test_run_continuous(self):
    from tf2rl.algos.ddpg import DDPG
    parser = DDPG.get_argument(self.parser)
    parser.set_defaults(n_warmup=1)
    args, _ = parser.parse_known_args()
    run(args, env_fn_continuous, policy_fn_continuous,
        get_weights_fn_continuous, set_weights_fn_continuous)
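# The test above relies on module-level helpers that are not shown in this
# section. A minimal sketch, assuming they mirror the inline definitions from
# the first example (the bodies below are an assumption, not the original
# source; the import path follows tf2rl's layout):
import gym
from tf2rl.misc.target_update_ops import update_target_variables


def env_fn_continuous():
    return gym.make('Pendulum-v0')


def get_weights_fn_continuous(policy):
    return [policy.actor.weights,
            policy.critic.weights,
            policy.critic_target.weights]


def set_weights_fn_continuous(policy, weights):
    actor_weights, critic_weights, critic_target_weights = weights
    update_target_variables(policy.actor.weights, actor_weights, tau=1.)
    update_target_variables(policy.critic.weights, critic_weights, tau=1.)
    update_target_variables(policy.critic_target.weights,
                            critic_target_weights, tau=1.)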
@classmethod
def setUpClass(cls):
    super().setUpClass()
    cls.agent = DDPG(
        state_shape=cls.continuous_env.observation_space.shape,
        action_dim=cls.continuous_env.action_space.low.size,
        batch_size=cls.batch_size,
        sigma=0.5,  # Larger exploration noise makes the behavior easier to test
        gpu=-1)
def policy_fn(env, name, memory_capacity=int(1e6),
              gpu=-1, noise_level=0.3):
    return DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        gpu=gpu,
        name=name,
        sigma=noise_level,
        batch_size=100,
        lr_actor=0.001,
        lr_critic=0.001,
        actor_units=[400, 300],
        critic_units=[400, 300],
        memory_capacity=memory_capacity)
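# A minimal usage sketch for the factory above; the environment and noise
# level are illustrative choices, not taken from the original source:
import gym

env = gym.make('Pendulum-v0')
policy = policy_fn(env, name='DDPG', noise_level=0.1)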
def get_argument(parser=None):
    """Create or update the argument parser for a command line program.

    Args:
        parser (argparse.ArgumentParser, optional): argument parser

    Returns:
        argparse.ArgumentParser: argument parser
    """
    parser = DDPG.get_argument(parser)
    parser.add_argument('--eta', type=float, default=0.05)
    return parser
def test_empty_args(self):
    """Test with empty args {}"""
    env = gym.make("Pendulum-v0")
    test_env = gym.make("Pendulum-v0")
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=-1,
        memory_capacity=1000,
        max_action=env.action_space.high[0],
        batch_size=32,
        n_warmup=10)
    Trainer(policy, env, {}, test_env=test_env)
def test_invalid_args(self):
    """Test with invalid args"""
    env = gym.make("Pendulum-v0")
    test_env = gym.make("Pendulum-v0")
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=-1,
        memory_capacity=1000,
        max_action=env.action_space.high[0],
        batch_size=32,
        n_warmup=10)
    with self.assertRaises(ValueError):
        Trainer(policy, env, {"NOT_EXISTING_OPTIONS": 1}, test_env=test_env)
def test_with_args(self):
    """Test with valid args"""
    max_steps = 400
    env = gym.make("Pendulum-v0")
    test_env = gym.make("Pendulum-v0")
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=-1,
        memory_capacity=1000,
        max_action=env.action_space.high[0],
        batch_size=32,
        n_warmup=10)
    trainer = Trainer(policy, env, {"max_steps": max_steps}, test_env=test_env)
    self.assertEqual(trainer._max_steps, max_steps)
def _get_generator(self):
    """Returns the instantiated generator policy.

    Parameters are read from ./examples/example_params/gail_params.json.
    """
    generator_params = self._params["ML"]["BehaviorGAILAgent"]["Generator"]
    # Parameter lookups below use [name, description, default] indexing;
    # the default applies when the key is absent from the params file.
    policy = DDPG(
        state_shape=self._environment.observation_space.shape,
        action_dim=self._environment.action_space.high.size,
        max_action=self._environment.action_space.high,
        lr_actor=generator_params["LearningRateActor", "", 0.001],
        lr_critic=generator_params["LearningRateCritic", "", 0.001],
        actor_units=generator_params["ActorFcLayerParams", "", [400, 300]],
        critic_units=generator_params["CriticJointFcLayerParams", "", [400, 300]],
        sigma=generator_params["Sigma", "", 0.1],
        tau=generator_params["Tau", "", 0.005],
        n_warmup=generator_params["WarmUp", "", 1000],
        batch_size=generator_params["BatchSize", "", 100],
        gpu=self._params["ML"]["Settings"]["GPUUse", "", 0])
    return policy
def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1, *args, **kwargs):
    return DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        n_warmup=500,
        gpu=-1)
def get_argument(parser=None):
    parser = DDPG.get_argument(parser)
    parser.add_argument('--eta', type=float, default=0.05)
    return parser
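# A minimal sketch of consuming the extended parser above, assuming
# DDPG.get_argument creates a fresh ArgumentParser when passed None
# (the command-line value is illustrative; --eta defaults to 0.05):
parser = get_argument()
args = parser.parse_args(['--eta', '0.1'])
print(args.eta)  # 0.1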
def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1):
    return DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=-1)
        critic_units=[400, 300],
        memory_capacity=memory_capacity)


def get_weights_fn(policy):
    # TODO: Check whether the explicit device placement below is needed
    import tensorflow as tf
    with tf.device(policy.device):
        return [policy.actor.weights,
                policy.critic.weights,
                policy.critic_target.weights]


def set_weights_fn(policy, weights):
    actor_weights, critic_weights, critic_target_weights = weights
    update_target_variables(policy.actor.weights, actor_weights, tau=1.)
    update_target_variables(policy.critic.weights, critic_weights, tau=1.)
    update_target_variables(policy.critic_target.weights,
                            critic_target_weights, tau=1.)


if __name__ == '__main__':
    parser = apex_argument()
    parser.add_argument('--env-name', type=str, default="Pendulum-v0")
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()
    run(args, env_fn(args.env_name), policy_fn, get_weights_fn, set_weights_fn)
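# The __main__ block above calls env_fn(args.env_name) without showing its
# definition. A plausible sketch (an assumption, not the original source) is
# a factory that returns a zero-argument environment constructor, matching
# the env_fn() callables passed to run() in the other examples:
import gym


def env_fn(env_name):
    def _make():
        return gym.make(env_name)
    return _make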
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.experiments.trainer import Trainer

parser = Trainer.get_argument()
parser = DDPG.get_argument(parser)
args = parser.parse_args()

env = gym.make("Pendulum-v0")
test_env = gym.make("Pendulum-v0")
policy = DDPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    gpu=-1,  # Run on CPU; pass a GPU index to run on GPU
    memory_capacity=100,
    max_action=env.action_space.high[0],
    batch_size=32,
    n_warmup=500)
trainer = Trainer(policy, env, args, test_env=test_env)
trainer()
args = parser.parse_args()

if args.expert_path_dir is None:
    print("Please generate demonstrations first")
    print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 "
          "--save-test-path --test-interval=50000")
    exit()

units = [400, 300]

env = gym.make(args.env_name)
test_env = gym.make(args.env_name)
policy = DDPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    max_action=env.action_space.high[0],
    gpu=args.gpu,
    actor_units=units,
    critic_units=units,
    n_warmup=10000,
    batch_size=100)
irl = VAIL(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    units=units,
    enable_sn=args.enable_sn,
    batch_size=32,
    gpu=args.gpu)
expert_trajs = restore_latest_n_traj(
    args.expert_path_dir, n_path=20, max_steps=1000)
trainer = IRLTrainer(policy, env, args, irl,
                     expert_trajs["obses"],
                     expert_trajs["next_obses"],
                     expert_trajs["acts"],
                     test_env)
import roboschool
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.experiments.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = DDPG.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    parser.set_defaults(batch_size=100)
    parser.set_defaults(n_warmup=10000)
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=args.gpu,
        memory_capacity=args.memory_capacity,
        max_action=env.action_space.high[0],
        batch_size=args.batch_size,
        n_warmup=args.n_warmup)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()