Example #1
    def test_run_continuous(self):
        from tf2rl.algos.ddpg import DDPG
        parser = DDPG.get_argument(self.parser)
        parser.set_defaults(n_warmup=1)
        args, _ = parser.parse_known_args()

        def env_fn():
            return gym.make('Pendulum-v0')

        def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1, *args, **kwargs):
            return DDPG(
                state_shape=env.observation_space.shape,
                action_dim=env.action_space.high.size,
                n_warmup=500,
                gpu=-1)

        def get_weights_fn(policy):
            return [policy.actor.weights,
                    policy.critic.weights,
                    policy.critic_target.weights]

        def set_weights_fn(policy, weights):
            actor_weights, critic_weights, critic_target_weights = weights
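            # With tau=1.0, update_target_variables copies the received weights
            # into the local networks verbatim (a hard update rather than a
            # soft/Polyak update).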
            update_target_variables(
                policy.actor.weights, actor_weights, tau=1.)
            update_target_variables(
                policy.critic.weights, critic_weights, tau=1.)
            update_target_variables(
                policy.critic_target.weights, critic_target_weights, tau=1.)

        run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn)
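
For reference, these excerpts assume a few names defined at module level in the original tf2rl test and example files: gym, the Ape-X helpers run and apex_argument, and update_target_variables. A minimal sketch of the corresponding imports (the update_target_variables path is my assumption from the tf2rl codebase; the module that provides run and apex_argument is not shown in these excerpts):

import gym
from tf2rl.misc.target_update_ops import update_target_variables  # assumed path
# `run` and `apex_argument` come from tf2rl's Ape-X distributed training code (module not shown here)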
Example #2
def policy_fn_continuous(env, name, memory_capacity=int(1e6), gpu=-1, *args, **kwargs):
    from tf2rl.algos.ddpg import DDPG
    return DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        n_warmup=500,
        gpu=-1)
Example #3
 def setUpClass(cls):
     super().setUpClass()
     cls.agent = DDPG(
         state_shape=cls.continuous_env.observation_space.shape,
         action_dim=cls.continuous_env.action_space.low.size,
         batch_size=cls.batch_size,
         gpu=-1)
Example #4
def _test_run_continuous(parser):
    from tf2rl.algos.ddpg import DDPG
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()

    def env_fn():
        return gym.make('Pendulum-v0')

    sample_env = env_fn()

    def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1):
        return DDPG(state_shape=env.observation_space.shape,
                    action_dim=env.action_space.high.size,
                    gpu=-1)

    def get_weights_fn(policy):
        return [
            policy.actor.weights, policy.critic.weights,
            policy.critic_target.weights
        ]

    def set_weights_fn(policy, weights):
        actor_weights, critic_weights, critic_target_weights = weights
        update_target_variables(policy.actor.weights, actor_weights, tau=1.)
        update_target_variables(policy.critic.weights, critic_weights, tau=1.)
        update_target_variables(policy.critic_target.weights,
                                critic_target_weights,
                                tau=1.)

    run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn)
Example #5
    def test_run_continuous(self):
        from tf2rl.algos.ddpg import DDPG
        parser = DDPG.get_argument(self.parser)
        parser.set_defaults(n_warmup=1)
        args, _ = parser.parse_known_args()

        run(args, env_fn_continuous, policy_fn_continuous,
            get_weights_fn_continuous, set_weights_fn_continuous)
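
Example #5 relies on module-level helpers: policy_fn_continuous is shown in Example #2, and get_weights_fn_continuous/set_weights_fn_continuous follow the same pattern as the inline definitions in Examples #1 and #16. env_fn_continuous is presumably just the environment factory; a minimal sketch, assuming the same Pendulum environment used by the other examples:

def env_fn_continuous():
    # Same continuous-control environment used by the other DDPG examples
    return gym.make('Pendulum-v0')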
Example #6
 def setUpClass(cls):
     super().setUpClass()
     cls.agent = DDPG(
         state_shape=cls.continuous_env.observation_space.shape,
         action_dim=cls.continuous_env.action_space.low.size,
         batch_size=cls.batch_size,
         sigma=0.5,  # Use larger exploration noise to make the behaviour easier to test
         gpu=-1)
Example #7
def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1, noise_level=0.3):
    return DDPG(state_shape=env.observation_space.shape,
                action_dim=env.action_space.high.size,
                max_action=env.action_space.high[0],
                gpu=gpu,
                name=name,
                sigma=noise_level,
                batch_size=100,
                lr_actor=0.001,
                lr_critic=0.001,
                actor_units=[400, 300],
                critic_units=[400, 300],
                memory_capacity=memory_capacity)
Example #8
    def get_argument(parser=None):
        """
        Create or update argument parser for command line program

        Args:
            parser (argparse.ArgParser, optional): argument parser

        Returns:
            argparse.ArgParser: argument parser
        """
        parser = DDPG.get_argument(parser)
        parser.add_argument('--eta', type=float, default=0.05)
        return parser
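
A quick way to sanity-check the extended parser (a hypothetical standalone use; in tf2rl this get_argument is a static method of a DDPG subclass, written here as SomeDDPGVariant for illustration):

parser = SomeDDPGVariant.get_argument()   # parser=None, so a fresh ArgumentParser is created
args = parser.parse_args(['--eta', '0.1'])
assert args.eta == 0.1  # the subclass flag is parsed alongside DDPG's own arguments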
Example #9
 def test_empty_args(self):
     """
     Test empty args {}
     """
     env = gym.make("Pendulum-v0")
     test_env = gym.make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     Trainer(policy, env, {}, test_env=test_env)
Example #10
 def test_invalid_args(self):
     """
     Test with invalid args
     """
     env = gym.make("Pendulum-v0")
     test_env = gym.make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     with self.assertRaises(ValueError):
         Trainer(policy, env, {"NOT_EXISTING_OPTIONS": 1}, test_env=test_env)
Example #11
 def test_with_args(self):
     """
     Test with args
     """
     max_steps = 400
     env = gym.make("Pendulum-v0")
     test_env = gym.make("Pendulum-v0")
     policy = DDPG(state_shape=env.observation_space.shape,
                   action_dim=env.action_space.high.size,
                   gpu=-1,
                   memory_capacity=1000,
                   max_action=env.action_space.high[0],
                   batch_size=32,
                   n_warmup=10)
     trainer = Trainer(policy, env, {"max_steps": max_steps}, test_env=test_env)
     self.assertEqual(trainer._max_steps, max_steps)
Example #12
    def _get_generator(self):
        """Returns instantiated policy -
    parameters from ./examples/example_params/gail_params.json
    """
        generator_params = self._params["ML"]["BehaviorGAILAgent"]["Generator"]

        policy = DDPG(state_shape=self._environment.observation_space.shape,
                      action_dim=self._environment.action_space.high.size,
                      max_action=self._environment.action_space.high,
                      lr_actor=generator_params["LearningRateActor", "",
                                                0.001],
                      lr_critic=generator_params["LearningRateCritic", "",
                                                 0.001],
                      actor_units=generator_params["ActorFcLayerParams", "",
                                                   [400, 300]],
                      critic_units=generator_params["CriticJointFcLayerParams",
                                                    "", [400, 300]],
                      sigma=generator_params["Sigma", "", 0.1],
                      tau=generator_params["Tau", "", 0.005],
                      n_warmup=generator_params["WarmUp", "", 1000],
                      batch_size=generator_params["BatchSize", "", 100],
                      gpu=self._params["ML"]["Settings"]["GPUUse", "", 0])
        return policy
Example #13
 def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1, *args, **kwargs):
     return DDPG(
         state_shape=env.observation_space.shape,
         action_dim=env.action_space.high.size,
         n_warmup=500,
         gpu=-1)
Example #14
 def get_argument(parser=None):
     parser = DDPG.get_argument(parser)
     parser.add_argument('--eta', type=float, default=0.05)
     return parser
Example #15
 def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1):
     return DDPG(state_shape=env.observation_space.shape,
                 action_dim=env.action_space.high.size,
                 gpu=-1)
Example #16
                critic_units=[400, 300],
                memory_capacity=memory_capacity)


def get_weights_fn(policy):
    # TODO: Check if following needed
    import tensorflow as tf
    with tf.device(policy.device):
        return [
            policy.actor.weights, policy.critic.weights,
            policy.critic_target.weights
        ]


def set_weights_fn(policy, weights):
    actor_weights, critic_weights, critic_target_weights = weights
    update_target_variables(policy.actor.weights, actor_weights, tau=1.)
    update_target_variables(policy.critic.weights, critic_weights, tau=1.)
    update_target_variables(policy.critic_target.weights,
                            critic_target_weights,
                            tau=1.)


if __name__ == '__main__':
    parser = apex_argument()
    parser.add_argument('--env-name', type=str, default="Pendulum-v0")
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()

    run(args, env_fn(args.env_name), policy_fn, get_weights_fn, set_weights_fn)
Example #17
import gym
from tf2rl.algos.ddpg import DDPG
from tf2rl.experiments.trainer import Trainer

parser = Trainer.get_argument()
parser = DDPG.get_argument(parser)
args = parser.parse_args()

env = gym.make("Pendulum-v0")
test_env = gym.make("Pendulum-v0")
policy = DDPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    gpu=-1,  # Run on CPU; to run on GPU, pass the GPU device number instead
    memory_capacity=100,
    max_action=env.action_space.high[0],
    batch_size=32,
    n_warmup=500)
trainer = Trainer(policy, env, args, test_env=test_env)
trainer()
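
Once training finishes, the policy can be rolled out directly. A minimal evaluation loop, assuming DDPG exposes the get_action(state, test=...) method that the Trainer itself uses:

obs = env.reset()
done = False
episode_reward = 0.
while not done:
    action = policy.get_action(obs, test=True)  # deterministic action, no exploration noise
    obs, reward, done, _ = env.step(action)
    episode_reward += reward
print("Evaluation episode reward:", episode_reward)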
Example #18
    args = parser.parse_args()

    if args.expert_path_dir is None:
        print("Plaese generate demonstrations first")
        print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 --save-test-path --test-interval=50000")
        exit()

    units = [400, 300]

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        gpu=args.gpu,
        actor_units=units,
        critic_units=units,
        n_warmup=10000,
        batch_size=100)
    irl = VAIL(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        units=units,
        enable_sn=args.enable_sn,
        batch_size=32,
        gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(
        args.expert_path_dir, n_path=20, max_steps=1000)
    trainer = IRLTrainer(policy, env, args, irl, expert_trajs["obses"],
                         expert_trajs["next_obses"], expert_trajs["acts"], test_env)
Example #19
import roboschool
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.experiments.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = DDPG.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    parser.set_defaults(batch_size=100)
    parser.set_defaults(n_warmup=10000)
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(state_shape=env.observation_space.shape,
                  action_dim=env.action_space.high.size,
                  gpu=args.gpu,
                  memory_capacity=args.memory_capacity,
                  max_action=env.action_space.high[0],
                  batch_size=args.batch_size,
                  n_warmup=args.n_warmup)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()