Example #1
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Build two actors for the tests: one with the default std parameterization
        # and one with a fixed, constant std.
        cls.policy = GaussianActor(
            state_shape=cls.continuous_env.observation_space.shape,
            action_dim=cls.continuous_env.action_space.low.size,
            max_action=1.,
            units=[4, 4])
        cls.const_std = 0.1
        cls.policy_fixed_sigma = GaussianActor(
            state_shape=cls.continuous_env.observation_space.shape,
            action_dim=cls.continuous_env.action_space.low.size,
            max_action=1.,
            units=[4, 4],
            fix_std=True,
            const_std=cls.const_std)
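The `continuous_env` attribute is assumed to be provided by a parent test class. A minimal sketch of what that parent setup might look like, assuming a Gym continuous-control environment (the class name and environment id below are illustrative, not taken from the example):

import unittest
import gym

class PolicyTestBase(unittest.TestCase):  # hypothetical base test class
    @classmethod
    def setUpClass(cls):
        # Any continuous-action environment works here; Pendulum is a common choice.
        # Depending on the installed Gym version, the id may be "Pendulum-v1".
        cls.continuous_env = gym.make("Pendulum-v0")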
Example #2
    def __init__(self,
                 state_shape,
                 action_dim,
                 is_discrete,
                 actor=None,
                 critic=None,
                 actor_critic=None,
                 max_action=1.,
                 actor_units=[256, 256],
                 critic_units=[256, 256],
                 lr_actor=1e-3,
                 lr_critic=3e-3,
                 fix_std=False,
                 const_std=0.3,
                 hidden_activation_actor="relu",
                 hidden_activation_critic="relu",
                 name="VPG",
                 **kwargs):
        super().__init__(name=name, **kwargs)
        self._is_discrete = is_discrete

        # TODO: clean up this code
        if actor_critic is not None:
            self.actor_critic = actor_critic
            self.actor_critic_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_actor)
            self.actor = None
            self.critic = None
        else:
            self.actor_critic = None
            if actor is None:
                if is_discrete:
                    self.actor = CategoricalActor(state_shape, action_dim,
                                                  actor_units)
                else:
                    self.actor = GaussianActor(
                        state_shape,
                        action_dim,
                        max_action,
                        actor_units,
                        hidden_activation=hidden_activation_actor,
                        fix_std=fix_std,
                        const_std=const_std,
                        state_independent_std=True)
            else:
                self.actor = actor
            if critic is None:
                self.critic = CriticV(
                    state_shape,
                    critic_units,
                    hidden_activation=hidden_activation_critic)
            else:
                self.critic = critic
            self.actor_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_actor)
            self.critic_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_critic)

        # Used to check whether the state passed to `get_action` is a single state or a batch
        self._state_ndim = np.array(state_shape).shape[0]
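A minimal sketch of instantiating this agent for a continuous-control task, using only the constructor arguments shown above (the environment id is an illustrative choice, and the remaining keyword arguments forwarded to the base class are assumed to have usable defaults):

import gym

env = gym.make("Pendulum-v0")
agent = VPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.low.size,
    is_discrete=False,
    max_action=env.action_space.high[0],
    actor_units=[64, 64],
    critic_units=[64, 64])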
Example #3
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.policy = GaussianActor(
            state_shape=cls.continuous_env.observation_space.shape,
            action_dim=cls.continuous_env.action_space.low.size,
            max_action=1.,
            units=[4, 4])
Example #4
    def __init__(self,
                 state_shape,
                 action_dim,
                 is_discrete,
                 max_action=1.,
                 actor_units=[256, 256],
                 critic_units=[256, 256],
                 lr_actor=1e-3,
                 lr_critic=3e-3,
                 fix_std=False,
                 tanh_std=False,
                 const_std=0.3,
                 name="VPG",
                 **kwargs):
        super().__init__(name=name, **kwargs)
        self._is_discrete = is_discrete
        # Discrete action spaces get a categorical policy; continuous ones a Gaussian policy.
        if is_discrete:
            self.actor = CategoricalActor(state_shape, action_dim, actor_units)
        else:
            self.actor = GaussianActor(state_shape,
                                       action_dim,
                                       max_action,
                                       actor_units,
                                       fix_std=fix_std,
                                       tanh_std=tanh_std,
                                       const_std=const_std)
        self.critic = CriticV(state_shape, critic_units)
        self._action_dim = action_dim
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr_actor)
        self.critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr_critic)
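For comparison with the continuous case sketched after Example #2, a sketch of building the same agent for a discrete-action environment (the environment id is again an illustrative choice):

import gym

env = gym.make("CartPole-v1")
agent = VPG(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.n,
    is_discrete=True)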
Example #5
    def _setup_actor(self,
                     state_shape,
                     action_dim,
                     actor_units,
                     lr,
                     max_action=1.):
        # `squash=True` bounds sampled actions via tanh squashing.
        self.actor = GaussianActor(state_shape,
                                   action_dim,
                                   max_action,
                                   squash=True,
                                   units=actor_units)
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
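For reference, a sketch of the same squashed actor built directly, outside of any agent class; the environment id and the import path are assumptions, not taken from the example:

import gym
from tf2rl.policies.tfp_gaussian_actor import GaussianActor  # assumed import path

env = gym.make("Pendulum-v0")
actor = GaussianActor(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.low.size,
    max_action=env.action_space.high[0],
    squash=True,   # bound actions via tanh, unlike Examples #1 and #3
    units=[256, 256])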
Example #6
    def __init__(self,
                 state_shape,
                 action_dim,
                 name="SAC",
                 max_action=1.,
                 lr=3e-4,
                 actor_units=[256, 256],
                 tau=0.005,
                 scale_reward=5.,
                 n_warmup=int(1e4),
                 memory_capacity=int(1e6),
                 **kwargs):
        super().__init__(name=name,
                         memory_capacity=memory_capacity,
                         n_warmup=n_warmup,
                         **kwargs)

        self.actor = GaussianActor(state_shape,
                                   action_dim,
                                   max_action,
                                   squash=True,
                                   tanh_mean=False,
                                   tanh_std=False)
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

        # Value function and its target network; the target starts as an exact copy (tau=1.)
        self.vf = CriticV(state_shape)
        self.vf_target = CriticV(state_shape)
        update_target_variables(self.vf_target.weights,
                                self.vf.weights,
                                tau=1.)
        self.vf_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

        # Twin Q-function critics
        self.qf1 = CriticQ(state_shape, action_dim, name="qf1")
        self.qf2 = CriticQ(state_shape, action_dim, name="qf2")
        self.qf1_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.qf2_optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

        # Set hyper-parameters
        self.tau = tau
        self.scale_reward = scale_reward
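During training, the same helper is typically reused with the soft-update coefficient `tau` stored above. A standalone sketch of that polyak-style update, assuming `update_target_variables` moves the target weights toward the source weights by a fraction `tau` per call (the import path is an assumption):

import tensorflow as tf
from tf2rl.misc.target_update_ops import update_target_variables  # assumed import path

# Two networks with identical architectures stand in for vf and vf_target.
source = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
target = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])

update_target_variables(target.weights, source.weights, tau=1.)     # hard copy, as in __init__
update_target_variables(target.weights, source.weights, tau=0.005)  # soft update per training step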