예제 #1
0
파일: vpg.py 프로젝트: ykawamura96/tf2rl
    def __init__(self,
                 state_shape,
                 action_dim,
                 is_discrete,
                 actor=None,
                 critic=None,
                 actor_critic=None,
                 max_action=1.,
                 actor_units=(256, 256),
                 critic_units=(256, 256),
                 lr_actor=1e-3,
                 lr_critic=3e-3,
                 fix_std=False,
                 const_std=0.3,
                 hidden_activation_actor="relu",
                 hidden_activation_critic="relu",
                 name="VPG",
                 **kwargs):
        """Build the VPG agent's policy/value networks and their optimizers.

        Fixed vs. the original: ``actor_units`` and ``critic_units`` used
        mutable list defaults (``[256, 256]``), which are shared across every
        call; tuples are behaviorally equivalent here (only iterated) and safe.

        Args:
            state_shape: Shape of the observation space (e.g. ``(obs_dim,)``).
            action_dim: Number of discrete actions, or continuous action size.
            is_discrete: True to build a ``CategoricalActor``; False for a
                ``GaussianActor``.
            actor: Optional pre-built actor; when ``None`` a default is built.
            critic: Optional pre-built state-value critic; when ``None`` a
                default ``CriticV`` is built.
            actor_critic: Optional combined actor-critic network. When given,
                a single Adam optimizer (``lr_actor``) is used and
                ``self.actor`` / ``self.critic`` are set to ``None``.
            max_action: Action magnitude bound for the Gaussian actor.
            actor_units: Hidden-layer sizes for the default actor.
            critic_units: Hidden-layer sizes for the default critic.
            lr_actor: Adam learning rate for the actor (or actor-critic).
            lr_critic: Adam learning rate for the critic.
            fix_std: If True, the Gaussian policy's std is held constant.
            const_std: The constant std used when ``fix_std`` is True.
            hidden_activation_actor: Activation name for the default actor.
            hidden_activation_critic: Activation name for the default critic.
            name: Name forwarded to the parent class.
            **kwargs: Additional arguments forwarded to the parent class.
        """
        super().__init__(name=name, **kwargs)
        self._is_discrete = is_discrete

        # TODO: clean codes
        if actor_critic is not None:
            # Combined network path: one optimizer, driven by the actor lr.
            self.actor_critic = actor_critic
            self.actor_critic_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_actor)
            self.actor = None
            self.critic = None
        else:
            self.actor_critic = None
            if actor is None:
                if is_discrete:
                    self.actor = CategoricalActor(state_shape, action_dim,
                                                  actor_units)
                else:
                    self.actor = GaussianActor(
                        state_shape,
                        action_dim,
                        max_action,
                        actor_units,
                        hidden_activation=hidden_activation_actor,
                        fix_std=fix_std,
                        const_std=const_std,
                        state_independent_std=True)
            else:
                self.actor = actor
            if critic is None:
                self.critic = CriticV(
                    state_shape,
                    critic_units,
                    hidden_activation=hidden_activation_critic)
            else:
                self.critic = critic
            self.actor_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_actor)
            self.critic_optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr_critic)

        # This is used to check if input state to `get_action` is multiple (batch) or single
        self._state_ndim = np.array(state_shape).shape[0]
예제 #2
0
파일: vpg.py 프로젝트: xunyiljg/tf2rl
 def __init__(self,
              state_shape,
              action_dim,
              is_discrete,
              max_action=1.,
              actor_units=(256, 256),
              critic_units=(256, 256),
              lr_actor=1e-3,
              lr_critic=3e-3,
              fix_std=False,
              tanh_std=False,
              const_std=0.3,
              name="VPG",
              **kwargs):
     """Build the VPG agent's actor, critic, and their Adam optimizers.

     Fixed vs. the original: ``actor_units`` and ``critic_units`` used
     mutable list defaults (``[256, 256]``), shared across all calls; tuples
     behave identically here (only iterated) without that hazard.

     Args:
         state_shape: Shape of the observation space.
         action_dim: Number of discrete actions, or continuous action size.
         is_discrete: True to build a ``CategoricalActor``; False for a
             ``GaussianActor``.
         max_action: Action magnitude bound for the Gaussian actor.
         actor_units: Hidden-layer sizes for the actor network.
         critic_units: Hidden-layer sizes for the critic network.
         lr_actor: Adam learning rate for the actor.
         lr_critic: Adam learning rate for the critic.
         fix_std: If True, the Gaussian policy's std is held constant.
         tanh_std: Passed through to ``GaussianActor`` — presumably a tanh
             parameterization of the std; confirm against its definition.
         const_std: The constant std used when ``fix_std`` is True.
         name: Name forwarded to the parent class.
         **kwargs: Additional arguments forwarded to the parent class.
     """
     super().__init__(name=name, **kwargs)
     self._is_discrete = is_discrete
     if is_discrete:
         self.actor = CategoricalActor(state_shape, action_dim, actor_units)
     else:
         self.actor = GaussianActor(state_shape,
                                    action_dim,
                                    max_action,
                                    actor_units,
                                    fix_std=fix_std,
                                    tanh_std=tanh_std,
                                    const_std=const_std)
     self.critic = CriticV(state_shape, critic_units)
     self._action_dim = action_dim
     self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr_actor)
     self.critic_optimizer = tf.keras.optimizers.Adam(
         learning_rate=lr_critic)
예제 #3
0
 def setUpClass(cls):
     """Create the shared categorical policy fixture for the test class."""
     super().setUpClass()
     # Derive the policy dimensions from the discrete test environment.
     env = cls.discrete_env
     cls.policy = CategoricalActor(state_shape=env.observation_space.shape,
                                   action_dim=env.action_space.n,
                                   units=[4, 4])