def __init__(self,
             state_shape,
             action_dim,
             is_discrete,
             actor=None,
             critic=None,
             actor_critic=None,
             max_action=1.,
             actor_units=(256, 256),
             critic_units=(256, 256),
             lr_actor=1e-3,
             lr_critic=3e-3,
             fix_std=False,
             const_std=0.3,
             hidden_activation_actor="relu",
             hidden_activation_critic="relu",
             name="VPG",
             **kwargs):
    """Initialize a VPG agent.

    Builds either a combined actor-critic network (when ``actor_critic``
    is given) or separate actor and critic networks, each with its own
    Adam optimizer.

    Args:
        state_shape: Shape of the observation space.
        action_dim: Dimension of the action space (number of actions
            when ``is_discrete`` is True).
        is_discrete: If True, use a ``CategoricalActor``; otherwise a
            ``GaussianActor``.
        actor: Optional pre-built actor network. Built internally when None.
        critic: Optional pre-built critic network. Built internally when None.
        actor_critic: Optional combined actor-critic network. When given,
            ``self.actor`` and ``self.critic`` are set to None and a single
            optimizer (with ``lr_actor``) is used.
        max_action: Action magnitude bound for the Gaussian actor.
        actor_units: Hidden-layer sizes of the actor network.
            NOTE: default changed from a list to a tuple to avoid the
            shared-mutable-default pitfall; callers may still pass lists.
        critic_units: Hidden-layer sizes of the critic network (tuple
            default for the same reason).
        lr_actor: Learning rate for the actor (and actor-critic) optimizer.
        lr_critic: Learning rate for the critic optimizer.
        fix_std: If True, the Gaussian actor uses a fixed std.
        const_std: Constant std value used when ``fix_std`` is True.
        hidden_activation_actor: Activation for actor hidden layers.
        hidden_activation_critic: Activation for critic hidden layers.
        name: Agent name passed to the base class.
        **kwargs: Forwarded to the base class constructor.
    """
    super().__init__(name=name, **kwargs)
    self._is_discrete = is_discrete

    # TODO: clean codes
    if actor_critic is not None:
        # Combined network: a single optimizer drives both heads.
        self.actor_critic = actor_critic
        self.actor_critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr_actor)
        self.actor = None
        self.critic = None
    else:
        self.actor_critic = None
        if actor is None:
            if is_discrete:
                self.actor = CategoricalActor(
                    state_shape, action_dim, actor_units)
            else:
                self.actor = GaussianActor(
                    state_shape, action_dim, max_action, actor_units,
                    hidden_activation=hidden_activation_actor,
                    fix_std=fix_std, const_std=const_std,
                    state_independent_std=True)
        else:
            self.actor = actor
        if critic is None:
            self.critic = CriticV(
                state_shape, critic_units,
                hidden_activation=hidden_activation_critic)
        else:
            self.critic = critic
        self.actor_optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr_actor)
        self.critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr_critic)

    # This is used to check if input state to `get_action` is multiple (batch) or single
    self._state_ndim = np.array(state_shape).shape[0]
def __init__(self,
             state_shape,
             action_dim,
             is_discrete,
             max_action=1.,
             actor_units=(256, 256),
             critic_units=(256, 256),
             lr_actor=1e-3,
             lr_critic=3e-3,
             fix_std=False,
             tanh_std=False,
             const_std=0.3,
             name="VPG",
             **kwargs):
    """Initialize a VPG agent with separate actor and critic networks.

    Args:
        state_shape: Shape of the observation space.
        action_dim: Dimension of the action space (number of actions
            when ``is_discrete`` is True).
        is_discrete: If True, use a ``CategoricalActor``; otherwise a
            ``GaussianActor``.
        max_action: Action magnitude bound for the Gaussian actor.
        actor_units: Hidden-layer sizes of the actor network.
            NOTE: default changed from a list to a tuple to avoid the
            shared-mutable-default pitfall; callers may still pass lists.
        critic_units: Hidden-layer sizes of the critic network (tuple
            default for the same reason).
        lr_actor: Learning rate for the actor optimizer.
        lr_critic: Learning rate for the critic optimizer.
        fix_std: If True, the Gaussian actor uses a fixed std.
        tanh_std: If True, the Gaussian actor squashes its std via tanh.
        const_std: Constant std value used when ``fix_std`` is True.
        name: Agent name passed to the base class.
        **kwargs: Forwarded to the base class constructor.
    """
    super().__init__(name=name, **kwargs)
    self._is_discrete = is_discrete

    if is_discrete:
        self.actor = CategoricalActor(
            state_shape, action_dim, actor_units)
    else:
        self.actor = GaussianActor(
            state_shape, action_dim, max_action, actor_units,
            fix_std=fix_std, tanh_std=tanh_std, const_std=const_std)
    self.critic = CriticV(state_shape, critic_units)
    self._action_dim = action_dim

    self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=lr_actor)
    self.critic_optimizer = tf.keras.optimizers.Adam(
        learning_rate=lr_critic)
def setUpClass(cls):
    """Create a small categorical policy over the discrete test env."""
    super().setUpClass()
    env = cls.discrete_env
    cls.policy = CategoricalActor(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.n,
        units=[4, 4])