def create_encoder(
    self, state_in: tf.Tensor, action_in: tf.Tensor, done_in: tf.Tensor, reuse: bool
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Builds the discriminator encoder and its sigmoid estimate head inside the
    "GAIL_model" variable scope.
    :param state_in: The encoded observation input
    :param action_in: The action input (only used when self.use_actions is set)
    :param done_in: The done flags input (only used when self.use_actions is set)
    :param reuse: Forwarded as ``reuse`` to every dense layer created here, so
        the weights can be shared with a previously created encoder
    :return: A tuple (estimate, z_mean, concat_input). z_mean is None when
        self.use_vail is falsy; concat_input is the tensor fed to the first
        hidden layer.
    """
    with tf.variable_scope("GAIL_model"):
        # With actions enabled the discriminator sees state, action and done
        # flag side by side; otherwise it only sees the state.
        encoder_input = (
            tf.concat([state_in, action_in, done_in], axis=1)
            if self.use_actions
            else state_in
        )

        # Two stacked swish layers of width h_size, created in order.
        features = encoder_input
        for layer_name in ("gail_d_hidden_1", "gail_d_hidden_2"):
            features = tf.layers.dense(
                features,
                self.h_size,
                activation=ModelUtils.swish,
                name=layer_name,
                reuse=reuse,
            )

        if not self.use_vail:
            latent_mean = None
            head_input = features
        else:
            # Latent representation
            latent_mean = tf.layers.dense(
                features,
                self.z_size,
                reuse=reuse,
                name="gail_z_mean",
                kernel_initializer=ModelUtils.scaled_init(0.01),
            )
            self.noise = tf.random_normal(tf.shape(latent_mean), dtype=tf.float32)
            # Sampled latent code: the noise term is scaled by z_sigma and
            # multiplied by use_noise.
            self.z = latent_mean + self.z_sigma * self.noise * self.use_noise
            head_input = self.z

        discriminator_out = tf.layers.dense(
            head_input,
            1,
            activation=tf.nn.sigmoid,
            name="gail_d_estimate",
            reuse=reuse,
        )
        return discriminator_out, latent_mean, encoder_input
def _create_policy_branches(
    self, logits: tf.Tensor, act_size: List[int]
) -> List[tf.Tensor]:
    """
    Creates one linear, bias-free dense layer on top of ``logits`` for each
    entry of ``act_size``.
    :param logits: The tensor fed to every branch
    :param act_size: The number of output units of each branch, in order
    :return: The branch output tensors, in the same order as ``act_size``
    """
    return [
        tf.layers.dense(
            logits,
            branch_width,
            activation=None,
            use_bias=False,
            # A fresh initializer is built for every branch.
            kernel_initializer=ModelUtils.scaled_init(0.01),
        )
        for branch_width in act_size
    ]
def _create_mu_log_sigma(
    self,
    logits: tf.Tensor,
    act_size: List[int],
    log_sigma_min: float,
    log_sigma_max: float,
    condition_sigma: bool,
) -> "GaussianDistribution.MuSigmaTensors":
    """
    Creates the mean, clipped log-sigma and sigma tensors of the distribution.
    :param logits: The tensor the mean (and optionally log-sigma) layer is built on
    :param act_size: Only the first entry is used; it sets the output width
    :param log_sigma_min: Lower clipping bound applied to log-sigma
    :param log_sigma_max: Upper clipping bound applied to log-sigma
    :param condition_sigma: If true, log-sigma is a dense layer on ``logits``;
        otherwise it is a standalone zero-initialized variable
    :return: self.MuSigmaTensors(mu, log_sigma, sigma), where log_sigma is the
        clipped value and sigma is its exponential
    """
    action_width = act_size[0]

    mean = tf.layers.dense(
        logits,
        action_width,
        activation=None,
        name="mu",
        kernel_initializer=ModelUtils.scaled_init(0.01),
        reuse=tf.AUTO_REUSE,
    )

    if not condition_sigma:
        # Input-independent log_sigma: one variable entry per output unit.
        raw_log_std = tf.get_variable(
            "log_std",
            [action_width],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )
    else:
        # Policy-dependent log_sigma
        raw_log_std = tf.layers.dense(
            logits,
            action_width,
            activation=None,
            name="log_std",
            kernel_initializer=ModelUtils.scaled_init(0.01),
        )

    # Clip before exponentiating so sigma stays within
    # [exp(log_sigma_min), exp(log_sigma_max)].
    clipped_log_std = tf.clip_by_value(raw_log_std, log_sigma_min, log_sigma_max)
    std = tf.exp(clipped_log_std)
    return self.MuSigmaTensors(mean, clipped_log_std, std)