def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
    # network, hiddens, layer_norm_bool, and dueling are captured from the
    # enclosing builder's scope: a feature extractor, the MLP layer sizes,
    # and two feature flags, respectively.
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        latent = network(input_placeholder)
        if isinstance(latent, tuple):
            if latent[1] is not None:
                raise NotImplementedError("DQN is not compatible with recurrent policies yet")
            latent = latent[0]

        latent = tf.compat.v1.layers.flatten(latent)

        # Action-value (advantage) head.
        with tf.compat.v1.variable_scope("action_value"):
            action_out = latent
            for hidden in hiddens:
                action_out = tf_slim.fully_connected(action_out, num_outputs=hidden,
                                                     activation_fn=None)
                if layer_norm_bool:
                    action_out = tf_slim.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = tf_slim.fully_connected(action_out, num_outputs=num_actions,
                                                    activation_fn=None)

        if dueling:
            # State-value head for the dueling decomposition.
            with tf.compat.v1.variable_scope("state_value"):
                state_out = latent
                for hidden in hiddens:
                    state_out = tf_slim.fully_connected(state_out, num_outputs=hidden,
                                                        activation_fn=None)
                    if layer_norm_bool:
                        state_out = tf_slim.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = tf_slim.fully_connected(state_out, num_outputs=1,
                                                      activation_fn=None)
            # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores

        return q_out
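# A minimal wiring sketch for the builder above; the stand-in feature
# extractor, layer sizes, flag values, observation shape, and action count
# are illustrative assumptions, not part of the original snippet.
import tensorflow as tf
import tf_slim

tf.compat.v1.disable_eager_execution()

network = tf_slim.flatten   # stand-in feature extractor
hiddens = [256]             # hidden layer sizes of the MLP head
layer_norm_bool = True      # layer-normalize each hidden layer
dueling = True              # use the dueling V/A decomposition

obs_ph = tf.compat.v1.placeholder(tf.float32, [None, 84, 84, 4], name="obs")
q_values = q_func_builder(obs_ph, num_actions=6, scope="q_func")
# The same builder can be reused under a second scope, e.g. for a target network:
target_q_values = q_func_builder(obs_ph, num_actions=6, scope="target_q_func")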
def _norm(self, inp, scope, dtype=tf.float32):
    shape = inp.get_shape()[-1:]
    gamma_init = tf.constant_initializer(self._norm_gain)
    beta_init = tf.constant_initializer(self._norm_shift)
    with tf.compat.v1.variable_scope(scope):
        # Initialize beta and gamma for use by layer_norm.
        tf.compat.v1.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
        tf.compat.v1.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
    normalized = tf_slim.layer_norm(inp, reuse=True, scope=scope)
    return normalized
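# The reuse=True call above is the point of this method: tf_slim.layer_norm
# would otherwise create its own gamma/beta (initialized to 1 and 0), so the
# variables are pre-created under the same scope to control the initial gain
# and shift. A standalone sketch of the same trick; the scope name, feature
# width, and gain value are illustrative assumptions.
import tensorflow as tf
import tf_slim

tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, [None, 128])
with tf.compat.v1.variable_scope("input_norm"):
    tf.compat.v1.get_variable("gamma", shape=[128],
                              initializer=tf.constant_initializer(0.5))
    tf.compat.v1.get_variable("beta", shape=[128],
                              initializer=tf.constant_initializer(0.0))
# reuse=True makes layer_norm look up the pre-created variables rather than
# creating fresh ones with the default initializers.
y = tf_slim.layer_norm(x, reuse=True, scope="input_norm")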
def layer_norm(input_tensor, name=None):
    """Run layer normalization on the last dimension of the tensor."""
    return tf_slim.layer_norm(
        inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)
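# Usage sketch for the wrapper above, assuming transformer-style activations
# of shape [batch, seq_len, hidden]; all shapes and names are illustrative.
import tensorflow as tf
import tf_slim

tf.compat.v1.disable_eager_execution()

hidden_states = tf.compat.v1.placeholder(tf.float32, [8, 128, 768])
# begin_norm_axis=-1: mean/variance are computed per token over the 768
# hidden units, i.e. the usual per-position layer norm.
normalized = layer_norm(hidden_states, name="LayerNorm")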
def layer_norm(inputs, name):
    return slim.layer_norm(inputs, scope=name)
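# Note that this wrapper is not interchangeable with the previous one on
# inputs of rank > 2: tf_slim.layer_norm defaults to begin_norm_axis=1, so
# here mean/variance are taken over *all* non-batch axes, whereas pinning
# begin_norm_axis=-1 normalizes over the last axis only. The two coincide
# for rank-2 [batch, features] inputs. `slim` is assumed to be the tf_slim
# package; the shapes below are illustrative.
import tensorflow as tf
import tf_slim as slim

tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, [8, 128, 768])
y_default = slim.layer_norm(x, scope="ln_default")  # normalizes over axes 1 and 2
y_last = slim.layer_norm(x, begin_norm_axis=-1, begin_params_axis=-1,
                         scope="ln_last")           # normalizes over axis 2 only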