def _log_loss(self):
    if self.log_tensorboard:
        with tf.name_scope('loss'):
            tf.compat.v1.summary.scalar('loss_', self.loss)
        with tf.name_scope('networks'):
            stats_summary('Q', self.Qnets.Q)
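
# The logging methods in this section rely on a stats_summary utility (also used
# as tf_utils.stats_summary below) whose definition is not shown here. The
# following is only a minimal sketch of what such a helper could look like,
# assuming TF1-style summaries; the keyword flags (std, min, max, hist) mirror
# how it is called in this section and are assumptions, not the repo's actual API.
def stats_summary(name, tensor, mean=True, std=False, min=False, max=False, hist=False):
    """ Sketch: record basic statistics of `tensor` under `name`. """
    with tf.name_scope(f'{name}_stats'):
        if mean:
            tf.compat.v1.summary.scalar('mean_', tf.reduce_mean(tensor))
        if std:
            tf.compat.v1.summary.scalar('std_', tf.math.reduce_std(tensor))
        if min:
            tf.compat.v1.summary.scalar('min_', tf.reduce_min(tensor))
        if max:
            tf.compat.v1.summary.scalar('max_', tf.reduce_max(tensor))
        if hist:
            tf.compat.v1.summary.histogram(f'{name}_', tensor)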
def attention(self, q, k, v, mask=None):
    # dot-product attention: softmax(QK^T)V; no 1/sqrt(d_k) scaling is applied here
    dot_product = tf.matmul(q, k, transpose_b=True)     # [B, H, N, N]
    if mask is not None:
        dot_product *= mask
    weights = tf.nn.softmax(dot_product)                # [B, H, N, N]
    x = tf.matmul(weights, v)                           # [B, H, N, V]

    # monitor saturation of the softmax
    if hasattr(self, 'log_tensorboard') and self.log_tensorboard:
        with tf.name_scope('attention'):
            tf_utils.stats_summary('softmax', weights, hist=True)
            tf_utils.stats_summary('output', x)

    return x
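
# A minimal usage sketch for the attention helper above, assuming q, k, v are
# already split into heads with shapes [B, H, N, D] (batch, heads, tokens, head
# dim) and that the optional mask is broadcastable to [B, H, N, N]. The shapes
# and names below are illustrative only:
#   q = tf.random.normal([2, 4, 10, 16])   # queries
#   k = tf.random.normal([2, 4, 10, 16])   # keys
#   v = tf.random.normal([2, 4, 10, 32])   # values
#   out = self.attention(q, k, v)          # -> [2, 4, 10, 32]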
def noisy(self, x, units, kernel_initializer=tc.layers.xavier_initializer(),
          name=None, sigma=.4, factorized=True, return_noise=False):
    """ noisy layer using factorized Gaussian noise """
    name = self.get_name(name, 'noisy')
    y = self.dense(x, units, kernel_initializer=kernel_initializer)

    with tf.variable_scope(name):
        # params for the noisy layer
        features = x.shape.as_list()[-1]
        if factorized:
            w_in_dim = [features, 1]
            w_out_dim = [1, units]
            w_shape = [features, units]
            b_shape = [units]
            # factorized noise: epsilon_w = f(eps_in) f(eps_out), with f(x) = sign(x) sqrt(|x|)
            epsilon_w_in = tf.random.truncated_normal(w_in_dim, stddev=sigma)
            epsilon_w_in = tf.math.sign(epsilon_w_in) * tf.math.sqrt(tf.math.abs(epsilon_w_in))
            epsilon_w_out = tf.random.truncated_normal(w_out_dim, stddev=sigma)
            epsilon_w_out = tf.math.sign(epsilon_w_out) * tf.math.sqrt(tf.math.abs(epsilon_w_out))
            epsilon_w = tf.matmul(epsilon_w_in, epsilon_w_out, name='epsilon_w')
            epsilon_b = tf.reshape(epsilon_w_out, b_shape)
        else:
            # independent Gaussian noise for every weight and bias
            w_shape = [features, units]
            b_shape = [units]
            epsilon_w = tf.random.truncated_normal(w_shape, stddev=sigma, name='epsilon_w')
            epsilon_b = tf.random.truncated_normal(b_shape, stddev=sigma, name='epsilon_b')

        noisy_w = tf.get_variable('noisy_w', shape=w_shape,
                                  initializer=kernel_initializer,
                                  regularizer=self.l2_regularizer)
        noisy_b = tf.get_variable('noisy_b', shape=b_shape,
                                  initializer=tf.constant_initializer(sigma / np.sqrt(units)))

        # noise contribution of the layer; the deterministic part is y
        o = tf.matmul(x, noisy_w * epsilon_w) + noisy_b * epsilon_b

    if hasattr(self, 'log_tensorboard') and self.log_tensorboard:
        with tf.name_scope(f'{name}_log'):
            tf_utils.stats_summary('x', x, std=True, hist=True)
            tf_utils.stats_summary('w', noisy_w, std=True, hist=True)
            tf_utils.stats_summary('b', noisy_b, std=True, hist=True)
            tf_utils.stats_summary('o', o, std=True, hist=True)
            tf_utils.stats_summary('y', y, std=True, hist=True)

    if return_noise:
        return o, y
    else:
        return o + y
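
# A rough usage sketch for the noisy layer above. The layer object name and
# input shape are hypothetical; `layers` stands in for whatever class hosts
# these methods in the repo:
#   x = tf.compat.v1.placeholder(tf.float32, [None, 128])
#   h = layers.noisy(x, 64, name='noisy1')                  # full noisy output (noise + dense)
#   noise, dense = layers.noisy(x, 64, return_noise=True)   # noise contribution and deterministic part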
def _log_loss(self):
    if self.log_tensorboard:
        with tf.name_scope('info'):
            tf.compat.v1.summary.scalar('actor_loss_', self.actor_loss)
            tf.compat.v1.summary.scalar('critic_loss_', self.critic_loss)
            stats_summary('Q_with_actor', self.critic.Q_with_actor, max=True, hist=True)
            stats_summary('reward', self.data['reward'], min=True, hist=True)
            stats_summary('priority', self.priority, hist=True, max=True)
def _log_loss(self):
    if self.log_tensorboard:
        with tf.name_scope('info'):
            stats_summary('reward', self.data['reward'], min=True, max=True, hist=True)
            with tf.name_scope('actor'):
                stats_summary('orig_action', self.actor.orig_action)
                stats_summary('entropy', self.actor.action_distribution.entropy())
                stats_summary('action_std', self.actor.action_distribution.std)
                stats_summary('orig_logpi', self.actor.orig_logpi)
                tf.compat.v1.summary.scalar('orig_logpi_0', self.actor.orig_logpi[0][0])
                stats_summary('action', self.actor.action)
                stats_summary('logpi', self.actor.logpi)
                tf.compat.v1.summary.scalar('actor_loss_', self.actor_loss)
            with tf.name_scope('critic'):
                stats_summary('Q1_with_actor', self.critic.Q1_with_actor, min=True, max=True)
                stats_summary('Q2_with_actor', self.critic.Q2_with_actor, min=True, max=True)
                if self.buffer_type == 'proportional':
                    stats_summary('priority', self.priority, std=True, max=True, hist=True)
                tf.compat.v1.summary.scalar('Q1_loss_', self.Q1_loss)
                tf.compat.v1.summary.scalar('Q2_loss_', self.Q2_loss)
                tf.compat.v1.summary.scalar('critic_loss_', self.critic_loss)
            if self.raw_temperature == 'auto':
                with tf.name_scope('alpha'):
                    stats_summary('alpha', self.alpha, std=True)
                    tf.compat.v1.summary.scalar('alpha_loss', self.alpha_loss)
def _log_info(self):
    if self.log_tensorboard:
        with tf.name_scope('info'):
            with tf.name_scope('value'):
                stats_summary('V', self.ac.V)
                stats_summary('advantage', self.env_phs['advantage'])
                stats_summary('return', self.env_phs['return'])
                stats_summary('V_mask', self.ac.V * self.env_phs['mask'], hist=True)
                stats_summary('advantage_mask', self.env_phs['advantage'] * self.env_phs['mask'])
                stats_summary('return_mask', self.env_phs['return'] * self.env_phs['mask'])
                stats_summary('mask', self.env_phs['mask'])
            with tf.name_scope('policy'):
                stats_summary('mean_', self.ac.action_distribution.mean)
                stats_summary('std_', self.ac.action_distribution.std)