def kl(self, other):
    """Return KL(self || other) for two softmax-over-logits distributions.

    Both logit tensors are shifted by their own maximum along the last axis
    before exponentiation so that ``tf.exp`` cannot overflow; the shift
    cancels out in the final expression. The result is reduced over the
    last axis.

    NOTE(review): ``U.max`` / ``U.sum`` are project helpers; they are used
    here as max/sum reductions along ``axis`` -- confirm against ``U``.
    """
    # Max-shifted logits for each distribution (numerical stability).
    shifted_self = self.logits - U.max(self.logits, axis=-1, keepdims=True)
    shifted_other = other.logits - U.max(other.logits, axis=-1, keepdims=True)

    exp_self = tf.exp(shifted_self)
    exp_other = tf.exp(shifted_other)

    # Normalisers of the two (shifted) softmaxes.
    norm_self = U.sum(exp_self, axis=-1, keepdims=True)
    norm_other = U.sum(exp_other, axis=-1, keepdims=True)

    # Probabilities under ``self``; only these weight the log-ratio.
    probs_self = exp_self / norm_self

    # log p_self - log p_other, written in terms of shifted logits.
    log_ratio = (
        shifted_self - tf.log(norm_self) - shifted_other + tf.log(norm_other)
    )
    return U.sum(probs_self * log_ratio, axis=-1)
def kl(self, other):
    """Return KL(self || other) between two diagonal Gaussians.

    Per dimension the term is
    ``log(std_o / std_s) + (std_s^2 + (mean_s - mean_o)^2) / (2 std_o^2) - 1/2``
    and the terms are summed over the last axis.

    ``other`` must be a ``DiagGaussianPd``; this is checked with an
    ``assert`` (so the check disappears under ``python -O``).
    """
    assert isinstance(other, DiagGaussianPd)

    # Variance of self plus squared mean difference.
    numerator = tf.square(self.std) + tf.square(self.mean - other.mean)
    # Twice the variance of other.
    denominator = 2.0 * tf.square(other.std)

    per_dim = other.logstd - self.logstd + numerator / denominator - 0.5
    return U.sum(per_dim, axis=-1)
def entropy(self):
    """Return the entropy, summed over the last axis.

    Computed as the sigmoid cross-entropy of ``self.logits`` against
    ``self.ps`` used as soft labels.

    NOTE(review): this equals the Bernoulli entropy when
    ``self.ps == sigmoid(self.logits)`` -- presumably the case; confirm in
    the class constructor.
    """
    per_dim = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.logits,
        labels=self.ps,
    )
    return U.sum(per_dim, axis=-1)
def kl(self, other):
    """Return KL(self || other), summed over the last axis.

    Evaluated as the cross-entropy of ``self.ps`` against ``other.logits``
    minus the cross-entropy of ``self.ps`` against ``self.logits`` (the
    same quantity that ``entropy`` computes).
    """
    # Cross-entropy H(self, other), using self.ps as soft labels.
    cross_term = U.sum(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=other.logits,
            labels=self.ps,
        ),
        axis=-1,
    )
    # Self cross-entropy H(self, self).
    self_term = U.sum(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.logits,
            labels=self.ps,
        ),
        axis=-1,
    )
    return cross_term - self_term
def neglogp(self, x):
    """Return the negative log-probability of ``x``, summed over the last axis.

    ``x`` is cast to float and used as the labels of a sigmoid
    cross-entropy against ``self.logits``.
    """
    labels = tf.to_float(x)
    per_dim = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.logits,
        labels=labels,
    )
    return U.sum(per_dim, axis=-1)
def entropy(self):
    """Return the differential entropy of a diagonal Gaussian.

    Each dimension contributes ``logstd + 0.5 * log(2 * pi * e)``; the
    contributions are summed over the last axis.
    """
    # Constant per-dimension term of the Gaussian entropy.
    half_log_two_pi_e = .5 * np.log(2.0 * np.pi * np.e)
    per_dim = self.logstd + half_log_two_pi_e
    return U.sum(per_dim, axis=-1)
def neglogp(self, x):
    """Return the negative log-density of ``x`` under a diagonal Gaussian.

    The result is the sum of three terms:

    * half the squared standardised distance ``((x - mean) / std)^2``,
      summed over the last axis;
    * ``0.5 * log(2 * pi)`` times the size of the last axis of ``x``;
    * the sum of ``logstd`` over the last axis.
    """
    standardised = (x - self.mean) / self.std
    quadratic_term = 0.5 * U.sum(tf.square(standardised), axis=-1)

    # Size of the last axis, taken dynamically from the tensor shape.
    num_dims = tf.to_float(tf.shape(x)[-1])
    constant_term = 0.5 * np.log(2.0 * np.pi) * num_dims

    log_scale_term = U.sum(self.logstd, axis=-1)

    return quadratic_term + constant_term + log_scale_term
def entropy(self):
    """Return the entropy of the softmax distribution over ``self.logits``.

    The logits are shifted by their maximum along the last axis before
    exponentiation so that ``tf.exp`` cannot overflow. With shifted logits
    ``a`` and normaliser ``z``, ``-log p = log(z) - a``; the entropy is the
    probability-weighted sum of that over the last axis.
    """
    shifted = self.logits - U.max(self.logits, axis=-1, keepdims=True)
    exp_shifted = tf.exp(shifted)
    normaliser = U.sum(exp_shifted, axis=-1, keepdims=True)
    probs = exp_shifted / normaliser

    # Negative log-probability of each category.
    neg_log_probs = tf.log(normaliser) - shifted
    return U.sum(probs * neg_log_probs, axis=-1)