def divergence_from_states(self, lhs, rhs, mask=None):
    """Return the KL divergence between the distributions of two states.

    Both states are converted to distributions with `dist_from_state`
    (receiving the same `mask`), and the divergence is computed as
    `tfd.kl_divergence(lhs_dist, rhs_dist)`.

    Args:
        lhs: State passed to `dist_from_state` for the first distribution.
        rhs: State passed to `dist_from_state` for the second distribution.
        mask: Optional mask. When given, it is forwarded to
            `dist_from_state` and also applied to the resulting divergence
            through `tools.mask`.

    Returns:
        The divergence, masked when `mask` is not None.
    """
    lhs_dist = self.dist_from_state(lhs, mask)
    rhs_dist = self.dist_from_state(rhs, mask)
    kl = tfd.kl_divergence(lhs_dist, rhs_dist)
    if mask is None:
        return kl
    # NOTE(review): presumably blanks out the masked-out entries of the
    # divergence -- confirm against the default fill value of tools.mask.
    return tools.mask(kl, mask)
def dist_from_state(self, state, mask=None):
    """Build a diagonal multivariate normal from a state mapping.

    Args:
        state: Mapping read at the keys 'mean' and 'stddev'.
        mask: Optional mask. When given, the standard deviation is passed
            through `tools.mask(..., value=1)` before the distribution is
            constructed.

    Returns:
        `tfd.MultivariateNormalDiag(state['mean'], stddev)`.
    """
    scale = state['stddev']
    if mask is not None:
        # NOTE(review): fill value 1 presumably keeps masked-out entries a
        # valid (positive) scale -- confirm against tools.mask.
        scale = tools.mask(scale, mask, value=1)
    return tfd.MultivariateNormalDiag(state['mean'], scale)