def log_prob(self, data, z, reuse=False):
    r"""Calculate log probability of the model given data and latent variables.

    log p(x, z) = log p(x | z) + \sum_n log p(z_n | z_{n + 1})
    """
    cfg = self.config
    distributions = {}
    # standard normal prior on the top layer of latents
    p_z_L = dist.Normal(loc=np.zeros(cfg['z_dim'], dtype=cfg['dtype']),
                        scale=np.ones(cfg['z_dim'], dtype=cfg['dtype']),
                        validate_args=False)
    if not reuse:
        distributions['layer_%d' % (cfg['p/n_layers'] - 1)] = p_z_L
    log_p_z = tf.reduce_sum(p_z_L.log_prob(z[-1]), -1)
    with util.get_or_create_scope('model', reuse=reuse):
        # conditionals p(z_{n-1} | z_n), from the top layer down
        for n in range(cfg['p/n_layers'] - 1, 0, -1):
            p_z = self.build_stochastic_layer(n - 1, layer_input=z[n],
                                              reuse=reuse)
            if not reuse:
                distributions['layer_%d' % (n - 1)] = p_z
            log_p_z += tf.reduce_sum(p_z.log_prob(z[n - 1]), -1)
        # likelihood p(x | z_0), summed over the data dimensions
        log_lik = tf.reduce_sum(
            self.likelihood(z[0]).log_prob(data), [2, 3, 4])
    if not reuse:
        self.distributions = distributions
    return log_lik + log_p_z
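# --- Illustration only, not part of the original source. A minimal sketch of
# how the log joint above and the variational `sample`/`q_z` (defined further
# below) are typically combined into a Monte Carlo ELBO. The object names
# `model_obj`, `variational_obj`, and `input_data` are assumed for the example.
def elbo_sketch(model_obj, variational_obj, input_data, n_samples):
    """Estimate E_q[log p(x, z) - log q(z | x)] with n_samples samples."""
    z = variational_obj.sample(input_data, n_samples=n_samples)
    log_p_x_z = model_obj.log_prob(input_data, z)  # shape [n_samples, batch_size]
    log_q_z = tf.add_n([tf.reduce_sum(q_n.log_prob(z_n), -1)
                        for q_n, z_n in zip(variational_obj.q_z, z)])
    return tf.reduce_mean(log_p_x_z - log_q_z)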
def likelihood(self, z, reuse=False):
    """Build the likelihood p(x | z_0)."""
    cfg = self.config
    n_samples = z.get_shape().as_list()[0]
    with util.get_or_create_scope('model', reuse=reuse):
        n_out = int(np.prod(cfg['train_data/shape']))
        net = z
        with slim.arg_scope(
                [slim.fully_connected],
                activation_fn=util.get_activation(cfg['p_net/activation']),
                outputs_collections=[tf.GraphKeys.ACTIVATIONS],
                variables_collections=['model'],
                weights_initializer=layers.variance_scaling_initializer(
                    factor=np.square(cfg['p_net/init_w_stddev']))):
            for i in range(cfg['p_net/n_layers']):
                net = slim.fully_connected(net, cfg['p_net/hidden_size'],
                                           scope='fc%d' % i)
            logits = slim.fully_connected(net, n_out, activation_fn=None,
                                          scope='fc_lik')
        logits = tf.reshape(
            logits, [n_samples, cfg['batch_size']] + cfg['train_data/shape'])
        return dist.Bernoulli(logits=logits, validate_args=False)
def sample(self, data, n_samples, reuse=False):
    """Sample from the variational posterior q(h | x)."""
    cfg = self.config
    data_centered = data['input_data'] - tf.expand_dims(data['data_mean'], 0)
    with util.get_or_create_scope('variational', reuse=reuse):
        distributions = {}
        q_h, h = [], []
        data_centered = tf.reshape(data_centered, [cfg['batch_size'], -1])
        # replicate the centered data across the sample dimension
        data_stacked = tf.stack([data_centered] * n_samples)
        q_h_0, h_0 = self.layer_q_and_h(0, data_stacked, reuse=reuse)
        q_h.append(q_h_0)
        distributions['layer_0'] = q_h_0
        h.append(h_0)
        for n in range(1, cfg['p/n_layers']):
            q_h_n, h_n = self.layer_q_and_h(n, h[n - 1])
            distributions['layer_%d' % n] = q_h_n
            q_h.append(q_h_n)
            h.append(h_n)
    if not reuse:
        self.q_h = q_h
        self.h = h
        self.distributions = distributions
    else:
        self.q_h_reuse = q_h
        self.h_reuse = h
        self.distributions_reuse = distributions
    return h
def sample(self, x, n_samples=1, reuse=False):
    """Draw a sample from the posterior z ~ q(z | x)."""
    cfg = self.config
    with util.get_or_create_scope('variational', reuse=reuse):
        q_z, z = [], []
        q_z_0 = self.build_stochastic_layer(n=0, layer_input=x, reuse=reuse)
        if not reuse:
            distributions = {'layer_0': q_z_0}
        z_0 = q_z_0.sample(n_samples)
        q_z.append(q_z_0)
        z.append(z_0)
        for n in range(1, cfg['p/n_layers']):
            q_z_n = self.build_stochastic_layer(n=n, layer_input=z[n - 1],
                                                reuse=reuse)
            if not reuse:
                distributions['layer_%d' % n] = q_z_n
            z_n = q_z_n.sample()
            q_z.append(q_z_n)
            z.append(z_n)
        if not reuse:
            self.distributions = distributions
            self.q_z = q_z
            self.z = z
    return z
def log_prob(self, data, h, reuse=False):
    r"""Log joint of the model.

    log f(x, h) = log p(x | h) + \sum_{i} log p(h_i | h_{i + 1})
    """
    cfg = self.config
    distributions = {}
    kwargs = {}
    if cfg['p/w_eps'] != 0.:
        kwargs.update({
            'weights_initializer': tf.constant_initializer(cfg['p/w_eps'])})
    with util.get_or_create_scope('model', reuse=reuse):
        with slim.arg_scope([slim.fully_connected], **kwargs):
            # prior on the top layer: learned or fixed Bernoulli logits
            if cfg['p/learn_prior']:
                a = tf.get_variable(
                    'prior_logits', shape=cfg['p/h_dim'], dtype=cfg['dtype'],
                    initializer=tf.constant_initializer(
                        scipy.special.logit(cfg['p/bernoulli_p'])))
            else:
                a = tf.constant(
                    np.zeros(cfg['p/h_dim'], dtype=cfg['dtype'])
                    + scipy.special.logit(cfg['p/bernoulli_p']))
            p_h_L = dist.Bernoulli(logits=a,
                                   name='p_h_%d' % (cfg['p/n_layers'] - 1),
                                   validate_args=False)
            distributions['layer_%d' % (cfg['p/n_layers'] - 1)] = p_h_L
            log_p_h = tf.reduce_sum(p_h_L.log_prob(h[-1]), -1)
            # conditionals p(h_{n-1} | h_n), from the top layer down
            for n in range(cfg['p/n_layers'] - 1, 0, -1):
                p_h_n = self.build_stochastic_layer(n=n, h_above=h[n])
                distributions['layer_%d' % (n - 1)] = p_h_n
                log_p_h += tf.reduce_sum(p_h_n.log_prob(h[n - 1]), -1)
            p_x_given_h = self.likelihood(h[0])
            log_p_x_given_h = tf.reduce_sum(
                p_x_given_h.log_prob(data['input_data']), [2, 3, 4])
            log_p_x_h = log_p_x_given_h + log_p_h
    if not reuse:
        for name, p in distributions.items():
            tf.summary.scalar(name + '_probs', tf.reduce_mean(p.probs))
        tf.summary.scalar('likelihood_probs',
                          tf.reduce_mean(p_x_given_h.probs))
        self.p_h_L = p_h_L
        self.distributions = distributions
    return log_p_x_h
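# --- Illustration only, not from the original source. The analogous ELBO
# assembly for the sigmoid-belief-network variant above, using its `sample`
# and log joint; `sbn_model`, `sbn_variational`, `data`, and `cfg` are assumed
# names. With discrete h, gradients w.r.t. the variational parameters would
# need a score-function (REINFORCE-style) estimator rather than the
# reparameterization trick.
def sbn_elbo_sketch(sbn_model, sbn_variational, data, cfg):
    """Estimate E_q[log p(x, h) - log q(h | x)] for the discrete model."""
    h = sbn_variational.sample(data, cfg['q/n_samples'])
    log_p_x_h = sbn_model.log_prob(data, h)  # shape [n_samples, batch_size]
    log_q_h = tf.add_n([tf.reduce_sum(q_n.log_prob(h_n), -1)
                        for q_n, h_n in zip(sbn_variational.q_h, h)])
    return tf.reduce_mean(log_p_x_h - log_q_h)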
def prior_predictive(self):
    """Sample from the prior and pass the sample through the layers."""
    cfg = self.config
    n_samples = cfg['q/n_samples']
    n_total = cfg['batch_size'] * n_samples
    with util.get_or_create_scope('model', reuse=True):
        h_prior = tf.cast(self.p_h_L.sample(n_total), cfg['dtype'])
        h_prior = tf.reshape(h_prior, [n_samples, cfg['batch_size'], -1])
        h = [None] * cfg['p/n_layers']
        h[cfg['p/n_layers'] - 1] = h_prior
        # propagate the top-layer sample down through the stochastic layers
        for n in range(cfg['p/n_layers'] - 1, 0, -1):
            p_h_n = self.build_stochastic_layer(n, h_above=h[n])
            h[n - 1] = tf.cast(p_h_n.sample(), cfg['dtype'])
        return self.likelihood(h[0])
def prior_predictive(self, reuse=True):
    """Sample from the prior predictive distribution."""
    cfg = self.config
    n_samples = cfg['q/n_samples'] * cfg['batch_size']
    with util.get_or_create_scope('model', reuse=reuse):
        # sample the top layer of latents from the standard normal prior
        z_L = tf.random_normal(
            [n_samples, cfg['batch_size'], cfg['z_dim']], dtype=cfg['dtype'])
        z = [None] * cfg['p/n_layers']
        z[cfg['p/n_layers'] - 1] = z_L
        for n in range(cfg['p/n_layers'] - 1, 0, -1):
            p_n = self.build_stochastic_layer(n - 1, layer_input=z[n],
                                              reuse=reuse)
            z[n - 1] = p_n.sample()
        return self.likelihood(z[0], reuse=reuse)
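# --- Illustration only, not part of the original source. A hedged usage
# sketch for drawing images from the prior predictive after the graph has
# been built; `model_obj` and `session` are assumed names.
def draw_prior_predictive_sketch(model_obj, session):
    """Sample images of shape [n_samples, batch_size] + train_data/shape."""
    p_x = model_obj.prior_predictive(reuse=True)
    return session.run(tf.cast(p_x.sample(), tf.float32))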
def likelihood(self, h_0, reuse=False):
    """Build the likelihood distribution p(x | h_0)."""
    cfg = self.config
    n_samples = h_0.get_shape().as_list()[0]
    with util.get_or_create_scope('model', reuse=reuse):
        h_0 = tf.reshape(h_0,
                         [n_samples * cfg['batch_size'], cfg['p/h_dim']])
        n_out = int(np.prod(cfg['train_data/shape']))
        p_logits = slim.fully_connected(h_0, n_out, activation_fn=None,
                                        scope='fc0')
        out_shape = [n_samples, cfg['batch_size']] + cfg['train_data/shape']
        p_logits = tf.reshape(p_logits, out_shape)
        return dist.Bernoulli(logits=p_logits, name='p_x_given_h_0')
def build_stochastic_layer(self, n, layer_input, reuse=False):
    """Build the distribution for one layer of the variational posterior,
    q(z_n | z_{n - 1}).
    """
    cfg = self.config
    in_shape = layer_input.get_shape().as_list()
    if len(in_shape) == 4:
        n_samples = 1
    else:
        n_samples = in_shape[0]
    if n == 0:
        # the first layer conditions on the (flattened) data
        outer_dim = -1
    else:
        outer_dim = cfg['z_dim']
    flat_shape = [n_samples * cfg['batch_size'], outer_dim]
    net = tf.reshape(layer_input, flat_shape)
    with util.get_or_create_scope('variational', reuse=reuse):
        with slim.arg_scope(
                [slim.fully_connected],
                outputs_collections=[tf.GraphKeys.ACTIVATIONS],
                variables_collections=['variational'],
                weights_initializer=util.get_initializer(
                    cfg['q_net/weights_initializer']),
                activation_fn=util.get_activation(cfg['q_net/activation']),
                biases_initializer=tf.constant_initializer(0.1)):
            for i in range(cfg['q_net/n_layers']):
                net = slim.fully_connected(net, cfg['q_net/hidden_size'],
                                           scope='layer_%d_fc%d' % (n, i))
            # output both the mean and the softplus pre-activation of the scale
            net = slim.fully_connected(net, 2 * cfg['z_dim'],
                                       activation_fn=None,
                                       scope='layer_%d_fc_out' % n)
    if n == 0:
        net = tf.reshape(net, [cfg['batch_size'], 2 * cfg['z_dim']])
        mu = net[:, 0:cfg['z_dim']]
        sp_arg = net[:, cfg['z_dim']:]
    else:
        net = tf.reshape(net,
                         [n_samples, cfg['batch_size'], 2 * cfg['z_dim']])
        mu = net[:, :, 0:cfg['z_dim']]
        sp_arg = net[:, :, cfg['z_dim']:]
    sigma = 1e-6 + tf.nn.softplus(sp_arg)
    return dist.Normal(loc=mu, scale=sigma, validate_args=False)