def __init__(self, data, labels, arch, act=tf.nn.tanh,
             batch_size=None, loc=0.0, prec=1.0):
    """
    Bayesian Neural Net model (assumes a Normal prior over the weights)
    :param data: data for the regression task
    :param labels: labels for the regression task
    :param arch: list of layer widths for the feed-forward network
    :param act: activation function applied between layers
    :param batch_size: batch size; setting it to None adds flexibility
        at the cost of speed.
    :param loc: mean of the Normal prior
    :param prec: precision of the Normal prior
    """
    self.arch = arch
    # Total number of weights: one (arch[i] x arch[i + 1]) matrix per layer.
    self.theta_dim = np.sum(
        [arch[i] * arch[i + 1] for i in range(len(arch) - 1)])
    self.act = act
    self.x_dim = data.shape[1]
    self.y_dim = labels.shape[1]
    self.prec_prior = prec
    self.data = tf.constant(data, tf.float32)
    self.labels = tf.constant(labels, tf.float32)
    ensure_directory('logs/net.log')
    self.logger = create_logger(__name__, log_dir="logs/net/",
                                file_name='net.log')
import numpy as np
import tensorflow as tf

from a_nice_mc.objectives import Energy
from a_nice_mc.utils.evaluation import batch_effective_sample_size as effective_sample_size
from a_nice_mc.utils.evaluation import acceptance_rate
from a_nice_mc.utils.logger import save_ess, create_logger

logger = create_logger(__name__)


class NN(Energy):
    def __init__(self, data, labels, arch, act=tf.nn.tanh, prec=1.0):
        """
        Bayesian Neural Network model (assumes a factored Normal prior over the weights)
        :param data: data for the regression task
        :param labels: labels for the regression task
        :param arch: list of layer widths for the feed-forward network
        :param act: activation function applied between layers
        :param prec: precision of the Normal prior
        """
        super(NN, self).__init__()
        self.arch = arch
        self.theta_dim = np.sum(
            [arch[i] * arch[i + 1] for i in range(len(arch) - 1)])
        self.act = act
        self.x_dim = data.shape[1]
        self.y_dim = labels.shape[1]
        self.prec_prior = prec
        self.z = tf.placeholder(tf.float32, [None, self.theta_dim])
        self.data = tf.constant(data, tf.float32)
        self.labels = tf.constant(labels, tf.float32)
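
# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the library): constructing the
# NN energy above on made-up toy data, relying on the numpy/tensorflow imports
# at the top of this module. The shapes, the `arch` list and the variable
# names below are assumptions chosen purely to show how `theta_dim` follows
# from consecutive layer widths.
if __name__ == '__main__':
    toy_x = np.random.randn(100, 4).astype(np.float32)  # 100 samples, x_dim = 4
    toy_y = np.random.randn(100, 1).astype(np.float32)  # 100 targets, y_dim = 1
    # arch = [4, 8, 1] gives theta_dim = 4 * 8 + 8 * 1 = 40 weight parameters.
    model = NN(toy_x, toy_y, arch=[4, 8, 1], act=tf.nn.tanh, prec=1.0)
    print(model.theta_dim)  # 40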
def __init__(self, network, energy_fn, discriminator, noise_sampler,
             b, m, eta=1.0, scale=10.0):
    self.energy_fn = energy_fn
    self.logger = create_logger(__name__)
    self.train_op = TrainingOperator(network)
    self.infer_op = InferenceOperator(network, energy_fn)
    # Draw the number of operator steps uniformly from {1, ..., b} and {1, ..., m}.
    self.b = tf.to_int32(tf.reshape(tf.multinomial(tf.ones([1, b]), 1), [])) + 1
    self.m = tf.to_int32(tf.reshape(tf.multinomial(tf.ones([1, m]), 1), [])) + 1
    self.network = network
    self.x_dim, self.v_dim = network.x_dim, network.v_dim
    self.z = tf.placeholder(tf.float32, [None, self.x_dim])
    self.x = tf.placeholder(tf.float32, [None, self.x_dim])
    self.xl = tf.placeholder(tf.float32, [None, self.x_dim])
    self.steps = tf.placeholder(tf.int32, [])
    self.nice_steps = tf.placeholder(tf.int32, [])
    bx, bz = tf.shape(self.x)[0], tf.shape(self.z)[0]

    # Obtain values from the inference ops.
    # `infer_op` contains the Metropolis step.
    v = tf.random_normal(tf.stack([bz, self.v_dim]))
    self.z_, self.v_ = self.infer_op((self.z, v), self.steps, self.nice_steps)

    # Reshape for the pairwise discriminator.
    x = tf.reshape(self.x, [-1, 2 * self.x_dim])
    xl = tf.reshape(self.xl, [-1, 2 * self.x_dim])

    # Obtain values from the train ops.
    v1 = tf.random_normal(tf.stack([bz, self.v_dim]))
    x1_, v1_ = self.train_op((self.z, v1), self.b)
    x1_ = x1_[-1]
    x1_sg = tf.stop_gradient(x1_)
    v2 = tf.random_normal(tf.stack([bx, self.v_dim]))
    x2_, v2_ = self.train_op((self.x, v2), self.m)
    x2_ = x2_[-1]
    v3 = tf.random_normal(tf.stack([bx, self.v_dim]))
    x3_, v3_ = self.train_op((x1_sg, v3), self.m)
    x3_ = x3_[-1]

    # The pairwise discriminator has two components:
    #   (x, x2)  from x -> x2
    #   (x1, x3) from z -> x1 -> x3
    #
    # The optimal case is achieved when x1, x2 and x3
    # are all drawn from the data distribution.
    x_ = tf.concat([
        tf.concat([x2_, self.x], 1),
        tf.concat([x3_, x1_], 1)
    ], 0)

    # Concatenate all v values for log-likelihood training.
    v1_ = v1_[-1]
    v2_ = v2_[-1]
    v3_ = v3_[-1]
    v_ = tf.concat([v1_, v2_, v3_], 0)
    v_ = tf.reshape(v_, [-1, self.v_dim])

    d = discriminator(x, reuse=False)
    d_ = discriminator(x_)

    # Generator loss.
    # TODO: MMD loss (http://szhao.me/2017/06/10/a-tutorial-on-mmd-variational-autoencoders.html)
    # It is easy to implement, but maybe we should wait until this codebase is settled.
    self.v_loss = tf.reduce_mean(0.5 * tf.multiply(v_, v_))
    self.g_loss = tf.reduce_mean(d_) + self.v_loss * eta

    # Discriminator loss (Wasserstein loss with gradient penalty).
    self.d_loss = tf.reduce_mean(d) - tf.reduce_mean(d_)
    epsilon = tf.random_uniform([], 0.0, 1.0)
    x_hat = xl * epsilon + x_ * (1 - epsilon)
    d_hat = discriminator(x_hat)
    ddx = tf.gradients(d_hat, x_hat)[0]
    ddx = tf.norm(ddx, axis=1)
    ddx = tf.reduce_mean(tf.square(ddx - 1.0) * scale)
    self.d_loss = self.d_loss + ddx

    # I don't have a good solution to the tf variable scope mess,
    # so I force the NiceLayer to live under the 'generator' scope.
    # See `nice/__init__.py`.
    # Collect generator and discriminator variables by scope name.
    g_vars = [var for var in tf.global_variables() if 'generator' in var.name]
    d_vars = [var for var in tf.global_variables() if discriminator.name in var.name]

    self.d_train = tf.train.AdamOptimizer(learning_rate=5e-4, beta1=0.5, beta2=0.9)\
        .minimize(self.d_loss, var_list=d_vars)
    self.g_train = tf.train.AdamOptimizer(learning_rate=5e-4, beta1=0.5, beta2=0.9)\
        .minimize(self.g_loss, var_list=g_vars)

    self.init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Single-threaded session with incremental GPU memory growth.
    gpu_options = tf.GPUOptions(allow_growth=True)
    self.sess = tf.Session(config=tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        gpu_options=gpu_options,
    ))
    self.sess.run(self.init_op)

    self.ns = noise_sampler
    self.ds = None

    # Create the log directory if it does not already exist.
    self.path = 'logs/' + energy_fn.name
    try:
        os.makedirs(self.path)
    except OSError:
        pass
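
# ---------------------------------------------------------------------------
# Standalone sketch (illustration only, not part of the library): the
# discriminator loss built above adds a WGAN-GP style gradient penalty, i.e.
# it interpolates between real and generated pairs and penalises the critic's
# gradient norm for deviating from 1. The helper below, with a hypothetical
# critic `f`, reproduces that term in isolation.
def gradient_penalty_sketch(f, x_real, x_fake, scale=10.0):
    epsilon = tf.random_uniform([], 0.0, 1.0)           # one scalar mixing weight
    x_hat = x_real * epsilon + x_fake * (1 - epsilon)   # random interpolation
    grad = tf.gradients(f(x_hat), x_hat)[0]             # critic gradient w.r.t. input
    grad_norm = tf.norm(grad, axis=1)                    # per-sample L2 norm
    return tf.reduce_mean(tf.square(grad_norm - 1.0) * scale)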