# Imports assumed by these snippets (Python 2, Theano, and Blocks; each
# snippet originally lived in its own module, so only the common ones are
# listed here):
import cPickle

import theano
from blocks.bricks import Random
from blocks.graph import ComputationGraph
from blocks.select import Selector


def make_sampling_computation_graph(model_path, num_samples):
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)
    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng

    # Sample the top-level latent and decode it into (mu, log-variance)
    # halves, then reparameterize: mu + exp(0.5 * log nu) * noise.
    z2 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)
    h2 = decoder_mlp1.apply(z2)
    h2 = h2[:, :50] + theano.tensor.exp(0.5 * h2[:, 50:]) * theano_rng.normal(
        size=(num_samples, 50), dtype=theano.config.floatX)

    # Sample the lower-level latent conditioned on h2 and reparameterize.
    z1 = theano_rng.normal(size=(num_samples, 10),
                           dtype=theano.config.floatX)
    h1 = decoder_mlp2.apply(theano.tensor.concatenate([h2, z1], axis=1))
    h1 = h1[:, :50] + theano.tensor.exp(0.5 * h1[:, 50:]) * theano_rng.normal(
        size=(num_samples, 50), dtype=theano.config.floatX)

    # Decode both latent levels into 28x28 images.
    p = decoder_mlp3.apply(
        theano.tensor.concatenate([h1, h2], axis=1)).reshape(
            (num_samples, 28, 28))
    return ComputationGraph([p])
def generate_forward_diffusion_sample(self, X_noiseless):
    """
    Corrupt a training image with t steps worth of Gaussian noise, and
    return the corrupted image, as well as the mean and covariance of the
    posterior q(x^{t-1}|x^t, x^0).
    """
    X_noiseless = X_noiseless.reshape(
        (-1, self.n_colors, self.spatial_width, self.spatial_width))
    n_images = X_noiseless.shape[0].astype('int16')
    rng = Random().theano_rng
    # Choose a timestep in [1, self.trajectory_length-1]. Note that the
    # reverse process is fixed for the very first timestep, so we skip it.
    # TODO for some reason random_integer is missing from the Blocks
    # Theano random number generator.
    t = T.floor(rng.uniform(size=(1, 1), low=1,
                            high=self.trajectory_length,
                            dtype=theano.config.floatX))
    t_weights = self.get_t_weights(t)
    N = rng.normal(size=(n_images, self.n_colors, self.spatial_width,
                         self.spatial_width),
                   dtype=theano.config.floatX)

    # Variance of the noise added this time step
    beta_forward = self.get_beta_forward(t)
    # Decay in noise variance due to original signal this step
    alpha_forward = 1. - beta_forward
    # Compute total decay in the fraction of the variance due to X_noiseless
    alpha_arr = 1. - self.beta_arr
    alpha_cum_forward_arr = T.extra_ops.cumprod(alpha_arr).reshape(
        (self.trajectory_length, 1))
    alpha_cum_forward = T.dot(t_weights.T, alpha_cum_forward_arr)
    # Total fraction of the variance due to noise being mixed in
    beta_cumulative = 1. - alpha_cum_forward
    # Total fraction of the variance due to noise being mixed in one step ago
    beta_cumulative_prior_step = 1. - alpha_cum_forward / alpha_forward

    # Generate the corrupted training data
    X_uniformnoise = X_noiseless + (
        rng.uniform(size=(n_images, self.n_colors, self.spatial_width,
                          self.spatial_width),
                    dtype=theano.config.floatX) -
        T.constant(0.5, dtype=theano.config.floatX)) * T.constant(
            self.uniform_noise, dtype=theano.config.floatX)
    X_noisy = (X_uniformnoise * T.sqrt(alpha_cum_forward) +
               N * T.sqrt(1. - alpha_cum_forward))

    # Compute the mean and covariance of the posterior distribution
    mu1_scl = T.sqrt(alpha_cum_forward / alpha_forward)
    mu2_scl = 1. / T.sqrt(alpha_forward)
    cov1 = 1. - alpha_cum_forward / alpha_forward
    cov2 = beta_forward / alpha_forward
    lam = 1. / cov1 + 1. / cov2
    mu = (X_uniformnoise * mu1_scl / cov1 +
          X_noisy * mu2_scl / cov2) / lam
    sigma = T.sqrt(1. / lam)
    sigma = sigma.reshape((1, 1, 1, 1))

    mu.name = 'mu q posterior'
    sigma.name = 'sigma q posterior'
    X_noisy.name = 'X_noisy'
    t.name = 't'

    return X_noisy, t, mu, sigma
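# The mu/sigma combination above is the standard product-of-Gaussians rule:
# precisions add, and the posterior mean is the precision-weighted average of
# the two component means. A minimal numpy sanity-check sketch; the scalar
# values below are hypothetical, not taken from the original code.
def check_posterior_combination(x0=0.3, xt=0.1,
                                alpha_forward=0.99, alpha_cum_forward=0.9):
    import numpy as np
    mu1_scl = np.sqrt(alpha_cum_forward / alpha_forward)
    mu2_scl = 1. / np.sqrt(alpha_forward)
    cov1 = 1. - alpha_cum_forward / alpha_forward  # variance of q(x^{t-1}|x^0)
    cov2 = (1. - alpha_forward) / alpha_forward    # beta_forward/alpha_forward
    lam = 1. / cov1 + 1. / cov2                    # posterior precision
    mu = (x0 * mu1_scl / cov1 + xt * mu2_scl / cov2) / lam
    sigma = np.sqrt(1. / lam)                      # posterior std deviation
    return mu, sigma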
def make_sampling_computation_graph(model_path, num_samples):
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)
    selector = Selector(model.top_bricks)
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp3, = selector.select('/decoder_network3').bricks
    theano_rng = Random().theano_rng

    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)
    z2 = decoder_mlp1.apply(z1)
    # Reparameterization noise is disabled for this layer: use the mean only.
    z2 = z2[:, :40]
    z3 = decoder_mlp2.apply(z2)
    z3 = z3[:, :100] + theano.tensor.exp(0.5 * z3[:, 100:]) * theano_rng.normal(
        size=(num_samples, 100), dtype=theano.config.floatX)
    p = decoder_mlp3.apply(z3).reshape((num_samples, 28, 28))
    return ComputationGraph([p])
def make_sampling_computation_graph(model_path, num_samples):
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)
    selector = Selector(model.top_bricks)
    decoder_mlp, = selector.select('/decoder_network').bricks
    theano_rng = Random().theano_rng

    # Sample from the prior and decode directly into 28x28 images.
    z = theano_rng.normal(size=(num_samples, decoder_mlp.input_dim),
                          dtype=theano.config.floatX)
    p = decoder_mlp.apply(z).reshape((num_samples, 28, 28))
    return ComputationGraph([p])
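# Usage sketch for the sampling graphs above: compile the graph into a
# Theano function with no inputs and call it to draw images. `model_path`
# is a hypothetical argument; any pickled model exposing the selected
# bricks works.
def draw_samples(model_path, num_samples=16):
    cg = make_sampling_computation_graph(model_path, num_samples)
    sample_fn = theano.function([], cg.outputs)
    samples, = sample_fn()  # array of shape (num_samples, 28, 28)
    return samples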
def make_sampling_computation_graph(model_path, num_samples):
    with open(model_path, 'rb') as f:
        model = cPickle.load(f)
    selector = Selector(model.top_bricks)
    decoder_mlp2, = selector.select('/decoder_network2').bricks
    decoder_mlp1, = selector.select('/decoder_network1').bricks
    upsample_mlp2, = selector.select('/upsample_network2').bricks
    upsample_mlp1, = selector.select('/upsample_network1').bricks
    theano_rng = Random().theano_rng

    # Top level: decode z2 into (mu, log-variance) halves and reparameterize.
    z2 = theano_rng.normal(size=(num_samples, decoder_mlp2.input_dim),
                           dtype=theano.config.floatX)
    h2_params = decoder_mlp2.apply(z2)
    # Split point between mu and log nu; assumes the brick exposes
    # output_dim (replaces a shape-.eval() hack in the original).
    length = decoder_mlp2.output_dim // 2
    h2_mu = h2_params[:, :length]
    h2_lognu = h2_params[:, length:]
    h2 = h2_mu + theano.tensor.exp(0.5 * h2_lognu) * theano_rng.normal(
        size=h2_mu.shape, dtype=h2_mu.dtype)

    # Lower level: decode z1 the same way.
    z1 = theano_rng.normal(size=(num_samples, decoder_mlp1.input_dim),
                           dtype=theano.config.floatX)
    h1_tilde_params = decoder_mlp1.apply(z1)
    length = decoder_mlp1.output_dim // 2
    h1_tilde_mu = h1_tilde_params[:, :length]
    h1_tilde_lognu = h1_tilde_params[:, length:]
    h1_tilde = (h1_tilde_mu +
                theano.tensor.exp(0.5 * h1_tilde_lognu) *
                theano_rng.normal(size=h1_tilde_mu.shape,
                                  dtype=h1_tilde_mu.dtype))

    # Ladder combination: upsample h2 and add the lower-level residual.
    h1 = upsample_mlp1.apply(h2) + h1_tilde
    p = upsample_mlp2.apply(h1).reshape((num_samples, 28, 28))
    return ComputationGraph([p])
def get_image_encoder_function(model):
    selector = Selector(model.top_bricks)
    encoder_convnet, = selector.select('/encoder_convnet').bricks
    encoder_mlp, = selector.select('/encoder_mlp').bricks

    print('Building computation graph...')
    x = tensor.tensor4('features')
    phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
    nlat = encoder_mlp.output_dim // 2
    mu_phi = phi[:, :nlat]
    log_sigma_phi = phi[:, nlat:]
    # Sample from the approximate posterior via the reparameterization trick.
    epsilon = Random().theano_rng.normal(size=mu_phi.shape,
                                         dtype=mu_phi.dtype)
    z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
    computation_graph = ComputationGraph([x, z])

    print('Compiling encoder function...')
    encoder_function = theano.function(
        computation_graph.inputs, computation_graph.outputs)
    return encoder_function
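# Usage sketch (assumptions: `model` is an already-deserialized Blocks model,
# and `images` is a floatX array of shape (batch, channels, height, width);
# neither the helper name nor the shapes come from the original code).
def encode_images(model, images):
    encode = get_image_encoder_function(model)
    x_out, z_out = encode(images)  # returns the input and the sampled code
    return z_out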
def create_training_computation_graphs(z_dim, image_size, net_depth,
                                       discriminative_regularization,
                                       classifier, vintage,
                                       reconstruction_factor, kl_factor,
                                       discriminative_factor, disc_weights):
    x = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks(z_dim=z_dim, image_size=image_size,
                                 depth=net_depth)
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        if vintage:
            classifier_model = Model(load(classifier).algorithm.cost)
        else:
            with open(classifier, 'rb') as src:
                classifier_model = Model(load(src).algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select('/convnet').bricks
        classifier_mlp, = selector.select('/mlp').bricks
    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(
        numpy.zeros((3, image_size, image_size)), name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    num_disc_layers = 0
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized
        # output of the strided layers of the classifier.
        for layer in classifier_convnet.layers[1::3]:
            log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim('output')),
                name='{}_log_sigma'.format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)
        # Including the classifier MLP is DISABLED:
        # log_sigma = shared_floatx(
        #     numpy.zeros([classifier_mlp.output_dim]),
        #     name='{}_log_sigma'.format("MLP"))
        # add_role(log_sigma, PARAMETER)
        # variance_parameters.append(log_sigma)

        # diagnostic
        num_disc_layers = len(variance_parameters) - 1
        print("Applying discriminative regularization on {} layers".format(
            num_disc_layers))

    # Computation graph creation is encapsulated within this function in
    # order to allow selecting which parts of the graph will use batch
    # statistics for batch normalization and which parts will use population
    # statistics. Specifically, we'd like to use population statistics for
    # the classifier even in the training graph.
    def create_computation_graph():
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                                 dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape(
                (-1,) + decoder_convnet.get_dim('input_')))
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 -
            2 * log_sigma_phi - 1).sum(axis=1)
        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta) ** 2 / tensor.exp(2 * log_sigma)).sum(
                axis=[1, 2, 3])

        discriminative_layer_terms = [None] * num_disc_layers
        for i in range(num_disc_layers):
            discriminative_layer_terms[i] = tensor.zeros_like(kl_term)
        discriminative_term = tensor.zeros_like(kl_term)
        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the
            # classifier
            acts_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(x).flatten(ndim=2))])
            acts_hat_cg = ComputationGraph([
                classifier_mlp.apply(
                    classifier_convnet.apply(mu_theta).flatten(ndim=2))])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            cur_layer = 0
            # CLASSIFIER MLP DISABLED; would otherwise enumerate over
            # zip(classifier_convnet.layers[1::3] + [classifier_mlp], ...)
            for i, (layer, log_sigma) in enumerate(
                    zip(classifier_convnet.layers[1::3],
                        variance_parameters[1:])):
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)

                # TODO: this conditional could be less brittle
                if "mlp" in layer.name.lower():
                    log_sigma = log_sigma.dimshuffle('x', 0)
                    sumaxis = [1]
                else:
                    log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)
                    sumaxis = [1, 2, 3]

                discriminative_layer_term_unweighted = -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat) ** 2 / tensor.exp(2 * log_sigma)).sum(
                        axis=sumaxis)
                discriminative_layer_terms[i] = (
                    discriminative_factor * disc_weights[cur_layer] *
                    discriminative_layer_term_unweighted)
                discriminative_term = (discriminative_term +
                                       discriminative_layer_terms[i])
                cur_layer = cur_layer + 1

        # Scale terms (the discriminative term is pre-scaled per layer)
        reconstruction_term = reconstruction_factor * reconstruction_term
        kl_term = kl_factor * kl_term

        # total_reconstruction_term is reconstruction + discriminative;
        # the cost is mean(kl - total reconstruction)
        total_reconstruction_term = reconstruction_term + discriminative_term
        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph(
            [cost, kl_term, reconstruction_term, discriminative_term] +
            discriminative_layer_terms)

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp,
                             decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
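# Usage sketch for the function above. All hyperparameter values here are
# hypothetical, not taken from the original code; disc_weights needs one
# entry per regularized classifier layer.
def build_training_graphs(classifier_path, disc_weights):
    return create_training_computation_graphs(
        z_dim=128, image_size=64, net_depth=3,
        discriminative_regularization=True, classifier=classifier_path,
        vintage=False, reconstruction_factor=1.0, kl_factor=1.0,
        discriminative_factor=1.0, disc_weights=disc_weights)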
def test_random_brick():
    random = Random()
    # This makes sure that a Random brick doesn't instantiate more than one
    # Theano RNG during its lifetime (see PR #485 on GitHub)
    assert random.theano_rng is random.theano_rng
def create_training_computation_graphs(discriminative_regularization):
    x = tensor.tensor4('features')
    pi = numpy.cast[theano.config.floatX](numpy.pi)

    bricks = create_model_bricks()
    encoder_convnet, encoder_mlp, decoder_convnet, decoder_mlp = bricks
    if discriminative_regularization:
        classifier_model = Model(
            load('celeba_classifier.zip').algorithm.cost)
        selector = Selector(classifier_model.top_bricks)
        classifier_convnet, = selector.select('/convnet').bricks
    random_brick = Random()

    # Initialize conditional variances
    log_sigma_theta = shared_floatx(numpy.zeros((3, 64, 64)),
                                    name='log_sigma_theta')
    add_role(log_sigma_theta, PARAMETER)
    variance_parameters = [log_sigma_theta]
    if discriminative_regularization:
        # We add discriminative regularization for the batch-normalized
        # output of the strided layers of the classifier.
        for layer in classifier_convnet.layers[4::6]:
            log_sigma = shared_floatx(
                numpy.zeros(layer.get_dim('output')),
                name='{}_log_sigma'.format(layer.name))
            add_role(log_sigma, PARAMETER)
            variance_parameters.append(log_sigma)

    # Computation graph creation is encapsulated within this function in
    # order to allow selecting which parts of the graph will use batch
    # statistics for batch normalization and which parts will use population
    # statistics. Specifically, we'd like to use population statistics for
    # the classifier even in the training graph.
    def create_computation_graph():
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                                 dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape(
                (-1,) + decoder_convnet.get_dim('input_')))
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (
            tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 -
            2 * log_sigma_phi - 1).sum(axis=1)
        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta) ** 2 / tensor.exp(2 * log_sigma)).sum(
                axis=[1, 2, 3])
        total_reconstruction_term = reconstruction_term

        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the
            # classifier
            acts_cg = ComputationGraph([classifier_convnet.apply(x)])
            acts_hat_cg = ComputationGraph(
                [classifier_convnet.apply(mu_theta)])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            for layer, log_sigma in zip(classifier_convnet.layers[4::6],
                                        variance_parameters[1:]):
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)
                log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)
                total_reconstruction_term += -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat) ** 2 / tensor.exp(2 * log_sigma)).sum(
                        axis=[1, 2, 3])

        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term])

    cg = create_computation_graph()
    with batch_normalization(encoder_convnet, encoder_mlp,
                             decoder_convnet, decoder_mlp):
        bn_cg = create_computation_graph()

    return cg, bn_cg, variance_parameters
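# Usage sketch: the graph built inside the batch_normalization context
# (bn_cg) uses mini-batch statistics and is the one to train on; its first
# output is the cost (ordering taken from the return statement above; the
# helper name is hypothetical).
def build_celeba_graphs():
    cg, bn_cg, variance_parameters = create_training_computation_graphs(
        discriminative_regularization=True)
    cost = bn_cg.outputs[0]
    return cost, variance_parameters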