# Sample z by adding noise drawn uniformly from [-epsilon, epsilon] in each
# latent dimension to the encoder output (reparameterized uniform noise).
z = encoder + epsilon * srng.uniform(size=encoder.shape, low=-1., high=1.)

decoder_network = MLP(activations=[Tanh(), Logistic()],
                      dims=[n_latent, n_hidden, n_vis],
                      biases_init=Constant(0), name='decoder_network')
decoder_p = decoder_network.apply(z)

# Define the cost
# log p(z) under the standard normal prior: -0.5 * (D * log(2 pi) + ||z||^2)
prior_term = -0.5 * (n_latent * T.log(2 * 3.14159) + (z ** 2).sum(axis=1))
# Bernoulli log-likelihood of the binarized input under the decoder
reconstruction_term = (x * T.log(decoder_p)
                       + (1 - x) * T.log(1 - decoder_p)).sum(axis=1)
# Log volume of a D-ball of radius epsilon:
# log V_D(eps) = (D/2) * log(pi) + D * log(eps) - log Gamma(D/2 + 1)
log_ball_volume = (n_latent * (0.5 * T.log(3.14159) + T.log(epsilon))
                   - gammaln(0.5 * n_latent + 1))

cost = -(prior_term + reconstruction_term).mean()
cost.name = 'negative_log_likelihood'

# Initialize the parameters
encoder_network._push_initialization_config()
for layer in encoder_network.linear_transformations:
    layer.weights_init = Uniform(
        width=12. / (layer.input_dim + layer.output_dim))
encoder_network.initialize()

decoder_network._push_initialization_config()
for layer in decoder_network.linear_transformations:
    layer.weights_init = Uniform(
        width=12. / (layer.input_dim + layer.output_dim))
decoder_network.initialize()
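# A quick numpy sanity check (an added sketch, not part of the original
# model) that the log-ball-volume formula above reproduces the closed-form
# volumes V_2(r) = pi * r^2 and V_3(r) = (4/3) * pi * r^3. The helper
# np_log_ball_vol is a hypothetical name introduced here for illustration.
import numpy
from scipy.special import gammaln as np_gammaln

def np_log_ball_vol(D, radius):
    # log V_D(r) = (D/2) * log(pi) + D * log(r) - log Gamma(D/2 + 1)
    return (D * (0.5 * numpy.log(numpy.pi) + numpy.log(radius))
            - np_gammaln(0.5 * D + 1))

assert numpy.isclose(numpy.exp(np_log_ball_vol(2, 1.5)),
                     numpy.pi * 1.5 ** 2)
assert numpy.isclose(numpy.exp(np_log_ball_vol(3, 0.5)),
                     4. / 3 * numpy.pi * 0.5 ** 3)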
# valid_set_x = theano.shared(numpy.asarray(valid_set_x > 0.5,
#                                           dtype=theano.config.floatX),
#                             borrow=True)
# n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

# Define cost and train and validate functions #####
# Log volumes of the D- and (D-1)-dimensional balls of radius epsilon;
# gammaln already returns log Gamma, so it is subtracted directly rather
# than wrapped in another T.log.
log_ball_vol_D = (n_latent * (0.5 * T.log(3.14159) + T.log(epsilon))
                  - gammaln(0.5 * n_latent + 1))
log_ball_vol_D_minus_1 = ((n_latent - 1)
                          * (0.5 * T.log(3.14159) + T.log(epsilon))
                          - gammaln(0.5 * n_latent + 0.5))
# KL term between the uniform-ball posterior and the standard normal prior;
# (encoder_2 * encoder_2).sum(axis=1) is the squared norm of the encoder
# output (the centre of the epsilon-ball).
KL_term = T.cast(
    -log_ball_vol_D + 0.5 * T.log(2 * 3.14159)
    + epsilon * n_latent / 3
    * T.exp(log_ball_vol_D_minus_1 - log_ball_vol_D)
    * (epsilon ** 2 + 3 * (encoder_2 * encoder_2).sum(axis=1)),
    theano.config.floatX)
# KL_divergence = T.cast(-0.5 * (1 + encoder_lognu - encoder_mu ** 2
#                                - T.exp(encoder_lognu)).sum(axis=1), 'float32')
reconstruction_term = T.cast(
    (x * T.log(decoder_p) + (1 - x) * T.log(1 - decoder_p)).sum(axis=1),
    'float32')
cost = (KL_term - reconstruction_term).mean()
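# A minimal sketch of the train/validate functions announced above, assuming
# plain minibatch SGD. The learning-rate value and the use of Blocks'
# ComputationGraph to collect the parameters are assumptions, not taken from
# the original script; train_set_x / valid_set_x / batch_size are the shared
# variables referenced in the commented-out code higher up.
from blocks.graph import ComputationGraph

learning_rate = 0.01  # assumed value, tune as needed
params = ComputationGraph(cost).parameters
grads = T.grad(cost, params)
sgd_updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]

index = T.lscalar('index')
train_model = theano.function(
    [index], cost, updates=sgd_updates,
    givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})
# Validation reuses the cost without updates (requires the valid_set_x
# shared variable commented out above).
validate_model = theano.function(
    [index], cost,
    givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size]})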