def cost(X, Z_prior_mean, Z_prior_logvar,
         Z_mean, Z_logvar,
         X_mean, X_logvar, lengths):
    # Mask of shape (time, batch): True where t < lengths[b], so
    # padded timesteps contribute nothing to the cost.
    mask = T.arange(X.shape[0]).dimshuffle(0, 'x') \
            < lengths.dimshuffle('x', 0)
    encoding_cost = mask * vae.kl_divergence(
        mean_1=Z_prior_mean, logvar_1=Z_prior_logvar,
        mean_2=Z_mean, logvar_2=Z_logvar
    )
    reconstruction_cost = mask * vae.gaussian_nll(X, X_mean, X_logvar)
    # Average over the valid (unmasked) timesteps only.
    return -T.sum(encoding_cost + reconstruction_cost) / T.sum(mask)
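The vae module is not shown, so for reference here is a minimal sketch of what kl_divergence and gaussian_nll might compute under the log-variance parameterization used above. The argument convention (which of the two Gaussians the expectation is taken over) and the absence of any reduction are assumptions, not the actual module:

import numpy as np
import theano.tensor as T

def kl_divergence(mean_1, logvar_1, mean_2, logvar_2):
    # Hypothetical stand-in: closed-form
    # KL(N(mean_1, exp(logvar_1)) || N(mean_2, exp(logvar_2)))
    # for diagonal Gaussians, computed element-wise.
    return 0.5 * (logvar_2 - logvar_1) \
         + (T.exp(logvar_1) + T.sqr(mean_1 - mean_2)) \
           / (2 * T.exp(logvar_2)) \
         - 0.5

def gaussian_nll(x, mean, logvar):
    # Hypothetical stand-in: element-wise negative log-likelihood
    # of x under N(mean, exp(logvar)).
    return 0.5 * (np.log(2 * np.pi) + logvar
                  + T.sqr(x - mean) / T.exp(logvar))

With these conventions, -(KL + NLL) is the per-timestep evidence lower bound, so the value cost returns would be maximized rather than minimized.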
def cost(X, Z_prior_mean, Z_prior_std,
         Z_mean, Z_std,
         X_mean, X_std, lengths):
    # Same (time, batch) mask as before, but applied with T.switch
    # so masked-out positions are exactly zero.
    mask = T.arange(X.shape[0]).dimshuffle(0, 'x') \
            < lengths.dimshuffle('x', 0)
    encoding_cost = T.switch(
        mask,
        vae.kl_divergence(
            mean_1=Z_mean, std_1=Z_std,
            mean_2=Z_prior_mean, std_2=Z_prior_std,
        ),
        0
    )
    reconstruction_cost = T.switch(
        mask,
        vae.gaussian_nll(X, X_mean, X_std),
        0
    )
    return -T.sum(encoding_cost + reconstruction_cost) / T.sum(mask)
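This second version switches from log-variances to standard deviations and replaces the mask multiplication with T.switch. One plausible motivation (an inference, not stated in the source): with multiplicative masking, a NaN or Inf produced at a padded position still poisons the sum, since 0 * NaN = NaN, whereas T.switch selects the constant 0 outright. The std-parameterized KL helper, again as a hypothetical sketch rather than the actual vae module, would be:

import theano.tensor as T

def kl_divergence(mean_1, std_1, mean_2, std_2):
    # Hypothetical stand-in:
    # KL(N(mean_1, std_1^2) || N(mean_2, std_2^2)), element-wise.
    return T.log(std_2 / std_1) \
         + (T.sqr(std_1) + T.sqr(mean_1 - mean_2)) \
           / (2 * T.sqr(std_2)) \
         - 0.5

Note the arguments are swapped relative to the first version: here the posterior comes first, giving KL(posterior || prior), the direction a VAE's encoding cost normally uses.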
import math
from pprint import pprint

import numpy as np
import theano
import theano.tensor as T

import model
import vae
# Assumption: Parameters comes from theano_toolkit; the source
# does not show this import.
from theano_toolkit.parameters import Parameters

if __name__ == "__main__":
    chunk_size = 512
    batch_size = 64
    P = Parameters()
    autoencoder, inpaint = model.build(P)
    parameters = P.values()

    X = T.itensor4('X')
    # Scale 8-bit pixel values to [0, 1] before encoding.
    X_hat, posteriors, priors = autoencoder(T.cast(X, 'float32') / 255.)

    # Per-layer KL between posterior and prior, averaged over the batch.
    latent_kls = [
        T.mean(vae.kl_divergence(po_m, po_s, pr_m, pr_s), axis=0)
        for (po_m, po_s), (pr_m, pr_s) in zip(posteriors, priors)
    ]

    # Linear warm-up schedule: the weight for position i ramps from 0
    # to 1 over 500 updates, starting at update 500 * (i + 1).
    beta_start = 500 * (np.arange(len(latent_kls)) + 1)
    beta_lin = theano.shared(np.float32(0))
    betas_ = (beta_lin - beta_start) / np.float32(500)
    betas_ = T.switch(betas_ < 0, 0, betas_)
    betas = T.switch(betas_ > 1, 1, betas_)[::-1]
    print(betas.eval())

    train_latent_kl = sum(betas[i] * kl for i, kl in enumerate(latent_kls))
    latent_kl = sum(latent_kls)
    # Reconstruction loss on the central crop of the input.
    recon_loss = model.cost(X_hat, X[:, :, 16:-16, 16:-16])
    pprint(parameters)
    # Sum of squared weights, for L2 regularization.
    l2 = sum(T.sum(T.sqr(w)) for w in parameters)
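beta_lin is a shared scalar, so the warm-up only advances if the training loop bumps it; note also the [::-1], which assigns the schedule computed for position i to layer len(latent_kls) - 1 - i, so the layers at the end of latent_kls are the first to warm up. A hypothetical loop fragment (continuing the script above; the update count is made up) might advance the clock like this:

# Hypothetical: advance the KL warm-up clock once per parameter update.
# Each layer's weight leaves 0 at update 500 * (i + 1) and saturates
# at 1 five hundred updates later.
for update in range(3000):
    beta_lin.set_value(np.float32(update))
    # ... run one training step here ...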
def reg_loss(z_means, z_stds, alphas):
    # KL to a standard normal prior for the Gaussian latents,
    # summed over the latent dimensions.
    gaussian_loss = T.sum(vae.kl_divergence(z_means, z_stds, 0, 1), axis=0)
    # KL for the stick-breaking weights; the last stick is implied.
    stick_break_loss = T.sum(stick_break_vae.kl_divergence(alphas[:-1]), axis=0)
    return gaussian_loss + stick_break_loss
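Here the Gaussian prior is standard normal (mean_2 = 0, std_2 = 1), so the general KL above collapses to a familiar closed form. A small sketch of that special case, with a hypothetical helper name:

import theano.tensor as T

def kl_to_standard_normal(mean, std):
    # KL(N(mean, std^2) || N(0, 1)) =
    #   0.5 * (std^2 + mean^2 - 1) - log(std)
    return 0.5 * (T.sqr(std) + T.sqr(mean) - 1) - T.log(std)

stick_break_vae.kl_divergence is not shown; in a stick-breaking VAE only the first K - 1 stick fractions carry free parameters (the last stick is whatever length remains), which is presumably why alphas[:-1] drops the final entry.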