def _weight_hessian_bb(
        self,
        X,
        loc,
        scale,
):
    one_minus_loc = 1 - loc
    loc_times_scale = loc * scale
    one_minus_loc_times_scale = one_minus_loc * scale
    scalar_one = tf.constant(1, shape=(), dtype=self.dtype)

    if isinstance(X, tf.SparseTensor):
        # Adding tf.zeros_like(loc) densifies the sparse X (to_dense does not
        # work here) so that tf.log below receives a dense tensor; the dense
        # location tensor also provides the correct broadcast shape.
        const1 = tf.sparse.add(tf.zeros_like(loc), X) / -tf.sparse.add(X, -tf.ones_like(loc))
        const2 = loc * (tf.log(tf.sparse.add(tf.zeros_like(loc), X)) - tf.digamma(loc_times_scale)) \
            - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \
            + tf.digamma(scale)
    else:
        # X / (1 - X); the log is taken below, mirroring the sparse branch.
        const1 = X / (1 - X)
        const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale)) \
            - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \
            + tf.digamma(scale)

    const3 = scale * (-tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)
                      + tf.polygamma(scalar_one, scale)
                      - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc))
    const = scale * (const2 + const3)
    return const
def _weight_hessian_aa(
        self,
        X,
        loc,
        scale,
):
    one_minus_loc = 1 - loc
    loc_times_scale = loc * scale
    one_minus_loc_times_scale = one_minus_loc * scale

    if isinstance(X, tf.SparseTensor):
        # Adding tf.zeros_like(loc) densifies the sparse X (to_dense does not
        # work here) so that tf.log receives a dense tensor; taking the log
        # makes this branch consistent with the dense branch below.
        const1 = tf.log(tf.sparse.add(tf.zeros_like(loc), X) / -tf.sparse.add(X, -tf.ones_like(loc)))
    else:
        const1 = tf.log(X / (tf.ones_like(X) - X))

    const2 = (1 - 2 * loc) * (-tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1)
    const3 = loc * one_minus_loc_times_scale * (
        -tf.polygamma(tf.ones_like(loc), loc_times_scale)
        - tf.polygamma(tf.ones_like(loc), one_minus_loc_times_scale))
    const = loc * one_minus_loc_times_scale * (const2 + const3)
    return const
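Both weights differentiate the beta log-likelihood in its mean-precision parameterization, with `loc` = μ and `scale` = s. For orientation (standard beta algebra, not quoted from this code), the underlying log-density is

$$\log f(x;\mu,s) = \log\Gamma(s) - \log\Gamma(\mu s) - \log\Gamma\big((1-\mu)s\big) + (\mu s - 1)\log x + \big((1-\mu)s - 1\big)\log(1-x);$$

its first derivatives in s and μ generate the digamma terms above, and the second derivatives generate the trigamma terms, ψ₁(·) = `tf.polygamma(1., ·)`.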
def to_sd(self, alpha):
    """
    :param alpha: (Tensor)
    :return: sigma (Tensor)
    """
    _one = K.cast(1, dtype=alpha.dtype)
    d1 = self.latent_dim - 1
    # Var[log(x_k / x_K)] for a Dirichlet via its independent-Gamma
    # representation: trigamma(alpha_k) + trigamma(alpha_K).
    var = (T.polygamma(_one, alpha[:, :d1]) +
           T.polygamma(_one, alpha[:, -1:]))
    sigma = T.sqrt(var)
    return sigma
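A minimal NumPy/SciPy sketch of the same computation, handy for checking `to_sd` outside the graph (the helper name and the shape of `alpha` are assumptions for illustration):

import numpy as np
from scipy.special import polygamma

def to_sd_numpy(alpha):
    # alpha: (batch, K) array of positive Dirichlet concentrations.
    # Var[log(x_k / x_K)] = trigamma(alpha_k) + trigamma(alpha_K)
    var = polygamma(1, alpha[:, :-1]) + polygamma(1, alpha[:, -1:])
    return np.sqrt(var)

alpha = np.array([[1.0, 2.0, 3.0, 4.0]])
print(to_sd_numpy(alpha))  # shape (1, 3)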
def nuStep(cls, nu, n, delta, p=1.):
    three = tf.constant(3., dtype=nu.dtype)
    for i in range(2):  # two Newton-style updates
        w = (nu + p) / (nu + delta)
        fp = (-tf.digamma(nu / 2) + tf.log(nu / 2)
              + 1. / n * tf.reduce_sum(tf.log((nu + p) / (nu + delta)) - w, axis=0)
              + 1 + tf.digamma((p + nu) / 2) - tf.log((p + nu) / 2))
        fpp = (tf.polygamma(three, nu / 2) / 2.
               + 1. / nu
               + tf.polygamma(three, (p + nu) / 2) / 2.
               - 1. / (nu + p)
               + 1. / n * tf.reduce_sum((delta - p) / (nu + delta) ** 2 * (w - 1), axis=0))
        nu = nu + fp / fpp
    return nu
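`fp` is the standard estimating equation for the degrees of freedom ν in EM-type fitting of a p-dimensional Student-t, with per-sample distances δᵢ; written out,

$$f(\nu) = 1 + \log\frac{\nu}{2} - \psi\!\left(\frac{\nu}{2}\right) + \frac{1}{n}\sum_{i=1}^{n}\big(\log w_i - w_i\big) + \psi\!\left(\frac{\nu+p}{2}\right) - \log\frac{\nu+p}{2}, \qquad w_i = \frac{\nu+p}{\nu+\delta_i},$$

and the loop applies two Newton-style corrections ν ← ν + f(ν)/`fpp`.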
def q_z_x(name, x, K, reuse=False):
    with tf.variable_scope('q_z_x' + name, reuse=reuse):
        # h1 = tf.nn.relu(tf.layers.dense(x, units=h_dim[1]))
        h1 = x
        z_Ralpha1 = tf.layers.dense(h1, units=K,
                                    kernel_initializer=tf.random_normal_initializer(0, 0.01))
        z_Ralpha1 = max_m_grad(min_z_alpha_rate, z_Ralpha1)
        # z_Ralpha1 = tf.maximum(min_z_alpha_rate, z_Ralpha1)
        z_alpha1 = tf.nn.softplus(z_Ralpha1)
        z_Rbeta1 = tf.layers.dense(h1, units=K,
                                   kernel_initializer=tf.random_normal_initializer(0, 0.01))
        z_Rbeta1 = max_m_grad(min_z_beta_rate, z_Rbeta1)
        # z_Rbeta1 = tf.maximum(min_z_beta_rate, z_Rbeta1)
        z_beta1 = tf.nn.softplus(z_Rbeta1)
        # z_beta1 = min_m_grad(z_alpha1 / min_mean, z_beta1)

        if MethodName == 'GO':
            z_hat1s = tf.random_gamma([1], tf.stop_gradient(z_alpha1), 1.)
            z_hat1s = tf.maximum(min_z, tf.squeeze(z_hat1s, 0))
            Grad_z_alpha1 = GO_Gamma_v2(tf.stop_gradient(z_hat1s), tf.stop_gradient(z_alpha1))
            z_hat1 = z_alpha1 * tf.stop_gradient(Grad_z_alpha1) - \
                tf.stop_gradient(z_alpha1 * Grad_z_alpha1) + \
                tf.stop_gradient(z_hat1s)
            z1_Fcorr = tf.zeros([1])

        if MethodName == 'GRep':
            posi0 = tf.polygamma(tf.constant(0, dtype=tf.float32), z_alpha1)
            posi1 = tf.polygamma(tf.constant(1, dtype=tf.float32), z_alpha1)
            z_hat1s = tf.random_gamma([1], tf.stop_gradient(z_alpha1), 1.)
            z_hat1s = tf.maximum(min_z, tf.squeeze(z_hat1s, 0))
            epsilo = tf.stop_gradient(
                (tf.log(z_hat1s) - posi0) / tf.maximum(tf.pow(posi1, 0.5), 1e-5))
            log_z_hat1 = epsilo * tf.pow(posi1, 0.5) + posi0
            z_hat1 = tf.exp(log_z_hat1)
            z1_Fcorr = tf.reduce_sum(
                - tf.lgamma(z_alpha1) + (z_alpha1 - 1.) * log_z_hat1 - z_hat1
                + log_z_hat1 + 0.5 * tf.log(posi1))

        if MethodName == 'RSVI':
            lambda_z1 = tf.squeeze(tf.random_gamma([1], z_alpha1 + Bf, 1.), 0)
            lambda_z1 = tf.stop_gradient(tf.maximum(min_z, lambda_z1))
            z_hat1, z1_Fcorr = reject_h_boosted(lambda_z1, z_alpha1)

        z1 = z_hat1 / z_beta1
        # z1 = tf.maximum(min_z, z1)
        z1 = max_m_grad(min_z, z1)
    return z1, z_alpha1, z_beta1, z1_Fcorr
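Both `q_z_x` here and `q_W` below select among the same three gradient estimators. In the 'GRep' branch, the Gamma sample is standardized through digamma/trigamma statistics so that gradients can flow through α; with ψ₀ = digamma and ψ₁ = trigamma,

$$\epsilon = \frac{\log z_s - \psi_0(\alpha)}{\sqrt{\psi_1(\alpha)}}, \qquad \log\hat{z} = \epsilon\,\sqrt{\psi_1(\alpha)} + \psi_0(\alpha),$$

where ε is held constant via `stop_gradient` and `z1_Fcorr` accumulates the accompanying log-density correction term.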
def q_W(name, V, K, reuse=False):
    with tf.variable_scope('q_W' + name, reuse=reuse):
        W_aW = tf.get_variable("W_aW", [V, K], tf.float32,
                               tf.random_uniform_initializer(0.1, 10))
        RW_aW = max_m_grad(min_W_alpha_rate, W_aW)
        # RW_aW = tf.maximum(min_W_alpha_rate, W_aW)
        W_alpha = tf.nn.softplus(RW_aW)
        W_bW = tf.get_variable("W_bW", [V, K], tf.float32,
                               tf.random_uniform_initializer(0.1, 10))
        RW_bW = max_m_grad(min_W_beta_rate, W_bW)
        # RW_bW = tf.maximum(min_W_beta_rate, W_bW)
        W_beta = tf.nn.softplus(RW_bW)
        # W_beta = tf.nn.softplus(W_bW)
        # W_beta = min_m_grad(W_alpha / min_mean, W_beta)

        if MethodName == 'GO':
            W_hat1s = tf.random_gamma([1], tf.stop_gradient(W_alpha), 1.)
            W_hat1s = tf.maximum(min_W, tf.squeeze(W_hat1s, 0))
            Grad_W_alpha1 = GO_Gamma_v2(tf.stop_gradient(W_hat1s), tf.stop_gradient(W_alpha))
            W_hat1 = W_alpha * tf.stop_gradient(Grad_W_alpha1) - \
                tf.stop_gradient(W_alpha * Grad_W_alpha1) + \
                tf.stop_gradient(W_hat1s)
            W1_Fcorr = tf.zeros([1])

        if MethodName == 'GRep':
            posi0 = tf.polygamma(tf.constant(0, dtype=tf.float32), W_alpha)
            posi1 = tf.polygamma(tf.constant(1, dtype=tf.float32), W_alpha)
            W_hat1s = tf.random_gamma([1], tf.stop_gradient(W_alpha), 1.)
            W_hat1s = tf.maximum(min_W, tf.squeeze(W_hat1s, 0))
            epsilo = tf.stop_gradient(
                (tf.log(W_hat1s) - posi0) / tf.maximum(tf.pow(posi1, 0.5), 1e-8))
            log_W_hat1 = epsilo * tf.pow(posi1, 0.5) + posi0
            W_hat1 = tf.exp(log_W_hat1)
            W1_Fcorr = tf.reduce_sum(
                - tf.lgamma(W_alpha) + (W_alpha - 1.) * log_W_hat1 - W_hat1
                + log_W_hat1 + 0.5 * tf.log(posi1))

        if MethodName == 'RSVI':
            lambda_W1 = tf.squeeze(tf.random_gamma([1], W_alpha + Bf, 1.), 0)
            lambda_W1 = tf.stop_gradient(tf.maximum(min_W, lambda_W1))
            W_hat1, W1_Fcorr = reject_h_boosted(lambda_W1, W_alpha)

        W = W_hat1 / W_beta
        # W = tf.maximum(min_W, W)
        W = max_m_grad(min_W, W)
    return W, W_alpha, W_beta, W1_Fcorr
def fitGamma(cls, tau):
    alpha = 0.5 / (tf.log(tf.reduce_mean(tau))
                   + 1e-6  # added due to numerical instability
                   - tf.reduce_mean(tf.log(tau)))
    for i in range(20):
        alpha = (1. / (1. / alpha
                       + (tf.reduce_mean(tf.log(tau)) - tf.log(tf.reduce_mean(tau))
                          + tf.log(alpha) - tf.digamma(alpha))
                       / (alpha ** 2 * (1. / alpha - tf.polygamma(tf.ones_like(alpha), alpha)))))
    beta = alpha / tf.reduce_mean(tau)
    return (alpha, beta)
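The loop is the generalized-Newton fixed-point iteration on 1/α for the Gamma shape MLE, as described in Minka's note "Estimating a Gamma distribution". A self-contained NumPy sketch for sanity-checking it (`fit_gamma_numpy` is a hypothetical helper name, not from the original code):

import numpy as np
from scipy.special import digamma, polygamma

def fit_gamma_numpy(tau, iters=20):
    # Same update as fitGamma above, in NumPy.
    s = np.log(np.mean(tau)) - np.mean(np.log(tau))
    alpha = 0.5 / s  # moment-based initialization
    for _ in range(iters):
        num = np.mean(np.log(tau)) - np.log(np.mean(tau)) + np.log(alpha) - digamma(alpha)
        den = alpha ** 2 * (1.0 / alpha - polygamma(1, alpha))
        alpha = 1.0 / (1.0 / alpha + num / den)
    return alpha, alpha / np.mean(tau)  # shape, rate

rng = np.random.default_rng(0)
tau = rng.gamma(shape=3.0, scale=1.0 / 2.0, size=100_000)
print(fit_gamma_numpy(tau))  # approximately (3.0, 2.0)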
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, Component_Count):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4")
    mu5, sigma5, mix5 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder5")
    mu6, sigma6, mix6 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder6")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)

    p = 0.5
    # a = p / (1.0 - p)
    ard_init = -10.
    dropout_a = tf.get_variable("dropout", shape=[1],
                                initializer=tf.constant_initializer(ard_init))

    # Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)
    dropout_dis = distributions.Normal(loc=m1, scale=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(6))
    dropout_samples = tf.transpose(dropout_samples)
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    # Normalize the mixture weights produced by the encoders.
    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4, mix5, mix6], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # Apply component dropout to the Dirichlet samples and renormalize.
    mix_dropout1 = dropout_samples[:, 0:1] * mix_samples[:, 0:1]
    mix_dropout2 = dropout_samples[:, 1:2] * mix_samples[:, 1:2]
    mix_dropout3 = dropout_samples[:, 2:3] * mix_samples[:, 2:3]
    mix_dropout4 = dropout_samples[:, 3:4] * mix_samples[:, 3:4]
    mix_dropout5 = dropout_samples[:, 4:5] * mix_samples[:, 4:5]
    mix_dropout6 = dropout_samples[:, 5:6] * mix_samples[:, 5:6]
    sum1 = (mix_dropout1 + mix_dropout2 + mix_dropout3
            + mix_dropout4 + mix_dropout5 + mix_dropout6)
    mix_dropout1 = mix_dropout1 / sum1
    mix_dropout2 = mix_dropout2 / sum1
    mix_dropout3 = mix_dropout3 / sum1
    mix_dropout4 = mix_dropout4 / sum1
    mix_dropout5 = mix_dropout5 / sum1
    mix_dropout6 = mix_dropout6 / sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()

    ttf = [z1_samples, z2_samples, z3_samples, z4_samples, z5_samples, z6_samples]
    dHSIC_Value = dHSIC(ttf)

    # decoding (each component decodes its own latent sample)
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")
    y5 = Create_SubDecoder(z5_samples, n_hidden, dim_img, keep_prob, "decoder5")
    y6 = Create_SubDecoder(z6_samples, n_hidden, dim_img, keep_prob, "decoder6")

    # dropout out
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4
    y5 = y5 * mix_dropout5
    y6 = y6 * mix_dropout6
    y = y1 + y2 + y3 + y4 + y5 + y6
    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final")
    y = output

    # Standard-normal priors; all four are identical, and p_z4 is reused for z5/z6.
    m1 = np.zeros(dim_z, dtype=np.float32)
    v1 = np.ones(dim_z, dtype=np.float32)
    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                             scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)
    m2 = np.zeros(dim_z, dtype=np.float32)
    v2 = np.ones(dim_z, dtype=np.float32)
    p_z2 = distributions.Normal(loc=m2, scale=v2)
    m3 = np.zeros(dim_z, dtype=np.float32)
    v3 = np.ones(dim_z, dtype=np.float32)
    p_z3 = distributions.Normal(loc=m3, scale=v3)
    m4 = np.zeros(dim_z, dtype=np.float32)
    v4 = np.ones(dim_z, dtype=np.float32)
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))
    KL_divergence = (kl1 + kl2 + kl3 + kl4 + kl5 + kl6) / 6.0

    # loss
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)
    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.17, 0.17, 0.17, 0.17, 0.17, 0.17), shape=(batch_size, 6))
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1.0)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) \
        + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = tf.reduce_mean(a)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2
    loss = -ELBO + kl * p1 + p4 * dHSIC_Value + KL_Dropout2(dropout_a)
    z = z1_samples
    return y, z, loss, -marginal_likelihood, KL_divergence, dropout_samples
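The `r` and `a` terms implement the closed-form KL divergence between Dirichlet distributions; because the normalized mixture weights (approximately, after clipping) sum to one, ψ(Σₖ a₁ₖ) is taken as ψ(1) = `tf.polygamma(0., 1.)`:

$$\mathrm{KL}\!\left(\mathrm{Dir}(a_1)\,\|\,\mathrm{Dir}(a_2)\right) = \log\Gamma\!\Big(\textstyle\sum_k a_{1k}\Big) - \log\Gamma\!\Big(\textstyle\sum_k a_{2k}\Big) + \sum_k \log\Gamma(a_{2k}) - \sum_k \log\Gamma(a_{1k}) + \sum_k (a_{1k} - a_{2k})\Big(\psi(a_{1k}) - \psi\!\Big(\textstyle\sum_j a_{1j}\Big)\Big),$$

with ψ the digamma function, `tf.polygamma(0., ·)`. The same expression recurs in the remaining `autoencoder` variants below.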
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, Component_Count):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4")
    mu5, sigma5, mix5 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder5")
    mu6, sigma6, mix6 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder6")
    mu7, sigma7, mix7 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder7")
    mu8, sigma8, mix8 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder8")
    mu9, sigma9, mix9 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder9")
    mu10, sigma10, mix10 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder10")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)
    z7 = distributions.Normal(loc=mu7, scale=sigma7)
    z8 = distributions.Normal(loc=mu8, scale=sigma8)
    z9 = distributions.Normal(loc=mu9, scale=sigma9)
    z10 = distributions.Normal(loc=mu10, scale=sigma10)

    p = 0.5
    # a = p / (1.0 - p)
    ard_init = -10.
    dropout_a = tf.get_variable("dropout", shape=[1],
                                initializer=tf.constant_initializer(ard_init))

    # Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)
    dropout_dis = distributions.Normal(loc=m1, scale=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(10))
    dropout_samples = tf.transpose(dropout_samples)
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    # Here the dropout samples replace the encoder mixture weights outright.
    mix1 = dropout_samples[:, 0:1]
    mix2 = dropout_samples[:, 1:2]
    mix3 = dropout_samples[:, 2:3]
    mix4 = dropout_samples[:, 3:4]
    mix5 = dropout_samples[:, 4:5]
    mix6 = dropout_samples[:, 5:6]
    mix7 = dropout_samples[:, 6:7]
    mix8 = dropout_samples[:, 7:8]
    mix9 = dropout_samples[:, 8:9]
    mix10 = dropout_samples[:, 9:10]

    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6 + mix7 + mix8 + mix9 + mix10
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1
    mix7 = mix7 / sum1
    mix8 = mix8 / sum1
    mix9 = mix9 / sum1
    mix10 = mix10 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4, mix5, mix6, mix7, mix8, mix9, mix10], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()
    z7_samples = z7.sample()
    z8_samples = z8.sample()
    z9_samples = z9.sample()
    z10_samples = z10.sample()

    ttf = [z1_samples, z2_samples, z3_samples, z4_samples, z5_samples,
           z6_samples, z7_samples, z8_samples, z9_samples, z10_samples]
    # Pairwise variant kept for reference:
    # dHSIC_Value = sum(hsic_individual(z_i, z_j) over all pairs i < j)
    dHSIC_Value = dHSIC(ttf)
    # dHSIC_Value = last_term

    # decoding; optional clipping disabled:
    # y_i = tf.clip_by_value(y_i, 1e-8, 1 - 1e-8)
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")
    y5 = Create_SubDecoder(z5_samples, n_hidden, dim_img, keep_prob, "decoder5")
    y6 = Create_SubDecoder(z6_samples, n_hidden, dim_img, keep_prob, "decoder6")
    y7 = Create_SubDecoder(z7_samples, n_hidden, dim_img, keep_prob, "decoder7")
    y8 = Create_SubDecoder(z8_samples, n_hidden, dim_img, keep_prob, "decoder8")
    y9 = Create_SubDecoder(z9_samples, n_hidden, dim_img, keep_prob, "decoder9")
    y10 = Create_SubDecoder(z10_samples, n_hidden, dim_img, keep_prob, "decoder10")

    # dropout out
    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]
    y5 = y5 * mix_samples[:, 4:5]
    y6 = y6 * mix_samples[:, 5:6]
    y7 = y7 * mix_samples[:, 6:7]
    y8 = y8 * mix_samples[:, 7:8]
    y9 = y9 * mix_samples[:, 8:9]
    y10 = y10 * mix_samples[:, 9:10]
    y = y1 + y2 + y3 + y4 + y5 + y6 + y7 + y8 + y9 + y10

    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final", reuse=False)
    # output = tf.clip_by_value(output, 1e-8, 1 - 1e-8)
    y = output

    # Standard-normal priors; all four are identical, and p_z4 is reused for z5..z10.
    m1 = np.zeros(dim_z, dtype=np.float32)
    v1 = np.ones(dim_z, dtype=np.float32)
    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                             scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)
    m2 = np.zeros(dim_z, dtype=np.float32)
    v2 = np.ones(dim_z, dtype=np.float32)
    p_z2 = distributions.Normal(loc=m2, scale=v2)
    m3 = np.zeros(dim_z, dtype=np.float32)
    v3 = np.ones(dim_z, dtype=np.float32)
    p_z3 = distributions.Normal(loc=m3, scale=v3)
    m4 = np.zeros(dim_z, dtype=np.float32)
    v4 = np.ones(dim_z, dtype=np.float32)
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))
    kl7 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z7, p_z4), 1))
    kl8 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z8, p_z4), 1))
    kl9 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z9, p_z4), 1))
    kl10 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z10, p_z4), 1))
    KL_divergence = (kl1 + kl2 + kl3 + kl4 + kl5 + kl6 + kl7 + kl8 + kl9 + kl10) / 10.0

    # loss
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)
    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = mix_parameters
    a2 = tf.constant((0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
                     shape=(batch_size, 10))
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1.0)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) \
        + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = tf.reduce_mean(a)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2
    loss = -ELBO + kl * p1 + p4 * dHSIC_Value  # diverse KL divergence
    z = z1_samples
    return y, z, loss, -marginal_likelihood, KL_divergence
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term):
    # encoding
    mu1, sigma1, mix1 = Create_Celeba_Encoder(x_hat, 64, "encoder1")
    mu2, sigma2, mix2 = Create_Celeba_Encoder(x_hat, 64, "encoder2")
    mu3, sigma3, mix3 = Create_Celeba_Encoder(x_hat, 64, "encoder3")
    mu4, sigma4, mix4 = Create_Celeba_Encoder(x_hat, 64, "encoder4")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    p = 0.5
    # a = p / (1.0 - p)
    ard_init = 0.5
    dropout_a = tf.get_variable("dropout", shape=[1],
                                initializer=tf.constant_initializer(ard_init))

    # Dropout of components: Bernoulli gates on the mixture weights.
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)
    dropout_a = tf.clip_by_value(dropout_a, 0.2, 1)
    dropout_dis = distributions.Bernoulli(logits=None, probs=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(batch_size, 4))
    dropout_samples = tf.reshape(dropout_samples, (batch_size, 4))
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    mix1 = mix1 * dropout_samples[:, 0:1]
    mix2 = mix2 * dropout_samples[:, 1:2]
    mix3 = mix3 * dropout_samples[:, 2:3]
    mix4 = mix4 * dropout_samples[:, 3:4]
    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    ttf = [z1_samples, z2_samples, z3_samples, z4_samples]
    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_Celeba_SubDecoder_(z1_samples, 64, "decoder1")
    y2 = Create_Celeba_SubDecoder_(z2_samples, 64, "decoder2")
    y3 = Create_Celeba_SubDecoder_(z3_samples, 64, "decoder3")
    y4 = Create_Celeba_SubDecoder_(z4_samples, 64, "decoder4")

    # Weight each decoder's feature map by its mixture sample.
    y1 = tf.reshape(y1, (-1, 8 * 8 * 256))
    y2 = tf.reshape(y2, (-1, 8 * 8 * 256))
    y3 = tf.reshape(y3, (-1, 8 * 8 * 256))
    y4 = tf.reshape(y4, (-1, 8 * 8 * 256))
    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]
    y1 = tf.reshape(y1, (batch_size, 8, 8, 256))
    y2 = tf.reshape(y2, (batch_size, 8, 8, 256))
    y3 = tf.reshape(y3, (batch_size, 8, 8, 256))
    y4 = tf.reshape(y4, (batch_size, 8, 8, 256))
    y = y1 + y2 + y3 + y4
    y = Create_Celeba_Generator_(y, 64, "final")

    # Standard-normal priors (all identical)
    m1 = np.zeros(dim_z, dtype=np.float32)
    v1 = np.ones(dim_z, dtype=np.float32)
    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                             scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)
    m2 = np.zeros(dim_z, dtype=np.float32)
    v2 = np.ones(dim_z, dtype=np.float32)
    p_z2 = distributions.Normal(loc=m2, scale=v2)
    m3 = np.zeros(dim_z, dtype=np.float32)
    v3 = np.ones(dim_z, dtype=np.float32)
    p_z3 = distributions.Normal(loc=m3, scale=v3)
    m4 = np.zeros(dim_z, dtype=np.float32)
    v4 = np.ones(dim_z, dtype=np.float32)
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    z = z1
    mu = mu1
    sigma = sigma1
    epsilon = 1e-8

    # additional loss
    reconstruction_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x - y), [1, 2, 3]))
    # kl_divergence = tf.reduce_mean(-0.5 * tf.reduce_sum(1 + sigma - tf.square(mu) - tf.exp(sigma), 1))
    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4))
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1.0)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) \
        + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = tf.reduce_mean(a)

    p1 = 0.1
    loss = reconstruction_loss + KL_divergence * p1 + kl + dHSIC_Value
    marginal_likelihood = reconstruction_loss
    return y, z, loss, -marginal_likelihood, KL_divergence
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, dropout_in):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    # Trainable dropout probability, initialized at 0.1 in logit space.
    init_min = 0.1
    init_max = 0.1
    init_min = (np.log(init_min) - np.log(1. - init_min))
    init_max = (np.log(init_max) - np.log(1. - init_max))
    dropout_a = tf.get_variable(name='dropout',
                                shape=None,
                                initializer=tf.random_uniform((1,), init_min, init_max),
                                dtype=tf.float32,
                                trainable=True)
    dropout_p = tf.nn.sigmoid(dropout_a)
    dropout_b = 1 - dropout_p
    dropout_log = tf.log(dropout_p)
    dropout_log2 = tf.log(dropout_b)

    # Sample 0/1 gates with a Gumbel-softmax relaxation.
    cats_range = np.zeros((batch_size * 4, 2))
    cats_range[:, 0] = 0
    cats_range[:, 1] = 1
    dropout_samples = gumbel_softmax_sample3(dropout_log, dropout_log2, cats_range,
                                             [batch_size * 4])
    dropout_samples = tf.reshape(dropout_samples, (-1, 4))

    dropout_regularizer = dropout_p * tf.log(dropout_p)
    dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p)
    dropout_regularizer *= dropout_regularizer * 10 * -1
    dropout_regularizer = tf.clip_by_value(dropout_regularizer, -10, 0)

    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # Gate the Dirichlet samples and renormalize.
    mix_dropout1 = dropout_samples[:, 0:1] * mix_samples[:, 0:1]
    mix_dropout2 = dropout_samples[:, 1:2] * mix_samples[:, 1:2]
    mix_dropout3 = dropout_samples[:, 2:3] * mix_samples[:, 2:3]
    mix_dropout4 = dropout_samples[:, 3:4] * mix_samples[:, 3:4]
    sum1 = mix_dropout1 + mix_dropout2 + mix_dropout3 + mix_dropout4
    mix_dropout1 = mix_dropout1 / sum1
    mix_dropout2 = mix_dropout2 / sum1
    mix_dropout3 = mix_dropout3 / sum1
    mix_dropout4 = mix_dropout4 / sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    ttf = [z1_samples, z2_samples, z3_samples, z4_samples]
    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")

    # dropout out
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4
    y = y1 + y2 + y3 + y4
    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final")
    y = output

    # Standard-normal priors (all identical)
    m1 = np.zeros(dim_z, dtype=np.float32)
    v1 = np.ones(dim_z, dtype=np.float32)
    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                             scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)
    m2 = np.zeros(dim_z, dtype=np.float32)
    v2 = np.ones(dim_z, dtype=np.float32)
    p_z2 = distributions.Normal(loc=m2, scale=v2)
    m3 = np.zeros(dim_z, dtype=np.float32)
    v3 = np.ones(dim_z, dtype=np.float32)
    p_z3 = distributions.Normal(loc=m3, scale=v3)
    m4 = np.zeros(dim_z, dtype=np.float32)
    v4 = np.ones(dim_z, dtype=np.float32)
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # loss
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)
    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4))
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1.0)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) \
        + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = tf.reduce_mean(a)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2
    loss = -ELBO + kl * p1 + p4 * dHSIC_Value + dropout_regularizer
    z = z1_samples
    return y, z, loss, -marginal_likelihood, dropout_regularizer, dropout_p, dropout_samples
import tensorflow as tf

"""tf.polygamma(a, x, name=None)

Purpose: computes psi^{(a)}(x), where psi^{(a)}(x) = (d^a / dx^a) psi(x) and
psi is the digamma function; a = 0 yields digamma itself, a = 1 the trigamma
function.
Input: x is a tensor of type `float32` or `float64`; the order is a constant
of the same type, e.g. a = tf.constant(1, tf.float64).
"""
a = tf.constant(1, tf.float64)
x = tf.constant([[1, 2, 3, 4]], tf.float64)
z = tf.polygamma(a, x)

sess = tf.Session()
print(sess.run(z))
sess.close()

# z ==> [[1.64493407 0.64493407 0.39493407 0.28382296]]
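The same values can be cross-checked outside TensorFlow with SciPy; note that trigamma(1) = π²/6 ≈ 1.6449, matching the first entry above:

import numpy as np
from scipy.special import polygamma

x = np.array([1.0, 2.0, 3.0, 4.0])
print(polygamma(1, x))   # [1.64493407 0.64493407 0.39493407 0.28382296]
print(np.pi ** 2 / 6)    # 1.6449340668... = trigamma(1)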
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term):
    # encoding
    mu1, sigma1, mix1 = Create_Celeba_Encoder(x_hat, 64, "encoder1")
    mu2, sigma2, mix2 = Create_Celeba_Encoder(x_hat, 64, "encoder2")
    mu3, sigma3, mix3 = Create_Celeba_Encoder(x_hat, 64, "encoder3")
    mu4, sigma4, mix4 = Create_Celeba_Encoder(x_hat, 64, "encoder4")
    mu5, sigma5, mix5 = Create_Celeba_Encoder(x_hat, 64, "encoder5")
    mu6, sigma6, mix6 = Create_Celeba_Encoder(x_hat, 64, "encoder6")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)

    # Trainable dropout probability, initialized at 0.1 in logit space.
    init_min = 0.1
    init_max = 0.1
    init_min = (np.log(init_min) - np.log(1. - init_min))
    init_max = (np.log(init_max) - np.log(1. - init_max))
    dropout_a = tf.get_variable(name='dropout',
                                shape=None,
                                initializer=tf.random_uniform((1,), init_min, init_max),
                                dtype=tf.float32,
                                trainable=True)
    dropout_p = tf.nn.sigmoid(dropout_a)
    dropout_b = 1 - dropout_p
    dropout_log = tf.log(dropout_p)
    dropout_log2 = tf.log(dropout_b)

    # Sample 0/1 gates with a Gumbel-softmax relaxation.
    cats_range = np.zeros((batch_size * 6, 2))
    cats_range[:, 0] = 0
    cats_range[:, 1] = 1
    dropout_samples = gumbel_softmax_sample3(dropout_log, dropout_log2, cats_range,
                                             [batch_size * 6])
    dropout_samples = tf.reshape(dropout_samples, (-1, 6))

    dropout_regularizer = dropout_p * tf.log(dropout_p)
    dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p)
    dropout_regularizer *= dropout_regularizer * 10 * -1
    dropout_regularizer = tf.clip_by_value(dropout_regularizer, -10, 0)

    # Gate the mixture weights and normalize.
    mix1 = mix1 * dropout_samples[:, 0:1]
    mix2 = mix2 * dropout_samples[:, 1:2]
    mix3 = mix3 * dropout_samples[:, 2:3]
    mix4 = mix4 * dropout_samples[:, 3:4]
    mix5 = mix5 * dropout_samples[:, 4:5]
    mix6 = mix6 * dropout_samples[:, 5:6]
    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4, mix5, mix6], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()

    ttf = [z1_samples, z2_samples, z3_samples, z4_samples, z5_samples, z6_samples]
    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_Celeba_SubDecoder_(z1_samples, 64, "decoder1")
    y2 = Create_Celeba_SubDecoder_(z2_samples, 64, "decoder2")
    y3 = Create_Celeba_SubDecoder_(z3_samples, 64, "decoder3")
    y4 = Create_Celeba_SubDecoder_(z4_samples, 64, "decoder4")
    y5 = Create_Celeba_SubDecoder_(z5_samples, 64, "decoder5")
    y6 = Create_Celeba_SubDecoder_(z6_samples, 64, "decoder6")

    # Weight each decoder's feature map by its mixture sample.
    y1 = tf.reshape(y1, (-1, 8 * 8 * 256))
    y2 = tf.reshape(y2, (-1, 8 * 8 * 256))
    y3 = tf.reshape(y3, (-1, 8 * 8 * 256))
    y4 = tf.reshape(y4, (-1, 8 * 8 * 256))
    y5 = tf.reshape(y5, (-1, 8 * 8 * 256))
    y6 = tf.reshape(y6, (-1, 8 * 8 * 256))
    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]
    y5 = y5 * mix_samples[:, 4:5]
    y6 = y6 * mix_samples[:, 5:6]
    y1 = tf.reshape(y1, (batch_size, 8, 8, 256))
    y2 = tf.reshape(y2, (batch_size, 8, 8, 256))
    y3 = tf.reshape(y3, (batch_size, 8, 8, 256))
    y4 = tf.reshape(y4, (batch_size, 8, 8, 256))
    y5 = tf.reshape(y5, (batch_size, 8, 8, 256))
    y6 = tf.reshape(y6, (batch_size, 8, 8, 256))
    y = y1 + y2 + y3 + y4 + y5 + y6
    y = Create_Celeba_Generator_(y, 64, "final")

    # Standard-normal priors; all four are identical, and p_z4 is reused for z5/z6.
    m1 = np.zeros(dim_z, dtype=np.float32)
    v1 = np.ones(dim_z, dtype=np.float32)
    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                             scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)
    m2 = np.zeros(dim_z, dtype=np.float32)
    v2 = np.ones(dim_z, dtype=np.float32)
    p_z2 = distributions.Normal(loc=m2, scale=v2)
    m3 = np.zeros(dim_z, dtype=np.float32)
    v3 = np.ones(dim_z, dtype=np.float32)
    p_z3 = distributions.Normal(loc=m3, scale=v3)
    m4 = np.zeros(dim_z, dtype=np.float32)
    v4 = np.ones(dim_z, dtype=np.float32)
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    z = z1
    mu = mu1
    sigma = sigma1
    epsilon = 1e-8

    # additional loss
    reconstruction_loss = tf.reduce_mean(tf.reduce_sum(tf.square(x - y), [1, 2, 3]))
    # kl_divergence = tf.reduce_mean(-0.5 * tf.reduce_sum(1 + sigma - tf.square(mu) - tf.exp(sigma), 1))
    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))
    kl = kl1 + kl2 + kl3 + kl4 + kl5 + kl6
    kl_divergence = kl / 6.0

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.17, 0.17, 0.17, 0.17, 0.17, 0.17), shape=(batch_size, 6))
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1.0)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) \
        + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = tf.reduce_mean(a)

    p1 = 1
    loss = reconstruction_loss + kl_divergence * p1 + kl + dHSIC_Value + dropout_regularizer
    KL_divergence = kl_divergence
    marginal_likelihood = reconstruction_loss
    return y, z, loss, -marginal_likelihood, kl_divergence, dropout_p, dropout_samples
tf.strided_slice_grad()
tf.gather()
tf.gather_nd()
tf.gather_v2()
tf.get_summary_op()
tf.gradients()
tf.boolean_mask()
tf.sparse_mask()
tf.sequence_mask()
tf.random_gamma()
tf.digamma()
tf.igamma()
tf.lgamma()
tf.polygamma()
tf.igammac()
tf.tensor_shape.as_shape()

# gfile
tf.gfile.Copy()
tf.gfile.DeleteRecursively()
tf.gfile.Exists()
tf.gfile.Glob()
tf.gfile.IsDirectory()
tf.gfile.ListDirectory()
tf.gfile.MakeDirs()
tf.gfile.MkDir()
tf.gfile.Remove()
tf.gfile.Rename()
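A quick illustration of the gamma-family ops in this list (a minimal TF1-style sketch; input values are arbitrary examples):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0], tf.float64)
ops = {
    "digamma": tf.digamma(x),    # psi(x)
    "lgamma": tf.lgamma(x),      # log Gamma(x)
    "trigamma": tf.polygamma(tf.constant(1.0, tf.float64), x),
    "igamma": tf.igamma(x, x),   # lower regularized incomplete gamma P(a, x)
    "igammac": tf.igammac(x, x), # upper complement Q(a, x) = 1 - P(a, x)
}
with tf.Session() as sess:
    for name, op in ops.items():
        print(name, sess.run(op))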