Example #1
    def _weight_hessian_bb(
        self,
        X,
        loc,
        scale,
    ):
        one_minus_loc = 1 - loc
        loc_times_scale = loc * scale
        one_minus_loc_times_scale = one_minus_loc * scale
        scalar_one = tf.constant(1, shape=(), dtype=self.dtype)

        if isinstance(X, tf.SparseTensor):
            # Using the dense matrix  of the location model to serve the correct shapes for the sparse X.
            const1 = tf.sparse_add(
                tf.zeros_like(loc),
                X).__div__(-tf.sparse.add(X, -tf.ones_like(loc)))
            # Adding tf1.zeros_like(loc) is a hack to avoid bug thrown by log on sparse matrix below,
            # to_dense does not work.
            const2 = loc * (tf.log(tf.sparse_add(tf.zeros_like(loc), X)) - tf.digamma(loc_times_scale)) \
                     - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \
                     + tf.digamma(scale)
        else:
            # log is applied below (mirroring the sparse branch above)
            const1 = X / (1 - X)
            const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\
                     - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \
                     + tf.digamma(scale)
        const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\
                          + tf.polygamma(scalar_one, scale)\
                          - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc))
        const = scale * (const2 + const3)

        return const
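The Hessian weight above mixes digamma and order-1 polygamma (trigamma) terms. As a quick standalone sanity check of that order-1 term, here is a minimal NumPy/SciPy sketch with illustrative scalar values (not part of the original class):

import numpy as np
from scipy.special import polygamma

# polygamma(1, x) is the trigamma function used for the second-order terms above.
loc, scale = 0.3, 5.0
trigamma_part = (-loc ** 2 * polygamma(1, loc * scale)
                 + polygamma(1, scale)
                 - (1 - loc) ** 2 * polygamma(1, (1 - loc) * scale))
print(trigamma_part)            # scalar analogue of const3 / scale above
print(polygamma(1, 1.0))        # trigamma(1) = pi**2 / 6 ~= 1.6449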
Example #2
    def _weight_hessian_aa(
        self,
        X,
        loc,
        scale,
    ):
        one_minus_loc = 1 - loc
        loc_times_scale = loc * scale
        one_minus_loc_times_scale = one_minus_loc * scale

        if isinstance(X, tf.SparseTensor):
            # Using the dense matrix  of the location model to serve the correct shapes for the sparse X.
            const1 = tf.sparse_add(
                tf.zeros_like(loc),
                X).__div__(-tf.sparse.add(X, -tf.ones_like(loc)))
            # Adding tf1.zeros_like(loc) is a hack to avoid bug thrown by log on sparse matrix below,
            # to_dense does not work.
        else:
            const1 = tf.log(X / (tf.ones_like(X) - X))

        const2 = (1 -
                  2 * loc) * (-tf.digamma(loc_times_scale) +
                              tf.digamma(one_minus_loc_times_scale) + const1)
        const3 = loc * one_minus_loc_times_scale * (
            -tf.polygamma(tf.ones_like(loc), loc_times_scale) -
            tf.polygamma(tf.ones_like(loc), one_minus_loc_times_scale))
        const = loc * one_minus_loc_times_scale * (const2 + const3)
        return const
Example #3
 def to_sd(self, alpha):
     """
     :param alpha: (Tensor)
     :return: sigma (Tensor)
     """
     _one = K.cast(1, dtype=alpha.dtype)
     d1 = self.latent_dim - 1
     var = (T.polygamma(_one, alpha[:, :d1]) +
            T.polygamma(_one, alpha[:, -1:]))
     sigma = T.sqrt(var)
     return sigma
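The variance here is a sum of two trigamma terms: if a Dirichlet(alpha) vector is built from independent Gamma(alpha_i, 1) draws, then Var[log(x_k / x_K)] = trigamma(alpha_k) + trigamma(alpha_K), which is what T.polygamma(1, .) evaluates. A standalone NumPy/SciPy sketch of the same computation (illustrative alpha, batch of one):

import numpy as np
from scipy.special import polygamma

alpha = np.array([[2.0, 3.0, 4.0, 5.0]])   # one batch row, latent_dim = 4
d1 = alpha.shape[1] - 1
var = polygamma(1, alpha[:, :d1]) + polygamma(1, alpha[:, -1:])
sigma = np.sqrt(var)
print(sigma)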
Example #4
 def nuStep(cls, nu, n, delta, p=1.):
     three = tf.constant(3., dtype=nu.dtype)
     for i in range(2):
         w = (nu+p)/(nu+delta)
         fp = (-tf.digamma(nu/2) + tf.log(nu/2)
               + 1./n*tf.reduce_sum(tf.log((nu+p)/(nu+delta)) - w,
                                    axis=0)
               + 1
               + tf.digamma((p+nu)/2) - tf.log((p+nu)/2))
         fpp = (tf.polygamma(three, nu/2)/2. + 1./nu
                + tf.polygamma(three, (p+nu)/2)/2. - 1./(nu+p)
                + 1./n*tf.reduce_sum((delta-p)/(nu+delta)**2*(w-1),
                                     axis=0))
         nu = nu + fp/fpp
     return(nu)
Example #5
def q_z_x(name, x, K, reuse=False):
    with tf.variable_scope('q_z_x' + name, reuse=reuse):
        # h1 = tf.nn.relu(tf.layers.dense(x, units=h_dim[1]))
        h1 = x

        z_Ralpha1 = tf.layers.dense(h1, units=K, kernel_initializer=tf.random_normal_initializer(0, 0.01))
        z_Ralpha1 = max_m_grad(min_z_alpha_rate, z_Ralpha1)
        # z_Ralpha1 = tf.maximum(min_z_alpha_rate, z_Ralpha1)
        z_alpha1 = tf.nn.softplus(z_Ralpha1)

        z_Rbeta1 = tf.layers.dense(h1, units=K, kernel_initializer=tf.random_normal_initializer(0, 0.01))
        z_Rbeta1 = max_m_grad(min_z_beta_rate, z_Rbeta1)
        # z_Rbeta1 = tf.maximum(min_z_beta_rate, z_Rbeta1)
        z_beta1 = tf.nn.softplus(z_Rbeta1)
        # z_beta1 = min_m_grad(z_alpha1 / min_mean, z_beta1)

        if MethodName == 'GO':
            z_hat1s = tf.random_gamma([1], tf.stop_gradient(z_alpha1), 1.)
            z_hat1s = tf.maximum(min_z, tf.squeeze(z_hat1s, 0))
            Grad_z_alpha1 = GO_Gamma_v2(tf.stop_gradient(z_hat1s), tf.stop_gradient(z_alpha1))
            z_hat1 = z_alpha1 * tf.stop_gradient(Grad_z_alpha1) - \
                     tf.stop_gradient(z_alpha1 * Grad_z_alpha1) + \
                     tf.stop_gradient(z_hat1s)
            z1_Fcorr = tf.zeros([1])

        if MethodName == 'GRep':
            posi0 = tf.polygamma(tf.constant(0,dtype=tf.float32),z_alpha1)
            posi1 = tf.polygamma(tf.constant(1,dtype=tf.float32),z_alpha1)
            z_hat1s = tf.random_gamma([1], tf.stop_gradient(z_alpha1), 1.)
            z_hat1s = tf.maximum(min_z, tf.squeeze(z_hat1s, 0))
            epsilo = tf.stop_gradient( (tf.log(z_hat1s)-posi0)/tf.maximum((tf.pow(posi1,0.5)),1e-5) )
            log_z_hat1 = epsilo*tf.pow(posi1,0.5)+posi0
            z_hat1 = tf.exp( log_z_hat1 )
            z1_Fcorr = tf.reduce_sum(
                    - tf.lgamma(z_alpha1) + (z_alpha1-1.)*log_z_hat1 - z_hat1
                    + log_z_hat1 + 0.5 * tf.log( posi1 )
                    ) 

        if MethodName == 'RSVI':
            lambda_z1 = tf.squeeze(tf.random_gamma([1], z_alpha1 + Bf, 1.), 0)
            lambda_z1 = tf.stop_gradient(tf.maximum(min_z, lambda_z1))
            z_hat1, z1_Fcorr = reject_h_boosted(lambda_z1, z_alpha1)

        z1 = z_hat1 / z_beta1
        # z1 = tf.maximum(min_z, z1)
        z1 = max_m_grad(min_z, z1)

        return z1, z_alpha1, z_beta1, z1_Fcorr
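In the 'GRep' branch above, posi0 and posi1 are the digamma and trigamma of z_alpha1, and epsilo standardizes log z_hat1s using the exact mean and variance of a log-gamma variable: for z ~ Gamma(alpha, 1), E[log z] = digamma(alpha) and Var[log z] = trigamma(alpha). A minimal NumPy check of that standardization (illustrative values, independent of the graph above):

import numpy as np
from scipy.special import digamma, polygamma

rng = np.random.default_rng(0)
alpha = 3.5
z = rng.gamma(alpha, 1.0, size=100_000)
eps = (np.log(z) - digamma(alpha)) / np.sqrt(polygamma(1, alpha))
print(eps.mean(), eps.var())    # approximately 0 and 1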
Example #6
def q_W(name, V, K, reuse=False):
    with tf.variable_scope('q_W' + name, reuse=reuse):
        W_aW = tf.get_variable("W_aW", [V, K], tf.float32,
                               tf.random_uniform_initializer(0.1, 10))
        RW_aW = max_m_grad(min_W_alpha_rate, W_aW)
        # RW_aW = tf.maximum(min_W_alpha_rate, W_aW)
        W_alpha = tf.nn.softplus(RW_aW)

        W_bW = tf.get_variable("W_bW", [V, K], tf.float32,
                               tf.random_uniform_initializer(0.1, 10))
        RW_bW = max_m_grad(min_W_beta_rate, W_bW)
        # RW_bW = tf.maximum(min_W_beta_rate, W_bW)
        W_beta = tf.nn.softplus(RW_bW)
        # W_beta = tf.nn.softplus(W_bW)
        # W_beta = min_m_grad(W_alpha / min_mean, W_beta)

        if MethodName == 'GO':
            W_hat1s = tf.random_gamma([1], tf.stop_gradient(W_alpha), 1.)
            W_hat1s = tf.maximum(min_W, tf.squeeze(W_hat1s, 0))
            Grad_W_alpha1 = GO_Gamma_v2(tf.stop_gradient(W_hat1s), tf.stop_gradient(W_alpha))
            W_hat1 = W_alpha * tf.stop_gradient(Grad_W_alpha1) - \
                     tf.stop_gradient(W_alpha * Grad_W_alpha1) + \
                     tf.stop_gradient(W_hat1s)
            W1_Fcorr = tf.zeros([1])

        if MethodName == 'GRep':
            posi0 = tf.polygamma(tf.constant(0,dtype=tf.float32),W_alpha)
            posi1 = tf.polygamma(tf.constant(1,dtype=tf.float32),W_alpha)
            W_hat1s = tf.random_gamma([1], tf.stop_gradient(W_alpha), 1.)
            W_hat1s = tf.maximum(min_W, tf.squeeze(W_hat1s, 0))
            epsilo = tf.stop_gradient( (tf.log(W_hat1s)-posi0)/tf.maximum((tf.pow(posi1,0.5)),1e-8) )
            log_W_hat1 = epsilo*tf.pow(posi1,0.5)+posi0
            W_hat1 = tf.exp( log_W_hat1 )
            W1_Fcorr = tf.reduce_sum(
                    - tf.lgamma(W_alpha) + (W_alpha-1.)*log_W_hat1 - W_hat1
                    + log_W_hat1 + 0.5 * tf.log( posi1 )
                    ) 

        if MethodName == 'RSVI':
            lambda_W1 = tf.squeeze(tf.random_gamma([1], W_alpha + Bf, 1.), 0)
            lambda_W1 = tf.stop_gradient(tf.maximum(min_W, lambda_W1))
            W_hat1, W1_Fcorr = reject_h_boosted(lambda_W1, W_alpha)

        W = W_hat1 / W_beta
        # W = tf.maximum(min_W, W)
        W = max_m_grad(min_W, W)

        return W, W_alpha, W_beta, W1_Fcorr
Example #7
    def fitGamma(cls, tau):
        alpha = 0.5 / (
            tf.log(tf.reduce_mean(tau)) +
            1e-6  # added due to numerical instability
            - tf.reduce_mean(tf.log(tau)))
        for i in range(20):
            alpha = (
                1. /
                (1. / alpha +
                 (tf.reduce_mean(tf.log(tau)) - tf.log(tf.reduce_mean(tau)) +
                  tf.log(alpha) - tf.digamma(alpha)) /
                 (alpha**2 *
                  (1. / alpha - tf.polygamma(tf.ones_like(alpha), alpha)))))

        beta = alpha / tf.reduce_mean(tau)
        return (alpha, beta)
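fitGamma starts from a moment-based initial shape and then refines it with a Newton-style update built from digamma and trigamma (tf.polygamma(1, .)). A standalone NumPy/SciPy sketch of the same iteration on synthetic data (illustrative sample size and parameters):

import numpy as np
from scipy.special import digamma, polygamma

rng = np.random.default_rng(1)
tau = rng.gamma(shape=4.0, scale=0.5, size=50_000)   # true alpha = 4, beta = 2

alpha = 0.5 / (np.log(tau.mean()) + 1e-6 - np.log(tau).mean())
for _ in range(20):
    num = np.log(tau).mean() - np.log(tau.mean()) + np.log(alpha) - digamma(alpha)
    den = alpha ** 2 * (1.0 / alpha - polygamma(1, alpha))
    alpha = 1.0 / (1.0 / alpha + num / den)
beta = alpha / tau.mean()
print(alpha, beta)    # close to (4, 2)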
Example #8
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term, Component_Count):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder4")
    mu5, sigma5, mix5 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder5")
    mu6, sigma6, mix6 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob, "encoder6")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)

    p = 0.5
    # a = p / (1.0-p)
    ard_init = -10.
    dropout_a = tf.get_variable("dropout", shape=[1], initializer=tf.constant_initializer(ard_init))

    # Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)

    dropout_dis = distributions.Normal(loc=m1, scale=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(6))
    dropout_samples = tf.transpose(dropout_samples)
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4,mix5,mix6], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    mix_dropout1 = dropout_samples[:, 0:1] * mix_samples[:, 0:1]
    mix_dropout2 = dropout_samples[:, 1:2] * mix_samples[:, 1:2]
    mix_dropout3 = dropout_samples[:, 2:3] * mix_samples[:, 2:3]
    mix_dropout4 = dropout_samples[:, 3:4] * mix_samples[:, 3:4]
    mix_dropout5 = dropout_samples[:, 4:5] * mix_samples[:, 4:5]
    mix_dropout6 = dropout_samples[:, 5:6] * mix_samples[:, 5:6]

    sum1 = mix_dropout1 + mix_dropout2 + mix_dropout3 + mix_dropout4 + mix_dropout5 + mix_dropout6
    mix_dropout1 = mix_dropout1 / sum1
    mix_dropout2 = mix_dropout2 / sum1
    mix_dropout3 = mix_dropout3 / sum1
    mix_dropout4 = mix_dropout4 / sum1
    mix_dropout5 = mix_dropout5 / sum1
    mix_dropout6 = mix_dropout6 / sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)
    ttf.append(z5_samples)
    ttf.append(z6_samples)

    dHSIC_Value = dHSIC(ttf)


    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob, "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")
    y5 = Create_SubDecoder(z5_samples, n_hidden, dim_img, keep_prob, "decoder5")
    y6 = Create_SubDecoder(z6_samples, n_hidden, dim_img, keep_prob, "decoder6")

    # dropout out
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4
    y5 = y5 * mix_dropout5
    y6 = y6 * mix_dropout6

    y = y1 + y2 + y3 + y4 + y5 + y6
    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final")
    y = output

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1,
                                scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2,
                                scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3,
                                scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4,
                                scale=v4)

    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))

    KL_divergence = (kl1 + kl2 + kl3 + kl4 + kl5 + kl6) / 6.0

    # loss
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)

    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.17, 0.17, 0.17, 0.17,0.17,0.17), shape=(batch_size, 6))

    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(tf.lgamma(a2),
                                                                                                    axis=-1) - tf.reduce_sum(
        tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2

    loss = -ELBO + kl * p1 + p4 * dHSIC_Value + KL_Dropout2(dropout_a)

    z = z1_samples
    return y, z, loss, -marginal_likelihood, KL_divergence,dropout_samples
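The block labelled "KL divergence between two Dirichlet distributions" uses tf.lgamma together with tf.polygamma(0.0, .), which is the digamma function. For reference, here is a standalone NumPy/SciPy sketch of the standard closed form KL(Dir(a1) || Dir(a2)) (illustrative values, separate from the graph above):

import numpy as np
from scipy.special import gammaln, digamma

def dirichlet_kl(a1, a2):
    # Closed-form KL(Dir(a1) || Dir(a2)); each row is a parameter vector.
    a1, a2 = np.atleast_2d(a1).astype(float), np.atleast_2d(a2).astype(float)
    s1 = a1.sum(axis=-1, keepdims=True)
    return (gammaln(s1[:, 0]) - gammaln(a2.sum(axis=-1))
            + (gammaln(a2) - gammaln(a1)).sum(axis=-1)
            + ((a1 - a2) * (digamma(a1) - digamma(s1))).sum(axis=-1))

print(dirichlet_kl([2.0, 3.0, 4.0], [1.0, 1.0, 1.0]))   # positive
print(dirichlet_kl([2.0, 3.0, 4.0], [2.0, 3.0, 4.0]))   # [0.]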
Example #9
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term,
                Component_Count):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder4")
    mu5, sigma5, mix5 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder5")
    mu6, sigma6, mix6 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder6")
    mu7, sigma7, mix7 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder7")
    mu8, sigma8, mix8 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder8")
    mu9, sigma9, mix9 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder9")
    mu10, sigma10, mix10 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z,
                                                keep_prob, "encoder10")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)
    z7 = distributions.Normal(loc=mu7, scale=sigma7)
    z8 = distributions.Normal(loc=mu8, scale=sigma8)
    z9 = distributions.Normal(loc=mu9, scale=sigma9)
    z10 = distributions.Normal(loc=mu10, scale=sigma10)

    p = 0.5
    #a = p / (1.0-p)
    ard_init = -10.
    dropout_a = tf.get_variable("dropout",
                                shape=[1],
                                initializer=tf.constant_initializer(ard_init))

    #Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)

    dropout_dis = distributions.Normal(loc=m1, scale=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(10))
    dropout_samples = tf.transpose(dropout_samples)
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    mix1 = dropout_samples[:, 0:1]
    mix2 = dropout_samples[:, 1:2]
    mix3 = dropout_samples[:, 2:3]
    mix4 = dropout_samples[:, 3:4]
    mix5 = dropout_samples[:, 4:5]
    mix6 = dropout_samples[:, 5:6]
    mix7 = dropout_samples[:, 6:7]
    mix8 = dropout_samples[:, 7:8]
    mix9 = dropout_samples[:, 8:9]
    mix10 = dropout_samples[:, 9:10]

    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6 + mix7 + mix8 + mix9 + mix10
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1
    mix7 = mix7 / sum1
    mix8 = mix8 / sum1
    mix9 = mix9 / sum1
    mix10 = mix10 / sum1

    mix = tf.concat(
        [mix1, mix2, mix3, mix4, mix5, mix6, mix7, mix8, mix9, mix10], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()
    z7_samples = z7.sample()
    z8_samples = z8.sample()
    z9_samples = z9.sample()
    z10_samples = z10.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)
    ttf.append(z5_samples)
    ttf.append(z6_samples)
    ttf.append(z7_samples)
    ttf.append(z8_samples)
    ttf.append(z9_samples)
    ttf.append(z10_samples)
    '''
    h1 = hsic_individual(z1_samples,z2_samples)
    h2 = hsic_individual(z1_samples,z3_samples)
    h3 = hsic_individual(z1_samples,z4_samples)
    h4 = hsic_individual(z2_samples,z3_samples)
    h5 = hsic_individual(z2_samples,z4_samples)
    h6 = hsic_individual(z3_samples,z4_samples)
    dHSIC_Value = h1+h2+h3+h4+h5+h6
    '''
    dHSIC_Value = dHSIC(ttf)
    #dHSIC_Value = last_term

    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob,
                           "decoder1")
    #y1 = tf.clip_by_value(y1, 1e-8, 1 - 1e-8)

    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob,
                           "decoder2")
    #y2 = tf.clip_by_value(y2, 1e-8, 1 - 1e-8)

    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob,
                           "decoder3")
    #y3 = tf.clip_by_value(y3, 1e-8, 1 - 1e-8)

    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob,
                           "decoder4")
    #y4 = tf.clip_by_value(y4, 1e-8, 1 - 1e-8)

    y5 = Create_SubDecoder(z5_samples, n_hidden, dim_img, keep_prob,
                           "decoder5")
    #y5 = tf.clip_by_value(y5, 1e-8, 1 - 1e-8)

    y6 = Create_SubDecoder(z6_samples, n_hidden, dim_img, keep_prob,
                           "decoder6")
    #y6 = tf.clip_by_value(y6, 1e-8, 1 - 1e-8)

    y7 = Create_SubDecoder(z7_samples, n_hidden, dim_img, keep_prob,
                           "decoder7")
    #y7 = tf.clip_by_value(y7, 1e-8, 1 - 1e-8)

    y8 = Create_SubDecoder(z8_samples, n_hidden, dim_img, keep_prob,
                           "decoder8")
    #y8 = tf.clip_by_value(y8, 1e-8, 1 - 1e-8)

    y9 = Create_SubDecoder(z9_samples, n_hidden, dim_img, keep_prob,
                           "decoder9")
    #y9 = tf.clip_by_value(y9, 1e-8, 1 - 1e-8)

    y10 = Create_SubDecoder(z10_samples, n_hidden, dim_img, keep_prob,
                            "decoder10")
    #y10 = tf.clip_by_value(y10, 1e-8, 1 - 1e-8)

    #dropout out
    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]
    y5 = y5 * mix_samples[:, 4:5]
    y6 = y6 * mix_samples[:, 5:6]
    y7 = y7 * mix_samples[:, 6:7]
    y8 = y8 * mix_samples[:, 7:8]
    y9 = y9 * mix_samples[:, 8:9]
    y10 = y10 * mix_samples[:, 9:10]

    y = y1 + y2 + y3 + y4 + y5 + y6 + y7 + y8 + y9 + y10
    output = Create_FinalDecoder(y,
                                 n_hidden,
                                 dim_img,
                                 keep_prob,
                                 "final",
                                 reuse=False)
    #output = tf.clip_by_value(output, 1e-8, 1 - 1e-8)
    y = output

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2, scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3, scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    kl1 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))
    kl7 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z7, p_z4), 1))
    kl8 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z8, p_z4), 1))
    kl9 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z9, p_z4), 1))
    kl10 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z10, p_z4), 1))

    KL_divergence = (kl1 + kl2 + kl3 + kl4 + kl5 + kl6 + kl7 + kl8 + kl9 +
                     kl10) / 10.0

    # loss
    marginal_likelihood = tf.reduce_sum(
        x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)

    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = mix_parameters
    a2 = tf.constant((0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
                     shape=(batch_size, 10))

    r = tf.reduce_sum(
        (a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(
        tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(
            tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2

    loss = -ELBO + kl * p1 + p4 * dHSIC_Value  #diverse_KL_divergence

    z = z1_samples
    return y, z, loss, -marginal_likelihood, KL_divergence
Example #10
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term):
    # encoding
    mu1, sigma1, mix1 = Create_Celeba_Encoder(x_hat, 64, "encoder1")
    mu2, sigma2, mix2 = Create_Celeba_Encoder(x_hat, 64, "encoder2")
    mu3, sigma3, mix3 = Create_Celeba_Encoder(x_hat, 64, "encoder3")
    mu4, sigma4, mix4 = Create_Celeba_Encoder(x_hat, 64, "encoder4")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    p = 0.5
    # a = p / (1.0-p)
    ard_init = 0.5
    dropout_a = tf.get_variable("dropout",
                                shape=[1],
                                initializer=tf.constant_initializer(ard_init))
    # Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)
    dropout_a = tf.cast(dropout_a, tf.float64)

    dropout_a = tf.clip_by_value(dropout_a, 0.2, 1)

    dropout_dis = distributions.Bernoulli(logits=None, probs=dropout_a)
    dropout_samples = dropout_dis.sample(sample_shape=(batch_size, 4))
    dropout_samples = tf.reshape(dropout_samples, (batch_size, 4))
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)

    mix1 = mix1 * dropout_samples[:, 0:1]
    mix2 = mix2 * dropout_samples[:, 1:2]
    mix3 = mix3 * dropout_samples[:, 2:3]
    mix4 = mix4 * dropout_samples[:, 3:4]

    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)

    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_Celeba_SubDecoder_(z1_samples, 64, "decoder1")
    y2 = Create_Celeba_SubDecoder_(z2_samples, 64, "decoder2")
    y3 = Create_Celeba_SubDecoder_(z3_samples, 64, "decoder3")
    y4 = Create_Celeba_SubDecoder_(z4_samples, 64, "decoder4")

    y1 = tf.reshape(y1, (-1, 8 * 8 * 256))
    y2 = tf.reshape(y2, (-1, 8 * 8 * 256))
    y3 = tf.reshape(y3, (-1, 8 * 8 * 256))
    y4 = tf.reshape(y4, (-1, 8 * 8 * 256))

    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]

    y1 = tf.reshape(y1, (batch_size, 8, 8, 256))
    y2 = tf.reshape(y2, (batch_size, 8, 8, 256))
    y3 = tf.reshape(y3, (batch_size, 8, 8, 256))
    y4 = tf.reshape(y4, (batch_size, 8, 8, 256))

    y = y1 + y2 + y3 + y4
    y = Create_Celeba_Generator_(y, 64, "final")

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2, scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3, scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    z = z1

    mu = mu1
    sigma = sigma1
    epsilon = 1e-8

    # additional loss
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(x - y), [1, 2, 3]))
    # kl_divergence = tf.reduce_mean(- 0.5 * tf.reduce_sum(1 + sigma - tf.square(mu) - tf.exp(sigma), 1))

    kl1 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))

    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4))

    r = tf.reduce_sum(
        (a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(
        tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(
            tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 0.1

    loss = reconstruction_loss + KL_divergence * p1 + kl + dHSIC_Value
    marginal_likelihood = reconstruction_loss

    return y, z, loss, -marginal_likelihood, KL_divergence
Example #11
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term,
                dropout_in):
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat, n_hidden, dim_z, keep_prob,
                                             "encoder4")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    init_min = 0.1
    init_max = 0.1
    init_min = (np.log(init_min) - np.log(1. - init_min))
    init_max = (np.log(init_max) - np.log(1. - init_max))
    dropout_a = tf.get_variable(name='dropout',
                                shape=None,
                                initializer=tf.random_uniform((1, ), init_min,
                                                              init_max),
                                dtype=tf.float32,
                                trainable=True)
    dropout_p = tf.nn.sigmoid(dropout_a)

    dropout_b = 1 - dropout_p
    dropout_log = tf.log(dropout_p)
    dropout_log2 = tf.log(dropout_b)

    cats_range = np.zeros((batch_size * 4, 2))
    cats_range[:, 0] = 0
    cats_range[:, 1] = 1
    dropout_samples = gumbel_softmax_sample3(dropout_log, dropout_log2,
                                             cats_range, [batch_size * 4])
    dropout_samples = tf.reshape(dropout_samples, (-1, 4))

    dropout_regularizer = dropout_p * tf.log(dropout_p)
    dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p)
    dropout_regularizer *= dropout_regularizer * 10 * -1
    dropout_regularizer = tf.clip_by_value(dropout_regularizer, -10, 0)

    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    mix_dropout1 = dropout_samples[:, 0:1] * mix_samples[:, 0:1]
    mix_dropout2 = dropout_samples[:, 1:2] * mix_samples[:, 1:2]
    mix_dropout3 = dropout_samples[:, 2:3] * mix_samples[:, 2:3]
    mix_dropout4 = dropout_samples[:, 3:4] * mix_samples[:, 3:4]

    sum1 = mix_dropout1 + mix_dropout2 + mix_dropout3 + mix_dropout4
    mix_dropout1 = mix_dropout1 / sum1
    mix_dropout2 = mix_dropout2 / sum1
    mix_dropout3 = mix_dropout3 / sum1
    mix_dropout4 = mix_dropout4 / sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)

    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob,
                           "decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob,
                           "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob,
                           "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob,
                           "decoder4")

    # dropout out
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4

    y = y1 + y2 + y3 + y4
    output = Create_FinalDecoder(y, n_hidden, dim_img, keep_prob, "final")
    y = output

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2, scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3, scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    kl1 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))

    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # loss
    marginal_likelihood = tf.reduce_sum(
        x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)

    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25, 0.25, 0.25, 0.25), shape=(batch_size, 4))

    r = tf.reduce_sum(
        (a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(
        tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(
            tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 1
    p2 = 1
    p4 = 1
    ELBO = marginal_likelihood - KL_divergence * p2

    loss = -ELBO + kl * p1 + p4 * dHSIC_Value + dropout_regularizer

    z = z1_samples
    return y, z, loss, -marginal_likelihood, dropout_regularizer, dropout_p, dropout_samples
Example #12
import tensorflow as tf

"""tf.polygamma(a,x,name=None)
功能:计算psi^{(a)}(x),psi^{(a)}(x) = ({d^a}/{dx^a})*psi(x),psi即为polygamma。    
输入:x为张量,可以为`float32`, `float64`类型。a=tf.constant(1,tf.float64) """

a = tf.constant(1, tf.float64)
x = tf.constant([[1, 2, 3, 4]], tf.float64)
z = tf.polygamma(a, x)

sess = tf.Session()
print(sess.run(z))
sess.close()
# z==>[[1.64493407 0.64493407 0.39493407 0.28382296]]
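A quick cross-check of the session output above with SciPy: tf.polygamma(1, x) is the trigamma function, and trigamma(1) = pi**2 / 6.

import numpy as np
from scipy.special import polygamma

print(polygamma(1, np.array([1.0, 2.0, 3.0, 4.0])))
# [1.64493407 0.64493407 0.39493407 0.28382296]
print(np.pi ** 2 / 6)    # 1.6449340668...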
Example #13
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term):
    # encoding
    mu1, sigma1, mix1 = Create_Celeba_Encoder(x_hat, 64, "encoder1")
    mu2, sigma2, mix2 = Create_Celeba_Encoder(x_hat, 64, "encoder2")
    mu3, sigma3, mix3 = Create_Celeba_Encoder(x_hat, 64, "encoder3")
    mu4, sigma4, mix4 = Create_Celeba_Encoder(x_hat, 64, "encoder4")
    mu5, sigma5, mix5 = Create_Celeba_Encoder(x_hat, 64, "encoder5")
    mu6, sigma6, mix6 = Create_Celeba_Encoder(x_hat, 64, "encoder6")

    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)
    z5 = distributions.Normal(loc=mu5, scale=sigma5)
    z6 = distributions.Normal(loc=mu6, scale=sigma6)

    init_min = 0.1
    init_max = 0.1
    init_min = (np.log(init_min) - np.log(1. - init_min))
    init_max = (np.log(init_max) - np.log(1. - init_max))
    dropout_a = tf.get_variable(name='dropout',
                                shape=None,
                                initializer=tf.random_uniform((1, ), init_min,
                                                              init_max),
                                dtype=tf.float32,
                                trainable=True)
    dropout_p = tf.nn.sigmoid(dropout_a)

    dropout_b = 1 - dropout_p
    dropout_log = tf.log(dropout_p)
    dropout_log2 = tf.log(dropout_b)

    cats_range = np.zeros((batch_size * 6, 2))
    cats_range[:, 0] = 0
    cats_range[:, 1] = 1
    dropout_samples = gumbel_softmax_sample3(dropout_log, dropout_log2,
                                             cats_range, [batch_size * 6])
    dropout_samples = tf.reshape(dropout_samples, (-1, 6))

    dropout_regularizer = dropout_p * tf.log(dropout_p)
    dropout_regularizer += (1. - dropout_p) * tf.log(1. - dropout_p)
    dropout_regularizer *= dropout_regularizer * 10 * -1
    dropout_regularizer = tf.clip_by_value(dropout_regularizer, -10, 0)

    mix1 = mix1 * dropout_samples[:, 0:1]
    mix2 = mix2 * dropout_samples[:, 1:2]
    mix3 = mix3 * dropout_samples[:, 2:3]
    mix4 = mix4 * dropout_samples[:, 3:4]
    mix5 = mix5 * dropout_samples[:, 4:5]
    mix6 = mix6 * dropout_samples[:, 5:6]

    sum1 = mix1 + mix2 + mix3 + mix4 + mix5 + mix6
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1
    mix5 = mix5 / sum1
    mix6 = mix6 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4, mix5, mix6], 1)
    mix_parameters = mix
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()
    z5_samples = z5.sample()
    z6_samples = z6.sample()

    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)
    ttf.append(z5_samples)
    ttf.append(z6_samples)

    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_Celeba_SubDecoder_(z1_samples, 64, "decoder1")
    y2 = Create_Celeba_SubDecoder_(z2_samples, 64, "decoder2")
    y3 = Create_Celeba_SubDecoder_(z3_samples, 64, "decoder3")
    y4 = Create_Celeba_SubDecoder_(z4_samples, 64, "decoder4")
    y5 = Create_Celeba_SubDecoder_(z5_samples, 64, "decoder5")
    y6 = Create_Celeba_SubDecoder_(z6_samples, 64, "decoder6")

    y1 = tf.reshape(y1, (-1, 8 * 8 * 256))
    y2 = tf.reshape(y2, (-1, 8 * 8 * 256))
    y3 = tf.reshape(y3, (-1, 8 * 8 * 256))
    y4 = tf.reshape(y4, (-1, 8 * 8 * 256))
    y5 = tf.reshape(y5, (-1, 8 * 8 * 256))
    y6 = tf.reshape(y6, (-1, 8 * 8 * 256))

    # dropout out
    y1 = y1 * mix_samples[:, 0:1]
    y2 = y2 * mix_samples[:, 1:2]
    y3 = y3 * mix_samples[:, 2:3]
    y4 = y4 * mix_samples[:, 3:4]
    y5 = y5 * mix_samples[:, 4:5]
    y6 = y6 * mix_samples[:, 5:6]

    y1 = tf.reshape(y1, (batch_size, 8, 8, 256))
    y2 = tf.reshape(y2, (batch_size, 8, 8, 256))
    y3 = tf.reshape(y3, (batch_size, 8, 8, 256))
    y4 = tf.reshape(y4, (batch_size, 8, 8, 256))
    y5 = tf.reshape(y5, (batch_size, 8, 8, 256))
    y6 = tf.reshape(y6, (batch_size, 8, 8, 256))

    y = y1 + y2 + y3 + y4 + y5 + y6
    y = Create_Celeba_Generator_(y, 64, "final")

    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                           scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2, scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3, scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    z = z1

    mu = mu1
    sigma = sigma1
    epsilon = 1e-8

    # additional loss
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(tf.square(x - y), [1, 2, 3]))
    # kl_divergence = tf.reduce_mean(- 0.5 * tf.reduce_sum(1 + sigma - tf.square(mu) - tf.exp(sigma), 1))
    kl1 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))
    kl5 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z5, p_z4), 1))
    kl6 = tf.reduce_mean(
        tf.reduce_sum(distributions.kl_divergence(z6, p_z4), 1))

    kl = kl1 + kl2 + kl3 + kl4 + kl5 + kl6
    kl_divergence = kl / 6.0

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.17, 0.17, 0.17, 0.17, 0.17, 0.17),
                     shape=(batch_size, 6))

    r = tf.reduce_sum(
        (a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(
        tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(
            tf.lgamma(a2), axis=-1) - tf.reduce_sum(tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    p1 = 1

    loss = reconstruction_loss + kl_divergence * p1 + kl + dHSIC_Value + dropout_regularizer
    KL_divergence = kl_divergence
    marginal_likelihood = reconstruction_loss

    return y, z, loss, -marginal_likelihood, kl_divergence, dropout_p, dropout_samples
Example #14
tf.strided_slice_grad()

tf.gather()
tf.gather_nd()
tf.gather_v2()
tf.get_summary_op()
tf.gradients()
tf.boolean_mask()
tf.sparse_mask()
tf.sequence_mask()

tf.random_gamma()
tf.digamma()
tf.igamma()
tf.lgamma()
tf.polygamma()
tf.igammac()

tf.tensor_shape.as_shape()

# gfile
tf.gfile.Copy()
tf.gfile.DeleteRecursively()
tf.gfile.Exists()
tf.gfile.Glob()
tf.gfile.IsDirectory()
tf.gfile.ListDirectory()
tf.gfile.MakeDirs()
tf.gfile.MkDir()
tf.gfile.Remove()
tf.gfile.Rename()
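The gamma-family entries in the list above (tf.digamma, tf.lgamma, tf.polygamma, tf.igamma, tf.igammac) can be exercised with a short TF 1.x-style snippet like the rest of this page; tf.igamma and tf.igammac are the regularized lower and upper incomplete gamma functions, so they sum to 1. A minimal sketch with illustrative values:

import tensorflow as tf

a = tf.constant(2.0)
x = tf.constant([0.5, 1.0, 2.0, 4.0])

ops = {
    'digamma': tf.digamma(x),
    'lgamma': tf.lgamma(x),
    'trigamma': tf.polygamma(tf.constant(1.0), x),
    'igamma + igammac': tf.igamma(a, x) + tf.igammac(a, x),   # elementwise 1
}

with tf.Session() as sess:
    for name, op in ops.items():
        print(name, sess.run(op))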