Example No. 1
 def __init__(self,
              num,
              definition,
              mean=0,
              stdev=None,
              internal_rng=False):
     self.mean = mean
     if len(definition) != 1:
         raise ValueError(
             'definition should have 1 parameter (dim), not %d' %
             len(definition))
     try:
         dim = int(definition[0])
     except ValueError:
         raise ValueError('non-integer dim: %s' % definition[0])
     if stdev is None:
         var = 2 * np.log(2)
         stdev = var**0.5
     else:
         var = stdev**2
     self.var, self.stdev = (floatX(x) for x in (var, stdev))
     self.recon_dim = self.sample_dim = dim
     self.num = num
     if internal_rng:
         self.placeholders = [
             t_rng.normal(size=(num, dim), avg=mean, std=self.stdev)
         ]
     else:
         self.placeholders = [T.matrix()]
     self.flat_data = [Output(self.placeholders[0], shape=(num, dim))]
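A plain-numpy sketch of what the internal_rng branch samples, using the default
stdev derived above (numpy stands in for the framework's t_rng; this is an
illustration, not the framework code):

import numpy as np

num, dim = 128, 100
stdev = (2 * np.log(2)) ** 0.5  # the default stdev computed in __init__
noise = np.random.normal(loc=0.0, scale=stdev, size=(num, dim))
print(noise.shape)  # (128, 100)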
Example No. 2
def marginal_loglikelihood(X, num_samples=512):
    mu, log_sigma = conv_encoder(X, *enc_params)

    epsilon_shape = (num_samples, X.shape[0], mu.shape[1])
    epsilon = t_rng.normal(epsilon_shape)

    mu = mu.dimshuffle('x', 0, 1)
    log_sigma = log_sigma.dimshuffle('x', 0, 1)
    #log_sigma = log_sigma * 2.

    # compute z
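    # reparameterization trick: log_sigma holds the log-variance here, so the
    # standard deviation is exp(0.5 * log_sigma)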
    z = mu + T.exp(0.5 * log_sigma) * epsilon

    # Decode p(x | z); flatten z first so the (num_samples, batch) axes fold
    # into one MLP-compatible batch axis
    flat_z = z.reshape((epsilon.shape[0] * epsilon.shape[1],
            epsilon.shape[2]))

    reconstructed_x, _ = conv_decoder(X, flat_z, *dec_params)
    reconstructed_x = reconstructed_x.reshape(
        (epsilon.shape[0], epsilon.shape[1],
         X.shape[1] * X.shape[2] * X.shape[3]))

    # compute log-probabilities
    log_q_z_x = -0.5 * (T.log(2 * math.pi) + log_sigma +
                        (z - mu) ** 2 / T.exp(log_sigma)).sum(axis=2)
    log_p_z = -0.5 * (T.log(2 * math.pi) + (z ** 2)).sum(axis=2)

    # if self.continuous:
    #     # need to rewrite and finish this
    #     log_p_x_z = -0.5 * (T.log(2 * math.pi) + self.gauss_sigma + (X.dimshuffle('x', 0, 1) - reconstructed_x) ** 2 /T.exp(self.gauss_sigma)).sum(axis=2)
    # else:
    X_flatten = X.flatten(2)
    log_p_x_z = -T.nnet.binary_crossentropy(
        reconstructed_x, X_flatten.dimshuffle('x', 0, 1)).sum(axis=2)

    return T.mean(
        log_sum_exp(log_p_z + log_p_x_z - log_q_z_x, axis=0) -
        T.log(T.cast(num_samples, 'float32')))
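The returned expression is the standard importance-sampling estimate of the
marginal log-likelihood, averaged over the batch; with K = num_samples and
z_k ~ q(z | x):

\log p(x) \approx \log \frac{1}{K} \sum_{k=1}^{K} \frac{p(x \mid z_k)\, p(z_k)}{q(z_k \mid x)}
 = \operatorname{logsumexp}_k \left[ \log p(x \mid z_k) + \log p(z_k) - \log q(z_k \mid x) \right] - \log K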
Example No. 3
def sampler(mu, log_sigma):

    eps = t_rng.normal(mu.shape)
    # Reparametrize
    z = mu + T.exp(0.5 * log_sigma) * eps
    # z = mu + T.exp(log_sigma) * eps
    return z
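A minimal sketch of compiling sampler into a callable (assuming log_sigma is
the log-variance, matching the 0.5 factor above; the RandomStreams here is a
local stand-in for the module-level t_rng):

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

t_rng = RandomStreams(seed=42)
mu = T.matrix('mu')
log_sigma = T.matrix('log_sigma')
z = mu + T.exp(0.5 * log_sigma) * t_rng.normal(mu.shape)
sample_fn = theano.function([mu, log_sigma], z)  # draws fresh noise per call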
Example No. 4
 def logits_to_sample(self, recon_logits):
     recon_mean = recon_logits[:, :self.slice_point]
     recon_log_var = recon_logits[:, self.slice_point:]
     if self.log_var_bias != 0:
         recon_log_var += self.log_var_bias
     recon_logstd = recon_log_var / 2
     recon_std = T.exp(recon_logstd)
     standard_sample = t_rng.normal(size=recon_mean.shape)
     sample = recon_mean + standard_sample * recon_std
     sample = [Output(sample, (self.num, self.sample_dim))]
     return sample
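The same split-and-reparameterize logic in plain numpy (a sketch: slice_point
equals the sample dimension and the log_var_bias branch is omitted):

import numpy as np

def logits_to_sample_np(recon_logits, slice_point):
    mean = recon_logits[:, :slice_point]     # first half: mean
    log_var = recon_logits[:, slice_point:]  # second half: log-variance
    std = np.exp(log_var / 2.0)
    return mean + np.random.normal(size=mean.shape) * std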
Example No. 5
 def generator_function(hidden_data, is_train=True):
     # layer 0 (linear)
     h0     = T.dot(hidden_data, linear_w0)
     h0     = h0 + t_rng.normal(size=h0.shape, std=0.01, dtype=t_floatX)
     h0     = relu(batchnorm(X=h0, g=linear_bn_w0, b=linear_bn_b0))
     h0     = h0.reshape((h0.shape[0], num_gen_filters0, init_image_size, init_image_size))
     # layer 1 (deconv)
     h1     = deconv(h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))
     h1     = h1 + t_rng.normal(size=h1.shape, std=0.01, dtype=t_floatX)
     h1     = relu(batchnorm(h1, g=conv_bn_w1, b=conv_bn_b1))
     # layer 2 (deconv)
     h2     = deconv(h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))
     h2     = h2 + t_rng.normal(size=h2.shape, std=0.01, dtype=t_floatX)
     h2     = relu(batchnorm(h2, g=conv_bn_w2, b=conv_bn_b2))
     # layer 3 (deconv)
     h3     = deconv(h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))
     h3     = h3 + t_rng.normal(size=h3.shape, std=0.01, dtype=t_floatX)
     h3     = relu(batchnorm(h3, g=conv_bn_w3, b=conv_bn_b3))
     # layer 4 (deconv)
     output = tanh(deconv(h3, conv_w4, subsample=(2, 2), border_mode=(2, 2)) +
                   conv_b4.dimshuffle('x', 0, 'x', 'x'))
     return output
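Each hidden layer above repeats one pattern; factored out as a sketch (deconv,
batchnorm, relu, t_rng, and t_floatX are the same helpers the example already
uses):

def noisy_deconv_block(x, w, g, b):
    # deconv -> add small Gaussian noise -> batchnorm -> relu
    h = deconv(x, w, subsample=(2, 2), border_mode=(2, 2))
    h = h + t_rng.normal(size=h.shape, std=0.01, dtype=t_floatX)
    return relu(batchnorm(h, g=g, b=b))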
Example No. 6
def generate_captions(
        As_words, As_masks, h_enc, gen_init0_lang, gen_init0_lang_Y, Qs_masks,
        U_attention_gen, W_attention_gen, b_attention_gen, v_attention_gen,
        W_init_h0, b_init_h0, W_init_c0, b_init_c0, W1_M, b1_M, W2_M, WM_mu_zt,
        bM_mu_zt, WM_sigma_zt, bM_sigma_zt, W3_M, b3_M, Wp_M_mu, bp_M_mu,
        Wp_M_sigma, bp_M_sigma, W1_M0, b1_M0, W2_M0, WM_mu_zt0, bM_mu_zt0,
        WM_sigma_zt0, bM_sigma_zt0, W3_M0, b3_M0, Wp_M_mu0, bp_M_mu0,
        Wp_M_sigma0, bp_M_sigma0, W_LSTM_hidden_gen, W_LSTM_in_gen, b_LSTM_gen,
        W_word_gen, b_word_gen, W_softmax_gen, b_softmax_gen, W_bow1, b_bow1,
        W_bow2, b_bow2, W_softmax_bow, b_softmax_bow, W_bow1t, b_bow1t,
        W_bow2t, b_bow2t, W_softmax_bowt, b_softmax_bowt):
    ### Discourse level ###
    ### calculate Q(zd | Y, X): X = gen_init0_lang, Y = gen_init0_lang_Y
    m_10 = lrelu(
        T.dot(gen_init0_lang, W1_M0) + T.dot(gen_init0_lang_Y, W2_M0) +
        b1_M0)  # inputs: batch_size x 2*lstm; output: batch_size x lstm

    u_zt0 = T.dot(m_10, WM_mu_zt0) + bM_mu_zt0  # batch_size x lstm
    log_sigma_zt0 = T.dot(m_10, WM_sigma_zt0) + bM_sigma_zt0

    #sample Q(Zd)
    eps0 = t_rng.normal(size=(u_zt0.shape[0], u_zt0.shape[1]),
                        avg=0.0,
                        std=1.0,
                        dtype=theano.config.floatX)
    Zt0 = u_zt0 + T.exp(log_sigma_zt0) * eps0  #batch_size x dim_atten

    ### calculate BOW loss ###

    t_bow1 = lrelu(T.dot(Zt0, W_bow1) + b_bow1)  # batch * middle_dim
    t_bow2 = lrelu(T.dot(t_bow1, W_bow2) + b_bow2)
    word_soft_bow = T.dot(t_bow2, W_softmax_bow) + b_softmax_bow
    bow_K = T.nnet.softmax(word_soft_bow)

    #calculate p(Zd)
    h_prior_00 = lrelu(T.dot(gen_init0_lang, W3_M0) +
                       b3_M0)  #batch_size x dim_atten
    u_0t0 = T.dot(h_prior_00, Wp_M_mu0) + bp_M_mu0
    log_sigma_0t0 = T.dot(h_prior_00, Wp_M_sigma0) + bp_M_sigma0

    #calculate KL_d
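    # Closed-form KL between diagonal Gaussians, with log_sigma_* holding log
    # standard deviations:
    #   KL(N(u_q, s_q^2) || N(u_p, s_p^2))
    #     = log(s_p / s_q) + (s_q^2 + (u_q - u_p)^2) / (2 * s_p^2) - 1/2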
    KL_t0 = (log_sigma_0t0 - log_sigma_zt0) + (
        (T.exp(2 * log_sigma_zt0) +
         (u_zt0 - u_0t0)**2) / (2 * T.exp(2 * log_sigma_0t0))) - 0.5
    KL_t0 = T.sum(KL_t0)

    KL_t0 = (KL_t0 / u_0t0.shape[0]).astype(theano.config.floatX)

    LSTM_h0 = T.tanh(T.dot(Zt0, W_init_h0) + b_init_h0)
    cell0 = T.tanh(T.dot(Zt0, W_init_c0) + b_init_c0)

    word0 = (T.extra_ops.repeat(word_start, repeats=As_words.shape[1],
                                axis=1)).astype(theano.config.floatX)

    this_real_words = T.concatenate([word0, As_words], axis=0)

    eps_list = t_rng.normal(size=(As_masks.shape[0], Zt0.shape[0],
                                  Zt0.shape[1]),
                            avg=0.0,
                            std=1.0,
                            dtype=theano.config.floatX)

    def recurrence(word_t_prior, word_t, t_mask, eps, h_t_prior, c_t_prior,
                   z_t_prior, W_LSTM_in_gen, W_LSTM_hidden_gen, b_LSTM_gen,
                   W1_M, W2_M, b1_M, WM_mu_zt, bM_mu_zt, WM_sigma_zt,
                   bM_sigma_zt, W3_M, b3_M, Wp_M_mu, bp_M_mu, Wp_M_sigma,
                   bp_M_sigma):

        ################################################ calculate input
        word_t_prior = T.concatenate([word_t_prior, z_t_prior], axis=1)

        lstm_t = T.dot(h_t_prior, W_LSTM_hidden_gen) + T.dot(
            word_t_prior, W_LSTM_in_gen) + b_LSTM_gen
        i_t_enc = T.nnet.sigmoid(lstm_t[:, 0 * n_LSTM:1 * n_LSTM])
        f_t_enc = T.nnet.sigmoid(lstm_t[:, 1 * n_LSTM:2 * n_LSTM])

        cell_t_enc = f_t_enc * c_t_prior + i_t_enc * T.tanh(
            lstm_t[:, 2 * n_LSTM:3 * n_LSTM])
        cell_t_enc = t_mask.dimshuffle([0, 'x']) * cell_t_enc + (
            1. - t_mask.dimshuffle([0, 'x'])) * c_t_prior

        o_t_enc = T.nnet.sigmoid(lstm_t[:, 3 * n_LSTM:4 * n_LSTM])

        h_t = o_t_enc * T.tanh(cell_t_enc)
        h_t = t_mask.dimshuffle(
            [0, 'x']) * h_t + (1. - t_mask.dimshuffle([0, 'x'])) * h_t_prior

        ### Word level ###

        m_1 = lrelu(T.dot(h_t, W1_M) + T.dot(word_t, W2_M) +
                    b1_M)  #  using h_t    T_dec  x batch_size x dim_atten

        u_zt = T.dot(m_1,
                     WM_mu_zt) + bM_mu_zt  #T_dec  x batch_size x dim_atten
        log_sigma_zt = T.dot(m_1, WM_sigma_zt) + bM_sigma_zt

        #sample Q(Zwt)

        z_w_t = u_zt + T.exp(
            log_sigma_zt) * eps  #T_dec  x batch_size x dim_atten

        #calculate p(Zwt)
        h_prior_0 = lrelu(T.dot(h_t, W3_M) +
                          b3_M)  #T_dec  x batch_size x dim_atten
        u_0t = T.dot(h_prior_0, Wp_M_mu) + bp_M_mu
        log_sigma_0t = T.dot(h_prior_0, Wp_M_sigma) + bp_M_sigma

        # calculate KL_t, masked via t_mask.dimshuffle([0, 'x'])
        KL_t = (log_sigma_0t - log_sigma_zt) + (
            (T.exp(2 * log_sigma_zt) +
             (u_zt - u_0t)**2) / (2 * T.exp(2 * log_sigma_0t))) - 0.5
        KL_t = T.sum(KL_t * t_mask.dimshuffle([0, 'x']))

        KL_t = (KL_t / h_t.shape[0]).astype(theano.config.floatX)

        return h_t.astype(theano.config.floatX), cell_t_enc.astype(
            theano.config.floatX), z_w_t.astype(
                theano.config.floatX), KL_t.astype(theano.config.floatX)

    (h_list, _, Zt, KL_t_list), _ = theano.scan(
        recurrence,
        sequences=[this_real_words[0:-1], As_words, As_masks, eps_list],
        outputs_info=[LSTM_h0, cell0, Zt0, None],
        non_sequences=[
            W_LSTM_in_gen, W_LSTM_hidden_gen, b_LSTM_gen, W1_M, W2_M, b1_M,
            WM_mu_zt, bM_mu_zt, WM_sigma_zt, bM_sigma_zt, W3_M, b3_M, Wp_M_mu,
            bp_M_mu, Wp_M_sigma, bp_M_sigma
        ],
        n_steps=As_masks.shape[0],
        strict=True)

    hid_align = T.dot(h_enc, U_attention_gen)  # T_enc*Batch* dimAtten

    h_t_info = T.concatenate([Zt, this_real_words[0:-1]],
                             axis=2)  # T_dec*Batch* (n_LSTM+dim word)

    hdec_align = T.dot(h_t_info, W_attention_gen)  # T_dec*Batch* dimAtten

    all_align = T.tanh(
        hid_align.dimshuffle([0, 'x', 1, 2]) +
        hdec_align.dimshuffle(['x', 0, 1, 2]) +
        b_attention_gen.dimshuffle(['x', 'x', 'x', 0]))
    # T_enc x T_dec x batch_size x dimAttention

    e = all_align * v_attention_gen.dimshuffle(['x', 'x', 'x', 0])
    e = e.sum(axis=3) * Qs_masks.dimshuffle(
        [0, 'x', 1])  # (T_enc_2M) x T_dec x batch_size
    e = e.dimshuffle([1, 2, 0])  # T_dec x batch_size x T_enc

    e2 = T.reshape(e, [e.shape[0] * e.shape[1], e.shape[2]],
                   ndim=2)  # (T_dec x batch_size) x T_enc

    # normalize
    alpha = T.nnet.softmax(e2)  # (T_dec x batch_size) x T_enc

    alpha = T.reshape(alpha, [e.shape[0], e.shape[1], e.shape[2]],
                      ndim=3)  # T_dec x batch_size x T_enc

    attention_enc = alpha.dimshuffle([0, 2, 1, 'x']) * h_enc.dimshuffle(
        ['x', 0, 1, 2])  #  T_dec x T_enc x batch_size x h_dim
    attention_enc = attention_enc.sum(
        axis=1
    )  # T_dec x T_enc x batch_size x h_dim --> T_dec  x batch_size x h_dim

    ################################  word
    prepare_word = T.concatenate([attention_enc, h_list, Zt], axis=2)

    word_t = lrelu(T.dot(prepare_word, W_word_gen) +
                   b_word_gen)  #T * batch * middle_dim
    word_soft = T.dot(word_t, W_softmax_gen) + b_softmax_gen
    word_soft_K = T.nnet.softmax(
        T.reshape(
            word_soft,
            [word_soft.shape[0] * word_soft.shape[1], word_soft.shape[2]],
            ndim=2))

    ################################# Auxiliary-path

    t_bow1t = lrelu(T.dot(Zt, W_bow1t) + b_bow1t)  # T_dec * batch * middle_dim
    t_bow2t = lrelu(T.dot(t_bow1t, W_bow2t) + b_bow2t)
    word_soft_bowt = T.dot(t_bow2t, W_softmax_bowt) + b_softmax_bowt
    word_soft_K_Zt = T.nnet.softmax(
        T.reshape(word_soft_bowt, [
            word_soft_bowt.shape[0] * word_soft_bowt.shape[1],
            word_soft_bowt.shape[2]
        ],
                  ndim=2))

    return (word_soft_K,
            KL_t0.astype(theano.config.floatX),
            T.sum(KL_t_list).astype(theano.config.floatX),
            bow_K.astype(theano.config.floatX),
            word_soft_K_Zt.astype(theano.config.floatX)
            )  # word_soft_K: (T * batch) * n_word_dict
def generate_next(h_t_prior, word_t_prior, z_t_prior, c_t_prior, Qs_masks,
                  h_enc, hid_align, W_LSTM_in_gen, W_LSTM_hidden_gen,
                  b_LSTM_gen, W_attention_gen, b_attention_gen,
                  v_attention_gen, W_word_gen, b_word_gen, W_softmax_gen,
                  b_softmax_gen, W3_M, b3_M, Wp_M_mu, bp_M_mu, Wp_M_sigma,
                  bp_M_sigma):  #x_temp :  batch_size * dim_features

    ################################################ calculate input
    word_t_prior2 = T.concatenate([word_t_prior, z_t_prior], axis=1)

    lstm_t = T.dot(h_t_prior, W_LSTM_hidden_gen) + T.dot(
        word_t_prior2, W_LSTM_in_gen) + b_LSTM_gen
    i_t_enc = T.nnet.sigmoid(lstm_t[:, 0 * n_LSTM:1 * n_LSTM])
    f_t_enc = T.nnet.sigmoid(lstm_t[:, 1 * n_LSTM:2 * n_LSTM])

    cell_t_enc = f_t_enc * c_t_prior + i_t_enc * T.tanh(
        lstm_t[:, 2 * n_LSTM:3 * n_LSTM])
    #cell_t_enc = t_mask.dimshuffle([0, 'x']) * cell_t_enc + (1. - t_mask.dimshuffle([0, 'x'])) * c_t_prior

    o_t_enc = T.nnet.sigmoid(lstm_t[:, 3 * n_LSTM:4 * n_LSTM])

    h_list = o_t_enc * T.tanh(cell_t_enc)
    #h_t = t_mask.dimshuffle([0, 'x']) * h_t + (1. - t_mask.dimshuffle([0, 'x'])) * h_t_prior

    ### VAE: calculate p(Zt) ###
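    # at generation time there is no target sequence, so Zt is drawn from the
    # prior p(z | h) instead of the posterior Q(z | Y, X) used during training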
    h_prior_0 = lrelu(T.dot(h_list, W3_M) +
                      b3_M)  #T_dec  x batch_size x dim_atten
    u_0t = T.dot(h_prior_0, Wp_M_mu) + bp_M_mu
    log_sigma_0t = T.dot(h_prior_0, Wp_M_sigma) + bp_M_sigma

    eps = t_rng.normal(size=(u_0t.shape[0], u_0t.shape[1]),
                       avg=0.0,
                       std=1.0,
                       dtype=theano.config.floatX)
    Zt = u_0t + T.exp(log_sigma_0t) * eps  #T_dec  x batch_size x dim_atten

    #################################

    #hid_align = T.dot(h_enc, U_attention_gen)  # T_enc*Batch* dimAtten

    h_t_info = T.concatenate([Zt, word_t_prior], axis=1)

    hdec_align = T.dot(h_t_info, W_attention_gen)  # *Batch* dimAtten

    all_align = T.tanh(hid_align + hdec_align.dimshuffle(['x', 0, 1]) +
                       b_attention_gen.dimshuffle(['x', 'x', 0]))
    # T_enc  x batch_size x dimAttention

    e = all_align * v_attention_gen.dimshuffle(['x', 'x', 0])
    e = e.sum(axis=2) * Qs_masks  # T_enc  x batch_size

    # normalize
    alpha = T.nnet.softmax(e.T)  # batch_size x T_enc

    # conv_feature representation at time T
    attention_enc = alpha.dimshuffle([1, 0, 'x'
                                      ]) * h_enc  # T_enc x batch_size x h_dim
    attention_enc = attention_enc.sum(
        axis=0)  # T_enc x batch_size x h_dim --> batch_size x h_dim

    prepare_word = T.concatenate([attention_enc, h_list, Zt], axis=1)

    word_t = lrelu(T.dot(prepare_word, W_word_gen) +
                   b_word_gen)  # batch * middle_dim
    word_soft = T.dot(word_t, W_softmax_gen) + b_softmax_gen
    word_soft_K = T.nnet.softmax(word_soft)

    return word_soft_K.astype(theano.config.floatX), h_list.astype(
        theano.config.floatX), cell_t_enc.astype(
            theano.config.floatX), Zt.astype(theano.config.floatX)
#As_word_list_flat = T.flatten(As_word_list.T,outdim=1) #words x #samples
#As_word_vecs = shared_Word_vecs[As_word_list_flat].reshape([As_word_list.shape[1], As_word_list.shape[0], n_word_dim]) # T * batch * n_dim

h_t_lang, gen_init0_lang = encoder_network(Qs_word_vecs, Qs_mask.T,
                                           *enc_params)  # batch * n_LSTM

#calculate p(Zt)
h_prior_00 = lrelu(T.dot(gen_init0_lang, W3_M0) +
                   b3_M0)  #batch_size x dim_atten
u_0t0 = T.dot(h_prior_00, Wp_M_mu0) + bp_M_mu0
log_sigma_0t0 = T.dot(h_prior_00, Wp_M_sigma0) + bp_M_sigma0

scale_Z = T.scalar('scale_Z', dtype='float32')
eps = t_rng.normal(size=(u_0t0.shape[0], u_0t0.shape[1]),
                   avg=0.0,
                   std=1.0,
                   dtype=theano.config.floatX)
#eps = t_rng.binomial(size=(u_0t.shape[0],u_0t.shape[1]), p=0.5, dtype=theano.config.floatX)
Zt = (u_0t0 + T.exp(log_sigma_0t0) * eps * scale_Z).astype(
    theano.config.floatX)
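# scale_Z acts as a sampling temperature: 0.0 collapses Zt to the prior mean,
# 1.0 gives a full-variance draw from p(Zt)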

LSTM_h0 = T.tanh(T.dot(Zt, W_init_h0) + b_init_h0)
cell0 = T.tanh(T.dot(Zt, W_init_c0) + b_init_c0)
################################

word0 = (T.extra_ops.repeat(word_start, repeats=Qs_word_list.shape[0],
                            axis=1)).astype(theano.config.floatX)

#Total_M_h_enc= T.concatenate([Total_m0.dimshuffle([1, 0, 2]),h_t_lang], axis=0)
#Qs_mask_in= T.concatenate([T.ones((Total_m0.shape[1],Total_m0.shape[0]),dtype=theano.config.floatX), Qs_mask.T], axis=0)   # Qs_mask: batch * T