def __init__(self, num, definition, mean=0, stdev=None, internal_rng=False):
    self.mean = mean
    if len(definition) != 1:
        raise ValueError('definition should have 1 parameter (dim), not %d'
                         % len(definition))
    try:
        dim = int(definition[0])
    except ValueError:
        # report the offending value, not the unbound name `dim`
        raise ValueError('non-integer dim: %s' % definition[0])
    if stdev is None:
        var = 2 * np.log(2)
        stdev = var ** 0.5
    else:
        var = stdev ** 2
    self.var, self.stdev = (floatX(x) for x in (var, stdev))
    self.recon_dim = self.sample_dim = dim
    self.num = num
    if internal_rng:
        # draw samples inside the graph rather than feeding them in
        self.placeholders = [
            t_rng.normal(size=(num, dim), avg=mean, std=self.stdev)
        ]
    else:
        self.placeholders = [T.matrix()]
    self.flat_data = [Output(self.placeholders[0], shape=(num, dim))]
def marginal_loglikelihood(X, num_samples=512):
    mu, log_sigma = conv_encoder(X, *enc_params)
    epsilon_shape = (num_samples, X.shape[0], mu.shape[1])
    epsilon = t_rng.normal(epsilon_shape)
    mu = mu.dimshuffle('x', 0, 1)
    log_sigma = log_sigma.dimshuffle('x', 0, 1)
    # compute z with the reparameterization trick
    z = mu + T.exp(0.5 * log_sigma) * epsilon
    # flatten z so the MLP decoder can consume it
    flat_z = z.reshape((epsilon.shape[0] * epsilon.shape[1], epsilon.shape[2]))
    reconstructed_x, _ = conv_decoder(X, flat_z, *dec_params)
    reconstructed_x = reconstructed_x.reshape(
        (epsilon.shape[0], epsilon.shape[1],
         X.shape[1] * X.shape[2] * X.shape[3]))
    # compute log-probabilities
    log_q_z_x = -0.5 * (T.log(2 * math.pi) + log_sigma
                        + (z - mu) ** 2 / T.exp(log_sigma)).sum(axis=2)
    log_p_z = -0.5 * (T.log(2 * math.pi) + z ** 2).sum(axis=2)
    X_flatten = X.flatten(2)
    log_p_x_z = -T.nnet.binary_crossentropy(
        reconstructed_x, X_flatten.dimshuffle('x', 0, 1)).sum(axis=2)
    # For continuous data, a Gaussian likelihood would replace the line above, e.g.:
    # log_p_x_z = -0.5 * (T.log(2 * math.pi) + gauss_log_sigma
    #                     + (X_flatten.dimshuffle('x', 0, 1) - reconstructed_x) ** 2
    #                     / T.exp(gauss_log_sigma)).sum(axis=2)
    # importance-weighted estimate, averaged over the batch:
    # log(1/S) + log sum_s exp(log p(z) + log p(x|z) - log q(z|x))
    return T.mean(
        log_sum_exp(log_p_z + log_p_x_z - log_q_z_x, axis=0)
        - T.log(T.cast(num_samples, 'float32')))
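# `log_sum_exp` is used above but not defined in this snippet. Below is a
# minimal, numerically stable Theano version; this is an assumption about
# the helper's behavior, not the repo's verbatim implementation.
def log_sum_exp(x, axis=None):
    # log(sum(exp(x))) = max(x) + log(sum(exp(x - max(x)))):
    # subtracting the per-axis max before exponentiating avoids overflow.
    x_max = T.max(x, axis=axis, keepdims=True)
    return T.log(T.sum(T.exp(x - x_max), axis=axis)) + T.max(x, axis=axis)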
def sampler(mu, log_sigma):
    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I),
    # treating log_sigma as a log-variance.
    eps = t_rng.normal(mu.shape)
    z = mu + T.exp(0.5 * log_sigma) * eps
    # z = mu + T.exp(log_sigma) * eps  # variant if log_sigma is a log-std
    return z
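# Hypothetical usage sketch for `sampler` (variable names are illustrative,
# not from the repo): feed encoder outputs in, get a reparameterized sample.
mu_example = T.matrix('mu')
log_sigma_example = T.matrix('log_sigma')
z_example = sampler(mu_example, log_sigma_example)
# z_example is differentiable w.r.t. mu and log_sigma, so gradients can
# flow through the sampling step during training.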
def logits_to_sample(self, recon_logits):
    # split the logits into a mean half and a log-variance half
    recon_mean = recon_logits[:, :self.slice_point]
    recon_log_var = recon_logits[:, self.slice_point:]
    if self.log_var_bias != 0:
        recon_log_var += self.log_var_bias
    recon_logstd = recon_log_var / 2
    recon_std = T.exp(recon_logstd)
    # reparameterized Gaussian sample
    standard_sample = t_rng.normal(size=recon_mean.shape)
    sample = recon_mean + standard_sample * recon_std
    return [Output(sample, (self.num, self.sample_dim))]
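# For intuition, a NumPy mirror of the transformation in `logits_to_sample`
# (illustrative only; assumes slice_point equals the latent dimension,
# here 4, and a zero log_var_bias).
import numpy as np

logits = np.random.randn(8, 2 * 4)            # batch of 8, sample_dim = 4
mean, log_var = logits[:, :4], logits[:, 4:]  # split into the two halves
std = np.exp(log_var / 2)                     # log-variance -> std
sample = mean + np.random.randn(*mean.shape) * std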
def generator_function(hidden_data, is_train=True):
    # layer 0 (linear), with small Gaussian noise injected after each layer
    h0 = T.dot(hidden_data, linear_w0)
    h0 = h0 + t_rng.normal(size=h0.shape, std=0.01, dtype=t_floatX)
    h0 = relu(batchnorm(X=h0, g=linear_bn_w0, b=linear_bn_b0))
    h0 = h0.reshape((h0.shape[0], num_gen_filters0,
                     init_image_size, init_image_size))
    # layer 1 (deconv)
    h1 = deconv(h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))
    h1 = h1 + t_rng.normal(size=h1.shape, std=0.01, dtype=t_floatX)
    h1 = relu(batchnorm(h1, g=conv_bn_w1, b=conv_bn_b1))
    # layer 2 (deconv)
    h2 = deconv(h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))
    h2 = h2 + t_rng.normal(size=h2.shape, std=0.01, dtype=t_floatX)
    h2 = relu(batchnorm(h2, g=conv_bn_w2, b=conv_bn_b2))
    # layer 3 (deconv)
    h3 = deconv(h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))
    h3 = h3 + t_rng.normal(size=h3.shape, std=0.01, dtype=t_floatX)
    h3 = relu(batchnorm(h3, g=conv_bn_w3, b=conv_bn_b3))
    # layer 4 (deconv + tanh output)
    output = tanh(deconv(h3, conv_w4, subsample=(2, 2), border_mode=(2, 2))
                  + conv_b4.dimshuffle('x', 0, 'x', 'x'))
    return output
def generate_captions(As_words, As_masks, h_enc, gen_init0_lang,
                      gen_init0_lang_Y, Qs_masks, U_attention_gen,
                      W_attention_gen, b_attention_gen, v_attention_gen,
                      W_init_h0, b_init_h0, W_init_c0, b_init_c0,
                      W1_M, b1_M, W2_M, WM_mu_zt, bM_mu_zt,
                      WM_sigma_zt, bM_sigma_zt, W3_M, b3_M,
                      Wp_M_mu, bp_M_mu, Wp_M_sigma, bp_M_sigma,
                      W1_M0, b1_M0, W2_M0, WM_mu_zt0, bM_mu_zt0,
                      WM_sigma_zt0, bM_sigma_zt0, W3_M0, b3_M0,
                      Wp_M_mu0, bp_M_mu0, Wp_M_sigma0, bp_M_sigma0,
                      W_LSTM_hidden_gen, W_LSTM_in_gen, b_LSTM_gen,
                      W_word_gen, b_word_gen, W_softmax_gen, b_softmax_gen,
                      W_bow1, b_bow1, W_bow2, b_bow2,
                      W_softmax_bow, b_softmax_bow,
                      W_bow1t, b_bow1t, W_bow2t, b_bow2t,
                      W_softmax_bowt, b_softmax_bowt):
    ### Discourse level ###
    # calculate Q(Zd | Y, X), with X -> gen_init0_lang and Y -> gen_init0_lang_Y
    m_10 = lrelu(T.dot(gen_init0_lang, W1_M0)
                 + T.dot(gen_init0_lang_Y, W2_M0) + b1_M0)  # batch_size x 2*n_LSTM
    u_zt0 = T.dot(m_10, WM_mu_zt0) + bM_mu_zt0  # batch_size x n_LSTM
    log_sigma_zt0 = T.dot(m_10, WM_sigma_zt0) + bM_sigma_zt0
    # sample from Q(Zd)
    eps0 = t_rng.normal(size=(u_zt0.shape[0], u_zt0.shape[1]),
                        avg=0.0, std=1.0, dtype=theano.config.floatX)
    Zt0 = u_zt0 + T.exp(log_sigma_zt0) * eps0  # batch_size x dim_atten
    # bag-of-words (BOW) auxiliary loss on the discourse latent
    t_bow1 = lrelu(T.dot(Zt0, W_bow1) + b_bow1)  # batch x middle_dim
    t_bow2 = lrelu(T.dot(t_bow1, W_bow2) + b_bow2)
    word_soft_bow = T.dot(t_bow2, W_softmax_bow) + b_softmax_bow
    bow_K = T.nnet.softmax(word_soft_bow)
    # calculate p(Zd)
    h_prior_00 = lrelu(T.dot(gen_init0_lang, W3_M0) + b3_M0)  # batch_size x dim_atten
    u_0t0 = T.dot(h_prior_00, Wp_M_mu0) + bp_M_mu0
    log_sigma_0t0 = T.dot(h_prior_00, Wp_M_sigma0) + bp_M_sigma0
    # discourse-level KL( Q(Zd|Y,X) || p(Zd) )
    KL_t0 = ((log_sigma_0t0 - log_sigma_zt0)
             + (T.exp(2 * log_sigma_zt0) + (u_zt0 - u_0t0) ** 2)
             / (2 * T.exp(2 * log_sigma_0t0)) - 0.5)
    KL_t0 = T.sum(KL_t0)
    KL_t0 = (KL_t0 / u_0t0.shape[0]).astype(theano.config.floatX)
    LSTM_h0 = T.tanh(T.dot(Zt0, W_init_h0) + b_init_h0)
    cell0 = T.tanh(T.dot(Zt0, W_init_c0) + b_init_c0)
    word0 = (T.extra_ops.repeat(word_start, repeats=As_words.shape[1],
                                axis=1)).astype(theano.config.floatX)
    this_real_words = T.concatenate([word0, As_words], axis=0)
    eps_list = t_rng.normal(size=(As_masks.shape[0], Zt0.shape[0], Zt0.shape[1]),
                            avg=0.0, std=1.0, dtype=theano.config.floatX)

    def recurrence(word_t_prior, word_t, t_mask, eps, h_t_prior, c_t_prior,
                   z_t_prior, W_LSTM_in_gen, W_LSTM_hidden_gen, b_LSTM_gen,
                   W1_M, W2_M, b1_M, WM_mu_zt, bM_mu_zt, WM_sigma_zt,
                   bM_sigma_zt, W3_M, b3_M, Wp_M_mu, bp_M_mu,
                   Wp_M_sigma, bp_M_sigma):
        # LSTM input: previous word concatenated with the previous latent
        word_t_prior = T.concatenate([word_t_prior, z_t_prior], axis=1)
        lstm_t = (T.dot(h_t_prior, W_LSTM_hidden_gen)
                  + T.dot(word_t_prior, W_LSTM_in_gen) + b_LSTM_gen)
        i_t_enc = T.nnet.sigmoid(lstm_t[:, 0 * n_LSTM:1 * n_LSTM])
        f_t_enc = T.nnet.sigmoid(lstm_t[:, 1 * n_LSTM:2 * n_LSTM])
        cell_t_enc = (f_t_enc * c_t_prior
                      + i_t_enc * T.tanh(lstm_t[:, 2 * n_LSTM:3 * n_LSTM]))
        cell_t_enc = (t_mask.dimshuffle([0, 'x']) * cell_t_enc
                      + (1. - t_mask.dimshuffle([0, 'x'])) * c_t_prior)
        o_t_enc = T.nnet.sigmoid(lstm_t[:, 3 * n_LSTM:4 * n_LSTM])
        h_t = o_t_enc * T.tanh(cell_t_enc)
        h_t = (t_mask.dimshuffle([0, 'x']) * h_t
               + (1. - t_mask.dimshuffle([0, 'x'])) * h_t_prior)
        ### Word level ###
        m_1 = lrelu(T.dot(h_t, W1_M) + T.dot(word_t, W2_M) + b1_M)
        u_zt = T.dot(m_1, WM_mu_zt) + bM_mu_zt  # batch_size x dim_atten
        log_sigma_zt = T.dot(m_1, WM_sigma_zt) + bM_sigma_zt
        # sample from Q(Zwt)
        z_w_t = u_zt + T.exp(log_sigma_zt) * eps  # batch_size x dim_atten
        # calculate p(Zwt)
        h_prior_0 = lrelu(T.dot(h_t, W3_M) + b3_M)  # batch_size x dim_atten
        u_0t = T.dot(h_prior_0, Wp_M_mu) + bp_M_mu
        log_sigma_0t = T.dot(h_prior_0, Wp_M_sigma) + bp_M_sigma
        # word-level KL, masked by t_mask
        KL_t = ((log_sigma_0t - log_sigma_zt)
                + (T.exp(2 * log_sigma_zt) + (u_zt - u_0t) ** 2)
                / (2 * T.exp(2 * log_sigma_0t)) - 0.5)
        KL_t = T.sum(KL_t * t_mask.dimshuffle([0, 'x']))
        KL_t = (KL_t / h_t.shape[0]).astype(theano.config.floatX)
        return (h_t.astype(theano.config.floatX),
                cell_t_enc.astype(theano.config.floatX),
                z_w_t.astype(theano.config.floatX),
                KL_t.astype(theano.config.floatX))

    (h_list, _, Zt, KL_t_list), _ = theano.scan(
        recurrence,
        sequences=[this_real_words[0:-1], As_words, As_masks, eps_list],
        outputs_info=[LSTM_h0, cell0, Zt0, None],
        non_sequences=[
            W_LSTM_in_gen, W_LSTM_hidden_gen, b_LSTM_gen, W1_M, W2_M, b1_M,
            WM_mu_zt, bM_mu_zt, WM_sigma_zt, bM_sigma_zt, W3_M, b3_M,
            Wp_M_mu, bp_M_mu, Wp_M_sigma, bp_M_sigma
        ],
        n_steps=As_masks.shape[0],
        strict=True)
    # attention over the encoder states
    hid_align = T.dot(h_enc, U_attention_gen)  # T_enc x batch x dim_atten
    h_t_info = T.concatenate([Zt, this_real_words[0:-1]],
                             axis=2)  # T_dec x batch x (dim_atten + n_word_dim)
    hdec_align = T.dot(h_t_info, W_attention_gen)  # T_dec x batch x dim_atten
    all_align = T.tanh(hid_align.dimshuffle([0, 'x', 1, 2])
                       + hdec_align.dimshuffle(['x', 0, 1, 2])
                       + b_attention_gen.dimshuffle(['x', 'x', 'x', 0]))
    # T_enc x T_dec x batch_size x dim_atten
    e = all_align * v_attention_gen.dimshuffle(['x', 'x', 'x', 0])
    e = e.sum(axis=3) * Qs_masks.dimshuffle([0, 'x', 1])  # T_enc x T_dec x batch_size
    e = e.dimshuffle([1, 2, 0])  # T_dec x batch_size x T_enc
    e2 = T.reshape(e, [e.shape[0] * e.shape[1], e.shape[2]],
                   ndim=2)  # (T_dec * batch_size) x T_enc
    # normalize attention weights
    alpha = T.nnet.softmax(e2)  # (T_dec * batch_size) x T_enc
    alpha = T.reshape(alpha, [e.shape[0], e.shape[1], e.shape[2]], ndim=3)
    attention_enc = (alpha.dimshuffle([0, 2, 1, 'x'])
                     * h_enc.dimshuffle(['x', 0, 1, 2]))  # T_dec x T_enc x batch x h_dim
    attention_enc = attention_enc.sum(axis=1)  # -> T_dec x batch x h_dim
    # word prediction
    prepare_word = T.concatenate([attention_enc, h_list, Zt], axis=2)
    word_t = lrelu(T.dot(prepare_word, W_word_gen) + b_word_gen)  # T x batch x middle_dim
    word_soft = T.dot(word_t, W_softmax_gen) + b_softmax_gen
    word_soft_K = T.nnet.softmax(
        T.reshape(word_soft,
                  [word_soft.shape[0] * word_soft.shape[1], word_soft.shape[2]],
                  ndim=2))  # (T * batch) x n_word_dict
    # auxiliary BOW path on the word-level latents
    t_bow1t = lrelu(T.dot(Zt, W_bow1t) + b_bow1t)  # T x batch x middle_dim
    t_bow2t = lrelu(T.dot(t_bow1t, W_bow2t) + b_bow2t)
    word_soft_bowt = T.dot(t_bow2t, W_softmax_bowt) + b_softmax_bowt
    word_soft_K_Zt = T.nnet.softmax(
        T.reshape(word_soft_bowt,
                  [word_soft_bowt.shape[0] * word_soft_bowt.shape[1],
                   word_soft_bowt.shape[2]],
                  ndim=2))
    return (word_soft_K,
            KL_t0.astype(theano.config.floatX),
            T.sum(KL_t_list).astype(theano.config.floatX),
            bow_K.astype(theano.config.floatX),
            word_soft_K_Zt.astype(theano.config.floatX))
def generate_next(h_t_prior, word_t_prior, z_t_prior, c_t_prior, Qs_masks,
                  h_enc, hid_align, W_LSTM_in_gen, W_LSTM_hidden_gen,
                  b_LSTM_gen, W_attention_gen, b_attention_gen,
                  v_attention_gen, W_word_gen, b_word_gen, W_softmax_gen,
                  b_softmax_gen, W3_M, b3_M, Wp_M_mu, bp_M_mu,
                  Wp_M_sigma, bp_M_sigma):
    # one decoding step at generation time; the time masks used during
    # training are not needed here
    word_t_prior2 = T.concatenate([word_t_prior, z_t_prior], axis=1)
    lstm_t = (T.dot(h_t_prior, W_LSTM_hidden_gen)
              + T.dot(word_t_prior2, W_LSTM_in_gen) + b_LSTM_gen)
    i_t_enc = T.nnet.sigmoid(lstm_t[:, 0 * n_LSTM:1 * n_LSTM])
    f_t_enc = T.nnet.sigmoid(lstm_t[:, 1 * n_LSTM:2 * n_LSTM])
    cell_t_enc = (f_t_enc * c_t_prior
                  + i_t_enc * T.tanh(lstm_t[:, 2 * n_LSTM:3 * n_LSTM]))
    o_t_enc = T.nnet.sigmoid(lstm_t[:, 3 * n_LSTM:4 * n_LSTM])
    h_list = o_t_enc * T.tanh(cell_t_enc)
    # VAE: sample the word-level latent from the prior p(Zt)
    h_prior_0 = lrelu(T.dot(h_list, W3_M) + b3_M)  # batch_size x dim_atten
    u_0t = T.dot(h_prior_0, Wp_M_mu) + bp_M_mu
    log_sigma_0t = T.dot(h_prior_0, Wp_M_sigma) + bp_M_sigma
    eps = t_rng.normal(size=(u_0t.shape[0], u_0t.shape[1]),
                       avg=0.0, std=1.0, dtype=theano.config.floatX)
    Zt = u_0t + T.exp(log_sigma_0t) * eps  # batch_size x dim_atten
    # attention over the encoder states;
    # hid_align = T.dot(h_enc, U_attention_gen) is precomputed by the caller
    h_t_info = T.concatenate([Zt, word_t_prior], axis=1)
    hdec_align = T.dot(h_t_info, W_attention_gen)  # batch x dim_atten
    all_align = T.tanh(hid_align + hdec_align.dimshuffle(['x', 0, 1])
                       + b_attention_gen.dimshuffle(['x', 'x', 0]))
    # T_enc x batch_size x dim_atten
    e = all_align * v_attention_gen.dimshuffle(['x', 'x', 0])
    e = e.sum(axis=2) * Qs_masks  # T_enc x batch_size
    # normalize attention weights
    alpha = T.nnet.softmax(e.T)  # batch_size x T_enc
    # context vector over the encoder features at this time step
    attention_enc = alpha.dimshuffle([1, 0, 'x']) * h_enc  # T_enc x batch x h_dim
    attention_enc = attention_enc.sum(axis=0)  # -> batch x h_dim
    prepare_word = T.concatenate([attention_enc, h_list, Zt], axis=1)
    word_t = lrelu(T.dot(prepare_word, W_word_gen) + b_word_gen)  # batch x middle_dim
    word_soft = T.dot(word_t, W_softmax_gen) + b_softmax_gen
    word_soft_K = T.nnet.softmax(word_soft)
    return (word_soft_K.astype(theano.config.floatX),
            h_list.astype(theano.config.floatX),
            cell_t_enc.astype(theano.config.floatX),
            Zt.astype(theano.config.floatX))
# As_word_list_flat = T.flatten(As_word_list.T, outdim=1)  # (n_words * n_samples,)
# As_word_vecs = shared_Word_vecs[As_word_list_flat].reshape(
#     [As_word_list.shape[1], As_word_list.shape[0], n_word_dim])  # T x batch x n_dim
h_t_lang, gen_init0_lang = encoder_network(Qs_word_vecs, Qs_mask.T,
                                           *enc_params)  # batch x n_LSTM
# calculate p(Zt)
h_prior_00 = lrelu(T.dot(gen_init0_lang, W3_M0) + b3_M0)  # batch_size x dim_atten
u_0t0 = T.dot(h_prior_00, Wp_M_mu0) + bp_M_mu0
log_sigma_0t0 = T.dot(h_prior_00, Wp_M_sigma0) + bp_M_sigma0
# scale_Z scales the noise term, controlling how far samples stray from the prior mean
scale_Z = T.scalar('scale_Z', dtype='float32')
eps = t_rng.normal(size=(u_0t0.shape[0], u_0t0.shape[1]),
                   avg=0.0, std=1.0, dtype=theano.config.floatX)
# eps = t_rng.binomial(size=(u_0t0.shape[0], u_0t0.shape[1]), p=0.5,
#                      dtype=theano.config.floatX)
Zt = (u_0t0 + T.exp(log_sigma_0t0) * eps * scale_Z).astype(theano.config.floatX)
LSTM_h0 = T.tanh(T.dot(Zt, W_init_h0) + b_init_h0)
cell0 = T.tanh(T.dot(Zt, W_init_c0) + b_init_c0)
word0 = (T.extra_ops.repeat(word_start, repeats=Qs_word_list.shape[0],
                            axis=1)).astype(theano.config.floatX)
# Total_M_h_enc = T.concatenate([Total_m0.dimshuffle([1, 0, 2]), h_t_lang], axis=0)
# Qs_mask_in = T.concatenate(
#     [T.ones((Total_m0.shape[1], Total_m0.shape[0]), dtype=theano.config.floatX),
#      Qs_mask.T], axis=0)  # Qs_mask: batch x T