def ae(x, y, is_train, opt, epoch_t, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    z = layers.linear(H_enc, num_outputs=opt.z_dim,
                      biases_initializer=biasInit, scope='z')

    logits = discriminator_linear(z, opt, prefix='classify_', is_train=is_train)  # batch * 1
    prob = tf.nn.sigmoid(logits)

    correct_prediction = tf.equal(tf.round(prob), tf.round(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))
    tf.summary.scalar('loss', loss)

    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'dis' in var.name]
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        variables=d_vars,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=int(epoch_t * opt.decay_ep)),
        learning_rate=opt.lr,
        summaries=summaries)

    return loss, train_op, accuracy
def ae(x, x_org, is_train, opt, epoch_t, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    z = layers.linear(H_enc, num_outputs=opt.z_dim,
                      biases_initializer=biasInit, scope='z')

    rec_loss, rec_sent_1, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train)
    _, rec_sent_2, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train,
                                              feed_previous=True, is_reuse=True)
    res['rec_sents_feed_y'] = rec_sent_1
    res['rec_sents'] = rec_sent_2

    # compute total loss
    loss = rec_loss
    tf.summary.scalar('loss', loss)

    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=int(epoch_t * opt.decay_ep)),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, loss, train_op
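# ---------------------------------------------------------------------------
# Minimal usage sketch for the autoencoder `ae` graph above. The option
# object `opt`, the `iterate_minibatches` helper, and the placeholder shapes
# are assumptions for illustration; none of them is defined in this file.
def train_ae_example(opt, train_x, train_x_org, epoch_t):
    x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
    x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
    is_train_ = tf.placeholder(tf.bool)
    res_, loss_, train_op = ae(x_, x_org_, is_train_, opt, epoch_t)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for x_batch, x_org_batch in iterate_minibatches(train_x, train_x_org, opt.batch_size):
            _, cur_loss = sess.run(
                [train_op, loss_],
                feed_dict={x_: x_batch, x_org_: x_org_batch, is_train_: True})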
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)
    H_dec = H_enc

    if opt.model == 'rnn_rnn':
        loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
        _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)
        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2
    elif opt.model == 'cnn_rnn':
        # lstm decoder
        H_dec2 = tf.identity(H_dec)
        loss, rec_sent_1, _ = lstm_decoder(H_dec, x_org, opt)
        # restored: this free-running pass was commented out, but rec_sent_2
        # is consumed just below
        _, rec_sent_2, _ = lstm_decoder(H_dec, x_org, opt, feed_previous=True, is_reuse=True)
        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2
    else:
        # deconv decoder
        loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    tf.summary.scalar('loss', loss)

    summaries = [
        "learning_rate",
        "loss",
        "gradients",
        "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, loss, train_op
def vae(beta, x, x_org, is_train, opt, lr, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    mu = layers.linear(H_enc, num_outputs=opt.z_dim,
                       biases_initializer=biasInit, scope='mu')
    logvar = layers.linear(H_enc, num_outputs=opt.z_dim,
                           biases_initializer=biasInit, scope='logvar')
    z = sample_z(mu, logvar)
    kl_loss = tf.reduce_mean(-0.5 * tf.reduce_sum(
        1 + logvar - tf.square(mu) - tf.exp(logvar), axis=-1))

    rec_loss, rec_sent_1, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train)
    _, rec_sent_2, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train,
                                              feed_previous=True, is_reuse=True)
    res['rec_sents_feed_y'] = rec_sent_1
    res['rec_sents'] = rec_sent_2

    # compute total loss
    loss = rec_loss + beta * kl_loss
    tf.summary.scalar('beta', beta)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('rec_loss', rec_loss)

    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate=lr,
        summaries=summaries)

    return res, loss, rec_loss, kl_loss, train_op
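# ---------------------------------------------------------------------------
# `sample_z` is called by `vae` above but not defined in this file. A minimal
# sketch of the standard reparameterization trick the call implies
# (z = mu + sigma * eps, eps ~ N(0, I)); treat this as an assumption about
# the helper, not its actual definition elsewhere in the repo.
def sample_z(mu, logvar):
    eps = tf.random_normal(tf.shape(mu), 0., 1., dtype=tf.float32)
    return mu + tf.exp(0.5 * logvar) * eps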
def conditional_s2s(src, tgt, z, opt, opt_t=None, is_reuse_generator=None):
    if not opt_t:
        opt_t = opt
    res = {}

    # the discriminator embedding is needed both when re-encoding the target
    # (use_tgt_z) and when re-encoding the generated sentence
    # (global_feature), so it is built unconditionally; the original built it
    # only inside the use_tgt_z branch, which raised a NameError when
    # global_feature was set without use_tgt_z
    W_norm_d = embedding_only(opt, prefix='d_', is_reuse=None)
    if opt.use_tgt_z:
        z, _ = encoder(tgt, W_norm_d, opt, l_temp=1, prefix='d_',
                       is_reuse=None, is_prob=None, is_padded=False)

    syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
        z, src, tgt, opt, is_reuse=is_reuse_generator, prefix='g_')

    if opt.global_feature:
        z_hat, _ = encoder(syn_one_hot, W_norm_d, opt, l_temp=1, prefix='d_',
                           is_reuse=True, is_prob=True, is_padded=False)
        z_loss = tf.reduce_sum(tf.square(z - z_hat)) / opt.batch_size / opt.n_hid
        res['z'] = z
        res['z_hat'] = z_hat
        res['z_loss'] = z_loss
    res['syn_sent'] = syn_sent
    g_cost = sup_loss + (z_loss * opt.lambda_z if opt.global_feature else 0)

    tf.summary.scalar('sup_loss', sup_loss)
    if opt.global_feature:
        tf.summary.scalar('z_loss', z_loss)
    summaries = [
        "learning_rate",
        "loss",
    ]

    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if 'g_' in var.name]

    train_op_g = layers.optimize_loss(
        g_cost,
        framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        variables=g_vars,
        learning_rate=opt.lr_g,
        summaries=summaries)

    return res, g_cost, train_op_g
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    # print x.get_shape()  # batch L
    with tf.variable_scope("pretrain"):
        if not opt_t:
            opt_t = opt
        x_emb, W_norm = embedding(x, opt)  # batch L emb
        x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
        res = {}
        H, res = conv_encoder(x_emb, is_train, opt, res)
        H_mean, H_log_sigma_sq = vae_classifier_2layer(H, opt)
        eps = tf.random_normal([opt.batch_size, opt.ef_dim], 0, 1, dtype=tf.float32)
        H_dec = tf.add(H_mean, tf.multiply(tf.sqrt(tf.exp(H_log_sigma_sq)), eps))
        H_dec2 = tf.identity(H_dec)

        # the KL term depends only on the posterior parameters, so it is
        # computed once here; the original defined it inside the 'cnn_rnn'
        # branch, leaving the kl_loss summary below undefined for the other
        # decoders
        kl_loss = tf.reduce_mean(-0.5 * tf.reduce_mean(
            1 + H_log_sigma_sq - tf.square(H_mean) - tf.exp(H_log_sigma_sq), 1))

        if opt.model == 'rnn_rnn':
            loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
            _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)
            res['rec_sents_feed_y'] = rec_sent_1
            res['rec_sents'] = rec_sent_2
        elif opt.model == 'cnn_rnn':
            # lstm decoder
            if opt.rnn_share_emb:
                loss, rec_sent_1, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t)
                _, rec_sent_2, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t,
                                                          feed_previous=True, is_reuse=True)
            else:
                loss, rec_sent_1, _ = lstm_decoder(H_dec2, x_org, opt_t)
                # restored: rec_sent_2 is consumed below, but this call was
                # commented out in the original
                _, rec_sent_2, _ = lstm_decoder(H_dec2, x_org, opt_t,
                                                feed_previous=True, is_reuse=True)
            loss += kl_loss
            res['rec_sents_feed_y'] = rec_sent_1
            res['rec_sents'] = rec_sent_2
        else:
            # deconv decoder
            H_dec2 = tf.expand_dims(tf.expand_dims(H_dec, 1), 1)
            loss, res = deconv_decoder(H_dec2, x_org, W_norm, is_train, opt_t, res)
            res['rec_sents'] = res['rec_sents'][:, (opt.filter_shape - 1):(opt.filter_shape - 1 + opt.sentence)]

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('kl_loss', kl_loss)
    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, loss, train_op
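# ---------------------------------------------------------------------------
# `vae_classifier_2layer` (used above) is defined elsewhere in the repo. As a
# hedged sketch of what the call site implies, it maps the encoder feature H
# to a posterior mean and log-variance of width opt.ef_dim through a shared
# hidden layer; the hidden width and scope names here are assumptions.
def vae_classifier_2layer_sketch(H, opt):
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    H1 = layers.fully_connected(H, num_outputs=opt.ef_dim, activation_fn=tf.nn.relu,
                                biases_initializer=biasInit, scope='vae_h1')
    H_mean = layers.linear(H1, num_outputs=opt.ef_dim,
                           biases_initializer=biasInit, scope='vae_mean')
    H_log_sigma_sq = layers.linear(H1, num_outputs=opt.ef_dim,
                                   biases_initializer=biasInit, scope='vae_logvar')
    return H_mean, H_log_sigma_sq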
def textGAN(x, opt):
    res_ = {}
    with tf.variable_scope("pretrain"):
        # z = tf.random_uniform([opt.batch_size, opt.latent_size], minval=-1., maxval=1.)
        z = tf.random_normal([opt.batch_size, opt.latent_size])
        W_norm = embedding_only(opt, is_reuse=None)
        _, syn_sent, logits = lstm_decoder_embedding(z, tf.ones_like(x), W_norm, opt,
                                                     add_go=True, feed_previous=True,
                                                     is_reuse=None, is_softargmax=True,
                                                     is_sampling=False)
        prob = [tf.nn.softmax(l * opt.L) for l in logits]
        prob = tf.stack(prob, 1)

    with tf.variable_scope("d_net"):
        logits_real, H_real = discriminator(x, opt)

    with tf.variable_scope("d_net"):
        logits_fake, H_fake = discriminator(prob, opt, is_prob=True, is_reuse=True)

    res_['syn_sent'] = syn_sent
    res_['real_f'] = tf.squeeze(H_real)

    # Loss
    D_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(logits_real), logits=logits_real)) + \
        tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(logits_fake), logits=logits_fake))

    fake_mean = tf.reduce_mean(H_fake, axis=0)
    real_mean = tf.reduce_mean(H_real, axis=0)
    mean_dist = tf.sqrt(tf.reduce_mean((fake_mean - real_mean)**2))
    res_['mean_dist'] = mean_dist

    GAN_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake,
                                                       labels=tf.ones_like(logits_fake))
    MMD_loss = compute_MMD_loss(tf.squeeze(H_fake), tf.squeeze(H_real), opt)
    # generator objective: feature mean matching; alternatives tried were
    # MMD_loss and adding tf.reduce_mean(GAN_loss)
    G_loss = mean_dist
    res_['mmd'] = MMD_loss
    res_['gan'] = tf.reduce_mean(GAN_loss)

    tf.summary.scalar('D_loss', D_loss)
    tf.summary.scalar('G_loss', G_loss)
    summaries = [
        "learning_rate",
        # "G_loss",
        # "D_loss",
        # "gradients",
        # "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    all_vars = tf.trainable_variables()
    g_vars = [var for var in all_vars if var.name.startswith('pretrain')]
    d_vars = [var for var in all_vars if var.name.startswith('d_')]
    print([g.name for g in g_vars])

    generator_op = layers.optimize_loss(
        G_loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        variables=g_vars,
        summaries=summaries)

    discriminator_op = layers.optimize_loss(
        D_loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        variables=d_vars,
        summaries=summaries)

    return res_, G_loss, D_loss, generator_op, discriminator_op
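# ---------------------------------------------------------------------------
# Sketch of the alternating update schedule typically used with the two ops
# returned by textGAN. The `loader` iterator and the opt fields `maxlen`,
# `max_epochs`, and `n_d_steps` are assumptions for illustration, not
# attributes defined elsewhere in this file.
def train_textgan_example(opt, loader):
    x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
    res_, G_loss, D_loss, g_op, d_op = textGAN(x_, opt)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(opt.max_epochs):
            # several discriminator updates per generator update
            for _ in range(opt.n_d_steps):
                d_cost, _ = sess.run([D_loss, d_op], feed_dict={x_: loader.next_batch()})
            g_cost, _ = sess.run([G_loss, g_op], feed_dict={x_: loader.next_batch()})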
def dialog_gan(src, tgt, opt, opt_t=None):
    z = tf.random_uniform(shape=[opt.fake_size, opt.n_z], minval=-1., maxval=1.)
    if opt.two_side:
        res_dict, gan_cost_d, gan_cost_g = conditional_gan(src, tgt, z, opt, opt_t=opt_t)
        # reverse direction: left-pad the target to form the new source and
        # strip the padding from the old source to form the new target
        src_rev = tf.concat([
            tf.cast(tf.zeros([opt.batch_size, (opt.filter_shape - 1)]), tf.int32), tgt], 1)
        tgt_rev = src[:, (opt.filter_shape - 1):]
        rev_res_dict, gan_cost_d_rev, gan_cost_g_rev = conditional_gan(
            src_rev, tgt_rev, z, opt, opt_t=opt_t, is_reuse_generator=tf.AUTO_REUSE)
        gan_cost_d += opt.lambda_backward * gan_cost_d_rev
        gan_cost_g += opt.lambda_backward * gan_cost_g_rev
    else:
        res_dict, gan_cost_d, gan_cost_g = conditional_gan(src, tgt, z, opt, opt_t=opt_t)

    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'd_' in var.name]
    if opt.g_fix:
        g_vars = [var for var in t_vars if 'g_g_' in var.name]
        print("Fix most G params, except " + " ".join([v.name for v in g_vars]))
    else:
        g_vars = [var for var in t_vars if 'g_' in var.name]

    tf.summary.scalar('loss_d', gan_cost_d)
    tf.summary.scalar('loss_g', gan_cost_g)
    summaries = [
        "learning_rate",
        "loss",
    ]

    global_step = tf.Variable(0, trainable=False)
    train_op_d = layers.optimize_loss(
        gan_cost_d,
        global_step=global_step,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        variables=d_vars,
        learning_rate=opt.lr_d,
        summaries=summaries)
    train_op_g = layers.optimize_loss(
        gan_cost_g,
        global_step=global_step,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        variables=g_vars,
        learning_rate=opt.lr_g,
        summaries=summaries)

    return res_dict, gan_cost_d, train_op_d, gan_cost_g, train_op_g
def semi_classifier(alpha, x, x_org, x_lab, y, dp_ratio, opt, opt_t=None):
    # print x.get_shape()  # batch L
    is_train = True
    if not opt_t:
        opt_t = opt
    x_lab_emb, W_norm = embedding(x_lab, opt)  # batch L emb
    x_emb = tf.nn.embedding_lookup(W_norm, x)
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    x_lab_emb = tf.expand_dims(x_lab_emb, 3)  # batch L emb 1
    x_lab_emb = tf.nn.dropout(x_lab_emb, dp_ratio)
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)
    H_lab_enc, res = conv_encoder(x_lab_emb, is_train, opt, res, is_reuse=True)
    H_dec = H_enc

    logits = classifier_2layer(H_lab_enc, opt, dropout=dp_ratio,
                               prefix='classify', is_reuse=None)
    dis_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

    # calculate the accuracy
    prob = tf.nn.sigmoid(logits)

    # deconv decoder (the commented-out rnn_rnn / cnn_rnn decoder branches of
    # the original are omitted here)
    rec_loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    correct_prediction = tf.equal(tf.round(prob), tf.round(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # calculate the total loss
    loss = alpha * rec_loss + (1 - alpha) * dis_loss
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('rec_loss', rec_loss)
    tf.summary.scalar('dis_loss', dis_loss)

    summaries = [
        # "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]

    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        # learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
        #     learning_rate=lr, global_step=g,
        #     decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, dis_loss, rec_loss, loss, train_op, prob, accuracy
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)
    H_dec = H_enc

    if opt.model == 'rnn_rnn':
        loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
        _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)
        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2
    elif opt.model == 'cnn_rnn':
        # lstm decoder
        H_dec2 = tf.identity(H_dec)
        if opt.rnn_share_emb:
            loss, rec_sent_1, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t)
            _, rec_sent_2, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t,
                                                      feed_previous=True, is_reuse=True)
        else:
            loss, rec_sent_1, _ = lstm_decoder(H_dec2, x_org, opt_t)
            # restored: rec_sent_2 is consumed below, but this call was
            # commented out in the original
            _, rec_sent_2, _ = lstm_decoder(H_dec2, x_org, opt_t,
                                            feed_previous=True, is_reuse=True)
        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2
    else:
        # deconv decoder
        loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    tf.summary.scalar('loss', loss)
    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr, global_step=g,
            decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, loss, train_op
def conditional_s2s(src, tgt, is_train, opt, opt_t=None, is_reuse_generator=None):
    if not opt_t:
        opt_t = opt
    W_norm_d = embedding_only(opt, prefix='d_', is_reuse=None)
    res = {}

    z_all, z_tgt, loss_pred_z = feature_vector(src, tgt, is_train, W_norm_d, opt, prefix='d_')
    if opt.local_feature:
        z_all_l, z_tgt_l, loss_pred_z_l = feature_vector(src, tgt, is_train,
                                                         W_norm_d, opt, prefix='l_')  # B Z
        z_all = tf.concat([z_all, z_all_l], axis=1)
        z_tgt = tf.concat([z_tgt, z_tgt_l], axis=1)
        loss_pred_z += loss_pred_z_l

    if opt.multiple_src:
        syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
            z_all, src, tgt, opt, is_reuse=is_reuse_generator, prefix='g_')
    else:
        syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
            z_all, src[-1], tgt, opt, is_reuse=is_reuse_generator, prefix='g_')

    is_logit = (opt.z_loss != 'L2')
    if opt.global_feature:
        _, z_hat = encoder(syn_one_hot, W_norm_d, opt, num_outputs=opt.n_z, l_temp=1,
                           prefix='d_', is_reuse=True, is_prob=True, is_padded=False,
                           is_logit=is_logit)
        if opt.local_feature:
            _, z_hat_l = encoder(syn_one_hot, W_norm_d, opt, num_outputs=opt.n_z, l_temp=1,
                                 prefix='l_', is_reuse=True, is_prob=True, is_padded=False,
                                 is_logit=is_logit)
            z_hat = tf.concat([z_hat, z_hat_l], axis=1)  # B Z
        if opt.z_loss == 'L2':
            z_loss = tf.reduce_mean(tf.square(z_all - z_hat))
        else:
            z_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=z_all, logits=z_hat))
        res['z'] = z_all
        res['z_hat'] = z_hat
        res['z_loss'] = z_loss
    res['z_tgt'] = z_tgt
    res['z_loss_pred'] = loss_pred_z
    res['syn_sent'] = syn_sent
    g_cost = sup_loss + (z_loss * opt.lambda_z if opt.global_feature else 0) + loss_pred_z

    tf.summary.scalar('sup_loss', sup_loss)
    if opt.global_feature:
        tf.summary.scalar('z_loss', z_loss)
    summaries = [
        "learning_rate",
        "loss",
    ]

    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if 'g_' in var.name]

    train_op_g = layers.optimize_loss(
        g_cost,
        framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        variables=(t_vars if opt.relax_d else g_vars),
        learning_rate=opt.lr_g,
        summaries=summaries)

    return res, g_cost, train_op_g
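# ---------------------------------------------------------------------------
# `_clip_gradients_seperate_norm` is referenced by every optimize_loss call in
# this file but not defined here. A minimal sketch of what the name suggests:
# clip each gradient by its own norm (rather than by the global norm), keeping
# the (gradient, variable) list shape that layers.optimize_loss passes to a
# callable clip_gradients argument. Treat this as an assumed implementation.
def _clip_gradients_seperate_norm(grads_and_vars, clip_gradients):
    gradients, variables = zip(*grads_and_vars)
    clipped = [None if g is None else tf.clip_by_norm(g, clip_gradients)
               for g in gradients]
    return list(zip(clipped, variables))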