def classifier():
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        src_x=placeholder((None, 32, 32, 3)),
        src_y=placeholder((None, 10)),
        trg_x=placeholder((None, 32, 32, 3)),
        trg_y=placeholder((None, 10)),
        test_x=placeholder((None, 32, 32, 3)),
        test_y=placeholder((None, 10)),
        phase=placeholder((), tf.bool)))

    # Supervised and conditional entropy minimization
    src_y = net.classifier(T.src_x, phase=True, internal_update=False)
    trg_y = net.classifier(T.trg_x, phase=True, internal_update=True, reuse=True)

    loss_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_y))

    # Evaluation (non-EMA)
    test_y = net.classifier(T.test_x, phase=False, scope='class', reuse=True)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    ema_op = ema.apply(tf.get_collection('trainable_variables', 'class/'))
    T.ema_y = net.classifier(T.test_x, phase=False, reuse=True, getter=get_getter(ema))

    src_acc = basic_accuracy(T.src_y, src_y)
    trg_acc = basic_accuracy(T.trg_y, trg_y)
    ema_acc = basic_accuracy(T.test_y, T.ema_y)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    loss_main = loss_class
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    train_main = tf.group(train_main, ema_op)

    # Summarizations
    summary_main = [tf.summary.scalar('class/loss_class', loss_class),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('class'), loss_class]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc

    return T
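
# Usage sketch (not part of the original source): one way to drive the TensorDict returned by
# classifier() above. The batch iterators `src_gen`/`trg_gen`, the held-out arrays
# `test_x`/`test_y`, and the log directory are hypothetical stand-ins; only T's own
# placeholders, saved ops, and tb.function handles are assumed from the code above.
def run_classifier_example(T, src_gen, trg_gen, test_x, test_y, iterations=1000):
    writer = tf.summary.FileWriter('log/classifier', T.sess.graph)
    T.sess.run(tf.global_variables_initializer())
    for i in range(iterations):
        sx, sy = next(src_gen)  # labeled source batch
        tx, ty = next(trg_gen)  # target batch (labels only feed the trg_acc summary)
        summary, _ = T.sess.run(T.ops_main, feed_dict={T.src_x: sx, T.src_y: sy,
                                                       T.trg_x: tx, T.trg_y: ty})
        writer.add_summary(summary, global_step=i)
        if i % 100 == 0:
            # EMA-smoothed test accuracy via the saved tb.function
            print('iter {}: ema test acc = {:.4f}'.format(i, T.fn_ema_acc(test_x, test_y)))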
def model(FLAGS):
    print(colored("Model is called.", "blue"))

    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        sv=placeholder((FLAGS.bs, 369539)),
        ts=placeholder((FLAGS.bs, )),
        test_sv=placeholder((FLAGS.bs, 369539)),
        test_ts=placeholder((FLAGS.bs, ))))

    # h = dense(T.sv, FLAGS.d, scope='hidden', bn=False, phase=True, reuse=tf.AUTO_REUSE)
    # o = dense(h, 1, scope='out', bn=False, phase=True, reuse=tf.AUTO_REUSE)
    # test_h = dense(T.test_sv, FLAGS.d, scope='hidden', bn=False, phase=False, reuse=tf.AUTO_REUSE)
    # test_o = dense(test_h, 1, scope='out', bn=False, phase=False, reuse=tf.AUTO_REUSE)

    hidden1 = tf.get_variable("hidden1", [369539, FLAGS.d])
    hidden2 = tf.get_variable("hidden2", [FLAGS.d, 1])
    h = tf.matmul(T.sv, hidden1)
    o = tf.matmul(h, hidden2)
    test_h = tf.matmul(T.test_sv, hidden1)
    test_o = tf.matmul(test_h, hidden2)

    # The matmul outputs have shape (bs, 1) while the targets have shape (bs,); squeeze the
    # predictions so the loss and error below are computed elementwise rather than over a
    # broadcast (bs, bs) matrix.
    o = tf.squeeze(o, axis=1)
    test_o = tf.squeeze(test_o, axis=1)

    loss = tf.reduce_mean(tf.squared_difference(o, T.ts))
    test_o_mean = tf.reduce_mean(test_o)
    test_ts_mean = tf.reduce_mean(T.test_ts)
    test_error = tf.reduce_mean((test_o - T.test_ts) / T.test_ts) * 100.0
    error = tf.reduce_mean((o - T.ts) / T.ts) * 100.0

    # optimizer = tf.train.AdagradOptimizer(FLAGS.lr).minimize(loss)
    optimizer = tf.train.AdamOptimizer(FLAGS.lr).minimize(loss)

    summary = [tf.summary.scalar('loss', loss),
               tf.summary.scalar('error', error)]
    summary = tf.summary.merge(summary)

    c = tf.constant
    T.ops_print = [c('loss'), loss,
                   c('error'), error,
                   c('test_error'), test_error,
                   c('test_o_mean'), test_o_mean,
                   c('test_ts_mean'), test_ts_mean]
    T.ops = [summary, optimizer]

    print(colored("Model is initialized.", "blue"))

    return T
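
# Usage sketch (not in the original file): feeding the regression model above. The sparse-vector
# batches `sv`/`ts` and their test counterparts are hypothetical numpy arrays shaped
# (FLAGS.bs, 369539) and (FLAGS.bs,); only T.ops and T.ops_print come from model() itself.
def run_regression_example(T, sv, ts, test_sv, test_ts, steps=100):
    T.sess.run(tf.global_variables_initializer())
    feed = {T.sv: sv, T.ts: ts, T.test_sv: test_sv, T.test_ts: test_ts}
    for step in range(steps):
        T.sess.run(T.ops, feed_dict=feed)           # summary + optimizer step
        if step % 10 == 0:
            print(T.sess.run(T.ops_print, feed_dict=feed))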
def build_graph():
    T = tb.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        x=tb.nn.placeholder((None, 32, 32, 3)),
        y=tb.nn.placeholder((None, 10)),
    ))

    y = classifier(T.x, phase=True)
    loss = tf.reduce_mean(softmax_xent(labels=T.y, logits=y))
    train_main = tf.train.AdamOptimizer().minimize(loss)

    T.main_ops = [train_main, loss]
    T.sess.run(tf.global_variables_initializer())

    return T
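
# Usage sketch (not in the original file): a single supervised training step on the graph built
# above. The `images`/`labels` arguments are hypothetical numpy batches of shape (N, 32, 32, 3)
# and (N, 10); T.main_ops and the placeholders are the ones defined in build_graph().
def train_step_example(T, images, labels):
    _, loss_value = T.sess.run(T.main_ops, feed_dict={T.x: images, T.y: labels})
    return loss_value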
def dirtt():
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        src_x=placeholder((None, 32, 32, 3)),
        src_y=placeholder((None, args.Y)),
        trg_x=placeholder((None, 32, 32, 3)),
        trg_y=placeholder((None, args.Y)),
        test_x=placeholder((None, 32, 32, 3)),
        test_y=placeholder((None, args.Y)),
    ))

    # Supervised and conditional entropy minimization
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))

        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        ave = ave if ave else m
        teacher_assign_ops += [tf.assign(t, ave)]
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1) if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc)]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
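
# Usage sketch (not part of the original source): the alternating update pattern implied by the
# saved ops of dirtt(). The batch iterators and evaluation arrays are hypothetical; the
# discriminator op is only meaningful when args.dw > 0 and args.dirt == 0 (otherwise
# T.ops_disc[1] is just a constant), and update_teacher snapshots the EMA weights into the
# 'teacher' scope before a DIRT-T refinement phase.
def run_dirtt_example(T, src_gen, trg_gen, test_x, test_y, iterations=1000):
    T.sess.run(tf.global_variables_initializer())
    for i in range(iterations):
        sx, sy = next(src_gen)
        tx, ty = next(trg_gen)
        feed = {T.src_x: sx, T.src_y: sy, T.trg_x: tx, T.trg_y: ty}
        T.sess.run(T.ops_disc, feed_dict=feed)   # feature discriminator step
        T.sess.run(T.ops_main, feed_dict=feed)   # classifier + EMA step
        if i % 100 == 0:
            print('iter {}: ema acc = {:.4f}'.format(i, T.fn_ema_acc(test_x, test_y)))
    T.sess.run(T.update_teacher)                  # back up EMA weights into the teacher scope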
def model(FLAGS, gpu_config):
    """
    :param FLAGS: Contains the experiment info
    :return: (TensorDict) the model
    """
    print(colored("Model initialization started", "blue"))

    nn = network(FLAGS)
    sz = FLAGS.sz
    ch = FLAGS.ch
    bs = FLAGS.bs
    sbs = FLAGS.sbs
    alpha = constant(FLAGS.alpha)
    beta = constant(FLAGS.beta)
    theta = constant(FLAGS.theta)
    delta = constant(FLAGS.delta)

    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        x=placeholder((bs, sz, sz, ch)),
        z=placeholder((bs, FLAGS.nz)),
        pos=placeholder((bs * FLAGS.jcb, FLAGS.nz)),
        iorth=placeholder((bs, FLAGS.jcb, FLAGS.jcb)),
        lrD=placeholder(None),
        lrG=placeholder(None),
        seq_in=placeholder((10, sbs, sz, sz, ch)),
        seq_out=placeholder((10, sbs, sz, sz, ch)),
        val_seq_in=placeholder((10, 10, sz, sz, ch)),
        val_seq_out=placeholder((10, 10, sz, sz, ch)),
        test_seq_in=placeholder((10, 10, sz, sz, ch)),
        lr=placeholder(None)))

    # Compute G(x, z) and G(x, 0)
    fake_x = nn.generator(T.x, T.z, phase=True)
    # T.fake_x0 = fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)
    fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)

    # Compute discriminator logits
    real_logit = nn.discriminator(T.x, phase=True)
    fake_logit = nn.discriminator(fake_x, phase=True)
    fake0_logit = nn.discriminator(fake_x0, phase=True)

    # Adversarial generator
    loss_disc = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
        sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit) +
        theta * sigmoid_xent(labels=tf.zeros_like(fake0_logit), logits=fake0_logit))

    loss_fake = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit) +
        theta * sigmoid_xent(labels=tf.ones_like(fake0_logit), logits=fake0_logit))

    # Locality
    loss_local = tf.reduce_mean(abs_diff(labels=T.x, predictions=fake_x0))

    # Orthogonality
    pos = T.pos * delta
    tiled_real_x = tf.tile(T.x, [FLAGS.jcb, 1, 1, 1])
    pos_fake_x = nn.generator(tiled_real_x, pos, phase=True)
    neg_fake_x = nn.generator(tiled_real_x, -pos, phase=True)
    jx = (pos_fake_x - neg_fake_x) / (2 * delta)
    jx = tf.reshape(jx, [bs, FLAGS.jcb, -1])
    jx_t = tf.transpose(jx, [0, 2, 1])
    loss_orth = tf.reduce_mean(abs_diff(tf.matmul(jx, jx_t), T.iorth))

    loss_gen = loss_fake + alpha * loss_local + beta * loss_orth

    # Optimizer
    var_disc = tf.get_collection('trainable_variables', 'lgan/dsc')
    train_disc = tf.train.AdamOptimizer(T.lrD, 0.5).minimize(loss_disc, var_list=var_disc)
    if FLAGS.clip:
        clip_disc = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in var_disc]

    var_gen = tf.get_collection('trainable_variables', 'lgan/gen')
    train_gen = tf.train.AdamOptimizer(T.lrG, 0.5).minimize(loss_gen, var_list=var_gen)

    # Summarizations
    summary_disc = [tf.summary.scalar('disc/loss_disc', loss_disc)]
    summary_gen = [tf.summary.scalar('gen/loss_gen', loss_gen),
                   tf.summary.scalar('gen/loss_fake', loss_fake),
                   tf.summary.scalar('gen/loss_local', loss_local),
                   tf.summary.scalar('gen/loss_orth', loss_orth),
                   tf.summary.scalar('hyper/alpha', alpha),
                   tf.summary.scalar('hyper/beta', beta),
                   tf.summary.scalar('hyper/theta', theta),
                   tf.summary.scalar('hyper/delta', delta),
                   tf.summary.scalar('hyper/lrD', T.lrD),
                   tf.summary.scalar('hyper/lrG', T.lrG),
                   tf.summary.scalar('hyper/var', FLAGS.var)]
    summary_image = [tf.summary.image('image/x', T.x),
                     tf.summary.image('image/fake_x', fake_x),
                     tf.summary.image('image/fake_x0', fake_x0)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_gen = tf.summary.merge(summary_gen)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('gen'), loss_gen,
                   c('fake'), loss_fake,
                   c('local'), loss_local,
                   c('orth'), loss_orth]
    # T.ops_disc = [summary_disc, train_disc]
    if FLAGS.clip:
        T.ops_disc = [summary_disc, train_disc, clip_disc]
    else:
        T.ops_disc = [summary_disc, train_disc]
    T.ops_gen = [summary_gen, train_gen]
    T.ops_image = summary_image

    if FLAGS.phase:
        # LSTM initialization
        seq_in = tf.reshape(T.seq_in, [-1, sz, sz, ch])
        seq_out = tf.reshape(T.seq_out, [-1, sz, sz, ch])
        val_seq_in = tf.reshape(T.val_seq_in, [-1, sz, sz, ch])
        test_seq_in = tf.reshape(T.test_seq_in, [-1, sz, sz, ch])

        enc_in = nn.generator(seq_in, tf.zeros((10 * sbs, FLAGS.nz)), phase=True, enc=True)
        enc_out = nn.generator(seq_out, tf.zeros((10 * sbs, FLAGS.nz)), phase=True, enc=True)
        val_enc_in = nn.generator(val_seq_in, tf.zeros((10 * 10, FLAGS.nz)), phase=True, enc=True)
        test_enc_in = nn.generator(test_seq_in, tf.zeros((10 * 10, FLAGS.nz)), phase=True, enc=True)

        enc_in = tf.stop_gradient(enc_in)
        enc_out = tf.stop_gradient(enc_out)
        val_enc_in = tf.stop_gradient(val_enc_in)
        test_enc_in = tf.stop_gradient(test_enc_in)

        enc_in = tf.squeeze(enc_in)
        enc_out = tf.squeeze(enc_out)
        val_enc_in = tf.squeeze(val_enc_in)
        test_enc_in = tf.squeeze(test_enc_in)

        enc_in = tf.reshape(enc_in, [-1, sbs, 3 * FLAGS.nz])
        enc_out = tf.reshape(enc_out, [-1, sbs, 3 * FLAGS.nz])
        val_enc_in = tf.reshape(val_enc_in, [-1, 10, 3 * FLAGS.nz])
        test_enc_in = tf.reshape(test_enc_in, [-1, 10, 3 * FLAGS.nz])

        with tf.variable_scope('lstm/in'):
            in_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)
            _, in_states = in_cell(enc_in, initial_state=None, training=True)
            _, val_in_states = in_cell(val_enc_in, initial_state=None, training=False)
            _, test_in_states = in_cell(test_enc_in, initial_state=None, training=False)

        with tf.variable_scope('lstm/out'):
            out_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)
            outputs, _ = out_cell(tf.zeros_like(enc_out), initial_state=in_states, training=True)
            val_outputs, _ = out_cell(tf.zeros_like(val_enc_in), initial_state=val_in_states, training=False)
            test_outputs, _ = out_cell(tf.zeros_like(test_enc_in), initial_state=test_in_states, training=False)

        enc_out_pred = tf.layers.dense(outputs, 3 * FLAGS.nz, activation=None,
                                       name='lstm_dense', reuse=tf.AUTO_REUSE)
        val_enc_out_pred = tf.layers.dense(val_outputs, 3 * FLAGS.nz, activation=None,
                                           name='lstm_dense', reuse=tf.AUTO_REUSE)
        test_enc_out_pred = tf.layers.dense(test_outputs, 3 * FLAGS.nz, activation=None,
                                            name='lstm_dense', reuse=tf.AUTO_REUSE)

        enc_out_pred_reshape = tf.reshape(enc_out_pred, [-1, 3 * FLAGS.nz])
        enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(enc_out_pred_reshape, 1), 1)
        val_enc_out_pred_reshape = tf.reshape(val_enc_out_pred, [-1, 3 * FLAGS.nz])
        val_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(val_enc_out_pred_reshape, 1), 1)
        test_enc_out_pred_reshape = tf.reshape(test_enc_out_pred, [-1, 3 * FLAGS.nz])
        test_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(test_enc_out_pred_reshape, 1), 1)

        seq_out_pred = nn.generator(enc_out_pred_reshape, tf.zeros((10 * sbs, FLAGS.nz)), phase=True, dec=True)
        seq_out_pred = tf.reshape(seq_out_pred, [10, sbs, sz, sz, ch])
        val_seq_out_pred = nn.generator(val_enc_out_pred_reshape, tf.zeros((10 * 10, FLAGS.nz)), phase=True, dec=True)
        val_seq_out_pred = tf.reshape(val_seq_out_pred, [10, 10, sz, sz, ch])
        test_seq_out_pred = nn.generator(test_enc_out_pred_reshape, tf.zeros((10 * 10, FLAGS.nz)), phase=True, dec=True)
        T.test_seq_out_pred = tf.reshape(test_seq_out_pred, [10, 10, sz, sz, ch])

        T.val_mae = tf.reduce_mean(abs_diff(labels=T.val_seq_out, predictions=val_seq_out_pred))

        loss_lstm = tf.reduce_mean(abs_diff(labels=enc_out, predictions=enc_out_pred))

        var_lstm = tf.get_collection('trainable_variables', 'lstm')
        # train_lstm = tf.train.AdamOptimizer(FLAGS.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)
        train_lstm = tf.train.AdamOptimizer(T.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)

        summary_lstm = [tf.summary.scalar('lstm/loss_lstm', loss_lstm)]
        summary_lstm_image = [tf.summary.image('lstm/seq_out', T.seq_out[:, 0, :, :, :]),
                              tf.summary.image('lstm/seq_out_pred', seq_out_pred[:, 0, :, :, :])]
        summary_lstm = tf.summary.merge(summary_lstm)
        summary_lstm_image = tf.summary.merge(summary_lstm_image)

        T.ops_lstm_print = [c('loss_lstm'), loss_lstm]
        T.ops_lstm = [summary_lstm, train_lstm]
        T.ops_lstm_image = summary_lstm_image
        # T.test1 = seq_out_pred

    print(colored("Model initialization ended", "blue"))

    return T
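
# Usage sketch (not part of the original source): the two training phases implied by the saved
# ops above. The GAN phase alternates discriminator and generator updates with sampled z / pos /
# iorth batches; the LSTM phase (FLAGS.phase) trains on encoded sequences. All batch arguments
# and learning-rate values here are hypothetical stand-ins.
def run_lgan_step_example(T, x_batch, z_batch, pos_batch, iorth_batch, lr_d=1e-4, lr_g=1e-4):
    feed = {T.x: x_batch, T.z: z_batch, T.pos: pos_batch, T.iorth: iorth_batch,
            T.lrD: lr_d, T.lrG: lr_g}
    T.sess.run(T.ops_disc, feed_dict=feed)  # discriminator (plus weight clipping if FLAGS.clip)
    T.sess.run(T.ops_gen, feed_dict=feed)   # generator: fake + locality + orthogonality losses

def run_lstm_step_example(T, seq_in_batch, seq_out_batch, lr=1e-4):
    feed = {T.seq_in: seq_in_batch, T.seq_out: seq_out_batch, T.lr: lr}
    summary, _ = T.sess.run(T.ops_lstm, feed_dict=feed)
    return summary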
def vae():
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        trg_x=placeholder((None, 32, 32, 3), name='target_x'),
        fake_z=placeholder((None, args.Z), name='fake_z'),
    ))

    # Inference
    z, z_post = nn.encoder(T.trg_x, phase=True, internal_update=True)

    # Generation
    x = nn.generator(z, phase=True, internal_update=True)

    # Loss
    loss_rec, loss_kl, loss_gen = vae_loss(x, T.trg_x, z, z_post)

    # Evaluation (embedding, reconstruction, loss)
    test_z, test_z_post = nn.encoder(T.trg_x, phase=False, reuse=True)
    test_x = nn.generator(test_z, phase=False, reuse=True)
    _, _, test_loss = vae_loss(test_x, T.trg_x, test_z, test_z_post)
    fn_embed = tb.function(T.sess, [T.trg_x], test_z_post)
    fn_recon = tb.function(T.sess, [T.trg_x], test_x)
    fn_loss = tb.function(T.sess, [T.trg_x], test_loss)

    # Evaluation (generation)
    fake_x = nn.generator(T.fake_z, phase=False, reuse=True)
    fn_generate = tb.function(T.sess, [T.fake_z], fake_x)

    # Optimizer
    var_main = tf.get_collection('trainable_variables', 'gen/')
    var_main += tf.get_collection('trainable_variables', 'enc/')
    loss_main = loss_gen
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)

    # Summarizations
    summary_main = [tf.summary.scalar('gen/loss_gen', loss_gen),
                    tf.summary.scalar('gen/loss_kl', loss_kl),
                    tf.summary.scalar('gen/loss_rec', loss_rec)]
    summary_image = [tf.summary.image('gen/gen', generate_image(nn.generator))]

    # Merge summaries
    summary_main = tf.summary.merge(summary_main)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('gen'), loss_gen,
                   c('kl'), loss_kl,
                   c('rec'), loss_rec]
    T.ops_main = [summary_main, train_main]
    T.ops_image = summary_image
    T.fn_embed = fn_embed
    T.fn_recon = fn_recon
    T.fn_loss = fn_loss
    T.fn_generate = fn_generate

    return T
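
# Usage sketch (not part of the original source): training the VAE above and sampling from it.
# `trg_gen` is a hypothetical batch iterator over target images; args.Z is the latent size used
# by the fake_z placeholder. Only ops_main, fn_generate, and fn_recon come from vae() itself.
def run_vae_example(T, trg_gen, iterations=1000):
    import numpy as np
    T.sess.run(tf.global_variables_initializer())
    for i in range(iterations):
        x_batch = next(trg_gen)
        T.sess.run(T.ops_main, feed_dict={T.trg_x: x_batch})
    samples = T.fn_generate(np.random.randn(64, args.Z))  # decode random latents
    recons = T.fn_recon(x_batch)                          # reconstruct the last batch
    return samples, recons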
def model(FLAGS, gpu_config):
    """
    :param FLAGS: Contains the experiment info
    :return: (TensorDict) the model
    """
    print(colored("Model initialization started", "blue"))

    nn = network(FLAGS)
    sz = FLAGS.sz
    ch = FLAGS.ch
    bs = FLAGS.bs
    sbs = FLAGS.sbs

    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        x=placeholder((bs, sz, sz, ch)),
        lrD=placeholder(None),
        lrG=placeholder(None),
        seq_in=placeholder((10, sbs, sz, sz, ch)),
        seq_out=placeholder((10, sbs, sz, sz, ch)),
        val_seq_in=placeholder((10, 10, sz, sz, ch)),
        val_seq_out=placeholder((10, 10, sz, sz, ch)),
        test_seq_in=placeholder((10, 10, sz, sz, ch)),
        lr=placeholder(None)))

    recon_x = nn.generator(T.x, phase=True)

    # Compute discriminator logits
    real_logit = nn.discriminator(T.x, phase=True)
    fake_logit = nn.discriminator(recon_x, phase=True)

    # Adversarial generator
    loss_disc = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
        sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))

    loss_fake = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    loss_local = tf.reduce_mean(abs_diff(labels=T.x, predictions=recon_x))

    loss_gen = loss_fake + FLAGS.alpha * loss_local

    var_gen = tf.get_collection('trainable_variables', 'lgan/gen')
    train_gen = tf.train.AdamOptimizer(T.lrG, 0.5).minimize(loss_gen, var_list=var_gen)

    var_disc = tf.get_collection('trainable_variables', 'lgan/dsc')
    train_disc = tf.train.AdamOptimizer(T.lrD, 0.5).minimize(loss_disc, var_list=var_disc)

    # Summarizations
    summary_disc = [tf.summary.scalar('disc/loss_disc', loss_disc)]
    summary_gen = [tf.summary.scalar('gen/loss_gen', loss_gen),
                   tf.summary.scalar('gen/loss_local', loss_local),
                   tf.summary.scalar('gen/loss_fake', loss_fake),
                   tf.summary.scalar('hyper/lrD', T.lrD),
                   tf.summary.scalar('hyper/lrG', T.lrG)]
    summary_image = [tf.summary.image('image/x', T.x),
                     tf.summary.image('image/recon_x', recon_x)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_gen = tf.summary.merge(summary_gen)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('gen'), loss_gen,
                   c('local'), loss_local,
                   c('fake'), loss_fake]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_gen = [summary_gen, train_gen]
    T.ops_image = summary_image

    if FLAGS.phase:
        # LSTM initialization
        seq_in = tf.reshape(T.seq_in, [-1, sz, sz, ch])
        seq_out = tf.reshape(T.seq_out, [-1, sz, sz, ch])
        val_seq_in = tf.reshape(T.val_seq_in, [-1, sz, sz, ch])
        test_seq_in = tf.reshape(T.test_seq_in, [-1, sz, sz, ch])

        enc_in = nn.generator(seq_in, phase=True, enc=True)
        enc_out = nn.generator(seq_out, phase=True, enc=True)
        val_enc_in = nn.generator(val_seq_in, phase=True, enc=True)
        test_enc_in = nn.generator(test_seq_in, phase=True, enc=True)

        enc_in = tf.stop_gradient(enc_in)
        enc_out = tf.stop_gradient(enc_out)
        val_enc_in = tf.stop_gradient(val_enc_in)
        test_enc_in = tf.stop_gradient(test_enc_in)

        enc_in = tf.squeeze(enc_in)
        enc_out = tf.squeeze(enc_out)
        val_enc_in = tf.squeeze(val_enc_in)
        test_enc_in = tf.squeeze(test_enc_in)

        enc_in = tf.reshape(enc_in, [-1, sbs, FLAGS.nz])
        enc_out = tf.reshape(enc_out, [-1, sbs, FLAGS.nz])
        val_enc_in = tf.reshape(val_enc_in, [-1, 10, FLAGS.nz])
        test_enc_in = tf.reshape(test_enc_in, [-1, 10, FLAGS.nz])

        with tf.variable_scope('lstm/in'):
            in_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)
            _, in_states = in_cell(enc_in, initial_state=None, training=True)
            _, val_in_states = in_cell(val_enc_in, initial_state=None, training=False)
            _, test_in_states = in_cell(test_enc_in, initial_state=None, training=False)

        with tf.variable_scope('lstm/out'):
            out_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)
            outputs, _ = out_cell(tf.zeros_like(enc_out), initial_state=in_states, training=True)
            val_outputs, _ = out_cell(tf.zeros_like(val_enc_in), initial_state=val_in_states, training=False)
            test_outputs, _ = out_cell(tf.zeros_like(test_enc_in), initial_state=test_in_states, training=False)

        enc_out_pred = tf.layers.dense(outputs, FLAGS.nz, activation=None,
                                       name='lstm_dense', reuse=tf.AUTO_REUSE)
        val_enc_out_pred = tf.layers.dense(val_outputs, FLAGS.nz, activation=None,
                                           name='lstm_dense', reuse=tf.AUTO_REUSE)
        test_enc_out_pred = tf.layers.dense(test_outputs, FLAGS.nz, activation=None,
                                            name='lstm_dense', reuse=tf.AUTO_REUSE)

        enc_out_pred_reshape = tf.reshape(enc_out_pred, [-1, FLAGS.nz])
        enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(enc_out_pred_reshape, 1), 1)
        val_enc_out_pred_reshape = tf.reshape(val_enc_out_pred, [-1, FLAGS.nz])
        val_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(val_enc_out_pred_reshape, 1), 1)
        test_enc_out_pred_reshape = tf.reshape(test_enc_out_pred, [-1, FLAGS.nz])
        test_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(test_enc_out_pred_reshape, 1), 1)

        seq_out_pred = nn.generator(enc_out_pred_reshape, phase=True, dec=True)
        seq_out_pred = tf.reshape(seq_out_pred, [10, sbs, sz, sz, ch])
        val_seq_out_pred = nn.generator(val_enc_out_pred_reshape, phase=True, dec=True)
        val_seq_out_pred = tf.reshape(val_seq_out_pred, [10, 10, sz, sz, ch])
        test_seq_out_pred = nn.generator(test_enc_out_pred_reshape, phase=True, dec=True)
        T.test_seq_out_pred = tf.reshape(test_seq_out_pred, [10, 10, sz, sz, ch])

        T.val_mae = tf.reduce_mean(abs_diff(labels=T.val_seq_out, predictions=val_seq_out_pred))

        loss_lstm = tf.reduce_mean(abs_diff(labels=enc_out, predictions=enc_out_pred))

        var_lstm = tf.get_collection('trainable_variables', 'lstm')
        # train_lstm = tf.train.AdamOptimizer(FLAGS.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)
        train_lstm = tf.train.AdamOptimizer(T.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)

        summary_lstm = [tf.summary.scalar('lstm/loss_lstm', loss_lstm)]
        summary_lstm_image = [tf.summary.image('lstm/seq_out', T.seq_out[:, 0, :, :, :]),
                              tf.summary.image('lstm/seq_out_pred', seq_out_pred[:, 0, :, :, :])]
        summary_lstm = tf.summary.merge(summary_lstm)
        summary_lstm_image = tf.summary.merge(summary_lstm_image)

        T.ops_lstm_print = [c('loss_lstm'), loss_lstm]
        T.ops_lstm = [summary_lstm, train_lstm]
        T.ops_lstm_image = summary_lstm_image

    print(colored("Model initialization ended", "blue"))

    return T
def dirtt():
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        src_x=placeholder((None, 500, 60, 1)),
        src_y=placeholder((None, args.Y)),
        trg_x=placeholder((None, 500, 60, 1)),
        trg_y=placeholder((None, args.Y)),
        test_x=placeholder((None, 500, 60, 1)),
        test_y=placeholder((None, args.Y)),
    ))

    # Supervised and conditional entropy minimization
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))

        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        ave = ave if ave else m
        teacher_assign_ops += [tf.assign(t, ave)]
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1) if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc)]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
def gada():
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        src_x=placeholder((None, 32, 32, 3)),
        src_y=placeholder((None, args.Y)),
        trg_x=placeholder((None, 32, 32, 3)),
        trg_y=placeholder((None, args.Y)),
        trg_z=placeholder((None, 100)),
        test_x=placeholder((None, 32, 32, 3)),
        test_y=placeholder((None, args.Y)),
    ))

    # Supervised and conditional entropy minimization
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, enc_trim=args.etrim)
    src_g = nn.classifier(src_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim)
    src_p = nn.classifier(src_g, phase=True, gen_trim=args.gtrim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_g = nn.classifier(trg_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_p = nn.classifier(trg_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p)) if args.tw > 0 else constant(0)

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.real_feature_discriminator(src_e, phase=True)
        fake_logit = nn.real_feature_discriminator(trg_e, phase=True, reuse=True)

        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))

        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Generate images and process generated images
    trg_gen_x = nn.trg_generator(T.trg_z)
    trg_gen_e = nn.classifier(trg_gen_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_g = nn.classifier(trg_gen_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_p = nn.classifier(trg_gen_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    # Feature matching loss function for generator
    loss_trg_gen_fm = tf.reduce_mean(tf.square(tf.reduce_mean(trg_g, axis=0) -
                                               tf.reduce_mean(trg_gen_g, axis=0))) if args.dirt == 0 else constant(0)

    # Unsupervised loss function
    if args.dirt == 0:
        logit_real = tf.reduce_logsumexp(trg_p, axis=1)
        logit_fake = tf.reduce_logsumexp(trg_gen_p, axis=1)
        dis_loss_real = -0.5 * tf.reduce_mean(logit_real) + 0.5 * tf.reduce_mean(tf.nn.softplus(logit_real))
        dis_loss_fake = 0.5 * tf.reduce_mean(tf.nn.softplus(logit_fake))
        loss_trg_usv = dis_loss_real + dis_loss_fake  # UnSuperVised loss function
    else:
        loss_trg_usv = constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        ave = ave if ave else m
        teacher_assign_ops += [tf.assign(t, ave)]
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1) if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    uw = constant(args.uw) if args.dirt == 0 else constant(0)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat +
                 uw * loss_trg_usv)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    train_main = tf.group(train_main, ema_op)

    # Optimizer for feature discriminator
    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc_real')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        train_disc = constant(0)

    # Optimizer for generators
    if args.dirt == 0:
        fmw = constant(1)
        loss_trg_gen = (fmw * loss_trg_gen_fm)
        var_trg_gen = tf.get_collection('trainable_variables', 'trg_gen')
        trg_gen_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='trg_gen')
        with tf.control_dependencies(trg_gen_update_ops):
            train_trg_gen = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_trg_gen, var_list=var_trg_gen)
        train_gen = train_trg_gen
    else:
        fmw = constant(0)
        train_gen = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc)]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('class/loss_trg_usv', loss_trg_usv),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('hyper/uw', uw),
                    tf.summary.scalar('hyper/fmw', fmw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]
    summary_gen = [tf.summary.scalar('gen/loss_trg_gen_fm', loss_trg_gen_fm),
                   tf.summary.image('gen/trg_gen_img', trg_gen_x)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)
    summary_gen = tf.summary.merge(summary_gen)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.ops_gen = [summary_gen, train_gen]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher
    T.trg_gen_x = trg_gen_x
    T.trg_gen_p = trg_gen_p
    T.src_p = src_p
    T.trg_p = trg_p
    T.ema_p = ema_p

    return T
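
# Usage sketch (not part of the original source): the three-way alternation implied by gada()'s
# saved ops -- feature discriminator, main classifier, and target generator. The batch iterators,
# evaluation arrays, and uniform noise sampling are hypothetical stand-ins; the noise dimension
# of 100 matches the trg_z placeholder defined above.
def run_gada_example(T, src_gen, trg_gen, test_x, test_y, iterations=1000):
    import numpy as np
    T.sess.run(tf.global_variables_initializer())
    for i in range(iterations):
        sx, sy = next(src_gen)
        tx, ty = next(trg_gen)
        z = np.random.uniform(-1, 1, size=(len(tx), 100)).astype('float32')
        feed = {T.src_x: sx, T.src_y: sy, T.trg_x: tx, T.trg_y: ty, T.trg_z: z}
        T.sess.run(T.ops_disc, feed_dict=feed)  # feature-space discriminator
        T.sess.run(T.ops_main, feed_dict=feed)  # classifier (class/cent/vat/usv losses) + EMA
        T.sess.run(T.ops_gen, feed_dict=feed)   # target generator (feature matching)
        if i % 100 == 0:
            print('iter {}: ema acc = {:.4f}'.format(i, T.fn_ema_acc(test_x, test_y)))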