Esempio n. 1
0
def generate_img():
    """Return a single tiled image of class-conditional generator samples.

    The grid has one row per class (10 rows) and ``ncol`` columns. Every row
    reuses the same ``ncol`` noise vectors, so a column shows one noise draw
    rendered under each class label. The generator is called with
    ``phase=False`` and ``reuse=True`` and the tiled result is passed through
    ``t2s`` before being returned.
    """
    n_classes = 10
    ncol = 20
    side = 32

    # The same ncol noise vectors, stacked once per class.
    noise = np.tile(np.random.randn(ncol, 100), (n_classes, 1))
    # One-hot labels grouped by class: ncol rows of class 0, then class 1, ...
    labels = np.repeat(np.eye(n_classes), ncol, axis=0)

    samples = des.generator(constant(noise), constant(labels),
                            phase=False, reuse=True)

    # (class, column, h, w, c) -> (class, h, column, w, c) -> one big image.
    tiles = tf.reshape(samples, [n_classes, ncol, side, side, 3])
    interleaved = tf.transpose(tiles, [0, 2, 1, 3, 4])
    grid = tf.reshape(interleaved, [1, n_classes * side, ncol * side, 3])
    return t2s(grid)
def generate_img():
    """Return a single tiled image of class-conditional generator samples.

    The grid has ``Y`` rows (``Y`` is a module-level value) and ``n_img``
    columns; every row reuses the same ``n_img`` noise vectors. The generator
    is called with ``phase=True`` and ``reuse=True``. Its output is mapped
    with ``(img + 1) / 2`` and clipped to ``[0, 1]``.
    """
    n_img = 20
    side = 32

    # The same n_img noise vectors, stacked once per class.
    noise = np.tile(np.random.randn(n_img, 100), (Y, 1))
    # One-hot labels grouped by class: n_img rows of class 0, then class 1, ...
    labels = np.repeat(np.eye(Y), n_img, axis=0)

    samples = des.generator(constant(noise), constant(labels),
                            phase=True, reuse=True)

    # (class, column, h, w, c) -> (class, h, column, w, c) -> one big image.
    tiles = tf.reshape(samples, [Y, n_img, side, side, 3])
    interleaved = tf.transpose(tiles, [0, 2, 1, 3, 4])
    grid = tf.reshape(interleaved, [1, Y * side, n_img * side, 3])

    return tf.clip_by_value((grid + 1) / 2, 0, 1)
Esempio n. 3
0
def generate_image(generator):
    """Return a 10 x 20 tiled image of samples drawn from ``generator``.

    The noise is drawn inside ``tb.nputils.FixedSeed(0)``, with width taken
    from ``args.Z``. ``generator`` is called as
    ``generator(z, phase=False, reuse=True)`` and the tiled result is passed
    through ``t2s`` before being returned.
    """
    n_rows, ncol, side = 10, 20, 32

    with tb.nputils.FixedSeed(0):
        noise = np.random.randn(n_rows * ncol, args.Z)

    samples = generator(constant(noise), phase=False, reuse=True)

    # (row, column, h, w, c) -> (row, h, column, w, c) -> one big image.
    tiles = tf.reshape(samples, [n_rows, ncol, side, side, 3])
    interleaved = tf.transpose(tiles, [0, 2, 1, 3, 4])
    grid = tf.reshape(interleaved, [1, n_rows * side, ncol * side, 3])
    return t2s(grid)
Esempio n. 4
0
    def __init__(self, k=10, n_x=784, n_z=64, learning_rate=0.00001):
        """Build the training graph in a freshly reset default graph.

        One inference/generation sub-graph is built for each of the ``k``
        one-hot values of ``y``; the per-component losses are then weighted
        by the proposal ``qy`` and summed into ``self.loss``.

        Args:
            k: number of one-hot ``y`` components.
            n_x: second dimension of the ``x`` placeholder.
            n_z: latent size forwarded to ``qz_graph`` and ``pz_graph``.
            learning_rate: step size passed to ``tf.train.AdamOptimizer``.
                Defaults to the value that used to be hard-coded.

        Side effects:
            Calls ``tf.reset_default_graph()``, creates the placeholders
            ``x`` and ``phase``, and calls ``show_default_graph()``.
        """
        self.k = k
        self.n_x = n_x
        self.n_z = n_z
        tf.reset_default_graph()
        x = placeholder((None, n_x), name='x')
        # Created for its graph side effect; sub-graphs receive it by handle.
        phase = tf.placeholder(tf.bool, name='phase')

        # A zero tensor of shape (batch, k); adding a one-hot row to it
        # broadcasts that row over the whole batch.
        with tf.name_scope('y_'):
            y_ = tf.fill(tf.stack([tf.shape(x)[0], k]), 0.0)

        # Proposal distribution over y.
        self.qy_logit, self.qy = qy_graph(x, k, phase)

        # Per-component slots, filled in the loop below.
        (self.z, self.zm, self.zv,
         self.zm_prior, self.zv_prior,
         self.xm, self.xv, self.y) = [[None] * k for _ in range(8)]

        # For each proposed y, infer z and reconstruct x.
        for i in range(k):
            with tf.name_scope('graphs/hot_at{:d}'.format(i)):
                y = tf.add(
                    y_, constant(np.eye(k)[i], name='hot_at_{:d}'.format(i)))
                self.z[i], self.zm[i], self.zv[i] = qz_graph(x, y, n_z, phase)
                (self.y[i],
                 self.zm_prior[i],
                 self.zv_prior[i]) = pz_graph(y, n_z, phase)
                self.xm[i], self.xv[i] = px_graph(self.z[i], n_x, phase)

        # Nested name scopes keep the graph visualization readable.
        with tf.name_scope('loss'):
            with tf.name_scope('neg_entropy'):
                # Exposed as an attribute, but intentionally NOT part of
                # self.loss (the term was already disabled in the objective).
                self.nent = -tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=self.qy, logits=self.qy_logit)
            losses = [None] * k
            for i in range(k):
                with tf.name_scope('loss_at{:d}'.format(i)):
                    losses[i] = labeled_loss(x, self.xm[i], self.xv[i],
                                             self.z[i], self.zm[i], self.zv[i],
                                             self.zm_prior[i],
                                             self.zv_prior[i])
            with tf.name_scope('final_loss'):
                # Per-component losses weighted by the proposal qy[:, i].
                self.loss = tf.add_n(
                    [self.qy[:, i] * losses[i] for i in range(k)])

        self.train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            self.loss)

        show_default_graph()
def dann_embed():
    """Build the domain-adaptation training graph and return its handles.

    The graph is assembled from module-level configuration (``args``, ``H``,
    ``W``, ``Y``) and the network definitions in ``des``. It wires together:

    * source classification and target conditional-entropy losses,
    * an optional conditional image GAN (enabled when ``args.gw > 0``),
    * an optional feature-level domain discriminator (enabled when
      ``args.dw > 0`` and ``args.phase == 0``),
    * optional smoothing losses on predictions and embeddings,
    * an EMA-weighted classifier for evaluation and a classifier in the
      ``back`` scope that can be overwritten with the EMA weights.

    Returns:
        tb.utils.TensorDict: the session, the placeholders, and the entries
        ``ema_e``, ``ops_print``, ``ops_disc``, ``ops_main``,
        ``fn_test_acc``, ``fn_ema_acc``, ``back_y``, ``back_update``,
        ``init_update``, ``global_step``, ``ramp_class`` and, when
        ``args.gw > 0``, ``ops_image``.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.45
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=config),
        src_x = placeholder((None, H, W, 3)),
        src_y = placeholder((None, Y)),
        trg_x = placeholder((None, H, W, 3)),
        trg_y = placeholder((None, Y)),
        fake_z = placeholder((None, 100)),
        fake_y = placeholder((None, Y)),
        test_x = placeholder((None, H, W, 3)),
        test_y = placeholder((None, Y)),
        phase = placeholder((), tf.bool)
    ))

    # Schedules
    start, end = args.pivot - 1, args.pivot
    global_step = tf.Variable(0., trainable=False)
    # Ramp down dw
    ramp_dw = conditional_ramp_weight(args.dwdn, global_step, args.dw, 0, start, end)
    # Ramp down src
    ramp_class = conditional_ramp_weight(args.dn, global_step, 1, args.dcval, start, end)
    ramp_sbw = conditional_ramp_weight(args.dn, global_step, args.sbw, 0, start, end)
    # Ramp up trg (never more than src)
    ramp_cw = conditional_ramp_weight(args.up, global_step, args.cw, args.uval, start, end)
    ramp_gw = conditional_ramp_weight(args.up, global_step, args.gw, args.uval, start, end)
    ramp_gbw = conditional_ramp_weight(args.up, global_step, args.gbw, args.uval, start, end)
    ramp_tbw = conditional_ramp_weight(args.up, global_step, args.tbw, args.uval, start, end)

    # Supervised and conditional entropy minimization.
    # The classifier is applied in two stages: enc_phase=1 produces the
    # embedding (*_e), enc_phase=0 maps the embedding to logits (*_y).
    src_e = des.classifier(T.src_x, T.phase, enc_phase=1, trim=args.trim, scope='class', internal_update=False)
    trg_e = des.classifier(T.trg_x, T.phase, enc_phase=1, trim=args.trim, scope='class', reuse=True, internal_update=True)
    src_y = des.classifier(src_e, T.phase, enc_phase=0, trim=args.trim, scope='class', internal_update=False)
    trg_y = des.classifier(trg_e, T.phase, enc_phase=0, trim=args.trim, scope='class', reuse=True, internal_update=True)

    loss_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_y))
    loss_cent = tf.reduce_mean(softmax_xent_two(labels=trg_y, logits=trg_y))

    # Image generation
    if args.gw > 0:
        fake_x = des.generator(T.fake_z, T.fake_y, T.phase)
        fake_logit = des.discriminator(fake_x, T.phase)
        real_logit = des.discriminator(T.trg_x, T.phase, reuse=True)
        fake_e = des.classifier(fake_x, T.phase, enc_phase=1, trim=args.trim, scope='class', reuse=True)
        fake_y = des.classifier(fake_e, T.phase, enc_phase=0, trim=args.trim, scope='class', reuse=True)

        loss_gdisc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        loss_gen = tf.reduce_mean(sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))
        loss_info = tf.reduce_mean(softmax_xent(labels=T.fake_y, logits=fake_y))

    else:
        loss_gdisc = constant(0)
        loss_gen = constant(0)
        loss_info = constant(0)

    # Domain confusion
    if args.dw > 0 and args.phase == 0:
        real_logit = des.feature_discriminator(src_e, T.phase)
        fake_logit = des.feature_discriminator(trg_e, T.phase, reuse=True)

        # Discriminator objective: source embeddings -> 1, target -> 0.
        loss_ddisc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Classifier objective: the same logits with the labels flipped.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_ddisc = constant(0)
        loss_domain = constant(0)

    # Smoothing
    loss_t_ball = constant(0) if args.tbw == 0 else smoothing_loss(T.trg_x, trg_y, T.phase)
    loss_s_ball = constant(0) if args.sbw == 0 or args.phase == 1 else smoothing_loss(T.src_x, src_y, T.phase)
    # fake_x / fake_y only exist when the generator branch above was built
    # (args.gw > 0). Without a generator there is nothing to smooth, so the
    # term is disabled instead of failing with a NameError.
    if args.gbw == 0 or args.gw <= 0:
        loss_g_ball = constant(0)
    else:
        loss_g_ball = smoothing_loss(fake_x, fake_y, T.phase)

    loss_t_emb = constant(0) if args.te == 0 else smoothing_loss(T.trg_x, trg_e, T.phase, is_embedding=True)
    loss_s_emb = constant(0) if args.se == 0 else smoothing_loss(T.src_x, src_e, T.phase, is_embedding=True)

    # Evaluation (non-EMA)
    test_y = des.classifier(T.test_x, False, enc_phase=1, trim=0, scope='class', reuse=True)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    T.ema_e = des.classifier(T.test_x, False, enc_phase=1, trim=args.trim, scope='class', reuse=True, getter=get_getter(ema))
    ema_y = des.classifier(T.ema_e, False, enc_phase=0, trim=args.trim, scope='class', reuse=True, getter=get_getter(ema))

    # Back-up (teacher) model
    back_y = des.classifier(T.test_x, False, enc_phase=1, trim=0, scope='back')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_back = tf.get_collection('variables', 'back/(?!.*ExponentialMovingAverage:0)')
    back_assigns = []
    init_assigns = []
    # Variables are paired positionally; this assumes both scopes create
    # their variables in the same order -- TODO confirm against des.classifier.
    for b, m in zip(var_back, var_main):
        ave = ema.average(m)
        # ema.average() returns None for variables the EMA does not track.
        # Compare against None explicitly: truth-testing a TF variable is
        # not a reliable None check and can raise in graph mode.
        target = ave if ave is not None else m
        back_assigns += [tf.assign(b, target)]
        init_assigns += [tf.assign(m, target)]
        # print "Assign {} -> {}, {}".format(target.name, b.name, m.name)
    back_update = tf.group(*back_assigns)
    init_update = tf.group(*init_assigns)

    src_acc = basic_accuracy(T.src_y, src_y)
    trg_acc = basic_accuracy(T.trg_y, trg_y)
    test_acc = basic_accuracy(T.test_y, test_y)
    ema_acc = basic_accuracy(T.test_y, ema_y)
    fn_test_acc = tb.function(T.sess, [T.test_x, T.test_y], test_acc)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    loss_main = (ramp_class * loss_class +
                 ramp_dw * loss_domain +
                 ramp_cw * loss_cent +
                 ramp_tbw * loss_t_ball +
                 ramp_gbw * loss_g_ball +
                 ramp_sbw * loss_s_ball +
                 args.te * loss_t_emb +
                 args.se * loss_s_emb +
                 ramp_gw * loss_gen +
                 ramp_gw * loss_info)
    var_main = tf.get_collection('trainable_variables', 'class')
    var_main += tf.get_collection('trainable_variables', 'gen')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main,
                                                               var_list=var_main,
                                                               global_step=global_step)
    # Every main training step also updates the EMA shadow variables.
    train_main = tf.group(train_main, ema_op)

    if (args.dw > 0 and args.phase == 0) or args.gw > 0:
        loss_disc = loss_ddisc + loss_gdisc
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc,
                                                                   var_list=var_disc)
    else:
        # Placeholder op so T.ops_disc can always be run.
        train_disc = constant(0)

    # Summarizations
#    embedding = tf.Variable(tf.zeros([1000,12800]),name='embedding')
#    embedding = tf.reshape(trg_e[:1000], [-1,12800])

    summary_disc = [tf.summary.scalar('domain/loss_ddisc', loss_ddisc),
                    tf.summary.scalar('gen/loss_gdisc', loss_gdisc)]

    summary_main = [tf.summary.scalar('class/loss_class', loss_class),
                    tf.summary.scalar('class/loss_cent', loss_cent),
                    tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('lipschitz/loss_t_ball', loss_t_ball),
                    tf.summary.scalar('lipschitz/loss_g_ball', loss_g_ball),
                    tf.summary.scalar('lipschitz/loss_s_ball', loss_s_ball),
                    tf.summary.scalar('embedding/loss_t_emb', loss_t_emb),
                    tf.summary.scalar('embedding/loss_s_emb', loss_s_emb),
                    tf.summary.scalar('gen/loss_gen', loss_gen),
                    tf.summary.scalar('gen/loss_info', loss_info),
                    tf.summary.scalar('ramp/ramp_class', ramp_class),
                    tf.summary.scalar('ramp/ramp_dw', ramp_dw),
                    tf.summary.scalar('ramp/ramp_cw', ramp_cw),
                    tf.summary.scalar('ramp/ramp_gw', ramp_gw),
                    tf.summary.scalar('ramp/ramp_tbw', ramp_tbw),
                    tf.summary.scalar('ramp/ramp_sbw', ramp_sbw),
                    tf.summary.scalar('ramp/ramp_gbw', ramp_gbw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('ddisc'), loss_ddisc,
                   c('domain'), loss_domain,
                   c('gdisc'), loss_gdisc,
                   c('gen'), loss_gen,
                   c('info'), loss_info,
                   c('class'), loss_class,
                   c('cent'), loss_cent,
                   c('t_ball'), loss_t_ball,
                   c('g_ball'), loss_g_ball,
                   c('s_ball'), loss_s_ball,
                   c('t_emb'), loss_t_emb,
                   c('s_emb'), loss_s_emb,
                   c('src'), src_acc,
                   c('trg'), trg_acc]

    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_test_acc = fn_test_acc
    T.fn_ema_acc = fn_ema_acc
    T.back_y = tf.nn.softmax(back_y)  # Access to backed-up eval model softmax
    T.back_update = back_update       # Update op eval -> backed-up eval model
    T.init_update = init_update       # Update op eval -> student eval model
    T.global_step = global_step
    T.ramp_class = ramp_class
    if args.gw > 0:
        summary_image = tf.summary.image('image/gen', generate_img())
        T.ops_image = summary_image

    return T
from utils import get_var, plot_labeled_data

# Script: sample points from two fixed mixture components and save them.
k = 2
n_x = 2
sample_size = 500
tf.reset_default_graph()

# Per-component slots: prior mean, prior variance, sampled z, generated x.
zm_prior, zv_prior, z, x = [[None] * k for _ in range(4)]

# Component 0 has prior mean +1, component 1 has prior mean -1; both use a
# prior variance of 1.
for idx, mean_sign in enumerate((1, -1)):
    zm_prior[idx] = constant(mean_sign * np.ones((sample_size, 2)))
    zv_prior[idx] = constant(np.ones((sample_size, 2)))

# For each component, draw z from its prior and push it through the fixed
# generation graph.
for i, (prior_mean, prior_var) in enumerate(zip(zm_prior, zv_prior)):
    with tf.name_scope('graphs/hot_at{:d}'.format(i)):
        z[i] = z_graph(prior_mean, prior_var)
        x[i] = px_fixed_graph(z[i])

show_default_graph()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch both components in one run: [x[0], x[1], z[0], z[1]].
    x_0, x_1, z0, z1 = sess.run(x + z)
    np.save('./generatedData/generated_from_cluster0.npy', x_0)
    np.save('./generatedData/generated_from_cluster1.npy', x_1)
Esempio n. 7
0
def vae():
    """Build the conditional-VAE / classifier training graph.

    Reads module-level ``args`` (``gw``, ``rw``, ``lr``) and the network
    definitions in ``des``. When ``args.gw > 0`` a variational branch
    (classifier -> Gumbel-softmax ``y`` -> encoder -> generator) is built on
    the target inputs; otherwise its losses are replaced by constants.
    A supervised classification loss on the source inputs is always built.

    Returns:
        tb.utils.TensorDict: the session, the placeholders, and the entries
        ``ops_print``, ``ops_main``, ``fn_test_acc`` and, when
        ``args.gw > 0``, ``fn_fake_x`` and ``ops_image``.
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=config),
        src_x = placeholder((None, 32, 32, 3),  name='source_x'),
        src_y = placeholder((None, 10),         name='source_y'),
        trg_x = placeholder((None, 32, 32, 3),  name='target_x'),
        trg_y = placeholder((None, 10),         name='target_y'),
        test_x = placeholder((None, 32, 32, 3), name='test_x'),
        test_y = placeholder((None, 10),        name='test_y'),
        fake_z = placeholder((None, 100),       name='fake_z'),
        fake_y = placeholder((None, 10),        name='fake_y'),
        tau = placeholder((),                   name='tau'),
        phase = placeholder((), tf.bool,        name='phase'),
    ))

    if args.gw > 0:
        # Variational inference
        y_logit = des.classifier(T.trg_x, T.phase, internal_update=True)
        y = gumbel_softmax(y_logit, T.tau)
        z, z_post = des.encoder(T.trg_x, y, T.phase, internal_update=True)

        # Generation
        x = des.generator(z, y, T.phase, internal_update=True)

        # Loss
        # z_prior is unpacked as the trailing arguments of log_normal;
        # presumably (mean, variance) of a standard normal -- TODO confirm.
        z_prior = (0., 1.)
        kl_z = tf.reduce_mean(log_normal(z, *z_post) - log_normal(z, *z_prior))

        y_q = tf.nn.softmax(y_logit)
        log_y_q = tf.nn.log_softmax(y_logit)
        # tf.log(0.1) is the log-probability of a uniform distribution over
        # the 10 label columns, so this is KL(q(y|x) || uniform).
        kl_y = tf.reduce_mean(tf.reduce_sum(y_q * (log_y_q - tf.log(0.1)), axis=1))

        loss_kl = kl_z + kl_y
        # Squared reconstruction error summed per example, scaled by args.rw.
        loss_rec = args.rw * tf.reduce_mean(tf.reduce_sum(tf.square(T.trg_x - x), axis=[1,2,3]))
        loss_gen = loss_rec + loss_kl
        trg_acc = basic_accuracy(T.trg_y, y_logit)

    else:
        # Constants keep the summaries and print ops below well defined.
        loss_kl = constant(0)
        loss_rec = constant(0)
        loss_gen = constant(0)
        trg_acc = constant(0)

    # Posterior regularization (labeled classification)
    # NOTE(review): reuse=True relies on the classifier variables having been
    # created in the args.gw > 0 branch above; with args.gw <= 0 this is the
    # first classifier call in this function -- confirm how des.classifier
    # handles reuse in that case.
    src_y = des.classifier(T.src_x, T.phase, reuse=True)
    loss_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_y))
    src_acc = basic_accuracy(T.src_y, src_y)

    # Evaluation (classification)
    test_y = des.classifier(T.test_x, phase=False, reuse=True)
    test_acc = basic_accuracy(T.test_y, test_y)
    fn_test_acc = tb.function(T.sess, [T.test_x, T.test_y], test_acc)

    # Evaluation (generation)
    if args.gw > 0:
        fake_x = des.generator(T.fake_z, T.fake_y, phase=False, reuse=True)
        fn_fake_x = tb.function(T.sess, [T.fake_z, T.fake_y], fake_x)

    # Optimizer
    # A single Adam step updates generator, encoder and classifier variables.
    var_main = tf.get_collection('trainable_variables', 'gen/')
    var_main += tf.get_collection('trainable_variables', 'enc/')
    var_main += tf.get_collection('trainable_variables', 'class/')
    loss_main = args.gw * loss_gen + loss_class
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)

    # Summarizations
    summary_main = [
        tf.summary.scalar('gen/loss_gen', loss_gen),
        tf.summary.scalar('gen/loss_rec', loss_rec),
        tf.summary.scalar('gen/loss_kl', loss_kl),
        tf.summary.scalar('class/loss_class', loss_class),
        tf.summary.scalar('acc/src_acc', src_acc),
        tf.summary.scalar('acc/trg_acc', trg_acc),
    ]
    summary_main = tf.summary.merge(summary_main)

    if args.gw > 0:
        summary_image = tf.summary.image('image/gen', generate_img())

    # Saved ops
    # ops_print alternates a string label tensor with the value it names.
    c = tf.constant
    T.ops_print = [
        c('tau'), tf.identity(T.tau),
        c('gen'), loss_gen,
        c('rec'), loss_rec,
        c('kl'), loss_kl,
        c('class'), loss_class,
    ]

    T.ops_main = [summary_main, train_main]
    T.fn_test_acc = fn_test_acc

    if args.gw > 0:
        T.fn_fake_x = fn_fake_x
        T.ops_image = summary_image

    return T
Esempio n. 8
0
def dirtt():
    """Build the VADA / DIRT-T training graph and return its handles.

    Reads module-level ``args`` (``Y``, ``trim``, ``dw``, ``sw``, ``tw``,
    ``bw``, ``dirt``, ``lr``) and the network definitions in ``nn``. The
    domain discriminator and the source VAT loss are only built when
    ``args.dirt == 0``; otherwise they are replaced by constants.

    Returns:
        tb.utils.TensorDict: the session, the placeholders, and the entries
        ``ops_print``, ``ops_disc``, ``ops_main``, ``fn_ema_acc``,
        ``teacher`` and ``update_teacher``.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 32, 32, 3)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 32, 32, 3)),
        trg_y = placeholder((None, args.Y)),
        test_x = placeholder((None, 32, 32, 3)),
        test_y = placeholder((None, args.Y)),
    ))
    # Supervised and conditional entropy minimization.
    # The classifier is applied in two stages: enc_phase=1 produces the
    # embedding (*_e), enc_phase=0 maps the embedding to logits (*_p).
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source embeddings -> 1, target -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Classifier objective: the same logits with the labels flipped.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    # Variables are paired positionally; this assumes both scopes create
    # their variables in the same order -- TODO confirm against nn.classifier.
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        # ema.average() returns None for variables the EMA does not track.
        # Compare against None explicitly: truth-testing a TF variable is
        # not a reliable None check and can raise in graph mode.
        ave = ave if ave is not None else m
        teacher_assign_ops += [tf.assign(t, ave)]
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # Loss weights depend on the mode: with args.dirt != 0 the domain and
    # source-VAT terms are switched off and the source class loss uses args.bw.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Every main training step also updates the EMA shadow variables.
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Placeholder op so T.ops_disc can always be run.
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    # ops_print alternates a string label tensor with the value it names.
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
Esempio n. 9
0
def model(FLAGS, gpu_config):
    """
    Build the generator/discriminator training graph and, when
    ``FLAGS.phase`` is truthy, an LSTM sequence-prediction graph on top of
    the generator's encoder/decoder.

    :param FLAGS: Contains the experiment info
    :param gpu_config: not referenced in this function; the session is
        configured with ``tb.growth_config()`` instead
    :return: (TensorDict) the model
    """

    print(colored("Model initialization started", "blue"))

    nn = network(FLAGS)
    sz = FLAGS.sz
    ch = FLAGS.ch
    bs = FLAGS.bs
    sbs = FLAGS.sbs

    # Loss weights / step size wrapped as graph constants so they can also be
    # logged as summaries below.
    alpha = constant(FLAGS.alpha)
    beta = constant(FLAGS.beta)
    theta = constant(FLAGS.theta)
    delta = constant(FLAGS.delta)

    T = tb.utils.TensorDict(
        dict(sess=tf.Session(config=tb.growth_config()),
             x=placeholder((bs, sz, sz, ch)),
             z=placeholder((bs, FLAGS.nz)),
             pos=placeholder((bs * FLAGS.jcb, FLAGS.nz)),
             iorth=placeholder((bs, FLAGS.jcb, FLAGS.jcb)),
             lrD=placeholder(None),
             lrG=placeholder(None),
             seq_in=placeholder((10, sbs, sz, sz, ch)),
             seq_out=placeholder((10, sbs, sz, sz, ch)),
             val_seq_in=placeholder((10, 10, sz, sz, ch)),
             val_seq_out=placeholder((10, 10, sz, sz, ch)),
             test_seq_in=placeholder((10, 10, sz, sz, ch)),
             lr=placeholder(None)))

    # Compute G(x, z) and G(x, 0)
    fake_x = nn.generator(T.x, T.z, phase=True)
    # T.fake_x0 = fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)
    fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)

    # Compute discriminator logits
    real_logit = nn.discriminator(T.x, phase=True)
    fake_logit = nn.discriminator(fake_x, phase=True)
    fake0_logit = nn.discriminator(fake_x0, phase=True)

    # Adversarial generator
    # Discriminator: real -> 1, G(x, z) -> 0, G(x, 0) -> 0 (weighted by theta).
    loss_disc = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
        sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit) +
        theta *
        sigmoid_xent(labels=tf.zeros_like(fake0_logit), logits=fake0_logit))

    # Generator: the same fake logits with the labels flipped to 1.
    loss_fake = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit) +
        theta *
        sigmoid_xent(labels=tf.ones_like(fake0_logit), logits=fake0_logit))

    # Locality
    # G(x, 0) is pulled towards x itself.
    loss_local = tf.reduce_mean(abs_diff(labels=T.x, predictions=fake_x0))

    # Orthogonality
    # Central finite difference of G around z = 0 along the directions in
    # T.pos, with step size delta; each input image is tiled FLAGS.jcb times.
    pos = T.pos * delta
    tiled_real_x = tf.tile(T.x, [FLAGS.jcb, 1, 1, 1])
    pos_fake_x = nn.generator(tiled_real_x, pos, phase=True)
    neg_fake_x = nn.generator(tiled_real_x, -pos, phase=True)

    jx = (pos_fake_x - neg_fake_x) / (2 * delta)
    jx = tf.reshape(jx, [bs, FLAGS.jcb, -1])
    jx_t = tf.transpose(jx, [0, 2, 1])
    # The per-example Gram matrix jx @ jx^T is matched against T.iorth.
    loss_orth = tf.reduce_mean(abs_diff(tf.matmul(jx, jx_t), T.iorth))

    loss_gen = loss_fake + alpha * loss_local + beta * loss_orth

    # Optimizer
    var_disc = tf.get_collection('trainable_variables', 'lgan/dsc')
    train_disc = tf.train.AdamOptimizer(T.lrD, 0.5).minimize(loss_disc,
                                                             var_list=var_disc)

    # clip_disc only exists when FLAGS.clip is truthy; it is consumed under
    # the same condition further down.
    if FLAGS.clip:
        clip_disc = [
            p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in var_disc
        ]

    var_gen = tf.get_collection('trainable_variables', 'lgan/gen')
    train_gen = tf.train.AdamOptimizer(T.lrG, 0.5).minimize(loss_gen,
                                                            var_list=var_gen)

    # Summarizations
    summary_disc = [
        tf.summary.scalar('disc/loss_disc', loss_disc),
    ]
    summary_gen = [
        tf.summary.scalar('gen/loss_gen', loss_gen),
        tf.summary.scalar('gen/loss_fake', loss_fake),
        tf.summary.scalar('gen/loss_local', loss_local),
        tf.summary.scalar('gen/loss_orth', loss_orth),
        tf.summary.scalar('hyper/alpha', alpha),
        tf.summary.scalar('hyper/beta', beta),
        tf.summary.scalar('hyper/theta', theta),
        tf.summary.scalar('hyper/delta', delta),
        tf.summary.scalar('hyper/lrD', T.lrD),
        tf.summary.scalar('hyper/lrG', T.lrG),
        tf.summary.scalar('hyper/var', FLAGS.var)
    ]
    summary_image = [
        tf.summary.image('image/x', T.x),
        tf.summary.image('image/fake_x', fake_x),
        tf.summary.image('image/fake_x0', fake_x0)
    ]
    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_gen = tf.summary.merge(summary_gen)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    # ops_print alternates a string label tensor with the value it names.
    c = tf.constant
    T.ops_print = [
        c('disc'), loss_disc,
        c('gen'), loss_gen,
        c('fake'), loss_fake,
        c('local'), loss_local,
        c('orth'), loss_orth
    ]
    # T.ops_disc = [summary_disc, train_disc]

    # NOTE(review): train_disc and clip_disc are fetched together with no
    # control dependency between them -- confirm the intended ordering.
    if FLAGS.clip:
        T.ops_disc = [summary_disc, train_disc, clip_disc]
    else:
        T.ops_disc = [summary_disc, train_disc]

    T.ops_gen = [summary_gen, train_gen]
    T.ops_image = summary_image

    if FLAGS.phase:
        # LSTM initialization
        # Flatten the leading two sequence dimensions into one batch dimension.
        seq_in = tf.reshape(T.seq_in, [-1, sz, sz, ch])
        seq_out = tf.reshape(T.seq_out, [-1, sz, sz, ch])
        val_seq_in = tf.reshape(T.val_seq_in, [-1, sz, sz, ch])
        test_seq_in = tf.reshape(T.test_seq_in, [-1, sz, sz, ch])
        # Run only the encoder part of the generator (enc=True) with zero z.
        # NOTE(review): phase=True is also used for the validation and test
        # inputs -- confirm this is intended.
        enc_in = nn.generator(seq_in,
                              tf.zeros((10 * sbs, FLAGS.nz)),
                              phase=True,
                              enc=True)
        enc_out = nn.generator(seq_out,
                               tf.zeros((10 * sbs, FLAGS.nz)),
                               phase=True,
                               enc=True)
        val_enc_in = nn.generator(val_seq_in,
                                  tf.zeros((10 * 10, FLAGS.nz)),
                                  phase=True,
                                  enc=True)
        test_enc_in = nn.generator(test_seq_in,
                                   tf.zeros((10 * 10, FLAGS.nz)),
                                   phase=True,
                                   enc=True)
        # The encodings are treated as fixed inputs/targets: no gradient
        # flows back into the generator from the LSTM loss.
        enc_in = tf.stop_gradient(enc_in)
        enc_out = tf.stop_gradient(enc_out)
        val_enc_in = tf.stop_gradient(val_enc_in)
        test_enc_in = tf.stop_gradient(test_enc_in)
        enc_in = tf.squeeze(enc_in)
        enc_out = tf.squeeze(enc_out)
        val_enc_in = tf.squeeze(val_enc_in)
        test_enc_in = tf.squeeze(test_enc_in)
        # Restore the two leading sequence dimensions; each encoding is
        # reshaped to a vector of width 3 * FLAGS.nz.
        enc_in = tf.reshape(enc_in, [-1, sbs, 3 * FLAGS.nz])
        enc_out = tf.reshape(enc_out, [-1, sbs, 3 * FLAGS.nz])
        val_enc_in = tf.reshape(val_enc_in, [-1, 10, 3 * FLAGS.nz])
        test_enc_in = tf.reshape(test_enc_in, [-1, 10, 3 * FLAGS.nz])

        # Input LSTM: only its final states are kept.
        with tf.variable_scope('lstm/in'):
            in_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl,
                                                     FLAGS.nhw,
                                                     dropout=0.5)

            _, in_states = in_cell(enc_in, initial_state=None, training=True)
            _, val_in_states = in_cell(val_enc_in,
                                       initial_state=None,
                                       training=False)
            _, test_in_states = in_cell(test_enc_in,
                                        initial_state=None,
                                        training=False)

        # Output LSTM: fed all-zero inputs and initialised with the states of
        # the input LSTM.
        with tf.variable_scope('lstm/out'):
            out_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl,
                                                      FLAGS.nhw,
                                                      dropout=0.5)

            outputs, _ = out_cell(tf.zeros_like(enc_out),
                                  initial_state=in_states,
                                  training=True)
            val_outputs, _ = out_cell(tf.zeros_like(val_enc_in),
                                      initial_state=val_in_states,
                                      training=False)
            test_outputs, _ = out_cell(tf.zeros_like(test_enc_in),
                                       initial_state=test_in_states,
                                       training=False)

            # One dense layer shared by the three paths (same name, AUTO_REUSE).
            enc_out_pred = tf.layers.dense(outputs,
                                           3 * FLAGS.nz,
                                           activation=None,
                                           name='lstm_dense',
                                           reuse=tf.AUTO_REUSE)
            val_enc_out_pred = tf.layers.dense(val_outputs,
                                               3 * FLAGS.nz,
                                               activation=None,
                                               name='lstm_dense',
                                               reuse=tf.AUTO_REUSE)
            test_enc_out_pred = tf.layers.dense(test_outputs,
                                                3 * FLAGS.nz,
                                                activation=None,
                                                name='lstm_dense',
                                                reuse=tf.AUTO_REUSE)

        # Flatten the predictions and insert two singleton dimensions before
        # handing them to the decoder.
        enc_out_pred_reshape = tf.reshape(enc_out_pred, [-1, 3 * FLAGS.nz])
        enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(enc_out_pred_reshape, 1), 1)
        val_enc_out_pred_reshape = tf.reshape(val_enc_out_pred,
                                              [-1, 3 * FLAGS.nz])
        val_enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(val_enc_out_pred_reshape, 1), 1)
        test_enc_out_pred_reshape = tf.reshape(test_enc_out_pred,
                                               [-1, 3 * FLAGS.nz])
        test_enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(test_enc_out_pred_reshape, 1), 1)

        # Decode the predicted encodings with the generator (dec=True).
        seq_out_pred = nn.generator(enc_out_pred_reshape,
                                    tf.zeros((10 * sbs, FLAGS.nz)),
                                    phase=True,
                                    dec=True)
        seq_out_pred = tf.reshape(seq_out_pred, [10, sbs, sz, sz, ch])
        val_seq_out_pred = nn.generator(val_enc_out_pred_reshape,
                                        tf.zeros((10 * 10, FLAGS.nz)),
                                        phase=True,
                                        dec=True)
        val_seq_out_pred = tf.reshape(val_seq_out_pred, [10, 10, sz, sz, ch])
        test_seq_out_pred = nn.generator(test_enc_out_pred_reshape,
                                         tf.zeros((10 * 10, FLAGS.nz)),
                                         phase=True,
                                         dec=True)
        T.test_seq_out_pred = tf.reshape(test_seq_out_pred,
                                         [10, 10, sz, sz, ch])

        # Validation error is measured on decoded images; the training loss
        # is measured on the encodings.
        T.val_mae = tf.reduce_mean(
            abs_diff(labels=T.val_seq_out, predictions=val_seq_out_pred))
        loss_lstm = tf.reduce_mean(
            abs_diff(labels=enc_out, predictions=enc_out_pred))
        var_lstm = tf.get_collection('trainable_variables', 'lstm')
        # train_lstm = tf.train.AdamOptimizer(FLAGS.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)
        train_lstm = tf.train.AdamOptimizer(T.lr,
                                            0.5).minimize(loss_lstm,
                                                          var_list=var_lstm)

        summary_lstm = [tf.summary.scalar('lstm/loss_lstm', loss_lstm)]
        # Image summaries show index 0 along the second axis of each sequence.
        summary_lstm_image = [
            tf.summary.image('lstm/seq_out', T.seq_out[:, 0, :, :, :]),
            tf.summary.image('lstm/seq_out_pred', seq_out_pred[:, 0, :, :, :])
        ]
        summary_lstm = tf.summary.merge(summary_lstm)
        summary_lstm_image = tf.summary.merge(summary_lstm_image)

        T.ops_lstm_print = [c('loss_lstm'), loss_lstm]
        T.ops_lstm = [summary_lstm, train_lstm]
        T.ops_lstm_image = summary_lstm_image

        # T.test1 = seq_out_pred

    print(colored("Model initialization ended", "blue"))

    return T
Esempio n. 10
0
def dirtt():
    """Build the training and evaluation graph and return it as a TensorDict.

    The graph contains:
      * a classifier applied in two stages (``enc_phase=1`` then
        ``enc_phase=0``) to the source and target inputs,
      * an optional feature discriminator for domain confusion (only when
        ``args.dw > 0 and args.dirt == 0``),
      * optional VAT losses on source and target inputs,
      * an EMA copy of the classifier used for evaluation, and
      * a separate 'teacher' classifier that can be overwritten with the EMA
        weights through ``update_teacher``.

    Reads the module-level ``args`` (``Y``, ``trim``, ``dw``, ``dirt``,
    ``sw``, ``tw``, ``bw``, ``lr``).

    Returns:
        The TensorDict ``T`` holding the session, the input placeholders and
        the attributes ``ops_print``, ``ops_disc``, ``ops_main``,
        ``fn_ema_acc``, ``teacher`` and ``update_teacher``.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 500, 60, 1)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 500, 60, 1)),
        trg_y = placeholder((None, args.Y)),
        test_x = placeholder((None, 500, 60, 1)),
        test_y = placeholder((None, args.Y)),
    ))
    # Supervised and conditional entropy minimization
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    # The target logits serve as both labels and logits here.
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source features -> 1, target features -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Classifier objective: same terms with the labels flipped.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        # ema.average() yields None for variables the EMA does not track; in
        # that case copy the raw variable. Compare against None explicitly:
        # truth-testing a TF variable is not reliably defined in graph mode
        # and can raise a TypeError.
        ave = ema.average(m)
        if ave is None:
            ave = m
        teacher_assign_ops.append(tf.assign(t, ave))
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # When args.dirt != 0 the domain and source-VAT weights are zeroed and the
    # source classification weight becomes args.bw.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Group the EMA update with the optimizer step so both run together.
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Placeholder tensor so ops_disc can still be built without a discriminator.
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    # Alternating (label, tensor) pairs.
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
Esempio n. 11
0
def gada():
    """Build the training and evaluation graph and return it as a TensorDict.

    The graph contains:
      * a classifier applied in three stages (encoder with ``enc_phase=1``,
        then ``gen_phase=1``, then the final call) to source, target and
        generated target images,
      * an optional feature discriminator for domain confusion (only when
        ``args.dw > 0 and args.dirt == 0``),
      * optional VAT losses on source and target inputs,
      * a target image generator fed by ``trg_z`` and trained with a
        feature-matching loss (only when ``args.dirt == 0``),
      * an unsupervised loss built from the log-sum-exp of the classifier
        logits on real and generated target images,
      * an EMA copy of the classifier used for evaluation, and
      * a separate 'teacher' classifier that can be overwritten with the EMA
        weights through ``update_teacher``.

    Reads the module-level ``args`` (``Y``, ``etrim``, ``gtrim``, ``dw``,
    ``dirt``, ``sw``, ``tw``, ``uw``, ``bw``, ``lr``).

    Returns:
        The TensorDict ``T`` holding the session, the input placeholders and
        the attributes ``ops_print``, ``ops_disc``, ``ops_main``, ``ops_gen``,
        ``fn_ema_acc``, ``teacher``, ``update_teacher``, ``trg_gen_x``,
        ``trg_gen_p``, ``src_p``, ``trg_p`` and ``ema_p``.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 32, 32, 3)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 32, 32, 3)),
        trg_y = placeholder((None, args.Y)),
        trg_z = placeholder((None, 100)),
        test_x = placeholder((None, 32, 32, 3)),
        test_y = placeholder((None, args.Y)),
    ))

    # Supervised and conditional entropy minimization
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, enc_trim=args.etrim)
    src_g = nn.classifier(src_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim)
    src_p = nn.classifier(src_g, phase=True, gen_trim=args.gtrim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_g = nn.classifier(trg_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_p = nn.classifier(trg_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    # The target logits serve as both labels and logits here.
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p)) if args.tw > 0 else constant(0)

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.real_feature_discriminator(src_e, phase=True)
        fake_logit = nn.real_feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source features -> 1, target features -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Classifier objective: same terms with the labels flipped.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Generate images and process generated images
    trg_gen_x = nn.trg_generator(T.trg_z)
    trg_gen_e = nn.classifier(trg_gen_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_g = nn.classifier(trg_gen_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_p = nn.classifier(trg_gen_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    # Feature matching loss function for generator: mean squared difference
    # between the batch-averaged features of real and generated target images.
    loss_trg_gen_fm = tf.reduce_mean(tf.square(tf.reduce_mean(trg_g, axis=0) - tf.reduce_mean(trg_gen_g, axis=0))) if args.dirt == 0 else constant(0)

    # Unsupervised loss function
    if args.dirt == 0:
        logit_real = tf.reduce_logsumexp(trg_p, axis=1)
        logit_fake = tf.reduce_logsumexp(trg_gen_p, axis=1)
        dis_loss_real = -0.5*tf.reduce_mean(logit_real) + 0.5*tf.reduce_mean(tf.nn.softplus(logit_real))
        dis_loss_fake = 0.5*tf.reduce_mean(tf.nn.softplus(logit_fake))
        loss_trg_usv = dis_loss_real + dis_loss_fake    # UnSuperVised loss function
    else:
        loss_trg_usv = constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        # ema.average() yields None for variables the EMA does not track; in
        # that case copy the raw variable. Compare against None explicitly:
        # truth-testing a TF variable is not reliably defined in graph mode
        # and can raise a TypeError.
        ave = ema.average(m)
        if ave is None:
            ave = m
        teacher_assign_ops.append(tf.assign(t, ave))
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # When args.dirt != 0 the domain, source-VAT and unsupervised weights are
    # zeroed and the source classification weight becomes args.bw.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    uw = constant(args.uw) if args.dirt == 0 else constant(0)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat +
                 uw * loss_trg_usv)
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Group the EMA update with the optimizer step so both run together.
    train_main = tf.group(train_main, ema_op)

    # Optimizer for feature discriminator
    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc_real')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Placeholder tensor so ops_disc can still be built without a discriminator.
        train_disc = constant(0)

    # Optimizer for generators
    if args.dirt == 0:
        fmw = constant(1)
        loss_trg_gen = (fmw * loss_trg_gen_fm)
        var_trg_gen = tf.get_collection('trainable_variables', 'trg_gen')
        # Run the ops registered under UPDATE_OPS for the 'trg_gen' scope
        # before each generator optimizer step.
        trg_gen_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='trg_gen')
        with tf.control_dependencies(trg_gen_update_ops):
            train_trg_gen = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_trg_gen, var_list=var_trg_gen)
        train_gen = train_trg_gen
    else:
        fmw = constant(0)
        train_gen = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('class/loss_trg_usv', loss_trg_usv),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('hyper/uw', uw),
                    tf.summary.scalar('hyper/fmw', fmw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]
    summary_gen  = [tf.summary.scalar('gen/loss_trg_gen_fm', loss_trg_gen_fm),
                    tf.summary.image('gen/trg_gen_img', trg_gen_x),]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)
    summary_gen  = tf.summary.merge(summary_gen)

    # Saved ops
    c = tf.constant
    # Alternating (label, tensor) pairs.
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.ops_gen  = [summary_gen , train_gen]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher
    T.trg_gen_x = trg_gen_x
    T.trg_gen_p = trg_gen_p
    T.src_p = src_p
    T.trg_p = trg_p
    T.ema_p = ema_p

    return T