コード例 #1
0
ファイル: dirtt.py プロジェクト: coolshan008/dirt-t
def dirtt():
    """Build the domain-adaptation training graph and return its handles.

    Configuration is read from the module-level ``args`` object (``Y``,
    ``trim``, ``dw``, ``dirt``, ``sw``, ``tw``, ``bw``, ``lr``) and the
    networks come from the module-level ``nn``.

    Returns:
        A ``tb.utils.TensorDict`` holding the session and the input
        placeholders, extended with ``ops_print``, ``ops_disc``,
        ``ops_main``, ``fn_ema_acc``, ``teacher`` and ``update_teacher``.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 32, 32, 3)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 32, 32, 3)),
        trg_y = placeholder((None, args.Y)),
        test_x = placeholder((None, 32, 32, 3)),
        test_y = placeholder((None, args.Y)),
    ))
    # Supervised and conditional entropy minimization.
    # The classifier is applied in two stages (enc_phase=1, then enc_phase=0);
    # the target-side calls reuse the variables created by the source-side calls.
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    # The target logits serve as both labels and logits here.
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source features -> 1, target features -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Same terms with the labels flipped; this one goes into loss_main.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    # NOTE(review): the pairing relies on both collections listing their
    # variables in the same order -- confirm against nn.classifier.
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        # ema.average() yields None for variables the EMA does not track.
        # Compare against None explicitly instead of relying on the truth
        # value of a TensorFlow variable.
        if ave is None:
            ave = m
        teacher_assign_ops.append(tf.assign(t, ave))
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # When args.dirt != 0 the domain and source-VAT weights are zeroed and the
    # source classification weight becomes args.bw.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    # var_main is rebound here: from now on it is the trainable 'class' set.
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Fetching train_main also fetches the EMA update op.
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Placeholder fetch so that T.ops_disc keeps the same structure.
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    # Alternating label / value tensors.
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
コード例 #2
0
def model(FLAGS, gpu_config):
    """
    Build the GAN graph (and, when FLAGS.phase is truthy, the LSTM sequence
    graph on top of it) and return the handles needed to run it.

    :param FLAGS: Contains the experiment info. Fields read here: sz, ch, bs,
        sbs, alpha, beta, theta, delta, nz, jcb, clip, var, phase, nhl, nhw.
    :param gpu_config: Not referenced in this function body; the session is
        configured from tb.growth_config() instead.
    :return: (TensorDict) the model. Holds the session and placeholders plus
        ops_print, ops_disc, ops_gen, ops_image and, when FLAGS.phase is
        truthy, test_seq_out_pred, val_mae, ops_lstm_print, ops_lstm and
        ops_lstm_image.
    """

    print(colored("Model initialization started", "blue"))

    nn = network(FLAGS)
    sz = FLAGS.sz
    ch = FLAGS.ch
    bs = FLAGS.bs
    sbs = FLAGS.sbs

    # Loss weights / step size as graph values so they can be logged as
    # summaries further down.
    alpha = constant(FLAGS.alpha)
    beta = constant(FLAGS.beta)
    theta = constant(FLAGS.theta)
    delta = constant(FLAGS.delta)

    # Session plus every feed point of the graph. The sequence placeholders
    # hard-code a leading dimension of 10; the validation/test ones also
    # hard-code the second dimension to 10.
    T = tb.utils.TensorDict(
        dict(sess=tf.Session(config=tb.growth_config()),
             x=placeholder((bs, sz, sz, ch)),
             z=placeholder((bs, FLAGS.nz)),
             pos=placeholder((bs * FLAGS.jcb, FLAGS.nz)),
             iorth=placeholder((bs, FLAGS.jcb, FLAGS.jcb)),
             lrD=placeholder(None),
             lrG=placeholder(None),
             seq_in=placeholder((10, sbs, sz, sz, ch)),
             seq_out=placeholder((10, sbs, sz, sz, ch)),
             val_seq_in=placeholder((10, 10, sz, sz, ch)),
             val_seq_out=placeholder((10, 10, sz, sz, ch)),
             test_seq_in=placeholder((10, 10, sz, sz, ch)),
             lr=placeholder(None)))

    # Compute G(x, z) and G(x, 0)
    fake_x = nn.generator(T.x, T.z, phase=True)
    # T.fake_x0 = fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)
    fake_x0 = nn.generator(T.x, tf.zeros_like(T.z), phase=True)

    # Compute discriminator logits
    real_logit = nn.discriminator(T.x, phase=True)
    fake_logit = nn.discriminator(fake_x, phase=True)
    fake0_logit = nn.discriminator(fake_x0, phase=True)

    # Adversarial generator
    # Discriminator objective: real -> 1, G(x, z) -> 0, and G(x, 0) -> 0
    # weighted by theta.
    loss_disc = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
        sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit) +
        theta *
        sigmoid_xent(labels=tf.zeros_like(fake0_logit), logits=fake0_logit))

    # Generator adversarial objective: both generated batches labelled 1.
    loss_fake = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit) +
        theta *
        sigmoid_xent(labels=tf.ones_like(fake0_logit), logits=fake0_logit))

    # Locality
    # Compares G(x, 0) against the input x.
    loss_local = tf.reduce_mean(abs_diff(labels=T.x, predictions=fake_x0))

    # Orthogonality
    # x is tiled FLAGS.jcb times along the batch axis to line up with T.pos.
    pos = T.pos * delta
    tiled_real_x = tf.tile(T.x, [FLAGS.jcb, 1, 1, 1])
    pos_fake_x = nn.generator(tiled_real_x, pos, phase=True)
    neg_fake_x = nn.generator(tiled_real_x, -pos, phase=True)

    # Symmetric difference of the generator output at +pos and -pos, divided
    # by 2 * delta, then flattened per (sample, direction).
    # NOTE(review): tf.tile makes the batch axis jcb-major, while the reshape
    # to [bs, jcb, -1] reads it as bs-major -- confirm T.pos is fed in a
    # matching order.
    jx = (pos_fake_x - neg_fake_x) / (2 * delta)
    jx = tf.reshape(jx, [bs, FLAGS.jcb, -1])
    jx_t = tf.transpose(jx, [0, 2, 1])
    # The (jcb x jcb) product jx * jx^T is compared against the fed T.iorth.
    loss_orth = tf.reduce_mean(abs_diff(tf.matmul(jx, jx_t), T.iorth))

    loss_gen = loss_fake + alpha * loss_local + beta * loss_orth

    # Optimizer
    var_disc = tf.get_collection('trainable_variables', 'lgan/dsc')
    train_disc = tf.train.AdamOptimizer(T.lrD, 0.5).minimize(loss_disc,
                                                             var_list=var_disc)

    # clip_disc only exists when FLAGS.clip is truthy; it is referenced below
    # under the same condition.
    # NOTE(review): the clip assignments carry no control dependency on
    # train_disc, so their order relative to the optimizer step within one
    # session run is not pinned here -- confirm this is intended.
    if FLAGS.clip:
        clip_disc = [
            p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in var_disc
        ]

    var_gen = tf.get_collection('trainable_variables', 'lgan/gen')
    train_gen = tf.train.AdamOptimizer(T.lrG, 0.5).minimize(loss_gen,
                                                            var_list=var_gen)

    # Summarizations
    summary_disc = [
        tf.summary.scalar('disc/loss_disc', loss_disc),
    ]
    summary_gen = [
        tf.summary.scalar('gen/loss_gen', loss_gen),
        tf.summary.scalar('gen/loss_fake', loss_fake),
        tf.summary.scalar('gen/loss_local', loss_local),
        tf.summary.scalar('gen/loss_orth', loss_orth),
        tf.summary.scalar('hyper/alpha', alpha),
        tf.summary.scalar('hyper/beta', beta),
        tf.summary.scalar('hyper/theta', theta),
        tf.summary.scalar('hyper/delta', delta),
        tf.summary.scalar('hyper/lrD', T.lrD),
        tf.summary.scalar('hyper/lrG', T.lrG),
        tf.summary.scalar('hyper/var', FLAGS.var)
    ]
    summary_image = [
        tf.summary.image('image/x', T.x),
        tf.summary.image('image/fake_x', fake_x),
        tf.summary.image('image/fake_x0', fake_x0)
    ]
    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_gen = tf.summary.merge(summary_gen)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    c = tf.constant
    # Alternating label / value tensors.
    T.ops_print = [
        c('disc'), loss_disc,
        c('gen'), loss_gen,
        c('fake'), loss_fake,
        c('local'), loss_local,
        c('orth'), loss_orth
    ]
    # T.ops_disc = [summary_disc, train_disc]

    # With clipping enabled the fetch list gains a third element, which is
    # itself a list of assign ops.
    if FLAGS.clip:
        T.ops_disc = [summary_disc, train_disc, clip_disc]
    else:
        T.ops_disc = [summary_disc, train_disc]

    T.ops_gen = [summary_gen, train_gen]
    T.ops_image = summary_image

    if FLAGS.phase:
        # LSTM initialization
        # Merge the two leading sequence dimensions into one batch axis so the
        # frames can be passed through the generator's enc=True path.
        seq_in = tf.reshape(T.seq_in, [-1, sz, sz, ch])
        seq_out = tf.reshape(T.seq_out, [-1, sz, sz, ch])
        val_seq_in = tf.reshape(T.val_seq_in, [-1, sz, sz, ch])
        test_seq_in = tf.reshape(T.test_seq_in, [-1, sz, sz, ch])
        # NOTE(review): the validation/test paths also use phase=True here
        # and in the dec=True calls below -- confirm this is intended.
        enc_in = nn.generator(seq_in,
                              tf.zeros((10 * sbs, FLAGS.nz)),
                              phase=True,
                              enc=True)
        enc_out = nn.generator(seq_out,
                               tf.zeros((10 * sbs, FLAGS.nz)),
                               phase=True,
                               enc=True)
        val_enc_in = nn.generator(val_seq_in,
                                  tf.zeros((10 * 10, FLAGS.nz)),
                                  phase=True,
                                  enc=True)
        test_enc_in = nn.generator(test_seq_in,
                                   tf.zeros((10 * 10, FLAGS.nz)),
                                   phase=True,
                                   enc=True)
        # Block gradients so the LSTM loss does not reach the generator
        # through the encodings.
        enc_in = tf.stop_gradient(enc_in)
        enc_out = tf.stop_gradient(enc_out)
        val_enc_in = tf.stop_gradient(val_enc_in)
        test_enc_in = tf.stop_gradient(test_enc_in)
        enc_in = tf.squeeze(enc_in)
        enc_out = tf.squeeze(enc_out)
        val_enc_in = tf.squeeze(val_enc_in)
        test_enc_in = tf.squeeze(test_enc_in)
        # Restore the sequence layout with 3 * FLAGS.nz features per step.
        # assumes the enc=True output carries 3 * nz values per frame -- TODO confirm
        enc_in = tf.reshape(enc_in, [-1, sbs, 3 * FLAGS.nz])
        enc_out = tf.reshape(enc_out, [-1, sbs, 3 * FLAGS.nz])
        val_enc_in = tf.reshape(val_enc_in, [-1, 10, 3 * FLAGS.nz])
        test_enc_in = tf.reshape(test_enc_in, [-1, 10, 3 * FLAGS.nz])

        # First LSTM: consumes the input sequences; only its final states are
        # kept. The same cell object is applied to train/val/test inputs.
        with tf.variable_scope('lstm/in'):
            in_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl,
                                                     FLAGS.nhw,
                                                     dropout=0.5)

            _, in_states = in_cell(enc_in, initial_state=None, training=True)
            _, val_in_states = in_cell(val_enc_in,
                                       initial_state=None,
                                       training=False)
            _, test_in_states = in_cell(test_enc_in,
                                        initial_state=None,
                                        training=False)

        # Second LSTM: fed all-zero inputs and initialised with the first
        # LSTM's final states; its outputs are projected by a dense layer.
        with tf.variable_scope('lstm/out'):
            out_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl,
                                                      FLAGS.nhw,
                                                      dropout=0.5)

            outputs, _ = out_cell(tf.zeros_like(enc_out),
                                  initial_state=in_states,
                                  training=True)
            val_outputs, _ = out_cell(tf.zeros_like(val_enc_in),
                                      initial_state=val_in_states,
                                      training=False)
            test_outputs, _ = out_cell(tf.zeros_like(test_enc_in),
                                       initial_state=test_in_states,
                                       training=False)

            # All three dense calls use the name 'lstm_dense' with AUTO_REUSE,
            # so they share one set of weights.
            enc_out_pred = tf.layers.dense(outputs,
                                           3 * FLAGS.nz,
                                           activation=None,
                                           name='lstm_dense',
                                           reuse=tf.AUTO_REUSE)
            val_enc_out_pred = tf.layers.dense(val_outputs,
                                               3 * FLAGS.nz,
                                               activation=None,
                                               name='lstm_dense',
                                               reuse=tf.AUTO_REUSE)
            test_enc_out_pred = tf.layers.dense(test_outputs,
                                                3 * FLAGS.nz,
                                                activation=None,
                                                name='lstm_dense',
                                                reuse=tf.AUTO_REUSE)

        # Flatten the predictions to one row per frame and insert two
        # singleton axes before handing them to the generator's dec=True path.
        enc_out_pred_reshape = tf.reshape(enc_out_pred, [-1, 3 * FLAGS.nz])
        enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(enc_out_pred_reshape, 1), 1)
        val_enc_out_pred_reshape = tf.reshape(val_enc_out_pred,
                                              [-1, 3 * FLAGS.nz])
        val_enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(val_enc_out_pred_reshape, 1), 1)
        test_enc_out_pred_reshape = tf.reshape(test_enc_out_pred,
                                               [-1, 3 * FLAGS.nz])
        test_enc_out_pred_reshape = tf.expand_dims(
            tf.expand_dims(test_enc_out_pred_reshape, 1), 1)

        seq_out_pred = nn.generator(enc_out_pred_reshape,
                                    tf.zeros((10 * sbs, FLAGS.nz)),
                                    phase=True,
                                    dec=True)
        seq_out_pred = tf.reshape(seq_out_pred, [10, sbs, sz, sz, ch])
        val_seq_out_pred = nn.generator(val_enc_out_pred_reshape,
                                        tf.zeros((10 * 10, FLAGS.nz)),
                                        phase=True,
                                        dec=True)
        val_seq_out_pred = tf.reshape(val_seq_out_pred, [10, 10, sz, sz, ch])
        test_seq_out_pred = nn.generator(test_enc_out_pred_reshape,
                                         tf.zeros((10 * 10, FLAGS.nz)),
                                         phase=True,
                                         dec=True)
        T.test_seq_out_pred = tf.reshape(test_seq_out_pred,
                                         [10, 10, sz, sz, ch])

        # The validation metric is computed on generated frames, whereas the
        # training loss compares encodings.
        T.val_mae = tf.reduce_mean(
            abs_diff(labels=T.val_seq_out, predictions=val_seq_out_pred))
        loss_lstm = tf.reduce_mean(
            abs_diff(labels=enc_out, predictions=enc_out_pred))
        # Only variables under a scope matching 'lstm' are optimised here.
        var_lstm = tf.get_collection('trainable_variables', 'lstm')
        # train_lstm = tf.train.AdamOptimizer(FLAGS.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)
        train_lstm = tf.train.AdamOptimizer(T.lr,
                                            0.5).minimize(loss_lstm,
                                                          var_list=var_lstm)

        summary_lstm = [tf.summary.scalar('lstm/loss_lstm', loss_lstm)]
        # Image summaries take index 0 along the second axis of each
        # sequence tensor.
        summary_lstm_image = [
            tf.summary.image('lstm/seq_out', T.seq_out[:, 0, :, :, :]),
            tf.summary.image('lstm/seq_out_pred', seq_out_pred[:, 0, :, :, :])
        ]
        summary_lstm = tf.summary.merge(summary_lstm)
        summary_lstm_image = tf.summary.merge(summary_lstm_image)

        T.ops_lstm_print = [c('loss_lstm'), loss_lstm]
        T.ops_lstm = [summary_lstm, train_lstm]
        T.ops_lstm_image = summary_lstm_image

        # T.test1 = seq_out_pred

    print(colored("Model initialization ended", "blue"))

    return T
コード例 #3
0
ファイル: dirtt.py プロジェクト: duynht/dirt-t
def dirtt():
    """Build the domain-adaptation training graph and return its handles.

    Configuration is read from the module-level ``args`` object (``Y``,
    ``trim``, ``dw``, ``dirt``, ``sw``, ``tw``, ``bw``, ``lr``) and the
    networks come from the module-level ``nn``. Inputs are fed through
    placeholders of shape ``(None, 500, 60, 1)``.

    Returns:
        A ``tb.utils.TensorDict`` holding the session and the input
        placeholders, extended with ``ops_print``, ``ops_disc``,
        ``ops_main``, ``fn_ema_acc``, ``teacher`` and ``update_teacher``.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 500, 60, 1)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 500, 60, 1)),
        trg_y = placeholder((None, args.Y)),
        test_x = placeholder((None, 500, 60, 1)),
        test_y = placeholder((None, args.Y)),
    ))
    # Supervised and conditional entropy minimization.
    # The classifier is applied in two stages (enc_phase=1, then enc_phase=0);
    # the target-side calls reuse the variables created by the source-side calls.
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, trim=args.trim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, trim=args.trim, reuse=True, internal_update=True)
    src_p = nn.classifier(src_e, phase=True, enc_phase=0, trim=args.trim)
    trg_p = nn.classifier(trg_e, phase=True, enc_phase=0, trim=args.trim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    # The target logits serve as both labels and logits here.
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p))

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.feature_discriminator(src_e, phase=True)
        fake_logit = nn.feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source features -> 1, target features -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Same terms with the labels flipped; this one goes into loss_main.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, phase=False, scope='teacher')
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    teacher_assign_ops = []
    # NOTE(review): the pairing relies on both collections listing their
    # variables in the same order -- confirm against nn.classifier.
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        # ema.average() yields None for variables the EMA does not track.
        # Compare against None explicitly instead of relying on the truth
        # value of a TensorFlow variable.
        if ave is None:
            ave = m
        teacher_assign_ops.append(tf.assign(t, ave))
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # When args.dirt != 0 the domain and source-VAT weights are zeroed and the
    # source classification weight becomes args.bw.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat)
    # var_main is rebound here: from now on it is the trainable 'class' set.
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Fetching train_main also fetches the EMA update op.
    train_main = tf.group(train_main, ema_op)

    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Placeholder fetch so that T.ops_disc keeps the same structure.
        train_disc = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)

    # Saved ops
    c = tf.constant
    # Alternating label / value tensors.
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher

    return T
コード例 #4
0
def model(FLAGS, gpu_config):
    """
    Build the reconstruction GAN graph (and, when FLAGS.phase is truthy, the
    LSTM sequence graph on top of it) and return the handles needed to run it.

    :param FLAGS: Contains the experiment info. Fields read here: sz, ch, bs,
        sbs, alpha, nz, phase, nhl, nhw.
    :param gpu_config: Not referenced in this function body; the session is
        configured from tb.growth_config() instead.
    :return: (TensorDict) the model. Holds the session and placeholders plus
        ops_print, ops_disc, ops_gen, ops_image and, when FLAGS.phase is
        truthy, test_seq_out_pred, val_mae, ops_lstm_print, ops_lstm and
        ops_lstm_image.
    """

    print(colored("Model initialization started", "blue"))

    nn = network(FLAGS)
    sz = FLAGS.sz
    ch = FLAGS.ch
    bs = FLAGS.bs
    sbs = FLAGS.sbs

    # Session plus every feed point of the graph. The sequence placeholders
    # hard-code a leading dimension of 10; the validation/test ones also
    # hard-code the second dimension to 10.
    T = tb.utils.TensorDict(dict(
        sess=tf.Session(config=tb.growth_config()),
        x=placeholder((bs, sz, sz, ch)),
        lrD=placeholder(None),
        lrG=placeholder(None),
        seq_in=placeholder((10, sbs, sz, sz, ch)),
        seq_out=placeholder((10, sbs, sz, sz, ch)),
        val_seq_in=placeholder((10, 10, sz, sz, ch)),
        val_seq_out=placeholder((10, 10, sz, sz, ch)),
        test_seq_in=placeholder((10, 10, sz, sz, ch)),
        lr=placeholder(None)
    ))

    # Generator output for the input batch.
    recon_x = nn.generator(T.x, phase=True)

    # Compute discriminator logits
    real_logit = nn.discriminator(T.x, phase=True)
    fake_logit = nn.discriminator(recon_x, phase=True)

    # Adversarial generator
    # Discriminator objective: real -> 1, generated -> 0.
    loss_disc = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
        sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
    # Generator adversarial objective: generated batch labelled 1.
    loss_fake = tf.reduce_mean(
        sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    # Compares the generator output against the input x.
    loss_local = tf.reduce_mean(abs_diff(labels=T.x, predictions=recon_x))

    # FLAGS.alpha is used directly as a Python value here.
    loss_gen = loss_fake + FLAGS.alpha * loss_local

    var_gen = tf.get_collection('trainable_variables', 'lgan/gen')
    train_gen = tf.train.AdamOptimizer(T.lrG, 0.5).minimize(loss_gen, var_list=var_gen)

    var_disc = tf.get_collection('trainable_variables', 'lgan/dsc')
    train_disc = tf.train.AdamOptimizer(T.lrD, 0.5).minimize(loss_disc, var_list=var_disc)

    # Summarizations
    summary_disc = [tf.summary.scalar('disc/loss_disc', loss_disc)]
    summary_gen = [tf.summary.scalar('gen/loss_gen', loss_gen),
                   tf.summary.scalar('gen/loss_local', loss_local),
                   tf.summary.scalar('gen/loss_fake', loss_fake),
                   tf.summary.scalar('hyper/lrD', T.lrD),
                   tf.summary.scalar('hyper/lrG', T.lrG)]
    summary_image = [tf.summary.image('image/x', T.x),
                     tf.summary.image('image/recon_x', recon_x)]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_gen = tf.summary.merge(summary_gen)
    summary_image = tf.summary.merge(summary_image)

    # Saved ops
    c = tf.constant
    # Alternating label / value tensors.
    T.ops_print = [c('disc'), loss_disc,
                   c('gen'), loss_gen,
                   c('local'), loss_local,
                   c('fake'), loss_fake]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_gen = [summary_gen, train_gen]
    T.ops_image = summary_image

    if FLAGS.phase:
        # LSTM initialization
        # Merge the two leading sequence dimensions into one batch axis so the
        # frames can be passed through the generator's enc=True path.
        seq_in = tf.reshape(T.seq_in, [-1, sz, sz, ch])
        seq_out = tf.reshape(T.seq_out, [-1, sz, sz, ch])
        val_seq_in = tf.reshape(T.val_seq_in, [-1, sz, sz, ch])
        test_seq_in = tf.reshape(T.test_seq_in, [-1, sz, sz, ch])
        # NOTE(review): the validation/test paths also use phase=True here
        # and in the dec=True calls below -- confirm this is intended.
        enc_in = nn.generator(seq_in, phase=True, enc=True)
        enc_out = nn.generator(seq_out, phase=True, enc=True)
        val_enc_in = nn.generator(val_seq_in, phase=True, enc=True)
        test_enc_in = nn.generator(test_seq_in, phase=True, enc=True)
        # Block gradients so the LSTM loss does not reach the generator
        # through the encodings.
        enc_in = tf.stop_gradient(enc_in)
        enc_out = tf.stop_gradient(enc_out)
        val_enc_in = tf.stop_gradient(val_enc_in)
        test_enc_in = tf.stop_gradient(test_enc_in)
        enc_in = tf.squeeze(enc_in)
        enc_out = tf.squeeze(enc_out)
        val_enc_in = tf.squeeze(val_enc_in)
        test_enc_in = tf.squeeze(test_enc_in)
        # Restore the sequence layout with FLAGS.nz features per step.
        # assumes the enc=True output carries nz values per frame -- TODO confirm
        enc_in = tf.reshape(enc_in, [-1, sbs, FLAGS.nz])
        enc_out = tf.reshape(enc_out, [-1, sbs, FLAGS.nz])
        val_enc_in = tf.reshape(val_enc_in, [-1, 10, FLAGS.nz])
        test_enc_in = tf.reshape(test_enc_in, [-1, 10, FLAGS.nz])

        # First LSTM: consumes the input sequences; only its final states are
        # kept. The same cell object is applied to train/val/test inputs.
        with tf.variable_scope('lstm/in'):
            in_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)

            _, in_states = in_cell(enc_in, initial_state=None, training=True)
            _, val_in_states = in_cell(val_enc_in, initial_state=None, training=False)
            _, test_in_states = in_cell(test_enc_in, initial_state=None, training=False)

        # Second LSTM: fed all-zero inputs and initialised with the first
        # LSTM's final states; its outputs are projected by a dense layer.
        with tf.variable_scope('lstm/out'):
            out_cell = tf.contrib.cudnn_rnn.CudnnLSTM(FLAGS.nhl, FLAGS.nhw, dropout=0.5)

            outputs, _ = out_cell(tf.zeros_like(enc_out), initial_state=in_states, training=True)
            val_outputs, _ = out_cell(tf.zeros_like(val_enc_in), initial_state=val_in_states, training=False)
            test_outputs, _ = out_cell(tf.zeros_like(test_enc_in), initial_state=test_in_states, training=False)

            # All three dense calls use the name 'lstm_dense' with AUTO_REUSE,
            # so they share one set of weights.
            enc_out_pred = tf.layers.dense(outputs, FLAGS.nz, activation=None, name='lstm_dense', reuse=tf.AUTO_REUSE)
            val_enc_out_pred = tf.layers.dense(val_outputs, FLAGS.nz, activation=None, name='lstm_dense',
                                               reuse=tf.AUTO_REUSE)
            test_enc_out_pred = tf.layers.dense(test_outputs, FLAGS.nz, activation=None, name='lstm_dense',
                                               reuse=tf.AUTO_REUSE)

        # Flatten the predictions to one row per frame and insert two
        # singleton axes before handing them to the generator's dec=True path.
        enc_out_pred_reshape = tf.reshape(enc_out_pred, [-1, FLAGS.nz])
        enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(enc_out_pred_reshape, 1), 1)
        val_enc_out_pred_reshape = tf.reshape(val_enc_out_pred, [-1, FLAGS.nz])
        val_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(val_enc_out_pred_reshape, 1), 1)
        test_enc_out_pred_reshape = tf.reshape(test_enc_out_pred, [-1, FLAGS.nz])
        test_enc_out_pred_reshape = tf.expand_dims(tf.expand_dims(test_enc_out_pred_reshape, 1), 1)

        seq_out_pred = nn.generator(enc_out_pred_reshape, phase=True, dec=True)
        seq_out_pred = tf.reshape(seq_out_pred, [10, sbs, sz, sz, ch])
        val_seq_out_pred = nn.generator(val_enc_out_pred_reshape, phase=True, dec=True)
        val_seq_out_pred = tf.reshape(val_seq_out_pred, [10, 10, sz, sz, ch])
        test_seq_out_pred = nn.generator(test_enc_out_pred_reshape, phase=True, dec=True)
        T.test_seq_out_pred = tf.reshape(test_seq_out_pred, [10, 10, sz, sz, ch])

        # The validation metric is computed on generated frames, whereas the
        # training loss compares encodings.
        T.val_mae = tf.reduce_mean(abs_diff(labels=T.val_seq_out, predictions=val_seq_out_pred))
        loss_lstm = tf.reduce_mean(abs_diff(labels=enc_out, predictions=enc_out_pred))
        # Only variables under a scope matching 'lstm' are optimised here.
        var_lstm = tf.get_collection('trainable_variables', 'lstm')
        # train_lstm = tf.train.AdamOptimizer(FLAGS.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)
        train_lstm = tf.train.AdamOptimizer(T.lr, 0.5).minimize(loss_lstm, var_list=var_lstm)

        summary_lstm = [tf.summary.scalar('lstm/loss_lstm', loss_lstm)]
        # Image summaries take index 0 along the second axis of each
        # sequence tensor.
        summary_lstm_image = [tf.summary.image('lstm/seq_out', T.seq_out[:, 0, :, :, :]),
                              tf.summary.image('lstm/seq_out_pred', seq_out_pred[:, 0, :, :, :])]
        summary_lstm = tf.summary.merge(summary_lstm)
        summary_lstm_image = tf.summary.merge(summary_lstm_image)

        T.ops_lstm_print = [c('loss_lstm'), loss_lstm]
        T.ops_lstm = [summary_lstm, train_lstm]
        T.ops_lstm_image = summary_lstm_image

    print(colored("Model initialization ended", "blue"))

    return T
コード例 #5
0
ファイル: gada.py プロジェクト: haitran14/gada
def gada():
    """Build the GADA training/evaluation graph and return its handles.

    The function creates a session and the input placeholders, wires the
    shared classifier (encoder -> intermediate "gen" stage -> logits) over
    source, target and generated images, defines every loss term, the
    optimizers, the EMA/teacher evaluation models and the summaries.

    Hyper-parameters are read from the module-level ``args`` object:
    ``Y``, ``etrim``, ``gtrim``, ``dw``, ``sw``, ``tw``, ``uw``, ``bw``,
    ``lr`` and ``dirt``. When ``args.dirt == 0`` the domain-adversarial,
    source-VAT, feature-matching and unsupervised terms are enabled;
    otherwise they are replaced by constant zeros.

    Returns:
        The ``tb.utils.TensorDict`` ``T`` holding the session and the
        placeholders, extended with:

        * ``ops_print``  -- alternating label/value list of losses and
          accuracies.
        * ``ops_disc``, ``ops_main``, ``ops_gen`` -- ``[summary, train_op]``
          pairs for the feature discriminator, the classifier and the
          target generator.
        * ``fn_ema_acc`` -- callable computing EMA accuracy from
          ``(test_x, test_y)``.
        * ``teacher``, ``update_teacher`` -- teacher softmax callable and
          the op that copies weights into the teacher.
        * ``trg_gen_x``, ``trg_gen_p``, ``src_p``, ``trg_p``, ``ema_p`` --
          raw tensors for generated images and logits.
    """
    T = tb.utils.TensorDict(dict(
        sess = tf.Session(config=tb.growth_config()),
        src_x = placeholder((None, 32, 32, 3)),
        src_y = placeholder((None, args.Y)),
        trg_x = placeholder((None, 32, 32, 3)),
        trg_y = placeholder((None, args.Y)),
        trg_z = placeholder((None, 100)),
        test_x = placeholder((None, 32, 32, 3)),
        test_y = placeholder((None, args.Y)),
    ))

    # Supervised and conditional entropy minimization.
    # The classifier is applied in three stages (encoder, "gen" stage, head);
    # the target branch reuses the variables created by the source branch.
    src_e = nn.classifier(T.src_x, phase=True, enc_phase=1, enc_trim=args.etrim)
    src_g = nn.classifier(src_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim)
    src_p = nn.classifier(src_g, phase=True, gen_trim=args.gtrim)
    trg_e = nn.classifier(T.trg_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_g = nn.classifier(trg_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_p = nn.classifier(trg_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    loss_src_class = tf.reduce_mean(softmax_xent(labels=T.src_y, logits=src_p))
    # Target logits serve as both labels and logits (conditional entropy).
    loss_trg_cent = tf.reduce_mean(softmax_xent_two(labels=trg_p, logits=trg_p)) if args.tw > 0 else constant(0)

    # Domain confusion
    if args.dw > 0 and args.dirt == 0:
        real_logit = nn.real_feature_discriminator(src_e, phase=True)
        fake_logit = nn.real_feature_discriminator(trg_e, phase=True, reuse=True)

        # Discriminator objective: source features -> 1, target features -> 0.
        loss_disc = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.ones_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.zeros_like(fake_logit), logits=fake_logit))
        # Classifier objective: the same terms with the labels flipped.
        loss_domain = 0.5 * tf.reduce_mean(
            sigmoid_xent(labels=tf.zeros_like(real_logit), logits=real_logit) +
            sigmoid_xent(labels=tf.ones_like(fake_logit), logits=fake_logit))

    else:
        loss_disc = constant(0)
        loss_domain = constant(0)

    # Virtual adversarial training (turn off src in non-VADA phase)
    loss_src_vat = vat_loss(T.src_x, src_p, nn.classifier) if args.sw > 0 and args.dirt == 0 else constant(0)
    loss_trg_vat = vat_loss(T.trg_x, trg_p, nn.classifier) if args.tw > 0 else constant(0)

    # Generate images and process generated images
    trg_gen_x = nn.trg_generator(T.trg_z)
    trg_gen_e = nn.classifier(trg_gen_x, phase=True, enc_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_g = nn.classifier(trg_gen_e, phase=True, gen_trim=args.gtrim, gen_phase=1, enc_trim=args.etrim, reuse=True, internal_update=True)
    trg_gen_p = nn.classifier(trg_gen_g, phase=True, gen_trim=args.gtrim, reuse=True, internal_update=True)

    # Feature matching loss function for generator:
    # squared difference between the batch means of trg_g and trg_gen_g.
    loss_trg_gen_fm = tf.reduce_mean(tf.square(tf.reduce_mean(trg_g, axis=0) - tf.reduce_mean(trg_gen_g, axis=0))) if args.dirt == 0 else constant(0)

    # Unsupervised loss function
    if args.dirt == 0:
        # Log-sum-exp over the class logits yields one scalar logit per sample.
        logit_real = tf.reduce_logsumexp(trg_p, axis=1)
        logit_fake = tf.reduce_logsumexp(trg_gen_p, axis=1)
        dis_loss_real = -0.5*tf.reduce_mean(logit_real) + 0.5*tf.reduce_mean(tf.nn.softplus(logit_real))
        dis_loss_fake = 0.5*tf.reduce_mean(tf.nn.softplus(logit_fake))
        loss_trg_usv = dis_loss_real + dis_loss_fake    # UnSuperVised loss function
    else:
        loss_trg_usv = constant(0)

    # Evaluation (EMA)
    ema = tf.train.ExponentialMovingAverage(decay=0.998)
    var_class = tf.get_collection('trainable_variables', 'class/')
    ema_op = ema.apply(var_class)
    ema_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, reuse=True, getter=tb.tfutils.get_getter(ema))

    # Teacher model (a back-up of EMA model)
    teacher_p = nn.classifier(T.test_x, enc_phase=1, enc_trim=0, phase=False, scope='teacher')
    # The negative look-ahead drops the EMA shadow variables from both lists.
    var_main = tf.get_collection('variables', 'class/(?!.*ExponentialMovingAverage:0)')
    var_teacher = tf.get_collection('variables', 'teacher/(?!.*ExponentialMovingAverage:0)')
    # NOTE(review): the pairing below assumes both collections list their
    # variables in the same order -- confirm against nn.classifier.
    teacher_assign_ops = []
    for t, m in zip(var_teacher, var_main):
        ave = ema.average(m)
        # ema.average() returns None for variables the EMA does not track.
        # Compare against None explicitly instead of relying on the
        # truthiness of a TensorFlow variable object.
        source = m if ave is None else ave
        teacher_assign_ops.append(tf.assign(t, source))
    update_teacher = tf.group(*teacher_assign_ops)
    teacher = tb.function(T.sess, [T.test_x], tf.nn.softmax(teacher_p))

    # Accuracies
    src_acc = basic_accuracy(T.src_y, src_p)
    trg_acc = basic_accuracy(T.trg_y, trg_p)
    ema_acc = basic_accuracy(T.test_y, ema_p)
    fn_ema_acc = tb.function(T.sess, [T.test_x, T.test_y], ema_acc)

    # Optimizer
    # Loss weights; outside the args.dirt == 0 phase only cw (set to args.bw)
    # and tw remain non-zero.
    dw = constant(args.dw) if args.dirt == 0 else constant(0)
    cw = constant(1)       if args.dirt == 0 else constant(args.bw)
    sw = constant(args.sw) if args.dirt == 0 else constant(0)
    tw = constant(args.tw)
    uw = constant(args.uw) if args.dirt == 0 else constant(0)
    loss_main = (dw * loss_domain +
                 cw * loss_src_class +
                 sw * loss_src_vat +
                 tw * loss_trg_cent +
                 tw * loss_trg_vat +
                 uw * loss_trg_usv)
    # var_main is rebound here: from now on it is the trainable classifier set.
    var_main = tf.get_collection('trainable_variables', 'class')
    train_main = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_main, var_list=var_main)
    # Run the EMA update together with every classifier training step.
    train_main = tf.group(train_main, ema_op)

    # Optimizer for feature discriminator
    if args.dw > 0 and args.dirt == 0:
        var_disc = tf.get_collection('trainable_variables', 'disc_real')
        train_disc = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_disc, var_list=var_disc)
    else:
        # Stand-in tensor so T.ops_disc keeps its [summary, train] layout.
        train_disc = constant(0)

    # Optimizer for generators
    if args.dirt == 0:
        fmw = constant(1)
        loss_trg_gen = (fmw * loss_trg_gen_fm)
        var_trg_gen = tf.get_collection('trainable_variables', 'trg_gen')
        # Ops registered under UPDATE_OPS for the generator scope must run
        # before the generator's training step.
        trg_gen_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='trg_gen')
        with tf.control_dependencies(trg_gen_update_ops):
            train_trg_gen = tf.train.AdamOptimizer(args.lr, 0.5).minimize(loss_trg_gen, var_list=var_trg_gen)
        train_gen = train_trg_gen
    else:
        fmw = constant(0)
        # Stand-in tensor so T.ops_gen keeps its [summary, train] layout.
        train_gen = constant(0)

    # Summarizations
    summary_disc = [tf.summary.scalar('domain/loss_disc', loss_disc),]
    summary_main = [tf.summary.scalar('domain/loss_domain', loss_domain),
                    tf.summary.scalar('class/loss_src_class', loss_src_class),
                    tf.summary.scalar('class/loss_trg_cent', loss_trg_cent),
                    tf.summary.scalar('class/loss_trg_usv', loss_trg_usv),
                    tf.summary.scalar('lipschitz/loss_trg_vat', loss_trg_vat),
                    tf.summary.scalar('lipschitz/loss_src_vat', loss_src_vat),
                    tf.summary.scalar('hyper/dw', dw),
                    tf.summary.scalar('hyper/cw', cw),
                    tf.summary.scalar('hyper/sw', sw),
                    tf.summary.scalar('hyper/tw', tw),
                    tf.summary.scalar('hyper/uw', uw),
                    tf.summary.scalar('hyper/fmw', fmw),
                    tf.summary.scalar('acc/src_acc', src_acc),
                    tf.summary.scalar('acc/trg_acc', trg_acc)]
    summary_gen  = [tf.summary.scalar('gen/loss_trg_gen_fm', loss_trg_gen_fm),
                    tf.summary.image('gen/trg_gen_img', trg_gen_x),]

    # Merge summaries
    summary_disc = tf.summary.merge(summary_disc)
    summary_main = tf.summary.merge(summary_main)
    summary_gen  = tf.summary.merge(summary_gen)

    # Saved ops
    c = tf.constant
    T.ops_print = [c('disc'), loss_disc,
                   c('domain'), loss_domain,
                   c('class'), loss_src_class,
                   c('cent'), loss_trg_cent,
                   c('trg_vat'), loss_trg_vat,
                   c('src_vat'), loss_src_vat,
                   c('src'), src_acc,
                   c('trg'), trg_acc]
    T.ops_disc = [summary_disc, train_disc]
    T.ops_main = [summary_main, train_main]
    T.ops_gen  = [summary_gen , train_gen]
    T.fn_ema_acc = fn_ema_acc
    T.teacher = teacher
    T.update_teacher = update_teacher
    T.trg_gen_x = trg_gen_x
    T.trg_gen_p = trg_gen_p
    T.src_p = src_p
    T.trg_p = trg_p
    T.ema_p = ema_p

    return T