Example #1
def build_training_graph(x, y, ul_x, lr, mom):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = vat.forward(x)
    nll_loss = L.ce_loss(logit, y)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        if FLAGS.method == 'vat':
            ul_logit = vat.forward(ul_x,
                                   is_training=True,
                                   update_batch_stats=False)
            vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit)
            additional_loss = vat_loss
        elif FLAGS.method == 'vatent':
            ul_logit = vat.forward(ul_x,
                                   is_training=True,
                                   update_batch_stats=False)
            vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit)
            ent_loss = L.entropy_y_x(ul_logit)
            additional_loss = vat_loss + ent_loss
        elif FLAGS.method == 'baseline':
            additional_loss = 0
        else:
            raise NotImplementedError
        loss = nll_loss + additional_loss

    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
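
A sketch of how the ops returned above might be driven in a TF 1.x session loop. The placeholder shapes, hyperparameter values, iteration count, and the next_batch() helper are assumptions for illustration, not part of the original example.

# Hypothetical driver loop (TF 1.x); shapes and next_batch() are assumed.
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(tf.float32, shape=[None, 10])
ul_x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
lr = tf.placeholder(tf.float32, shape=[])
mom = tf.placeholder(tf.float32, shape=[])

loss, train_op, global_step = build_training_graph(x, y, ul_x, lr, mom)

num_iters = 1000  # assumed
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_iters):
        x_b, y_b, ul_x_b = next_batch()  # assumed data-feeding helper
        _, loss_val = sess.run(
            [train_op, loss],
            feed_dict={x: x_b, y: y_b, ul_x: ul_x_b, lr: 0.001, mom: 0.9})
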
Example #2
def adversarial_loss(x, y, loss, is_training=True, name="at_loss"):
    r_adv = generate_adversarial_perturbation(x, loss)
    logit = forward(x + r_adv,
                    is_training=is_training,
                    update_batch_stats=False)
    loss = L.ce_loss(logit, y)
    return loss
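
For context, generate_adversarial_perturbation is commonly implemented as the normalized gradient of the supervised loss with respect to the input, scaled to a small radius. The sketch below shows that usual pattern and is not necessarily the exact code behind these examples; epsilon is an assumed hyperparameter.

# Sketch of a typical implementation (not necessarily the one used here).
def generate_adversarial_perturbation(x, loss, epsilon=2.0):
    # gradient of the loss w.r.t. the input, treated as a constant afterwards
    grad = tf.gradients(loss, [x])[0]
    grad = tf.stop_gradient(grad)
    # L2-normalize per example, then scale to the epsilon ball
    reduce_axes = list(range(1, len(grad.get_shape())))
    grad_norm = tf.sqrt(
        tf.reduce_sum(tf.square(grad), axis=reduce_axes, keepdims=True)) + 1e-12
    return epsilon * grad / grad_norm
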
Example #3
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    return losses
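
The eval graphs in these examples just return a dict of tensors; one way to average them over a held-out set is sketched below (the placeholder names and the eval_batches() helper are assumptions).

# Hypothetical evaluation loop over the dict returned by build_eval_graph.
losses = build_eval_graph(x, y, ul_x)
sums = {k: 0.0 for k in losses}
n_batches = 0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for x_b, y_b in eval_batches():  # assumed batching helper
        vals = sess.run(losses, feed_dict={x: x_b, y: y_b})
        for k, v in vals.items():
            sums[k] += v
        n_batches += 1
print({k: v / n_batches for k, v in sums.items()})
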
Example #4
def build_test_graph(x, y):
    losses = {}
    logit = adt.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['No_NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['No_Acc'] = acc
    return losses
Example #5
def build_eval_graph(X_l, Y_l):
    #Todo: implement this
    losses = {}
    logit_l = sup_classifier.logit_small(X_l,
                                         is_training=False,
                                         update_batch_stats=False)

    losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
    return losses
Example #6
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = vat.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    scope = tf.get_variable_scope()
    scope.reuse_variables()
    at_loss = vat.adversarial_loss(x, y, nll_loss, is_training=True)
    losses['AT_loss'] = at_loss
    ul_logit = vat.forward(ul_x, is_training=False, update_batch_stats=False)
    vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit, is_training=False)
    losses['VAT_loss'] = vat_loss
    return losses
Example #7
def build_training_graph(x, y, lr, mom):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = cnn.forward(x)
    loss = L.ce_loss(logit, y)
    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
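
These examples all lean on an L module of small loss/metric helpers. Plausible definitions for the two used here, assuming one-hot labels, are sketched below; the real module may differ in details.

# Possible definitions of L.ce_loss and L.accuracy (one-hot labels assumed).
def ce_loss(logit, y):
    # mean softmax cross-entropy against the one-hot target
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logit))

def accuracy(logit, y):
    # fraction of examples whose argmax prediction matches the label
    correct = tf.equal(tf.argmax(logit, axis=1), tf.argmax(y, axis=1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))
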
Example #8
def build_training_graph(x_1, x_2, y, ul_x_1, ul_x_2, lr, mom, lamb):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )

    logit = adt.forward(x_1, update_batch_stats=True)
    nll_loss = L.ce_loss(logit, y)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):

        if FLAGS.method == 'VAdD':
            ul_logit = adt.forward(ul_x_1)
            ent_loss = L.entropy_y_x(ul_logit)
            vadt_loss = adt.virtual_adversarial_dropout_loss(ul_x_2, ul_logit)
            loss = nll_loss + lamb * vadt_loss + ent_loss

        elif FLAGS.method == 'VAT':
            ul_logit = adt.forward(ul_x_1, update_batch_stats=False)
            ent_loss = L.entropy_y_x(ul_logit)
            vat_loss = adt.virtual_adversarial_loss(ul_x_1, ul_logit)
            loss = nll_loss + vat_loss + ent_loss

        elif FLAGS.method == 'Pi':
            ul_logit = adt.forward(ul_x_1, update_batch_stats=True)
            ul_adt_logit = adt.forward(ul_x_2, update_batch_stats=True)
            additional_loss = L.qe_loss(ul_logit, ul_adt_logit)  #*4.0
            ent_loss = L.entropy_y_x(ul_logit)
            loss = nll_loss + lamb * additional_loss + ent_loss

        else:
            raise NotImplementedError

    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom, beta2=0.999)
        tvars = tf.trainable_variables()
        grads_and_vars = opt.compute_gradients(loss, tvars)
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
Example #9
def build_training_graph(is_training, X_l, Y_l, ID_l, X_u, ID_u, K, lr, mom,
                         lgc_alpha):
    CACHE_F = tf.get_variable("cache_f")
    W = tf.get_variable("Affinity_matrix")
    D = tf.get_variable("D")
    assert (K.shape[0] == W.shape[0])
    with tf.variable_scope("CNN", reuse=tf.AUTO_REUSE) as scope:
        losses = {}
        #logit_l = sup_classifier.logit_small(X_l,num_classes = Y_l.shape[1], is_training=is_training, update_batch_stats=False)
        #logit_u = sup_classifier.logit_small(X_u,num_classes = Y_l.shape[1], is_training=is_training, update_batch_stats=is_training)
    logit_l = tf.gather(CACHE_F, ID_l)
    logit_u = tf.gather(CACHE_F, ID_u)

    losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
    losses['mse_sup'] = tf.losses.mean_squared_error(tf.nn.softmax(logit_l),
                                                     Y_l)
    losses['mean_acc'] = L.accuracy(logit_l, Y_l)

    #Concatenate ids and logits
    ids = tf.concat([ID_l, ID_u], 0)
    logits = tf.concat([logit_l, logit_u], 0)
    logits = tf.nn.softmax(logits)
    #assert ids.shape[0] == logits.shape[0]

    if FLAGS.loss_func == "lgc":
        #Unsupervised loss
        K_ids = tf.gather(K, ids, axis=0)  #Get neighbor pairs
        K_ids_i = tf.reshape(K_ids[:, :, 0], [-1])
        K_ids_j = tf.reshape(K_ids[:, :, 1], [-1])
        logits_ids = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(tf.shape(logits)[0]), -1),
                    [1, FLAGS.affmat_k]), [-1])

        #all_F_i = tf.gather(logits,logits_ids,axis=0)
        all_F_i = tf.gather(tf.nn.softmax(CACHE_F), K_ids_i, axis=0)
        all_F_j = tf.gather(tf.nn.softmax(CACHE_F), K_ids_j,
                            axis=0)  #F_j comes from cache
        all_Wij = tf.reshape(tf.gather(W, ids, axis=0), [-1])
        all_Dii = tf.gather(D, K_ids_i, axis=0)
        all_Djj = tf.gather(D, K_ids_j, axis=0)
        all_Dii = tf.tile(all_Dii[:, None], [1, all_F_i.shape[1]])
        all_Djj = tf.tile(all_Djj[:, None], [1, all_F_j.shape[1]])

        all_Fi = tf.multiply(all_Dii, all_F_i)
        all_Fj = tf.multiply(all_Djj, all_F_j)
        LGC_unsupervised_loss = (tf.multiply(
            tf.reduce_sum(tf.square(all_Fi - all_Fj), axis=1), all_Wij))
        #LGC_unsupervised_loss = tf.reduce_sum(tf.square(all_F_i - all_F_j),axis=1)
        losses["lgc_unsupervised_loss"] = tf.reduce_sum(LGC_unsupervised_loss)
        losses["lgc_supervised_loss"] = losses['xent_sup']

        losses["lgc_unsupervised_loss"] = (
            int(K.shape[0]) / int(FLAGS.batch_size + FLAGS.ul_batch_size)
        ) * losses["lgc_unsupervised_loss"]
        losses["lgc_supervised_loss"] = (
            FLAGS.num_labeled / int(FLAGS.batch_size)) * losses['mse_sup']

        lgc_lamb = 1 / lgc_alpha - 1
        losses["lgc_loss"] = losses[
            "lgc_unsupervised_loss"] + lgc_lamb * losses["lgc_supervised_loss"]

        #Assign to cache
        assign_op_l = tf.scatter_update(ref=CACHE_F,
                                        indices=ID_l,
                                        updates=tf.nn.softmax(logit_l))
        assign_op_u = tf.scatter_update(ref=CACHE_F,
                                        indices=ID_u,
                                        updates=tf.nn.softmax(logit_u))
        #assign_to_cache = tf.group(assign_op_l,assign_op_u)
        assign_to_cache = tf.no_op()

        #Get Trainable vars
        tvars = tf.trainable_variables()
        tvars_W = list(
            filter(lambda x: np.char.find(x.name, "Affinity_matrix") != -1,
                   tvars))
        tvars_D = list(filter(lambda x: np.char.find(x.name, "D") != -1,
                              tvars))
        tvars_CNN = list(
            filter(lambda x: np.char.find(x.name, "CNN") != -1, tvars))
        tvars_CACHE = list(
            filter(lambda x: np.char.find(x.name, "cache") != -1, tvars))

        print([var.name for var in tvars_CACHE])

        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        grads_and_vars = opt.compute_gradients(losses['lgc_loss'], tvars_CACHE)
        train_op = opt.apply_gradients(
            grads_and_vars, global_step=tf.train.get_or_create_global_step())
    else:
        assign_to_cache = None
        all_Wij = None  # only produced in the 'lgc' branch above
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        tvars = tf.trainable_variables()
        grads_and_vars = opt.compute_gradients(losses['xent_sup'], tvars)
        train_op = opt.apply_gradients(
            grads_and_vars, global_step=tf.train.get_or_create_global_step())

    return losses, train_op, assign_to_cache, logit_l, all_Wij
Example #10
def build_training_graph(x_1, x_2, y, lr, mom, lamb):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = adt.forward(x_1)
    nll_loss = L.ce_loss(logit, y)

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):

        if FLAGS.method == 'SAdD':
            adt_logit = adt.forward_adv_drop(x_2,
                                             y,
                                             FLAGS.delta,
                                             is_training=True)
            additional_loss = L.ce_loss(adt_logit, y)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAdD-KL':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2,
                                             logit_p,
                                             FLAGS.delta,
                                             is_training=True,
                                             mode=FLAGS.method)
            additional_loss = L.kl_divergence_with_logit(logit_p, adt_logit)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAdD-QE':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2,
                                             logit_p,
                                             FLAGS.delta,
                                             is_training=True,
                                             mode=FLAGS.method)
            additional_loss = L.qe_loss(adt_logit, logit_p)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAT+VAdD-KL':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2,
                                             logit_p,
                                             FLAGS.delta,
                                             is_training=True,
                                             mode='VAdD-KL')
            additional_loss = L.kl_divergence_with_logit(logit_p, adt_logit)
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + lamb * additional_loss + vat_loss
        elif FLAGS.method == 'VAT+VAdD-QE':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2,
                                             logit_p,
                                             FLAGS.delta,
                                             is_training=True,
                                             mode='VAdD-QE')
            additional_loss = L.qe_loss(adt_logit, logit_p)
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + lamb * additional_loss + vat_loss
        elif FLAGS.method == 'VAT':
            logit_p = logit
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + vat_loss
        elif FLAGS.method == 'Pi':
            adt_logit = adt.forward(x_2)
            additional_loss = L.qe_loss(adt_logit, logit)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'baseline':
            additional_loss = 0
            loss = nll_loss + additional_loss
        else:
            raise NotImplementedError

    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        tvars = tf.trainable_variables()
        grads_and_vars = opt.compute_gradients(loss, tvars)
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
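
The consistency terms used above, L.kl_divergence_with_logit and L.qe_loss, are usually defined roughly as below. This is a sketch that assumes both arguments are raw (pre-softmax) logits; the actual helpers may differ.

# Rough sketches of the consistency losses referenced above.
def kl_divergence_with_logit(q_logit, p_logit):
    # batch-mean KL(q || p), both distributions given as logits
    q = tf.nn.softmax(q_logit)
    qlogq = tf.reduce_mean(tf.reduce_sum(q * tf.nn.log_softmax(q_logit), axis=1))
    qlogp = tf.reduce_mean(tf.reduce_sum(q * tf.nn.log_softmax(p_logit), axis=1))
    return qlogq - qlogp

def qe_loss(logit_1, logit_2):
    # quadratic error between the two softmax outputs
    return tf.reduce_mean(
        tf.reduce_sum(tf.square(tf.nn.softmax(logit_1) - tf.nn.softmax(logit_2)),
                      axis=1))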