# These graph-building snippets assume TF1.x. `L` (layers), `vat`, `adt`,
# `cnn`, and `sup_classifier` are project modules, and `FLAGS` is the usual
# tf.flags configuration object, all defined elsewhere in the repository.
import numpy as np
import tensorflow as tf


def build_training_graph(x, y, ul_x, lr, mom):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = vat.forward(x)
    nll_loss = L.ce_loss(logit, y)
    # Reuse the classifier weights for the unlabeled forward passes.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        if FLAGS.method == 'vat':
            ul_logit = vat.forward(ul_x, is_training=True, update_batch_stats=False)
            vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit)
            additional_loss = vat_loss
        elif FLAGS.method == 'vatent':
            # VAT plus conditional-entropy minimization on unlabeled data.
            ul_logit = vat.forward(ul_x, is_training=True, update_batch_stats=False)
            vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit)
            ent_loss = L.entropy_y_x(ul_logit)
            additional_loss = vat_loss + ent_loss
        elif FLAGS.method == 'baseline':
            additional_loss = 0
        else:
            raise NotImplementedError
        loss = nll_loss + additional_loss
    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
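# A minimal sketch of what vat.virtual_adversarial_loss computes, following
# the standard VAT recipe (Miyato et al.): estimate the most loss-sensitive
# input direction with one step of power iteration, then penalize the KL
# divergence of the prediction under that perturbation. `forward`, `xi`,
# `epsilon`, and the helper names below are illustrative assumptions, not
# this repository's exact API.
def get_normalized_vector_sketch(d):
    # Normalize per example: first by max-abs for numerical stability,
    # then to unit L2 norm over all non-batch dimensions.
    ndims = len(d.get_shape())
    d /= (1e-12 + tf.reduce_max(tf.abs(d), axis=list(range(1, ndims)), keepdims=True))
    d /= tf.sqrt(1e-6 + tf.reduce_sum(tf.square(d), axis=list(range(1, ndims)), keepdims=True))
    return d


def virtual_adversarial_loss_sketch(x, logit, forward, xi=1e-6, epsilon=8.0,
                                    num_power_iterations=1):
    logit_p = tf.stop_gradient(logit)  # fixed target distribution
    d = tf.random_normal(shape=tf.shape(x))
    for _ in range(num_power_iterations):
        # Power iteration: the gradient of the KL w.r.t. a small perturbation
        # converges to the dominant eigenvector of the local Hessian.
        d = xi * get_normalized_vector_sketch(d)
        logit_m = forward(x + d, update_batch_stats=False)
        dist = L.kl_divergence_with_logit(logit_p, logit_m)
        grad = tf.gradients(dist, [d], aggregation_method=2)[0]
        d = tf.stop_gradient(grad)
    r_vadv = epsilon * get_normalized_vector_sketch(d)
    logit_m = forward(x + r_vadv, update_batch_stats=False)
    return L.kl_divergence_with_logit(logit_p, logit_m)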
def adversarial_loss(x, y, loss, is_training=True, name="at_loss"):
    r_adv = generate_adversarial_perturbation(x, loss)
    logit = forward(x + r_adv, is_training=is_training, update_batch_stats=False)
    return L.ce_loss(logit, y)
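# Hedged sketch of generate_adversarial_perturbation, assuming the standard
# adversarial-training construction: the perturbation is the epsilon-scaled,
# normalized gradient of the supervised loss with respect to the input.
# `epsilon` is an assumed hyperparameter name; the normalizer reuses the
# helper sketched above.
def generate_adversarial_perturbation_sketch(x, loss, epsilon=8.0):
    grad = tf.gradients(loss, [x], aggregation_method=2)[0]
    grad = tf.stop_gradient(grad)  # treat the direction as a constant
    return epsilon * get_normalized_vector_sketch(grad)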
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    return losses
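# Hedged sketches of the L.ce_loss and L.accuracy helpers these graphs rely
# on, assuming one-hot labels; the repository's layers module may differ in
# naming or reduction.
def ce_loss_sketch(logit, y):
    # Mean cross-entropy between one-hot labels and the softmax of the logits.
    return -tf.reduce_mean(tf.reduce_sum(y * tf.nn.log_softmax(logit), axis=1))


def accuracy_sketch(logit, y):
    # Fraction of examples whose arg-max prediction matches the label.
    correct = tf.equal(tf.argmax(logit, axis=1), tf.argmax(y, axis=1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))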
def build_test_graph(x, y):
    losses = {}
    logit = adt.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['No_NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['No_Acc'] = acc
    return losses
def build_eval_graph(X_l, Y_l):
    # TODO: implement this
    losses = {}
    logit_l = sup_classifier.logit_small(X_l, is_training=False, update_batch_stats=False)
    losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
    return losses
def build_eval_graph(x, y, ul_x):
    losses = {}
    logit = vat.forward(x, is_training=False, update_batch_stats=False)
    nll_loss = L.ce_loss(logit, y)
    losses['NLL'] = nll_loss
    acc = L.accuracy(logit, y)
    losses['Acc'] = acc
    # Reuse the trained weights for the adversarial and virtual-adversarial
    # evaluation passes below.
    scope = tf.get_variable_scope()
    scope.reuse_variables()
    at_loss = vat.adversarial_loss(x, y, nll_loss, is_training=True)
    losses['AT_loss'] = at_loss
    ul_logit = vat.forward(ul_x, is_training=False, update_batch_stats=False)
    vat_loss = vat.virtual_adversarial_loss(ul_x, ul_logit, is_training=False)
    losses['VAT_loss'] = vat_loss
    return losses
def build_training_graph(x, y, lr, mom):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = cnn.forward(x)
    loss = L.ce_loss(logit, y)
    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
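# Hedged usage sketch for the supervised build_training_graph above: create
# input placeholders, build the graph once, then run optimizer steps inside a
# TF1 session. The 32x32x3 / 10-class shapes and the random feed data are
# illustrative assumptions, not the repository's actual input pipeline.
def training_usage_sketch():
    x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    y = tf.placeholder(tf.float32, shape=[None, 10])
    loss, train_op, global_step = build_training_graph(x, y, lr=1e-3, mom=0.9)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Illustrative random batch in place of a real data loader.
        images = np.random.rand(64, 32, 32, 3).astype(np.float32)
        labels = np.eye(10, dtype=np.float32)[np.random.randint(10, size=64)]
        _, loss_val = sess.run([train_op, loss], feed_dict={x: images, y: labels})
        print('step loss: %f' % loss_val)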
def build_training_graph(x_1, x_2, y, ul_x_1, ul_x_2, lr, mom, lamb):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = adt.forward(x_1, update_batch_stats=True)
    nll_loss = L.ce_loss(logit, y)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        if FLAGS.method == 'VAdD':
            # Virtual adversarial dropout: consistency between two
            # dropout-perturbed passes plus conditional entropy.
            ul_logit = adt.forward(ul_x_1)
            ent_loss = L.entropy_y_x(ul_logit)
            vadt_loss = adt.virtual_adversarial_dropout_loss(ul_x_2, ul_logit)
            loss = nll_loss + lamb * vadt_loss + ent_loss
        elif FLAGS.method == 'VAT':
            ul_logit = adt.forward(ul_x_1, update_batch_stats=False)
            ent_loss = L.entropy_y_x(ul_logit)
            vat_loss = adt.virtual_adversarial_loss(ul_x_1, ul_logit)
            loss = nll_loss + vat_loss + ent_loss
        elif FLAGS.method == 'Pi':
            # Pi-model: squared-error consistency between two stochastic passes.
            ul_logit = adt.forward(ul_x_1, update_batch_stats=True)
            ul_adt_logit = adt.forward(ul_x_2, update_batch_stats=True)
            additional_loss = L.qe_loss(ul_logit, ul_adt_logit)  # *4.0
            ent_loss = L.entropy_y_x(ul_logit)
            loss = nll_loss + lamb * additional_loss + ent_loss
        else:
            raise NotImplementedError
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom, beta2=0.999)
        tvars = tf.trainable_variables()
        grads_and_vars = opt.compute_gradients(loss, tvars)
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
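# Hedged sketches of the L.qe_loss and L.entropy_y_x helpers used above:
# qe_loss is the Pi-model style consistency penalty (squared error between
# two softmax predictions), and entropy_y_x is the conditional entropy of the
# model's predictive distribution. Exact reductions in the repository's
# layers module may differ.
def qe_loss_sketch(logit_a, logit_b):
    p_a = tf.nn.softmax(logit_a)
    p_b = tf.nn.softmax(logit_b)
    return tf.reduce_mean(tf.reduce_sum(tf.square(p_a - p_b), axis=1))


def entropy_y_x_sketch(logit):
    # H(p(y|x)) averaged over the batch; minimizing it sharpens predictions.
    p = tf.nn.softmax(logit)
    return -tf.reduce_mean(tf.reduce_sum(p * tf.nn.log_softmax(logit), axis=1))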
def build_training_graph(is_training, X_l, Y_l, ID_l, X_u, ID_u, K, lr, mom, lgc_alpha):
    # Cached predictions F, the affinity matrix W, and the degree terms D are
    # created elsewhere and retrieved here by name.
    CACHE_F = tf.get_variable("cache_f")
    W = tf.get_variable("Affinity_matrix")
    D = tf.get_variable("D")
    assert K.shape[0] == W.shape[0]

    with tf.variable_scope("CNN", reuse=tf.AUTO_REUSE):
        losses = {}
        # Logits come from the cache rather than a fresh CNN pass:
        # logit_l = sup_classifier.logit_small(X_l, num_classes=Y_l.shape[1],
        #                                      is_training=is_training,
        #                                      update_batch_stats=False)
        # logit_u = sup_classifier.logit_small(X_u, num_classes=Y_l.shape[1],
        #                                      is_training=is_training,
        #                                      update_batch_stats=is_training)
        logit_l = tf.gather(CACHE_F, ID_l)
        logit_u = tf.gather(CACHE_F, ID_u)
        losses['xent_sup'] = L.ce_loss(logit_l, Y_l)
        losses['mse_sup'] = tf.losses.mean_squared_error(tf.nn.softmax(logit_l), Y_l)
        losses['mean_acc'] = L.accuracy(logit_l, Y_l)

        # Concatenate ids and logits of the labeled and unlabeled batches.
        ids = tf.concat([ID_l, ID_u], 0)
        logits = tf.concat([logit_l, logit_u], 0)
        logits = tf.nn.softmax(logits)
        # assert ids.shape[0] == logits.shape[0]

        if FLAGS.loss_func == "lgc":
            # Unsupervised (graph smoothness) loss.
            K_ids = tf.gather(K, ids, axis=0)
            # Get neighbor pairs (i, j) for each example in the batch.
            K_ids_i = tf.reshape(K_ids[:, :, 0], [-1])
            K_ids_j = tf.reshape(K_ids[:, :, 1], [-1])
            # Alternative: index into the batch logits instead of the cache:
            # logits_ids = tf.reshape(
            #     tf.tile(tf.expand_dims(tf.range(tf.shape(logits)[0]), -1),
            #             [1, FLAGS.affmat_k]), [-1])
            # all_F_i = tf.gather(logits, logits_ids, axis=0)
            # Both F_i and F_j come from the cache.
            all_F_i = tf.gather(tf.nn.softmax(CACHE_F), K_ids_i, axis=0)
            all_F_j = tf.gather(tf.nn.softmax(CACHE_F), K_ids_j, axis=0)
            all_Wij = tf.reshape(tf.gather(W, ids, axis=0), [-1])
            all_Dii = tf.gather(D, K_ids_i, axis=0)
            all_Djj = tf.gather(D, K_ids_j, axis=0)
            all_Dii = tf.tile(all_Dii[:, None], [1, all_F_i.shape[1]])
            all_Djj = tf.tile(all_Djj[:, None], [1, all_F_j.shape[1]])
            all_Fi = tf.multiply(all_Dii, all_F_i)
            all_Fj = tf.multiply(all_Djj, all_F_j)
            LGC_unsupervised_loss = tf.multiply(
                tf.reduce_sum(tf.square(all_Fi - all_Fj), axis=1), all_Wij)
            losses["lgc_unsupervised_loss"] = tf.reduce_sum(LGC_unsupervised_loss)
            # Rescale both terms so the batch estimates match the full-graph
            # objective.
            losses["lgc_unsupervised_loss"] = (
                int(K.shape[0]) / int(FLAGS.batch_size + FLAGS.ul_batch_size)
            ) * losses["lgc_unsupervised_loss"]
            losses["lgc_supervised_loss"] = (
                FLAGS.num_labeled / int(FLAGS.batch_size)) * losses['mse_sup']
            lgc_lamb = 1 / lgc_alpha - 1
            losses["lgc_loss"] = (losses["lgc_unsupervised_loss"]
                                  + lgc_lamb * losses["lgc_supervised_loss"])

            # Assign current predictions back to the cache (currently
            # disabled; assign_to_cache is a no-op).
            assign_op_l = tf.scatter_update(ref=CACHE_F, indices=ID_l,
                                            updates=tf.nn.softmax(logit_l))
            assign_op_u = tf.scatter_update(ref=CACHE_F, indices=ID_u,
                                            updates=tf.nn.softmax(logit_u))
            # assign_to_cache = tf.group(assign_op_l, assign_op_u)
            assign_to_cache = tf.no_op()

            # Partition the trainable variables; only the cache is optimized.
            tvars = tf.trainable_variables()
            tvars_W = [v for v in tvars if "Affinity_matrix" in v.name]
            tvars_D = [v for v in tvars if "D" in v.name]
            tvars_CNN = [v for v in tvars if "CNN" in v.name]
            tvars_CACHE = [v for v in tvars if "cache" in v.name]
            print([var.name for var in tvars_CACHE])
            opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
            grads_and_vars = opt.compute_gradients(losses['lgc_loss'], tvars_CACHE)
            train_op = opt.apply_gradients(
                grads_and_vars, global_step=tf.train.get_or_create_global_step())
        else:
            # Plain supervised training on all trainable variables.
            assign_to_cache = None
            all_Wij = None
            tvars = tf.trainable_variables()
            opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
            grads_and_vars = opt.compute_gradients(losses['xent_sup'], tvars)
            train_op = opt.apply_gradients(
                grads_and_vars, global_step=tf.train.get_or_create_global_step())
    return losses, train_op, assign_to_cache, logit_l, all_Wij
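# Hedged dense restatement of the LGC objective the sparse gather-based code
# above approximates (Zhou et al., "Learning with Local and Global
# Consistency"): sum_ij W_ij ||d_i F_i - d_j F_j||^2 + lamb * ||F_l - Y_l||^2,
# assuming `d` holds the D^{-1/2} entries, matching the element-wise multiply
# by D above. Written with O(n^2) broadcasting purely for clarity.
def lgc_loss_dense_sketch(F, W, d, F_l, Y_l, lamb):
    # F: [n, c] predictions, W: [n, n] affinities, d: [n] with D^{-1/2} values.
    Fn = d[:, None] * F                                          # D^{-1/2} F
    pair = tf.reduce_sum(tf.square(Fn[:, None, :] - Fn[None, :, :]), axis=2)
    smoothness = tf.reduce_sum(W * pair)                         # graph smoothness term
    fit = tf.reduce_sum(tf.square(F_l - Y_l))                    # labeled fitting term
    return smoothness + lamb * fit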
def build_training_graph(x_1, x_2, y, lr, mom, lamb):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    logit = adt.forward(x_1)
    nll_loss = L.ce_loss(logit, y)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        if FLAGS.method == 'SAdD':
            # Supervised adversarial dropout.
            adt_logit = adt.forward_adv_drop(x_2, y, FLAGS.delta, is_training=True)
            additional_loss = L.ce_loss(adt_logit, y)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAdD-KL':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2, logit_p, FLAGS.delta,
                                             is_training=True, mode=FLAGS.method)
            additional_loss = L.kl_divergence_with_logit(logit_p, adt_logit)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAdD-QE':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2, logit_p, FLAGS.delta,
                                             is_training=True, mode=FLAGS.method)
            additional_loss = L.qe_loss(adt_logit, logit_p)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'VAT+VAdD-KL':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2, logit_p, FLAGS.delta,
                                             is_training=True, mode='VAdD-KL')
            additional_loss = L.kl_divergence_with_logit(logit_p, adt_logit)
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + lamb * additional_loss + vat_loss
        elif FLAGS.method == 'VAT+VAdD-QE':
            logit_p = logit
            adt_logit = adt.forward_adv_drop(x_2, logit_p, FLAGS.delta,
                                             is_training=True, mode='VAdD-QE')
            additional_loss = L.qe_loss(adt_logit, logit_p)
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + lamb * additional_loss + vat_loss
        elif FLAGS.method == 'VAT':
            # Stop gradients through the target distribution, as in standard VAT.
            logit_p = tf.stop_gradient(logit)
            vat_loss = adt.virtual_adversarial_loss(x_1, logit_p)
            loss = nll_loss + vat_loss
        elif FLAGS.method == 'Pi':
            adt_logit = adt.forward(x_2)
            additional_loss = L.qe_loss(adt_logit, logit)
            loss = nll_loss + lamb * additional_loss
        elif FLAGS.method == 'baseline':
            # No consistency term; train on cross-entropy alone.
            loss = nll_loss
        else:
            raise NotImplementedError
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
        tvars = tf.trainable_variables()
        grads_and_vars = opt.compute_gradients(loss, tvars)
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
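# Hedged sketch of L.kl_divergence_with_logit as used above: KL(q || p)
# between the softmax distributions of two logit tensors, averaged over the
# batch; the repository's layers module may order arguments or reduce
# differently.
def kl_divergence_with_logit_sketch(q_logit, p_logit):
    q = tf.nn.softmax(q_logit)
    qlogq = tf.reduce_mean(tf.reduce_sum(q * tf.nn.log_softmax(q_logit), axis=1))
    qlogp = tf.reduce_mean(tf.reduce_sum(q * tf.nn.log_softmax(p_logit), axis=1))
    return qlogq - qlogp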