def __init__(self, flags, is_training=True):
    """Build the 'dis'-scoped model graph and, when training, its update ops.

    Args:
        flags: configuration object. This method reads ``feature_size``,
            ``num_label``, ``image_model``, ``image_weight_decay``,
            ``image_keep_prob``, ``dataset`` and ``dis_learning_rate`` from
            it and forwards it to ``utils.get_lr``, ``utils.get_opt`` and
            ``self.get_gan_losses``.
        is_training: forwarded to the dropout layer. When false, construction
            stops after the reward tensors are created, so no saver, global
            step, loss or update op is defined on the instance.
    """
    self.is_training = is_training

    # Leading dimension None = batch_size.
    self.image_ph = tf.placeholder(tf.float32, shape=(None, flags.feature_size))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.gen_sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.gen_label_ph = tf.placeholder(tf.float32, shape=(None,))
    self.tch_sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.tch_label_ph = tf.placeholder(tf.float32, shape=(None,))

    dis_scope = 'dis'
    self.dis_scope = dis_scope
    arg_scope_fn = nets_factory.arg_scopes_map[flags.image_model]

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the statements after the layer definition belong inside the variable
    # scope (it determines the name prefix of the ops/variables created there).
    with tf.variable_scope(dis_scope):
        with slim.arg_scope(arg_scope_fn(weight_decay=flags.image_weight_decay)):
            # Single linear layer on top of dropout; no activation on the logits.
            dropped = slim.dropout(
                self.image_ph, flags.image_keep_prob, is_training=is_training)
            self.logits = slim.fully_connected(
                dropped, flags.num_label, activation_fn=None)

        self.gen_rewards = self.get_rewards(self.gen_sample_ph)
        self.tch_rewards = self.get_rewards(self.tch_sample_ph)

        if not is_training:
            return

        # The saver only tracks trainable variables whose name starts with the scope.
        scoped_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith(dis_scope)
        ]
        for var in scoped_vars:
            print('%-50s added to DIS saver' % var.name)
        self.saver = tf.train.Saver({var.name: var for var in scoped_vars})

        global_step = tf.Variable(0, trainable=False)
        self.global_step = global_step
        tn_size = utils.get_tn_size(flags.dataset)
        self.learning_rate = utils.get_lr(
            flags, tn_size, global_step, flags.dis_learning_rate, dis_scope)

        # pre train
        pre_terms = self.get_pre_losses()
        pre_terms += self.get_regularization_losses()
        print('#pre_losses wt regularization=%d' % len(pre_terms))
        self.pre_loss = tf.add_n(pre_terms, name='%s_pre_loss' % dis_scope)
        pre_optimizer = utils.get_opt(flags, self.learning_rate)
        self.pre_update = pre_optimizer.minimize(
            self.pre_loss, global_step=global_step)

        # gan train (shares global_step with the pre-training update)
        gan_terms = self.get_gan_losses(flags)
        gan_terms += self.get_regularization_losses()
        print('#gan_losses wt regularization=%d' % len(gan_terms))
        self.gan_loss = tf.add_n(gan_terms, name='%s_gan_loss' % dis_scope)
        gan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.gan_update = gan_optimizer.minimize(
            self.gan_loss, global_step=global_step)
def __init__(self, flags, dataset, is_training=True):
    """Build the 'tch'-scoped model graph and, when training, its update ops.

    Args:
        flags: configuration object. This method reads ``image_size``,
            ``channels``, ``num_label``, ``tch_model_name``,
            ``tch_weight_decay``, ``tch_keep_prob`` and ``gen_learning_rate``
            from it and forwards it to ``utils.get_logits``, ``utils.get_opt``
            and ``self.get_kdgan_losses``.
        dataset: not referenced by this method.
        is_training: forwarded to ``utils.get_logits``. When false, only the
            inference tensors plus ``predictions``/``accuracy`` are created
            and construction stops there.
    """
    self.is_training = is_training

    # Images are fed flattened; leading dimension None = batch_size.
    num_feature = flags.image_size * flags.image_size * flags.channels
    self.image_ph = tf.placeholder(tf.float32, shape=(None, num_feature))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))
    self.soft_logit_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None,))

    tch_scope = 'tch'
    self.tch_scope = tch_scope

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the statements after the logits belong inside the variable scope (it
    # determines the name prefix of the ops/variables created there).
    with tf.variable_scope(tch_scope):
        self.logits = utils.get_logits(
            flags,
            self.image_ph,
            flags.tch_model_name,
            flags.tch_weight_decay,
            flags.tch_keep_prob,
            is_training=is_training)
        self.labels = tf.nn.softmax(self.logits)

        if not is_training:
            # Evaluation graph: fraction of rows whose arg-max logit matches
            # the arg-max of the hard label.
            self.predictions = tf.argmax(self.logits, axis=1)
            truth = tf.argmax(self.hard_label_ph, 1)
            is_correct = tf.equal(self.predictions, truth)
            self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
            return

        # The saver only tracks trainable variables whose name starts with the scope.
        scoped_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith(tch_scope)
        ]
        for var in scoped_vars:
            print('%-50s added to TCH saver' % var.name)
        self.saver = tf.train.Saver({var.name: var for var in scoped_vars})

        self.global_step = tf.Variable(0, trainable=False)
        # NOTE(review): the 'tch' model is initialised from gen_learning_rate;
        # verify this is intended rather than a dedicated tch learning rate.
        self.learning_rate = tf.Variable(flags.gen_learning_rate, trainable=False)

        # pre train
        pre_terms = self.get_pre_losses()
        pre_terms += self.get_regularization_losses()
        print('#pre_losses wt regularization=%d' % len(pre_terms))
        self.pre_loss = tf.add_n(pre_terms, name='%s_pre_loss' % tch_scope)
        pre_optimizer = utils.get_opt(flags, self.learning_rate)
        self.pre_update = pre_optimizer.minimize(
            self.pre_loss, global_step=self.global_step)

        # kdgan train (shares global_step with the pre-training update)
        kdgan_terms = self.get_kdgan_losses(flags)
        kdgan_terms += self.get_regularization_losses()
        print('#kdgan_losses wt regularization=%d' % len(kdgan_terms))
        self.kdgan_loss = tf.add_n(kdgan_terms, name='%s_kdgan_loss' % tch_scope)
        kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kdgan_update = kdgan_optimizer.minimize(
            self.kdgan_loss, global_step=self.global_step)
def __init__(self, flags, is_training=True):
    """Build the 'std'-scoped model graph and, when training, its train ops.

    Args:
        flags: configuration object. This method reads ``batch_size``,
            ``image_size``, ``channels``, ``num_label`` and
            ``std_learning_rate`` from it and forwards it to ``utils.get_opt``,
            ``self.get_kd_losses`` and ``self.get_kdgan_losses``.
        is_training: when false, only the inference tensors and ``accuracy``
            are created and construction stops there.
    """
    self.is_training = is_training

    # Image and label placeholders use a fixed batch dimension (flags.batch_size).
    batch_size = flags.batch_size
    self.image_ph = tf.placeholder(
        tf.float32,
        shape=(batch_size, flags.image_size, flags.image_size, flags.channels))
    self.hard_label_ph = tf.placeholder(tf.int32, shape=(batch_size, flags.num_label))
    self.soft_logit_ph = tf.placeholder(tf.float32, shape=(batch_size, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None,))

    std_scope = 'std'
    self.std_scope = std_scope

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the statements after the logits belong inside the variable scope (it
    # determines the name prefix of the ops/variables created there).
    with tf.variable_scope(std_scope):
        self.logits = lenet_utils.inference(self.image_ph)
        self.labels = tf.nn.softmax(self.logits)

        if not is_training:
            # Evaluation graph: fraction of rows whose arg-max probability
            # matches the arg-max of the hard label.
            predicted = tf.argmax(self.labels, axis=1)
            expected = tf.argmax(self.hard_label_ph, axis=1)
            hits = tf.equal(predicted, expected)
            self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
            return

        # The saver only tracks trainable variables whose name starts with the scope.
        std_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith(std_scope)
        ]
        for var in std_vars:
            print('%-64s added to STD saver' % var.name)
        self.saver = tf.train.Saver({var.name: var for var in std_vars})

        global_step = tf.Variable(0, trainable=False)
        self.global_step = global_step
        self.learning_rate = tf.Variable(flags.std_learning_rate, trainable=False)

        # pre train
        # NOTE(review): lenet_utils.get_train_op is not given self.learning_rate
        # here; presumably it configures its own optimizer -- confirm.
        pre_terms = self.get_pre_losses()
        print('#pre_losses wo regularization=%d' % len(pre_terms))
        pre_terms += self.get_regularization_losses()
        print('#pre_losses wt regularization=%d' % len(pre_terms))
        self.pre_loss = tf.add_n(pre_terms, name='%s_pre_loss' % std_scope)
        self.pre_train = lenet_utils.get_train_op(self.pre_loss, global_step)

        # kd train
        kd_terms = self.get_kd_losses(flags)
        print('#kd_losses wo regularization=%d' % len(kd_terms))
        kd_terms += self.get_regularization_losses()
        print('#kd_losses wt regularization=%d' % len(kd_terms))
        self.kd_loss = tf.add_n(kd_terms, name='%s_kd_loss' % std_scope)
        kd_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kd_train = kd_optimizer.minimize(self.kd_loss, global_step=global_step)

        # gan train
        gan_terms = self.get_gan_losses()
        print('#gan_losses wo regularization=%d' % len(gan_terms))
        gan_terms += self.get_regularization_losses()
        print('#gan_losses wt regularization=%d' % len(gan_terms))
        self.gan_loss = tf.add_n(gan_terms, name='%s_gan_loss' % std_scope)
        gan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.gan_train = gan_optimizer.minimize(self.gan_loss, global_step=global_step)

        # kdgan train
        kdgan_terms = self.get_kdgan_losses(flags)
        print('#kdgan_losses wo regularization=%d' % len(kdgan_terms))
        kdgan_terms += self.get_regularization_losses()
        print('#kdgan_losses wt regularization=%d' % len(kdgan_terms))
        self.kdgan_loss = tf.add_n(kdgan_terms, name='%s_kdgan_loss' % std_scope)
        self.kdgan_train = lenet_utils.get_train_op(self.kdgan_loss, global_step)
def __init__(self, flags, dataset, is_training=True, gen_scope='gen'):
    """Build the generator-side model graph under ``gen_scope``.

    Args:
        flags: configuration object. This method reads ``image_size``,
            ``channels``, ``num_label``, ``gen_model_name``,
            ``gen_weight_decay``, ``gen_keep_prob`` and ``gen_learning_rate``
            from it and forwards it to ``utils.get_logits``, ``utils.get_opt``
            and the ``self.get_*_losses`` helpers.
        dataset: not referenced by this method.
        is_training: forwarded to ``utils.get_logits``. When false, only the
            inference tensors plus ``predictions``/``accuracy`` are created
            and construction stops there.
        gen_scope: name of the variable scope; also the prefix used to pick
            the variables handed to the saver and to name the loss tensors.
    """
    self.is_training = is_training

    # Images are fed flattened; leading dimension None = batch_size.
    num_feature = flags.image_size * flags.image_size * flags.channels
    self.image_ph = tf.placeholder(tf.float32, shape=(None, num_feature))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))
    self.soft_logit_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None,))

    self.gen_scope = gen_scope

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the statements after the logits belong inside the variable scope (it
    # determines the name prefix of the ops/variables created there).
    with tf.variable_scope(gen_scope):
        self.logits = utils.get_logits(
            flags,
            self.image_ph,
            flags.gen_model_name,
            flags.gen_weight_decay,
            flags.gen_keep_prob,
            is_training=is_training)
        self.labels = tf.nn.softmax(self.logits)

        if not is_training:
            # Evaluation graph: fraction of rows whose arg-max logit matches
            # the arg-max of the hard label.
            self.predictions = tf.argmax(self.logits, axis=1)
            truth = tf.argmax(self.hard_label_ph, 1)
            is_correct = tf.equal(self.predictions, truth)
            self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
            return

        # Trainable variables whose name starts with the scope are both saved
        # and exposed as self.var_list.
        scoped_vars = [
            var for var in tf.trainable_variables()
            if var.name.startswith(gen_scope)
        ]
        self.saver = tf.train.Saver({var.name: var for var in scoped_vars})
        self.var_list = scoped_vars

        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.Variable(flags.gen_learning_rate, trainable=False)

        # Every objective below gets its own optimizer from utils.get_opt but
        # shares the same learning-rate variable and global step. Gradients
        # are applied through minimize() without clipping.

        # pre train
        pre_terms = self.get_pre_losses(flags)
        self.pre_loss = tf.add_n(pre_terms, name='%s_pre_loss' % gen_scope)
        pre_optimizer = utils.get_opt(flags, self.learning_rate)
        self.pre_update = pre_optimizer.minimize(
            self.pre_loss, global_step=self.global_step)

        # kd train
        kd_terms = self.get_kd_losses(flags)
        self.kd_loss = tf.add_n(kd_terms, name='%s_kd_loss' % gen_scope)
        kd_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kd_update = kd_optimizer.minimize(
            self.kd_loss, global_step=self.global_step)

        # gan train
        gan_terms = self.get_gan_losses(flags)
        self.gan_loss = tf.add_n(gan_terms, name='%s_gan_loss' % gen_scope)
        gan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.gan_update = gan_optimizer.minimize(
            self.gan_loss, global_step=self.global_step)

        # kdgan train
        kdgan_terms = self.get_kdgan_losses(flags)
        self.kdgan_loss = tf.add_n(kdgan_terms, name='%s_kdgan_loss' % gen_scope)
        kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kdgan_update = kdgan_optimizer.minimize(
            self.kdgan_loss, global_step=self.global_step)
def __init__(self, flags, is_training=True):
    """Build the 'gen'-scoped model graph and, when training, its update ops.

    Args:
        flags: configuration object. This method reads ``feature_size``,
            ``num_label``, ``image_model``, ``image_weight_decay``,
            ``image_keep_prob``, ``dataset`` and ``gen_learning_rate`` from
            it and forwards it to ``utils.get_lr``, ``utils.get_opt``,
            ``self.get_kd_losses`` and ``self.get_kdgan_losses``.
        is_training: forwarded to the dropout layer. When false, construction
            stops after ``labels`` is created, so no saver, global step, loss
            or update op is defined on the instance.
    """
    self.is_training = is_training

    # Leading dimension None = batch_size.
    self.image_ph = tf.placeholder(tf.float32, shape=(None, flags.feature_size))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))
    self.soft_logit_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None,))

    gen_scope = 'gen'
    self.gen_scope = gen_scope
    arg_scope_fn = nets_factory.arg_scopes_map[flags.image_model]

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the statements after the layer definition belong inside the variable
    # scope (it determines the name prefix of the ops/variables created there).
    with tf.variable_scope(gen_scope):
        with slim.arg_scope(arg_scope_fn(weight_decay=flags.image_weight_decay)):
            # Single linear layer on top of dropout; no activation on the logits.
            dropped = slim.dropout(
                self.image_ph, flags.image_keep_prob, is_training=is_training)
            self.logits = slim.fully_connected(
                dropped, flags.num_label, activation_fn=None)

        self.labels = tf.nn.softmax(self.logits)

        if not is_training:
            return

        # Trainable variables whose name starts with the scope are saved and
        # are also the only ones the kdgan update computes gradients for.
        var_list = [
            var for var in tf.trainable_variables()
            if var.name.startswith(gen_scope)
        ]
        for var in var_list:
            print('%-50s added to GEN saver' % var.name)
        self.saver = tf.train.Saver({var.name: var for var in var_list})

        global_step = tf.Variable(0, trainable=False)
        self.global_step = global_step
        tn_size = utils.get_tn_size(flags.dataset)
        self.learning_rate = utils.get_lr(
            flags, tn_size, global_step, flags.gen_learning_rate, gen_scope)

        # pre train
        pre_terms = self.get_pre_losses()
        print('#pre_losses wo regularization=%d' % len(pre_terms))
        pre_terms += self.get_regularization_losses()
        print('#pre_losses wt regularization=%d' % len(pre_terms))
        self.pre_loss = tf.add_n(pre_terms, name='%s_pre_loss' % gen_scope)
        pre_optimizer = utils.get_opt(flags, self.learning_rate)
        self.pre_update = pre_optimizer.minimize(
            self.pre_loss, global_step=global_step)

        # kd train -- unlike the other objectives, no regularization terms
        # are appended to this loss.
        kd_terms = self.get_kd_losses(flags)
        print('#kd_losses wo regularization=%d' % len(kd_terms))
        self.kd_loss = tf.add_n(kd_terms, name='%s_kd_loss' % gen_scope)
        kd_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kd_update = kd_optimizer.minimize(
            self.kd_loss, global_step=global_step)

        # gan train
        gan_terms = self.get_gan_losses()
        print('#gan_losses wo regularization=%d' % len(gan_terms))
        gan_terms += self.get_regularization_losses()
        print('#gan_losses wt regularization=%d' % len(gan_terms))
        self.gan_loss = tf.add_n(gan_terms, name='%s_gan_loss' % gen_scope)
        gan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.gan_update = gan_optimizer.minimize(
            self.gan_loss, global_step=global_step)

        # kdgan train
        kdgan_terms = self.get_kdgan_losses(flags)
        print('#kdgan_losses wo regularization=%d' % len(kdgan_terms))
        kdgan_terms += self.get_regularization_losses()
        print('#kdgan_losses wt regularization=%d' % len(kdgan_terms))
        self.kdgan_loss = tf.add_n(kdgan_terms, name='%s_kdgan_loss' % gen_scope)
        kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
        # Each gradient is clipped on its own (per-tensor norm, bounded by
        # config.max_norm) before being applied.
        grads_and_vars = kdgan_optimizer.compute_gradients(self.kdgan_loss, var_list)
        clipped = [
            (tf.clip_by_norm(grad, config.max_norm), var)
            for grad, var in grads_and_vars
        ]
        self.kdgan_update = kdgan_optimizer.apply_gradients(
            clipped, global_step=global_step)
def __init__(self, flags, is_training=True):
    """Build the 'tch'-scoped image+text model graph and its update ops.

    The logits come from one linear layer applied to the concatenation of the
    (dropout-regularised) image features and the mean of the word embeddings
    looked up for ``text_ph``.

    Args:
        flags: configuration object. This method reads ``feature_size``,
            ``num_label``, ``image_model``, ``image_weight_decay``,
            ``image_keep_prob``, ``text_weight_decay``, ``embedding_size``,
            ``tch_weight_decay``, ``dataset`` and ``tch_learning_rate`` from
            it and forwards it to ``utils.get_lr``, ``utils.get_opt`` and
            ``self.get_kdgan_losses``.
        is_training: forwarded to the dropout layer. When false, construction
            stops after ``labels`` is created, so no saver, global step, loss
            or update op is defined on the instance.
    """
    self.is_training = is_training

    # Leading dimension None = batch_size.
    self.image_ph = tf.placeholder(tf.float32, shape=(None, flags.feature_size))
    self.text_ph = tf.placeholder(tf.int64, shape=(None, None))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))

    # Leading dimension None = batch_size * sample_size.
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None,))

    self.tch_scope = tch_scope = 'tch'
    model_scope = nets_factory.arg_scopes_map[flags.image_model]
    vocab_size = utils.get_vocab_size(flags.dataset)

    # NOTE(review): this method's indentation was reconstructed; confirm that
    # the three arg_scope blocks are siblings and that the statements after
    # the logits belong inside the variable scope.
    with tf.variable_scope(tch_scope):
        # Image branch: features are only passed through dropout.
        with slim.arg_scope(model_scope(weight_decay=flags.image_weight_decay)):
            iembed = slim.dropout(
                self.image_ph, flags.image_keep_prob, is_training=is_training)

        # Text branch: look up word embeddings and average them over axis -2.
        # NOTE(review): this arg_scope only targets slim.fully_connected, yet
        # no fully_connected layer is created inside it, so text_weight_decay
        # presumably never regularises `wembed` -- confirm intent.
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(flags.text_weight_decay)):
            wembed = slim.variable(
                'wembed',
                shape=[vocab_size, flags.embedding_size],
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            tembed = tf.nn.embedding_lookup(wembed, self.text_ph)
            tembed = tf.reduce_mean(tembed, axis=-2)

        # Joint classifier over the concatenated image and text embeddings.
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(flags.tch_weight_decay),
                biases_initializer=tf.zeros_initializer()):
            cembed = tf.concat([iembed, tembed], 1)
            self.logits = slim.fully_connected(
                cembed, flags.num_label, activation_fn=None)

        self.labels = tf.nn.softmax(self.logits)

        if not is_training:
            return

        # The saver only tracks trainable variables whose name starts with the scope.
        save_dict = {}
        for variable in tf.trainable_variables():
            if not variable.name.startswith(tch_scope):
                continue
            print('%-50s added to TCH saver' % variable.name)
            save_dict[variable.name] = variable
        self.saver = tf.train.Saver(save_dict)

        self.global_step = global_step = tf.Variable(0, trainable=False)
        tn_size = utils.get_tn_size(flags.dataset)
        learning_rate = flags.tch_learning_rate
        self.learning_rate = utils.get_lr(
            flags, tn_size, global_step, learning_rate, tch_scope)

        # pre train
        pre_losses = self.get_pre_losses()
        # The regularization terms have to be appended BEFORE the sum is
        # built; summing first left them out of the optimized loss even
        # though the message below reports them as included.
        pre_losses.extend(self.get_regularization_losses())
        print('#pre_losses wt regularization=%d' % (len(pre_losses)))
        self.pre_loss = tf.add_n(pre_losses, name='%s_pre_loss' % tch_scope)
        pre_optimizer = utils.get_opt(flags, self.learning_rate)
        self.pre_update = pre_optimizer.minimize(
            self.pre_loss, global_step=global_step)

        # kdgan train (no regularization terms are appended to this loss)
        kdgan_losses = self.get_kdgan_losses(flags)
        self.kdgan_loss = tf.add_n(kdgan_losses, name='%s_kdgan_loss' % tch_scope)
        kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
        self.kdgan_update = kdgan_optimizer.minimize(
            self.kdgan_loss, global_step=global_step)