Example #1
def create_model(slot_file, slot_common_file, all_slot_file):
    join_common_model = ModelJoinCommon(slot_file, slot_common_file,
                                        all_slot_file, 20)
    update_model = Model(slot_file, all_slot_file, False, 0, True)
    with open("join_common_main_program.pbtxt", "w") as fout:
        print >> fout, join_common_model._train_program
    with open("join_common_startup_program.pbtxt", "w") as fout:
        print >> fout, join_common_model._startup_program
    with open("update_main_program.pbtxt", "w") as fout:
        print >> fout, update_model._train_program
    with open("update_startup_program.pbtxt", "w") as fout:
        print >> fout, update_model._startup_program
    return [join_common_model, update_model]
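
A minimal usage sketch for the function above; the slot-file paths are hypothetical placeholders, and ModelJoinCommon/Model are assumed to be importable from the surrounding project:

# Hypothetical slot-configuration paths; substitute your own files.
join_model, update_model = create_model("slot.conf",
                                        "slot_common.conf",
                                        "all_slot.conf")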
Example #2
def create_model(sess, config, cate_list, action_list):

    # print(json.dumps(config,indent=4),flush=True)
    model = Model(config, cate_list, action_list)

    print('All global variables:')
    for v in tf.global_variables():
        if v not in tf.trainable_variables():
            print('\t', v)
        else:
            print('\t', v, 'trainable')

    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Reloading model parameters...', flush=True)
        model.restore(sess, ckpt.model_checkpoint_path)
    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print('Created new model parameters...', flush=True)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

    return model
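
A sketch of how this function is typically driven; config, cate_list, and action_list are placeholders for objects prepared elsewhere, and FLAGS.model_dir is assumed to be defined as in the snippet:

# Hypothetical driver; config, cate_list and action_list come from your setup code.
with tf.Session() as sess:
    model = create_model(sess, config, cate_list, action_list)
    # model is now either restored from FLAGS.model_dir or freshly initialized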
Example #3
def evaluate(path_to_checkpoint, ds, val_data, val_labels, num_examples,
             global_step):

    batch_size = 128
    num_batches = num_examples // batch_size
    needs_include_length = False

    with tf.Graph().as_default():
        '''
        image_batch, length_batch, digits_batch = Donkey.build_batch(path_to_tfrecords_file,
                                                                     num_examples=num_examples,
                                                                     batch_size=batch_size,
                                                                     shuffled=False)
        length_logits, digits_logits = Model.layers(image_batch, drop_rate=0.0)
        '''
        with tf.name_scope('test_inputs'):
            xs = tf.placeholder(shape=[None, 54, 54, 3], dtype=tf.float32)
            ys1 = tf.placeholder(shape=[
                None,
            ], dtype=tf.int32)
            ys2 = tf.placeholder(shape=[None, 5], dtype=tf.int32)
        '''
        image_batch, label = ds.build_batch(val_data, val_labels, batch_size, is_train=False, shuffle=False)
        length_batch = label[:, 0]
        digits_batch = label[:, 1:6]
	
        image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32)
        length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32)
        digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32)	
        '''
        length_logits, digits_logits = Model.layers(xs, drop_rate=0.0)  # no dropout at evaluation time

        length_predictions = tf.argmax(length_logits, axis=1)
        digits_predictions = tf.argmax(digits_logits, axis=2)

        if needs_include_length:
            labels = tf.concat([tf.reshape(ys1, [-1, 1]), ys2], axis=1)
            predictions = tf.concat(
                [tf.reshape(length_predictions, [-1, 1]), digits_predictions],
                axis=1)
        else:
            labels = ys2
            predictions = digits_predictions

        labels_string = tf.reduce_join(tf.as_string(labels), axis=1)
        predictions_string = tf.reduce_join(tf.as_string(predictions), axis=1)

        accuracy, update_accuracy = tf.metrics.accuracy(
            labels=labels_string, predictions=predictions_string)

        tf.summary.image('image', xs)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.histogram(
            'variables',
            tf.concat(
                [tf.reshape(var, [-1]) for var in tf.trainable_variables()],
                axis=0))
        summary = tf.summary.merge_all()

        with tf.Session() as sess:
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            restorer = tf.train.Saver()
            restorer.restore(sess, path_to_checkpoint)

            for _ in range(num_batches):
                image_batch, label = ds.build_batch(val_data,
                                                    val_labels,
                                                    batch_size,
                                                    is_train=False,
                                                    shuffle=False)
                length_batch = label[:, 0]
                digits_batch = label[:, 1:6]

                # run only the update op per batch; fetching `accuracy` in the
                # same call may return the pre-update value
                sess.run(update_accuracy,
                         feed_dict={
                             xs: image_batch,
                             ys1: length_batch,
                             ys2: digits_batch
                         })
            # read the accumulated accuracy once all batches are processed
            acc = sess.run(accuracy)
            #summary_writer = tf.summary.FileWriter('log/eval')
            #accuracy_val, summary_val = sess.run([accuracy, summary])
            #summary_writer.add_summary(summary_val, global_step=global_step)

            coord.request_stop()
            coord.join(threads)

    return acc
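
A hedged invocation sketch; the checkpoint path and the ds/val_data/val_labels objects are placeholders for whatever the training code produced:

# Hypothetical inputs; ds, val_data and val_labels come from your data pipeline.
acc = evaluate('model/latest.ckpt', ds, val_data, val_labels,
               num_examples=len(val_data), global_step=0)
print('validation accuracy = %f' % acc)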
Example #4
def my_training(ds,
                train_data,
                train_labels,
                val_data,
                val_labels,
                num_train,
                num_val,
                conv_featmap=[48, 64, 128, 160, 192],
                fc_units=[84],
                conv_kernel_size=[[5, 5], [2, 2]],
                pooling_size=[2],
                l2_norm=0.015,
                learning_rate=1e-2,
                batch_size=32,
                decay=0.9,
                dropout=0.3,
                verbose=False,
                pre_trained_model=None):
    print("Building my SVHN_CNN. Parameters: ")
    print("conv_featmap={}".format(conv_featmap))
    print("fc_units={}".format(fc_units))
    print("conv_kernel_size={}".format(conv_kernel_size))
    print("pooling_size={}".format(pooling_size))
    print("l2_norm={}".format(l2_norm))
    print("learning_rate={}".format(learning_rate))
    #print("decay={}").format(decay)
    #print("dropout").format(dropout)
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    #ds = du.dataset()
    #train_data, test_data, train_labels, test_labels = ds.load_image([54,54])

    with tf.Graph().as_default():
        '''
        image_batch, length_batch, digits_batch = Donkey.build_batch(train_data,
                                                                     num_examples=num_train,
                                                                     batch_size=batch_size,
                                                                     shuffled=True)
        '''
        #print (train_data.shape)
        with tf.name_scope('inputs'):
            xs = tf.placeholder(shape=[None, 54, 54, 3], dtype=tf.float32)
            ys1 = tf.placeholder(shape=[
                None,
            ], dtype=tf.int32)
            ys2 = tf.placeholder(shape=[None, 5], dtype=tf.int32)
        '''
        image_batch, label = ds.build_batch(train_data, train_labels, batch_size, is_train=True, shuffle=False)
        length_batch = label[:, 0]
        digits_batch = label[:, 1:6]
	
        print(ds.idx_train)
        image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32)
        length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32)
        digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32)	
        '''
        length_logits, digits_logits = Model.layers(xs, drop_rate=0.2)
        loss = Model.loss(length_logits, digits_logits, ys1, ys2)

        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate,
                                                   global_step=global_step,
                                                   decay_steps=10000,
                                                   decay_rate=decay,
                                                   staircase=True)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)
        #train_op

        #tf.summary.image('image', image_batch)
        tf.summary.scalar('SVHN_loss', loss)
        tf.summary.scalar('learning_rate', learning_rate)

        cur_model_name = 'SVHN_CNN_{}'.format(int(time.time()))

        with tf.Session() as sess:
            merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter("log/{}".format(cur_model_name),
                                           sess.graph)
            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            if pre_trained_model is not None:
                try:

                    print("Load the model from: {}".format(pre_trained_model))
                    saver.restore(sess, 'model/{}'.format(pre_trained_model))
                except Exception:
                    print("Load model Failed!")
                    pass

            print('Start training')
            init_tolerance = 100
            tolerance = init_tolerance  # initialize up front to avoid a NameError below
            best_acc = 0.0
            duration = 0.0

            while True:
                start_time = time.time()
                image_batch, label = ds.build_batch(train_data,
                                                    train_labels,
                                                    batch_size,
                                                    is_train=True,
                                                    shuffle=True)
                length_batch = label[:, 0]
                digits_batch = label[:, 1:6]

                #print(ds.idx_train)
                #image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32)
                #length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32)
                #digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32)
                _, loss_train, summary_train, global_step_train, learning_rate_train = sess.run(
                    [optimizer, loss, merge, global_step, learning_rate],
                    feed_dict={
                        xs: image_batch,
                        ys1: length_batch,
                        ys2: digits_batch
                    })
                duration += time.time() - start_time

                if global_step_train % 100 == 0:

                    duration = 0.0
                    print('%s: iter_total %d, loss = %f' %
                          (datetime.now(), global_step_train, loss_train))

                if global_step_train % 1000 == 0:

                    writer.add_summary(summary_train,
                                       global_step=global_step_train)

                    checkpoint_file = saver.save(
                        sess, os.path.join('model/', 'latest.ckpt'))
                    accuracy = evaluate(checkpoint_file, ds, val_data,
                                        val_labels, num_val, global_step_train)
                    print('accuracy = %f' % accuracy)

                    if accuracy > best_acc:
                        modelfile = saver.save(sess,
                                               os.path.join(
                                                   'model/', 'model.ckpt'),
                                               global_step=global_step_train)
                        print('Best validation accuracy! ' + modelfile)
                        tolerance = init_tolerance
                        best_acc = accuracy
                    else:
                        tolerance -= 1

                    print('remaining tolerance = %d' % tolerance)
                    if tolerance == 0:
                        break

            coord.request_stop()
            coord.join(threads)
            print("Traning ends. The best valid accuracy is {}.".format(
                best_acc))
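
A minimal call sketch, assuming ds exposes the build_batch interface used above and the data arrays were loaded beforehand (all names are placeholders):

# Hypothetical driver; ds and the data arrays come from your loading code.
my_training(ds,
            train_data, train_labels,
            val_data, val_labels,
            num_train=len(train_data),
            num_val=len(val_data),
            batch_size=32,
            learning_rate=1e-2)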
Example #5
X_test = torch.from_numpy(X_test).float().to(device)
Y_test = torch.from_numpy(Y_test).float().to(device)
print("X_test.size: ", X_test.size())
print("Y_test.size: ", Y_test.size())

# Data Loader
def data_loader(tensor_X, tensor_Y, batch_size, shuffle):
    dataset = torch.utils.data.TensorDataset(tensor_X, tensor_Y)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader
train_loader1 = data_loader(X_train, Y_train, batch_size, shuffle=True)
test_loader1 = data_loader(X_test, Y_test, batch_size, shuffle=False)

# model
model = Model().to(device)
#emb_f13 = FC(X_train[:23].size(1)).to(device)
#emb_f2 = GCN(X_train[23:-3].size(1)/6, 6).to(device)
#emb_f2

# Loss and optimizer
criterion = nn.BCELoss()  # alternative: nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
total_step = len(train_loader1)
best_acc = 0
for epoch in range(num_epochs):
    start_time = time.time()
    for i, (X_mini, Y_mini) in enumerate(train_loader1):
        # Forward pass (assumed completion: model(X_mini) is taken to return
        # probabilities shaped like Y_mini, as BCELoss requires)
        outputs = model(X_mini)
        loss = criterion(outputs, Y_mini)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #6
def train(tf_seed, np_seed, train_steps, finetune_train_steps, out_steps,
          summary_steps, checkpoint_steps, step_size_schedule, weight_decay,
          momentum, train_batch_size, epsilon, replay_m, model_dir,
          source_model_dir, dataset, beta, gamma, disc_update_steps,
          adv_update_steps_per_iter, disc_layers, disc_base_channels,
          steps_before_adv_opt, adv_encoder_type, enc_output_activation,
          sep_opt_version, grad_image_ratio, final_grad_image_ratio,
          num_grad_image_ratios, normalize_zero_mean, eval_adv_attack,
          same_optimizer, only_fully_connected, finetuned_source_model_dir,
          train_finetune_source_model, finetune_img_random_pert,
          img_random_pert, only_finetune, finetune_whole_model, model_suffix,
          **kwargs):
    tf.set_random_seed(tf_seed)
    np.random.seed(np_seed)

    model_dir = model_dir + 'IGAM-%s_b%d_beta_%.3f_gamma_%.3f_disc_update_steps%d_l%dbc%d' % (
        dataset, train_batch_size, beta, gamma, disc_update_steps, disc_layers,
        disc_base_channels)  # TODO Replace with not defaults

    if img_random_pert:
        model_dir = model_dir + '_imgpert'

    if steps_before_adv_opt != 0:
        model_dir = model_dir + '_advdelay%d' % (steps_before_adv_opt)

    if train_steps != 80000:
        model_dir = model_dir + '_%dsteps' % (train_steps)
    if not same_optimizer:
        model_dir = model_dir + '_adamDopt'

    if tf_seed != 451760341:
        model_dir = model_dir + '_tf_seed%d' % (tf_seed)
    if np_seed != 216105420:
        model_dir = model_dir + '_np_seed%d' % (np_seed)
    model_dir = model_dir + model_suffix

    # Setting up the data and the model
    data_path = get_path_dir(dataset=dataset, **kwargs)
    if dataset == 'cifar10':
        raw_data = cifar10_input.CIFAR10Data(data_path)
    else:
        raw_data = cifar100_input.CIFAR100Data(data_path)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step_op = tf.assign(global_step, global_step + 1)
    reset_global_step_op = tf.assign(global_step, 0)
    source_model = ModelExtendedLogits(mode='train',
                                       target_task_class_num=100,
                                       train_batch_size=train_batch_size)
    model = Model(mode='train',
                  dataset=dataset,
                  train_batch_size=train_batch_size,
                  normalize_zero_mean=normalize_zero_mean)

    # Setting up the optimizers
    boundaries = [int(sss[0]) for sss in step_size_schedule][1:]
    values = [sss[1] for sss in step_size_schedule]
    learning_rate = tf.train.piecewise_constant(tf.cast(global_step, tf.int32),
                                                boundaries, values)
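
    # For clarity: step_size_schedule is consumed as (boundary, value) pairs,
    # with the first boundary dropped so the first value applies from step 0.
    # Toy illustration (an assumed schedule, independent of the one passed in):
    #   [[0, 0.1], [40000, 0.01], [60000, 0.001]]
    #   -> boundaries [40000, 60000], values [0.1, 0.01, 0.001]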
    c_optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    finetune_optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

    if same_optimizer:
        d_optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    else:
        print("Using ADAM opt for DISC model")
        d_optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

    # Compute input gradient (saliency map)
    input_grad = tf.gradients(model.target_softmax,
                              model.x_input,
                              name="gradients_ig")[0]
    source_model_input_grad = tf.gradients(source_model.target_softmax,
                                           source_model.x_input,
                                           name="gradients_ig_source_model")[0]

    # lp norm diff between input_grad & source_model_input_grad
    input_grad_l2_norm_diff = tf.reduce_mean(
        tf.reduce_sum(tf.pow(tf.subtract(input_grad, source_model_input_grad),
                             2.0),
                      keepdims=True))
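
    # Note: reduce_sum without an axis collapses every dimension, so (up to the
    # redundant reduce_mean) this penalty is just the total sum of squared
    # differences between the two saliency maps; in NumPy terms, roughly
    #   np.sum((input_grad_val - source_grad_val) ** 2)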

    # Setting up the discriminator model
    labels_input_grad = tf.zeros(tf.shape(input_grad)[0], dtype=tf.int64)
    labels_source_model_input_grad = tf.ones(tf.shape(input_grad)[0],
                                             dtype=tf.int64)
    disc_model = IgamConvDiscriminatorModel(
        mode='train',
        dataset=dataset,
        train_batch_size=train_batch_size,
        num_conv_layers=disc_layers,
        base_num_channels=disc_base_channels,
        normalize_zero_mean=normalize_zero_mean,
        x_modelgrad_input_tensor=input_grad,
        y_modelgrad_input_tensor=labels_input_grad,
        x_source_modelgrad_input_tensor=source_model_input_grad,
        y_source_modelgrad_input_tensor=labels_source_model_input_grad,
        only_fully_connected=only_fully_connected)

    t_vars = tf.trainable_variables()
    C_vars = [var for var in t_vars if 'classifier' in var.name]
    D_vars = [var for var in t_vars if 'discriminator' in var.name]
    source_model_vars = [
        var for var in t_vars
        if ('discriminator' not in var.name and 'classifier' not in var.name
            and 'target_task_logit' not in var.name)
    ]
    source_model_target_logit_vars = [
        var for var in t_vars if 'target_task_logit' in var.name
    ]

    source_model_saver = tf.train.Saver(var_list=source_model_vars)
    finetuned_source_model_vars = source_model_vars + source_model_target_logit_vars
    finetuned_source_model_saver = tf.train.Saver(
        var_list=finetuned_source_model_vars)

    # Source model finetune optimization
    source_model_finetune_loss = source_model.target_task_mean_xent + weight_decay * source_model.weight_decay_loss
    total_loss = model.mean_xent + weight_decay * model.weight_decay_loss - beta * disc_model.mean_xent + gamma * input_grad_l2_norm_diff

    classification_c_loss = model.mean_xent + weight_decay * model.weight_decay_loss
    adv_c_loss = -beta * disc_model.mean_xent

    # Discriminator: Optimizating computation
    # discriminator loss
    total_d_loss = disc_model.mean_xent + weight_decay * disc_model.weight_decay_loss

    # Finetune source_model
    if finetune_whole_model:
        finetune_min_step = finetune_optimizer.minimize(
            source_model_finetune_loss, var_list=finetuned_source_model_vars)
    else:
        finetune_min_step = finetune_optimizer.minimize(
            source_model_finetune_loss,
            var_list=source_model_target_logit_vars)
    # Train classifier
    # classifier opt step
    final_grads = c_optimizer.compute_gradients(total_loss, var_list=C_vars)
    no_pert_grad = [(tf.zeros_like(v), v) if 'perturbation' in v.name else
                    (g, v) for g, v in final_grads]
    c_min_step = c_optimizer.apply_gradients(no_pert_grad)
    # c_min_step = c_optimizer.minimize(total_loss, var_list=C_vars)

    classification_final_grads = c_optimizer.compute_gradients(
        classification_c_loss, var_list=C_vars)
    classification_no_pert_grad = [(tf.zeros_like(v), v)
                                   if 'perturbation' in v.name else (g, v)
                                   for g, v in classification_final_grads]
    c_classification_min_step = c_optimizer.apply_gradients(
        classification_no_pert_grad)

    # adversarial-only classifier step (assumed definition, mirroring the
    # pattern above); needed by the sep_opt_version == 2 branch below
    adv_final_grads = c_optimizer.compute_gradients(adv_c_loss,
                                                    var_list=C_vars)
    adv_no_pert_grad = [(tf.zeros_like(v), v) if 'perturbation' in v.name else
                        (g, v) for g, v in adv_final_grads]
    c_adv_min_step = c_optimizer.apply_gradients(adv_no_pert_grad)

    # discriminator opt step
    d_min_step = d_optimizer.minimize(total_d_loss, var_list=D_vars)

    # Loss gradients to the model params
    logit_weights = tf.get_default_graph().get_tensor_by_name(
        'classifier/logit/DW:0')
    last_conv_weights = tf.get_default_graph().get_tensor_by_name(
        'classifier/unit_3_4/sub2/conv2/DW:0')
    first_conv_weights = tf.get_default_graph().get_tensor_by_name(
        'classifier/input/init_conv/DW:0')

    model_xent_logit_grad_norm = tf.norm(tf.gradients(model.mean_xent,
                                                      logit_weights)[0],
                                         ord='euclidean')
    disc_xent_logit_grad_norm = tf.norm(tf.gradients(disc_model.mean_xent,
                                                     logit_weights)[0],
                                        ord='euclidean')
    input_grad_l2_norm_diff_logit_grad_norm = tf.norm(tf.gradients(
        input_grad_l2_norm_diff, logit_weights)[0],
                                                      ord='euclidean')

    model_xent_last_conv_grad_norm = tf.norm(tf.gradients(
        model.mean_xent, last_conv_weights)[0],
                                             ord='euclidean')
    disc_xent_last_conv_grad_norm = tf.norm(tf.gradients(
        disc_model.mean_xent, last_conv_weights)[0],
                                            ord='euclidean')
    input_grad_l2_norm_diff_last_conv_grad_norm = tf.norm(tf.gradients(
        input_grad_l2_norm_diff, last_conv_weights)[0],
                                                          ord='euclidean')
    model_xent_first_conv_grad_norm = tf.norm(tf.gradients(
        model.mean_xent, first_conv_weights)[0],
                                              ord='euclidean')
    disc_xent_first_conv_grad_norm = tf.norm(tf.gradients(
        disc_model.mean_xent, first_conv_weights)[0],
                                             ord='euclidean')
    input_grad_l2_norm_diff_first_conv_grad_norm = tf.norm(tf.gradients(
        input_grad_l2_norm_diff, first_conv_weights)[0],
                                                           ord='euclidean')

    # Setting up the Tensorboard and checkpoint outputs
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    saver = tf.train.Saver(max_to_keep=1)
    tf.summary.scalar('C accuracy', model.accuracy)
    tf.summary.scalar('D accuracy', disc_model.accuracy)
    tf.summary.scalar('C xent', model.xent / train_batch_size)
    tf.summary.scalar('D xent', disc_model.xent / train_batch_size)
    tf.summary.scalar('total C loss', total_loss / train_batch_size)
    tf.summary.scalar('total D loss', total_d_loss / train_batch_size)
    tf.summary.scalar('adv C loss', adv_c_loss / train_batch_size)
    tf.summary.scalar('C cls xent loss', model.mean_xent)
    tf.summary.scalar('D xent loss', disc_model.mean_xent)
    # Loss gradients
    tf.summary.scalar('model_xent_logit_grad_norm', model_xent_logit_grad_norm)
    tf.summary.scalar('disc_xent_logit_grad_norm', disc_xent_logit_grad_norm)
    tf.summary.scalar('input_grad_l2_norm_diff_logit_grad_norm',
                      input_grad_l2_norm_diff_logit_grad_norm)
    tf.summary.scalar('model_xent_last_conv_grad_norm',
                      model_xent_last_conv_grad_norm)
    tf.summary.scalar('disc_xent_last_conv_grad_norm',
                      disc_xent_last_conv_grad_norm)
    tf.summary.scalar('input_grad_l2_norm_diff_last_conv_grad_norm',
                      input_grad_l2_norm_diff_last_conv_grad_norm)
    tf.summary.scalar('model_xent_first_conv_grad_norm',
                      model_xent_first_conv_grad_norm)
    tf.summary.scalar('disc_xent_first_conv_grad_norm',
                      disc_xent_first_conv_grad_norm)
    tf.summary.scalar('input_grad_l2_norm_diff_first_conv_grad_norm',
                      input_grad_l2_norm_diff_first_conv_grad_norm)
    merged_summaries = tf.summary.merge_all()

    with tf.Session() as sess:
        print(
            'important params >>> \n model dir: %s \n dataset: %s \n training batch size: %d \n'
            % (model_dir, dataset, train_batch_size))
        # initialize data augmentation
        if dataset == 'cifar10':
            data = cifar10_input.AugmentedCIFAR10Data(raw_data, sess, model)
        else:
            data = cifar100_input.AugmentedCIFAR100Data(raw_data, sess, model)

        # Initialize the summary writer, global variables, and our time counter.
        summary_writer = tf.summary.FileWriter(model_dir + '/train',
                                               sess.graph)
        eval_summary_writer = tf.summary.FileWriter(model_dir + '/eval')
        sess.run(tf.global_variables_initializer())

        # Restore source model
        source_model_file = tf.train.latest_checkpoint(source_model_dir)
        source_model_saver.restore(sess, source_model_file)

        # Finetune source model here
        if train_finetune_source_model:
            time_before_finetuning = datetime.now()
            for ii in tqdm(range(finetune_train_steps)):
                x_batch, y_batch = data.train_data.get_next_batch(
                    train_batch_size, multiple_passes=True)
                if finetune_img_random_pert:
                    x_batch = x_batch + np.random.uniform(
                        -epsilon, epsilon, x_batch.shape)
                    x_batch = np.clip(x_batch, 0,
                                      255)  # ensure valid pixel range

                nat_dict = {
                    source_model.x_input: x_batch,
                    source_model.y_input: y_batch
                }

                # Output to stdout
                if ii % summary_steps == 0:
                    train_finetune_acc, train_finetune_loss = sess.run(
                        [
                            source_model.target_task_accuracy,
                            source_model_finetune_loss
                        ],
                        feed_dict=nat_dict)

                    x_eval_batch, y_eval_batch = data.eval_data.get_next_batch(
                        train_batch_size, multiple_passes=True)
                    if img_random_pert:
                        x_eval_batch = x_eval_batch + np.random.uniform(
                            -epsilon, epsilon, x_eval_batch.shape)
                        x_eval_batch = np.clip(x_eval_batch, 0,
                                               255)  # ensure valid pixel range

                    eval_dict = {
                        source_model.x_input: x_eval_batch,
                        source_model.y_input: y_eval_batch
                    }
                    val_finetune_acc, val_finetune_loss = sess.run(
                        [
                            source_model.target_task_accuracy,
                            source_model_finetune_loss
                        ],
                        feed_dict=eval_dict)
                    print('Source Model Finetune Step {}:    ({})'.format(
                        ii, datetime.now()))
                    print(
                        '    training nat accuracy {:.4}% -- validation nat accuracy {:.4}%'
                        .format(train_finetune_acc * 100,
                                val_finetune_acc * 100))
                    print('    training nat c loss: {}'.format(
                        train_finetune_loss))
                    print('    validation nat c loss: {}'.format(
                        val_finetune_loss))

                    sys.stdout.flush()

                sess.run(finetune_min_step, feed_dict=nat_dict)
                sess.run(increment_global_step_op)

            time_after_finetuning = datetime.now()
            finetuning_time = time_after_finetuning - time_before_finetuning

            finetuning_time_file_path = os.path.join(model_dir,
                                                     'finetuning_time.txt')
            with open(finetuning_time_file_path, "w") as f:
                f.write("Total finetuning time: {}".format(
                    str(finetuning_time)))
            print("Total finetuning time: {}".format(str(finetuning_time)))

            finetuned_source_model_saver.save(sess,
                                              os.path.join(
                                                  finetuned_source_model_dir,
                                                  'checkpoint'),
                                              global_step=global_step)
            if only_finetune:
                return
        else:
            finetuned_source_model_file = tf.train.latest_checkpoint(
                finetuned_source_model_dir)
            finetuned_source_model_saver.restore(sess,
                                                 finetuned_source_model_file)

        # reset global step to 0 before running main training loop
        sess.run(reset_global_step_op)

        time_before_training = datetime.now()
        # Main training loop
        for ii in tqdm(range(train_steps)):
            x_batch, y_batch = data.train_data.get_next_batch(
                train_batch_size, multiple_passes=True)
            if img_random_pert:
                x_batch = x_batch + np.random.uniform(-epsilon, epsilon,
                                                      x_batch.shape)
                x_batch = np.clip(x_batch, 0, 255)  # ensure valid pixel range

            labels_source_modelgrad_disc = np.ones_like(y_batch,
                                                        dtype=np.int64)
            # Sample randinit input grads
            nat_dict = {
                model.x_input: x_batch,
                model.y_input: y_batch,
                source_model.x_input: x_batch,
                source_model.y_input: y_batch
            }

            # Output to stdout
            if ii % summary_steps == 0:
                train_acc, train_disc_acc, train_c_loss, train_d_loss, train_adv_c_loss, summary = sess.run(
                    [
                        model.accuracy, disc_model.accuracy, total_loss,
                        total_d_loss, adv_c_loss, merged_summaries
                    ],
                    feed_dict=nat_dict)
                summary_writer.add_summary(summary, global_step.eval(sess))

                x_eval_batch, y_eval_batch = data.eval_data.get_next_batch(
                    train_batch_size, multiple_passes=True)
                if img_random_pert:
                    x_eval_batch = x_eval_batch + np.random.uniform(
                        -epsilon, epsilon, x_eval_batch.shape)
                    x_eval_batch = np.clip(x_eval_batch, 0,
                                           255)  # ensure valid pixel range

                labels_source_modelgrad_disc = np.ones_like(y_eval_batch,
                                                            dtype=np.int64)
                eval_dict = {
                    model.x_input: x_eval_batch,
                    model.y_input: y_eval_batch,
                    source_model.x_input: x_eval_batch,
                    source_model.y_input: y_eval_batch
                }
                val_acc, val_disc_acc, val_c_loss, val_d_loss, val_adv_c_loss, summary = sess.run(
                    [
                        model.accuracy, disc_model.accuracy, total_loss,
                        total_d_loss, adv_c_loss, merged_summaries
                    ],
                    feed_dict=eval_dict)
                eval_summary_writer.add_summary(summary,
                                                global_step.eval(sess))
                print('Step {}:    ({})'.format(ii, datetime.now()))
                print(
                    '    training nat accuracy {:.4}% -- validation nat accuracy {:.4}%'
                    .format(train_acc * 100, val_acc * 100))
                print(
                    '    training nat disc accuracy {:.4}% -- validation nat disc accuracy {:.4}%'
                    .format(train_disc_acc * 100, val_disc_acc * 100))
                print(
                    '    training nat c loss: {},     d loss: {},     adv c loss: {}'
                    .format(train_c_loss, train_d_loss, train_adv_c_loss))
                print(
                    '    validation nat c loss: {},     d loss: {},     adv c loss: {}'
                    .format(val_c_loss, val_d_loss, val_adv_c_loss))

                sys.stdout.flush()
            # Tensorboard summaries
            elif ii % out_steps == 0:
                nat_acc, nat_disc_acc, nat_c_loss, nat_d_loss, nat_adv_c_loss = sess.run(
                    [
                        model.accuracy, disc_model.accuracy, total_loss,
                        total_d_loss, adv_c_loss
                    ],
                    feed_dict=nat_dict)
                print('Step {}:    ({})'.format(ii, datetime.now()))
                print('    training nat accuracy {:.4}%'.format(nat_acc * 100))
                print('    training nat disc accuracy {:.4}%'.format(
                    nat_disc_acc * 100))
                print(
                    '    training nat c loss: {},     d loss: {},      adv c loss: {}'
                    .format(nat_c_loss, nat_d_loss, nat_adv_c_loss))

            # Write a checkpoint
            if (ii + 1) % checkpoint_steps == 0:
                saver.save(sess,
                           os.path.join(model_dir, 'checkpoint'),
                           global_step=global_step)

            # default mode
            if sep_opt_version == 1:
                if ii >= steps_before_adv_opt:
                    # Actual training step for Classifier
                    sess.run(c_min_step, feed_dict=nat_dict)
                    sess.run(increment_global_step_op)

                    if ii % disc_update_steps == 0:
                        # Actual training step for Discriminator
                        sess.run(d_min_step, feed_dict=nat_dict)
                else:
                    # only train on classification loss
                    sess.run(c_classification_min_step, feed_dict=nat_dict)
                    sess.run(increment_global_step_op)

            elif sep_opt_version == 2:
                # Actual training step for Classifier
                if ii >= steps_before_adv_opt:
                    if adv_update_steps_per_iter > 1:
                        sess.run(c_classification_min_step, feed_dict=nat_dict)
                        sess.run(increment_global_step_op)
                        for i in range(adv_update_steps_per_iter):
                            x_batch, y_batch = data.train_data.get_next_batch(
                                train_batch_size, multiple_passes=True)
                            if img_random_pert:
                                x_batch = x_batch + np.random.uniform(
                                    -epsilon, epsilon, x_batch.shape)
                                x_batch = np.clip(
                                    x_batch, 0,
                                    255)  # ensure valid pixel range

                            nat_dict = {
                                model.x_input: x_batch,
                                model.y_input: y_batch,
                                source_model.x_input: x_batch,
                                source_model.y_input: y_batch
                            }

                            sess.run(c_adv_min_step, feed_dict=nat_dict)
                    else:
                        sess.run(c_min_step, feed_dict=nat_dict)
                        sess.run(increment_global_step_op)

                    if ii % disc_update_steps == 0:
                        # Actual training step for Discriminator
                        sess.run(d_min_step, feed_dict=nat_dict)
                else:
                    # only train on classification loss
                    sess.run(c_classification_min_step, feed_dict=nat_dict)
                    sess.run(increment_global_step_op)
            elif sep_opt_version == 0:
                if ii >= steps_before_adv_opt:
                    if ii % disc_update_steps == 0:
                        sess.run([c_min_step, d_min_step], feed_dict=nat_dict)
                        sess.run(increment_global_step_op)
                    else:
                        sess.run(c_min_step, feed_dict=nat_dict)
                        sess.run(increment_global_step_op)
                else:
                    sess.run(c_classification_min_step, feed_dict=nat_dict)
                    sess.run(increment_global_step_op)

        time_after_training = datetime.now()
        training_time = time_after_training - time_before_training

        training_time_file_path = os.path.join(model_dir, 'training_time.txt')
        with open(training_time_file_path, "w") as f:
            f.write("Total Training time: {}".format(str(training_time)))
        print("Total Training time: {}".format(str(training_time)))

        # full test evaluation
        if dataset == 'cifar10':
            raw_data = cifar10_input.CIFAR10Data(data_path)
        else:
            raw_data = cifar100_input.CIFAR100Data(data_path)
        data_size = raw_data.eval_data.n
        # any remainder batch smaller than train_batch_size is dropped
        eval_steps = data_size // train_batch_size
        # eval_steps = data_size // train_batch_size + 1
        total_num_correct = 0
        for ii in tqdm(range(eval_steps)):
            x_eval_batch, y_eval_batch = raw_data.eval_data.get_next_batch(
                train_batch_size, multiple_passes=False)
            eval_dict = {
                model.x_input: x_eval_batch,
                model.y_input: y_eval_batch
            }
            num_correct = sess.run(model.num_correct, feed_dict=eval_dict)
            total_num_correct += num_correct
        eval_acc = total_num_correct / data_size

        clean_eval_file_path = os.path.join(model_dir,
                                            'full_clean_eval_acc.txt')
        with open(clean_eval_file_path, "a+") as f:
            f.write("Full clean eval_acc: {}%".format(eval_acc * 100))
        print("Full clean eval_acc: {}%".format(eval_acc * 100))

        devices = sess.list_devices()
        print("sess' device names:")
        for d in devices:
            print(d.name)

    return model_dir