def create_model(slot_file, slot_common_file, all_slot_file):
    """Build the join-common and update models and dump their programs.

    Writes each model's train/startup program protobuf (as text) to a
    ``.pbtxt`` file in the current working directory.

    Args:
        slot_file: path to the slot definition file.
        slot_common_file: path to the common-slot definition file.
        all_slot_file: path to the full slot list file.

    Returns:
        list: ``[join_common_model, update_model]``.
    """
    join_common_model = ModelJoinCommon(slot_file, slot_common_file,
                                        all_slot_file, 20)
    update_model = Model(slot_file, all_slot_file, False, 0, True)
    # (filename, program) pairs to dump.  Using fout.write(str(...)) instead
    # of the original Python-2-only "print >> fout" statements so this runs
    # under Python 3 like the rest of the codebase (output is identical:
    # str(program) followed by a newline).
    dumps = [
        ("join_common_main_program.pbtxt", join_common_model._train_program),
        ("join_common_startup_program.pbtxt",
         join_common_model._startup_program),
        ("update_main_program.pbtxt", update_model._train_program),
        ("update_startup_program.pbtxt", update_model._startup_program),
    ]
    for filename, program in dumps:
        with open(filename, "w") as fout:
            fout.write(str(program) + "\n")
    return [join_common_model, update_model]
def create_model(sess, config, cate_list, action_list):
    """Construct a Model and load or initialize its parameters.

    Logs every global variable (marking the trainable ones), then restores
    the latest checkpoint from FLAGS.model_dir if one exists; otherwise
    creates the directory if needed and runs the variable initializers.

    Returns the constructed model.
    """
    # print(json.dumps(config,indent=4),flush=True)
    model = Model(config, cate_list, action_list)

    # Log the graph's variables; trainable ones get an extra marker.
    trainable = set(tf.trainable_variables())
    print('All global variables:')
    for variable in tf.global_variables():
        if variable in trainable:
            print('\t', variable, 'trainable')
        else:
            print('\t', variable)

    checkpoint_state = tf.train.get_checkpoint_state(FLAGS.model_dir)
    restorable = bool(checkpoint_state) and tf.train.checkpoint_exists(
        checkpoint_state.model_checkpoint_path)
    if restorable:
        print('Reloading model parameters.....', flush=True)
        model.restore(sess, checkpoint_state.model_checkpoint_path)
    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print('Created new model parameters....', flush=True)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
    return model
# Evaluate a saved checkpoint on the validation set.
#
# Builds a fresh graph with placeholders (54x54x3 float images `xs`, a
# length label `ys1`, and five digit labels `ys2`), runs Model.layers,
# and streams num_examples // 128 batches from ds.build_batch through
# tf.metrics.accuracy computed over the joined digit strings
# (needs_include_length is hard-coded False, so the length prediction is
# excluded from the accuracy).  Restores weights from `path_to_checkpoint`
# and returns the last fetched `acc` value.
#
# NOTE(review): drop_rate=0.3 is passed to Model.layers at eval time —
# presumably this should be 0.0 for deterministic evaluation; confirm
# Model.layers' dropout semantics.
# NOTE(review): if num_examples < 128, num_batches == 0, the loop body
# never runs and the final `return acc` raises NameError.
def evaluate(path_to_checkpoint, ds, val_data, val_labels, num_examples, global_step): batch_size = 128 num_batches = num_examples // batch_size needs_include_length = False with tf.Graph().as_default(): ''' image_batch, length_batch, digits_batch = Donkey.build_batch(path_to_tfrecords_file, num_examples=num_examples, batch_size=batch_size, shuffled=False) length_logits, digits_logits = Model.layers(image_batch, drop_rate=0.0) ''' with tf.name_scope('test_inputs'): xs = tf.placeholder(shape=[None, 54, 54, 3], dtype=tf.float32) ys1 = tf.placeholder(shape=[ None, ], dtype=tf.int32) ys2 = tf.placeholder(shape=[None, 5], dtype=tf.int32) ''' image_batch, label = ds.build_batch(val_data, val_labels, batch_size, is_train=False, shuffle=False) length_batch = label[:, 0] digits_batch = label[:, 1:6] image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32) length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32) digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32) ''' length_logits, digits_logits = Model.layers(xs, drop_rate=0.3) length_predictions = tf.argmax(length_logits, axis=1) digits_predictions = tf.argmax(digits_logits, axis=2) if needs_include_length: labels = tf.concat([tf.reshape(ys1, [-1, 1]), ys2], axis=1) predictions = tf.concat( [tf.reshape(length_predictions, [-1, 1]), digits_predictions], axis=1) else: labels = ys2 predictions = digits_predictions labels_string = tf.reduce_join(tf.as_string(labels), axis=1) predictions_string = tf.reduce_join(tf.as_string(predictions), axis=1) accuracy, update_accuracy = tf.metrics.accuracy( labels=labels_string, predictions=predictions_string) tf.summary.image('image', xs) tf.summary.scalar('accuracy', accuracy) tf.summary.histogram( 'variables', tf.concat( [tf.reshape(var, [-1]) for var in tf.trainable_variables()], axis=0)) summary = tf.summary.merge_all() with tf.Session() as sess: sess.run([ tf.global_variables_initializer(), tf.local_variables_initializer() ]) coord = 
tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) restorer = tf.train.Saver() restorer.restore(sess, path_to_checkpoint) for _ in range(num_batches): image_batch, label = ds.build_batch(val_data, val_labels, batch_size, is_train=False, shuffle=False) length_batch = label[:, 0] digits_batch = label[:, 1:6] acc, update = sess.run([accuracy, update_accuracy], feed_dict={ xs: image_batch, ys1: length_batch, ys2: digits_batch }) #print (acc, update) #summary_writer = tf.summary.FileWriter('log/eval') #accuracy_val, summary_val = sess.run([accuracy, summary]) #summary_writer.add_summary(summary_val, global_step=global_step) coord.request_stop() coord.join(threads) return acc
# Train the SVHN CNN with SGD + exponential learning-rate decay.
#
# Builds the graph (placeholders -> Model.layers -> Model.loss ->
# GradientDescentOptimizer), optionally restores `pre_trained_model` from
# model/, then loops forever: every 100 steps it logs the loss, every 1000
# steps it writes summaries, checkpoints to model/latest.ckpt, and calls
# evaluate(); when validation accuracy improves it saves model/model.ckpt
# and resets tolerance to init_tolerance (100), otherwise it decrements
# tolerance and stops at 0 (early stopping).
#
# NOTE(review): most of the architecture parameters (conv_featmap,
# fc_units, conv_kernel_size, pooling_size, l2_norm, dropout) are accepted
# but never used — Model.layers hard-codes the architecture and
# drop_rate is fixed at 0.2 rather than taking `dropout`.
# NOTE(review): the tf.Session created before the Graph context is never
# used and never closed (a second Session is opened inside the graph).
# NOTE(review): `tolerance` is first assigned only when accuracy >
# best_acc; if the very first evaluation does not beat best_acc (0.0), the
# `tolerance -= 1` branch raises NameError — initialize tolerance
# alongside best_acc.
def my_training(ds, train_data, train_labels, val_data, val_labels, num_train, num_val, conv_featmap=[48, 64, 128, 160, 192], fc_units=[84], conv_kernel_size=[[5, 5], [2, 2]], pooling_size=[2], l2_norm=0.015, learning_rate=1e-2, batch_size=32, decay=0.9, dropout=0.3, verbose=False, pre_trained_model=None): print("Building my SVHN_CNN. Parameters: ") print("conv_featmap={}".format(conv_featmap)) print("fc_units={}".format(fc_units)) print("conv_kernel_size={}".format(conv_kernel_size)) print("pooling_size={}".format(pooling_size)) print("l2_norm={}".format(l2_norm)) print("learning_rate={}".format(learning_rate)) #print("decay={}").format(decay) #print("dropout").format(dropout) sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) #ds = du.dataset() #train_data, test_data, train_labels, test_labels = ds.load_image([54,54]) with tf.Graph().as_default(): ''' image_batch, length_batch, digits_batch = Donkey.build_batch(train_data, num_examples=num_train, batch_size=batch_size, shuffled=True) ''' #print (train_data.shape) with tf.name_scope('inputs'): xs = tf.placeholder(shape=[None, 54, 54, 3], dtype=tf.float32) ys1 = tf.placeholder(shape=[ None, ], dtype=tf.int32) ys2 = tf.placeholder(shape=[None, 5], dtype=tf.int32) ''' image_batch, label = ds.build_batch(train_data, train_labels, batch_size, is_train=True, shuffle=False) length_batch = label[:, 0] digits_batch = label[:, 1:6] print(ds.idx_train) image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32) length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32) digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32) ''' length_logtis, digits_logits = Model.layers(xs, drop_rate=0.2) loss = Model.loss(length_logtis, digits_logits, ys1, ys2) global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.exponential_decay(learning_rate, global_step=global_step, decay_steps=10000, decay_rate=decay, staircase=True) optimizer = 
tf.train.GradientDescentOptimizer(learning_rate).minimize( loss, global_step=global_step) #train_op #tf.summary.image('image', image_batch) tf.summary.scalar('SVHN_loss', loss) tf.summary.scalar('learning_rate', learning_rate) cur_model_name = 'SVHN_CNN_{}'.format(int(time.time())) with tf.Session() as sess: merge = tf.summary.merge_all() writer = tf.summary.FileWriter("log/{}".format(cur_model_name), sess.graph) saver = tf.train.Saver() sess.run(tf.global_variables_initializer()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) if pre_trained_model is not None: try: print("Load the model from: {}".format(pre_trained_model)) saver.restore(sess, 'model/{}'.format(pre_trained_model)) except Exception: print("Load model Failed!") pass print('Start training') init_tolerance = 100 best_acc = 0.0 duration = 0.0 while True: start_time = time.time() image_batch, label = ds.build_batch(train_data, train_labels, batch_size, is_train=True, shuffle=True) length_batch = label[:, 0] digits_batch = label[:, 1:6] #print(ds.idx_train) #image_batch = tf.convert_to_tensor(image_batch, dtype=tf.float32) #length_batch = tf.convert_to_tensor(length_batch, dtype=tf.int32) #digits_batch = tf.convert_to_tensor(digits_batch, dtype=tf.int32) _, loss_train, summary_train, global_step_train, learning_rate_train = sess.run( [optimizer, loss, merge, global_step, learning_rate], feed_dict={ xs: image_batch, ys1: length_batch, ys2: digits_batch }) duration += time.time() - start_time if global_step_train % 100 == 0: duration = 0.0 print('%s: iter_total %d, loss = %f' % (datetime.now(), global_step_train, loss_train)) if global_step_train % 1000 == 0: writer.add_summary(summary_train, global_step=global_step_train) checkoutfile = saver.save( sess, os.path.join('model/', 'latest.ckpt')) accuracy = evaluate(checkoutfile, ds, val_data, val_labels, num_val, global_step_train) print('accuracy = %f' % (accuracy)) if accuracy > best_acc: modelfile = 
saver.save(sess, os.path.join( 'model/', 'model.ckpt'), global_step=global_step_train) print('Best validation accuracy!' + modelfile) tolerance = init_tolerance best_acc = accuracy else: tolerance -= 1 print('remaining tolerance = %d' % tolerance) if tolerance == 0: break coord.request_stop() coord.join(threads) print("Traning ends. The best valid accuracy is {}.".format( best_acc))
# Script fragment: PyTorch training setup.
#
# Moves the test tensors to `device`, defines data_loader() (wraps a
# TensorDataset in a DataLoader), builds train/test loaders, constructs
# the model, BCE loss and (a commented-out) Adam optimizer, then begins
# the epoch loop.  The loop body is truncated in this file right after
# the "# Forward pass" marker, so the training step itself is not visible
# here.
#
# NOTE(review): inside data_loader() the local variable `data_loader`
# shadows the function's own name; harmless but confusing — consider
# renaming the local.
# NOTE(review): the optimizer construction is commented out; presumably
# it is created elsewhere or this fragment is incomplete — confirm before
# relying on this section.
X_test = torch.from_numpy(X_test).float().to(device) Y_test = torch.from_numpy(Y_test).float().to(device) print("X_test.size: ", X_test.size()) print("Y_test.size: ", Y_test.size()) # Data Loader def data_loader(tensor_X, tensor_Y, batch_size, shuffle): dataset = torch.utils.data.TensorDataset(tensor_X, tensor_Y) data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle) return data_loader train_loader1 = data_loader(X_train, Y_train, batch_size, shuffle=True) test_loader1 = data_loader(X_test, Y_test, batch_size, shuffle=False) # model model = Model().to(device) #emb_f13 = FC(X_train[:23].size(1)).to(device) #emb_f2 = GCN(X_train[23:-3].size(1)/6, 6).to(device) #emb_f2 # Loss and optimizer criterion = nn.BCELoss() #nn.MSELoss() # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # Training total_step = len(train_loader1) best_acc = 0 for epoch in range(num_epochs): start_time = time.time() for i, (X_mini, Y_mini) in enumerate(train_loader1): # Forward pass
# IGAM adversarial-training entry point.
#
# Pipeline (all TF1 graph mode, single Session):
#   1. Derive model_dir from the hyperparameters, load CIFAR-10/100.
#   2. Build the target classifier, a source model with extended target
#      logits, and a discriminator that tries to tell the target model's
#      input gradients (saliency maps) from the source model's.
#   3. Optionally finetune the source model's target-task logits (or the
#      whole source model when finetune_whole_model) and save it; return
#      early when only_finetune.
#   4. Main loop: classifier minimizes xent + weight decay
#      - beta * disc xent + gamma * input-gradient L2 diff; discriminator
#      trained every disc_update_steps; three sep_opt_version schedules.
#   5. Write summaries/checkpoints/timing files, run a full clean eval,
#      and return model_dir.
#
# NOTE(review): in the final evaluation, both branches of
# `if data_size % train_batch_size == 0` compute the same eval_steps; the
# commented `+ 1` line suggests the remainder batch was meant to be
# included — as written, a trailing partial batch is skipped and eval_acc
# is slightly underestimated whenever data_size % train_batch_size != 0.
# NOTE(review): the sep_opt_version == 2 path with
# adv_update_steps_per_iter > 1 calls sess.run(c_adv_min_step, ...) but
# c_adv_min_step is never defined in this file — that path raises
# NameError if taken.
# NOTE(review): several parameters (replay_m, adv_encoder_type,
# enc_output_activation, grad_image_ratio and friends, eval_adv_attack)
# are accepted but unused in this function.
def train(tf_seed, np_seed, train_steps, finetune_train_steps, out_steps, summary_steps, checkpoint_steps, step_size_schedule, weight_decay, momentum, train_batch_size, epsilon, replay_m, model_dir, source_model_dir, dataset, beta, gamma, disc_update_steps, adv_update_steps_per_iter, disc_layers, disc_base_channels, steps_before_adv_opt, adv_encoder_type, enc_output_activation, sep_opt_version, grad_image_ratio, final_grad_image_ratio, num_grad_image_ratios, normalize_zero_mean, eval_adv_attack, same_optimizer, only_fully_connected, finetuned_source_model_dir, train_finetune_source_model, finetune_img_random_pert, img_random_pert, only_finetune, finetune_whole_model, model_suffix, **kwargs): tf.set_random_seed(tf_seed) np.random.seed(np_seed) model_dir = model_dir + 'IGAM-%s_b%d_beta_%.3f_gamma_%.3f_disc_update_steps%d_l%dbc%d' % ( dataset, train_batch_size, beta, gamma, disc_update_steps, disc_layers, disc_base_channels) # TODO Replace with not defaults if img_random_pert: model_dir = model_dir + '_imgpert' if steps_before_adv_opt != 0: model_dir = model_dir + '_advdelay%d' % (steps_before_adv_opt) if train_steps != 80000: model_dir = model_dir + '_%dsteps' % (train_steps) if same_optimizer == False: model_dir = model_dir + '_adamDopt' if tf_seed != 451760341: model_dir = model_dir + '_tf_seed%d' % (tf_seed) if np_seed != 216105420: model_dir = model_dir + '_np_seed%d' % (np_seed) model_dir = model_dir + model_suffix # Setting up the data and the model data_path = get_path_dir(dataset=dataset, **kwargs) if dataset == 'cifar10': raw_data = cifar10_input.CIFAR10Data(data_path) else: raw_data = cifar100_input.CIFAR100Data(data_path) global_step = tf.train.get_or_create_global_step() increment_global_step_op = tf.assign(global_step, global_step + 1) reset_global_step_op = tf.assign(global_step, 0) source_model = ModelExtendedLogits(mode='train', target_task_class_num=100, train_batch_size=train_batch_size) model = Model(mode='train', dataset=dataset, 
train_batch_size=train_batch_size, normalize_zero_mean=normalize_zero_mean) # Setting up the optimizers boundaries = [int(sss[0]) for sss in step_size_schedule][1:] values = [sss[1] for sss in step_size_schedule] learning_rate = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), boundaries, values) c_optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) finetune_optimizer = tf.train.AdamOptimizer(learning_rate=0.001) if same_optimizer: d_optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) else: print("Using ADAM opt for DISC model") d_optimizer = tf.train.AdamOptimizer(learning_rate=0.001) # Compute input gradient (saliency map) input_grad = tf.gradients(model.target_softmax, model.x_input, name="gradients_ig")[0] source_model_input_grad = tf.gradients(source_model.target_softmax, source_model.x_input, name="gradients_ig_source_model")[0] # lp norm diff between input_grad & source_model_input_grad input_grad_l2_norm_diff = tf.reduce_mean( tf.reduce_sum(tf.pow(tf.subtract(input_grad, source_model_input_grad), 2.0), keepdims=True)) # Setting up the discriminator model labels_input_grad = tf.zeros(tf.shape(input_grad)[0], dtype=tf.int64) labels_source_model_input_grad = tf.ones(tf.shape(input_grad)[0], dtype=tf.int64) disc_model = IgamConvDiscriminatorModel( mode='train', dataset=dataset, train_batch_size=train_batch_size, num_conv_layers=disc_layers, base_num_channels=disc_base_channels, normalize_zero_mean=normalize_zero_mean, x_modelgrad_input_tensor=input_grad, y_modelgrad_input_tensor=labels_input_grad, x_source_modelgrad_input_tensor=source_model_input_grad, y_source_modelgrad_input_tensor=labels_source_model_input_grad, only_fully_connected=only_fully_connected) t_vars = tf.trainable_variables() C_vars = [var for var in t_vars if 'classifier' in var.name] D_vars = [var for var in t_vars if 'discriminator' in var.name] source_model_vars = [ var for var in t_vars if ('discriminator' not in var.name and 'classifier' not in var.name 
and 'target_task_logit' not in var.name) ] source_model_target_logit_vars = [ var for var in t_vars if 'target_task_logit' in var.name ] source_model_saver = tf.train.Saver(var_list=source_model_vars) finetuned_source_model_vars = source_model_vars + source_model_target_logit_vars finetuned_source_model_saver = tf.train.Saver( var_list=finetuned_source_model_vars) # Source model finetune optimization source_model_finetune_loss = source_model.target_task_mean_xent + weight_decay * source_model.weight_decay_loss total_loss = model.mean_xent + weight_decay * model.weight_decay_loss - beta * disc_model.mean_xent + gamma * input_grad_l2_norm_diff classification_c_loss = model.mean_xent + weight_decay * model.weight_decay_loss adv_c_loss = -beta * disc_model.mean_xent # Discriminator: Optimizating computation # discriminator loss total_d_loss = disc_model.mean_xent + weight_decay * disc_model.weight_decay_loss # Finetune source_model if finetune_whole_model: finetune_min_step = finetune_optimizer.minimize( source_model_finetune_loss, var_list=finetuned_source_model_vars) else: finetune_min_step = finetune_optimizer.minimize( source_model_finetune_loss, var_list=source_model_target_logit_vars) # Train classifier # classifier opt step final_grads = c_optimizer.compute_gradients(total_loss, var_list=C_vars) no_pert_grad = [(tf.zeros_like(v), v) if 'perturbation' in v.name else (g, v) for g, v in final_grads] c_min_step = c_optimizer.apply_gradients(no_pert_grad) # c_min_step = c_optimizer.minimize(total_loss, var_list=C_vars) classification_final_grads = c_optimizer.compute_gradients( classification_c_loss, var_list=C_vars) classification_no_pert_grad = [(tf.zeros_like(v), v) if 'perturbation' in v.name else (g, v) for g, v in classification_final_grads] c_classification_min_step = c_optimizer.apply_gradients( classification_no_pert_grad) # discriminator opt step d_min_step = d_optimizer.minimize(total_d_loss, var_list=D_vars) # Loss gradients to the model params 
logit_weights = tf.get_default_graph().get_tensor_by_name( 'classifier/logit/DW:0') last_conv_weights = tf.get_default_graph().get_tensor_by_name( 'classifier/unit_3_4/sub2/conv2/DW:0') first_conv_weights = tf.get_default_graph().get_tensor_by_name( 'classifier/input/init_conv/DW:0') model_xent_logit_grad_norm = tf.norm(tf.gradients(model.mean_xent, logit_weights)[0], ord='euclidean') disc_xent_logit_grad_norm = tf.norm(tf.gradients(disc_model.mean_xent, logit_weights)[0], ord='euclidean') input_grad_l2_norm_diff_logit_grad_norm = tf.norm(tf.gradients( input_grad_l2_norm_diff, logit_weights)[0], ord='euclidean') model_xent_last_conv_grad_norm = tf.norm(tf.gradients( model.mean_xent, last_conv_weights)[0], ord='euclidean') disc_xent_last_conv_grad_norm = tf.norm(tf.gradients( disc_model.mean_xent, last_conv_weights)[0], ord='euclidean') input_grad_l2_norm_diff_last_conv_grad_norm = tf.norm(tf.gradients( input_grad_l2_norm_diff, last_conv_weights)[0], ord='euclidean') model_xent_first_conv_grad_norm = tf.norm(tf.gradients( model.mean_xent, first_conv_weights)[0], ord='euclidean') disc_xent_first_conv_grad_norm = tf.norm(tf.gradients( disc_model.mean_xent, first_conv_weights)[0], ord='euclidean') input_grad_l2_norm_diff_first_conv_grad_norm = tf.norm(tf.gradients( input_grad_l2_norm_diff, first_conv_weights)[0], ord='euclidean') # Setting up the Tensorboard and checkpoint outputs if not os.path.exists(model_dir): os.makedirs(model_dir) saver = tf.train.Saver(max_to_keep=1) tf.summary.scalar('C accuracy', model.accuracy) tf.summary.scalar('D accuracy', disc_model.accuracy) tf.summary.scalar('C xent', model.xent / train_batch_size) tf.summary.scalar('D xent', disc_model.xent / train_batch_size) tf.summary.scalar('total C loss', total_loss / train_batch_size) tf.summary.scalar('total D loss', total_d_loss / train_batch_size) tf.summary.scalar('adv C loss', adv_c_loss / train_batch_size) tf.summary.scalar('C cls xent loss', model.mean_xent) tf.summary.scalar('D xent 
loss', disc_model.mean_xent) # Loss gradients tf.summary.scalar('model_xent_logit_grad_norm', model_xent_logit_grad_norm) tf.summary.scalar('disc_xent_logit_grad_norm', disc_xent_logit_grad_norm) tf.summary.scalar('input_grad_l2_norm_diff_logit_grad_norm', input_grad_l2_norm_diff_logit_grad_norm) tf.summary.scalar('model_xent_last_conv_grad_norm', model_xent_last_conv_grad_norm) tf.summary.scalar('disc_xent_last_conv_grad_norm', disc_xent_last_conv_grad_norm) tf.summary.scalar('input_grad_l2_norm_diff_last_conv_grad_norm', input_grad_l2_norm_diff_last_conv_grad_norm) tf.summary.scalar('model_xent_first_conv_grad_norm', model_xent_first_conv_grad_norm) tf.summary.scalar('disc_xent_first_conv_grad_norm', disc_xent_first_conv_grad_norm) tf.summary.scalar('input_grad_l2_norm_diff_first_conv_grad_norm', input_grad_l2_norm_diff_first_conv_grad_norm) merged_summaries = tf.summary.merge_all() with tf.Session() as sess: print( 'important params >>> \n model dir: %s \n dataset: %s \n training batch size: %d \n' % (model_dir, dataset, train_batch_size)) # initialize data augmentation if dataset == 'cifar10': data = cifar10_input.AugmentedCIFAR10Data(raw_data, sess, model) else: data = cifar100_input.AugmentedCIFAR100Data(raw_data, sess, model) # Initialize the summary writer, global variables, and our time counter. 
summary_writer = tf.summary.FileWriter(model_dir + '/train', sess.graph) eval_summary_writer = tf.summary.FileWriter(model_dir + '/eval') sess.run(tf.global_variables_initializer()) # Restore source model source_model_file = tf.train.latest_checkpoint(source_model_dir) source_model_saver.restore(sess, source_model_file) # Finetune source model here if train_finetune_source_model: time_before_finetuning = datetime.now() for ii in tqdm(range(finetune_train_steps)): x_batch, y_batch = data.train_data.get_next_batch( train_batch_size, multiple_passes=True) if finetune_img_random_pert: x_batch = x_batch + np.random.uniform( -epsilon, epsilon, x_batch.shape) x_batch = np.clip(x_batch, 0, 255) # ensure valid pixel range nat_dict = { source_model.x_input: x_batch, source_model.y_input: y_batch } # Output to stdout if ii % summary_steps == 0: train_finetune_acc, train_finetune_loss = sess.run( [ source_model.target_task_accuracy, source_model_finetune_loss ], feed_dict=nat_dict) x_eval_batch, y_eval_batch = data.eval_data.get_next_batch( train_batch_size, multiple_passes=True) if img_random_pert: x_eval_batch = x_eval_batch + np.random.uniform( -epsilon, epsilon, x_eval_batch.shape) x_eval_batch = np.clip(x_eval_batch, 0, 255) # ensure valid pixel range eval_dict = { source_model.x_input: x_eval_batch, source_model.y_input: y_eval_batch } val_finetune_acc, val_finetune_loss = sess.run( [ source_model.target_task_accuracy, source_model_finetune_loss ], feed_dict=eval_dict) print('Source Model Finetune Step {}: ({})'.format( ii, datetime.now())) print( ' training nat accuracy {:.4}% -- validation nat accuracy {:.4}%' .format(train_finetune_acc * 100, val_finetune_acc * 100)) print(' training nat c loss: {}'.format( train_finetune_loss)) print(' validation nat c loss: {}'.format( val_finetune_loss)) sys.stdout.flush() sess.run(finetune_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) time_after_finetuning = datetime.now() finetuning_time = time_after_finetuning 
- time_before_finetuning finetuning_time_file_path = os.path.join(model_dir, 'finetuning_time.txt') with open(finetuning_time_file_path, "w") as f: f.write("Total finetuning time: {}".format( str(finetuning_time))) print("Total finetuning time: {}".format(str(finetuning_time))) finetuned_source_model_saver.save(sess, os.path.join( finetuned_source_model_dir, 'checkpoint'), global_step=global_step) if only_finetune: return else: finetuned_source_model_file = tf.train.latest_checkpoint( finetuned_source_model_dir) finetuned_source_model_saver.restore(sess, finetuned_source_model_file) # reset global step to 0 before running main training loop sess.run(reset_global_step_op) time_before_training = datetime.now() # Main training loop for ii in tqdm(range(train_steps)): x_batch, y_batch = data.train_data.get_next_batch( train_batch_size, multiple_passes=True) if img_random_pert: x_batch = x_batch + np.random.uniform(-epsilon, epsilon, x_batch.shape) x_batch = np.clip(x_batch, 0, 255) # ensure valid pixel range labels_source_modelgrad_disc = np.ones_like(y_batch, dtype=np.int64) # Sample randinit input grads nat_dict = { model.x_input: x_batch, model.y_input: y_batch, source_model.x_input: x_batch, source_model.y_input: y_batch } # Output to stdout if ii % summary_steps == 0: train_acc, train_disc_acc, train_c_loss, train_d_loss, train_adv_c_loss, summary = sess.run( [ model.accuracy, disc_model.accuracy, total_loss, total_d_loss, adv_c_loss, merged_summaries ], feed_dict=nat_dict) summary_writer.add_summary(summary, global_step.eval(sess)) x_eval_batch, y_eval_batch = data.eval_data.get_next_batch( train_batch_size, multiple_passes=True) if img_random_pert: x_eval_batch = x_eval_batch + np.random.uniform( -epsilon, epsilon, x_eval_batch.shape) x_eval_batch = np.clip(x_eval_batch, 0, 255) # ensure valid pixel range labels_source_modelgrad_disc = np.ones_like(y_eval_batch, dtype=np.int64) eval_dict = { model.x_input: x_eval_batch, model.y_input: y_eval_batch, 
source_model.x_input: x_eval_batch, source_model.y_input: y_eval_batch } val_acc, val_disc_acc, val_c_loss, val_d_loss, val_adv_c_loss, summary = sess.run( [ model.accuracy, disc_model.accuracy, total_loss, total_d_loss, adv_c_loss, merged_summaries ], feed_dict=eval_dict) eval_summary_writer.add_summary(summary, global_step.eval(sess)) print('Step {}: ({})'.format(ii, datetime.now())) print( ' training nat accuracy {:.4}% -- validation nat accuracy {:.4}%' .format(train_acc * 100, val_acc * 100)) print( ' training nat disc accuracy {:.4}% -- validation nat disc accuracy {:.4}%' .format(train_disc_acc * 100, val_disc_acc * 100)) print( ' training nat c loss: {}, d loss: {}, adv c loss: {}' .format(train_c_loss, train_d_loss, train_adv_c_loss)) print( ' validation nat c loss: {}, d loss: {}, adv c loss: {}' .format(val_c_loss, val_d_loss, val_adv_c_loss)) sys.stdout.flush() # Tensorboard summaries elif ii % out_steps == 0: nat_acc, nat_disc_acc, nat_c_loss, nat_d_loss, nat_adv_c_loss = sess.run( [ model.accuracy, disc_model.accuracy, total_loss, total_d_loss, adv_c_loss ], feed_dict=nat_dict) print('Step {}: ({})'.format(ii, datetime.now())) print(' training nat accuracy {:.4}%'.format(nat_acc * 100)) print(' training nat disc accuracy {:.4}%'.format( nat_disc_acc * 100)) print( ' training nat c loss: {}, d loss: {}, adv c loss: {}' .format(nat_c_loss, nat_d_loss, nat_adv_c_loss)) # Write a checkpoint if (ii + 1) % checkpoint_steps == 0: saver.save(sess, os.path.join(model_dir, 'checkpoint'), global_step=global_step) # default mode if sep_opt_version == 1: if ii >= steps_before_adv_opt: # Actual training step for Classifier sess.run(c_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) if ii % disc_update_steps == 0: # Actual training step for Discriminator sess.run(d_min_step, feed_dict=nat_dict) else: # only train on classification loss sess.run(c_classification_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) elif sep_opt_version == 
2: # Actual training step for Classifier if ii >= steps_before_adv_opt: if adv_update_steps_per_iter > 1: sess.run(c_classification_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) for i in range(adv_update_steps_per_iter): x_batch, y_batch = data.train_data.get_next_batch( train_batch_size, multiple_passes=True) if img_random_pert: x_batch = x_batch + np.random.uniform( -epsilon, epsilon, x_batch.shape) x_batch = np.clip( x_batch, 0, 255) # ensure valid pixel range nat_dict = { model.x_input: x_batch, model.y_input: y_batch, source_model.x_input: x_batch, source_model.y_input: y_batch } sess.run(c_adv_min_step, feed_dict=nat_dict) else: sess.run(c_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) if ii % disc_update_steps == 0: # Actual training step for Discriminator sess.run(d_min_step, feed_dict=nat_dict) else: # only train on classification loss sess.run(c_classification_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) elif sep_opt_version == 0: if ii >= steps_before_adv_opt: if ii % disc_update_steps == 0: sess.run([c_min_step, d_min_step], feed_dict=nat_dict) sess.run(increment_global_step_op) else: sess.run(c_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) else: sess.run(c_classification_min_step, feed_dict=nat_dict) sess.run(increment_global_step_op) time_after_training = datetime.now() training_time = time_after_training - time_before_training training_time_file_path = os.path.join(model_dir, 'training_time.txt') with open(training_time_file_path, "w") as f: f.write("Total Training time: {}".format(str(training_time))) print("Total Training time: {}".format(str(training_time))) # full test evaluation if dataset == 'cifar10': raw_data = cifar10_input.CIFAR10Data(data_path) else: raw_data = cifar100_input.CIFAR100Data(data_path) data_size = raw_data.eval_data.n if data_size % train_batch_size == 0: eval_steps = data_size // train_batch_size else: eval_steps = data_size // train_batch_size # 
eval_steps = data_size // train_batch_size + 1 total_num_correct = 0 for ii in tqdm(range(eval_steps)): x_eval_batch, y_eval_batch = raw_data.eval_data.get_next_batch( train_batch_size, multiple_passes=False) eval_dict = { model.x_input: x_eval_batch, model.y_input: y_eval_batch } num_correct = sess.run(model.num_correct, feed_dict=eval_dict) total_num_correct += num_correct eval_acc = total_num_correct / data_size clean_eval_file_path = os.path.join(model_dir, 'full_clean_eval_acc.txt') with open(clean_eval_file_path, "a+") as f: f.write("Full clean eval_acc: {}%".format(eval_acc * 100)) print("Full clean eval_acc: {}%".format(eval_acc * 100)) devices = sess.list_devices() for d in devices: print("sess' device names:") print(d.name) return model_dir