        # ---------- Save sorted dataframe ----------
        file_name = f"{self.n_clusters}Clusters{len(self.randomStateList)}RandomStates.csv"
        csv_file_path = get_external_fittment_file_path(dataset_index, file_name)
        sorted_df.to_csv(csv_file_path)

        # ---------- Save bar plot ----------
        algo_name_average_fit_dict = sorted_df.mean().to_dict()
        print(algo_name_average_fit_dict)
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.bar(list(algo_name_average_fit_dict.keys()),
               algo_name_average_fit_dict.values())
        fig.suptitle(
            "Sorted Average Of Kullback-Leibler Divergence Between Prediction Labels And \n "
            f"External Classifier With {self.n_clusters} Clusters For Data-Set {dataset_index} "
            f"Across {len(self.randomStateList)} Random States", fontsize=18)
        bar_chart_file_path = self.get_bar_chart_fie_path(dataset_index)
        fig.savefig(bar_chart_file_path)
        plt.close()
        # Due to memory interference issues between saving the figure and
        # accessing the CSV file for loading the data, this function cannot
        # loop over the datasets and save their figures in one run.


if __name__ == '__main__':
    fec = FitExternalClass()
    # fec.createCSV(Dataset1())
    fec.createCSV(Dataset2())
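# A minimal sketch (not the project's code) of one way around the figure/CSV
# interference noted above: select matplotlib's non-interactive "Agg" backend
# before pyplot is imported, and close the specific figure object, so no GUI
# event loop keeps state alive between datasets. The helper name and its
# arguments are hypothetical.
import matplotlib
matplotlib.use('Agg')  # must run before the first `import matplotlib.pyplot`
import matplotlib.pyplot as plt


def save_bar_chart(values_by_algo, path, title):
    # values_by_algo: dict mapping algorithm name -> average divergence.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.bar(list(values_by_algo.keys()), list(values_by_algo.values()))
    fig.suptitle(title, fontsize=18)
    fig.savefig(path)
    plt.close(fig)  # close this exact figure, not just the "current" one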
def train():
    flatten_flag = True  # flatten the output of G or not?
    opt = opts.parse_opt()
    opt.input_data = "MNIST"

    # Load data, mapping pixel values from [0, 1] to [-1, 1].
    # mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    # train_data = mnist.train.images * 2.0 - 1.0
    # train_label = mnist.train.labels
    # test_data = mnist.test.images * 2.0 - 1.0
    # test_label = mnist.test.labels
    loaded = np.load('MNIST_data/B.npz')
    train_data, train_label, test_data, test_label = \
        loaded['train_data'], loaded['train_label'], \
        loaded['test_data'], loaded['test_label']

    # Create the label clues here: one one-hot channel plane per class.
    if opt.cgan_gen:
        label_clue = np.zeros((train_label.shape[1], opt.img_dim,
                               opt.img_dim, train_label.shape[1]))
        for lbl in range(train_label.shape[1]):
            label_clue[lbl, :, :, lbl] = 1

    if opt.cgan_gen:
        output_samples, output_labels = output_sample(test_data, test_label, True)
    else:
        output_samples = output_sample(test_data, test_label)
    print output_samples.shape

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]
    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    batch_size = opt.batch_size

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        # Initialize the variables (restoring from a checkpoint if one exists)
        # and initialize the summary writer.
        global_step = 0

        print '\tRetrieving evil model from "%s"' % opt.evil_model_path
        evil_model = MNISTModel(opt.evil_model_path)
        print '\tRetrieving good model from "%s"' % opt.good_model_path
        good_model = OddEvenMNIST(opt.good_model_path)
        # model = advGAN(whitebox_model, model_store, opt, sess)
        model = advGAN(good_model, evil_model, opt, sess)

        min_adv_accuracy = 10e10
        max_accuracy_diff = -np.inf

        # summary_dir = "logs/MNIST/g_%d_ld_%d_gl_%d_L2_%.2f_dn_%d" % (
        #     opt.G_lambda, opt.ld, opt.good_loss_coeff,
        #     opt.L2_lambda, opt.d_train_num)
        summary_dir = "logs/MNIST/dn_%d_gn_%d" % (opt.d_train_num, opt.g_train_num)
        duplicate_num = 0
        while os.path.isdir(summary_dir + '_' + str(duplicate_num) + '/'):
            duplicate_num += 1
        summary_dir += '_' + str(duplicate_num) + '/'
        print 'Creating directory %s for logs.' % summary_dir
        os.mkdir(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        best_evil_accuracy = -1.0
        best_res_epoch = -1
        best_res = None
        for epoch_num in range(opt.max_epoch):
            print 'Epoch %d' % epoch_num

            # Randomly shuffle the data.
            random_indices = np.arange(train_data.shape[0])
            np.random.shuffle(random_indices)
            train_data = train_data[random_indices, :]
            train_label = train_label[random_indices, :]

            # real_buckets[lbl] holds the indices of all training images
            # whose class label is `lbl`.
            real_buckets = []
            for lbl in range(train_label.shape[1]):
                real_buckets.append(np.where(train_label[:, lbl] == 1)[0])

            # Mini-batch gradient descent.
            batch_no = 0
            while (batch_no * batch_size) < train_data.shape[0]:
                head = batch_no * batch_size
                if head + batch_size <= train_data.shape[0]:
                    tail = head + batch_size
                else:
                    # Last, short batch: slide the window back so it is full.
                    tail = train_data.shape[0]
                    head = train_data.shape[0] - batch_size
                feed_data = train_data[head:tail, :]
                evil_labels = train_label[head:tail, :]
                good_labels = odd_even_labels(evil_labels)

                # Pair each batch row with a randomly sampled real image of
                # the same label.
                real_data = np.zeros_like(feed_data)
                # Indices of the training batch with a specific label:
                # label_indices[i] = indices of feed_data rows that have
                # evil_labels[:, i] == 1.
                label_indices = [np.where(evil_labels[:, lbl] == 1)[0]
                                 for lbl in range(evil_labels.shape[1])]
                for lbl in range(evil_labels.shape[1]):
                    # Take a random sample of size |label_indices[lbl]|
                    # from the real bucket of `lbl`.
                    selected_real_data = np.random.choice(
                        real_buckets[lbl], label_indices[lbl].shape[0])
                    # Put this random sample at the same indices as their
                    # corresponding batch training data.
                    real_data[label_indices[lbl], :] = train_data[
                        selected_real_data, :]

                feed = {
                    model.source: feed_data,
                    model.target: real_data,
                    model.good_labels: good_labels,
                    model.evil_labels: evil_labels
                }

                # Train G.
                for _ in range(opt.g_train_num):
                    summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                        good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                            model.total_loss_merge_sum,
                            model.g_loss,
                            model.gan_loss,
                            model.hinge_loss,
                            model.l1_loss,
                            model.l2_loss,
                            model.good_fn_loss,
                            model.evil_fn_loss,
                            model.adv_loss,
                            model.total_loss,
                            model.G_train_op], feed)
                    writer.add_summary(summary_str, global_step)

                # Train D.
                for _ in range(opt.d_train_num):
                    summary_str, D_loss, _ = sess.run([
                        model.total_loss_merge_sum,
                        model.d_loss,
                        model.D_pre_train_op
                    ], feed)
                    writer.add_summary(summary_str, global_step)

                global_step += 1
                batch_no += 1

            # Validation after each training epoch.
            print '\tD: %.4f, G: %.4f\n\thinge(%.1f): %.4f, L1(%.1f): %.4f, L2(%.1f): %.4f' % (
                D_loss, G_loss,
                opt.H_lambda, hinge_loss,
                opt.L1_lambda, l1_loss,
                opt.L2_lambda, l2_loss)
            print '\t\tGAN total loss: %.4f' % gan_loss
            print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
            print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

            new_pred_data = []
            head = 0
            last_batch = False
            while head < test_data.shape[0]:
                if head + batch_size <= test_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = test_data.shape[0]
                    head = test_data.shape[0] - batch_size
                    last_batch = True
                if opt.cgan_gen:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.evil_labels: test_label[head:tail, :]})
                else:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.source: test_data[head:tail, :]})
                if last_batch:
                    new_pred_data.append(
                        cur_data[-(test_data.shape[0] % batch_size):, :])
                else:
                    new_pred_data.append(cur_data)
                head += batch_size
            new_pred_data = np.concatenate(new_pred_data)

            good_pred = np.argmax(
                model.good_model.model.predict(new_pred_data), axis=1)
            evil_pred = np.argmax(
                model.evil_model.model.predict(new_pred_data), axis=1)
            evil_true = np.argmax(test_label, axis=1)
            good_true = np.argmax(odd_even_labels(test_label), axis=1)

            good_accuracy = accuracy_score(good_true, good_pred)
            evil_accuracy = accuracy_score(evil_true, evil_pred)
            total_good_confusion = confusion_matrix(good_true, good_pred)
            total_evil_confusion = confusion_matrix(
                evil_true, evil_pred, labels=range(opt.evil_label_num))

            print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                good_accuracy, evil_accuracy)
            print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
            print 'Good confusion matrix:'
            print total_good_confusion
            print 'Evil confusion matrix:'
            print total_evil_confusion

            # Creating snapshots to save.
            if opt.cgan_gen:
                fake_samples = sess.run(
                    model.fake_images_sample,
                    {model.evil_labels: output_labels})
            else:
                fake_samples, fake_noise = sess.run(
                    [model.fake_images_sample, model.sample_noise],
                    {model.source: output_samples})

            max_accuracy_diff = good_accuracy - evil_accuracy

            fakes = merge(fake_samples[:100, :], [10, 10])
            separator = np.ones((280, 2))
            original = merge(output_samples[:100].reshape(-1, 28, 28, 1), [10, 10])
            if opt.cgan_gen:
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, separator, original], axis=1))
            else:
                noise = merge(fake_noise[:100], [10, 10])
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, noise, original], axis=1))

            # Only used to find the best D and G training counts.
            if evil_accuracy > best_evil_accuracy:
                best_evil_accuracy = evil_accuracy
                best_res_epoch = epoch_num
                if opt.cgan_gen:
                    best_res = np.concatenate([fakes, separator, original], axis=1)
                else:
                    best_res = np.concatenate([fakes, noise, original], axis=1)

        best_image_path = 'best_dn_%d_gn_%d_%d_epoch_%d.png' % \
            (opt.d_train_num, opt.g_train_num, duplicate_num, best_res_epoch)
        scipy.misc.imsave(best_image_path, best_res)
        # We can transform the training and test data given in the beginning
        # here. This is only half the actual data.
        if opt.save_data:
            # if opt.cgan_gen:
            raise NotImplementedError(
                'Saving data for CGAN_GEN is not yet implemented.')
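# A self-contained sketch (not part of the training script above) of the
# label-matched sampling used in its mini-batch loop: each batch row is paired
# with a random *real* image of the same class, so D compares fakes against
# genuine examples of the label G was conditioned on. `sample_real_like` is a
# hypothetical name.
import numpy as np


def sample_real_like(evil_labels, train_data, real_buckets):
    # evil_labels: (batch, n_classes) one-hot labels of the current batch.
    # real_buckets[lbl]: indices of all training rows whose label is `lbl`.
    real_data = np.zeros_like(train_data[:evil_labels.shape[0], :])
    for lbl in range(evil_labels.shape[1]):
        rows = np.where(evil_labels[:, lbl] == 1)[0]
        picks = np.random.choice(real_buckets[lbl], rows.shape[0])
        # Place each sampled real image at the same batch position as the
        # training image it is paired with.
        real_data[rows, :] = train_data[picks, :]
    return real_data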
def train():
    opt = opts.parse_opt()
    opt.input_data = "MNIST"
    img_size = (opt.img_dim, opt.img_dim)
    print 'Dimension of images:', img_size
    train_data, train_label, id_gender = \
        get_30_people_chunk(opt.image_path, 1, gender_meta=True, img_size=img_size)
    test_data, test_label = get_30_people_chunk(opt.image_path, 2, img_size=img_size)
    names = get_people_names(opt.image_path, 30)

    if opt.balance_data:
        ratio = opt.balance_ratio
        print 'Balancing dataset with ratio %f' % ratio
        train_data, train_label = balance_dataset(train_data, train_label)
        test_data, test_label = balance_dataset(test_data, test_label)

    if opt.balance_gender:
        print train_data.shape, train_label.shape
        print test_data.shape, test_label.shape
        print 'Balancing genders'
        selected_people = []
        for i in range(id_gender.shape[1]):
            indices, = np.where(id_gender[:, i] == 1)
            selected_people.append(np.random.choice(indices, 5, replace=False))
        selected_people = np.concatenate(selected_people)
        print 'Selected people are:'
        print np.array(names)[selected_people]

        selected_imgs = train_label[:, selected_people].sum(axis=1) != 0
        train_data = train_data[selected_imgs, :]
        train_label = train_label[selected_imgs, :]
        selected_imgs = test_label[:, selected_people].sum(axis=1) != 0
        test_data = test_data[selected_imgs, :]
        test_label = test_label[selected_imgs, :]

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tMax, Min Train: %.4f, %.4f' % (np.max(train_data), np.min(train_data))
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)
    print '\tMax, Min Test: %.4f, %.4f' % (np.max(test_data), np.min(test_data))

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]
    opt.input_c_dim = 3
    opt.output_c_dim = 3
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    input_shape = (x_dim, x_dim, opt.input_c_dim)

    batch_size = opt.batch_size
    print 'Batch size: %d' % batch_size

    NUM_REPR = 5
    NUM_SAMPLES_EACH = int(batch_size / NUM_REPR / 2)
    output_samples = get_output_samples(train_data, train_label, id_gender,
                                        NUM_REPR, NUM_SAMPLES_EACH)

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    iteration_time = []
    with tf.Session(config=tf_config) as sess:
        id_model_path = '%s_%d_id_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving evil model from "%s"' % id_model_path
        evil_model = FaceRecognizer(id_model_path, train_label.shape[1],
                                    input_shape, opt.input_c_dim)
        gender_model_path = '%s_%d_gender_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving good model from "%s"' % gender_model_path
        good_model = FaceRecognizer(gender_model_path, 2, input_shape,
                                    opt.input_c_dim)
        model = advGAN(good_model, evil_model, opt, sess, mnist=False)

        iteration = 0
        if opt.resnet_gen:
            generator_mode = 'ResNet'
        else:
            generator_mode = 'Regular'
        summary_dir = "logs/LFW/g_%d_ld_%d_gl_%d_L2_%.2f_lr_%.4f_%s/" % (
            opt.G_lambda, opt.ld, opt.good_loss_coeff,
            opt.L2_lambda, opt.learning_rate, generator_mode)
        if not os.path.isdir(summary_dir):
            print 'Creating directory %s for logs.' % summary_dir
            os.mkdir(summary_dir)
        # else:
        #     print 'Removing all files in %s' % (summary_dir + '*')
        #     shutil.rmtree(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'
        print 'Maximum iterations: %d' % opt.max_iteration

        max_acc_diff = -1.0
        while iteration < opt.max_iteration:
            # this function returns (data, label, np.array(target)).
            feed_data, evil_labels, real_data = loader.next_batch(
                batch_size, negative=False)
            good_labels = id_gender[np.argmax(evil_labels, axis=1)]
            feed = {
                model.source: feed_data,
                model.target: real_data,
                model.good_labels: good_labels,
                model.evil_labels: evil_labels
            }

            # Training G once.
            summary_str, G_loss, _ = sess.run(
                [model.total_loss_merge_sum, model.g_loss, model.G_train_op], feed)
            writer.add_summary(summary_str, iteration)

            # Training G twice.
            summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                    model.total_loss_merge_sum,
                    model.g_loss,
                    model.gan_loss,
                    model.hinge_loss,
                    model.l1_loss,
                    model.l2_loss,
                    model.good_fn_loss,
                    model.evil_fn_loss,
                    model.adv_loss,
                    model.total_loss,
                    model.G_train_op], feed)
            writer.add_summary(summary_str, iteration)

            # Training D.
            summary_str, D_loss, _ = sess.run(
                [model.total_loss_merge_sum, model.d_loss, model.D_pre_train_op],
                feed)
            writer.add_summary(summary_str, iteration)

            if iteration % opt.losses_log_every == 0:
                print 'iteration: ', iteration
                print '\tD: %.4f, G: %.4f\n\thinge(%.2f): %.4f, L1(%.2f): %.4f, L2(%.2f): %.4f' % (
                    D_loss, G_loss,
                    opt.H_lambda, hinge_loss,
                    opt.L1_lambda, l1_loss,
                    opt.L2_lambda, l2_loss)
                print '\t\tGAN total loss: %.4f' % gan_loss
                print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
                print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

                new_test_data = []
                new_pred_data = []
                head = 0
                last_batch = False
                while head < test_data.shape[0]:
                    if head + batch_size <= test_data.shape[0]:
                        tail = head + batch_size
                    else:
                        tail = test_data.shape[0]
                        head = test_data.shape[0] - batch_size
                        last_batch = True
                    cur_data, pred_data = sess.run(
                        [model.fake_images_output, model.prediction_ready],
                        {model.source: test_data[head:tail, :]})
                    if last_batch:
                        new_test_data.append(
                            cur_data[-(test_data.shape[0] % batch_size):, :])
                        new_pred_data.append(
                            pred_data[-(test_data.shape[0] % batch_size):, :])
                    else:
                        new_test_data.append(cur_data)
                        new_pred_data.append(pred_data)
                    head += batch_size
                new_test_data = np.concatenate(new_test_data)
                new_pred_data = np.concatenate(new_pred_data)

                good_pred = np.argmax(
                    model.good_model.model.predict(new_pred_data), axis=1)
                evil_pred = np.argmax(
                    model.evil_model.model.predict(new_pred_data), axis=1)
                evil_true = np.argmax(test_label, axis=1)
                good_true = np.argmax(id_gender[evil_true, :], axis=1)

                good_accuracy = accuracy_score(good_true, good_pred)
                evil_accuracy = accuracy_score(evil_true, evil_pred)
                total_good_confusion = confusion_matrix(good_true, good_pred)
                total_evil_confusion = confusion_matrix(
                    evil_true, evil_pred, labels=range(opt.evil_label_num))

                print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                    good_accuracy, evil_accuracy)
                print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
                print 'Good confusion matrix:'
                print total_good_confusion

                evil_misclass = total_evil_confusion.sum(axis=0) - \
                    np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_misclass)
                print 'Top 3 Misclassifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_misclass[evil_idxs][:3]

                evil_tp = np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_tp)
                print 'Top 3 True classifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_tp[evil_idxs][:3]

                # print 'Selected people are:'
                # print names[evil_idxs].tolist()
                # print evil_tp
                # print total_evil_confusion
                # print evil_idxs

                fake_samples, fake_noise = sess.run(
                    [model.fake_images_output, model.fake_noise_output],
                    {model.source: output_samples})
                fakes = merge(fake_samples, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                original = merge(output_samples, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                noise = merge(fake_noise, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                final_image = np.concatenate([fakes, noise, original], axis=1)
                scipy_imsave('snapshot_%d.png' % iteration, final_image)

                if (good_accuracy - evil_accuracy) > max(0.5, max_acc_diff):
                    print '\tSaving new training data at accuracy diff: %.4f' % (
                        good_accuracy - evil_accuracy),
                    max_acc_diff = good_accuracy - evil_accuracy

                    # other_good = FaceRecognizer('%s_%d_gender_0' % (opt.lfw_base_path, x_dim),
                    #                             2, input_shape, opt.input_c_dim)
                    # other_pred = np.argmax(other_good.model.predict(new_pred_data), axis=1)
                    # print 'Other Good accuracy: %.4f' % accuracy_score(good_true, other_pred)
                    # other_pred = np.argmax(other_good.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    # print '\tTest data processed accuracy: %.4f' % \
                    #     accuracy_score(good_true, other_pred)
                    # other_evil = FaceRecognizer('%s_%d_id_0' % (opt.lfw_base_path, x_dim),
                    #                             34, input_shape, opt.input_c_dim)
                    # other_pred = np.argmax(other_evil.model.predict(new_pred_data), axis=1)
                    # print 'Other Evil accuracy: %.4f' % accuracy_score(evil_true, other_pred)
                    # other_pred = np.argmax(other_evil.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    # print '\tTest data processed accuracy: %.4f' % \
                    #     accuracy_score(evil_true, other_pred)

                    new_train_data = []
                    head = 0
                    last_batch = False
                    while head < train_data.shape[0]:
                        if head + batch_size <= train_data.shape[0]:
                            tail = head + batch_size
                        else:
                            tail = train_data.shape[0]
                            head = train_data.shape[0] - batch_size
                            last_batch = True
                        cur_data = sess.run(
                            model.fake_images_output,
                            {model.source: train_data[head:tail, :]})
                        if last_batch:
                            new_train_data.append(
                                cur_data[-(train_data.shape[0] % batch_size):, :])
                        else:
                            new_train_data.append(cur_data)
                        head += batch_size
                    new_train_data = np.concatenate(new_train_data)

                    np.savez_compressed(opt.output_path,
                                        train_data=new_train_data,
                                        org_train_data=train_data,
                                        train_label=train_label,
                                        test_data=new_test_data,
                                        org_test_data=test_data,
                                        test_label=test_label,
                                        id_gender=id_gender)
                    print '\t[DONE]'

            iteration += 1
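# A minimal reader sketch for the archive written above: np.load exposes each
# keyword argument passed to np.savez_compressed as a key. The function name
# is hypothetical.
import numpy as np


def load_transformed_dataset(path):
    loaded = np.load(path)
    return (loaded['train_data'], loaded['org_train_data'], loaded['train_label'],
            loaded['test_data'], loaded['org_test_data'], loaded['test_label'],
            loaded['id_gender'])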
batch_size = 6
NUM_CLASSES = batch_size

# Regression network parameters.
num_input = 577     # data input (64 * 3 * 3 + 1)
n_hidden_1 = 1024   # 1st layer number of neurons
n_hidden_2 = 256    # 2nd layer number of neurons
n_hidden_3 = 1024   # 3rd layer number of neurons
alpha = 0.01        # alpha for leaky ReLU

# tf Graph input
X = tf.placeholder("float", [batch_size, num_input])
phase = tf.placeholder(tf.bool, name='phase')
global_step = tf.Variable(0, trainable=False)

## input data
data = Dataset2("", val=False, defstat=True, numC=NUM_CLASSES)
max_xtd_out = tf.constant(data.max_xtd_out)
min_xtd_out = tf.constant(data.min_xtd_out)
max_xtd_in = tf.constant(data.max_xtd_in)
min_xtd_in = tf.constant(data.min_xtd_in)

# Network architecture: min-max normalize the input, regress the weights,
# then denormalize the prediction back to the output's original range.
X_norm = (X - min_xtd_in) / (max_xtd_in - min_xtd_in)
w_pred = regress_net(X_norm, phase, num_input, n_hidden_1, n_hidden_2, n_hidden_3)
w_pred_denorm = w_pred * (max_xtd_out - min_xtd_out) + min_xtd_out

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
init = tf.global_variables_initializer()

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
global_step = tf.Variable(0, trainable=False)

## Decreasing weight for the regression loss.
lambda_tf = tf.train.exponential_decay(lambda1, global_step, 1000, 0.8,
                                       staircase=True)
lambda_tf = tf.maximum(tf.constant(lambda_min), lambda_tf)

# Decreasing learning rate.
lr = tf.train.exponential_decay(learning_rate, global_step, 5000, 0.95,
                                staircase=True)

## input data
data = Dataset2(args.pair, val=False, defstat=True, numC=NUM_CLASSES)

# Stats from data.
max_xtd_out = tf.constant(data.max_xtd_out)
min_xtd_out = tf.constant(data.min_xtd_out)
max_xtd_in = tf.constant(data.max_xtd_in)
min_xtd_in = tf.constant(data.min_xtd_in)

## network architecture
w_pred = regress_net(X, phase, num_input, n_hidden_1, n_hidden_2, n_hidden_3)
w_pred_denorm = w_pred * (max_xtd_out - min_xtd_out) + min_xtd_out
y_pred_seg_pred = prediction_net(F, w_pred_denorm)
y_pred_seg_pred_prob = tf.nn.softmax(y_pred_seg_pred, dim=1)
w_true = Y

## define loss and optimizer
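# For intuition, a plain-Python sketch of what tf.train.exponential_decay with
# staircase=True computes above: base * rate ** (step // decay_steps), here
# additionally clipped from below at lambda_min by the tf.maximum call.
# `decayed_value` is a hypothetical name.
def decayed_value(step, base, decay_steps, rate, floor=None):
    value = base * rate ** (step // decay_steps)  # staircase: integer division
    return value if floor is None else max(floor, value)

# e.g. the regression-loss weight at step 2500:
# decayed_value(2500, lambda1, 1000, 0.8, floor=lambda_min)
# == max(lambda_min, lambda1 * 0.8 ** 2)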
        dataset_index = dataset.get_index()
        n_clusters = dataset.get_n_clusters()
        sill_scores_csv_file_path = get_csv_file_path(num_random_stats,
                                                      dataset_index, n_clusters)
        result_df = GlobalFunctions.get_df_by_path(sill_scores_csv_file_path)
        stat_test_results_df, sorted_df = sort_df_by_stat_test(result_df)

        # ---------- Save statistical test results ----------
        file_name = f"{num_random_stats}RandomStatesWith{n_clusters}ClustersStatisticalTestResults.csv"
        csv_file_path = get_csv_file_path(num_random_stats, dataset_index,
                                          n_clusters, file_name)
        print(stat_test_results_df)
        stat_test_results_df.to_csv(csv_file_path)

        # ---------- Save sorted results ----------
        file_name = f"{num_random_stats}RandomStatesWith{n_clusters}ClustersSorted.csv"
        csv_file_path = get_csv_file_path(num_random_stats, dataset_index,
                                          n_clusters, file_name)
        sorted_df.loc['mean'] = sorted_df.mean()
        sorted_df.to_csv(csv_file_path)


if __name__ == "__main__":
    ST = StatisticalTest()
    ds = Dataset2()
    ds.prepareDataset()
    ST.createCSV(ds)
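# A small alternative sketch, assuming pandas: writing the 'mean' row via
# sorted_df.loc['mean'] = sorted_df.mean() (as above) mutates the frame, so any
# later sorted_df.mean() would fold the summary row into itself. Building a new
# frame keeps the original intact. `with_mean_row` is a hypothetical name.
import pandas as pd


def with_mean_row(df):
    summary = df.mean().to_frame(name='mean').T  # one-row frame of column means
    return pd.concat([df, summary])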
        x = np.arange(len(labels))
        width = 0.3
        if self.is_remove_outliers:
            ax[i, j].bar(x - width / 2, algoNameSilhouetteScoreDict.values(),
                         width=width, color='black', label='All Data')
            ax[i, j].bar(x + width / 2, clean_data_algo_name_sil_score_dict.values(),
                         width=width, label='Data With Anomalous Points Removed')
            ax[i, j].set_xticks(x)
            ax[i, j].set_xticklabels(labels)
            ax[i, j].legend()
        else:
            ax[i, j].bar(x, algoNameSilhouetteScoreDict.values())
        ax[i, j].set_title("Silhouette Score", fontsize=fontsize)

        # ---------- Save Plot ----------
        plot_file_path = self.get_plot_file_path(randomState, datasetIndex)
        plt.savefig(plot_file_path)
        plt.close()
        # Due to memory interference issues between saving the figure and
        # accessing the CSV file for loading the data, this function cannot
        # loop over the datasets and save their figures in one run.


if __name__ == '__main__':
    plotClusters = PlotClusters()
    # plotClusters.plotAndSaveOne(Dataset1())
    plotClusters.plotAndSaveOne(Dataset2())
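# A self-contained toy (hypothetical data) of the grouped-bar layout above:
# both series share the integer tick positions x and are shifted by half a bar
# width in opposite directions so the pairs sit side by side.
import numpy as np
import matplotlib.pyplot as plt

labels = ['KMeans', 'GMM', 'Spectral']   # hypothetical algorithm names
all_scores = [0.41, 0.38, 0.52]          # hypothetical silhouette scores
clean_scores = [0.47, 0.40, 0.55]

x = np.arange(len(labels))
width = 0.3
fig, ax = plt.subplots()
ax.bar(x - width / 2, all_scores, width=width, color='black', label='All Data')
ax.bar(x + width / 2, clean_scores, width=width,
       label='Data With Anomalous Points Removed')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.savefig('grouped_bars_example.png')
plt.close(fig)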
from Dataset1 import Dataset1
from Dataset2 import Dataset2

# Registry of all available datasets; `global` is redundant at module level.
dataset_obj_list = [Dataset1(), Dataset2()]
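# A short usage sketch for the registry above: run the same setup over every
# dataset. prepareDataset() is the call used in StatisticalTest's __main__;
# the loop is guarded so it does not execute on import.
if __name__ == '__main__':
    for ds in dataset_obj_list:
        ds.prepareDataset()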