Code Example #1
        ############################# save sorted df
        file_name = f"{self.n_clusters}Clusters{len(self.randomStateList)}RandomStates.csv"
        csv_file_path = get_external_fittment_file_path(
            dataset_index, file_name)
        sorted_df.to_csv(csv_file_path)

        # ---------- Save bar plot ----------
        algo_name_average_fit_dict = sorted_df.mean().to_dict()
        print(algo_name_average_fit_dict)
        fig, ax = plt.subplots(figsize=(10, 8))
        # algoNameAverageFitDict = dict(sorted(algoNameAverageFitDict.items(), key=lambda item: item[1]))

        ax.bar(list(algo_name_average_fit_dict.keys()),
               algo_name_average_fit_dict.values())

        fig.suptitle(
            # f"Sorted Average Of Kullback-Leibler Divergence Between \n Prediction Labels And External Classifier With {self.n_clusters} Clusters For \n Data-Set {dataset_index} Across {len(self.randomStateList)} Random States", fontsize = 17)            f"Sorted Average Of Kullback-Leibler Divergence Between \n Prediction Labels And External Classifier With {self.n_clusters} Clusters For \n Data-Set {dataset_index} Across {len(self.randomStateList)} Random States", fontsize = 17)
            f"Sorted Average Of Kullback-Leibler Divergence Between Prediction Labels And \n External Classifier With {self.n_clusters} Clusters For Data-Set {dataset_index} Across {len(self.randomStateList)} Random States",
            fontsize=18)

        bar_chart_file_path = self.get_bar_chart_fie_path(dataset_index)
        fig.savefig(bar_chart_file_path)
        plt.close()


# Due to memory interference between saving the figure and accessing the CSV file that holds the data, this function cannot loop over the datasets and save all of their figures in a single run (a possible workaround is sketched after this example).
if __name__ == '__main__':
    fec = FitExternalClass()
    # fec.createCSV(Dataset1())
    fec.createCSV(Dataset2())
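
A possible workaround for the limitation noted in the comment above is to run each dataset in its own process, so matplotlib state and cached file handles are released between runs and the loop over datasets can live in the parent. This is only a sketch under that assumption; FitExternalClass, Dataset1 and Dataset2 are the project classes used elsewhere on this page, and the import paths are guesses.

import multiprocessing

from Dataset1 import Dataset1                   # project modules, import paths assumed
from Dataset2 import Dataset2
from FitExternalClass import FitExternalClass   # hypothetical module name


def run_one_dataset(dataset_cls):
    # Each child process starts with a fresh interpreter, so figure buffers
    # from one dataset cannot interfere with loading the next dataset's CSV.
    fec = FitExternalClass()
    fec.createCSV(dataset_cls())


if __name__ == '__main__':
    for dataset_cls in (Dataset1, Dataset2):
        p = multiprocessing.Process(target=run_one_dataset, args=(dataset_cls,))
        p.start()
        p.join()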
Code Example #2
def train():
    flatten_flag = True  # flatten output of G or not?
    opt = opts.parse_opt()
    opt.input_data = "MNIST"
    # mapping [0,1] -> [-1,1]
    # load data
    # mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    # train_data = mnist.train.images * 2.0 - 1.0
    # train_label = mnist.train.labels

    # test_data = mnist.test.images * 2.0 - 1.0
    # test_label = mnist.test.labels

    loaded = np.load('MNIST_data/B.npz')
    train_data, train_label, test_data, test_label = \
        loaded['train_data'], loaded['train_label'], \
        loaded['test_data'], loaded['test_label']

    # We create the label clues here.
    if opt.cgan_gen is True:
        label_clue = np.zeros((train_label.shape[1], opt.img_dim, opt.img_dim,
                               train_label.shape[1]))
        for lbl in range(train_label.shape[1]):
            label_clue[lbl, :, :, lbl] = 1

    if opt.cgan_gen:
        output_samples, output_labels = output_sample(test_data, test_label,
                                                      True)
    else:
        output_samples = output_sample(test_data, test_label)
    print output_samples.shape

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim

    batch_size = opt.batch_size

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        # Initialize the variables, restore them from a checkpoint if one exists,
        # and initialize the summary writer.
        global_step = 0

        print '\tRetrieving evil model from "%s"' % opt.evil_model_path
        evil_model = MNISTModel(opt.evil_model_path)
        print '\tRetrieving good model from "%s"' % opt.good_model_path
        good_model = OddEvenMNIST(opt.good_model_path)
        # model = advGAN(whitebox_model, model_store, opt, sess)
        model = advGAN(good_model, evil_model, opt, sess)

        min_adv_accuracy = 10e10
        max_accuracy_diff = -np.inf

        # summary_dir = "logs/MNIST/g_%d_ld_%d_gl_%d_L2_%.2f_dn_%d" % (
        #     opt.G_lambda, opt.ld, opt.good_loss_coeff,
        #     opt.L2_lambda, opt.d_train_num)

        summary_dir = "logs/MNIST/dn_%d_gn_%d" % (opt.d_train_num,
                                                  opt.g_train_num)

        duplicate_num = 0
        while os.path.isdir(summary_dir + '_' + str(duplicate_num) + '/'):
            duplicate_num += 1
        summary_dir += '_' + str(duplicate_num) + '/'
        print 'Creating directory %s for logs.' % summary_dir
        os.mkdir(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        best_evil_accuracy = -1.0
        best_res_epoch = -1
        best_res = None
        for epoch_num in range(opt.max_epoch):
            print 'Epoch %d' % epoch_num

            # Randomly shuffle the data.
            random_indices = np.arange(train_data.shape[0])
            np.random.shuffle(random_indices)
            train_data = train_data[random_indices, :]
            train_label = train_label[random_indices, :]

            real_buckets = []
            for lbl in range(train_label.shape[1]):
                real_buckets.append(np.where(train_label[:, lbl] == 1)[0])

            # Mini-batch Gradient Descent.
            batch_no = 0
            while (batch_no * batch_size) < train_data.shape[0]:
                head = batch_no * batch_size
                if head + batch_size <= train_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = train_data.shape[0]
                    head = train_data.shape[0] - batch_size

                feed_data = train_data[head:tail, :]
                evil_labels = train_label[head:tail, :]
                good_labels = odd_even_labels(evil_labels)

                # Finding randomly sampled real data.
                real_data = np.zeros_like(feed_data)
                # Indices within the training batch that have a specific label:
                # label_indices[i] = indices of feed_data whose evil label i equals 1.
                label_indices = [np.where(evil_labels[:, lbl] == 1)[0] \
                    for lbl in range(evil_labels.shape[1])]

                for lbl in range(evil_labels.shape[1]):
                    # We take a random sample of size |label_indices[lbl]|
                    # from the real bucket of `lbl`.
                    selected_real_data = np.random.choice(
                        real_buckets[lbl], label_indices[lbl].shape[0])

                    # We put this random sample in the same index of their
                    # corresponding batch training data.
                    real_data[label_indices[lbl], :] = train_data[
                        selected_real_data, :]

                feed = {
                    model.source: feed_data,
                    model.target: real_data,
                    model.good_labels: good_labels,
                    model.evil_labels: evil_labels
                }

                # Train G.
                for _ in range(opt.g_train_num):
                    summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                        good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                            model.total_loss_merge_sum,
                            model.g_loss,
                            model.gan_loss,
                            model.hinge_loss,
                            model.l1_loss,
                            model.l2_loss,
                            model.good_fn_loss,
                            model.evil_fn_loss,
                            model.adv_loss,
                            model.total_loss,
                            model.G_train_op], feed)
                    writer.add_summary(summary_str, global_step)

                # Train D.
                for _ in range(opt.d_train_num):
                    summary_str, D_loss, _ = sess.run([
                        model.total_loss_merge_sum, model.d_loss,
                        model.D_pre_train_op
                    ], feed)
                    writer.add_summary(summary_str, global_step)

                global_step += 1
                batch_no += 1

            # Validation after each training epoch.
            print '\tD: %.4f, G: %.4f\n\thinge(%.1f): %.4f, L1(%.1f): %.4f, L2(%.1f): %.4f' % (
                D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                l1_loss, opt.L2_lambda, l2_loss)
            print '\t\tGAN total loss: %.4f' % gan_loss
            print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
            print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

            new_pred_data = []
            head = 0
            last_batch = False
            while head < test_data.shape[0]:
                if head + batch_size <= test_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = test_data.shape[0]
                    head = test_data.shape[0] - batch_size
                    last_batch = True
                if opt.cgan_gen:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.evil_labels: test_label[head:tail, :]})
                else:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.source: test_data[head:tail, :]})

                if last_batch:
                    new_pred_data.append(
                        cur_data[-(test_data.shape[0] % batch_size):, :])
                else:
                    new_pred_data.append(cur_data)
                head += batch_size
            new_pred_data = np.concatenate(new_pred_data)

            good_pred = np.argmax(
                model.good_model.model.predict(new_pred_data), axis=1)
            evil_pred = np.argmax(
                model.evil_model.model.predict(new_pred_data), axis=1)
            evil_true = np.argmax(test_label, axis=1)
            good_true = np.argmax(odd_even_labels(test_label), axis=1)

            good_accuracy = accuracy_score(good_true, good_pred)
            evil_accuracy = accuracy_score(evil_true, evil_pred)
            total_good_confusion = confusion_matrix(good_true, good_pred)
            total_evil_confusion = confusion_matrix(evil_true,
                                                    evil_pred,
                                                    labels=range(
                                                        opt.evil_label_num))

            print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                good_accuracy, evil_accuracy)
            print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
            print 'Good confusion matrix:'
            print total_good_confusion
            print 'Evil confusion matrix:'
            print total_evil_confusion

            # Creating snapshots to save.
            if opt.cgan_gen:
                fake_samples = sess.run(model.fake_images_sample,
                                        {model.evil_labels: output_labels})
            else:
                fake_samples, fake_noise = sess.run(
                    [model.fake_images_sample, model.sample_noise],
                    {model.source: output_samples})
            max_accuracy_diff = good_accuracy - evil_accuracy

            fakes = merge(fake_samples[:100, :], [10, 10])
            separator = np.ones((280, 2))
            original = merge(output_samples[:100].reshape(-1, 28, 28, 1),
                             [10, 10])

            if opt.cgan_gen:
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, separator, original], axis=1))
            else:
                noise = merge(fake_noise[:100], [10, 10])
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, noise, original], axis=1))

            # Only for the purpose of finding best D and G training times.
            if evil_accuracy > best_evil_accuracy:
                best_evil_accuracy = evil_accuracy
                best_res_epoch = epoch_num
                if opt.cgan_gen:
                    best_res = np.concatenate([fakes, separator, original],
                                              axis=1)
                else:
                    best_res = np.concatenate([fakes, noise, original], axis=1)

        best_image_path = 'best_dn_%d_gn_%d_%d_epoch_%d.png' % \
            (opt.d_train_num, opt.g_train_num, duplicate_num, best_res_epoch)
        scipy.misc.imsave(best_image_path, best_res)

        # print 'Maximum iterations: %d' % opt.max_iteration
        # while iteration < opt.max_iteration:
        #     # this function returns (data, label, np.array(target)).
        #     # data = loader.next_batch(batch_size, negative=False)
        #     feed_data, evil_labels, real_data = loader.next_batch(
        #         batch_size, negative=False)
        #     good_labels = odd_even_labels(evil_labels)

        #     feed = {
        #         model.source: feed_data,
        #         model.target: real_data,
        #         model.good_labels: good_labels,
        #         model.evil_labels: evil_labels
        #     }

        #     # if opt.cgan_gen:
        #     #     feed[model.label_clue] = label_clue[evil_labels.argmax(axis=1)]

        #     # Training G once.
        #     # summary_str, G_loss, _ = sess.run(
        #     #     [model.total_loss_merge_sum, model.g_loss, model.G_train_op], feed)
        #     # writer.add_summary(summary_str, iteration)

        #     # Training G twice.
        #     summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
        #         good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
        #             model.total_loss_merge_sum,
        #             model.g_loss,
        #             model.gan_loss,
        #             model.hinge_loss,
        #             model.l1_loss,
        #             model.l2_loss,
        #             model.good_fn_loss,
        #             model.evil_fn_loss,
        #             model.adv_loss,
        #             model.total_loss,
        #             model.G_train_op], feed)
        #     writer.add_summary(summary_str, iteration)

        #     # Training D.
        #     for _ in range(opt.d_train_num):
        #         summary_str, D_loss, _ = sess.run(
        #             [model.total_loss_merge_sum, model.d_loss, model.D_pre_train_op], feed)
        #         writer.add_summary(summary_str, iteration)

        #     if iteration % opt.losses_log_every == 0:

        #     # if iteration != 0 and iteration % opt.save_checkpoint_every == 0:
        #         # checkpoint_path = os.path.join(opt.checkpoint_path, 'checkpoint.ckpt')
        #         # print 'Saving the model in "%s"' % checkpoint_path

        #         # model.saver.save(sess, checkpoint_path, global_step=iteration)
        #         # test_loader = Dataset2(test_data, test_label)

        #         # test_num = test_loader._num_examples
        #         # test_iter_num = (test_num - batch_size) / batch_size

        #         # total_evil_accuracy = 0.0
        #         # total_good_accuracy = 0.0

        #         # fake_samples = [[] for _ in range(test_loader._num_labels)]
        #         # fake_noise = [[] for _ in range(test_loader._num_labels)]
        #         # original_samples = [[] for _ in range(test_loader._num_labels)]

        #         # for _ in range(test_iter_num):

        #         #     # Loading the next batch of test images
        #         #     test_input_data, test_evil_labels, _ = \
        #         #         test_loader.next_batch(batch_size)
        #         #     evil_categorical_labels = np.argmax(test_evil_labels, axis=1)
        #         #     test_good_labels = odd_even_labels(test_evil_labels)
        #         #     feed = {
        #         #         model.source: test_input_data,
        #         #         model.evil_labels: test_evil_labels,
        #         #         model.good_labels: test_good_labels
        #         #     }

        #         #     # if opt.cgan_gen:
        #         #     #     feed[model.label_clue] = label_clue[test_evil_labels.argmax(axis=1)]

        #         #     evil_accuracy, good_accuracy = sess.run(
        #         #         [model.evil_accuracy, model.good_accuracy], feed)
        #         #     # We divide the total accuracy by the number of test iterations.
        #         #     total_good_accuracy += good_accuracy
        #         #     total_evil_accuracy += evil_accuracy
        #         #     # print 'Evil accuracy: %.6f\tGood accuracy: %.6f' % (
        #         #     #     evil_accuracy, good_accuracy)
        #         #     # test_accuracy, test_adv_accuracy = sess.run(
        #         #     #     [model.accuracy, model.adv_accuracy], feed)
        #         #     # test_acc += test_accuracy
        #         #     # test_adv_acc += test_adv_accuracy

        #         #     # fake_images, g_x = sess.run(
        #         #     #     [model.fake_images_sample, model.sample_noise],
        #         #     #     {model.source: test_input_data})

        #         #     # for lbl in range(test_loader._num_labels):
        #         #     #     if len(fake_samples[lbl]) < 10:
        #         #     #         idx = np.where(evil_categorical_labels == lbl)[0]
        #         #     #         if idx.shape[0] >= 10:
        #         #     #             fake_samples[lbl] = fake_images[idx[:10]]
        #         #     #             fake_noise[lbl] = g_x[idx[:10]]
        #         #     #             original_samples[lbl] = test_input_data[idx[:10]]

        #         #     # for lbl, sample, noise in zip(test_evil_labels, fake_images, fake_noise):
        #         #     #     if len(fake_samples[lbl]) > 10:
        #         #     #         continue
        #         #     #     fake_samples[lbl].append(sample)
        #         #     #     fake_noise[lbl].append(noise)

        #         #     # pdb.set_trace()
        #         #     # print fake_images.shape

        #         #     # Finding those predicted labels that are equal to the target label
        #         #     # idxs = np.where(out_predict_labels == target_label)[0]
        #         #     # save_images(samples[:100], [10, 10], 'CIFAR10/result2/test_' + str(source_idx) + str(target_idx)+  '_.png')
        #         #     # pdb.set_trace()
        #         #     # show_samples.append(samples)
        #         #     # input_samples.append(s_imgs)
        #         #     # save_samples.append(samples)
        #         #     # if opt.is_advGAN:
        #         #     #     save_samples.append(samples[idxs])
        #         #     # else:
        #         #         # We add all samples.
        #         # # show_samples = np.concatenate(show_samples, axis=0)
        #         # # save_samples = np.concatenate(save_samples, axis=0)
        #         # good_accuracy = total_good_accuracy / float(test_iter_num)
        #         # evil_accuracy = total_evil_accuracy / float(test_iter_num)
        #         # print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
        #         # print '\tGood accuracy %f, Evil accuracy %f' % (
        #         #     good_accuracy, evil_accuracy)

        #         # Resizing the samples to save them later on.
        #         # fake_samples = np.reshape(np.array(fake_samples), [100, -1])
        #         # original_samples = np.reshape(np.array(original_samples), [100, -1])
        #         # fake_noise = np.reshape(np.array(fake_noise), [100, -1])

        #         # if (good_accuracy - evil_accuracy) > max_accuracy_diff:
        #         # test_accuracy = test_acc / float(test_iter_num)
        #         # test_adv_accuracy = test_adv_acc / float(test_iter_num)
        #         # if (good_accuracy - evil_accuracy) > max_accuracy_diff:
        #         #     max_accuracy_diff = good_accuracy - evil_accuracy
        #         # if min_adv_accuracy > test_adv_accuracy:
        #         #     min_adv_accuracy = test_adv_accuracy
        #         # save_images(fake_images[:100], [10, 10], 'fake.png')
        #         # save_images(test_input_data[:100], [10, 10], 'real.png')
        #         # all_idx = np.arange(100)
        #         # odds = np.where((all_idx / 10) % 2 == 1)[0]
        #         # evens = np.where((all_idx / 10) % 2 == 0)[0]
        #         # order = np.concatenate((odds, evens))
        #         # save_images(fake_samples[order], [10, 10], 'best_images.png')
        #         # save_images(fake_noise[order], [10, 10], 'best_noise.png')
        #         # save_images(original_samples[order], [10, 10], 'best_original.png')

        #         # save_anything = True
        #         # Saving the best yet model.
        #         # best_model_path = os.path.join(opt.checkpoint_path, 'best.ckpt')
        #         # print 'Saving the best model yet at "%s"' % best_model_path
        #         # model.saver.save(sess, best_model_path)

        #         # if save_anything is False:
        #         #     # Nothing is saved. We save a version here.
        #         #     save_images(fake_samples[:100], [10, 10], 'last_images.png')
        #         #     save_images(fake_noise[:100], [10, 10], 'last_noise.png')
        #         #     save_anything = True

        #     iteration += 1

        # We can transform the training and test data given in the beginning here.
        # This is only half the actual data.
        if opt.save_data:
            # if opt.cgan_gen:
            raise NotImplementedError(
                'Saving data for CGAN_GEN is not yet implemented.')
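
The commented-out lines near the top of this example indicate how the arrays in MNIST_data/B.npz were presumably produced: the TF1 MNIST helper yields images in [0, 1], which are mapped to [-1, 1]. A hedged sketch of generating such a file, assuming the deprecated tensorflow.examples.tutorials.mnist helper is still available; the file name and array keys follow the np.load call above.

import numpy as np
from tensorflow.examples.tutorials.mnist import input_data  # TF1-era helper

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Map pixel values from [0, 1] to [-1, 1], as in the commented-out loader above.
np.savez('MNIST_data/B.npz',
         train_data=mnist.train.images * 2.0 - 1.0,
         train_label=mnist.train.labels,
         test_data=mnist.test.images * 2.0 - 1.0,
         test_label=mnist.test.labels)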
Code Example #3
File: generate_lfw.py  Project: williamwhe/privateGAN
def train():
    opt = opts.parse_opt()
    opt.input_data = "MNIST"

    img_size = (opt.img_dim, opt.img_dim)
    print 'Dimension of images:', img_size
    train_data, train_label, id_gender = \
        get_30_people_chunk(opt.image_path, 1, gender_meta=True, img_size=img_size)
    test_data, test_label = get_30_people_chunk(opt.image_path,
                                                2,
                                                img_size=img_size)
    names = get_people_names(opt.image_path, 30)

    if opt.balance_data:
        ratio = opt.balance_ratio
        print 'Balancing dataset with ratio %f' % ratio
        train_data, train_label = balance_dataset(train_data, train_label)
        test_data, test_label = balance_dataset(test_data, test_label)

    if opt.balance_gender:
        print train_data.shape, train_label.shape
        print test_data.shape, test_label.shape
        print 'Balancing genders'
        selected_people = []
        for i in range(id_gender.shape[1]):
            indices, = np.where(id_gender[:, i] == 1)
            selected_people.append(np.random.choice(indices, 5, replace=False))
        selected_people = np.concatenate(selected_people)

        print 'Selected people are:'
        print np.array(names)[selected_people]

        selected_imgs = train_label[:, selected_people].sum(axis=1) != 0
        train_data = train_data[selected_imgs, :]
        train_label = train_label[selected_imgs, :]

        selected_imgs = test_label[:, selected_people].sum(axis=1) != 0
        test_data = test_data[selected_imgs, :]
        test_label = test_label[selected_imgs, :]

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tMax, Min Train: %.4f, %.4f' % (np.max(train_data),
                                            np.min(train_data))
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)
    print '\tMax, Min Test: %.4f, %.4f' % (np.max(test_data),
                                           np.min(test_data))

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 3
    opt.output_c_dim = 3
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    input_shape = (x_dim, x_dim, opt.input_c_dim)

    batch_size = opt.batch_size
    print 'Batch size: %d' % batch_size

    NUM_REPR = 5
    NUM_SAMPLES_EACH = int(batch_size / NUM_REPR / 2)
    output_samples = get_output_samples(train_data, train_label, id_gender,
                                        NUM_REPR, NUM_SAMPLES_EACH)

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    iteration_time = []
    with tf.Session(config=tf_config) as sess:

        id_model_path = '%s_%d_id_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving evil model from "%s"' % id_model_path
        evil_model = FaceRecognizer(id_model_path, train_label.shape[1],
                                    input_shape, opt.input_c_dim)

        gender_model_path = '%s_%d_gender_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving good model from "%s"' % gender_model_path
        good_model = FaceRecognizer(gender_model_path, 2, input_shape,
                                    opt.input_c_dim)
        model = advGAN(good_model, evil_model, opt, sess, mnist=False)

        iteration = 0
        if opt.resnet_gen:
            generator_mode = 'ResNet'
        else:
            generator_mode = 'Regular'
        summary_dir = "logs/LFW/g_%d_ld_%d_gl_%d_L2_%.2f_lr_%.4f_%s/" % (
            opt.G_lambda, opt.ld, opt.good_loss_coeff, opt.L2_lambda,
            opt.learning_rate, generator_mode)
        if os.path.isdir(summary_dir) is False:
            print 'Creating directory %s for logs.' % summary_dir
            os.mkdir(summary_dir)
        # else:
        #     print 'Removing all files in %s' % (summary_dir + '*')
        #     shutil.rmtree(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        print 'Maximum iterations: %d' % opt.max_iteration
        max_acc_diff = -1.0
        while iteration < opt.max_iteration:
            # this function returns (data, label, np.array(target)).
            feed_data, evil_labels, real_data = loader.next_batch(
                batch_size, negative=False)
            good_labels = id_gender[np.argmax(evil_labels, axis=1)]

            feed = {
                model.source: feed_data,
                model.target: real_data,
                model.good_labels: good_labels,
                model.evil_labels: evil_labels
            }

            # Training G once.
            summary_str, G_loss, _ = sess.run(
                [model.total_loss_merge_sum, model.g_loss, model.G_train_op],
                feed)
            writer.add_summary(summary_str, iteration)

            # Training G twice.
            summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                    model.total_loss_merge_sum,
                    model.g_loss,
                    model.gan_loss,
                    model.hinge_loss,
                    model.l1_loss,
                    model.l2_loss,
                    model.good_fn_loss,
                    model.evil_fn_loss,
                    model.adv_loss,
                    model.total_loss,
                    model.G_train_op], feed)
            writer.add_summary(summary_str, iteration)

            # Training D.
            summary_str, D_loss, _ = \
                sess.run([model.total_loss_merge_sum, model.d_loss, model.D_pre_train_op], feed)
            writer.add_summary(summary_str, iteration)

            if iteration % opt.losses_log_every == 0:
                print "iteration: ", iteration
                print '\tD: %.4f, G: %.4f\n\thinge(%.2f): %.4f, L1(%.2f): %.4f, L2(%.2f): %.4f' % (
                    D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                    l1_loss, opt.L2_lambda, l2_loss)
                print '\t\tGAN total loss: %.4f' % gan_loss
                print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
                print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

                new_test_data = []
                new_pred_data = []
                head = 0
                last_batch = False
                while head < test_data.shape[0]:
                    if head + batch_size <= test_data.shape[0]:
                        tail = head + batch_size
                    else:
                        tail = test_data.shape[0]
                        head = test_data.shape[0] - batch_size
                        last_batch = True
                    cur_data, pred_data = sess.run(
                        [model.fake_images_output, model.prediction_ready],
                        {model.source: test_data[head:tail, :]})

                    if last_batch:
                        new_test_data.append(
                            cur_data[-(test_data.shape[0] % batch_size):, :])
                        new_pred_data.append(
                            pred_data[-(test_data.shape[0] % batch_size):, :])
                    else:
                        new_test_data.append(cur_data)
                        new_pred_data.append(pred_data)
                    head += batch_size
                new_test_data = np.concatenate(new_test_data)
                new_pred_data = np.concatenate(new_pred_data)

                good_pred = np.argmax(
                    model.good_model.model.predict(new_pred_data), axis=1)
                evil_pred = np.argmax(
                    model.evil_model.model.predict(new_pred_data), axis=1)
                evil_true = np.argmax(test_label, axis=1)
                good_true = np.argmax(id_gender[evil_true, :], axis=1)

                good_accuracy = accuracy_score(good_true, good_pred)
                evil_accuracy = accuracy_score(evil_true, evil_pred)
                total_good_confusion = confusion_matrix(good_true, good_pred)
                total_evil_confusion = confusion_matrix(
                    evil_true, evil_pred, labels=range(opt.evil_label_num))

                print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                    good_accuracy, evil_accuracy)
                print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
                print 'Good confusion matrix:'
                print total_good_confusion
                evil_misclass = total_evil_confusion.sum(
                    axis=0) - np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_misclass)
                print 'Top 3 Misclassifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_misclass[evil_idxs][:3]
                evil_tp = np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_tp)
                print 'Top 3 True classifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_tp[evil_idxs][:3]

                # print 'Selected people are:'
                # print names[evil_idxs].tolist()
                # print evil_tp
                # print total_evil_confusion
                # print evil_idxs

                fake_samples, fake_noise = sess.run(
                    [model.fake_images_output, model.fake_noise_output],
                    {model.source: output_samples})

                fakes = merge(fake_samples, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                original = merge(output_samples,
                                 [2 * NUM_REPR, NUM_SAMPLES_EACH])
                noise = merge(fake_noise, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                final_image = np.concatenate([fakes, noise, original], axis=1)

                scipy_imsave('snapshot_%d.png' % iteration, final_image)

                if (good_accuracy - evil_accuracy) > max(0.5, max_acc_diff):
                    print '\tSaving new training data at accuracy diff: %.4f' % (
                        good_accuracy - evil_accuracy),
                    max_acc_diff = good_accuracy - evil_accuracy

                    # other_good = FaceRecognizer('%s_%d_gender_0' % (opt.lfw_base_path, x_dim),
                    #                             2, input_shape, opt.input_c_dim)

                    # other_pred = np.argmax(other_good.model.predict(new_pred_data), axis=1)
                    # print 'Other Good accuracy: %.4f' % accuracy_score(good_true, other_pred)

                    # other_pred = np.argmax(other_good.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    # print '\tTest data preprocessed accuracy: %.4f' % \
                    #     accuracy_score(good_true, other_pred)

                    # other_evil = FaceRecognizer('%s_%d_id_0' % (opt.lfw_base_path, x_dim),
                    #                             34, input_shape, opt.input_c_dim)
                    # other_pred = np.argmax(other_evil.model.predict(new_pred_data), axis=1)
                    # print 'Other Evil accuracy: %.4f' % accuracy_score(evil_true, other_pred)
                    # other_pred = np.argmax(other_evil.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    # print '\tTest data preprocessed accuracy: %.4f' % \
                    #     accuracy_score(evil_true, other_pred)

                    new_train_data = []
                    head = 0
                    last_batch = False
                    while head < train_data.shape[0]:
                        if head + batch_size <= train_data.shape[0]:
                            tail = head + batch_size
                        else:
                            tail = train_data.shape[0]
                            head = train_data.shape[0] - batch_size
                            last_batch = True
                        cur_data = sess.run(
                            model.fake_images_output,
                            {model.source: train_data[head:tail, :]})

                        if last_batch:
                            new_train_data.append(
                                cur_data[-(train_data.shape[0] %
                                           batch_size):, :])
                        else:
                            new_train_data.append(cur_data)
                        head += batch_size
                    new_train_data = np.concatenate(new_train_data)

                    np.savez_compressed(opt.output_path,
                                        train_data=new_train_data,
                                        org_train_data=train_data,
                                        train_label=train_label,
                                        test_data=new_test_data,
                                        org_test_data=test_data,
                                        test_label=test_label,
                                        id_gender=id_gender)
                    print '\t[DONE]'

            iteration += 1
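
This example and the previous one repeat the same inference batching idiom several times: the final batch is shifted back so it still has batch_size rows, and the duplicated leading rows are trimmed from its output. A stand-alone sketch of that idiom with a generic predict_fn standing in for the sess.run calls (hypothetical helper, not part of the project):

import numpy as np

def batched_outputs(data, batch_size, predict_fn):
    # Assumes data has at least batch_size rows, as in the examples above.
    assert data.shape[0] >= batch_size
    outputs = []
    head = 0
    while head < data.shape[0]:
        if head + batch_size <= data.shape[0]:
            outputs.append(predict_fn(data[head:head + batch_size]))
        else:
            # Clamp the last batch to the end and keep only the new rows.
            remainder = data.shape[0] % batch_size
            last = predict_fn(data[data.shape[0] - batch_size:])
            outputs.append(last[-remainder:])
        head += batch_size
    return np.concatenate(outputs)

# Usage, e.g. for the generator output of this example:
# new_test_data = batched_outputs(
#     test_data, batch_size,
#     lambda x: sess.run(model.fake_images_output, {model.source: x}))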
Code Example #4
batch_size = 6
NUM_CLASSES = batch_size

# Regression Network Parameters
num_input = 577 #  data input (64 * 3 * 3 + 1)
n_hidden_1 = 1024 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_hidden_3 = 1024 # 3rd layer number of neurons
alpha = 0.01 # alpha for leaky relu

# tf Graph input
X = tf.placeholder("float", [batch_size, num_input])
phase = tf.placeholder(tf.bool, name='phase')
global_step = tf.Variable(0, trainable=False)
## input data
data = Dataset2("", val = False, defstat = True, numC = NUM_CLASSES)
max_xtd_out = tf.constant(data.max_xtd_out)
min_xtd_out = tf.constant(data.min_xtd_out)
max_xtd_in = tf.constant(data.max_xtd_in)
min_xtd_in = tf.constant(data.min_xtd_in)

# network architecture
X_norm = (X - min_xtd_in) / (max_xtd_in - min_xtd_in)
w_pred = regress_net(X_norm, phase, num_input, n_hidden_1, n_hidden_2, n_hidden_3)
w_pred_denorm = w_pred * (max_xtd_out - min_xtd_out) + min_xtd_out


gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
init = tf.global_variables_initializer()
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    global_step = tf.Variable(0, trainable=False)
    ## decreasing weights for regression loss
    lambda_tf = tf.train.exponential_decay(lambda1,
                                           global_step,
                                           1000,
                                           0.8,
                                           staircase=True)
    lambda_tf = tf.maximum(tf.constant(lambda_min), lambda_tf)
    # decreasing learning rate
    lr = tf.train.exponential_decay(learning_rate,
                                    global_step,
                                    5000,
                                    0.95,
                                    staircase=True)
    ## input data
    data = Dataset2(args.pair, val=False, defstat=True, numC=NUM_CLASSES)

    # stat from data
    max_xtd_out = tf.constant(data.max_xtd_out)
    min_xtd_out = tf.constant(data.min_xtd_out)
    max_xtd_in = tf.constant(data.max_xtd_in)
    min_xtd_in = tf.constant(data.min_xtd_in)

    ## network architecture
    w_pred = regress_net(X, phase, num_input, n_hidden_1, n_hidden_2, n_hidden_3)
    w_pred_denorm = w_pred * (max_xtd_out - min_xtd_out) + min_xtd_out
    y_pred_seg_pred = prediction_net(F, w_pred_denorm)
    y_pred_seg_pred_prob = tf.nn.softmax(y_pred_seg_pred, dim=1)
    w_true = Y

    ## define loss and optimizer
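
The X_norm line and the w_pred_denorm line in this example form a min-max round trip: inputs are scaled to [0, 1] with the dataset statistics, and predictions are mapped back to their original range. A tiny NumPy illustration of that pairing, with made-up statistics standing in for data.max_xtd_in and friends:

import numpy as np

# Hypothetical per-feature statistics; in the example they come from Dataset2.
min_in, max_in = np.array([0.0, -1.0]), np.array([10.0, 1.0])
min_out, max_out = np.array([2.0]), np.array([8.0])

x = np.array([[5.0, 0.0]])
x_norm = (x - min_in) / (max_in - min_in)    # inputs scaled to [0, 1]: [[0.5, 0.5]]

w_norm = np.array([[0.5]])                   # stand-in for the network output
w = w_norm * (max_out - min_out) + min_out   # back to original units: [[5.0]]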
Code Example #6
        dataset_index = dataset.get_index()
        n_clusters = dataset.get_n_clusters()
        sill_scores_csv_file_path = get_csv_file_path(num_random_stats,
                                                      dataset_index,
                                                      n_clusters)

        result_df = GlobalFunctions.get_df_by_path(sill_scores_csv_file_path)

        stat_test_results_df, sorted_df = sort_df_by_stat_test(result_df)

        ############################# save stat test results
        file_name = f"{num_random_stats}RandomStatesWith{n_clusters}ClustersStatisiticalTestResults.csv"
        csv_file_path = get_csv_file_path(len(self.randomStateList),
                                          dataset_index, n_clusters, file_name)
        print(stat_test_results_df)
        stat_test_results_df.to_csv(csv_file_path)

        ############################ save sorted.
        file_name = f"{num_random_stats}RandomStatesWith{n_clusters}ClustersSorted.csv"
        csv_file_path = get_csv_file_path(len(self.randomStateList),
                                          dataset_index, n_clusters, file_name)
        sorted_df.loc['mean'] = sorted_df.mean()
        sorted_df.to_csv(csv_file_path)


if __name__ == "__main__":
    ST = StatisticalTest()
    ds = Dataset2()
    ds.prepareDataset()
    ST.createCSV(ds)
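
The sorted_df.loc['mean'] = sorted_df.mean() line appends the per-column means as one extra summary row before writing the CSV. A small self-contained pandas illustration of that pattern (toy scores, not project data):

import pandas as pd

df = pd.DataFrame({'KMeans': [0.41, 0.39], 'DBSCAN': [0.35, 0.37]},
                  index=['state_0', 'state_1'])
df.loc['mean'] = df.mean()   # adds a row holding each column's mean
print(df)
#          KMeans  DBSCAN
# state_0    0.41    0.35
# state_1    0.39    0.37
# mean       0.40    0.36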
Code Example #7
        x = np.arange(len(labels))
        width = 0.3
        if self.is_remove_outliers:
            ax[i, j].bar(x - width / 2,
                         algoNameSilhouetteScoreDict.values(),
                         width=width,
                         color='black',
                         label='All Data')
            ax[i, j].bar(x + width / 2,
                         clean_data_algo_name_sil_score_dict.values(),
                         width=width,
                         label='Data With Anomalous Points Removed')
            ax[i, j].set_xticks(x)
            ax[i, j].set_xticklabels(labels)
            ax[i, j].legend()
        else:
            ax[i, j].bar(x, algoNameSilhouetteScoreDict.values())
        ax[i, j].set_title(f"Silhouette Score", fontsize=fontsize)

        # ---------- Save Plot ----------
        plot_file_path = self.get_plot_file_path(randomState, datasetIndex)
        plt.savefig(plot_file_path)
        plt.close()


# Due to memory interference between saving the figure and accessing the CSV file that holds the data, this function cannot loop over the datasets and save all of their figures in a single run.
if __name__ == '__main__':
    plotClusters = PlotClusters()
    # plotClusters.plotAndSaveOne(Dataset1())
    plotClusters.plotAndSaveOne(Dataset2())
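
The grouped bars in this example come from two bar() calls offset by half the bar width around each tick. A minimal stand-alone matplotlib sketch of that pattern, with hypothetical algorithm names and toy scores:

import numpy as np
import matplotlib.pyplot as plt

labels = ['KMeans', 'Agglomerative', 'DBSCAN']   # hypothetical labels
all_data = [0.41, 0.38, 0.35]                    # toy silhouette scores
cleaned = [0.44, 0.40, 0.37]

x = np.arange(len(labels))
width = 0.3

fig, ax = plt.subplots()
ax.bar(x - width / 2, all_data, width=width, color='black', label='All Data')
ax.bar(x + width / 2, cleaned, width=width,
       label='Data With Anomalous Points Removed')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
ax.set_title("Silhouette Score")
fig.savefig('grouped_silhouette.png')
plt.close(fig)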
Code Example #8
from Dataset1 import Dataset1
from Dataset2 import Dataset2

global dataset_obj_list

dataset_obj_list = [Dataset1(), Dataset2()]