def main(parameters=settings.parameters, regenerate_parameters_cache=False):
    step = 0.01
    choice_K = np.arange(step, 2 + step, step)  # candidate K values: 0.01, 0.02, ..., 2.00

    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)
    kernel_tsne_mapping = kernelized_tsne.generate_kernelized_tsne_mapping_function(
        parameters=parameters,
        regenerate_parameters_cache=regenerate_parameters_cache
    )
    kernelized_detailed_tsne_letter_As_results = list()
    kernelized_detailed_tsne_method_list = ["Kernelized tSNE; K=%.2f" % (k) for k in choice_K]
    kernelized_detailed_tsne_time = np.zeros((len(kernelized_detailed_tsne_method_list),))

    for j, k in enumerate(choice_K):
        logging.info("K = %f", k)

        embedder_start_time = datetime.datetime.now()
        kernelized_detailed_tsne_letter_As_results.append(kernel_tsne_mapping(letter_A_samples, k=k))
        embedder_end_time = datetime.datetime.now()
        kernelized_detailed_tsne_time[j] = (embedder_end_time - embedder_start_time).total_seconds()
        logging.info("%f complete: %f s", k, kernelized_detailed_tsne_time[j])

    output_file = generate_letter_A_results_filename(parameters=parameters)
    with open(output_file, 'wb') as f:
        pickle.dump((kernelized_detailed_tsne_letter_As_results, kernelized_detailed_tsne_time,
                     kernelized_detailed_tsne_method_list), f)
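# A minimal run guard; not part of the original script, and whether this module is meant to be
# executed directly is an assumption. It enables INFO logging so the per-K progress messages
# above are visible, then runs the sweep with the default parameters.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main(regenerate_parameters_cache=False)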
def main(regenerate_model1=False,
         regenerate_model2=False,
         regenerate_model3=False,
         parameters=settings.parameters):
    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)

    models_and_results = neural_network_commons.train_or_load_models(
        regenerate_model1=regenerate_model1,
        regenerate_model3=regenerate_model3,
        regenerate_model2=regenerate_model2,
        parameters=parameters)

    model1, model2, model3 = models_and_results["models"]
    Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist = models_and_results["Y_predicted"]

    Y_outl1_mnist = model1.predict(letter_A_samples)
    Y_outl2_mnist = model2.predict(letter_A_samples)
    Y_outl3_mnist = model3.predict(letter_A_samples)

    nn_models_orig = [Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist]
    nn_method_list = [
        'NN - 2L; 250N; ReLu; D0.25', 'NN - 2L; 500N; ReLu; D0.5',
        'NN - 1L; 500N; tanh'
    ]

    nn_letter_As_results = [Y_outl1_mnist, Y_outl2_mnist, Y_outl3_mnist]
    output_file = generate_letter_A_results_filename(parameters)

    with open(output_file, 'wb') as f:
        pickle.dump((nn_letter_As_results, nn_models_orig, nn_method_list), f)
def get_common_info(parameters):
    res = {}
    res['dTSNE_mnist'] = generate_data.load_dtsne_mnist(parameters=parameters)
    res['X_mnist'] = generate_data.load_x_mnist(parameters=parameters)
    res['Y_mnist'] = generate_data.load_y_mnist(parameters=parameters)
    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)
    res['letter_A_samples'] = letter_A_samples
    D_Y = distance.squareform(distance.pdist(res['Y_mnist']))
    # Distance to the closest neighbor, excluding the point itself.
    np.fill_diagonal(D_Y, np.inf)
    # D_Y is symmetric, so either axis gives the same result.
    res['nearest_neighbors_y_dist'] = np.min(D_Y, axis=1)
    return res
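# A self-contained toy illustration (not part of the pipeline; the helper name is arbitrary) of
# the nearest-neighbor-distance trick used in get_common_info: pdist/squareform builds the full
# symmetric distance matrix, the diagonal is set to np.inf so a point is never its own nearest
# neighbor, and the row-wise minimum (axis=0 gives the same result, since the matrix is
# symmetric) is each point's distance to its closest other point.
def _nearest_neighbor_distances_example():
    import numpy as np
    from scipy.spatial import distance
    toy_Y = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 3.0]])
    D = distance.squareform(distance.pdist(toy_Y))
    np.fill_diagonal(D, np.inf)
    return np.min(D, axis=1)  # -> array([1., 1., 3.])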
def main(parameters=settings.parameters):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Distance to the closest neighbor, excluding the point itself.
    np.fill_diagonal(D_Y, np.inf)
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # D_Y is symmetric, so either axis works

    # ============== KL Divergence
    gd_method_list = [
        r'Closest $Y_{init}$', r'Random $Y_{init}$',
        r'Closest $Y_{init}$; new $\sigma$',
        r'Random $Y_{init}$; new $\sigma$', r'Closest $Y_{init}$; EE',
        r'Random $Y_{init}$; EE', r'Closest $Y_{init}$; new $\sigma$; EE',
        r'Random $Y_{init}$; new $\sigma$; EE'
    ]

    gd_results_file = exp_letter_A_test_GD.generate_letter_A_results_filename(
        parameters=parameters)
    with open(gd_results_file, 'rb') as f:
        (letters_A_y_gd_transformed,
         letters_A_y_gd_variance_recalc_transformed,
         letters_A_y_gd_transformed_random,
         letters_A_y_gd_variance_recalc_transformed_random,
         letters_A_y_gd_early_exagg_transformed_random,
         letters_A_y_gd_early_exagg_transformed,
         letters_A_y_gd_variance_recalc_early_exagg_transformed_random,
         picked_random_starting_positions,
         letters_A_y_gd_variance_recalc_early_exagg_transformed,
         covered_samples) = pickle.load(f)

    gd_letters_A_results = [
        letters_A_y_gd_transformed,
        letters_A_y_gd_transformed_random,
        letters_A_y_gd_variance_recalc_transformed,
        letters_A_y_gd_variance_recalc_transformed_random,
        letters_A_y_gd_early_exagg_transformed,
        letters_A_y_gd_early_exagg_transformed_random,
        letters_A_y_gd_variance_recalc_early_exagg_transformed,
        letters_A_y_gd_variance_recalc_early_exagg_transformed_random,
    ]

    gd_letters_A_kl = np.zeros((len(gd_method_list), len(letter_A_samples)))

    input_time_file = exp_letter_A_test_GD.generate_time_results_filename(
        parameters)
    with open(input_time_file, 'rb') as f:
        (letter_As_y_time_gd_transformed,
         letter_As_y_time_gd_variance_recalc_transformed,
         letter_As_y_time_gd_transformed_random,
         letter_As_y_time_gd_variance_recalc_transformed_random,
         letter_As_y_time_gd_early_exagg_transformed_random,
         letter_As_y_time_gd_early_exagg_transformed,
         letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random,
         letter_As_y_time_gd_variance_recalc_early_exagg_transformed,
         covered_samples) = pickle.load(f)

    gd_time = [
        np.mean(letter_As_y_time_gd_transformed),
        np.mean(letter_As_y_time_gd_transformed_random),
        np.mean(letter_As_y_time_gd_variance_recalc_transformed),
        np.mean(letter_As_y_time_gd_variance_recalc_transformed_random),
        np.mean(letter_As_y_time_gd_early_exagg_transformed),
        np.mean(letter_As_y_time_gd_early_exagg_transformed_random),
        np.mean(letter_As_y_time_gd_variance_recalc_early_exagg_transformed),
        np.mean(
            letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random
        ),
    ]

    processed_indices = list()

    kl_gd_letters_A_performance_file = generate_gd_kl_temp_filename(parameters)
    if os.path.isfile(kl_gd_letters_A_performance_file):
        with open(kl_gd_letters_A_performance_file, 'rb') as f:
            gd_letters_A_kl, processed_indices = pickle.load(f)

    # Computing the KL divergence increase for all 1000 samples is very slow; most of the time goes into building the P-matrix.
    per_sample_KL = np.zeros((len(letter_A_samples), ))
    for i in range(len(letter_A_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)
        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.letter_A_parameter_set,
            parameters, os.sep)
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one file per sample, so they can be (re)loaded individually.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate(
                (X_mnist, letter_A_samples[i, :].reshape((1, -1))), axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)
        # The P-matrix is shared across all methods, so it is computed once per sample.
        for j in range(len(gd_letters_A_results)):
            new_Y = np.concatenate(
                (Y_mnist, gd_letters_A_results[j][i, :].reshape((1, -1))),
                axis=0)
            gd_letters_A_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_gd_letters_A_performance_file, 'wb') as f:
            pickle.dump((gd_letters_A_kl, processed_indices), f)
    # Averaging is fast compared to the per-sample KL computation above.
    gd_avg_letters_A_kl = np.mean(gd_letters_A_kl, axis=1)

    # ============== Distance percentiles
    gd_letters_A_percentiles_matrix = np.zeros(
        (len(letter_A_samples), len(gd_method_list)))
    gd_letters_A_distance_matrix = np.zeros(
        (len(letter_A_samples), len(gd_method_list)))
    for i in range(len(letter_A_samples)):
        for j in range(len(gd_method_list)):
            y = gd_letters_A_results[j][i, :]
            nn_dist = np.min(np.sqrt(np.sum((Y_mnist - y)**2, axis=1)))
            gd_letters_A_distance_matrix[i, j] = nn_dist
            gd_letters_A_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)
    gd_letters_A_distance_percentiles = np.mean(
        gd_letters_A_percentiles_matrix, axis=0)
    gd_letters_A_distances = np.mean(gd_letters_A_distance_matrix, axis=0)
    for j in range(len(gd_method_list)):
        logging.info("%s: %f, %f", gd_method_list[j],
                     gd_letters_A_distances[j],
                     gd_letters_A_distance_percentiles[j])

    output_file = generate_gd_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((gd_method_list, gd_time, gd_avg_letters_A_kl,
                     gd_letters_A_distance_percentiles), f)
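# How the "distance percentile" metric above is read: stats.percentileofscore(baseline, d) gives
# the percentage rank of d within the baseline nearest-neighbor distances (with no ties, the
# percentage of baseline values strictly below d). A toy illustration, not part of the pipeline;
# the helper name and the numbers are made up:
def _distance_percentile_example():
    import numpy as np
    from scipy import stats
    baseline_nn_dist = np.array([0.1, 0.2, 0.3, 0.4])  # nearest-neighbor distances within the training embedding
    new_point_nn_dist = 0.35  # distance from an embedded test point to its closest training point
    # Three of the four baseline values lie below 0.35, so the result is 75.0.
    return stats.percentileofscore(baseline_nn_dist, new_point_nn_dist)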
import matplotlib.pyplot as plt
import generate_data
import settings
import logging

logging.basicConfig(level=logging.INFO)

_, letter_A_samples_raw = generate_data.load_A_letters(
    parameters=settings.parameters)

width = 10  # total number of images to show
height = 1
start_index = 0

f, ax = plt.subplots(height, width, dpi=300)
f.set_size_inches(3.3, 0.33)  # (3.3, 1.0) for 3 rows, (3.3, 0.66) for 2 rows, (3.3, 0.33) for 1 row
f.subplots_adjust()
#f.tight_layout()
if height > 1:
    for i in range(height):
        for j in range(width):
            ax[i, j].imshow(letter_A_samples_raw[i * width + j, :].reshape(
                28, 28),
                            cmap='gray_r')
            # set_axis_off() is not used here: the bounding box around each image should stay visible.
            ax[i, j].axes.get_xaxis().set_visible(False)
            ax[i, j].axes.get_yaxis().set_visible(False)
else:
    for j in range(width):
        ax[j].imshow(letter_A_samples_raw[j, :].reshape(28, 28), cmap='gray_r')
        ax[j].axes.get_xaxis().set_visible(False)
        ax[j].axes.get_yaxis().set_visible(False)
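# When this snippet runs as a standalone script rather than as a notebook cell, the figure needs
# an explicit save or show; the output filename below is an assumption, not taken from the
# original code.
plt.savefig('letter_A_row.png', bbox_inches='tight', dpi=300)
plt.show()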
import generate_data
import settings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

parameters = settings.parameters
X_mnist_raw = generate_data.load_x_mnist_raw(parameters=parameters)
letters_A, letters_A_raw = generate_data.load_A_letters(parameters=parameters)

print(letters_A_raw.shape, np.max(letters_A_raw[0, :]),
      np.min(letters_A_raw[0, :]))

width = 10  # number of images per row
start_index = 0

height = 10  # number of rows of images to show

f, ax = plt.subplots(height, width)
f.set_size_inches(16, 16)
f.subplots_adjust()
for i in range(height):
    for j in range(width):
        ax[i][j].imshow(letters_A_raw[start_index + width * i + j, :].reshape(28, 28),
                        cmap='gray_r')
        # Label text under each image is intentionally not drawn; only the raw letter images are shown.
def main(parameters=settings.parameters, only_time=False):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)

    # Computing everything from scratch takes a very long time, so results are cached and pre-loaded when possible.

    covered_samples = list()

    first_sample_inc = 0  # Change only in the "other notebooks just for parallelization", each of which covers a sub-range of samples.
    last_sample_exclusive = len(letter_A_samples)
    output_file = generate_letter_A_results_filename(parameters)
    output_time_file = generate_time_results_filename(parameters)

    letter_As_y_gd_transformed = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_variance_recalc_transformed = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_transformed_random = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_variance_recalc_transformed_random = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))

    letter_As_y_gd_early_exagg_transformed_random = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_early_exagg_transformed = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_variance_recalc_early_exagg_transformed_random = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))
    letter_As_y_gd_variance_recalc_early_exagg_transformed = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))

    letter_As_random_starting_positions = np.zeros(
        (len(letter_A_samples), Y_mnist.shape[1]))

    letter_As_y_time_gd_transformed = np.zeros((len(letter_A_samples), ))
    letter_As_y_time_gd_variance_recalc_transformed = np.zeros(
        (len(letter_A_samples), ))
    letter_As_y_time_gd_transformed_random = np.zeros(
        (len(letter_A_samples), ))
    letter_As_y_time_gd_variance_recalc_transformed_random = np.zeros(
        (len(letter_A_samples), ))

    letter_As_y_time_gd_early_exagg_transformed_random = np.zeros(
        (len(letter_A_samples), ))
    letter_As_y_time_gd_early_exagg_transformed = np.zeros(
        (len(letter_A_samples), ))
    letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random = np.zeros(
        (len(letter_A_samples), ))
    letter_As_y_time_gd_variance_recalc_early_exagg_transformed = np.zeros(
        (len(letter_A_samples), ))

    for i in range(first_sample_inc, last_sample_exclusive):
        # Reset the random seed for every sample, keyed by the ABSOLUTE sample number.
        # Otherwise the random sequence would "shift" depending on which partial results were
        # loaded from file: with a single seed(0) before the loop, a fresh run gives sample 0
        # the sequence [abc] and sample 1 the sequence [def], but if sample 0 is loaded from
        # file, sample 1 gets [abc] instead, and reproducibility is lost.
        np.random.seed(i)
        logging.info(" ====================== Sample %d \n\n", i)
        if i in covered_samples:
            logging.info("Already loaded.")
        else:
            letter_A = letter_A_samples[i].reshape((1, -1))

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_transformed[i, :] = dTSNE_mnist.transform(
                letter_A,
                y='closest',
                verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_transformed[i] = (
                embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time: %f s", letter_As_y_time_gd_transformed[i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_variance_recalc_transformed[
                i, :] = dTSNE_mnist.transform(
                    letter_A,
                    keep_sigmas=False,
                    y='closest',
                    verbose=2,
                    optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_variance_recalc_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR): %f s",
                         letter_As_y_time_gd_variance_recalc_transformed[i])

            # Pick the random start anywhere in the embedding's bounding box,
            # not necessarily near the center.
            y_start = np.array([[
                np.random.uniform(np.min(Y_mnist[:, 0]), np.max(Y_mnist[:, 0])),
                np.random.uniform(np.min(Y_mnist[:, 1]), np.max(Y_mnist[:, 1]))
            ]])

            letter_As_random_starting_positions[i, :] = y_start

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_transformed_random[i, :] = dTSNE_mnist.transform(
                letter_A,
                y=y_start,  # y='random',
                verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (random): %f s",
                         letter_As_y_time_gd_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_variance_recalc_transformed_random[
                i, :] = dTSNE_mnist.transform(
                    letter_A,
                    keep_sigmas=False,
                    y=y_start,
                    # y='random',
                    verbose=2,
                    optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_variance_recalc_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR, random): %f s",
                letter_As_y_time_gd_variance_recalc_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_early_exagg_transformed_random[
                i, :] = dTSNE_mnist.transform(
                    letter_A,
                    y=y_start,
                    # y='random',
                    verbose=2)
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE, random): %f s",
                         letter_As_y_time_gd_early_exagg_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_early_exagg_transformed[
                i, :] = dTSNE_mnist.transform(letter_A, y='closest', verbose=2)
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE): %f s",
                         letter_As_y_time_gd_early_exagg_transformed[i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_variance_recalc_early_exagg_transformed_random[
                i, :] = dTSNE_mnist.transform(letter_A,
                                              y=y_start,
                                              keep_sigmas=False,
                                              verbose=2)
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR,EE,random): %f s",
                letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random[
                    i])

            embedder_start_time = datetime.datetime.now()
            letter_As_y_gd_variance_recalc_early_exagg_transformed[
                i, :] = dTSNE_mnist.transform(letter_A,
                                              keep_sigmas=False,
                                              y='closest',
                                              verbose=2)
            embedder_end_time = datetime.datetime.now()
            letter_As_y_time_gd_variance_recalc_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR,EE): %f s",
                letter_As_y_time_gd_variance_recalc_early_exagg_transformed[i])

        covered_samples.append(i)
        logging.info("Saving...")
        # Gradient descent results take a long time to compute, so cache them after every sample.
        if not only_time:
            with open(output_file, 'wb') as f:
                pickle.dump((
                    letter_As_y_gd_transformed,
                    letter_As_y_gd_variance_recalc_transformed,
                    letter_As_y_gd_transformed_random,
                    letter_As_y_gd_variance_recalc_transformed_random,
                    letter_As_y_gd_early_exagg_transformed_random,
                    letter_As_y_gd_early_exagg_transformed,
                    letter_As_y_gd_variance_recalc_early_exagg_transformed_random,
                    letter_As_random_starting_positions,
                    letter_As_y_gd_variance_recalc_early_exagg_transformed,
                    covered_samples), f)
        with open(output_time_file, 'wb') as f:
            pickle.dump((
                letter_As_y_time_gd_transformed,
                letter_As_y_time_gd_variance_recalc_transformed,
                letter_As_y_time_gd_transformed_random,
                letter_As_y_time_gd_variance_recalc_transformed_random,
                letter_As_y_time_gd_early_exagg_transformed_random,
                letter_As_y_time_gd_early_exagg_transformed,
                letter_As_y_time_gd_variance_recalc_early_exagg_transformed_random,
                letter_As_y_time_gd_variance_recalc_early_exagg_transformed,
                covered_samples), f)
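# The eight transform variants above repeat the same start/stop timing pattern. A hypothetical
# helper, not part of the original code, that would factor out the repetition; it only assumes
# that dTSNE_mnist.transform is called with keyword arguments exactly as above.
def _timed_transform(embedder, sample, **transform_kwargs):
    """Run embedder.transform(sample, **transform_kwargs) and return (result, elapsed_seconds)."""
    start = datetime.datetime.now()
    result = embedder.transform(sample, **transform_kwargs)
    elapsed = (datetime.datetime.now() - start).total_seconds()
    return result, elapsed

# Hedged usage example for one of the variants:
# letter_As_y_gd_transformed[i, :], letter_As_y_time_gd_transformed[i] = _timed_transform(
#     dTSNE_mnist, letter_A, y='closest', verbose=2,
#     optimizer_kwargs={'early_exaggeration': None})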
def main(parameters=settings.parameters, regenerate=False):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    letter_A_samples, _ = generate_data.load_A_letters(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Distance to the closest neighbor, excluding the point itself.
    np.fill_diagonal(D_Y, np.inf)
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # D_Y is symmetric, so either axis works

    kernelized_results_file = exp_letter_A_test_kernelized.generate_letter_A_results_filename(
        parameters)
    with open(kernelized_results_file, 'rb') as f:
        kernelized_detailed_method_results, kernelized_detailed_tsne_time, kernelized_detailed_method_list = pickle.load(
            f)
    ind = [4, 24, 49]  # indices into the K grid of the kernelized sweep (step 0.01), i.e. K = 0.05, 0.25, 0.50

    kernelized_method_list = [
        kernelized_detailed_method_list[i][:10] +
        kernelized_detailed_method_list[i][-8:] for i in ind
    ]
    kernelized_letters_results = [
        kernelized_detailed_method_results[i] for i in ind
    ]

    # =========== DISTANCE PERCENTILES ==========
    kernelized_letters_percentiles_matrix = np.zeros(
        (len(letter_A_samples), len(kernelized_method_list)))
    kernelized_letters_distance_matrix = np.zeros(
        (len(letter_A_samples), len(kernelized_method_list)))
    for i in range(len(letter_A_samples)):
        for j in range(len(kernelized_method_list)):
            y = kernelized_letters_results[j][i, :]
            nn_dist = np.min(np.sqrt(np.sum((Y_mnist - y)**2, axis=1)))
            kernelized_letters_distance_matrix[i, j] = nn_dist
            kernelized_letters_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)
    kernelized_letters_distance_percentiles = np.mean(
        kernelized_letters_percentiles_matrix, axis=0)
    kernelized_letters_distances = np.mean(kernelized_letters_distance_matrix,
                                           axis=0)
    kernelized_per_item_time = kernelized_detailed_tsne_time / len(
        letter_A_samples)
    for j in range(len(kernelized_method_list)):
        logging.info("%s: %f, %f", kernelized_method_list[j],
                     kernelized_letters_distances[j],
                     kernelized_letters_distance_percentiles[j])

    kernelized_letters_kl = np.zeros(
        (len(kernelized_method_list), len(letter_A_samples)))
    processed_indices = list()

    kl_kernelized_tsne_letters_performance_file = generate_kernelized_kl_temp_filename(
        parameters)
    if os.path.isfile(
            kl_kernelized_tsne_letters_performance_file) and not regenerate:
        with open(kl_kernelized_tsne_letters_performance_file, 'rb') as f:
            kernelized_letters_kl, processed_indices = pickle.load(f)

    # =========== KL DIVERGENCE ==========
    # Computing the KL divergence increase for all 1000 samples is very slow; most of the time goes into building the P-matrix.
    per_sample_KL = np.zeros((len(letter_A_samples), ))
    for i in range(len(letter_A_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)
        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.letter_A_parameter_set,
            parameters, os.sep)
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one file per sample, so they can be (re)loaded individually.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate(
                (X_mnist, letter_A_samples[i, :].reshape((1, -1))), axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)
        # The P-matrix is shared across all methods, so it is computed once per sample.
        for j in range(len(kernelized_letters_results)):
            new_Y = np.concatenate(
                (Y_mnist, kernelized_letters_results[j][i, :].reshape((1, -1))),
                axis=0)
            kernelized_letters_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_kernelized_tsne_letters_performance_file, 'wb') as f:
            pickle.dump((kernelized_letters_kl, processed_indices), f)
    # Averaging is fast compared to the per-sample KL computation above.
    kernelized_avg_letters_kl = np.mean(kernelized_letters_kl, axis=1)

    output_file = generate_kernelized_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((kernelized_method_list, kernelized_avg_letters_kl,
                     kernelized_per_item_time,
                     kernelized_letters_distance_percentiles), f)
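# A hedged sketch (the loader name is an assumption) of reading the postprocess summary back,
# mirroring the tuple order written by pickle.dump just above.
def _load_kernelized_postprocess(parameters=settings.parameters):
    postprocess_file = generate_kernelized_postprocess_filename(parameters)
    with open(postprocess_file, 'rb') as f:
        (method_list, avg_letters_kl, per_item_time,
         distance_percentiles) = pickle.load(f)
    return method_list, avg_letters_kl, per_item_time, distance_percentiles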