Example 1
import datetime
import logging
import os
import pickle

import numpy as np
from scipy import stats
from scipy.spatial import distance

import exp_outlier_test_NN
import generate_data
import kernelized_tsne
import lion_tsne
import neural_network_commons
import settings

logging.basicConfig(level=logging.INFO)


def main(parameters=settings.parameters, regenerate_parameters_cache=False):
    step = 0.01
    choice_K = np.arange(step, 2 + step, step)  # Let's try those K.

    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)
    kernel_tsne_mapping = kernelized_tsne.generate_kernelized_tsne_mapping_function(
        parameters=parameters,
        regenerate_parameters_cache=regenerate_parameters_cache)
    kernelized_detailed_tsne_method_list = [
        "Kernelized tSNE; K=%.2f" % (k) for k in choice_K
    ]
    kernelized_detailed_tsne_outliers_results = list()
    kernelized_detailed_tsne_time = np.zeros(
        (len(kernelized_detailed_tsne_method_list), ))

    for j, k in enumerate(choice_K):
        logging.info("%f", k)

        embedder_start_time = datetime.datetime.now()
        kernelized_detailed_tsne_outliers_results.append(
            kernel_tsne_mapping(outlier_samples, k=k))
        embedder_end_time = datetime.datetime.now()
        kernelized_detailed_tsne_time[j] = (
            embedder_end_time - embedder_start_time).total_seconds()
        logging.info("%f complete: %f s", k, kernelized_detailed_tsne_time[j])

    output_file = generate_outlier_results_filename(parameters=parameters)
    with open(output_file, 'wb') as f:
        pickle.dump((kernelized_detailed_tsne_outliers_results,
                     kernelized_detailed_tsne_time,
                     kernelized_detailed_tsne_method_list), f)
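The dump above writes a 3-tuple, so any consumer has to unpack it in the same order. A minimal sketch of reading the results back, assuming the same `settings.parameters` and the module's own `generate_outlier_results_filename` helper:

import pickle

# Unpack order must match the pickle.dump order above:
# (results, times, method labels).
results_file = generate_outlier_results_filename(parameters=settings.parameters)
with open(results_file, 'rb') as f:
    (kernelized_results,  # one embedding array per K value
     kernelized_time,     # wall-clock seconds per K value
     kernelized_methods) = pickle.load(f)  # labels like "Kernelized tSNE; K=0.50"
print(kernelized_methods[0], kernelized_time[0])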
def get_common_info(parameters):
    res = {}
    res['dTSNE_mnist'] = generate_data.load_dtsne_mnist(parameters=parameters)
    res['X_mnist'] = generate_data.load_x_mnist(parameters=parameters)
    res['Y_mnist'] = generate_data.load_y_mnist(parameters=parameters)
    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)
    res['outlier_samples'] = outlier_samples
    D_Y = distance.squareform(distance.pdist(res['Y_mnist']))
    # Find the distance to each point's closest neighbor...
    np.fill_diagonal(D_Y, np.inf)  # ...excluding the point itself
    res['nearest_neighbors_y_dist'] = np.min(
        D_Y, axis=1)  # D_Y is symmetric, so either axis works
    return res
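The pdist/squareform/fill_diagonal idiom in get_common_info is easy to verify on toy data; this standalone sketch reproduces it:

import numpy as np
from scipy.spatial import distance

# Toy check of the nearest-neighbor-distance idiom used above.
pts = np.array([[0.0, 0.0], [1.0, 0.0], [5.0, 0.0]])
D = distance.squareform(distance.pdist(pts))  # full symmetric distance matrix
np.fill_diagonal(D, np.inf)                   # a point is not its own neighbor
print(np.min(D, axis=1))                      # -> [1. 1. 4.]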
def main(regenerate_model1=False, regenerate_model2=False, regenerate_model3=False,
         parameters=settings.parameters):
    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)

    models_and_results = neural_network_commons.train_or_load_models(
        regenerate_model1=regenerate_model1, regenerate_model2=regenerate_model2,
        regenerate_model3=regenerate_model3, parameters=parameters)

    model1, model2, model3 = models_and_results["models"]
    Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist = models_and_results["Y_predicted"]

    Y_outl1_mnist = model1.predict(outlier_samples)
    Y_outl2_mnist = model2.predict(outlier_samples)
    Y_outl3_mnist = model3.predict(outlier_samples)

    nn_models_orig = [Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist]
    nn_method_list = ['NN - 2L; 250N; ReLu; D0.25', 'NN - 2L; 500N; ReLu; D0.5',
                      'NN - 1L; 500N; tanh']

    nn_outliers_results = [Y_outl1_mnist, Y_outl2_mnist, Y_outl3_mnist]
    output_file = generate_outlier_results_filename(parameters)

    with open(output_file, 'wb') as f:
        pickle.dump((nn_outliers_results, nn_models_orig, nn_method_list), f)
def main(parameters=settings.parameters):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)

    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)

    nn_results_file = exp_outlier_test_NN.generate_outlier_results_filename(
        parameters)
    with open(nn_results_file, 'rb') as f:
        nn_outliers_results, nn_models_orig, nn_method_list = pickle.load(f)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Find the distance to each point's closest neighbor...
    np.fill_diagonal(D_Y, np.inf)  # ...excluding the point itself
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # symmetric, either axis works

    # ================ KL DIVERGENCE ===================
    nn_outliers_kl = np.zeros((len(nn_method_list), len(outlier_samples)))

    processed_indices = list()

    kl_nn_outliers_performance_file = generate_nn_kl_temp_filename(parameters)
    if os.path.isfile(kl_nn_outliers_performance_file):
        with open(kl_nn_outliers_performance_file, 'rb') as f:
            nn_outliers_kl, processed_indices = pickle.load(f)

    # The KL divergence increase for all 1000 samples is very slow to calculate;
    # most of the time goes into computing the P-matrix, so it is cached per sample.
    for i in range(len(outlier_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)
        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.outlier_parameter_set,
            parameters, os.sep)
        os.makedirs(distance_matrix_dir, exist_ok=True)  # cache dir may not exist yet
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one per sample so they can be loaded individually.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate((X_mnist, outlier_samples[i, :].reshape(
                (1, -1))),
                                   axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)
        # The P-matrix is shared across all methods; only the embedding differs.
        for j in range(len(nn_outliers_results)):
            new_Y = np.concatenate(
                (nn_models_orig[j], nn_outliers_results[j][i, :].reshape(
                    (1, -1))),
                axis=0)
            nn_outliers_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_nn_outliers_performance_file, 'wb') as f:
            pickle.dump((nn_outliers_kl, processed_indices), f)
    # This should be fast
    nn_avg_outliers_kl = np.mean(nn_outliers_kl, axis=1)

    # ================ DISTANCE MATRICES ===================
    nn_outliers_percentiles_matrix = np.zeros(
        (len(outlier_samples), len(nn_method_list)))
    nn_outliers_distance_matrix = np.zeros(
        (len(outlier_samples), len(nn_method_list)))
    for i in range(len(outlier_samples)):
        for j in range(len(nn_method_list)):
            y = nn_outliers_results[j][i, :]
            nn_dist = np.min(
                np.sqrt(np.sum((nn_models_orig[j] - y)**2, axis=1)))
            nn_outliers_distance_matrix[i, j] = nn_dist
            nn_outliers_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)
    nn_outliers_distance_percentiles = np.mean(nn_outliers_percentiles_matrix,
                                               axis=0)
    nn_outliers_distances = np.mean(nn_outliers_distance_matrix, axis=0)
    for j in range(len(nn_method_list)):
        logging.info("%s: %f, %f", nn_method_list[j], nn_outliers_distances[j],
                     nn_outliers_distance_percentiles[j])

    output_file = generate_nn_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((nn_method_list, nn_avg_outliers_kl,
                     nn_outliers_distance_percentiles), f)
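The distance percentiles above come from scipy.stats.percentileofscore, which reports where a value falls within a reference sample; a score of 100 means the projected outlier lands farther from the embedding than any original point is from its own nearest neighbor. A quick illustration:

import numpy as np
from scipy import stats

reference = np.array([1.0, 2.0, 3.0, 4.0])
print(stats.percentileofscore(reference, 2.5))   # 50.0: above half the sample
print(stats.percentileofscore(reference, 10.0))  # 100.0: above the whole sample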
Example 5
import datetime
import logging
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.spatial import distance

import exp_outlier_test_GD
import generate_data
import lion_tsne
import settings

logging.basicConfig(level=logging.INFO)

_, outlier_samples_raw = generate_data.load_outliers(
    parameters=settings.parameters)

width = 10  # number of images per row
height = 1  # number of rows
start_index = 0  # offset into the outlier samples

f, ax = plt.subplots(height, width, dpi=300)
# Figure height scales with the rows: 3.3 x 0.33 for 1 row, 3.3 x 0.66 for 2, 3.3 x 1 for 3.
f.set_size_inches(3.3, 0.33 * height)
f.subplots_adjust()
#f.tight_layout()
if height > 1:
    for i in range(height):
        for j in range(width):
            ax[i, j].imshow(
                outlier_samples_raw[start_index + i * width + j, :].reshape(28, 28),
                cmap='gray_r')
            # set_axis_off() would also remove the bounding box; only hide the ticks.
            ax[i, j].axes.get_xaxis().set_visible(False)
            ax[i, j].axes.get_yaxis().set_visible(False)
else:
    for j in range(width):
        ax[j].imshow(outlier_samples_raw[start_index + j, :].reshape(28, 28),
                     cmap='gray_r')
        ax[j].axes.get_xaxis().set_visible(False)
        ax[j].axes.get_yaxis().set_visible(False)
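Each raw outlier sample is a flat 784-element vector, and reshape(28, 28) recovers the row-major image grid that imshow expects. A toy check (synthetic data, not the project's loader):

import numpy as np

flat = np.arange(784, dtype=float)  # stand-in for one flattened 28x28 sample
img = flat.reshape(28, 28)          # row-major: row 1 starts at element 28
assert img.shape == (28, 28) and img[1, 0] == 28.0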
def main(parameters=settings.parameters):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Find the distance to each point's closest neighbor...
    np.fill_diagonal(D_Y, np.inf)  # ...excluding the point itself
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # symmetric, either axis works

    # ============== KL Divergence
    gd_method_list = [
        r'Closest $Y_{init}$', r'Random $Y_{init}$',
        r'Closest $Y_{init}$; new $\sigma$',
        r'Random $Y_{init}$; new $\sigma$', r'Closest $Y_{init}$; EE',
        r'Random $Y_{init}$; EE', r'Closest $Y_{init}$; new $\sigma$; EE',
        r'Random $Y_{init}$; new $\sigma$; EE'
    ]

    gd_results_file = exp_outlier_test_GD.generate_outlier_results_filename(
        parameters=parameters)
    with open(gd_results_file, 'rb') as f:
        (outliers_y_gd_transformed, outliers_y_gd_variance_recalc_transformed,
         outliers_y_gd_transformed_random,
         outliers_y_gd_variance_recalc_transformed_random,
         outliers_y_gd_early_exagg_transformed_random,
         outliers_y_gd_early_exagg_transformed,
         outliers_y_gd_variance_recalc_early_exagg_transformed_random,
         picked_random_starting_positions,
         outliers_y_gd_variance_recalc_early_exagg_transformed,
         covered_samples) = pickle.load(f)

    gd_outliers_results = [
        outliers_y_gd_transformed,
        outliers_y_gd_transformed_random,
        outliers_y_gd_variance_recalc_transformed,
        outliers_y_gd_variance_recalc_transformed_random,
        outliers_y_gd_early_exagg_transformed,
        outliers_y_gd_early_exagg_transformed_random,
        outliers_y_gd_variance_recalc_early_exagg_transformed,
        outliers_y_gd_variance_recalc_early_exagg_transformed_random,
    ]

    input_time_file = exp_outlier_test_GD.generate_time_results_filename(
        parameters)
    with open(input_time_file, 'rb') as f:
        (outliers_y_time_gd_transformed,
         outliers_y_time_gd_variance_recalc_transformed,
         outliers_y_time_gd_transformed_random,
         outliers_y_time_gd_variance_recalc_transformed_random,
         outliers_y_time_gd_early_exagg_transformed_random,
         outliers_y_time_gd_early_exagg_transformed,
         outliers_y_time_gd_variance_recalc_early_exagg_transformed_random,
         outliers_y_time_gd_variance_recalc_early_exagg_transformed,
         covered_samples) = pickle.load(f)

    gd_time = [
        np.mean(outliers_y_time_gd_transformed),
        np.mean(outliers_y_time_gd_transformed_random),
        np.mean(outliers_y_time_gd_variance_recalc_transformed),
        np.mean(outliers_y_time_gd_variance_recalc_transformed_random),
        np.mean(outliers_y_time_gd_early_exagg_transformed),
        np.mean(outliers_y_time_gd_early_exagg_transformed_random),
        np.mean(outliers_y_time_gd_variance_recalc_early_exagg_transformed),
        np.mean(
            outliers_y_time_gd_variance_recalc_early_exagg_transformed_random),
    ]

    gd_outliers_kl = np.zeros((len(gd_method_list), len(outlier_samples)))

    processed_indices = list()

    kl_gd_outliers_performance_file = generate_gd_kl_temp_filename(parameters)
    if os.path.isfile(kl_gd_outliers_performance_file):
        with open(kl_gd_outliers_performance_file, 'rb') as f:
            gd_outliers_kl, processed_indices = pickle.load(f)

    # The KL divergence increase for all 1000 samples is very slow to calculate;
    # most of the time goes into computing the P-matrix, so it is cached per sample.
    for i in range(len(outlier_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)
        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.outlier_parameter_set,
            parameters, os.sep)
        os.makedirs(distance_matrix_dir, exist_ok=True)  # cache dir may not exist yet
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one per sample so they can be loaded individually.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate((X_mnist, outlier_samples[i, :].reshape(
                (1, -1))),
                                   axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)
        # The P-matrix is shared across all methods; only the embedding differs.
        for j in range(len(gd_outliers_results)):
            new_Y = np.concatenate(
                (Y_mnist, gd_outliers_results[j][i, :].reshape((1, -1))),
                axis=0)
            gd_outliers_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_gd_outliers_performance_file, 'wb') as f:
            pickle.dump((gd_outliers_kl, processed_indices), f)
    # This should be fast
    gd_avg_outliers_kl = np.mean(gd_outliers_kl, axis=1)

    # ============== Distance percentiles
    gd_outliers_percentiles_matrix = np.zeros(
        (len(outlier_samples), len(gd_method_list)))
    gd_outliers_distance_matrix = np.zeros(
        (len(outlier_samples), len(gd_method_list)))
    for i in range(len(outlier_samples)):
        for j in range(len(gd_method_list)):
            y = gd_outliers_results[j][i, :]
            nn_dist = np.min(np.sqrt(np.sum((Y_mnist - y)**2, axis=1)))
            gd_outliers_distance_matrix[i, j] = nn_dist
            gd_outliers_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)
    gd_outliers_distance_percentiles = np.mean(gd_outliers_percentiles_matrix,
                                               axis=0)
    gd_outliers_distances = np.mean(gd_outliers_distance_matrix, axis=0)
    for j in range(len(gd_method_list)):
        logging.info("%s: %f, %f", gd_method_list[j], gd_outliers_distances[j],
                     gd_outliers_distance_percentiles[j])

    output_file = generate_gd_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((gd_method_list, gd_time, gd_avg_outliers_kl,
                     gd_outliers_distance_percentiles), f)
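Both postprocessing scripts rely on the same compute-once, cache-to-disk protocol for the P-matrices: check for a per-item pickle, load it if present, otherwise compute and save. A stripped-down sketch of that pattern (`expensive_p_matrix` and `p_matrix_cache` are placeholders, not project API):

import os
import pickle

import numpy as np


def expensive_p_matrix(i):
    # Placeholder for the real per-sample P-matrix computation.
    return np.full((3, 3), float(i))


cache_dir = 'p_matrix_cache'  # hypothetical location
os.makedirs(cache_dir, exist_ok=True)
for i in range(3):
    cache_file = os.path.join(cache_dir, 'item%d.p' % i)
    if os.path.isfile(cache_file):
        with open(cache_file, 'rb') as f:
            P = pickle.load(f)  # cheap path: reuse the cached matrix
    else:
        P = expensive_p_matrix(i)  # expensive path: compute once, then cache
        with open(cache_file, 'wb') as f:
            pickle.dump(P, f)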
def main(parameters=settings.parameters, regenerate=False, only_time=False):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    outlier_samples, _ = generate_data.load_outliers(parameters=parameters)

    output_file = generate_outlier_results_filename(parameters)
    output_time_file = generate_time_results_filename(parameters)

    first_sample_inc = 0  # Change only in the companion notebook copies used purely for parallelization
    last_sample_exclusive = len(outlier_samples)

    # Doing this from scratch takes a really long time, so save results and preload them when possible
    if os.path.isfile(output_file) and not regenerate:
        logging.info("Found previous partially completed test. Starting from there.")
        with open(output_file, 'rb') as f:
            (outliers_y_gd_transformed, outliers_y_gd_variance_recalc_transformed,
             outliers_y_gd_transformed_random,
             outliers_y_gd_variance_recalc_transformed_random,
             outliers_y_gd_early_exagg_transformed_random,
             outliers_y_gd_early_exagg_transformed,
             outliers_y_gd_variance_recalc_early_exagg_transformed_random,
             outliers_random_starting_positions,
             outliers_y_gd_variance_recalc_early_exagg_transformed, covered_samples) = pickle.load(f)
        with open(output_time_file, 'rb') as f:
            (outliers_y_time_gd_transformed, outliers_y_time_gd_variance_recalc_transformed,
             outliers_y_time_gd_transformed_random,
             outliers_y_time_gd_variance_recalc_transformed_random,
             outliers_y_time_gd_early_exagg_transformed_random,
             outliers_y_time_gd_early_exagg_transformed,
             outliers_y_time_gd_variance_recalc_early_exagg_transformed_random,
             outliers_y_time_gd_variance_recalc_early_exagg_transformed, covered_samples) = pickle.load(f)
    else:
        logging.info("No previous partially completed test, or regeneration requested. Starting from scratch.")
        covered_samples = list()

        outliers_y_gd_transformed = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_variance_recalc_transformed = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_transformed_random = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_variance_recalc_transformed_random = np.zeros((len(outlier_samples), Y_mnist.shape[1]))

        outliers_y_gd_early_exagg_transformed_random = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_early_exagg_transformed = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_variance_recalc_early_exagg_transformed_random = np.zeros((len(outlier_samples), Y_mnist.shape[1]))
        outliers_y_gd_variance_recalc_early_exagg_transformed = np.zeros((len(outlier_samples), Y_mnist.shape[1]))

        outliers_random_starting_positions = np.zeros((len(outlier_samples), Y_mnist.shape[1]))

        outliers_y_time_gd_transformed = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_variance_recalc_transformed = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_transformed_random = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_variance_recalc_transformed_random = np.zeros((len(outlier_samples), ))

        outliers_y_time_gd_early_exagg_transformed_random = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_early_exagg_transformed = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_variance_recalc_early_exagg_transformed_random = np.zeros((len(outlier_samples), ))
        outliers_y_time_gd_variance_recalc_early_exagg_transformed = np.zeros((len(outlier_samples), ))

    for i in range(first_sample_inc, last_sample_exclusive):
        # Reset the random seed for every sample, keyed on the ABSOLUTE sample number.
        # Otherwise the random sequence would "shift" depending on which partial results
        # were loaded from file: with a single seed(0) before the loop, a fresh run gives
        # sample 0 some sequence [abc] and sample 1 its continuation [def]; but if sample 0
        # was already loaded from file, sample 1 would get [abc] instead. Reproducibility
        # should not depend on which parts were loaded (see the sketch after this function).
        np.random.seed(i)
        logging.info(" ====================== Sample %d \n\n", i)
        if i in covered_samples:
            logging.info("Already loaded.")
        else:
            outlier = outlier_samples[i].reshape((1, -1))

            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_transformed[i, :] = dTSNE_mnist.transform(
                outlier, y='closest', verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_transformed[i] = (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time: %f s", outliers_y_time_gd_transformed[i])

            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_variance_recalc_transformed[i, :] = dTSNE_mnist.transform(
                outlier, keep_sigmas=False, y='closest', verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_variance_recalc_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR): %f s", outliers_y_time_gd_variance_recalc_transformed[i])

            # Pick a random start anywhere in the embedding's bounding box, not necessarily near the center.
            y_start = np.array([[
                np.random.uniform(np.min(Y_mnist[:, 0]), np.max(Y_mnist[:, 0])),
                np.random.uniform(np.min(Y_mnist[:, 1]), np.max(Y_mnist[:, 1]))
            ]])

            outliers_random_starting_positions[i, :] = y_start

            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_transformed_random[i, :] = dTSNE_mnist.transform(
                outlier, y=y_start,  # or y='random'
                verbose=2, optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (random): %f s", outliers_y_time_gd_transformed_random[i])


            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_variance_recalc_transformed_random[i, :] = dTSNE_mnist.transform(
                outlier, keep_sigmas=False, y=y_start,  # or y='random'
                verbose=2, optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_variance_recalc_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR, random): %f s", outliers_y_time_gd_variance_recalc_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_early_exagg_transformed_random[i, :] = dTSNE_mnist.transform(
                outlier, y=y_start, verbose=2)  # or y='random'
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE, random): %f s", outliers_y_time_gd_early_exagg_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_early_exagg_transformed[i, :] = dTSNE_mnist.transform(outlier, y='closest', verbose=2)
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE): %f s", outliers_y_time_gd_early_exagg_transformed[i])


            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_variance_recalc_early_exagg_transformed_random[i, :] = dTSNE_mnist.transform(
                outlier, y=y_start, keep_sigmas=False, verbose=2)
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_variance_recalc_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR,EE,random): %f s",
                         outliers_y_time_gd_variance_recalc_early_exagg_transformed_random[i])


            embedder_start_time = datetime.datetime.now()
            outliers_y_gd_variance_recalc_early_exagg_transformed[i, :] = dTSNE_mnist.transform(
                outlier, keep_sigmas=False, y='closest', verbose=2)
            embedder_end_time = datetime.datetime.now()
            outliers_y_time_gd_variance_recalc_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR,EE): %f s",
                         outliers_y_time_gd_variance_recalc_early_exagg_transformed[i])


        if i not in covered_samples:
            covered_samples.append(i)
        logging.info("Saving...")
        # Gradient descent results take a long time to compute, so cache after every sample.
        if not only_time:
            with open(output_file, 'wb') as f:
                pickle.dump((outliers_y_gd_transformed, outliers_y_gd_variance_recalc_transformed,
                             outliers_y_gd_transformed_random,
                             outliers_y_gd_variance_recalc_transformed_random,
                             outliers_y_gd_early_exagg_transformed_random,
                             outliers_y_gd_early_exagg_transformed,
                             outliers_y_gd_variance_recalc_early_exagg_transformed_random,
                             outliers_random_starting_positions,
                             outliers_y_gd_variance_recalc_early_exagg_transformed, covered_samples), f)
        with open(output_time_file, 'wb') as f:
            pickle.dump((outliers_y_time_gd_transformed, outliers_y_time_gd_variance_recalc_transformed,
                         outliers_y_time_gd_transformed_random,
                         outliers_y_time_gd_variance_recalc_transformed_random,
                         outliers_y_time_gd_early_exagg_transformed_random,
                         outliers_y_time_gd_early_exagg_transformed,
                         outliers_y_time_gd_variance_recalc_early_exagg_transformed_random,
                         outliers_y_time_gd_variance_recalc_early_exagg_transformed, covered_samples), f)
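The absolute-index seeding used in the loop above can be verified in isolation: a run resumed at sample 1 draws exactly the same numbers for sample 1 as a run that started from sample 0. A minimal sketch:

import numpy as np


def draw_for_sample(i):
    np.random.seed(i)  # seed keyed on the ABSOLUTE sample index
    return np.random.uniform(size=2)


# A fresh run over samples 0 and 1 versus a "resumed" run that skips sample 0:
fresh = [draw_for_sample(i) for i in (0, 1)]
resumed = [draw_for_sample(1)]
assert np.allclose(fresh[1], resumed[0])  # sample 1 is reproducible either way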