def main(regenerate_model1=False, regenerate_model2=False, regenerate_model3=False, parameters=settings.parameters):
    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)
    models_and_results = neural_network_commons.train_or_load_models(
        regenerate_model1=regenerate_model1,
        regenerate_model2=regenerate_model2,
        regenerate_model3=regenerate_model3,
        parameters=parameters)

    model1, model2, model3 = models_and_results["models"]
    Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist = models_and_results["Y_predicted"]

    Y_outl1_mnist = model1.predict(letter_samples)
    Y_outl2_mnist = model2.predict(letter_samples)
    Y_outl3_mnist = model3.predict(letter_samples)

    nn_models_orig = [Y_nn1_mnist, Y_nn2_mnist, Y_nn3_mnist]
    nn_method_list = [
        'NN - 2L; 250N; ReLu; D0.25',
        'NN - 2L; 500N; ReLu; D0.5',
        'NN - 1L; 500N; tanh'
    ]
    nn_letters_results = [Y_outl1_mnist, Y_outl2_mnist, Y_outl3_mnist]

    output_file = generate_letter_results_filename(parameters)
    with open(output_file, 'wb') as f:
        pickle.dump((nn_letters_results, nn_models_orig, nn_method_list), f)
def main(parameters=settings.parameters, regenerate_parameters_cache=False):
    step = 0.01
    choice_K = np.arange(step, 2 + step, step)  # Let's try those K.

    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)

    kernel_tsne_mapping = kernelized_tsne.generate_kernelized_tsne_mapping_function(
        parameters=parameters,
        regenerate_parameters_cache=regenerate_parameters_cache)
    kernelized_detailed_tsne_letters_results = list()
    kernelized_detailed_tsne_method_list = [
        "Kernelized tSNE; K=%.2f" % (k) for k in choice_K
    ]
    kernelized_detailed_tsne_time = np.zeros(
        (len(kernelized_detailed_tsne_method_list), ))

    for j in range(len(choice_K)):
        k = choice_K[j]
        logging.info("%f", k)
        embedder_start_time = datetime.datetime.now()
        kernelized_detailed_tsne_letters_results.append(
            kernel_tsne_mapping(letter_samples, k=k))
        embedder_end_time = datetime.datetime.now()
        kernelized_detailed_tsne_time[j] = (
            embedder_end_time - embedder_start_time).total_seconds()
        logging.info("%f complete: %f s", k, kernelized_detailed_tsne_time[j])

    output_file = generate_letter_results_filename(parameters=parameters)
    with open(output_file, 'wb') as f:
        pickle.dump((kernelized_detailed_tsne_letters_results,
                     kernelized_detailed_tsne_time,
                     kernelized_detailed_tsne_method_list), f)
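# Illustrative sketch (not part of the original script): downstream code can read the
# pickle written by main() above and, for example, report the fastest and slowest
# kernel choices. 'results_file' is assumed to be the path returned by
# generate_letter_results_filename for the same parameters.
import pickle
import numpy as np

def report_kernelized_timing(results_file):
    with open(results_file, 'rb') as f:
        results, times, method_list = pickle.load(f)
    fastest = int(np.argmin(times))
    slowest = int(np.argmax(times))
    print("Fastest:", method_list[fastest], times[fastest], "s")
    print("Slowest:", method_list[slowest], times[slowest], "s")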
def get_common_info(parameters):
    res = {}
    res['dTSNE_mnist'] = generate_data.load_dtsne_mnist(parameters=parameters)
    res['X_mnist'] = generate_data.load_x_mnist(parameters=parameters)
    res['Y_mnist'] = generate_data.load_y_mnist(parameters=parameters)
    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)
    res['letter_samples'] = letter_samples

    D_Y = distance.squareform(distance.pdist(res['Y_mnist']))
    # Find the distance from each embedded point to its closest neighbor ...
    np.fill_diagonal(D_Y, np.inf)  # ... but not to itself.
    res['nearest_neighbors_y_dist'] = np.min(D_Y, axis=1)  # D_Y is symmetric, so either axis works.

    return res
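# Illustrative sketch (not part of the original pipeline): 'nearest_neighbors_y_dist'
# from get_common_info() serves as a baseline for the distance-percentile metric used
# in the postprocess scripts below. For each newly embedded letter, its distance to the
# closest training embedding is ranked (via scipy.stats.percentileofscore) against that
# baseline; a percentile near 100 means the new point landed farther from the data than
# almost any training point is from its own nearest neighbor, i.e. as an outlier.
# The helper name and arguments here are assumptions for this example.
from scipy import stats
import numpy as np

def outlier_percentile(y_new, Y_mnist, nearest_neighbors_y_dist):
    """Percentile of y_new's nearest-neighbor distance w.r.t. the training baseline."""
    nn_dist = np.min(np.sqrt(np.sum((Y_mnist - y_new) ** 2, axis=1)))
    return stats.percentileofscore(nearest_neighbors_y_dist, nn_dist)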
def main(parameters=settings.parameters):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Find the distance from each embedded point to its closest neighbor ...
    np.fill_diagonal(D_Y, np.inf)  # ... but not to itself.
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # D_Y is symmetric, so either axis works.

    # ============== KL Divergence
    gd_method_list = [
        r'Closest $Y_{init}$',
        r'Random $Y_{init}$',
        r'Closest $Y_{init}$; new $\sigma$',
        r'Random $Y_{init}$; new $\sigma$',
        r'Closest $Y_{init}$; EE',
        r'Random $Y_{init}$; EE',
        r'Closest $Y_{init}$; new $\sigma$; EE',
        r'Random $Y_{init}$; new $\sigma$; EE'
    ]

    gd_results_file = exp_letter_test_GD.generate_letter_results_filename(
        parameters=parameters)
    with open(gd_results_file, 'rb') as f:
        (letters_y_gd_transformed, letters_y_gd_variance_recalc_transformed,
         letters_y_gd_transformed_random,
         letters_y_gd_variance_recalc_transformed_random,
         letters_y_gd_early_exagg_transformed_random,
         letters_y_gd_early_exagg_transformed,
         letters_y_gd_variance_recalc_early_exagg_transformed_random,
         picked_random_starting_positions,
         letters_y_gd_variance_recalc_early_exagg_transformed,
         covered_samples) = pickle.load(f)

    gd_letters_results = [
        letters_y_gd_transformed,
        letters_y_gd_transformed_random,
        letters_y_gd_variance_recalc_transformed,
        letters_y_gd_variance_recalc_transformed_random,
        letters_y_gd_early_exagg_transformed,
        letters_y_gd_early_exagg_transformed_random,
        letters_y_gd_variance_recalc_early_exagg_transformed,
        letters_y_gd_variance_recalc_early_exagg_transformed_random,
    ]

    input_time_file = exp_letter_test_GD.generate_time_results_filename(
        parameters)
    with open(input_time_file, 'rb') as f:
        letters_y_time_gd_transformed, letters_y_time_gd_variance_recalc_transformed, \
            letters_y_time_gd_transformed_random, \
            letters_y_time_gd_variance_recalc_transformed_random, \
            letters_y_time_gd_early_exagg_transformed_random, \
            letters_y_time_gd_early_exagg_transformed, \
            letters_y_time_gd_variance_recalc_early_exagg_transformed_random, \
            letters_y_time_gd_variance_recalc_early_exagg_transformed, covered_samples = pickle.load(f)

    gd_time = [
        np.mean(letters_y_time_gd_transformed),
        np.mean(letters_y_time_gd_transformed_random),
        np.mean(letters_y_time_gd_variance_recalc_transformed),
        np.mean(letters_y_time_gd_variance_recalc_transformed_random),
        np.mean(letters_y_time_gd_early_exagg_transformed),
        np.mean(letters_y_time_gd_early_exagg_transformed_random),
        np.mean(letters_y_time_gd_variance_recalc_early_exagg_transformed),
        np.mean(
            letters_y_time_gd_variance_recalc_early_exagg_transformed_random),
    ]

    gd_letters_kl = np.zeros((len(gd_method_list), len(letter_samples)))
    processed_indices = list()

    kl_gd_letters_performance_file = generate_gd_kl_temp_filename(parameters)
    if os.path.isfile(kl_gd_letters_performance_file):
        with open(kl_gd_letters_performance_file, 'rb') as f:
            gd_letters_kl, processed_indices = pickle.load(f)

    # Computing the KL divergence increase for all 1000 samples is very slow;
    # most of the time goes into building the P-matrix.
    per_sample_KL = np.zeros((len(letter_samples), ))
    for i in range(len(letter_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)

        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.letter_parameter_set,
            parameters, os.sep)
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one file per sample, so they can be loaded one-by-one.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate((X_mnist, letter_samples[i, :].reshape(
                (1, -1))), axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)

        # The P-matrix is shared across all methods; only Y differs.
        for j in range(len(gd_letters_results)):
            new_Y = np.concatenate(
                (Y_mnist, gd_letters_results[j][i, :].reshape((1, -1))),
                axis=0)
            gd_letters_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_gd_letters_performance_file, 'wb') as f:
            pickle.dump((gd_letters_kl, processed_indices), f)

    # This should be fast.
    gd_avg_letters_kl = np.mean(gd_letters_kl, axis=1)

    # ============== Distance percentiles
    gd_letters_percentiles_matrix = np.zeros(
        (len(letter_samples), len(gd_method_list)))
    gd_letters_distance_matrix = np.zeros(
        (len(letter_samples), len(gd_method_list)))
    for i in range(len(letter_samples)):
        for j in range(len(gd_method_list)):
            y = gd_letters_results[j][i, :]
            nn_dist = np.min(np.sqrt(np.sum((Y_mnist - y)**2, axis=1)))
            gd_letters_distance_matrix[i, j] = nn_dist
            gd_letters_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)

    gd_letters_distance_percentiles = np.mean(gd_letters_percentiles_matrix,
                                              axis=0)
    gd_letters_distances = np.mean(gd_letters_distance_matrix, axis=0)
    for j in range(len(gd_method_list)):
        logging.info("%s: %f, %f", gd_method_list[j], gd_letters_distances[j],
                     gd_letters_distance_percentiles[j])

    output_file = generate_gd_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((gd_method_list, gd_time, gd_avg_letters_kl,
                     gd_letters_distance_percentiles), f)
import matplotlib.pyplot as plt

import generate_data
import settings

import logging

logging.basicConfig(level=logging.INFO)

_, letter_samples_raw, _ = generate_data.load_letters(
    parameters=settings.parameters)

width = 10  # Total number of letters to show per row.
height = 1
start_index = 0

f, ax = plt.subplots(height, width, dpi=300)
f.set_size_inches(3.3, 0.33)  # 3.3 x 1 - 3 rows, 3.3 x 0.66 - 2 rows, 3.3 x 0.33 - 1 row
f.subplots_adjust()
#f.tight_layout()
if height > 1:
    for i in range(height):
        for j in range(width):
            ax[i, j].imshow(letter_samples_raw[i * width + j, :].reshape(28, 28),
                            cmap='gray_r')
            # set_axis_off() does not fit here: the bounding box should stay visible.
            ax[i, j].axes.get_xaxis().set_visible(False)
            ax[i, j].axes.get_yaxis().set_visible(False)
else:
    for j in range(width):
        ax[j].imshow(letter_samples_raw[j, :].reshape(28, 28), cmap='gray_r')
        ax[j].axes.get_xaxis().set_visible(False)
        ax[j].axes.get_yaxis().set_visible(False)
def main(parameters=settings.parameters):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    X_mnist = generate_data.load_x_mnist(parameters=parameters)
    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)

    nn_results_file = exp_letter_test_NN.generate_letter_results_filename(
        parameters)
    with open(nn_results_file, 'rb') as f:
        nn_letters_results, nn_models_orig, nn_method_list = pickle.load(f)

    D_Y = distance.squareform(distance.pdist(Y_mnist))
    # Find the distance from each embedded point to its closest neighbor ...
    np.fill_diagonal(D_Y, np.inf)  # ... but not to itself.
    nearest_neighbors_y_dist = np.min(D_Y, axis=1)  # D_Y is symmetric, so either axis works.

    # ================ KL DIVERGENCE ===================
    nn_letters_kl = np.zeros((len(nn_method_list), len(letter_samples)))
    processed_indices = list()

    kl_nn_letters_performance_file = generate_nn_kl_temp_filename(parameters)
    # Restore partially computed results if a temp file exists (mirrors the GD postprocess script).
    if os.path.isfile(kl_nn_letters_performance_file):
        with open(kl_nn_letters_performance_file, 'rb') as f:
            nn_letters_kl, processed_indices = pickle.load(f)

    # Computing the KL divergence increase for all 1000 samples is very slow;
    # most of the time goes into building the P-matrix.
    per_sample_KL = np.zeros((len(letter_samples), ))
    for i in range(len(letter_samples)):
        if i in processed_indices:
            logging.info("Sample %d already processed. Results loaded.", i)
            continue
        logging.info("Processing sample %d", i)

        distance_matrix_dir = distance_matrix_dir_prefix + generate_data.combine_prefixes(
            settings.tsne_parameter_set | settings.letter_parameter_set,
            parameters, os.sep)
        distance_matrix_file = distance_matrix_dir + 'item' + str(i) + '.p'
        # P-matrices are cached one file per sample, so they can be loaded one-by-one.
        if os.path.isfile(distance_matrix_file):
            logging.info("\tP-matrix file found. Loading.")
            with open(distance_matrix_file, 'rb') as f:
                new_P, _ = pickle.load(f)
        else:
            logging.info("\tP-matrix file not found. Creating and saving.")
            new_X = np.concatenate((X_mnist, letter_samples[i, :].reshape(
                (1, -1))), axis=0)
            new_D = distance.squareform(distance.pdist(new_X))
            new_P, new_sigmas = lion_tsne.get_p_and_sigma(
                distance_matrix=new_D, perplexity=dTSNE_mnist.perplexity)
            with open(distance_matrix_file, 'wb') as f:
                pickle.dump((new_P, new_sigmas), f)

        # The P-matrix is shared across all methods; only Y differs.
        for j in range(len(nn_letters_results)):
            new_Y = np.concatenate(
                (nn_models_orig[j], nn_letters_results[j][i, :].reshape(
                    (1, -1))), axis=0)
            nn_letters_kl[j, i], _ = lion_tsne.kl_divergence_and_gradient(
                p_matrix=new_P, y=new_Y)
        processed_indices.append(i)
        with open(kl_nn_letters_performance_file, 'wb') as f:
            pickle.dump((nn_letters_kl, processed_indices), f)

    # This should be fast.
    nn_avg_letters_kl = np.mean(nn_letters_kl, axis=1)

    # ================ DISTANCE MATRICES ===================
    nn_letters_percentiles_matrix = np.zeros(
        (len(letter_samples), len(nn_method_list)))
    nn_letters_distance_matrix = np.zeros(
        (len(letter_samples), len(nn_method_list)))
    for i in range(len(letter_samples)):
        for j in range(len(nn_method_list)):
            y = nn_letters_results[j][i, :]
            nn_dist = np.min(
                np.sqrt(np.sum((nn_models_orig[j] - y)**2, axis=1)))
            nn_letters_distance_matrix[i, j] = nn_dist
            nn_letters_percentiles_matrix[i, j] = stats.percentileofscore(
                nearest_neighbors_y_dist, nn_dist)

    nn_letters_distance_percentiles = np.mean(nn_letters_percentiles_matrix,
                                              axis=0)
    nn_letters_distances = np.mean(nn_letters_distance_matrix, axis=0)
    for j in range(len(nn_method_list)):
        logging.info("%s: %f, %f", nn_method_list[j], nn_letters_distances[j],
                     nn_letters_distance_percentiles[j])

    output_file = generate_nn_postprocess_filename(parameters)
    with open(output_file, "wb") as f:
        pickle.dump((nn_method_list, nn_avg_letters_kl,
                     nn_letters_distance_percentiles), f)
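# Illustrative sketch (not part of the original scripts): the postprocess pickles written
# by the GD and NN scripts above can be collected into one summary table for reporting.
# 'gd_postprocess_file' and 'nn_postprocess_file' are assumed to be the paths produced by
# generate_gd_postprocess_filename and generate_nn_postprocess_filename respectively.
import pickle
import pandas as pd

def load_summary(gd_postprocess_file, nn_postprocess_file):
    with open(gd_postprocess_file, 'rb') as f:
        gd_methods, gd_time, gd_kl, gd_percentiles = pickle.load(f)
    with open(nn_postprocess_file, 'rb') as f:
        nn_methods, nn_kl, nn_percentiles = pickle.load(f)
    rows = [
        {"Method": m, "Avg KL": kl, "NN-dist percentile": p}
        for m, kl, p in zip(list(gd_methods) + list(nn_methods),
                            list(gd_kl) + list(nn_kl),
                            list(gd_percentiles) + list(nn_percentiles))
    ]
    return pd.DataFrame(rows)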
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import generate_data
import settings

parameters = settings.parameters

X_mnist_raw = generate_data.load_x_mnist_raw(parameters=parameters)
letters, letters_raw, letters_labels = generate_data.load_letters(parameters=parameters)

print(letters_raw.shape, np.max(letters_raw[0, :]), np.min(letters_raw[0, :]))
print(letters_labels[:100])

width = 20  # Total number of letters to show in one row.
start_index = 0
height = 20  # Number of rows; half will go to pictures, half to the labels below them.

f, ax = plt.subplots(height, width)
f.set_size_inches(16, 16)
f.subplots_adjust()
for i in range(int(height / 2)):
    for j in range(width):
        label = letters_labels[start_index + width * i + j]
        ax[2 * i][j].imshow(letters_raw[start_index + width * i + j, :].reshape(28, 28),
                            cmap='gray_r')
        # The original label-drawing call was truncated; placing the numeric label in the
        # center of the axis below each picture is an assumed completion. (A commented-out
        # fragment hinted at converting the numeric label to a letter via chr() instead.)
        ax[2 * i + 1][j].text(0.5, 0.5, str(label), ha='center', va='center')
        ax[2 * i][j].axis('off')
        ax[2 * i + 1][j].axis('off')
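# Hypothetical helper (an assumption, not from the original code): the commented-out
# fragment above suggests the numeric labels index letters, with values below 26 for
# uppercase and the rest for lowercase. A sketch of that conversion, should nicer
# captions be wanted; the exact offset for lowercase labels is a guess.
def label_to_char(label):
    """Map a numeric letter label to a character, assuming 0-25 -> 'A'-'Z', 26-51 -> 'a'-'z'."""
    return chr(ord('A') + label) if label < 26 else chr(ord('a') + (label - 26))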
def main(parameters=settings.parameters, regenerate=False, only_time=False):
    dTSNE_mnist = generate_data.load_dtsne_mnist(parameters=parameters)
    Y_mnist = generate_data.load_y_mnist(parameters=parameters)
    letter_samples, _, _ = generate_data.load_letters(parameters=parameters)

    # Doing it from scratch takes a REALLY long time. If possible, save results and pre-load.
    output_file = generate_letter_results_filename(parameters)
    output_time_file = generate_time_results_filename(parameters)

    first_sample_inc = 0  # Change only if this is one of the "other notebooks just for parallelization".
    last_sample_exclusive = len(letter_samples)

    if os.path.isfile(output_file) and not regenerate:
        logging.info(
            "Found previous partially completed test. Starting from there.")
        with open(output_file, 'rb') as f:
            (letters_y_gd_transformed,
             letters_y_gd_variance_recalc_transformed,
             letters_y_gd_transformed_random,
             letters_y_gd_variance_recalc_transformed_random,
             letters_y_gd_early_exagg_transformed_random,
             letters_y_gd_early_exagg_transformed,
             letters_y_gd_variance_recalc_early_exagg_transformed_random,
             letters_random_starting_positions,
             letters_y_gd_variance_recalc_early_exagg_transformed,
             covered_samples) = pickle.load(f)
        with open(output_time_file, 'rb') as f:
            (letters_y_time_gd_transformed,
             letters_y_time_gd_variance_recalc_transformed,
             letters_y_time_gd_transformed_random,
             letters_y_time_gd_variance_recalc_transformed_random,
             letters_y_time_gd_early_exagg_transformed_random,
             letters_y_time_gd_early_exagg_transformed,
             letters_y_time_gd_variance_recalc_early_exagg_transformed_random,
             letters_y_time_gd_variance_recalc_early_exagg_transformed,
             covered_samples) = pickle.load(f)
    else:
        logging.info(
            "No previous partially completed test, or regeneration requested. Starting from scratch."
        )
        covered_samples = list()

        letters_y_gd_transformed = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_variance_recalc_transformed = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_transformed_random = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_variance_recalc_transformed_random = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_early_exagg_transformed_random = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_early_exagg_transformed = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_variance_recalc_early_exagg_transformed_random = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_y_gd_variance_recalc_early_exagg_transformed = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))
        letters_random_starting_positions = np.zeros(
            (len(letter_samples), Y_mnist.shape[1]))

        letters_y_time_gd_transformed = np.zeros((len(letter_samples), ))
        letters_y_time_gd_variance_recalc_transformed = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_transformed_random = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_variance_recalc_transformed_random = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_early_exagg_transformed_random = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_early_exagg_transformed = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_variance_recalc_early_exagg_transformed_random = np.zeros(
            (len(letter_samples), ))
        letters_y_time_gd_variance_recalc_early_exagg_transformed = np.zeros(
            (len(letter_samples), ))

    for i in range(first_sample_inc, last_sample_exclusive):
        np.random.seed(i)
        # We reset the random seed on every iteration, keyed to the ABSOLUTE sample number.
        # Otherwise, if you load partial results from file, the random sequence would "shift"
        # depending on which parts were loaded, and reproducibility would be lost.
        # I.e. with seed(0) before the loop and a run from scratch, sample 0 gets some random
        # sequence [abc], sample 1 the continuation of that sequence [def], etc. But if sample 0
        # was already loaded from file, sample 1 would get [abc], sample 2 [def], and so on.
        # Reproducibility should not depend on which parts were loaded; hence a fresh seed
        # every time, derived from the absolute sample number.

        logging.info(" ====================== Sample %d \n\n", i)
        if i in covered_samples:
            logging.info("Already loaded.")
        else:
            letter = letter_samples[i].reshape((1, -1))

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_transformed[i, :] = dTSNE_mnist.transform(
                letter,
                y='closest',
                verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_transformed[i] = (
                embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time: %f s", letters_y_time_gd_transformed[i])

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_variance_recalc_transformed[
                i, :] = dTSNE_mnist.transform(
                    letter,
                    keep_sigmas=False,
                    y='closest',
                    verbose=2,
                    optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_variance_recalc_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (VR): %f s",
                         letters_y_time_gd_variance_recalc_transformed[i])

            # Pick a random start anywhere in the embedding's bounding box, not necessarily near the center.
            y_start = np.array([[
                np.random.uniform(np.min(Y_mnist[:, 0]), np.max(Y_mnist[:, 0])),
                np.random.uniform(np.min(Y_mnist[:, 1]), np.max(Y_mnist[:, 1]))
            ]])
            letters_random_starting_positions[i, :] = y_start

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_transformed_random[i, :] = dTSNE_mnist.transform(
                letter,
                y=y_start,  # y='random',
                verbose=2,
                optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (random): %f s",
                         letters_y_time_gd_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_variance_recalc_transformed_random[
                i, :] = dTSNE_mnist.transform(
                    letter,
                    keep_sigmas=False,
                    y=y_start,  # y='random',
                    verbose=2,
                    optimizer_kwargs={'early_exaggeration': None})
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_variance_recalc_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR, random): %f s",
                letters_y_time_gd_variance_recalc_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_early_exagg_transformed_random[
                i, :] = dTSNE_mnist.transform(
                    letter,
                    y=y_start,  # y='random',
                    verbose=2)
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE, random): %f s",
                         letters_y_time_gd_early_exagg_transformed_random[i])

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_early_exagg_transformed[i, :] = dTSNE_mnist.transform(
                letter, y='closest', verbose=2)
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info("Time (EE): %f s",
                         letters_y_time_gd_early_exagg_transformed[i])
            embedder_start_time = datetime.datetime.now()
            letters_y_gd_variance_recalc_early_exagg_transformed_random[
                i, :] = dTSNE_mnist.transform(letter,
                                              y=y_start,
                                              keep_sigmas=False,
                                              verbose=2)
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_variance_recalc_early_exagg_transformed_random[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR,EE,random): %f s",
                letters_y_time_gd_variance_recalc_early_exagg_transformed_random[
                    i])

            embedder_start_time = datetime.datetime.now()
            letters_y_gd_variance_recalc_early_exagg_transformed[
                i, :] = dTSNE_mnist.transform(letter,
                                              keep_sigmas=False,
                                              y='closest',
                                              verbose=2)
            embedder_end_time = datetime.datetime.now()
            letters_y_time_gd_variance_recalc_early_exagg_transformed[i] = \
                (embedder_end_time - embedder_start_time).total_seconds()
            logging.info(
                "Time (VR,EE): %f s",
                letters_y_time_gd_variance_recalc_early_exagg_transformed[i])

            covered_samples.append(i)

        logging.info("Saving...")
        # Gradient descent results take a long while to compute; cache them after every sample.
        if not only_time:
            with open(output_file, 'wb') as f:
                pickle.dump((
                    letters_y_gd_transformed,
                    letters_y_gd_variance_recalc_transformed,
                    letters_y_gd_transformed_random,
                    letters_y_gd_variance_recalc_transformed_random,
                    letters_y_gd_early_exagg_transformed_random,
                    letters_y_gd_early_exagg_transformed,
                    letters_y_gd_variance_recalc_early_exagg_transformed_random,
                    letters_random_starting_positions,
                    letters_y_gd_variance_recalc_early_exagg_transformed,
                    covered_samples), f)
        with open(output_time_file, 'wb') as f:
            pickle.dump((
                letters_y_time_gd_transformed,
                letters_y_time_gd_variance_recalc_transformed,
                letters_y_time_gd_transformed_random,
                letters_y_time_gd_variance_recalc_transformed_random,
                letters_y_time_gd_early_exagg_transformed_random,
                letters_y_time_gd_early_exagg_transformed,
                letters_y_time_gd_variance_recalc_early_exagg_transformed_random,
                letters_y_time_gd_variance_recalc_early_exagg_transformed,
                covered_samples), f)
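# Illustrative usage sketch (an assumption, not in the original file): running this
# experiment as a script with INFO-level logging, the way the companion scripts configure it.
if __name__ == "__main__":
    import logging
    logging.basicConfig(level=logging.INFO)
    main(parameters=settings.parameters, regenerate=False, only_time=False)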