def test_gradient_density_approximation(self):
    """Tests the function `gradient_density_approximation`.

    A histogram is saved at
    "tools/pseudo_visualization/gradient_density_approximation.png".
    The test is successful if the absolute values in the
    histogram are smaller than 1.e-9.

    """
    nb_points_per_interval = 10
    nb_intervals_per_side = 4
    nb_points = 2*nb_intervals_per_side*nb_points_per_interval + 1

    grid = numpy.linspace(-nb_intervals_per_side,
                          nb_intervals_per_side,
                          num=nb_points)
    parameters = scipy.stats.distributions.norm.pdf(grid,
                                                    loc=0.,
                                                    scale=1.)

    # There is an intentional mismatch between the probability
    # density function for generating the samples and the
    # probability density function for creating the parameters
    # of the piecewise linear function.
    samples = numpy.random.normal(loc=0.,
                                  scale=0.6,
                                  size=200)
    gradients = tls.gradient_density_approximation(samples,
                                                   parameters,
                                                   nb_points_per_interval,
                                                   nb_intervals_per_side)
    offset = 1.e-4
    approx = numpy.zeros(nb_points)
    for i in range(nb_points):
        parameters_pos = parameters.copy()
        parameters_pos[i] += offset
        loss_pos = tls.loss_density_approximation(samples,
                                                  parameters_pos,
                                                  nb_points_per_interval,
                                                  nb_intervals_per_side)
        parameters_neg = parameters.copy()
        parameters_neg[i] -= offset
        loss_neg = tls.loss_density_approximation(samples,
                                                  parameters_neg,
                                                  nb_points_per_interval,
                                                  nb_intervals_per_side)
        approx[i] = 0.5*(loss_pos - loss_neg)/offset
    tls.histogram(gradients - approx,
                  'Gradient checking for the opposite mean probability',
                  'tools/pseudo_visualization/gradient_density_approximation.png')
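# Side note: this test and `test_gradient_entropy` below rely on the same central
# finite-difference pattern. A minimal standalone sketch of that pattern; the helper
# name `central_difference_gradient` is illustrative and is not part of `tls`.
import numpy


def central_difference_gradient(loss_function, parameters, offset=1.e-4):
    """Approximates the gradient of `loss_function` at `parameters` via central finite differences."""
    approx = numpy.zeros(parameters.size)
    for i in range(parameters.size):
        parameters_pos = parameters.copy()
        parameters_pos[i] += offset
        parameters_neg = parameters.copy()
        parameters_neg[i] -= offset

        # Central difference: (f(x + h) - f(x - h))/(2*h), with error in O(h**2).
        approx[i] = 0.5*(loss_function(parameters_pos) - loss_function(parameters_neg))/offset
    return approx

# On a quadratic loss, the exact gradient is `2*x`, so the printed values
# should be close to [0.6, -2.4, 1.6].
x = numpy.array([0.3, -1.2, 0.8])
print(central_difference_gradient(lambda p: numpy.sum(p**2), x))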
def test_noise(self):
    """Tests the function `noise`.

    A histogram is saved at
    "tools/pseudo_visualization/noise.png".
    The test is successful if the histogram looks
    like that of the uniform distribution of
    support [-0.5, 0.5].

    """
    samples = tls.noise(100, 200)
    tls.histogram(samples.flatten(),
                  'Noise from the uniform distribution of support [-0.5, 0.5]',
                  'tools/pseudo_visualization/noise.png')
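# Side note: the histogram above should match samples drawn directly with NumPy,
# assuming `tls.noise(nb_rows, nb_cols)` returns an array of shape (nb_rows, nb_cols)
# of i.i.d. draws from the uniform distribution of support [-0.5, 0.5], as the
# docstring above describes. Illustrative stand-in only.
import numpy

samples_sketch = numpy.random.uniform(low=-0.5, high=0.5, size=(100, 200))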
def test_histogram(self):
    """Tests the function `histogram`.

    A histogram is saved at
    "tools/pseudo_visualization/histogram.png".
    The test is successful if the selected number
    of bins (60) gives a good histogram of 2000
    data points.

    """
    data = numpy.random.normal(loc=0.,
                               scale=1.,
                               size=2000)
    tls.histogram(data,
                  'Standard normal distribution',
                  'tools/pseudo_visualization/histogram.png')
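# Side note: `tls.histogram` is called throughout these tests. A minimal sketch of a
# comparable helper, assuming it draws a 60-bin histogram with matplotlib and writes
# it to `path`; the actual implementation in `tls` may differ.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def histogram_sketch(data, title, path):
    """Saves a 60-bin histogram of `data` at `path` (illustrative stand-in for `tls.histogram`)."""
    plt.hist(data, bins=60)
    plt.title(title)
    plt.savefig(path)
    plt.clf()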
def checking_area_under_piecewise_linear_functions(self, sess, title, path):
    """Creates the histogram of the areas under the piecewise linear functions and saves the histogram.

    Parameters
    ----------
    sess : Session
        Session that runs the graph.
    title : str
        Title of the histogram.
    path : str
        Path to the saved histogram. The path
        ends with ".png".

    """
    area = sess.run(self.node_area)
    tls.histogram(area, title, path)
def test_encode_mini_batches(self):
    """Tests the function `encode_mini_batches` in the file "eae/batching.py".

    For i = 0 ... 3, a histogram is saved at
    "eae/pseudo_visualization/encode_mini_batches/latent_variables_i.png".
    The test is successful if, in the first histogram,
    all the values are around 0. In the third histogram,
    most of the values must be around 0.

    """
    batch_size = 2
    h_in = 64
    w_in = 48
    path_to_nb_itvs_per_side_load = ''
    path_to_restore = ''

    # 2 batches of luminance images will be created
    # by the function `encode_mini_batches`.
    luminances_uint8 = numpy.random.randint(0,
                                            high=256,
                                            size=(2*batch_size, h_in, w_in, 1),
                                            dtype=numpy.uint8)
    luminances_uint8[0, :, :, :] = 0
    luminances_uint8[1, :, :, :] = 255
    luminances_uint8[2, :, :, :] = 0

    # Only a portion of the luminance image of index 2 is white.
    luminances_uint8[2, 0:2, 0:2, :] = 255
    entropy_ae = EntropyAutoencoder(batch_size,
                                    h_in,
                                    w_in,
                                    1.,
                                    12000.,
                                    path_to_nb_itvs_per_side_load,
                                    False)
    with tf.Session() as sess:
        entropy_ae.initialization(sess, path_to_restore)
        y_float32 = eae.batching.encode_mini_batches(luminances_uint8,
                                                     sess,
                                                     entropy_ae,
                                                     batch_size)
    for i in range(luminances_uint8.shape[0]):
        tls.histogram(y_float32[i, :, :, :].flatten(),
                      'Latent variables distribution for the image of index {}'.format(i),
                      'eae/pseudo_visualization/encode_mini_batches/latent_variables_{}.png'.format(i))
def checking_p_1(self, str_scope, str_variable, title, path):
    """Creates the histogram of a variable and saves the histogram.

    Parameters
    ----------
    str_scope : str
        Scope of the variable.
    str_variable : str
        Name of the variable.
    title : str
        Title of the histogram.
    path : str
        Path to the saved histogram. The path
        ends with ".png".

    """
    with tf.variable_scope(str_scope, reuse=True):
        variable = tf.get_variable(str_variable, dtype=tf.float32).eval()
    tls.histogram(variable.flatten(), title, path)
def test_decode_mini_batches(self):
    """Tests the function `decode_mini_batches` in the file "eae/batching.py".

    For i = 0 ... 3, a histogram is saved at
    "eae/pseudo_visualization/decode_mini_batches/reconstructed_pixels_i.png".
    The test is successful if, in the first histogram,
    all the values are the same.

    """
    batch_size = 2
    h_in = 64
    w_in = 48
    path_to_restore = ''

    # 2 batches of quantized latent variables will be
    # created by the function `decode_mini_batches`.
    quantized_y_float32 = numpy.random.randint(
        -6,
        high=6,
        size=(2*batch_size, h_in//csts.STRIDE_PROD, w_in//csts.STRIDE_PROD, csts.NB_MAPS_3)
    ).astype(numpy.float32)
    quantized_y_float32[0, :, :, :] = 0.
    isolated_decoder = IsolatedDecoder(batch_size,
                                       h_in,
                                       w_in,
                                       False)
    with tf.Session() as sess:
        isolated_decoder.initialization(sess, path_to_restore)
        reconstruction_uint8 = eae.batching.decode_mini_batches(quantized_y_float32,
                                                                sess,
                                                                isolated_decoder,
                                                                batch_size)
    for i in range(reconstruction_uint8.shape[0]):
        tls.histogram(reconstruction_uint8[i, :, :, :].flatten(),
                      'Pixel distribution for the reconstructed image of index {}'.format(i),
                      'eae/pseudo_visualization/decode_mini_batches/reconstructed_pixels_{}.png'.format(i))
def test_gradient_entropy(self):
    """Tests the function `gradient_entropy`.

    A histogram is saved at
    "tools/pseudo_visualization/gradient_entropy.png".
    The test is successful if the absolute values in
    the histogram are smaller than 1.e-9.

    """
    nb_points_per_interval = 10
    nb_intervals_per_side = 10
    height_samples = 24
    width_samples = 32
    nb_points = 2*nb_intervals_per_side*nb_points_per_interval + 1

    grid = numpy.linspace(-nb_intervals_per_side,
                          nb_intervals_per_side,
                          num=nb_points)
    parameters = scipy.stats.distributions.norm.pdf(grid,
                                                    loc=0.,
                                                    scale=1.)
    samples = numpy.random.normal(loc=0.,
                                  scale=1.,
                                  size=(height_samples, width_samples))
    gradients = tls.gradient_entropy(samples,
                                     parameters,
                                     nb_points_per_interval,
                                     nb_intervals_per_side)

    # `idx_initial` stores the linear piece index of
    # each sample before the gradient checking.
    idx_initial = tls.index_linear_piece(samples.flatten(),
                                         nb_points_per_interval,
                                         nb_intervals_per_side)
    offset = 1.e-4
    approx = numpy.zeros((height_samples, width_samples))

    # `is_non_diff_fct` becomes true if the non-differentiability
    # of the piecewise linear function at the edges of pieces
    # wrecks the gradient checking.
    is_non_diff_fct = False
    for i in range(height_samples):
        for j in range(width_samples):
            samples_pos = samples.copy()
            samples_pos[i, j] += offset

            # `idx_pos` stores the linear piece index of
            # each sample after adding an offset.
            idx_pos = tls.index_linear_piece(samples_pos.flatten(),
                                             nb_points_per_interval,
                                             nb_intervals_per_side)
            diff_entropy_pos = tls.differential_entropy(samples_pos.flatten(),
                                                        parameters,
                                                        nb_points_per_interval,
                                                        nb_intervals_per_side)
            samples_neg = samples.copy()
            samples_neg[i, j] -= offset

            # `idx_neg` stores the linear piece index of
            # each sample after subtracting an offset.
            idx_neg = tls.index_linear_piece(samples_neg.flatten(),
                                             nb_points_per_interval,
                                             nb_intervals_per_side)
            diff_entropy_neg = tls.differential_entropy(samples_neg.flatten(),
                                                        parameters,
                                                        nb_points_per_interval,
                                                        nb_intervals_per_side)
            approx[i, j] = 0.5*(diff_entropy_pos - diff_entropy_neg)/offset
            is_idx_pos_changed = not numpy.array_equal(idx_initial, idx_pos)
            is_idx_neg_changed = not numpy.array_equal(idx_initial, idx_neg)
            if is_idx_pos_changed or is_idx_neg_changed:
                is_non_diff_fct = True
    diff = (gradients/height_samples) - approx
    if is_non_diff_fct:
        warnings.warn('The non-differentiability of the piecewise linear function wrecks the gradient checking. Re-run it.')
    else:
        tls.histogram(diff.flatten(),
                      'Gradient checking for the differential entropy',
                      'tools/pseudo_visualization/gradient_entropy.png')
                     args.nb_test,
                     paths_to_outputs)

# `training_uint8.dtype` is equal to `numpy.uint8`.
training_uint8 = numpy.load(paths_to_outputs[0])
mean_training = numpy.load(paths_to_outputs[3])
std_training = numpy.load(paths_to_outputs[4])
sample_uint8 = training_uint8[0:nb_display, :]

# The function `svhn.svhn.preprocess_svhn` checks
# that `sample_uint8.dtype` is equal to `numpy.uint8`
# and `sample_uint8.ndim` is equal to 2.
sample_float64 = svhn.svhn.preprocess_svhn(sample_uint8,
                                           mean_training,
                                           std_training)
tls.visualize_rows(sample_uint8,
                   32,
                   32,
                   10,
                   'svhn/visualization/sample_training.png')
mu = numpy.mean(sample_float64, axis=0)
sigma = numpy.sqrt(numpy.mean((sample_float64 - numpy.tile(mu, (nb_display, 1)))**2, axis=0))
tls.histogram(mu,
              'Mean of each pixel over {} training images after preprocessing'.format(nb_display),
              'svhn/visualization/mean_after_preprocessing.png')
tls.histogram(sigma,
              'Std of each pixel over {} training images after preprocessing'.format(nb_display),
              'svhn/visualization/std_after_preprocessing.png')
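# Side note: the two histograms above should concentrate around 0 and 1 respectively,
# assuming `svhn.svhn.preprocess_svhn` standardizes each pixel with the training
# statistics. A minimal sketch under that assumption; not the actual implementation.
import numpy


def preprocess_sketch(images_uint8, mean_training, std_training):
    """Subtracts the per-pixel training mean and divides by the per-pixel training std."""
    return (images_uint8.astype(numpy.float64) - mean_training)/std_training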
def fit_maps(y_float32, path_to_histogram_locations, path_to_histogram_scales, paths, idx_map_exception=None):
    """Fits a Laplace density to the normed histogram of each latent variable feature map.

    Parameters
    ----------
    y_float32 : numpy.ndarray
        4D array with data-type `numpy.float32`.
        Latent variables. `y_float32[i, :, :, j]`
        is the jth latent variable feature map of
        the ith example.
    path_to_histogram_locations : str
        Path to the histogram of the Laplace locations.
        The path ends with ".png".
    path_to_histogram_scales : str
        Path to the histogram of the Laplace scales.
        The path ends with ".png".
    paths : list
        `paths[i]` is the path to the fitted normed
        histogram for the ith latent variable feature
        map. Each path ends with ".png".
    idx_map_exception : int, optional
        Index of the latent variable feature map that
        is not compressed as the other maps. The default
        value is None.

    Raises
    ------
    ValueError
        If `len(paths)` is not equal to `y_float32.shape[3]`.

    """
    if len(paths) != y_float32.shape[3]:
        raise ValueError('`len(paths)` is not equal to `y_float32.shape[3]`.')
    locations = []
    scales = []
    for i in range(y_float32.shape[3]):
        map_float32 = y_float32[:, :, :, i]
        edge_left = numpy.floor(numpy.amin(map_float32)).item()
        edge_right = numpy.ceil(numpy.amax(map_float32)).item()

        # The grid below contains 50 points per unit interval.
        grid = numpy.linspace(edge_left,
                              edge_right,
                              num=50*int(edge_right - edge_left) + 1)

        # Let's assume that `map_float32` contains i.i.d. samples
        # from an unknown probability density function. The two
        # equations below result from the minimization of the
        # Kullback-Leibler divergence of the unknown probability
        # density function from our statistical model (Laplace
        # density of location `laplace_location` and scale
        # `laplace_scale`). Note that this minimization is
        # equivalent to the maximum likelihood estimator.
        # To dive into the details, see:
        # "Estimating distributions and densities". 36-402,
        # advanced data analysis, CMU, 27 January 2011.
        laplace_location = numpy.mean(map_float32).item()
        laplace_scale = numpy.mean(numpy.absolute(map_float32 - laplace_location)).item()
        laplace_pdf = scipy.stats.laplace.pdf(grid,
                                              loc=laplace_location,
                                              scale=laplace_scale)
        handle = [plt.plot(grid, laplace_pdf, color='red')[0]]
        hist, bin_edges = numpy.histogram(map_float32,
                                          bins=60,
                                          density=True)
        plt.bar(bin_edges[0:60],
                hist,
                width=bin_edges[1] - bin_edges[0],
                align='edge',
                color='blue')
        plt.title('Latent variable feature map {}'.format(i + 1))
        plt.legend(handle,
                   [r'$f( . ; {0}, {1})$'.format(str(round(laplace_location, 2)), str(round(laplace_scale, 2)))],
                   prop={'size': 30},
                   loc=9)
        plt.savefig(paths[i])
        plt.clf()
        if idx_map_exception is None:
            locations.append(laplace_location)
            scales.append(laplace_scale)
        else:
            if i != idx_map_exception:
                locations.append(laplace_location)
                scales.append(laplace_scale)

    # `len(locations)` and `len(scales)` are equal.
    tls.histogram(numpy.array(locations),
                  'Histogram of {} locations'.format(len(locations)),
                  path_to_histogram_locations)
    tls.histogram(numpy.array(scales),
                  'Histogram of {} scales'.format(len(scales)),
                  path_to_histogram_scales)
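# Side note: the closed-form estimates used in `fit_maps` can be sanity-checked on
# synthetic data drawn from a known Laplace density. Standalone illustration only;
# the true parameters below (2.0 and 0.5) are arbitrary.
import numpy

synthetic_samples = numpy.random.laplace(loc=2., scale=0.5, size=100000)
estimated_location = numpy.mean(synthetic_samples).item()
estimated_scale = numpy.mean(numpy.absolute(synthetic_samples - estimated_location)).item()

# Both estimates should be close to 2.0 and 0.5 respectively.
print(estimated_location, estimated_scale)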