def test_gradient_density_approximation(self):
     """Tests the function `gradient_density_approximation`.
     
     A histogram is saved at
     "tools/pseudo_visualization/gradient_density_approximation.png".
     The test is successful if all the absolute
     values in the histogram are smaller than 1.e-9.
     
     """
     nb_points_per_interval = 10
     nb_intervals_per_side = 4
     
     nb_points = 2*nb_intervals_per_side*nb_points_per_interval + 1
     grid = numpy.linspace(-nb_intervals_per_side,
                           nb_intervals_per_side,
                           num=nb_points)
     parameters = scipy.stats.distributions.norm.pdf(grid,
                                                     loc=0.,
                                                     scale=1.)
     
     # There is an intentional mismatch between
     # the probability density function for generating
     # the samples and the probability density function
     # for creating the parameters of the piecewise
     # linear function.
     samples = numpy.random.normal(loc=0.,
                                   scale=0.6,
                                   size=200)
     gradients = tls.gradient_density_approximation(samples,
                                                    parameters,
                                                    nb_points_per_interval,
                                                    nb_intervals_per_side)
     offset = 1.e-4
     approx = numpy.zeros(nb_points)
     for i in range(nb_points):
         parameters_pos = parameters.copy()
         parameters_pos[i] += offset
         loss_pos = tls.loss_density_approximation(samples,
                                                   parameters_pos,
                                                   nb_points_per_interval,
                                                   nb_intervals_per_side)
         parameters_neg = parameters.copy()
         parameters_neg[i] -= offset
         loss_neg = tls.loss_density_approximation(samples,
                                                   parameters_neg,
                                                   nb_points_per_interval,
                                                   nb_intervals_per_side)
         approx[i] = 0.5*(loss_pos - loss_neg)/offset
     
     tls.histogram(gradients - approx,
                   'Gradient checking for the opposite mean probability',
                   'tools/pseudo_visualization/gradient_density_approximation.png')
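
# Aside: the loop above is central-difference gradient checking. Below is a
# minimal, self-contained sketch of the same pattern for a generic scalar
# loss; `central_difference_gradient` and `quadratic_loss` are hypothetical
# illustrations, not part of the codebase.
import numpy

def central_difference_gradient(loss, parameters, offset=1.e-4):
    """Approximates the gradient of `loss` at `parameters` via central differences."""
    approx = numpy.zeros(parameters.size)
    for i in range(parameters.size):
        parameters_pos = parameters.copy()
        parameters_pos[i] += offset
        parameters_neg = parameters.copy()
        parameters_neg[i] -= offset
        approx[i] = 0.5*(loss(parameters_pos) - loss(parameters_neg))/offset
    return approx

# For the loss below, whose exact gradient is `2*x`, the approximation at
# `numpy.ones(4)` is close to [2., 2., 2., 2.].
def quadratic_loss(x):
    return numpy.sum(x**2)
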
 def test_noise(self):
     """Tests the function `noise`.
     
     A histogram is saved at
     "tools/pseudo_visualization/noise.png".
     The test is successful if the histogram
     looks like that of the uniform distribution
     of support [-0.5, 0.5].
     
     """
     samples = tls.noise(100, 200)
     tls.histogram(samples.flatten(),
                   'Noise from the uniform distribution of support [-0.5, 0.5]',
                   'tools/pseudo_visualization/noise.png')
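
# Aside: a minimal sketch of the behaviour `tls.noise(height, width)` is
# assumed to have, namely i.i.d. draws from the uniform distribution of
# support [-0.5, 0.5]; the float32 cast is an assumption.
import numpy

def uniform_noise_sketch(height, width):
    # Hypothetical stand-in for `tls.noise`, not the library implementation.
    return numpy.random.uniform(low=-0.5, high=0.5, size=(height, width)).astype(numpy.float32)
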
 def test_histogram(self):
     """Tests the function `histogram`.
     
     A histogram is saved at
     "tools/pseudo_visualization/histogram.png".
     The test is successful if the selected
     number of bins (60) yields a smooth, readable
     histogram of the 2000 data points.
     
     """
     data = numpy.random.normal(loc=0., scale=1., size=2000)
     tls.histogram(data,
                   'Standard normal distribution',
                   'tools/pseudo_visualization/histogram.png')
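
# Aside: a plausible sketch of what `tls.histogram(data, title, path)` is
# assumed to do: draw a 60-bin histogram, set the title and write the figure
# to `path`. The bin count matches the docstring above; everything else is
# an assumption about the library function.
import matplotlib.pyplot as plt

def histogram_sketch(data, title, path):
    # Hypothetical stand-in for `tls.histogram`, not the library implementation.
    plt.hist(data, bins=60)
    plt.title(title)
    plt.savefig(path)
    plt.clf()
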
 def checking_area_under_piecewise_linear_functions(self, sess, title,
                                                    path):
     """Creates the histogram of the areas under the piecewise linear functions and saves the histogram.
     
     Parameters
     ----------
     sess : Session
         Session that runs the graph.
     title : str
         Title of the histogram.
     path : str
         Path to the saved histogram. The
         path ends with ".png".
     
     """
     area = sess.run(self.node_area)
     tls.histogram(area, title, path)
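
# Aside: `self.node_area` is a node of the TensorFlow graph. As a rough
# illustration under the grid conventions used throughout this file, the
# area under a piecewise linear function whose sampled values are
# `parameters` can be computed with the trapezoidal rule; this is an
# assumed equivalence, not the graph's actual computation.
import numpy

def area_under_piecewise_linear(parameters, nb_points_per_interval):
    # The grid step is the inverse of the number of points per unit interval.
    return numpy.trapz(parameters, dx=1./nb_points_per_interval)
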
 def test_encode_mini_batches(self):
     """Tests the function `encode_mini_batches` in the file "eae/batching.py".
     
     For i = 0 ... 3, a histogram is saved at
     "eae/pseudo_visualization/encode_mini_batches/latent_variables_i.png".
     The test is successful if, in the first histogram,
     all the values are around 0 and, in the third
     histogram, most of the values are around 0.
     
     """
     batch_size = 2
     h_in = 64
     w_in = 48
     path_to_nb_itvs_per_side_load = ''
     path_to_restore = ''
     
     # 2 batches of luminance images will be created
     # by the function `encode_mini_batches`.
     luminances_uint8 = numpy.random.randint(0,
                                             high=256,
                                             size=(2*batch_size, h_in, w_in, 1),
                                             dtype=numpy.uint8)
     luminances_uint8[0, :, :, :] = 0
     luminances_uint8[1, :, :, :] = 255
     luminances_uint8[2, :, :, :] = 0
     
     # Only a portion of the luminance image of index
     # 2 is white.
     luminances_uint8[2, 0:2, 0:2, :] = 255
     entropy_ae = EntropyAutoencoder(batch_size,
                                     h_in,
                                     w_in,
                                     1.,
                                     12000.,
                                     path_to_nb_itvs_per_side_load,
                                     False)
     with tf.Session() as sess:
         entropy_ae.initialization(sess, path_to_restore)
         y_float32 = eae.batching.encode_mini_batches(luminances_uint8,
                                                      sess,
                                                      entropy_ae,
                                                      batch_size)
     for i in range(luminances_uint8.shape[0]):
         tls.histogram(y_float32[i, :, :, :].flatten(),
                       'Latent variables distribution for the image of index {}'.format(i),
                       'eae/pseudo_visualization/encode_mini_batches/latent_variables_{}.png'.format(i))
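
# Aside: a sketch of the batching pattern `encode_mini_batches` is assumed
# to follow: slice the stack of images into consecutive mini-batches, encode
# each slice and stack the results. `encode_one_batch` is a hypothetical
# callable standing in for a run of the encoder, not part of the codebase.
import numpy

def encode_in_batches_sketch(images_uint8, encode_one_batch, batch_size):
    nb_batches = images_uint8.shape[0]//batch_size
    pieces = [
        encode_one_batch(images_uint8[i*batch_size:(i + 1)*batch_size, ...])
        for i in range(nb_batches)
    ]
    return numpy.concatenate(pieces, axis=0)
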
 def checking_p_1(self, str_scope, str_variable, title, path):
     """Creates the histogram of a variable and saves the histogram.
     
     Parameters
     ----------
     str_scope : str
         Scope of the variable.
     str_variable : str
         Name of the variable.
     title : str
         Title of the histogram.
     path : str
         Path to the saved histogram. The
         path ends with ".png".
     
     """
     with tf.variable_scope(str_scope, reuse=True):
         variable = tf.get_variable(str_variable, dtype=tf.float32).eval()
     tls.histogram(variable.flatten(), title, path)
 def test_decode_mini_batches(self):
     """Tests the function `decode_mini_batches` in the file "eae/batching.py".
     
     For i = 0 ... 3, a histogram is saved at
     "eae/pseudo_visualization/decode_mini_batches/reconstructed_pixels_i.png".
     The test is successful if, in the first histogram,
     all the values are the same.
     
     """
     batch_size = 2
     h_in = 64
     w_in = 48
     path_to_restore = ''
     
     # 2 batches of quantized latent variables will be created
     # by the function `decode_mini_batches`.
     quantized_y_float32 = numpy.random.randint(
         -6,
         high=6,
         size=(2*batch_size, h_in//csts.STRIDE_PROD, w_in//csts.STRIDE_PROD, csts.NB_MAPS_3)
     ).astype(numpy.float32)
     quantized_y_float32[0, :, :, :] = 0.
     isolated_decoder = IsolatedDecoder(batch_size,
                                        h_in,
                                        w_in,
                                        False)
     with tf.Session() as sess:
         isolated_decoder.initialization(sess, path_to_restore)
         reconstruction_uint8 = eae.batching.decode_mini_batches(quantized_y_float32,
                                                                 sess,
                                                                 isolated_decoder,
                                                                 batch_size)
     for i in range(reconstruction_uint8.shape[0]):
         tls.histogram(reconstruction_uint8[i, :, :, :].flatten(),
                       'Pixel distribution for the reconstructed image of index {}'.format(i),
                       'eae/pseudo_visualization/decode_mini_batches/reconstructed_pixels_{}.png'.format(i))
 def test_gradient_entropy(self):
     """Tests the function `gradient_entropy`.
     
     A histogram is saved at
     "tools/pseudo_visualization/gradient_entropy.png".
     The test is successful if all the absolute
     values in the histogram are smaller than 1.e-9.
     
     """
     nb_points_per_interval = 10
     nb_intervals_per_side = 10
     height_samples = 24
     width_samples = 32
     
     nb_points = 2*nb_intervals_per_side*nb_points_per_interval + 1
     grid = numpy.linspace(-nb_intervals_per_side,
                           nb_intervals_per_side,
                           num=nb_points)
     parameters = scipy.stats.distributions.norm.pdf(grid,
                                                     loc=0.,
                                                     scale=1.)
     samples = numpy.random.normal(loc=0.,
                                   scale=1.,
                                   size=(height_samples, width_samples))
     gradients = tls.gradient_entropy(samples,
                                      parameters,
                                      nb_points_per_interval,
                                      nb_intervals_per_side)
     
     # `idx_initial` stores the linear piece
     # index of each sample before the gradient
     # checking.
     idx_initial = tls.index_linear_piece(samples.flatten(),
                                          nb_points_per_interval,
                                          nb_intervals_per_side)
     offset = 1.e-4
     approx = numpy.zeros((height_samples, width_samples))
     
     # `is_non_diff_fct` becomes true if the
     # non-differentiability of the piecewise
     # linear function at the edges of pieces
     # wrecks the gradient checking.
     is_non_diff_fct = False
     for i in range(height_samples):
         for j in range(width_samples):
             samples_pos = samples.copy()
             samples_pos[i, j] += offset
             
             # `idx_pos` stores the linear piece
             # index of each sample after adding
             # an offset.
             idx_pos = tls.index_linear_piece(samples_pos.flatten(),
                                              nb_points_per_interval,
                                              nb_intervals_per_side)
             diff_entropy_pos = tls.differential_entropy(samples_pos.flatten(),
                                                         parameters,
                                                         nb_points_per_interval,
                                                         nb_intervals_per_side)
             samples_neg = samples.copy()
             samples_neg[i, j] -= offset
             
             # `idx_neg` stores the linear piece
             # index of each sample after subtracting
             # an offset.
             idx_neg = tls.index_linear_piece(samples_neg.flatten(),
                                              nb_points_per_interval,
                                              nb_intervals_per_side)
             diff_entropy_neg = tls.differential_entropy(samples_neg.flatten(),
                                                         parameters,
                                                         nb_points_per_interval,
                                                         nb_intervals_per_side)
             approx[i, j] = 0.5*(diff_entropy_pos - diff_entropy_neg)/offset
             is_idx_pos_changed = not numpy.array_equal(idx_initial, idx_pos)
             is_idx_neg_changed = not numpy.array_equal(idx_initial, idx_neg)
             if is_idx_pos_changed or is_idx_neg_changed:
                 is_non_diff_fct = True
     diff = (gradients/height_samples) - approx
     
     if is_non_diff_fct:
         warnings.warn('The non-differentiability of the piecewise linear function wrecks the gradient checking. Re-run it.')
     else:
         tls.histogram(diff.flatten(),
                       'Gradient checking for the differential entropy',
                       'tools/pseudo_visualization/gradient_entropy.png')
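
# Aside: a tiny illustration of how non-differentiability wrecks central
# differences. For f(x) = |x|, a sample lying closer to the kink at 0 than
# the offset makes the two evaluations straddle the kink, so the finite
# difference no longer matches the one-sided derivative.
x = 1.e-5
offset = 1.e-4
exact = 1.                                               # derivative of |.| at x > 0
approx = 0.5*(abs(x + offset) - abs(x - offset))/offset  # equals 0.1, far from 1.0
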
                          args.nb_test,
                          paths_to_outputs)
    
    # `training_uint8.dtype` is equal to `numpy.uint8`.
    training_uint8 = numpy.load(paths_to_outputs[0])
    mean_training = numpy.load(paths_to_outputs[3])
    std_training = numpy.load(paths_to_outputs[4])
    sample_uint8 = training_uint8[0:nb_display, :]
    
    # The function `svhn.svhn.preprocess_svhn` checks
    # that `sample_uint8.dtype` is equal to `numpy.uint8`
    # and `sample_uint8.ndim` is equal to 2.
    sample_float64 = svhn.svhn.preprocess_svhn(sample_uint8,
                                               mean_training,
                                               std_training)
    tls.visualize_rows(sample_uint8,
                       32,
                       32,
                       10,
                       'svhn/visualization/sample_training.png')
    mu = numpy.mean(sample_float64, axis=0)
    sigma = numpy.sqrt(numpy.mean((sample_float64 - numpy.tile(mu, (nb_display, 1)))**2, axis=0))
    tls.histogram(mu,
                  'Mean of each pixel over {} training images after preprocessing'.format(nb_display),
                  'svhn/visualization/mean_after_preprocessing.png')
    tls.histogram(sigma,
                  'Std of each pixel over {} training images after preprocessing'.format(nb_display),
                  'svhn/visualization/std_after_preprocessing.png')
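
# Aside: a sketch of the standardization `svhn.svhn.preprocess_svhn` is
# assumed to apply: subtract the per-pixel training mean and divide by the
# per-pixel training standard deviation. The cast and the safeguards of the
# real function are assumptions here.
import numpy

def preprocess_sketch(sample_uint8, mean_training, std_training):
    return (sample_uint8.astype(numpy.float64) - mean_training)/std_training
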


def fit_maps(y_float32, path_to_histogram_locations, path_to_histogram_scales, paths, idx_map_exception=None):
    """Fits a Laplace density to the normed histogram of each latent variable feature map.
    
    Parameters
    ----------
    y_float32 : numpy.ndarray
        4D array with data-type `numpy.float32`.
        Latent variables. `y_float32[i, :, :, j]`
        is the jth latent variable feature map of
        the ith example.
    path_to_histogram_locations : str
        Path to the histogram of the Laplace locations. The
        path ends with ".png".
    path_to_histogram_scales : str
        Path to the histogram of the Laplace scales. The
        path ends with ".png".
    paths : list
        `paths[i]` is the path to the fitted normed histogram
        for the ith latent variable feature map. Each path ends
        with ".png".
    idx_map_exception : int, optional
        Index of the latent variable feature map that is
        not compressed as the other maps. The default value
        is None.
    
    Raises
    ------
    ValueError
        If `len(paths)` is not equal to `y_float32.shape[3]`.
    
    """
    if len(paths) != y_float32.shape[3]:
        raise ValueError('`len(paths)` is not equal to `y_float32.shape[3]`.')
    locations = []
    scales = []
    for i in range(y_float32.shape[3]):
        map_float32 = y_float32[:, :, :, i]
        edge_left = numpy.floor(numpy.amin(map_float32)).item()
        edge_right = numpy.ceil(numpy.amax(map_float32)).item()
        
        # The grid below contains 50 points
        # per unit interval.
        grid = numpy.linspace(edge_left,
                              edge_right,
                              num=50*int(edge_right - edge_left) + 1)
        
        # Let's assume that `map_float32` contains i.i.d. samples
        # from an unknown probability density function. The two
        # estimates below result from minimizing the
        # Kullback-Leibler divergence of the unknown probability
        # density function from our statistical model (Laplace
        # density of location `laplace_location` and scale
        # `laplace_scale`). This minimization amounts to maximum
        # likelihood estimation; strictly, the maximum likelihood
        # estimate of the Laplace location is the sample median,
        # and the sample mean is used here as a substitute that
        # coincides with it for symmetric data (see the sketch
        # after this function). To dive into the details, see:
        # "Estimating distributions and densities". 36-402,
        # advanced data analysis, CMU, 27 January 2011.
        laplace_location = numpy.mean(map_float32).item()
        laplace_scale = numpy.mean(numpy.absolute(map_float32 - laplace_location)).item()
        laplace_pdf = scipy.stats.laplace.pdf(grid,
                                              loc=laplace_location,
                                              scale=laplace_scale)
        handle = [plt.plot(grid, laplace_pdf, color='red')[0]]
        hist, bin_edges = numpy.histogram(map_float32,
                                          bins=60,
                                          density=True)
        plt.bar(bin_edges[0:60],
                hist,
                width=bin_edges[1] - bin_edges[0],
                align='edge',
                color='blue')
        plt.title('Latent variable feature map {}'.format(i + 1))
        plt.legend(handle,
                   [r'$f( . ; {0}, {1})$'.format(str(round(laplace_location, 2)), str(round(laplace_scale, 2)))],
                   prop={'size': 30},
                   loc=9)
        plt.savefig(paths[i])
        plt.clf()
        if idx_map_exception is None:
            locations.append(laplace_location)
            scales.append(laplace_scale)
        else:
            if i != idx_map_exception:
                locations.append(laplace_location)
                scales.append(laplace_scale)
    
    # `len(locations)` and `len(scales)` are equal.
    tls.histogram(numpy.array(locations),
                  'Histogram of {} locations'.format(len(locations)),
                  path_to_histogram_locations)
    tls.histogram(numpy.array(scales),
                  'Histogram of {} scales'.format(len(scales)),
                  path_to_histogram_scales)
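
# Aside (referenced in the comment inside `fit_maps`): the exact maximum
# likelihood estimates for a Laplace density are the sample median for the
# location and the mean absolute deviation from that median for the scale.
# `fit_maps` uses the sample mean as the location, which coincides with the
# median for symmetric data. A minimal sketch:
import numpy

def laplace_mle(samples):
    location = numpy.median(samples).item()
    scale = numpy.mean(numpy.absolute(samples - location)).item()
    return (location, scale)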