# Example #1
# 0
def test_parameters(mncs, learning_rates, batch_sizes, nums_hidden):
    """Grid-search over all combinations of learning rate, batch size, and
    number of hidden units, fitting one MNIST classifier per setting.

    Args:
        mncs: dict mapping (learning_rate, batch_size, num_hidden) -> fitted
            classifier. Acts as a resume cache: settings already present are
            skipped. Mutated in place.
        learning_rates: sequence of learning rates to try (len() is taken).
        batch_sizes: sequence of batch sizes to try.
        nums_hidden: sequence of hidden-unit counts to try.

    Side effects: mutates ``mncs``, persists it through the module-level
    ``vartbl`` / ``save_vartbl(vartbl, fname)`` after every fit, and prints
    progress with a projected-remaining-time estimate.
    """
    # Local import keeps this fix self-contained; product flattens the
    # original three nested loops into one.
    from itertools import product

    # Count of models actually fit so far (excludes cache hits)
    i: int = 0
    # Total number of parameter combinations in the grid
    iMax = len(learning_rates) * len(batch_sizes) * len(nums_hidden)
    # Combinations skipped because they were already cached in mncs
    skips = 0
    # Start the timer
    t0 = time.time()
    # Iterate over the full cartesian product of parameter settings
    for learning_rate, batch_size, num_hidden in product(
            learning_rates, batch_sizes, nums_hidden):
        # Key for saving these parameter settings in the dictionary of models
        key = (learning_rate, batch_size, num_hidden)
        # Is this parameter setting already present in mncs? Then skip it.
        if key in mncs:
            skips += 1
            print(
                f'Already fit learning_rate={learning_rate}, batch_size={batch_size}, num_hidden={num_hidden}.'
            )
            continue
        # Create a two layer model with the given number of hidden units
        model = TwoLayerNetwork(num_hidden)
        # Create an MNIST classifier with the given learning rate and batch
        # size; weight_decay, validation_size and epochs come from module scope.
        mnc_curr = MNIST_Classifier(model=model,
                                    learning_rate=learning_rate,
                                    weight_decay=weight_decay,
                                    batch_size=batch_size,
                                    validation_size=validation_size,
                                    epochs=epochs)
        # Train this model; turn off visualization of loss by epoch until the end of training
        print(
            f'Training two layer network with learning_rate {learning_rate}, '
            f'batch_size = {batch_size}, num_hidden={num_hidden}.')
        mnc_curr.fit(viz_val_loss=False)
        # Save this model to the dictionary mncs
        mncs[key] = mnc_curr
        # Persist the cache so an interrupted run can resume where it stopped
        vartbl['mncs'] = mncs
        save_vartbl(vartbl, fname)
        # Status update: project remaining time from the average time per fit
        i += 1
        elapsed = time.time() - t0
        projected = (iMax - skips - i) / i * elapsed
        print(
            f'Elapsed time {int(elapsed)}, projected remaining {int(projected)} (seconds).'
        )
# Example #2
# 0
# Number of iterations for ADVI fit
num_iters: int = 50000

# Fit the model using ADVI
# Tried to fit using FullRankADVI as well; results were horrible
try:
    # Reuse a previously saved fit when one is cached in vartbl
    advi = vartbl['advi']
    print(f'Loaded ADVI fit for Gaussian Mixture Model.')
# Catch only the cache miss; a bare except would also swallow
# KeyboardInterrupt and genuine bugs (e.g. NameError).
except KeyError:
    print(f'Running ADVI fit for Gaussian Mixture Model...')
    advi = pm.ADVI(model=model)
    advi.fit(n=num_iters,
             obj_optimizer=pm.adam(),
             callbacks=[CheckParametersConvergence()])
    # Persist the fit so later runs hit the cache above
    vartbl['advi'] = advi
    save_vartbl(vartbl, fname)


def plot_elbo(elbo, plot_step, title):
    """Plot the ELBO trace, downsampled every plot_step iterations.

    Returns the matplotlib figure so the caller can save or show it.
    """
    # Downsample the trace: every plot_step-th value with its iteration index
    num_points = len(elbo)
    iterations = np.arange(0, num_points, plot_step)
    values = elbo[::plot_step]
    # Build and label the figure
    fig, ax = plt.subplots(figsize=[12, 8])
    ax.set_title(title)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('ELBO')
    ax.plot(iterations, values, color='b')
    ax.grid()
    return fig
# Grid of lambda1 and lambda2
grid_size: int = 200
lambda1_samp = np.linspace(-10, 10, grid_size)
lambda2_samp = np.linspace(-10, 10, grid_size)
lambda1_grid, lambda2_grid = np.meshgrid(lambda1_samp, lambda2_samp)

# Grid of the total loss function; reuse a cached grid when available
try:
    loss_grid = vartbl['loss_grid']
# Catch only the cache miss from the dict lookup; a bare except would hide
# real errors (e.g. a NameError or KeyboardInterrupt during the rebuild).
except KeyError:
    loss_grid = np.zeros((grid_size, grid_size))
    # Row index j follows lambda2 so loss_grid matches the meshgrid layout
    for i, lambda1 in enumerate(lambda1_samp):
        for j, lambda2 in enumerate(lambda2_samp):
            loss_grid[j, i] = L(lambda1, lambda2)
    # Persist the grid so later runs hit the cache above
    vartbl['loss_grid'] = loss_grid
    save_vartbl(vartbl, fname)

# Plot the loss function - large scale overview
fig, ax = plt.subplots(figsize=[16, 8])
ax.set_title('Loss Function on Entire Data Set - Overview')
ax.set_xlabel(r'$\lambda_1$')
ax.set_ylabel(r'$\lambda_2$')
# Contour plot of the loss over the (lambda1, lambda2) grid
cs = ax.contour(lambda1_grid, lambda2_grid, loss_grid, linewidths=8)
# Mark the minimizer as a single red dot (linewidth=0 suppresses the line)
ax.plot(lambda1_min, lambda2_min, label='Min',
        marker='o', markersize=12, linewidth=0, color='r')