def test_parameters(mncs, learning_rates, batch_sizes, nums_hidden):
    """Iterate over all combinations of learning rate, batch size, and number of hidden units."""
    # Iteration counter
    i: int = 0
    # Compute total number of iterations
    iMax = len(learning_rates) * len(batch_sizes) * len(nums_hidden)
    skips = 0
    # Start the timer
    t0 = time.time()
    # Iterate over learning rates
    for learning_rate in learning_rates:
        # Iterate over batch sizes
        for batch_size in batch_sizes:
            # Iterate over number of hidden units
            for num_hidden in nums_hidden:
                # Key for saving these parameter settings in the dictionary of models
                key = (learning_rate, batch_size, num_hidden)
                # Is this parameter setting already present in mncs? Then skip it.
                if key in mncs:
                    skips += 1
                    print(f'Already fit learning_rate={learning_rate}, '
                          f'batch_size={batch_size}, num_hidden={num_hidden}.')
                    continue
                # Create a two-layer model with the given number of hidden units
                model = TwoLayerNetwork(num_hidden)
                # Create an MNIST classifier with the given learning rate and batch size
                mnc_curr = MNIST_Classifier(model=model,
                                            learning_rate=learning_rate,
                                            weight_decay=weight_decay,
                                            batch_size=batch_size,
                                            validation_size=validation_size,
                                            epochs=epochs)
                # Train this model; turn off visualization of loss by epoch until the end of training
                print(f'Training two layer network with learning_rate={learning_rate}, '
                      f'batch_size={batch_size}, num_hidden={num_hidden}.')
                mnc_curr.fit(viz_val_loss=False)
                # Save this model to the dictionary mncs
                mncs[key] = mnc_curr
                # Persist the updated dictionary to the vartbl
                vartbl['mncs'] = mncs
                save_vartbl(vartbl, fname)
                # Status update
                i += 1
                t1 = time.time()
                elapsed = t1 - t0
                projected = (iMax - skips - i) / i * elapsed
                print(f'Elapsed time {int(elapsed)}, projected remaining {int(projected)} (seconds).')
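
# A minimal usage sketch for test_parameters. The grid values below are
# hypothetical placeholders, and mncs is assumed to be the dictionary of
# previously fit models cached in vartbl (empty on a first run). The globals
# the function relies on (time, vartbl, save_vartbl, fname, weight_decay,
# validation_size, epochs) must already be in scope.
mncs = vartbl.get('mncs', dict())
test_parameters(mncs,
                learning_rates=[1e-3, 1e-2],
                batch_sizes=[64, 128],
                nums_hidden=[32, 64])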
# Number of iterations for ADVI fit
num_iters: int = 50000

# Fit the model using ADVI
# Tried to fit using FullRankADVI as well; results were horrible
try:
    advi = vartbl['advi']
    print('Loaded ADVI fit for Gaussian Mixture Model.')
except KeyError:
    print('Running ADVI fit for Gaussian Mixture Model...')
    advi = pm.ADVI(model=model)
    advi.fit(n=num_iters, obj_optimizer=pm.adam(),
             callbacks=[CheckParametersConvergence()])
    vartbl['advi'] = advi
    save_vartbl(vartbl, fname)


def plot_elbo(elbo, plot_step, title):
    """Generate the ELBO plot."""
    fig, ax = plt.subplots(figsize=[12, 8])
    ax.set_title(title)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('ELBO')
    n = len(elbo)
    plot_x = np.arange(0, n, plot_step)
    plot_y = elbo[::plot_step]
    ax.plot(plot_x, plot_y, color='b')
    ax.grid()
    return fig
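
# Usage sketch for plot_elbo: in PyMC3, advi.hist records the optimization
# loss per iteration, which is the negative ELBO, so we negate it before
# plotting. The plot_step of 100 and the title are arbitrary choices here,
# not values taken from the original run.
elbo = -advi.hist
fig = plot_elbo(elbo, plot_step=100,
                title='ELBO by Iteration for Gaussian Mixture Model ADVI Fit')
plt.show()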
# Grid of lambda1 and lambda2
grid_size: int = 200
lambda1_samp = np.linspace(-10, 10, grid_size)
lambda2_samp = np.linspace(-10, 10, grid_size)
lambda1_grid, lambda2_grid = np.meshgrid(lambda1_samp, lambda2_samp)

# Grid of the total loss function; reuse the cached grid if it was saved previously
try:
    loss_grid = vartbl['loss_grid']
except KeyError:
    loss_grid = np.zeros((grid_size, grid_size))
    for i, lambda1 in enumerate(lambda1_samp):
        for j, lambda2 in enumerate(lambda2_samp):
            loss_grid[j, i] = L(lambda1, lambda2)
    vartbl['loss_grid'] = loss_grid
    save_vartbl(vartbl, fname)

# Plot the loss function - large scale overview
fig, ax = plt.subplots()
fig.set_size_inches([16, 8])
ax.set_title('Loss Function on Entire Data Set - Overview')
ax.set_xlabel(r'$\lambda_1$')
ax.set_ylabel(r'$\lambda_2$')
cs = ax.contour(lambda1_grid, lambda2_grid, loss_grid, linewidths=8)
ax.plot(lambda1_min, lambda2_min, label='Min', marker='o',
        markersize=12, linewidth=0, color='r')
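
# Optional finishing touches for the contour plot above, a sketch assuming an
# interactive matplotlib backend: label the contour levels and show a legend
# for the marked minimum (lambda1_min, lambda2_min), which is assumed to have
# been computed earlier.
ax.clabel(cs, inline=True, fontsize=10)
ax.legend()
plt.show()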