# %% markdown
# # Density Ensemble

# %%
n_networks = 10

# %%
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=n_train, decay_rate=0.9, staircase=True
)

ensemble = MapDensityEnsemble(
    n_networks=n_networks,
    input_shape=[1],
    layer_units=layer_units,
    layer_activations=layer_activations,
    learning_rate=lr_schedule,
    seed=seed,
)

ensemble.fit(
    x_train=x_train, y_train=y_train, batch_size=batch_size, epochs=epochs, verbose=0
)

# %%
prediction = ensemble.predict(x_plot)  # mixture-of-Gaussians prediction

fig, ax = plt.subplots(figsize=figsize)
plot_moment_matched_predictive_normal_distribution(
    x_plot=_x_plot,
    predictive_distribution=prediction,
    fig=fig,
    ax=ax,
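# %% markdown
# For reference, a minimal NumPy sketch (not the library's implementation) of the
# moment matching that reduces the equally weighted Gaussian mixture to a single
# normal: the matched mean is the mean of the member means, and the matched
# variance combines within-member and between-member variance.

# %%
import numpy as np


def moment_match_gaussian_mixture(means, stddevs):
    """Moment-match an equally weighted Gaussian mixture with one normal.

    `means` and `stddevs` have shape (n_networks, n_points).
    """
    means, stddevs = np.asarray(means), np.asarray(stddevs)
    matched_mean = means.mean(axis=0)
    # E[X^2] - E[X]^2, with E[X^2] averaged over the mixture members
    matched_var = (stddevs ** 2 + means ** 2).mean(axis=0) - matched_mean ** 2
    return matched_mean, np.sqrt(matched_var)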
    # save_path=figure_dir.joinpath(f"llb_moment_matched_{experiment_name}.pdf"),
)

# %% markdown
# # Using pretrained networks

# %%
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=n_train, decay_rate=0.9, staircase=True
)

ensemble = MapDensityEnsemble(
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=initial_unconstrained_scale,
    transform_unconstrained_scale_factor=transform_unconstrained_scale_factor,
    preprocess_x=preprocess_x,
    preprocess_y=preprocess_y,
    learning_rate=lr_schedule,
    names=[None, "feature_extractor", "output"],
    seed=0,
)

ensemble.fit(
    x_train=x_train, y_train=y_train, batch_size=batch_size, epochs=epochs, verbose=0
)

prediction = ensemble.predict(x_plot)

plot_moment_matched_predictive_normal_distribution(
    x_plot=x_plot,
    predictive_distribution=prediction,
    x_train=x_train,
    y_train=y_train,
)
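# %% markdown
# Sanity check of the learning rate schedule (illustration only): with
# `staircase=True`, `ExponentialDecay` evaluates to
# `initial_learning_rate * decay_rate ** (step // decay_steps)`,
# i.e. the rate drops by a factor of 0.9 once per epoch here.

# %%
for step in [0, n_train, 5 * n_train]:
    print(f"step {step}: lr = {lr_schedule(step).numpy():.5f}")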
[-prediction.log_prob(y_train) / n_train for prediction in prior_predictions]

current_state = overdispersed_prior_samples

# %% markdown
# ## Do a few gradient descent training epochs to help HMC find a region of higher posterior density

# %%
ensemble = MapDensityEnsemble(
    n_networks=n_chains,
    input_shape=input_shape,
    layer_units=layer_units,
    layer_activations=layer_activations,
    initial_unconstrained_scale=-1,  # doesn't matter, will be overwritten
    transform_unconstrained_scale_factor=transform_unconstrained_scale_factor,
    weight_prior=weight_prior,
    bias_prior=bias_prior,
    noise_scale_prior=noise_scale_prior,
    n_train=n_train,
    learning_rate=0.5,
)

assert check_posterior_equivalence(
    ensemble.networks[0], hmc_net, x_train, y_train, n_train=n_train
)

map_weights = hmc_to_map_weights(overdispersed_prior_samples)
# for weight in map_weights:
#     weight[-1] = backtransform_constrained_scale(
#         0.014, transform_unconstrained_scale_factor
#     ).numpy()
ensemble.set_weights(map_weights)
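# %% markdown
# The (un)constrained noise scale handling above is library-specific; a plausible
# sketch, assuming a softplus parametrisation with a slope factor (the names below
# are hypothetical stand-ins for `transform_unconstrained_scale` /
# `backtransform_constrained_scale`, not the library's actual functions):

# %%
import tensorflow_probability as tfp


def transform_unconstrained_scale_sketch(unconstrained, factor):
    # maps an unconstrained real to a positive noise scale; softplus is assumed
    return tf.math.softplus(factor * unconstrained)


def backtransform_constrained_scale_sketch(scale, factor):
    # inverse mapping, e.g. for the commented-out 0.014 initialisation above
    return tfp.math.softplus_inverse(scale) / factor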
plot_training_data(_x_train, y_train, fig=fig, ax=ax, y_lim=y_lim)
plot_ground_truth(_x_plot, y_ground_truth, fig=fig, ax=ax)
ax.legend()

# %%
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=20, decay_rate=0.9, staircase=True
)

ensemble = MapDensityEnsemble(
    n_networks=2,
    input_shape=[1],
    layer_units=layer_units,
    layer_activations=layer_activations,
    learning_rate=lr_schedule,
    seed=0,
)

ensemble.fit(
    x_train=x_train, y_train=y_train, batch_size=batchsize_train, epochs=100, verbose=0
)

# %%
mog_prediction = ensemble.predict(x_plot)  # mixture-of-Gaussians prediction

plot_moment_matched_predictive_normal_distribution(
    x_plot=_x_plot,
    predictive_distribution=mog_prediction,
)
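# %% markdown
# If `predict` returns a TFP mixture distribution (an assumption, suggested by the
# `log_prob` usage earlier), the moment-matched normal can also be built directly
# from the mixture's first two moments:

# %%
tfd = tfp.distributions
moment_matched = tfd.Normal(loc=mog_prediction.mean(), scale=mog_prediction.stddev())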