# initialize gmm parameters
pgm_params = init_pgm_param(K, N, alpha=1., niw_conc=1., random_scale=3.)
params = pgm_params, loglike_params, recogn_params

# set up encoder/decoder and plotting
encode_mean, decode_mean = make_encoder_decoder(recognize, decode)
plot = make_plotter_2d(recognize, decode, data, num_clusters, params,
                       plot_every=500)

# instantiate svae gradient function
gradfun = make_gradfun(run_inference, recognize, loglike, pgm_prior_params, data)

# optimize
params = sgd(gradfun(batch_size=50, num_samples=5, natgrad_scale=1,
                     callback=plot),
             params, num_iters=5000)
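# A minimal checkpointing sketch for the learned parameters (the filename is
# hypothetical, and plain pickle is assumed to handle this params tuple):
import pickle
with open('svae_params.pkl', 'wb') as f:
    pickle.dump(params, f)
# to restore later:
# with open('svae_params.pkl', 'rb') as f:
#     params = pickle.load(f)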
    init_log_std = -5 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])
    variational_params = optfun(num_iters, init_var_params, callback)
    return np.array(elbos)

# let's optimize this with a few different step sizes
elbo_lists = []
step_sizes = [.1, .25, .5]
for step_size in step_sizes:
    # optimize with standard gradient + adam
    optfun = lambda n, init, cb: adam(gradient, init, step_size=step_size,
                                      num_iters=n, callback=cb)
    standard_lls = optimize_and_lls(optfun)

    # optimize with natural gradient + sgd, negligible momentum
    optnat = lambda n, init, cb: sgd(natural_gradient, init, step_size=step_size,
                                     num_iters=n, callback=cb, mass=.001)
    natural_lls = optimize_and_lls(optnat)

    elbo_lists.append((standard_lls, natural_lls))

# visually compare the ELBO traces
plt.figure(figsize=(12, 8))
colors = ['b', 'k', 'g']
for col, ss, (stand_lls, nat_lls) in zip(colors, step_sizes, elbo_lists):
    plt.plot(np.arange(len(stand_lls)), stand_lls, '--',
             label="standard (adam, step-size = %2.2f)" % ss, alpha=.5, c=col)
    plt.plot(np.arange(len(nat_lls)), nat_lls, '-',
             label="natural (sgd, step-size = %2.2f)" % ss, c=col)

llrange = natural_lls.max() - natural_lls.min()
plt.ylim((natural_lls.max() - llrange * .1, natural_lls.max() + 10))
plt.xlabel("optimization iteration")
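# A minimal sketch of one way the natural_gradient used above could be
# implemented, assuming the variational family is a mean-field Gaussian with
# params = [mean, log_std]. Under that parameterization the Fisher information
# is block-diagonal, F = blockdiag(diag(exp(-2*log_std)), 2*I), so the natural
# gradient F^{-1} g is an elementwise rescaling of the standard gradient
# (this is a sketch, not the library's definition):
def natural_gradient_sketch(params, t):
    g = gradient(params, t)
    log_std = params[D:]
    return np.concatenate([np.exp(2 * log_std) * g[:D],  # mean block: sigma^2 * grad
                           0.5 * g[D:]])                  # log-std block: grad / 2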
N = 2  # number of latent dimensions
P = 2  # number of observation dimensions

# generate synthetic data
data = make_pinwheel_data(0.3, 0.05, num_clusters, samples_per_cluster, 0.25)

# set prior natparam to something sparsifying but otherwise generic
pgm_prior_params = init_pgm_param(K, N, alpha=0.05/K, niw_conc=0.5)

# construct recognition and decoder networks and initialize them
recognize, recogn_params = \
    init_gresnet(P, [(40, np.tanh), (40, np.tanh), (2*N, gaussian_info)])
decode, loglike_params = \
    init_gresnet(N, [(40, np.tanh), (40, np.tanh), (2*P, gaussian_mean)])
loglike = make_loglike(decode)

# initialize gmm parameters
pgm_params = init_pgm_param(K, N, alpha=1., niw_conc=1., random_scale=3.)
params = pgm_params, loglike_params, recogn_params

# set up encoder/decoder and plotting
encode_mean, decode_mean = make_encoder_decoder(recognize, decode)
plot = make_plotter_2d(recognize, decode, data, num_clusters, params,
                       plot_every=100)

# instantiate svae gradient function
gradfun = make_gradfun(run_inference, recognize, loglike, pgm_prior_params, data)

# optimize
params = sgd(gradfun(batch_size=50, num_samples=1, natgrad_scale=1e4,
                     callback=plot),
             params, num_iters=1000, step_size=1e-3)
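# A hedged usage sketch: embed the observations with the encoder built above
# and scatter-plot the 2-D latents. The encode_mean call signature here is an
# assumption for illustration, not taken from the library.
latents = encode_mean(data, params)
plt.figure()
plt.scatter(latents[:, 0], latents[:, 1], s=5, c='k')
plt.title("posterior-mean latent embedding of the pinwheel data")
plt.show()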
    return g_reparam(epsilon, theta) \
         + g_score(epsilon, theta) \
         + g_entropy(theta)

elbo_values = []
def callback(theta, t, g):
    _elbo = elbo(theta)
    elbo_values.append(_elbo)

theta_0 = wrap(2.0, 2.0)
theta_star = sgd(lambda th, t: -1 * g_elbo(th), theta_0,
                 num_iters=300, callback=callback)
alpha_star, beta_star = unwrap(theta_star)

print("true a = ", alpha_true)
print("infd a = ", alpha_star)
print("true b = ", beta_true)
print("infd b = ", beta_star)
print("E_q(z; theta)[z] = ", alpha_star / beta_star)

plt.plot(elbo_values)
plt.xlabel("Iteration")
plt.ylabel("ELBO")
plt.show()

import scipy.stats
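# A hedged follow-up sketch using scipy.stats (imported above): overlay the
# fitted variational Gamma(alpha_star, beta_star) density on the true
# Gamma(alpha_true, beta_true) density to check the fit visually. Note that
# scipy's gamma takes shape a and scale = 1/rate.
zs = np.linspace(1e-3, 5.0, 200)
plt.plot(zs, scipy.stats.gamma.pdf(zs, a=alpha_true, scale=1. / beta_true),
         label="true")
plt.plot(zs, scipy.stats.gamma.pdf(zs, a=alpha_star, scale=1. / beta_star),
         '--', label="variational fit")
plt.xlabel("z")
plt.ylabel("density")
plt.legend()
plt.show()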