def train_walk_from_pretrained_osm(lam_kld=0.0):
    """Fine-tune a pretrained one-stage model inside a VCGLoop on TFD faces.

    Loads the 48x48 TFD data (unlabeled + train for training, valid for
    monitoring), builds a PeaNet discriminator, restores the inferencer and
    generator from pickled parameters under ``RESULT_PATH``, then trains the
    unrolled "walkout" chain with BPTT. Periodically writes sample-image
    grids and parameter pickles under ``RESULT_PATH``.

    Parameters
    ----------
    lam_kld : float
        Weight for the chain KLd cost (passed to ``set_lam_chain_kld``).

    Returns
    -------
    None

    Notes
    -----
    Relies on module-level names: ``np``, ``npr``, ``T``, ``load_tfd``,
    ``PeaNet``, ``load_infnet_from_file``, ``VCGLoop``, ``utils``,
    ``sample_masks``, ``sample_patch_masks``, ``RESULT_PATH``,
    ``LOGVAR_BOUND``, ``PRIOR_DIM``.
    """
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 400
    batch_reps = 5
    prior_sigma = 1.0
    # collapse the per-pixel mean to the global scalar mean, replicated
    # per-pixel, then per-row for a full batch of "mean images"
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr,axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    ###############################
    # Setup discriminator network #
    ###############################
    # Set some reasonable mlp parameters
    dn_params = {}
    # Set up some proto-networks
    pc0 = [data_dim, (300, 4), (300, 4), 10]
    dn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    dn_params['spawn_configs'] = [sc0]
    dn_params['spawn_weights'] = [1.0]
    # Set remaining params
    dn_params['init_scale'] = 1.0
    dn_params['lam_l2a'] = 1e-2
    dn_params['vis_drop'] = 0.2
    dn_params['hid_drop'] = 0.5
    # Initialize a network object to use as the discriminator
    DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
    DN.init_biases(0.0)

    #######################################################
    # Load inferencer and generator from saved parameters #
    #######################################################
    gn_fname = RESULT_PATH+"pt_osm_params_b100000_GN.pkl"
    in_fname = RESULT_PATH+"pt_osm_params_b100000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)

    ########################################################
    # Define parameters for the VCGLoop, and initialize it #
    ########################################################
    print("Building the VCGLoop...")
    vcgl_params = {}
    vcgl_params['x_type'] = 'gaussian'
    vcgl_params['xt_transform'] = 'sigmoid'
    vcgl_params['logvar_bound'] = LOGVAR_BOUND
    vcgl_params['cost_decay'] = 0.1
    vcgl_params['chain_type'] = 'walkout'
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, \
                   i_net=IN, g_net=GN, d_net=DN, chain_len=5, \
                   data_dim=data_dim, prior_dim=PRIOR_DIM, params=vcgl_params)

    # NOTE(review): this handle is never written to or closed; opening in
    # 'wb' truncates any existing results file. Confirm whether some other
    # code was meant to log here before removing it.
    out_file = open(RESULT_PATH+"pt_walk_results.txt", 'wb')

    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0005
    cost_1 = [0. for _ in range(10)]
    for i in range(100000):
        scale = float(min((i+1), 5000)) / 5000.0
        # Decay the learn rate every 25000 updates.
        # BUG FIX: was `if ((i+1 % 25000) == 0)`, which parses as
        # `i + (1 % 25000)` (i.e. `i + 1`) because `%` binds tighter than
        # `+`, so the decay never fired.
        if (((i + 1) % 25000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                                mom_1=0.9, mom_2=0.99)
        VCGL.set_disc_weights(dweight_gn=25.0, dweight_dn=25.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(lam_kld)
        # get some data to train with
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0,high=tr_samples,size=(2*batch_size,))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch, batch_reps)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 500) == 0):
            # NOTE(review): at i == 0 this averages a single update over 500,
            # so the very first printout under-reports the costs.
            cost_1 = [(v / 500.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[5], cost_1[6])
            print(o_str_1)
            cost_1 = [0. for v in cost_1]
        if ((i % 1000) == 0):
            tr_idx = npr.randint(low=0,high=Xtr.shape[0],size=(5,))
            va_idx = npr.randint(low=0,high=Xva.shape[0],size=(5,))
            Xd_batch = np.vstack([Xtr.take(tr_idx, axis=0), Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_mask_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0],:], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.0)
            Xm_patch = sample_patch_masks(Xc_samps, (48,48), (25,25))
            Xm_samps = Xm_rand * Xm_patch
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH+"pt_walk_prior_samples_b{0:d}.png".format(i)
            Xs = VCGL.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 5000 == 0):
            DN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
def train_walk_from_pretrained_osm(lam_kld=0.0):
    """Fine-tune a pretrained one-stage model inside a VCGLoop on TFD faces.

    NOTE(review): this is the second definition of this function in the
    file; it shadows the earlier one of the same name. Consider deleting
    one copy.

    Loads the 48x48 TFD data, builds a PeaNet discriminator, restores the
    inferencer and generator from pickled parameters under ``RESULT_PATH``,
    then trains the unrolled "walkout" chain with BPTT, periodically
    dumping sample-image grids and parameter pickles.

    Parameters
    ----------
    lam_kld : float
        Weight for the chain KLd cost (passed to ``set_lam_chain_kld``).

    Returns
    -------
    None
    """
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 400
    batch_reps = 5
    prior_sigma = 1.0
    # collapse the per-pixel mean to the global scalar mean, replicated
    # per-pixel, then per-row for a full batch of "mean images"
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr, axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    ###############################
    # Setup discriminator network #
    ###############################
    # Set some reasonable mlp parameters
    dn_params = {}
    # Set up some proto-networks
    pc0 = [data_dim, (300, 4), (300, 4), 10]
    dn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {
        'proto_key': 0,
        'input_noise': 0.1,
        'bias_noise': 0.1,
        'do_dropout': True
    }
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    dn_params['spawn_configs'] = [sc0]
    dn_params['spawn_weights'] = [1.0]
    # Set remaining params
    dn_params['init_scale'] = 1.0
    dn_params['lam_l2a'] = 1e-2
    dn_params['vis_drop'] = 0.2
    dn_params['hid_drop'] = 0.5
    # Initialize a network object to use as the discriminator
    DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
    DN.init_biases(0.0)

    #######################################################
    # Load inferencer and generator from saved parameters #
    #######################################################
    gn_fname = RESULT_PATH + "pt_osm_params_b100000_GN.pkl"
    in_fname = RESULT_PATH + "pt_osm_params_b100000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)

    ########################################################
    # Define parameters for the VCGLoop, and initialize it #
    ########################################################
    print("Building the VCGLoop...")
    vcgl_params = {}
    vcgl_params['x_type'] = 'gaussian'
    vcgl_params['xt_transform'] = 'sigmoid'
    vcgl_params['logvar_bound'] = LOGVAR_BOUND
    vcgl_params['cost_decay'] = 0.1
    vcgl_params['chain_type'] = 'walkout'
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, \
                   i_net=IN, g_net=GN, d_net=DN, chain_len=5, \
                   data_dim=data_dim, prior_dim=PRIOR_DIM, params=vcgl_params)

    # NOTE(review): this handle is never written to or closed; opening in
    # 'wb' truncates any existing results file. Confirm intent.
    out_file = open(RESULT_PATH + "pt_walk_results.txt", 'wb')

    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0005
    cost_1 = [0. for _ in range(10)]
    for i in range(100000):
        scale = float(min((i + 1), 5000)) / 5000.0
        # Decay the learn rate every 25000 updates.
        # BUG FIX: was `if ((i + 1 % 25000) == 0)`, which parses as
        # `i + (1 % 25000)` (i.e. `i + 1`) because `%` binds tighter than
        # `+`, so the decay never fired.
        if (((i + 1) % 25000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                                mom_1=0.9, mom_2=0.99)
        VCGL.set_disc_weights(dweight_gn=25.0, dweight_dn=25.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(lam_kld)
        # get some data to train with
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size, ))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0, high=tr_samples, size=(2 * batch_size, ))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch,
                                   batch_reps)
        cost_1 = [(cost_1[k] + 1. * outputs[k]) for k in range(len(outputs))]
        if ((i % 500) == 0):
            # NOTE(review): at i == 0 this averages a single update over 500,
            # so the very first printout under-reports the costs.
            cost_1 = [(v / 500.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[5], cost_1[6])
            print(o_str_1)
            cost_1 = [0. for v in cost_1]
        if ((i % 1000) == 0):
            tr_idx = npr.randint(low=0, high=Xtr.shape[0], size=(5, ))
            va_idx = npr.randint(low=0, high=Xva.shape[0], size=(5, ))
            Xd_batch = np.vstack(
                [Xtr.take(tr_idx, axis=0), Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_mask_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0], :], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.0)
            Xm_patch = sample_patch_masks(Xc_samps, (48, 48), (25, 25))
            Xm_samps = Xm_rand * Xm_patch
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH + "pt_walk_prior_samples_b{0:d}.png".format(i)
            Xs = VCGL.sample_from_prior(20 * 20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 5000 == 0):
            DN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
def test_gip_sigma_scale_mnist():
    """Qualitative evaluation of a pretrained walkout model on MNIST.

    Restores an inferencer/generator pair from pickled parameters, wraps
    them in a OneStageModel, then: plots per-dimension posterior KLds,
    scatter-plots free-energy terms on a validation subset, renders chain
    sample videos, and draws prior samples. All outputs are written to
    hard-coded image/video files in the working directory.

    Returns
    -------
    None
    """
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    batch_size = 100
    # global-mean "image" replicated per batch row
    # NOTE(review): Xc_mean is computed but not used in the visible code.
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(Xtr)
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0).astype(theano.config.floatX)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    # Load inferencer and generator from saved parameters
    gn_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_GN.pkl"
    in_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    # recover input/latent dimensions from the loaded inferencer
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim

    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params['x_type'] = 'gaussian'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
                        p_x_given_z=GN, q_z_given_x=IN, \
                        x_dim=x_dim, z_dim=z_dim, params=osm_params)

    # compute variational likelihood bound and its sub-components
    # (use a 5000-example shuffled validation subset)
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    file_name = "A_MNIST_POST_KLDS.png"
    post_klds = OSM.compute_post_klds(Xb)
    post_dim_klds = np.mean(post_klds, axis=0)
    utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
            file_name)
    # compute information about free-energy on validation set
    file_name = "A_MNIST_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
            x_label='Posterior KLd', y_label='Negative Log-likelihood')
    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP: five 3x3 blocks of 100-step chains,
    # each rendered as an .avi video
    for i in range(5):
        tr_idx = npr.randint(low=0,high=tr_samples,size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                # sigma_scale=1.0 means sampling at the model's own noise level
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10,:], loop_iters=100, \
                        sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists['data samples']))
        Xs, block_im_dim = block_video(Xs, (28,28), (3,3))
        to_video(Xs, block_im_dim, "A_MNIST_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        #sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #        sigma_scale=1.0)
        #Xs = np.vstack(sample_lists["data samples"])
        #file_name = "TFD_TEST_{0:d}.png".format(i)
        #utils.visualize_samples(Xs, file_name, num_rows=15)
    # a 20x20 grid of independent prior samples
    file_name = "A_MNIST_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20*20)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_1.png", num_rows=20)
    # ##########
    # # AGAIN! #
    # ##########
    # Xs = OSM.sample_from_prior(10000)
    # tr_idx = npr.randint(low=0,high=tr_samples,size=(5000,))
    # Xva = Xtr.take(tr_idx, axis=0)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_2.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_2.png", num_rows=20)
    return
def test_gip_sigma_scale_tfd():
    """Qualitative evaluation of a pretrained walkout model on TFD faces.

    Restores an inferencer/generator pair from pickled parameters, wraps
    them in a OneStageModel, scatter-plots free-energy terms on a test
    subset, renders chain sample videos, and draws prior samples. All
    outputs are written to hard-coded image/video files.

    Returns
    -------
    None
    """
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    # NOTE(review): "Xva" here is actually the TFD *test* fold.
    data_file = "data/tfd_data_48x48.pkl"
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="unlabeled", fold="all")
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="train", fold="all")
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="test", fold="all")
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100

    # Symbolic inputs
    Xd = T.matrix(name="Xd")
    Xc = T.matrix(name="Xc")
    Xm = T.matrix(name="Xm")
    Xt = T.matrix(name="Xt")

    # Load inferencer and generator from saved parameters
    gn_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_GN.pkl"
    in_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    # recover input/latent dimensions from the loaded inferencer
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim

    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params["x_type"] = "gaussian"
    osm_params["xt_transform"] = "sigmoid"
    osm_params["logvar_bound"] = LOGVAR_BOUND
    OSM = OneStageModel(
        rng=rng, Xd=Xd, Xc=Xc, Xm=Xm,
        p_x_given_z=GN, q_z_given_x=IN,
        x_dim=x_dim, z_dim=z_dim, params=osm_params
    )

    # # compute variational likelihood bound and its sub-components
    # (use a 5000-example shuffled subset)
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    # file_name = "A_TFD_POST_KLDS.png"
    # post_klds = OSM.compute_post_klds(Xb)
    # post_dim_klds = np.mean(post_klds, axis=0)
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
    #         file_name)
    # compute information about free-energy on validation set
    file_name = "A_TFD_KLD_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name,
                       x_label="Posterior KLd", y_label="Negative Log-likelihood")
    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP: five 3x3 blocks of 100-step chains,
    # each rendered as an .avi video
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                # sigma_scale=1.0 means sampling at the model's own noise level
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10, :], loop_iters=100, sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists["data samples"]))
        Xs, block_im_dim = block_video(Xs, (48, 48), (3, 3))
        to_video(Xs, block_im_dim, "A_TFD_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        # sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #         sigma_scale=1.0)
        # Xs = np.vstack(sample_lists["data samples"])
        # file_name = "TFD_TEST_{0:d}.png".format(i)
        # utils.visualize_samples(Xs, file_name, num_rows=15)
    # a 20x20 grid of independent prior samples
    file_name = "A_TFD_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.09, 0.095, 0.1, 0.105, 0.11], 10)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_TFD_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_TFD_BAD_FACES_1.png", num_rows=20)
    return
def test_gip_sigma_scale_tfd():
    """Qualitative evaluation of a pretrained walkout model on TFD faces.

    NOTE(review): this is the second definition of this function in the
    file; it shadows the earlier one of the same name. Consider deleting
    one copy.

    Restores an inferencer/generator pair from pickled parameters, wraps
    them in a OneStageModel, scatter-plots free-energy terms on a test
    subset, renders chain sample videos, and draws prior samples. All
    outputs are written to hard-coded image/video files.

    Returns
    -------
    None
    """
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    # NOTE(review): "Xva" here is actually the TFD *test* fold.
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='test', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    # Load inferencer and generator from saved parameters
    gn_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_GN.pkl"
    in_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    # recover input/latent dimensions from the loaded inferencer
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim

    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params['x_type'] = 'gaussian'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
                        p_x_given_z=GN, q_z_given_x=IN, \
                        x_dim=x_dim, z_dim=z_dim, params=osm_params)

    # # compute variational likelihood bound and its sub-components
    # (use a 5000-example shuffled subset)
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    # file_name = "A_TFD_POST_KLDS.png"
    # post_klds = OSM.compute_post_klds(Xb)
    # post_dim_klds = np.mean(post_klds, axis=0)
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
    #         file_name)
    # compute information about free-energy on validation set
    file_name = "A_TFD_KLD_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
            x_label='Posterior KLd', y_label='Negative Log-likelihood')
    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP: five 3x3 blocks of 100-step chains,
    # each rendered as an .avi video
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100, ))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                # sigma_scale=1.0 means sampling at the model's own noise level
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10,:], loop_iters=100, \
                        sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists['data samples']))
        Xs, block_im_dim = block_video(Xs, (48, 48), (3, 3))
        to_video(Xs, block_im_dim, "A_TFD_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        #sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #        sigma_scale=1.0)
        #Xs = np.vstack(sample_lists["data samples"])
        #file_name = "TFD_TEST_{0:d}.png".format(i)
        #utils.visualize_samples(Xs, file_name, num_rows=15)
    # a 20x20 grid of independent prior samples
    file_name = "A_TFD_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.09, 0.095, 0.1, 0.105, 0.11], 10)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_TFD_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_TFD_BAD_FACES_1.png", num_rows=20)
    return