def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0): obj_imgs = [] obj_coords = [] for obj in xobjs: imgs, coords = generate_batch(num_samples, obj_type=obj) obj_imgs.append(imgs) obj_coords.append(coords) seq_len = obj_coords[0].shape[0] batch_size = obj_coords[0].shape[1] x_imgs = np.zeros(obj_imgs[0].shape) y_imgs = np.zeros(obj_imgs[0].shape) y_coords = np.zeros(obj_coords[0].shape) for o_num in range(len(xobjs)): x_imgs = x_imgs + obj_imgs[o_num] if o_num in yobjs: y_imgs = y_imgs + obj_imgs[o_num] mask = npr.rand(seq_len, batch_size) < (1. / (o_num+1)) mask = mask[:,:,np.newaxis] y_coords = (mask * obj_coords[o_num]) + ((1.-mask) * y_coords) # rescale coordinates as desired y_coords = img_scale * y_coords # add noise to image sequences pix_mask = npr.rand(*x_imgs.shape) < 0.05 pix_noise = npr.rand(*x_imgs.shape) x_imgs = x_imgs + (pix_mask * pix_noise) # clip to 0...0.99 x_imgs = np.maximum(x_imgs, 0.001) x_imgs = np.minimum(x_imgs, 0.999) y_imgs = np.maximum(y_imgs, 0.001) y_imgs = np.minimum(y_imgs, 0.999) return [to_fX(x_imgs), to_fX(y_imgs), to_fX(y_coords)]
def generate_batch(num_samples): for n in range(len(objects)): # generate a minibatch of trajectories traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len) traj_x = traj_pos[:,:,0] traj_y = traj_pos[:,:,1] # draw the trajectories center_x = to_fX( traj_x.T.ravel() ) center_y = to_fX( traj_y.T.ravel() ) delta = to_fX( np.ones(center_x.shape) ) sigma = to_fX( np.ones(center_x.shape) ) if n == 0: W = write_funcs[n](center_y, center_x, delta, 0.05*sigma) else: W += write_funcs[n](center_y, center_x, delta, 0.05*sigma) W = utils.scale_to_unit_interval(W) # shape trajectories into a batch for passing to the model batch_imgs = np.zeros((num_samples, traj_len, obs_dim)) for i in range(num_samples): start_idx = i * traj_len end_idx = start_idx + traj_len img_set = W[start_idx:end_idx,:] batch_imgs[i,:,:] = img_set batch_imgs = np.swapaxes(batch_imgs, 0, 1) batch_imgs = to_fX( batch_imgs ) return batch_imgs
def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0): obj_imgs = [] obj_coords = [] for obj in xobjs: imgs, coords = generate_batch(num_samples+1, obj_type=obj) obj_imgs.append(imgs) obj_coords.append(coords) seq_len = obj_imgs[0].shape[0] - 1 batch_size = obj_imgs[0].shape[1] obs_dim = obj_imgs[0].shape[2] x_imgs = np.zeros((seq_len, batch_size, obs_dim)) y_imgs = np.zeros((seq_len, batch_size, obs_dim)) for o_num in range(len(xobjs)): x_imgs = x_imgs + obj_imgs[o_num][:-1,:,:] if o_num in yobjs: y_imgs = y_imgs + obj_imgs[o_num][1:,:,:] # # add noise to image sequences # pix_mask = npr.rand(*x_imgs.shape) < 0.05 # pix_noise = npr.rand(*x_imgs.shape) # x_imgs = x_imgs + (pix_mask * pix_noise) # clip to 0...0.99 x_imgs = np.maximum(x_imgs, 0.001) x_imgs = np.minimum(x_imgs, 0.999) y_imgs = np.maximum(y_imgs, 0.001) y_imgs = np.minimum(y_imgs, 0.999) return [to_fX(x_imgs), to_fX(y_imgs)]
def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0): obj_imgs = [] obj_coords = [] for obj in xobjs: imgs, coords = generate_batch(num_samples + 1, obj_type=obj) obj_imgs.append(imgs) obj_coords.append(coords) seq_len = obj_imgs[0].shape[0] - 1 batch_size = obj_imgs[0].shape[1] obs_dim = obj_imgs[0].shape[2] x_imgs = np.zeros((seq_len, batch_size, obs_dim)) y_imgs = np.zeros((seq_len, batch_size, obs_dim)) for o_num in range(len(xobjs)): x_imgs = x_imgs + obj_imgs[o_num][:-1, :, :] if o_num in yobjs: y_imgs = y_imgs + obj_imgs[o_num][1:, :, :] # # add noise to image sequences # pix_mask = npr.rand(*x_imgs.shape) < 0.05 # pix_noise = npr.rand(*x_imgs.shape) # x_imgs = x_imgs + (pix_mask * pix_noise) # clip to 0...0.99 x_imgs = np.maximum(x_imgs, 0.001) x_imgs = np.minimum(x_imgs, 0.999) y_imgs = np.maximum(y_imgs, 0.001) y_imgs = np.minimum(y_imgs, 0.999) return [to_fX(x_imgs), to_fX(y_imgs)]
def set_lam_kld(self, lam_kld_q2p=1.0, lam_kld_p2q=1.0): """ Set the relative weight of various KL-divergences. """ zero_ary = np.zeros((1,)) new_lam = zero_ary + lam_kld_q2p self.lam_kld_q2p.set_value(to_fX(new_lam)) new_lam = zero_ary + lam_kld_p2q self.lam_kld_p2q.set_value(to_fX(new_lam)) return
def prior_sampler(samp_count): x_samps = to_fX( np.zeros((samp_count, self.obs_dim)) ) old_switch = self.train_switch.get_value(borrow=False) # set model to generation mode self.set_train_switch(switch_val=0.0) z_samps = to_fX( npr.randn(samp_count, self.z_dim) ) model_samps = sample_func(z_samps, x_samps) # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) return model_samps
def set_lam_kld(self, lam_kld_p=1.0, lam_kld_q=1.0): """ Set the relative weight of prior KL-divergence vs. data likelihood. """ zero_ary = np.zeros((1,)) new_lam = zero_ary + lam_kld_p self.lam_kld_p.set_value(to_fX(new_lam)) new_lam = zero_ary + lam_kld_q self.lam_kld_q.set_value(to_fX(new_lam)) return
def raw_cost_computer(XI, XO, XM): _all_costs = cost_func(to_fX(XI), to_fX(XO), to_fX(XM)) _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True), axis=0) _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True), axis=0) _step_klds = np.mean(np.sum(_all_costs[1], axis=2, keepdims=True), axis=1) _step_klds = to_fX( np.asarray([k for k in _step_klds]) ) _step_nlls = np.mean(_all_costs[0], axis=1) _step_nlls = to_fX( np.asarray([k for k in _step_nlls]) ) results = [_step_nlls, _step_klds, _kld_q2p, _kld_p2q] return results
def set_lam_kld(self, lam_kld_p=0.0, lam_kld_q=1.0, lam_kld_g=0.0): """ Set the relative weight of prior KL-divergence vs. data likelihood. """ zero_ary = np.zeros((1, )) new_lam = zero_ary + lam_kld_p self.lam_kld_p.set_value(to_fX(new_lam)) new_lam = zero_ary + lam_kld_q self.lam_kld_q.set_value(to_fX(new_lam)) new_lam = zero_ary + lam_kld_g self.lam_kld_g.set_value(to_fX(new_lam)) return
def test_tfd_nll(occ_dim=15, drop_prob=0.0): RESULT_PATH = "IMP_TFD_TM/" ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) data_file = 'data/tfd_data_48x48.pkl' dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all') Xtr_unlabeled = dataset[0] dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all') Xtr_train = dataset[0] Xtr = np.vstack([Xtr_unlabeled, Xtr_train]) dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all') Xva = dataset[0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], ))) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') log_name = "{}_RESULTS.txt".format(result_tag) out_file = open(log_name, 'wb') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) result = TM.best_match_nll(xo, xm) match_on_known = np.mean(result[0]) match_on_unknown = np.mean(result[1]) str0 = "Test 1:" str1 = " match on known : {}".format(match_on_known) str2 = " match on unknown : {}".format(match_on_unknown) joint_str = "\n".join([str0, str1, str2]) print(joint_str) out_file.write(joint_str + "\n") out_file.flush() out_file.close() return
def set_sgd_params(self, lr=0.01, mom_1=0.9, mom_2=0.999): """ Set learning rate and momentum parameter for all updates. """ zero_ary = np.zeros((1,)) # set learning rate new_lr = zero_ary + lr self.lr.set_value(to_fX(new_lr)) # set momentums (use first and second order "momentum") new_mom_1 = zero_ary + mom_1 self.mom_1.set_value(to_fX(new_mom_1)) new_mom_2 = zero_ary + mom_2 self.mom_2.set_value(to_fX(new_mom_2)) return
def test_tfd_nll(occ_dim=15, drop_prob=0.0): RESULT_PATH = "IMP_TFD_TM/" ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) data_file = 'data/tfd_data_48x48.pkl' dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all') Xtr_unlabeled = dataset[0] dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all') Xtr_train = dataset[0] Xtr = np.vstack([Xtr_unlabeled, Xtr_train]) dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all') Xva = dataset[0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) ) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') log_name = "{}_RESULTS.txt".format(result_tag) out_file = open(log_name, 'wb') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) result = TM.best_match_nll(xo, xm) match_on_known = np.mean(result[0]) match_on_unknown = np.mean(result[1]) str0 = "Test 1:" str1 = " match on known : {}".format(match_on_known) str2 = " match on unknown : {}".format(match_on_unknown) joint_str = "\n".join([str0, str1, str2]) print(joint_str) out_file.write(joint_str+"\n") out_file.flush() out_file.close() return
def set_sgd_params(self, lr=0.01, mom_1=0.9, mom_2=0.999): """ Set learning rate and momentum parameter for all updates. """ zero_ary = np.zeros((1, )) # set learning rate new_lr = zero_ary + lr self.lr.set_value(to_fX(new_lr)) # set momentums (use first and second order "momentum") new_mom_1 = zero_ary + mom_1 self.mom_1.set_value(to_fX(new_mom_1)) new_mom_2 = zero_ary + mom_2 self.mom_2.set_value(to_fX(new_mom_2)) return
def raw_cost_computer(XO): _all_costs = cost_func(to_fX(XO)) _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True), axis=0) _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True), axis=0) _kld_p2g = np.sum(np.mean(_all_costs[3], axis=1, keepdims=True), axis=0) _step_klds = np.mean(np.sum(_all_costs[1], axis=2, keepdims=True), axis=1) _step_klds = to_fX(np.asarray([k for k in _step_klds])) _step_nlls = np.mean(_all_costs[0], axis=1) _step_nlls = to_fX(np.asarray([k for k in _step_nlls])) results = [_step_nlls, _step_klds, _kld_q2p, _kld_p2q, _kld_p2g] return results
def test_mnist_img(occ_dim=15, drop_prob=0.0): ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) dataset = 'data/mnist.pkl.gz' datasets = load_udm(dataset, as_shared=False, zero_mean=False) Xtr = datasets[0][0] Xva = datasets[1][0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 200 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],))) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm) display_count = 100 # visualize matches on known elements Xs = np.zeros((2*display_count, Xva.shape[1])) for idx in range(display_count): Xs[2*idx] = xi[idx] Xs[(2*idx)+1] = img_match_on_known[idx] file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag) utils.visualize_samples(Xs, file_name, num_rows=20) # visualize matches on unknown elements Xs = np.zeros((2*display_count, Xva.shape[1])) for idx in range(display_count): Xs[2*idx] = xi[idx] Xs[(2*idx)+1] = img_match_on_unknown[idx] file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag) utils.visualize_samples(Xs, file_name, num_rows=20) return
def test_mnist_img(occ_dim=15, drop_prob=0.0): ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) dataset = 'data/mnist.pkl.gz' datasets = load_udm(dataset, as_shared=False, zero_mean=False) Xtr = datasets[0][0] Xva = datasets[1][0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 200 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], ))) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm) display_count = 100 # visualize matches on known elements Xs = np.zeros((2 * display_count, Xva.shape[1])) for idx in range(display_count): Xs[2 * idx] = xi[idx] Xs[(2 * idx) + 1] = img_match_on_known[idx] file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag) utils.visualize_samples(Xs, file_name, num_rows=20) # visualize matches on unknown elements Xs = np.zeros((2 * display_count, Xva.shape[1])) for idx in range(display_count): Xs[2 * idx] = xi[idx] Xs[(2 * idx) + 1] = img_match_on_unknown[idx] file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag) utils.visualize_samples(Xs, file_name, num_rows=20) return
def load_hydranet_from_dict(model_dict, rng=None, Xd=None, \ new_params=None): """ Load a clone of some previously trained model. """ # load basic parameters self_dot_params = model_dict['params'] if not (new_params is None): for k in new_params: self_dot_params[k] = new_params[k] # load numpy arrays that will be converted to Theano shared arrays self_dot_numpy_param_dicts = model_dict['numpy_param_dicts'] self_dot_shared_param_dicts = {'shared': [], 'output': []} for layer_group in ['shared', 'output']: # go over the list of parameter dicts in this layer group for numpy_dict in self_dot_numpy_param_dicts[layer_group]: shared_dict = {} for key in numpy_dict: # convert each numpy array to a Theano shared array val = to_fX(numpy_dict[key]) shared_dict[key] = theano.shared(val) self_dot_shared_param_dicts[layer_group].append(shared_dict) # now, create a HydraNet with the configuration we just unpacked clone_net = HydraNet(rng=rng, Xd=Xd, params=self_dot_params, \ shared_param_dicts=self_dot_shared_param_dicts) # helpful output print("==================================================") print("LOADED HydraNet WITH PARAMS:") for k in self_dot_params: print(" {0:s}: {1:s}".format(str(k), str(self_dot_params[k]))) print("==================================================") return clone_net
def sample_func(XO, use_guide_policy=False): # set model to desired generation mode old_switch = self.train_switch.get_value(borrow=False) if use_guide_policy: # take samples from the guide policy self.set_train_switch(switch_val=1.0) else: # take samples from the primary policy self.set_train_switch(switch_val=0.0) # get belief states and masks generated by the scan loop scan_vals = func(to_fX(XO)) step_count = self.total_steps + 1 seq_shape = (step_count, XO.shape[0], XO.shape[1]) xm_seq = np.zeros(seq_shape).astype(theano.config.floatX) xi_seq = np.zeros(seq_shape).astype(theano.config.floatX) mi_seq = np.zeros(seq_shape).astype(theano.config.floatX) for i in range(step_count): _xi = scan_vals[i] _mi = scan_vals[i + step_count] _xm = (_mi * XO) + ((1.0 - _mi) * _xi) xm_seq[i,:,:] = _xm xi_seq[i,:,:] = _xi mi_seq[i,:,:] = _mi # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) return [xm_seq, xi_seq, mi_seq]
def prior_sampler(samp_count): z_samps = npr.randn(samp_count, self.z_dim) z_samps = (np.exp(0.5 * self.prior_logvar) * z_samps) + \ self.prior_mean z_samps = to_fX(z_samps) model_samps = sample_func(z_samps) return model_samps
def prior_sampler(samp_count): z_samps = npr.randn(samp_count, self.z_dim) z_samps = (np.exp(0.5 * self.prior_logvar) * z_samps) + \ self.prior_mean z_samps =to_fX(z_samps) model_samps = sample_func(z_samps) return model_samps
def load_infnet_from_dict(model_dict, rng=None, Xd=None, \ new_params=None): """ Load a clone of some previously trained model. """ self_dot_params = model_dict['params'] if not (new_params is None): for k in new_params: self_dot_params[k] = new_params[k] self_dot_numpy_param_dicts = model_dict['numpy_param_dicts'] self_dot_shared_param_dicts = {'shared': [], 'mu': [], 'sigma': []} for layer_group in ['shared', 'mu', 'sigma']: for numpy_dict in self_dot_numpy_param_dicts[layer_group]: shared_dict = {} for key in numpy_dict: val = to_fX(numpy_dict[key]) shared_dict[key] = theano.shared(val) self_dot_shared_param_dicts[layer_group].append(shared_dict) # now, create an InfNet with the configuration we just unpacked clone_net = InfNet(rng=rng, Xd=Xd, params=self_dot_params, \ shared_param_dicts=self_dot_shared_param_dicts) # helpful output print("==================================================") print("LOADED InfNet WITH PARAMS:") for k in self_dot_params: print(" {0:s}: {1:s}".format(str(k), str(self_dot_params[k]))) print("==================================================") return clone_net
def sample_func(XO, use_guide_policy=False): # set model to desired generation mode old_switch = self.train_switch.get_value(borrow=False) if use_guide_policy: # take samples from the guide policy self.set_train_switch(switch_val=1.0) else: # take samples from the primary policy self.set_train_switch(switch_val=0.0) # get belief states and masks generated by the scan loop scan_vals = func(to_fX(XO)) step_count = self.total_steps + 1 seq_shape = (step_count, XO.shape[0], XO.shape[1]) xm_seq = np.zeros(seq_shape).astype(theano.config.floatX) xi_seq = np.zeros(seq_shape).astype(theano.config.floatX) mi_seq = np.zeros(seq_shape).astype(theano.config.floatX) for i in range(step_count): _xi = scan_vals[i] _mi = scan_vals[i + step_count] _xm = (_mi * XO) + ((1.0 - _mi) * _xi) xm_seq[i, :, :] = _xm xi_seq[i, :, :] = _xi mi_seq[i, :, :] = _mi # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) return [xm_seq, xi_seq, mi_seq]
def set_lam_nll(self, lam_nll=1.0): """ Set weight for controlling the influence of the data likelihood. """ zero_ary = np.zeros((1, )) new_lam = zero_ary + lam_nll self.lam_nll.set_value(to_fX(new_lam)) return
def test_svhn_nll(occ_dim=15, drop_prob=0.0): RESULT_PATH = "IMP_SVHN_TM/" ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) tr_file = 'data/svhn_train_gray.pkl' te_file = 'data/svhn_test_gray.pkl' ex_file = 'data/svhn_extra_gray.pkl' data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000) Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) ) Xva = to_fX( shift_and_scale_into_01(data['Xte']) ) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) ) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') log_name = "{}_RESULTS.txt".format(result_tag) out_file = open(log_name, 'wb') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) result = TM.best_match_nll(xo, xm) match_on_known = np.mean(result[0]) match_on_unknown = np.mean(result[1]) str0 = "Test 1:" str1 = " match on known : {}".format(match_on_known) str2 = " match on unknown : {}".format(match_on_unknown) joint_str = "\n".join([str0, str1, str2]) print(joint_str) out_file.write(joint_str+"\n") out_file.flush() out_file.close() return
def set_sgd_params(self, lr_1=0.01, lr_2=0.01, \ mom_1=0.9, mom_2=0.999): """ Set learning rate and momentum parameter for all updates. """ zero_ary = np.zeros((1,)) # set learning rates new_lr_1 = zero_ary + lr_1 self.lr_1.set_value(to_fX(new_lr_1)) new_lr_2 = zero_ary + lr_2 self.lr_2.set_value(to_fX(new_lr_2)) # set momentums new_mom_1 = zero_ary + mom_1 self.mom_1.set_value(to_fX(new_mom_1)) new_mom_2 = zero_ary + mom_2 self.mom_2.set_value(to_fX(new_mom_2)) return
def set_lam_l2w(self, lam_l2w=1e-3): """ Set the relative strength of l2 regularization on network params. """ zero_ary = np.zeros((1, )) new_lam = zero_ary + lam_l2w self.lam_l2w.set_value(to_fX(new_lam)) return
def set_lam_nll(self, lam_nll=1.0): """ Set weight for controlling the influence of the data likelihood. """ zero_ary = np.zeros((1,)) new_lam = zero_ary + lam_nll self.lam_nll.set_value(to_fX(new_lam)) return
def set_lam_l2w(self, lam_l2w=1e-3): """ Set the relative strength of l2 regularization on network params. """ zero_ary = np.zeros((1,)) new_lam = zero_ary + lam_l2w self.lam_l2w.set_value(to_fX(new_lam)) return
def set_lam_kld_l1l2(self, lam_kld_l1l2=1.0): """ Set the weight for shaping penalty on conditional priors over zt. """ zero_ary = np.zeros((1,)) new_val = zero_ary + lam_kld_l1l2 self.lam_kld_l1l2.set_value(to_fX(new_val)) return
def set_drop_rate(self, drop_rate=0.0): """ Set the weight for shaping penalty on conditional priors over zt. """ zero_ary = np.zeros((1,)) new_val = zero_ary + drop_rate self.drop_rate.set_value(to_fX(new_val)) return
def raw_kld_computer(XI, XO): hi_zmuv = to_fX( npr.randn(self.ir_steps, XI.shape[0], self.h_dim) ) _all_costs = cost_func(XI, XO, hi_zmuv) _init_klds = _all_costs[0] _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True), axis=0) _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True), axis=0) results = [_init_klds, _kld_q2p, _kld_p2q] return results
def set_sigma_scale(self, sigma_scale=1.0): """ Set the posterior sigma rescaling shared parameter to some value. """ zero_ary = np.zeros((1,)) new_scale = zero_ary + sigma_scale self.sigma_scale.set_value(to_fX(new_scale)) return
def init_biases(self, b_init=0.0, b_std=1e-2): """ Initialize the biases in all hidden layers to some constant. """ for layer in self.shared_layers: b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init b_vec = b_vec + (b_std * npr.randn(*b_vec.shape)) layer.b.set_value(to_fX(b_vec)) for layer in self.mu_layers[:-1]: b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init b_vec = b_vec + (b_std * npr.randn(*b_vec.shape)) layer.b.set_value(to_fX(b_vec)) for layer in self.sigma_layers[:-1]: b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init b_vec = b_vec + (b_std * npr.randn(*b_vec.shape)) layer.b.set_value(to_fX(b_vec)) return
def test_svhn_nll(occ_dim=15, drop_prob=0.0): RESULT_PATH = "IMP_SVHN_TM/" ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) tr_file = 'data/svhn_train_gray.pkl' te_file = 'data/svhn_test_gray.pkl' ex_file = 'data/svhn_extra_gray.pkl' data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000) Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']]))) Xva = to_fX(shift_and_scale_into_01(data['Xte'])) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], ))) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') log_name = "{}_RESULTS.txt".format(result_tag) out_file = open(log_name, 'wb') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) result = TM.best_match_nll(xo, xm) match_on_known = np.mean(result[0]) match_on_unknown = np.mean(result[1]) str0 = "Test 1:" str1 = " match on known : {}".format(match_on_known) str2 = " match on unknown : {}".format(match_on_unknown) joint_str = "\n".join([str0, str1, str2]) print(joint_str) out_file.write(joint_str + "\n") out_file.flush() out_file.close() return
def test_mnist_nll(occ_dim=15, drop_prob=0.0): ######################################### # Format the result tag more thoroughly # ######################################### dp_int = int(100.0 * drop_prob) result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) dataset = 'data/mnist.pkl.gz' datasets = load_udm(dataset, as_shared=False, zero_mean=False) Xtr = datasets[0][0] Xva = datasets[1][0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 200 batch_reps = 1 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],))) TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli') log_name = "{}_RESULTS.txt".format(result_tag) out_file = open(log_name, 'wb') Xva = row_shuffle(Xva) # record an estimate of performance on the test set xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=data_mean) result = TM.best_match_nll(xo, xm) match_on_known = np.mean(result[0]) match_on_unknown = np.mean(result[1]) str0 = "Test 1:" str1 = " match on known : {}".format(match_on_known) str2 = " match on unknown : {}".format(match_on_unknown) joint_str = "\n".join([str0, str1, str2]) print(joint_str) out_file.write(joint_str+"\n") out_file.flush() out_file.close() return
def conditional_sampler(XI, XO=None, guided_decoding=False): XI = to_fX( XI ) if XO is None: XO = XI XO = to_fX( XO ) # set model to desired generation mode old_switch = self.train_switch.get_value(borrow=False) if guided_decoding: # take samples from guide policies (i.e. variational q) self.set_train_switch(switch_val=1.0) else: # take samples from model's generative policy self.set_train_switch(switch_val=0.0) # draw guided/unguided conditional samples model_samps = sample_func(XI, XO) # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) return model_samps
def init_biases(self, b_init=0.0, b_std=1e-2): """ Initialize the biases in all shred layers to some constant. """ for layer in self.shared_layers: b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init b_vec = b_vec + (b_std * npr.randn(*b_vec.shape)) layer.b.set_value(to_fX(b_vec)) return
def prior_sampler(samp_count): x_samps = to_fX( np.zeros((samp_count, self.x_dim)) ) old_switch = self.train_switch.get_value(borrow=False) # set model to generation mode self.set_train_switch(switch_val=0.0) # generate samples from model model_samps = sample_func(x_samps) # set model back to previous mode self.set_train_switch(switch_val=old_switch) return model_samps
def set_kld_z_mean(self, x): """ Compute mean of KL(q(z|x) || p(z)) for the observations in x, and then use it to reset self.kld_z_mean. """ nll, kld = self.compute_fe_terms(x, 10) old_mean = self.kld_z_mean.get_value(borrow=False) new_mean = (0.0 * old_mean) + np.mean(kld) self.kld_z_mean.set_value(to_fX(new_mean)) return
def load_gpsimputer_from_file(f_name=None, rng=None): """ Load a clone of some previously trained model. """ from InfNet import load_infnet_from_dict from HydraNet import load_hydranet_from_dict assert(not (f_name is None)) pickle_file = open(f_name) # reload the basic python parameters self_dot_params = cPickle.load(pickle_file) # reload the theano shared parameters self_dot_numpy_param_dicts = cPickle.load(pickle_file) self_dot_shared_param_dicts = {} for key in self_dot_numpy_param_dicts: val = to_fX(self_dot_numpy_param_dicts[key]) self_dot_shared_param_dicts[key] = theano.shared(val) # reload the child models child_model_dicts = cPickle.load(pickle_file) xd = T.matrix() p_h_given_x = load_infnet_from_dict( \ child_model_dicts['p_h_given_x'], rng=rng, Xd=xd) p_s0_given_h = load_hydranet_from_dict( \ child_model_dicts['p_s0_given_h'], rng=rng, Xd=xd) p_zi_given_xi = load_infnet_from_dict( \ child_model_dicts['p_zi_given_xi'], rng=rng, Xd=xd) p_sip1_given_zi = load_hydranet_from_dict( \ child_model_dicts['p_sip1_given_zi'], rng=rng, Xd=xd) p_x_given_si = load_hydranet_from_dict( \ child_model_dicts['p_x_given_si'], rng=rng, Xd=xd) q_h_given_x = load_infnet_from_dict( \ child_model_dicts['q_h_given_x'], rng=rng, Xd=xd) q_zi_given_xi = load_infnet_from_dict( \ child_model_dicts['q_zi_given_xi'], rng=rng, Xd=xd) # now, create a new GPSImputerWI based on the loaded data xi = T.matrix() xm = T.matrix() xo = T.matrix() clone_net = GPSImputerWI(rng=rng, \ x_in=xi, x_mask=xm, x_out=xo, \ p_h_given_x=p_h_given_x, \ p_s0_given_h=p_s0_given_h, \ p_zi_given_xi=p_zi_given_xi, \ p_sip1_given_zi=p_sip1_given_zi, \ p_x_given_si=p_x_given_si, \ q_h_given_x=q_h_given_x, \ q_zi_given_xi=q_zi_given_xi, \ params=self_dot_params, \ shared_param_dicts=self_dot_shared_param_dicts) # helpful output print("==================================================") print("LOADED GPSImputerWI WITH PARAMS:") for k in self_dot_params: print(" {0:s}: {1:s}".format(str(k), str(self_dot_params[k]))) print("==================================================") return clone_net
def fe_term_estimator(X, sample_count): X = to_fX(X) ll_sum = np.zeros((X.shape[0], )) kld_sum = np.zeros((X.shape[0], )) for i in range(sample_count): result = fe_term_sample(X) ll_sum = ll_sum + result[0].ravel() kld_sum = kld_sum + result[1].ravel() mean_nll = -ll_sum / float(sample_count) mean_kld = kld_sum / float(sample_count) return [mean_nll, mean_kld]
def fe_term_estimator(X, sample_count): X = to_fX(X) ll_sum = np.zeros((X.shape[0],)) kld_sum = np.zeros((X.shape[0],)) for i in range(sample_count): result = fe_term_sample(X) ll_sum = ll_sum + result[0].ravel() kld_sum = kld_sum + result[1].ravel() mean_nll = -ll_sum / float(sample_count) mean_kld = kld_sum / float(sample_count) return [mean_nll, mean_kld]
def set_bias_noise(self, bias_noise=0.0): """ Set the bias noise in all hidden layers to the given value. """ new_ary = np.zeros((1,)) + bias_noise new_bn = to_fX( new_ary ) for layer in self.shared_layers: layer.bias_noise.set_value(new_bn) for layer in self.output_layers: layer.bias_noise.set_value(new_bn) return
def set_train_switch(self, switch_val=0.0): """ Set the switch for changing between training and sampling behavior. """ if (switch_val < 0.5): switch_val = 0.0 else: switch_val = 1.0 zero_ary = np.zeros((1, )) new_val = zero_ary + switch_val self.train_switch.set_value(to_fX(new_val)) return
def img_split(imgs, im_dim=None, split_col=None, transposed=False): """ Split flattened images in rows of img vertically, with obs_cols taken from the left and im_dim[1]-obs_cols taken from the right. """ if transposed: assert (im_dim[0] == im_dim[1]), "transpose only works for square imgs" img_count = imgs.shape[0] row_count = im_dim[0] col_count = im_dim[1] l_obs_dim = split_col * row_count r_obs_dim = (col_count - split_col) * row_count left_cols = np.zeros((img_count, l_obs_dim)) right_cols = np.zeros((img_count, r_obs_dim)) for i in range(img_count): im = imgs[i, :].reshape(im_dim) if transposed: im = im.transpose() left_cols[i, :] = im[:, :split_col].flatten() right_cols[i, :] = im[:, split_col:].flatten() return to_fX(left_cols), to_fX(right_cols)
def generate_batch(num_samples, obj_type='circle'): # generate a minibatch of trajectories traj_pos, traj_vel = TRAJ.generate_trajectories( num_samples, (traj_len + 1)) traj_x = traj_pos[:, :, 0] traj_y = traj_pos[:, :, 1] # draw the trajectories center_x = to_fX(traj_x.T.ravel()) center_y = to_fX(traj_y.T.ravel()) delta = to_fX(np.ones(center_x.shape)) sigma = to_fX(np.ones(center_x.shape)) paint_obj = OPTRS[obj_type] W = paint_obj(center_y, center_x, delta, 0.05 * sigma) # shape trajectories into a batch for passing to the model batch_imgs = np.zeros((num_samples, (traj_len + 1), obs_dim)) batch_coords = np.zeros((num_samples, (traj_len + 1), 2)) for i in range(num_samples): start_idx = i * (traj_len + 1) end_idx = start_idx + (traj_len + 1) img_set = W[start_idx:end_idx, :] batch_imgs[i, :, :] = img_set batch_coords[i, :, 0] = center_x[start_idx:end_idx] batch_coords[i, :, 1] = center_y[start_idx:end_idx] batch_imgs = np.swapaxes(batch_imgs, 0, 1) batch_coords = np.swapaxes(batch_coords, 0, 1) return [to_fX(batch_imgs), to_fX(batch_coords)]
def imputer_sampler(XI, XO, XM, use_guide_policy=False): XI = to_fX(XI) XO = to_fX(XO) XM = to_fX(XM) # set model to desired generation mode old_switch = self.train_switch.get_value(borrow=False) if use_guide_policy: # take samples from guide policies (i.e. variational q) self.set_train_switch(switch_val=1.0) else: # take samples from model's imputation policy self.set_train_switch(switch_val=0.0) # draw guided/unguided conditional samples model_samps = sample_func(XI, XO, XM) # set model back to either training or generation mode self.set_train_switch(switch_val=old_switch) # reverse engineer the "masked" samples... masked_samps = [] for xs in model_samps: xsm = (XM * XI) + ((1.0 - XM) * xs) masked_samps.append(xsm) return model_samps, masked_samps
def load_WalkoutModel_from_file(f_name=None, rng=None): """ Load a clone of some previously trained model. """ from InfNet import load_infnet_from_dict from HydraNet import load_hydranet_from_dict assert (not (f_name is None)) pickle_file = open(f_name) # reload the basic python parameters self_dot_params = cPickle.load(pickle_file) # reload the theano shared parameters self_dot_numpy_param_dicts = cPickle.load(pickle_file) self_dot_shared_param_dicts = {} for key in self_dot_numpy_param_dicts: val = to_fX(self_dot_numpy_param_dicts[key]) self_dot_shared_param_dicts[key] = theano.shared(val) # reload the child models child_model_dicts = cPickle.load(pickle_file) xd = T.matrix() p_zi_given_xi = load_infnet_from_dict( \ child_model_dicts['p_zi_given_xi'], rng=rng, Xd=xd) p_sip1_given_zi = load_hydranet_from_dict( \ child_model_dicts['p_sip1_given_zi'], rng=rng, Xd=xd) p_x_given_si = load_hydranet_from_dict( \ child_model_dicts['p_x_given_si'], rng=rng, Xd=xd) q_zi_given_xi = load_infnet_from_dict( \ child_model_dicts['q_zi_given_xi'], rng=rng, Xd=xd) # now, create a new WalkoutModel based on the loaded data xo = T.matrix() clone_net = WalkoutModel(rng=rng, \ x_out=xo, \ p_zi_given_xi=p_zi_given_xi, \ p_sip1_given_zi=p_sip1_given_zi, \ p_x_given_si=p_x_given_si, \ q_zi_given_xi=q_zi_given_xi, \ params=self_dot_params, \ shared_param_dicts=self_dot_shared_param_dicts) # helpful output print("==================================================") print("LOADED WalkoutModel WITH PARAMS:") for k in self_dot_params: print(" {0:s}: {1:s}".format(str(k), str(self_dot_params[k]))) print("==================================================") return clone_net
def img_join(left_cols, right_cols, im_dim=None, transposed=False): """ Join flattened images vertically. """ if transposed: assert (im_dim[0] == im_dim[1]), "transpose only works for square imgs" img_count = left_cols.shape[0] row_count = im_dim[0] col_count = im_dim[1] left_col_count = left_cols.shape[1] / row_count right_col_count = col_count - left_col_count imgs = np.zeros((img_count, row_count * col_count)) im_sq = np.zeros((row_count, col_count)) for i in range(img_count): left_chunk = left_cols[i, :].reshape((row_count, left_col_count)) right_chunk = right_cols[i, :].reshape((row_count, right_col_count)) im_sq[:, :left_col_count] = left_chunk[:, :] im_sq[:, left_col_count:] = right_chunk[:, :] if transposed: im_sq = im_sq.transpose() imgs[i, :] = im_sq.flatten() return to_fX(imgs)
def test_seq_cond_gen_copy(step_type='add', res_tag="AAA"): ############################## # File tag, for output stuff # ############################## result_tag = "{}TEST_{}".format(RESULT_PATH, res_tag) ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) dataset = 'data/mnist.pkl.gz' datasets = load_udm(dataset, as_shared=False, zero_mean=False) Xtr = datasets[0][0] Xva = datasets[1][0] Xte = datasets[2][0] # merge validation set and training set, and test on test set. #Xtr = np.concatenate((Xtr, Xva), axis=0) #Xva = Xte Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) # basic params batch_size = 128 traj_len = 20 im_dim = 28 obs_dim = im_dim * im_dim def sample_batch(np_ary, bs=100): row_count = np_ary.shape[0] samp_idx = npr.randint(low=0, high=row_count, size=(bs, )) xb = np_ary.take(samp_idx, axis=0) return xb ############################################################ # Setup some parameters for the Iterative Refinement Model # ############################################################ total_steps = traj_len init_steps = 5 exit_rate = 0.1 nll_weight = 0.0 x_dim = obs_dim y_dim = obs_dim z_dim = 128 att_spec_dim = 5 rnn_dim = 512 mlp_dim = 512 def visualize_attention(result, pre_tag="AAA", post_tag="AAA"): seq_len = result[0].shape[0] samp_count = result[0].shape[1] # get generated predictions x_samps = np.zeros((seq_len * samp_count, obs_dim)) idx = 0 for s1 in range(samp_count): for s2 in range(seq_len): x_samps[idx] = result[0][s2, s1, :] idx += 1 file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag) utils.visualize_samples(x_samps, file_name, num_rows=samp_count) # get sequential attention maps seq_samps = np.zeros((seq_len * samp_count, obs_dim)) idx = 0 for s1 in range(samp_count): for s2 in range(seq_len): seq_samps[idx] = result[1][s2, s1, :] idx += 1 file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag) utils.visualize_samples(seq_samps, file_name, num_rows=samp_count) # get sequential attention maps (read out values) seq_samps = np.zeros((seq_len * samp_count, obs_dim)) idx = 0 for s1 in range(samp_count): for s2 in range(seq_len): seq_samps[idx] = result[2][s2, s1, :] idx += 1 file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag) utils.visualize_samples(seq_samps, file_name, num_rows=samp_count) # get original input sequences seq_samps = np.zeros((seq_len * samp_count, obs_dim)) idx = 0 for s1 in range(samp_count): for s2 in range(seq_len): seq_samps[idx] = result[3][s2, s1, :] idx += 1 file_name = "{0:s}_traj_xs_in_{1:s}.png".format(pre_tag, post_tag) utils.visualize_samples(seq_samps, file_name, num_rows=samp_count) return rnninits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } inits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } # module for doing local 2d read defined by an attention specification img_scale = 1.0 # image coords will range over [-img_scale...img_scale] read_N = 2 # use NxN grid for reader reader_mlp = FovAttentionReader2d(x_dim=obs_dim, width=im_dim, height=im_dim, N=read_N, img_scale=img_scale, att_scale=0.5, **inits) read_dim = reader_mlp.read_dim # total number of "pixels" read by reader # MLP for updating belief state based on con_rnn writer_mlp = MLP([None, None], [rnn_dim, mlp_dim, obs_dim], \ name="writer_mlp", **inits) # mlps for processing inputs to LSTMs con_mlp_in = MLP([Identity()], \ [ z_dim, 4*rnn_dim], \ name="con_mlp_in", **inits) 
var_mlp_in = MLP([Identity()], \ [(read_dim + read_dim + att_spec_dim + rnn_dim), 4*rnn_dim], \ name="var_mlp_in", **inits) gen_mlp_in = MLP([Identity()], \ [ (read_dim + att_spec_dim + rnn_dim), 4*rnn_dim], \ name="gen_mlp_in", **inits) # mlps for turning LSTM outputs into conditionals over z_gen con_mlp_out = CondNet([], [rnn_dim, att_spec_dim], \ name="con_mlp_out", **inits) gen_mlp_out = CondNet([], [rnn_dim, z_dim], name="gen_mlp_out", **inits) var_mlp_out = CondNet([], [rnn_dim, z_dim], name="var_mlp_out", **inits) # LSTMs for the actual LSTMs (obviously, perhaps) con_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \ name="con_rnn", **rnninits) gen_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \ name="gen_rnn", **rnninits) var_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \ name="var_rnn", **rnninits) SCG = SeqCondGenRAM(x_and_y_are_seqs=False, total_steps=total_steps, init_steps=init_steps, exit_rate=exit_rate, nll_weight=nll_weight, step_type=step_type, x_dim=obs_dim, y_dim=obs_dim, reader_mlp=reader_mlp, writer_mlp=writer_mlp, con_mlp_in=con_mlp_in, con_mlp_out=con_mlp_out, con_rnn=con_rnn, gen_mlp_in=gen_mlp_in, gen_mlp_out=gen_mlp_out, gen_rnn=gen_rnn, var_mlp_in=var_mlp_in, var_mlp_out=var_mlp_out, var_rnn=var_rnn) SCG.initialize() compile_start_time = time.time() # build the attention trajectory sampler SCG.build_attention_funcs() # quick test of attention trajectory sampler Xb = sample_batch(Xtr, bs=32) result = SCG.sample_attention(Xb, Xb) visualize_attention(result, pre_tag=result_tag, post_tag="b0") # build the main model functions (i.e. training and cost functions) SCG.build_model_funcs() compile_end_time = time.time() compile_minutes = (compile_end_time - compile_start_time) / 60.0 print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes)) # TEST SAVE/LOAD FUNCTIONALITY param_save_file = "{}_params.pkl".format(result_tag) SCG.save_model_params(param_save_file) SCG.load_model_params(param_save_file) ################################################################ # Apply some updates, to check that they aren't totally broken # ################################################################ print("Beginning to train the model...") out_file = open("{}_results.txt".format(result_tag), 'wb') out_file.flush() costs = [0. for i in range(10)] learn_rate = 0.0001 momentum = 0.95 for i in range(250000): lr_scale = min(1.0, ((i + 1) / 5000.0)) mom_scale = min(1.0, ((i + 1) / 10000.0)) if (((i + 1) % 10000) == 0): learn_rate = learn_rate * 0.95 # set sgd and objective function hyperparams for this update SCG.set_sgd_params(lr=lr_scale * learn_rate, mom_1=mom_scale * momentum, mom_2=0.99) SCG.set_lam_kld(lam_kld_q2p=0.95, lam_kld_p2q=0.05, \ lam_kld_amu=0.0, lam_kld_alv=0.1) # perform a minibatch update and record the cost for this batch Xb = sample_batch(Xtr, bs=batch_size) result = SCG.train_joint(Xb, Xb) costs = [(costs[j] + result[j]) for j in range(len(result))] # output diagnostic information and checkpoint parameters, etc. 
if ((i % 250) == 0): costs = [(v / 250.0) for v in costs] str1 = "-- batch {0:d} --".format(i) str2 = " total_cost: {0:.4f}".format(costs[0]) str3 = " nll_term : {0:.4f}".format(costs[1]) str4 = " kld_q2p : {0:.4f}".format(costs[2]) str5 = " kld_p2q : {0:.4f}".format(costs[3]) str6 = " kld_amu : {0:.4f}".format(costs[4]) str7 = " kld_alv : {0:.4f}".format(costs[5]) str8 = " reg_term : {0:.4f}".format(costs[6]) joint_str = "\n".join( [str1, str2, str3, str4, str5, str6, str7, str8]) print(joint_str) out_file.write(joint_str + "\n") out_file.flush() costs = [0.0 for v in costs] if ((i % 500) == 0): SCG.save_model_params("{}_params.pkl".format(result_tag)) ############################################# # check model performance on validation set # ############################################# Xb = sample_batch(Xva, bs=500) result = SCG.compute_nll_bound(Xb, Xb) str2 = " va_total_cost: {0:.4f}".format(float(result[0])) str3 = " va_nll_term : {0:.4f}".format(float(result[1])) str4 = " va_kld_q2p : {0:.4f}".format(float(result[2])) str5 = " va_kld_p2q : {0:.4f}".format(float(result[3])) str6 = " va_kld_amu : {0:.4f}".format(float(result[4])) str7 = " va_kld_alv : {0:.4f}".format(float(result[5])) str8 = " va_reg_term : {0:.4f}".format(float(result[6])) joint_str = "\n".join([str2, str3, str4, str5, str6, str7, str8]) print(joint_str) out_file.write(joint_str + "\n") out_file.flush() ########################################### # sample and draw attention trajectories. # ########################################### Xb = sample_batch(Xva, bs=32) result = SCG.sample_attention(Xb, Xb) post_tag = "b{0:d}".format(i) visualize_attention(result, pre_tag=result_tag, post_tag=post_tag)
def test_imocld_mnist(step_type='add', attention=False): ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) dataset = 'data/mnist.pkl.gz' datasets = load_udm(dataset, as_shared=False, zero_mean=False) Xtr = datasets[0][0] Xva = datasets[1][0] Xtr = to_fX(shift_and_scale_into_01(Xtr)) Xva = to_fX(shift_and_scale_into_01(Xva)) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 ############################################################ # Setup some parameters for the Iterative Refinement Model # ############################################################ x_dim = Xtr.shape[1] write_dim = 300 enc_dim = 300 dec_dim = 300 mix_dim = 20 z_dim = 100 n_iter = 16 rnninits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } inits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } att_tag = "NA" # attention not implemented yet # setup the reader and writer (shared by primary and guide policies) read_dim = 2*x_dim # dimension of output from reader_mlp reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits) writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \ name="writer_mlp", **inits) # mlps for setting conditionals over z_mix mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \ name="mix_var_mlp", **inits) mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \ name="mix_enc_mlp", **inits) # mlp for decoding z_mix into a distribution over initial LSTM states mix_dec_mlp = MLP([Tanh(), Tanh()], \ [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim + mix_dim)], \ name="mix_dec_mlp", **inits) # mlps for processing inputs to LSTMs var_mlp_in = MLP([Identity()], [(read_dim + dec_dim + mix_dim), 4*enc_dim], \ name="var_mlp_in", **inits) enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim + mix_dim), 4*enc_dim], \ name="enc_mlp_in", **inits) dec_mlp_in = MLP([Identity()], [ z_dim, 4*dec_dim], \ name="dec_mlp_in", **inits) # mlps for turning LSTM outputs into conditionals over z_gen var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits) enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits) # LSTMs for the actual LSTMs (obviously, perhaps) var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \ name="var_rnn", **rnninits) enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \ name="enc_rnn", **rnninits) dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \ name="dec_rnn", **rnninits) draw = IMoCLDrawModels( n_iter, step_type=step_type, # step_type can be 'add' or 'jump' reader_mlp=reader_mlp, writer_mlp=writer_mlp, mix_enc_mlp=mix_enc_mlp, mix_dec_mlp=mix_dec_mlp, mix_var_mlp=mix_var_mlp, enc_mlp_in=enc_mlp_in, enc_mlp_out=enc_mlp_out, enc_rnn=enc_rnn, dec_mlp_in=dec_mlp_in, dec_rnn=dec_rnn, var_mlp_in=var_mlp_in, var_mlp_out=var_mlp_out, var_rnn=var_rnn) draw.initialize() # build the cost gradients, training function, samplers, etc. 
draw.build_model_funcs() # sample several interchangeable versions of the model conditions = [{'occ_dim': 0, 'drop_prob': 0.8}, \ {'occ_dim': 16, 'drop_prob': 0.0}] for cond_dict in conditions: occ_dim = cond_dict['occ_dim'] drop_prob = cond_dict['drop_prob'] dp_int = int(100.0 * drop_prob) draw.load_model_params(f_name="TBCLM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag)) # draw some independent samples from the model Xva = row_shuffle(Xva) Xb = to_fX(Xva[:128]) _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=None) Xb = np.repeat(Xb, 2, axis=0) Mb = np.repeat(Mb, 2, axis=0) samples, _ = draw.do_sample(Xb, Mb) # save the samples to a pkl file, in their numpy array form sample_pkl_name = "IMP-MNIST-OD{0:d}-DP{1:d}-{2:s}.pkl".format(occ_dim, dp_int, step_type) f_handle = file(sample_pkl_name, 'wb') cPickle.dump(samples, f_handle, protocol=-1) f_handle.close() print("Saved some samples in: {}".format(sample_pkl_name)) return
def test_sgm_mnist(step_type='add', occ_dim=14, drop_prob=0.0, attention=False): ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/') Xtr = np.vstack((Xtr, Xva)) Xva = Xte #del Xte tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 200 ############################################################ # Setup some parameters for the Iterative Refinement Model # ############################################################ x_dim = Xtr.shape[1] writer_dim = 250 reader_dim = 250 dyn_dim = 250 primary_dim = 500 guide_dim = 500 z_dim = 100 n_iter = 20 dp_int = int(100.0 * drop_prob) rnninits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } inits = { 'weights_init': IsotropicGaussian(0.01), 'biases_init': Constant(0.), } att_tag = "NA" # attention not implemented yet # reader MLP provides input to the dynamics LSTM update reader_mlp = MLP([Rectifier(), Rectifier(), None], \ [(x_dim + z_dim), reader_dim, reader_dim, 4*dyn_dim], \ name="reader_mlp", **inits) # writer MLP applies changes to the generation workspace writer_mlp = MLP([Rectifier(), Rectifier(), None], \ [(dyn_dim + z_dim), writer_dim, writer_dim, x_dim], \ name="writer_mlp", **inits) # MLPs for computing conditionals over z primary_policy = CondNet([Rectifier(), Rectifier()], \ [(dyn_dim + x_dim), primary_dim, primary_dim, z_dim], \ name="primary_policy", **inits) guide_policy = CondNet([Rectifier(), Rectifier()], \ [(dyn_dim + 2*x_dim), guide_dim, guide_dim, z_dim], \ name="guide_policy", **inits) # LSTMs for the actual LSTMs (obviously, perhaps) shared_dynamics = BiasedLSTM(dim=dyn_dim, ig_bias=2.0, fg_bias=2.0, \ name="shared_dynamics", **rnninits) model = SeqGenModel( n_iter, step_type=step_type, # step_type can be 'add' or 'jump' reader_mlp=reader_mlp, writer_mlp=writer_mlp, primary_policy=primary_policy, guide_policy=guide_policy, shared_dynamics=shared_dynamics) model.initialize() # build the cost gradients, training function, samplers, etc. model.build_model_funcs() #model.load_model_params(f_name="TBSGM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag)) ################################################################ # Apply some updates, to check that they aren't totally broken # ################################################################ print("Beginning to train the model...") out_file = open("TBSGM_IMP_MNIST_RESULTS_OD{}_DP{}_{}_{}.txt".format(occ_dim, dp_int, step_type, att_tag), 'wb') out_file.flush() costs = [0. 
for i in range(10)] learn_rate = 0.0002 momentum = 0.5 batch_idx = np.arange(batch_size) + tr_samples for i in range(250000): scale = min(1.0, ((i+1) / 1000.0)) if (((i + 1) % 10000) == 0): learn_rate = learn_rate * 0.95 if (i > 10000): momentum = 0.90 else: momentum = 0.50 # get the indices of training samples for this batch update batch_idx += batch_size if (np.max(batch_idx) >= tr_samples): # we finished an "epoch", so we rejumble the training set Xtr = row_shuffle(Xtr) batch_idx = np.arange(batch_size) # set sgd and objective function hyperparams for this update zero_ary = np.zeros((1,)) model.lr.set_value(to_fX(zero_ary + learn_rate)) model.mom_1.set_value(to_fX(zero_ary + momentum)) model.mom_2.set_value(to_fX(zero_ary + 0.99)) # perform a minibatch update and record the cost for this batch Xb = to_fX(Xtr.take(batch_idx, axis=0)) _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=None) result = model.train_joint(Xb, Mb) costs = [(costs[j] + result[j]) for j in range(len(result))] if ((i % 200) == 0): costs = [(v / 200.0) for v in costs] str1 = "-- batch {0:d} --".format(i) str2 = " total_cost: {0:.4f}".format(costs[0]) str3 = " nll_bound : {0:.4f}".format(costs[1]) str4 = " nll_term : {0:.4f}".format(costs[2]) str5 = " kld_q2p : {0:.4f}".format(costs[3]) str6 = " kld_p2q : {0:.4f}".format(costs[4]) str7 = " reg_term : {0:.4f}".format(costs[5]) joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7]) print(joint_str) out_file.write(joint_str+"\n") out_file.flush() costs = [0.0 for v in costs] if ((i % 1000) == 0): model.save_model_params("TBSGM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag)) # compute a small-sample estimate of NLL bound on validation set Xva = row_shuffle(Xva) Xb = to_fX(Xva[:5000]) _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=None) va_costs = model.compute_nll_bound(Xb, Mb) str1 = " va_nll_bound : {}".format(va_costs[1]) str2 = " va_nll_term : {}".format(va_costs[2]) str3 = " va_kld_q2p : {}".format(va_costs[3]) joint_str = "\n".join([str1, str2, str3]) print(joint_str) out_file.write(joint_str+"\n") out_file.flush() # draw some independent samples from the model Xb = to_fX(Xva[:100]) _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \ occ_dim=occ_dim, data_mean=None) samples, _ = model.do_sample(Xb, Mb) n_iter, N, D = samples.shape samples = samples.reshape( (n_iter, N, 28, 28) ) for j in xrange(n_iter): img = img_grid(samples[j,:,:,:]) img.save("TBSGM-IMP-MNIST-OD{0:d}-DP{1:d}-{2:s}-samples-{3:03d}.png".format(occ_dim, dp_int, step_type, j))
def __init__(self, rng=None, x_out=None, \
             p_z_given_x=None, \
             p_x_given_z=None, \
             params=None, \
             shared_param_dicts=None):
    # setup a rng for this WalkoutModel
    self.rng = RandStream(rng.randint(100000))

    # grab the user-provided parameters
    self.params = params
    self.x_dim = self.params['x_dim']
    self.z_dim = self.params['z_dim']
    self.walkout_steps = self.params['walkout_steps']
    self.x_type = self.params['x_type']
    # step_type is checked below, so it must also be provided in params
    self.step_type = self.params['step_type']
    self.shared_param_dicts = shared_param_dicts
    if 'x_transform' in self.params:
        assert((self.params['x_transform'] == 'sigmoid') or \
               (self.params['x_transform'] == 'none'))
        if self.params['x_transform'] == 'sigmoid':
            self.x_transform = lambda x: T.nnet.sigmoid(x)
        else:
            self.x_transform = lambda x: x
    else:
        self.x_transform = lambda x: T.nnet.sigmoid(x)
    if self.x_type == 'bernoulli':
        self.x_transform = lambda x: T.nnet.sigmoid(x)
    assert((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
    assert((self.step_type == 'add') or (self.step_type == 'jump'))

    # grab handles to the relevant networks
    self.p_z_given_x = p_z_given_x
    self.p_x_given_z = p_x_given_z

    # record the symbolic variables that will provide inputs to the
    # computation graph created for this WalkoutModel
    self.x_out = x_out          # target output for generation
    self.xi_zmuv = T.tensor3()  # ZMUV gauss noise for x-space walk-out wobble
    self.zi_zmuv = T.tensor3()  # ZMUV gauss noise for z-space walk-out wobble

    if self.shared_param_dicts is None:
        # initialize the parameters "owned" by this model
        zero_ary = to_fX(np.zeros((1,)))
        self.obs_logvar = theano.shared(value=zero_ary, name='obs_logvar')
        self.bounded_logvar = 8.0 * T.tanh((1.0 / 8.0) * self.obs_logvar[0])
        self.shared_param_dicts = {}
        self.shared_param_dicts['obs_logvar'] = self.obs_logvar
    else:
        # grab the parameters required by this model from a given dict
        self.obs_logvar = self.shared_param_dicts['obs_logvar']
        self.bounded_logvar = 8.0 * T.tanh((1.0 / 8.0) * self.obs_logvar[0])

    ###############################################################
    # Setup the forwards (i.e. training) walk-out loop using scan #
    ###############################################################
    def forwards_loop(xi_zmuv, zi_zmuv, xi_fw, zi_fw):
        # get samples of next zi, according to the forwards model
        zi_fw_mean, zi_fw_logvar = self.p_z_given_x.apply(xi_fw, \
                                                          do_samples=False)
        zi_fw = zi_fw_mean + (T.exp(0.5 * zi_fw_logvar) * zi_zmuv)
        # check reverse direction probability p(xi_fw | zi_fw)
        xi_bw_mean, xi_bw_logvar = self.p_x_given_z.apply(zi_fw, \
                                                          do_samples=False)
        xi_bw_mean = self.x_transform(xi_bw_mean)
        nll_xi_bw = log_prob_gaussian2(xi_fw, xi_bw_mean, \
                                       log_vars=xi_bw_logvar, mask=None)
        nll_xi_bw = nll_xi_bw.flatten()
        # get samples of next xi, according to the forwards model
        xi_fw_mean, xi_fw_logvar = self.p_x_given_z.apply(zi_fw, \
                                                          do_samples=False)
        xi_fw_mean = self.x_transform(xi_fw_mean)
        xi_fw = xi_fw_mean + (T.exp(0.5 * xi_fw_logvar) * xi_zmuv)
        # check reverse direction probability p(zi_fw | xi_fw)
        zi_bw_mean, zi_bw_logvar = self.p_z_given_x.apply(xi_fw, \
                                                          do_samples=False)
        nll_zi_bw = log_prob_gaussian2(zi_fw, zi_bw_mean, \
                                       log_vars=zi_bw_logvar, mask=None)
        nll_zi_bw = nll_zi_bw.flatten()
        # each loop iteration produces the following values:
        #   xi_fw: xi generated from zi by forwards walk
        #   zi_fw: zi generated from xi by forwards walk
        #   xi_fw_mean, xi_fw_logvar: parameters of p(xi_fw | zi_fw)
        #   zi_fw_mean, zi_fw_logvar: parameters of p(zi_fw | xi_fw)
        #   nll_xi_bw: NLL for reverse step zi_fw -> xi_fw
        #   nll_zi_bw: NLL for reverse step xi_fw -> zi_fw
        return xi_fw, zi_fw, xi_fw_mean, xi_fw_logvar, zi_fw_mean, zi_fw_logvar, nll_xi_bw, nll_zi_bw

    # initialize states for x/z
    self.x0 = self.x_out
    self.z0 = T.alloc(0.0, self.x0.shape[0], self.z_dim)
    # setup initial values to pass to scan op
    outputs_init = [self.x0, self.z0, None, None, None, None, None, None]
    sequences_init = [self.xi_zmuv, self.zi_zmuv]
    # apply scan op for the sequential imputation loop
    self.scan_results, self.scan_updates = theano.scan(forwards_loop, \
                                                       outputs_info=outputs_init, \
                                                       sequences=sequences_init)

    # grab results of the scan op. all values are computed for each step
    self.xi = self.scan_results[0]
    self.zi = self.scan_results[1]
    self.xi_fw_mean = self.scan_results[2]
    self.xi_fw_logvar = self.scan_results[3]
    self.zi_fw_mean = self.scan_results[4]
    self.zi_fw_logvar = self.scan_results[5]
    self.nll_xi_bw = self.scan_results[6]
    self.nll_zi_bw = self.scan_results[7]
    # per-step walk-out NLL: taken here to be the sum of the two
    # reverse-direction NLLs computed at each step (assumed definition,
    # since self.nlli is used in the costs below)
    self.nlli = self.nll_xi_bw + self.nll_zi_bw

    ######################################################################
    # ALL SYMBOLIC VARS NEEDED FOR THE OBJECTIVE SHOULD NOW BE AVAILABLE #
    ######################################################################

    # shared var learning rate for generator and inferencer
    zero_ary = to_fX(np.zeros((1,)))
    self.lr = theano.shared(value=zero_ary, name='srr_lr')
    # shared var momentum parameters for ADAM optimization
    self.mom_1 = theano.shared(value=zero_ary, name='srr_mom_1')
    self.mom_2 = theano.shared(value=zero_ary, name='srr_mom_2')
    # init parameters for controlling learning dynamics
    self.set_sgd_params()
    # init shared vars for weighting prior kld against reconstruction
    self.lam_kld_p = theano.shared(value=zero_ary, name='srr_lam_kld_p')
    self.lam_kld_q = theano.shared(value=zero_ary, name='srr_lam_kld_q')
    self.lam_kld_g = theano.shared(value=zero_ary, name='srr_lam_kld_g')
    self.lam_kld_s = theano.shared(value=zero_ary, name='srr_lam_kld_s')
    self.set_lam_kld(lam_kld_p=0.0, lam_kld_q=1.0, lam_kld_g=0.0, lam_kld_s=0.0)
    # init shared var for controlling l2 regularization on params
    self.lam_l2w = theano.shared(value=zero_ary, name='srr_lam_l2w')
    self.set_lam_l2w(1e-5)

    # grab all of the "optimizable" parameters from the base networks
    # (only obs_logvar and the two conditional networks used above)
    self.joint_params = [self.obs_logvar]
    self.joint_params.extend(self.p_z_given_x.mlp_params)
    self.joint_params.extend(self.p_x_given_z.mlp_params)

    #################################
    # CONSTRUCT THE KLD-BASED COSTS #
    #################################
    self.kld_p, self.kld_q, self.kld_g, self.kld_s = \
            self._construct_kld_costs(p=1.0)
    self.kld_costs = (self.lam_kld_p[0] * self.kld_p) + \
                     (self.lam_kld_q[0] * self.kld_q) + \
                     (self.lam_kld_g[0] * self.kld_g) + \
                     (self.lam_kld_s[0] * self.kld_s)
    self.kld_cost = T.mean(self.kld_costs)
    #################################
    # CONSTRUCT THE NLL-BASED COSTS #
    #################################
    self.nll_costs = T.sum(self.nlli, axis=0)  # sum the per-step NLLs
    self.nll_cost = T.mean(self.nll_costs)
    self.nll_bounds = self.nll_costs.ravel() + self.kld_q.ravel()
    self.nll_bound = T.mean(self.nll_bounds)
    ########################################
    # CONSTRUCT THE REST OF THE JOINT COST #
    ########################################
    param_reg_cost = self._construct_reg_costs()
    self.reg_cost = self.lam_l2w[0] * param_reg_cost
    self.joint_cost = self.nll_cost + self.kld_cost + self.reg_cost
    ##############################
    # CONSTRUCT A PER-TRIAL COST #
    ##############################
    self.obs_costs = self.nll_costs + self.kld_costs

    # Get the gradient of the joint cost for all optimizable parameters
    print("Computing gradients of self.joint_cost...")
    self.joint_grads = OrderedDict()
    grad_list = T.grad(self.joint_cost, self.joint_params)
    for i, p in enumerate(self.joint_params):
        self.joint_grads[p] = grad_list[i]

    # Construct the updates for the generator and inferencer networks
    self.joint_updates = get_adam_updates(params=self.joint_params, \
                                          grads=self.joint_grads, alpha=self.lr, \
                                          beta1=self.mom_1, beta2=self.mom_2, \
                                          mom2_init=1e-3, smoothing=1e-5, max_grad_norm=10.0)
    for k, v in self.scan_updates.items():
        self.joint_updates[k] = v

    # Construct theano functions for training and diagnostic computations
    print("Compiling cost computer...")
    self.compute_raw_costs = self._construct_raw_costs()
    print("Compiling training function...")
    self.train_joint = self._construct_train_joint()
    print("Compiling free-energy sampler...")
    self.compute_fe_terms = self._construct_compute_fe_terms()
    print("Compiling sequence sampler...")
    self.sequence_sampler = self._construct_sequence_sampler()
    # make easy access points for some interesting parameters
    #self.gen_inf_weights = self.p_zi_given_xi.shared_layers[0].W
    return
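# ---------------------------------------------------------------------------
# Hypothetical usage sketch (assumptions, not from the original source): the
# parameter names below mirror the keys read in __init__, and the two
# conditional networks are placeholders assumed only to expose
# .apply(input, do_samples=False) -> (mean, logvar) and a .mlp_params list.
# ---------------------------------------------------------------------------
# walkout_params = {
#     'x_dim': 784,           # e.g. flattened 28x28 MNIST digits
#     'z_dim': 100,
#     'walkout_steps': 10,
#     'x_type': 'bernoulli',
#     'step_type': 'add',
# }
# x_out_sym = T.matrix('x_out')
# WM = WalkoutModel(rng=np.random.RandomState(1234),
#                   x_out=x_out_sym,
#                   p_z_given_x=inference_net,   # hypothetical inference network
#                   p_x_given_z=generator_net,   # hypothetical generator network
#                   params=walkout_params,
#                   shared_param_dicts=None)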
def test_imocld_generation(step_type='add', attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 200
    enc_dim = 250
    dec_dim = 250
    mix_dim = 20
    z_dim = 100
    n_iter = 16

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA"  # attention not tested yet

    # setup the reader and writer (shared by primary and guide policies)
    read_dim = 2 * x_dim  # dimension of output from reader_mlp
    reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                     name="writer_mlp", **inits)

    # mlps for setting conditionals over z_mix
    mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_var_mlp", **inits)
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    # mlp for decoding z_mix into a distribution over initial LSTM states
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim)], \
                      name="mix_dec_mlp", **inits)
    # mlps for processing inputs to LSTMs
    var_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="var_mlp_in", **inits)
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    # mlps for turning LSTM outputs into conditionals over z_gen
    var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    # LSTMs for the encoder, decoder, and variational (guide) policies
    var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoCLDrawModels(
                n_iter,
                step_type=step_type,  # pass the caller's step_type through ('add' or 'jump')
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                mix_var_mlp=mix_var_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_rnn=dec_rnn,
                var_mlp_in=var_mlp_in,
                var_mlp_out=var_mlp_out,
                var_rnn=var_rnn)
    draw.initialize()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBCLM_GEN_RESULTS_{}_{}.txt".format(step_type, att_tag), 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    # start the batch index past the end of the training set, so the first
    # update below triggers a shuffle and a reset to the first minibatch
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i + 1) / 1000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        draw.lr.set_value(to_fX(zero_ary + learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.99))
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        Mb = 0.0 * Xb
        result = draw.train_joint(Xb, Mb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        # diagnostics
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            draw.save_model_params("TBCLM_GEN_PARAMS_{}_{}.pkl".format(step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            Mb = 0.0 * Xb
            va_costs = draw.compute_nll_bound(Xb, Mb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            Xb = to_fX(Xva[:256])
            Mb = 0.0 * Xb
            samples, _ = draw.do_sample(Xb, Mb)
            n_iter, N, D = samples.shape
            samples = samples.reshape((n_iter, N, 28, 28))
            for j in xrange(n_iter):
                img = img_grid(samples[j, :, :, :])
                img.save("TBCLM-gen-samples-%03d.png" % (j,))
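# ---------------------------------------------------------------------------
# Added note (assumption-free arithmetic, not from the original source): the
# schedule above starts the base learning rate at 2e-4 and multiplies it by
# 0.95 every 10000 updates, so the effective rate after k updates is roughly:
#
#     def decayed_lr(k, base_lr=0.0002, decay=0.95, period=10000):
#         return base_lr * (decay ** (k // period))
#
#     decayed_lr(100000)  # ~1.2e-4
#     decayed_lr(250000)  # ~5.5e-5
# ---------------------------------------------------------------------------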
def test_imoold_generation(step_type='add', attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 250
    enc_dim = 250
    dec_dim = 250
    mix_dim = 25
    z_dim = 100
    if attention:
        n_iter = 64
    else:
        n_iter = 32

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # setup the reader and writer
    if attention:
        read_N, write_N = (2, 5)  # resolution of reader and writer
        read_dim = 2*read_N**2    # total number of "pixels" read by reader
        reader_mlp = AttentionReader2d(x_dim=x_dim, dec_dim=dec_dim,
                                       width=28, height=28, N=read_N, **inits)
        writer_mlp = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                     width=28, height=28, N=write_N, **inits)
        att_tag = "YA"
    else:
        read_dim = 2*x_dim
        reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                         name="writer_mlp", **inits)
        att_tag = "NA"

    # setup the infinite mixture initialization model
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim)], \
                      name="mix_dec_mlp", **inits)
    # setup the components of the sequential generative model
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    dec_mlp_out = CondNet([], [dec_dim, z_dim], name="dec_mlp_out", **inits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoOLDrawModels(
                n_iter,
                step_type=step_type,  # step_type can be 'add' or 'jump'
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                reader_mlp=reader_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_mlp_out=dec_mlp_out,
                dec_rnn=dec_rnn,
                writer_mlp=writer_mlp)
    draw.initialize()

    compile_start_time = time.time()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    compile_end_time = time.time()
    compile_minutes = (compile_end_time - compile_start_time) / 60.0
    print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBOLM_GEN_RESULTS_{}_{}.txt".format(step_type, att_tag), 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.00015
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        draw.lr.set_value(to_fX(zero_ary + scale*learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + scale*momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.98))
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        draw.set_rnn_noise(rnn_noise=0.02)
        result = draw.train_joint(Xb, Xb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        # diagnostics
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            str8 = "    step_klds : {0:s}".format(np.array_str(costs[6], precision=2))
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            draw.save_model_params("TBOLM_GEN_PARAMS_{}_{}.pkl".format(step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            draw.set_rnn_noise(rnn_noise=0.0)
            va_costs = draw.compute_nll_bound(Xb, Xb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            samples, x_logodds = draw.do_sample(16*16)
            utils.plot_kde_histogram(x_logodds[-1, :, :], "TBOLM-log_odds_hist.png", bins=30)
            n_iter, N, D = samples.shape
            samples = samples.reshape((n_iter, N, 28, 28))
            for j in xrange(n_iter):
                img = img_grid(samples[j, :, :, :])
                img.save("TBOLM-gen-samples-%03d.png" % (j,))
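# ---------------------------------------------------------------------------
# Optional post-processing sketch (not from the original source): the per-step
# sample grids saved above ("TBOLM-gen-samples-000.png", ...) can be stitched
# into an animation of the refinement process, e.g. with imageio (assumed to
# be installed):
# ---------------------------------------------------------------------------
# import imageio
# frames = [imageio.imread("TBOLM-gen-samples-%03d.png" % (j,))
#           for j in range(n_iter)]
# imageio.mimsave("TBOLM-gen-samples.gif", frames, duration=0.2)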
# configure a trajectory generator
num_samples = 100
traj_len = 64
x_range = [-0.8, 0.8]
y_range = [-0.8, 0.8]
max_speed = 0.15
TRAJ = TrajectoryGenerator(x_range=x_range, y_range=y_range, \
                           max_speed=max_speed)

# test the writer function
start_time = time.time()
batch_count = 50
for i in range(batch_count):
    # generate a minibatch of trajectories
    traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len)
    traj_x = traj_pos[:, :, 0]
    traj_y = traj_pos[:, :, 1]
    # draw the trajectories
    center_x = to_fX(traj_x.T.ravel())
    center_y = to_fX(traj_y.T.ravel())
    delta = to_fX(np.ones(center_x.shape))
    sigma = to_fX(np.ones(center_x.shape))
    W = write_func(center_y, center_x, delta, 0.2 * sigma)
end_time = time.time()
render_time = end_time - start_time
render_bps = batch_count / render_time
print("RENDER BATCH/SECOND: {0:.2f}".format(render_bps))

# visualize the first 20 rendered trajectories (20 * traj_len frames)
W = W[:20 * traj_len]
utils.visualize_samples(W, "AAAAA.png", num_rows=20)
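# ---------------------------------------------------------------------------
# Added note (not from the original source): render_bps counts rendered
# minibatches per second; each batch contains num_samples * traj_len frames,
# so the per-frame throughput would be:
# ---------------------------------------------------------------------------
# render_fps = render_bps * num_samples * traj_len
# print("RENDER FRAMES/SECOND: {0:.2f}".format(render_fps))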