def batch_test_ss_mlp(test_count=10, su_count=1000):
    """Run multiple semisupervised learning tests."""
    # Set some reasonable sgd parameters
    sgd_params = {}
    sgd_params['start_rate'] = 0.1
    sgd_params['decay_rate'] = 0.998
    sgd_params['wt_norm_bound'] = 3.5
    sgd_params['epochs'] = 1000
    sgd_params['batch_size'] = 128
    # Set some reasonable mlp parameters
    mlp_params = {}
    # Set up some proto-networks
    pc0 = [28*28, 800, 800, 11]
    mlp_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    mlp_params['spawn_configs'] = [sc0, sc1]
    mlp_params['spawn_weights'] = [0.5, 0.5]
    # Set remaining params
    mlp_params['ear_type'] = 2
    mlp_params['ear_lam'] = 2.0
    mlp_params['lam_l2a'] = 1e-3
    mlp_params['reg_all_obs'] = False
    # Goofy symbolic sacrament to Theano
    x_in = T.matrix('x_in')
    # Run tests with different sorts of regularization
    for test_num in range(test_count):
        # Run test with EAR regularization on unsupervised examples
        sgd_params['result_tag'] = "ss_sde_s{0:d}_t{1:d}".format(su_count, test_num)
        mlp_params['ear_type'] = 2
        ###################################
        # PARAMS FOR TRAINING WITHOUT EAR #
        ###################################
        #sgd_params['epochs'] = 500
        #mlp_params['ear_lam'] = 0.0
        ################################
        # PARAMS FOR TRAINING WITH EAR #
        ################################
        sgd_params['epochs'] = 600
        mlp_params['ear_lam'] = 2.0
        # Initialize a random number generator for this test
        rng = np.random.RandomState(test_num)
        # Load some data to train/validate/test with
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm_ss(dataset, su_count, rng, zero_mean=True)
        # Construct the EarNet object that we will be training
        NET = EarNet(rng=rng, input=x_in, params=mlp_params)
        init_biases(NET, b_init=0.1)
        train_ss_mlp(NET, sgd_params, datasets)
    return
def train_ss_mlp(NET, mlp_params, sgd_params, rng, su_count=1000):
    """Run semisupervised DEV-regularized test."""
    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, su_count, rng)
    # Tell the net that it's semisupervised, which will force it to use only
    # unlabeled examples for computing the DEV regularizer.
    NET.is_semisupervised = 1
    # Run training on the given NET
    NT.train_ss_mlp(NET=NET, \
            mlp_params=mlp_params, \
            sgd_params=sgd_params, \
            datasets=datasets)
    return
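# NOTE: the five-argument helper above comes from the DEV-regularized
# experiment script and hands the actual training work to NT (the net
# trainers module imported by that script). The batch_test_* functions
# below instead call a three-argument variant, train_ss_mlp(NET,
# sgd_params, datasets), from the EAR experiment script. A minimal usage
# sketch for the helper above (illustrative, assuming NET, mlp_params,
# and sgd_params are already constructed as in batch_test_ss_mlp):
#
#   rng = np.random.RandomState(1234)
#   train_ss_mlp(NET, mlp_params, sgd_params, rng, su_count=1000)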
sgd_params['decay_rate'] = 0.998
sgd_params['wt_norm_bound'] = 3.75
sgd_params['epochs'] = 1000
sgd_params['batch_size'] = 128
# Set parameters for the network to be trained
mlp_params = {}
mlp_params['layer_sizes'] = [28*28, 500, 500, 11]
mlp_params['lam_l2a'] = 1e-3
mlp_params['dev_clones'] = 1
mlp_params['dev_types'] = [1, 1, 2]
mlp_params['dev_lams'] = [0.1, 0.1, 2.0]
mlp_params['use_bias'] = 1
# Load some data to train/validate/test with
dataset = 'data/mnist.pkl.gz'
datasets = load_udm_ss(dataset, 1000, rng)
# Set the type of network to train, based on user input
if (len(sys.argv) != 3):
    print("Usage: {0} [raw|sde|dev] [result_tag]".format(sys.argv[0]))
    exit(1)
elif sys.argv[1] == "raw":
    sgd_params['mlp_type'] = 'raw'
    sgd_params['result_tag'] = sys.argv[2]
    mlp_params['dev_lams'] = [0.0 for l in mlp_params['dev_lams']]
elif sys.argv[1] == "sde":
    sgd_params['mlp_type'] = 'sde'
    sgd_params['result_tag'] = sys.argv[2]
elif sys.argv[1] == "dev":
    sgd_params['mlp_type'] = 'dev'
    sgd_params['result_tag'] = sys.argv[2]
if __name__ == "__main__":
    import utils as utils
    from load_data import load_udm, load_udm_ss, load_mnist
    from NetLayers import relu_actfun
    import PeaNet as PNet
    import GenNet as GNet
    import InfNet as INet

    # Initialize a source of randomness
    rng = np.random.RandomState(123)

    # Load some data to train/validate/test with
    sup_count = 600
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=True)
    Xtr_su = datasets[0][0].get_value(borrow=False)
    Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
    Xtr_un = datasets[1][0].get_value(borrow=False)
    Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
    # get the joint labeled and unlabeled data
    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
    Ytr_un = 0 * Ytr_un # KEEP CATS FIXED OR FREE? YES/NO?
    # get the labeled data
    Xtr_su = Xtr_su.astype(theano.config.floatX)
    Ytr_su = Ytr_su[:,np.newaxis]
    # get observations and labels for the validation set
    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
    Yva = Yva[:,np.newaxis] # numpy is dumb
def batch_test_ss_mlp_pt(test_count=10, su_count=1000):
    """Setup basic test for semisupervised EAR-regularized MLP."""
    # Set some reasonable sgd parameters
    sgd_params = {}
    sgd_params['start_rate'] = 0.01
    sgd_params['decay_rate'] = 0.998
    sgd_params['wt_norm_bound'] = 3.5
    sgd_params['epochs'] = 1000
    sgd_params['batch_size'] = 100
    sgd_params['result_tag'] = '---'
    # Set some reasonable mlp parameters
    mlp_params = {}
    # Set up some proto-networks
    pc0 = [28*28, 800, 800, 11]
    mlp_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    mlp_params['spawn_configs'] = [sc0, sc1]
    mlp_params['spawn_weights'] = [0.0, 1.0]
    # Set remaining params
    mlp_params['ear_type'] = 5
    mlp_params['ear_lam'] = 1.0
    mlp_params['lam_l2a'] = 1e-2
    mlp_params['reg_all_obs'] = True
    for test_num in range(test_count):
        rng_seed = test_num
        sgd_params['result_tag'] = "test_{0:d}".format(test_num)
        # Initialize a random number generator for this test
        rng = np.random.RandomState(rng_seed)
        # Load some data to train/validate/test with
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm(dataset, zero_mean=False)
        # Construct the EarNet object that we will be training
        x_in = T.matrix('x_in')
        NET = EarNet(rng=rng, input=x_in, params=mlp_params)
        init_biases(NET, b_init=0.05)
        ##########################################
        # First, pretrain each layer in the mlp. #
        ##########################################
        sgd_params['result_tag'] = "ss_ear_pt_s{0:d}_t{1:d}".format(su_count, test_num)
        sgd_params['batch_size'] = 25
        sgd_params['start_rate'] = 0.02
        sgd_params['epochs'] = 40
        for i in range(len(NET.dae_costs)):
            print("==================================================")
            print("Pretraining hidden layer(s) at depth {0:d}".format(i+1))
            print("==================================================")
            train_dae(NET, i, sgd_params, datasets)
        # Load some data to train/validate/test with
        rng = np.random.RandomState(rng_seed)
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm_ss(dataset, su_count, rng, zero_mean=False)
        # Run semisupervised training on the given MLP
        sgd_params['batch_size'] = 100
        sgd_params['start_rate'] = 0.04
        # Train with weak EAR regularization
        sgd_params['top_only'] = True
        sgd_params['epochs'] = 5
        NET.set_ear_lam(0.0)
        train_ss_mlp(NET, sgd_params, datasets)
        COMMENT = """
        # Train with no EAR regularization
        sgd_params['top_only'] = False
        sgd_params['epochs'] = 100
        NET.set_ear_lam(0.0)
        train_ss_mlp(NET, sgd_params, datasets)
        """
        # Train with weak EAR regularization
        sgd_params['top_only'] = False
        sgd_params['epochs'] = 5
        NET.set_ear_lam(0.5)
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with weak EAR regularization
        sgd_params['epochs'] = 10
        NET.set_ear_lam(1.0)
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 15
        NET.set_ear_lam(1.5)
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 20
        NET.set_ear_lam(2.0)
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 100
        NET.set_ear_lam(3.0)
        train_ss_mlp(NET, sgd_params, datasets)
    return
def batch_test_ss_mlp_gentle(test_count=10, su_count=1000):
    """Setup basic test for semisupervised EAR-regularized MLP."""
    # Set some reasonable sgd parameters
    sgd_params = {}
    sgd_params['start_rate'] = 0.1
    sgd_params['decay_rate'] = 0.998
    sgd_params['wt_norm_bound'] = 3.5
    sgd_params['epochs'] = 1000
    sgd_params['batch_size'] = 100
    sgd_params['result_tag'] = 'xxx'
    sgd_params['top_only'] = False
    # Set some reasonable mlp parameters
    mlp_params = {}
    # Set up some proto-networks
    pc0 = [28*28, 800, 800, 11]
    mlp_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    mlp_params['spawn_configs'] = [sc0, sc1]
    mlp_params['spawn_weights'] = [0.5, 0.5]
    # Set remaining params
    mlp_params['ear_type'] = 5
    mlp_params['ear_lam'] = 1.0
    mlp_params['lam_l2a'] = 1e-2
    mlp_params['reg_all_obs'] = True
    for test_num in range(test_count):
        rng_seed = test_num
        sgd_params['result_tag'] = "ss_ear_gentle_s{0:d}_t{1:d}".format(su_count, test_num)
        # Initialize a random number generator for this test
        rng = np.random.RandomState(rng_seed)
        # Load some data to train/validate/test with
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm_ss(dataset, su_count, rng, zero_mean=False)
        # Construct the EarNet object that we will be training
        x_in = T.matrix('x_in')
        NET = EarNet(rng=rng, input=x_in, params=mlp_params)
        init_biases(NET, b_init=0.1)
        # Run semisupervised training on the given MLP
        sgd_params['batch_size'] = 100
        # Train with weak EAR regularization
        sgd_params['top_only'] = False
        mlp_params['ear_type'] = 5
        sgd_params['start_rate'] = 0.1
        sgd_params['epochs'] = 5
        NET.set_ear_lam(0.0)
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with weak EAR regularization
        sgd_params['epochs'] = 10
        NET.set_ear_lam(1.0) # for EAR
        #NET.set_ear_lam(0.0) # for SDE
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 10
        NET.set_ear_lam(1.5) # for EAR
        #NET.set_ear_lam(0.0) # for SDE
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 15
        NET.set_ear_lam(2.0) # for EAR
        #NET.set_ear_lam(0.0) # for SDE
        train_ss_mlp(NET, sgd_params, datasets)
        # Train with more EAR regularization
        sgd_params['epochs'] = 70
        sgd_params['start_rate'] = 0.05
        NET.set_ear_lam(3.0) # for EAR
        #NET.set_ear_lam(0.0) # for SDE
        train_ss_mlp(NET, sgd_params, datasets)
    return
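def run_ear_batch_tests():
    # Hypothetical driver (an assumption, not part of the original scripts):
    # run one round of each semisupervised EAR test with 1000 labeled
    # examples. Assumes the three-argument train_ss_mlp from the EAR
    # experiment script is the one in scope.
    batch_test_ss_mlp(test_count=1, su_count=1000)
    batch_test_ss_mlp_pt(test_count=1, su_count=1000)
    batch_test_ss_mlp_gentle(test_count=1, su_count=1000)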
def manifold_walk_regularization():
    for t_num in range(10):
        out_file = open("MWR_TEST_RESULTS_{0:d}.txt".format(t_num), 'wb')
        # Initialize a source of randomness
        rng = np.random.RandomState(t_num)
        # Load some data to train/validate/test with
        sup_count = 600
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
        Xtr_su = datasets[0][0].get_value(borrow=False)
        Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
        Xtr_un = datasets[1][0].get_value(borrow=False)
        Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
        # get the joint labeled and unlabeled data
        Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
        Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
        Ytr_un = 0 * Ytr_un # KEEP CATS FIXED OR FREE? YES/NO?
        Xtr_mean = np.mean(Xtr_un, axis=0, keepdims=True)
        # get the labeled data
        Xtr_su = Xtr_su.astype(theano.config.floatX)
        Ytr_su = Ytr_su[:,np.newaxis]
        # get observations and labels for the validation set
        Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
        Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
        Yva = Yva[:,np.newaxis] # numpy is dumb
        # get observations and labels for the test set
        Xte = datasets[3][0].get_value(borrow=False).astype(theano.config.floatX)
        Yte = datasets[3][1].get_value(borrow=False).astype(np.int32)
        Yte = Yte[:,np.newaxis] # numpy is dumb
        # get size information for the data and training batches
        un_samples = Xtr_un.shape[0]
        su_samples = Xtr_su.shape[0]
        va_samples = Xva.shape[0]
        data_dim = Xtr_su.shape[1]
        label_dim = 10
        batch_size = 100
        # Symbolic inputs
        Xd = T.matrix(name='Xd')
        Xc = T.matrix(name='Xc')
        Xm = T.matrix(name='Xm')
        Xt = T.matrix(name='Xt')
        Xp = T.matrix(name='Xp')
        Yd = T.icol('Yd')
        # Load inferencer and generator from saved parameters
        gn_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_GN.pkl"
        in_fname = "MNIST_WALKOUT_TEST_BIN/pt_walk_params_b150000_IN.pkl"
        IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
        GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
        IN.set_sigma_scale(1.3)
        prior_dim = GN.latent_dim
        MCS = MCSampler(rng=rng, Xd=Xd, i_net=IN, g_net=GN, chain_len=2, \
                data_dim=data_dim, prior_dim=prior_dim)
        full_chain_len = MCS.chain_len + 1
        # setup "chain" versions of the labeled/unlabeled/validate sets
        Xtr_su_chains = [Xtr_su.copy() for i in range(full_chain_len)]
        Xtr_un_chains = [Xtr_un.copy() for i in range(full_chain_len)]
        Ytr_su_chains = [Ytr_su for i in range(full_chain_len)]
        Ytr_un_chains = [Ytr_un for i in range(full_chain_len)]
        Xva_chains = [Xva for i in range(full_chain_len)]
        Yva_chains = [Yva for i in range(full_chain_len)]
        # downsample, to feed less into the PNS
        Xtr_su_short = downsample_chains(Xtr_su_chains, stride=1)
        Xtr_un_short = downsample_chains(Xtr_un_chains, stride=1)
        Ytr_su_short = downsample_chains(Ytr_su_chains, stride=1)
        Ytr_un_short = downsample_chains(Ytr_un_chains, stride=1)
        Xva_short = downsample_chains(Xva_chains, stride=1)
        Yva_short = downsample_chains(Yva_chains, stride=1)
        short_chain_len = len(Xtr_su_short)
        print("REGULARIZATION CHAIN STEPS: {0:d}".format(short_chain_len))
        # choose some parameters for the categorical inferencer
        pn_params = {}
        pc0 = [data_dim, 800, 800, label_dim]
        pn_params['proto_configs'] = [pc0]
        # Set up some spawn networks
        sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
        pn_params['spawn_configs'] = [sc0]
        pn_params['spawn_weights'] = [1.0]
        # Set remaining params
        pn_params['activation'] = relu_actfun
        pn_params['init_scale'] = 0.5
        pn_params['lam_l2a'] = 1e-3
        pn_params['vis_drop'] = 0.2
        pn_params['hid_drop'] = 0.5
        # Initialize the base network for this PNSeq
        PN = PeaNet(rng=rng, Xd=Xd, params=pn_params)
        PN.init_biases(0.1)
        print("Initializing PNS...")
        # Initialize the PeaNetSeq
        PNS = PeaNetSeq(rng=rng, pea_net=PN, seq_len=short_chain_len, \
                seq_Xd=None, params=None)
        # set weighting parameters for the various costs...
        PNS.set_lam_class(1.0)
        PNS.set_lam_pea_su(0.0)
        PNS.set_lam_pea_un(2.0)
        PNS.set_lam_ent(0.0)
        PNS.set_lam_l2w(1e-5)
        learn_rate = 0.05
        PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
        for i in range(300000):
            if i < 5000:
                scale = float(i + 1) / 5000.0
            # NOTE: fixed operator precedence; (i+1 % 100000) checks i+1 != 0
            if (((i+1) % 100000) == 0):
                learn_rate = learn_rate * 0.5
            if ((i % 250) == 0):
                Xtr_su_chains = resample_chain_steps(MCS, Xtr_su_chains)
                Xtr_un_chains = resample_chain_steps(MCS, Xtr_un_chains)
                Xtr_su_short = downsample_chains(Xtr_su_chains, stride=1)
                Xtr_un_short = downsample_chains(Xtr_un_chains, stride=1)
            # get some data to train with
            su_idx = npr.randint(low=0, high=su_samples, size=(batch_size,))
            xsuc = [(x.take(su_idx, axis=0) - Xtr_mean) for x in Xtr_su_short]
            ysuc = [y.take(su_idx, axis=0) for y in Ytr_su_short]
            un_idx = npr.randint(low=0, high=un_samples, size=(batch_size,))
            xunc = [(x.take(un_idx, axis=0) - Xtr_mean) for x in Xtr_un_short]
            yunc = [y.take(un_idx, axis=0) for y in Ytr_un_short]
            Xb_chains = [np.vstack((xsu, xun)) for (xsu, xun) in zip(xsuc, xunc)]
            Yb_chains = [np.vstack((ysu, yun)) for (ysu, yun) in zip(ysuc, yunc)]
            # set learning parameters for this update
            PNS.set_pn_sgd_params(lr_pn=learn_rate, mom_1=0.9, mom_2=0.999)
            # do a minibatch update of all PeaNet parameters
            outputs = PNS.train_joint(*(Xb_chains + Yb_chains))
            joint_cost = 1.0 * outputs[0]
            class_cost = 1.0 * outputs[1]
            pea_cost = 1.0 * outputs[2]
            ent_cost = 1.0 * outputs[3]
            other_reg_cost = 1.0 * outputs[4]
            assert(not (np.isnan(joint_cost)))
            if ((i % 500) == 0):
                o_str = "batch: {0:d}, joint: {1:.4f}, class: {2:.4f}, pea: {3:.4f}, ent: {4:.4f}, other_reg: {5:.4f}".format( \
                        i, joint_cost, class_cost, pea_cost, ent_cost, other_reg_cost)
                print(o_str)
                out_file.write(o_str+"\n")
                out_file.flush()
                # check classification error on training and validation set
                train_err = PNS.classification_error(Xtr_su-Xtr_mean, Ytr_su)
                va_err = PNS.classification_error(Xva-Xtr_mean, Yva)
                o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
                print(o_str)
                out_file.write(o_str+"\n")
                out_file.flush()
            if ((i % 1000) == 0):
                # draw the main PeaNet's first-layer filters/weights
                # (dropped a no-op .format(i); the name has no placeholder)
                file_name = "MWR_PN_WEIGHTS.png"
                utils.visualize_net_layer(PNS.PN.proto_nets[0][0], file_name)
    print("TESTING COMPLETE!")
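def run_manifold_walk_regularization():
    # Hypothetical driver (an assumption, not part of the original script):
    # manifold_walk_regularization() takes no arguments, but it expects the
    # pretrained InfNet/GenNet pickles at the MNIST_WALKOUT_TEST_BIN paths
    # hard-coded above to already exist on disk.
    manifold_walk_regularization()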
def test_git_on_gip(hyper_params=None, rng_seed=1234):
    assert(not (hyper_params is None))
    # Initialize a source of randomness
    rng = np.random.RandomState(rng_seed)
    sup_count = 100
    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
    Xtr_su = datasets[0][0].get_value(borrow=False)
    Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
    Xtr_un = datasets[1][0].get_value(borrow=False)
    Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
    # get the joint labeled and unlabeled data
    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
    # get the labeled data
    Xtr_su = Xtr_su.astype(theano.config.floatX)
    Ytr_su = Ytr_su[:,np.newaxis]
    # get observations and labels for the validation set
    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
    Yva = Yva[:,np.newaxis] # numpy is dumb
    # get size information for the data
    un_samples = Xtr_un.shape[0]
    su_samples = Xtr_su.shape[0]
    va_samples = Xva.shape[0]
    # set up some symbolic variables for input/output
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    Yd = T.icol('Yd_base')
    # set some "shape" parameters for the networks
    data_dim = Xtr_un.shape[1]
    label_dim = 10
    prior_1_dim = 50
    prior_2_dim = 50
    prior_sigma = 1.0
    batch_size = 100
    ##################
    # SETUP A GIPAIR #
    ##################
    gn1_params = {}
    gn1_config = [prior_1_dim, 600, 600, data_dim]
    gn1_params['mlp_config'] = gn1_config
    gn1_params['activation'] = softplus_actfun
    gn1_params['out_type'] = 'bernoulli'
    gn1_params['lam_l2a'] = 1e-3
    gn1_params['vis_drop'] = 0.0
    gn1_params['hid_drop'] = 0.0
    gn1_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in1_params = {}
    shared_config = [data_dim, 600, 600]
    top_config = [shared_config[-1], prior_1_dim]
    in1_params['shared_config'] = shared_config
    in1_params['mu_config'] = top_config
    in1_params['sigma_config'] = top_config
    in1_params['activation'] = softplus_actfun
    in1_params['lam_l2a'] = 1e-3
    in1_params['vis_drop'] = 0.0
    in1_params['hid_drop'] = 0.0
    in1_params['bias_noise'] = 0.1
    in1_params['input_noise'] = 0.0
    # Initialize the base networks for this GIPair
    IN1 = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in1_params, shared_param_dicts=None)
    GN1 = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn1_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN1.init_biases(0.0)
    GN1.init_biases(0.0)
    # Initialize the GIPair
    GIP = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN1, i_net=IN1, \
            data_dim=data_dim, prior_dim=prior_1_dim, \
            params=None, shared_param_dicts=None)
    # Set cost weighting parameters
    GIP.set_lam_nll(1.0)
    GIP.set_lam_kld(1.0)
    GIP.set_lam_l2w(1e-4)
    ##################
    # SETUP A GITRIP #
    ##################
    # set parameters for the generator network
    gn2_params = {}
    gn2_config = [(prior_2_dim + label_dim), 300, prior_1_dim]
    gn2_params['mlp_config'] = gn2_config
    gn2_params['activation'] = softplus_actfun
    gn2_params['out_type'] = 'gaussian'
    gn2_params['lam_l2a'] = 1e-3
    gn2_params['vis_drop'] = 0.0
    gn2_params['hid_drop'] = 0.0
    gn2_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in2_params = {}
    shared_config = [prior_1_dim, 300]
    top_config = [shared_config[-1], prior_2_dim]
    in2_params['shared_config'] = shared_config
    in2_params['mu_config'] = top_config
    in2_params['sigma_config'] = top_config
    in2_params['activation'] = softplus_actfun
    in2_params['lam_l2a'] = 1e-3
    in2_params['vis_drop'] = 0.0
    in2_params['hid_drop'] = 0.0
    in2_params['bias_noise'] = 0.1
    in2_params['input_noise'] = 0.0
    # choose some parameters for the categorical inferencer
    pn2_params = {}
    pc0 = [prior_1_dim, 300, label_dim]
    pn2_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.0, 'bias_noise': 0.1, 'do_dropout': False}
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    pn2_params['spawn_configs'] = [sc0] #[sc0, sc1]
    pn2_params['spawn_weights'] = [1.0] #[0.5, 0.5]
    # Set remaining params
    pn2_params['activation'] = softplus_actfun
    pn2_params['ear_type'] = 6
    pn2_params['lam_l2a'] = 1e-3
    pn2_params['vis_drop'] = 0.0
    pn2_params['hid_drop'] = 0.0
    # Initialize the base networks for this GITrip
    GN2 = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn2_params, shared_param_dicts=None)
    IN2 = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in2_params, shared_param_dicts=None)
    PN2 = PeaNet(rng=rng, Xd=Xd, params=pn2_params)
    # Initialize biases in GN, IN, and PN
    GN2.init_biases(0.0)
    IN2.init_biases(0.0)
    PN2.init_biases(0.0)
    # Initialize the GITrip
    GIT = GITrip(rng=rng, \
            Xd=Xd, Yd=Yd, Xc=Xc, Xm=Xm, \
            g_net=GN2, i_net=IN2, p_net=PN2, \
            data_dim=prior_1_dim, prior_dim=prior_2_dim, \
            label_dim=label_dim, batch_size=batch_size, \
            params=None, shared_param_dicts=None)
    # Set cost weighting parameters
    GIT.set_lam_nll(1.0)
    GIT.set_lam_kld(1.0)
    GIT.set_lam_cat(0.0)
    GIT.set_lam_pea(0.0)
    GIT.set_lam_ent(0.0)
    GIT.set_lam_l2w(1e-4)
    #####################################################
    # CONSTRUCT A GITonGIP STACKED, SEMI-SUPERVISED VAE #
    #####################################################
    GOG = GITonGIP(rng=rng, \
            Xd=Xd, Yd=Yd, Xc=Xc, Xm=Xm, \
            gip_vae=GIP, git_vae=GIT, \
            data_dim=data_dim, prior_1_dim=prior_1_dim, \
            prior_2_dim=prior_2_dim, label_dim=label_dim, \
            batch_size=batch_size, \
            params=None, shared_param_dicts=None)
    #################################
    # WRITE SOME INFO TO "LOG" FILE #
    #################################
    learn_rate_git = hyper_params['learn_rate_git']
    lam_pea_git = hyper_params['lam_pea_git']
    lam_cat_git = hyper_params['lam_cat_git']
    lam_ent_git = hyper_params['lam_ent_git']
    lam_l2w_git = hyper_params['lam_l2w_git']
    out_name = hyper_params['out_name']
    out_file = open(out_name, 'wb')
    out_file.write("**TODO: More informative output, and maybe a real log**\n")
    out_file.write("learn_rate_git: {0:.4f}\n".format(learn_rate_git))
    out_file.write("lam_pea_git: {0:.4f}\n".format(lam_pea_git))
    out_file.write("lam_cat_git: {0:.4f}\n".format(lam_cat_git))
    out_file.write("lam_ent_git: {0:.4f}\n".format(lam_ent_git))
    out_file.write("lam_l2w_git: {0:.4f}\n".format(lam_l2w_git))
    out_file.flush()
    ##################################################
    # TRAIN THE GIPair FOR SOME NUMBER OF ITERATIONS #
    ##################################################
    learn_rate = 0.002
    for i in range(250000):
        # NOTE: fixed operator precedence; (i+1 % 100000) never triggered
        if (((i+1) % 100000) == 0):
            learn_rate = learn_rate * 0.8
        scale = min(1.0, (float(i+1) / 50000.0))
        GIP.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
        GIP.set_lam_nll(lam_nll=1.0)
        GIP.set_lam_kld(lam_kld=scale)
        # sample some unlabeled data to train with
        tr_idx = npr.randint(low=0, high=un_samples, size=(batch_size,))
        Xd_batch = binarize_data(Xtr_un.take(tr_idx, axis=0))
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        outputs = GOG.train_gip(Xd_batch, Xc_batch, Xm_batch)
        joint_cost = 1.0 * outputs[0]
        data_nll_cost = 1.0 * outputs[1]
        post_kld_cost = 1.0 * outputs[2]
        other_reg_cost = 1.0 * outputs[3]
        if ((i % 1000) == 0):
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, data_nll_cost: {2:.4f}, post_kld_cost: {3:.4f}, other_reg_cost: {4:.4f}".format( \
                    i, joint_cost, data_nll_cost, post_kld_cost, other_reg_cost)
            print(o_str)
            out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 5000) == 0):
            file_name = "GOG_GIP_SAMPLES_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch[0:10,:], 3, axis=0)
            sample_lists = GIP.sample_gil_from_data(Xd_samps, loop_iters=10)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name)
    ########################################################
    # REMOVE (SORT OF) UNUSED DIMENSIONS FROM LATENT SPACE #
    ########################################################
    #tr_idx = npr.randint(low=0,high=un_samples,size=(10000,))
    #Xd_batch = binarize_data(Xtr_un.take(tr_idx, axis=0))
    #Xp_batch = GIP.IN.mean_posterior(Xd_batch, 0.0*Xd_batch, 0.0*Xd_batch)
    #Xp_std = np.std(Xp_batch, axis=0, keepdims=True)
    #dim_mask = 1.0 * (Xp_std > 0.1)
    #GIT.set_input_mask(dim_mask)
    #print("MASK NNZ: {0:.4f}".format(np.sum(dim_mask)))
    ##################################################
    # TRAIN THE GITrip FOR SOME NUMBER OF ITERATIONS #
    ##################################################
    GIT.set_lam_l2w(lam_l2w=lam_l2w_git)
    learn_rate = learn_rate_git
    GIT.set_all_sgd_params(learn_rate=learn_rate, momentum=0.98)
    for i in range(250000):
        scale = 1.0
        if (i < 25000):
            scale = float(i+1) / 25000.0
        # NOTE: fixed operator precedence; (i+1 % 50000) never triggered
        if (((i+1) % 50000) == 0):
            learn_rate = learn_rate * 0.8
        # do a minibatch update using unlabeled data
        if True:
            # get some data to train with
            un_idx = npr.randint(low=0, high=un_samples, size=(batch_size,))
            Xd_un = binarize_data(Xtr_un.take(un_idx, axis=0))
            Yd_un = Ytr_un.take(un_idx, axis=0)
            Xc_un = 0.0 * Xd_un
            Xm_un = 0.0 * Xd_un
            # do a minibatch update of the model, and compute some costs
            GIT.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIT.set_lam_nll(1.0)
            GIT.set_lam_kld(scale * 1.0)
            GIT.set_lam_cat(0.0)
            GIT.set_lam_pea(scale * lam_pea_git)
            GIT.set_lam_ent(scale * lam_ent_git)
            outputs = GOG.train_git(Xd_un, Xc_un, Xm_un, Yd_un)
            joint_cost = 1.0 * outputs[0]
            data_nll_cost = 1.0 * outputs[1]
            post_kld_cost = 1.0 * outputs[2]
            post_cat_cost = 1.0 * outputs[3]
            post_pea_cost = 1.0 * outputs[4]
            post_ent_cost = 1.0 * outputs[5]
            other_reg_cost = 1.0 * outputs[6]
        # do another minibatch update incorporating label information
        if True:
            # get some data to train with
            su_idx = npr.randint(low=0, high=su_samples, size=(batch_size,))
            Xd_su = binarize_data(Xtr_su.take(su_idx, axis=0))
            Yd_su = Ytr_su.take(su_idx, axis=0)
            Xc_su = 0.0 * Xd_su
            Xm_su = 0.0 * Xd_su
            # update only based on the label-based classification cost
            GIT.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIT.set_lam_nll(0.0)
            GIT.set_lam_kld(0.0)
            GIT.set_lam_cat(scale * lam_cat_git)
            GIT.set_lam_pea(scale * lam_pea_git)
            GIT.set_lam_ent(0.0)
            outputs = GOG.train_git(Xd_su, Xc_su, Xm_su, Yd_su)
            joint_2 = 1.0 * outputs[0]
            data_nll_2 = 1.0 * outputs[1]
            post_kld_2 = 1.0 * outputs[2]
            post_cat_cost = 1.0 * outputs[3]
            post_pea_2 = 1.0 * outputs[4]
            post_ent_2 = 1.0 * outputs[5]
            other_reg_cost = 1.0 * outputs[6]
        if ((i % 500) == 0):
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, nll: {2:.4f}, kld: {3:.4f}, cat: {4:.4f}, pea: {5:.4f}, ent: {6:.4f}, other_reg: {7:.4f}".format( \
                    i, joint_cost, data_nll_cost, post_kld_cost, post_cat_cost, post_pea_cost, post_ent_cost, other_reg_cost)
            print(o_str)
            out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 2500) == 0):
            # check classification error on training and validation set
            train_err = GOG.classification_error(Xtr_su, Ytr_su)
            va_err = GOG.classification_error(Xva, Yva)
            o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
            print(o_str)
            out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 5000) == 0):
            file_name = "GoG_GIT_SAMPLES_b{0:d}.png".format(i)
            va_idx = npr.randint(low=0, high=va_samples, size=(5,))
            Xd_samps = np.vstack([Xd_un[0:5,:], binarize_data(Xva[va_idx,:])])
            Xd_samps = np.repeat(Xd_samps, 3, axis=0)
            sample_lists = GOG.sample_git_from_data(Xd_samps, loop_iters=10)
            Xs = np.vstack(sample_lists["data samples"])
            Ys = GOG.class_probs(Xs)
            Xs = mnist_prob_embed(Xs, Ys)
            utils.visualize_samples(Xs, file_name)
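def run_test_git_on_gip():
    # Minimal usage sketch for test_git_on_gip(). The key names mirror the
    # hyper_params lookups inside that function; the values and the output
    # file name are illustrative assumptions, not settings from any
    # recorded experiment.
    hyper_params = {'learn_rate_git': 0.0002, \
                    'lam_pea_git': 1.0, \
                    'lam_cat_git': 1.0, \
                    'lam_ent_git': 1.0, \
                    'lam_l2w_git': 1e-4, \
                    'out_name': 'GOG_TEST_RESULTS.txt'}
    test_git_on_gip(hyper_params=hyper_params, rng_seed=1234)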
def test_gi_stack(hyper_params=None, sup_count=600, rng_seed=1234):
    assert(not (hyper_params is None))
    # Initialize a source of randomness
    rng = np.random.RandomState(rng_seed)
    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
    Xtr_su = datasets[0][0].get_value(borrow=False)
    Ytr_su = datasets[0][1].get_value(borrow=False)
    Xtr_un = datasets[1][0].get_value(borrow=False)
    Ytr_un = datasets[1][1].get_value(borrow=False)
    # get the unlabeled data
    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]]).astype(np.int32)
    Ytr_un = 0 * Ytr_un
    # get the labeled data
    Xtr_su = Xtr_su.astype(theano.config.floatX)
    Ytr_su = Ytr_su[:,np.newaxis].astype(np.int32)
    # get observations and labels for the validation set
    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
    Yva = Yva[:,np.newaxis] # numpy is dumb
    # get size information for the data
    un_samples = Xtr_un.shape[0]
    su_samples = Xtr_su.shape[0]
    va_samples = Xva.shape[0]
    # Construct a GenNet and an InfNet, then test constructor for GIPair.
    # Do basic testing, to make sure classes aren't completely broken.
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    Yd = T.icol('Yd_base')
    data_dim = Xtr_un.shape[1]
    label_dim = 10
    prior_dim = 50
    prior_sigma = 1.0
    batch_size = 150
    # Choose some parameters for the generator network
    gn_params = {}
    gn_config = [prior_dim, 600, 600, data_dim]
    gn_params['mlp_config'] = gn_config
    gn_params['activation'] = softplus_actfun
    gn_params['lam_l2a'] = 1e-3
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 600, 600]
    top_config = [shared_config[-1], prior_dim]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = softplus_actfun
    in_params['init_scale'] = 2.0
    in_params['lam_l2a'] = 1e-3
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.1
    in_params['input_noise'] = 0.1
    # choose some parameters for the categorical inferencer
    pn_params = {}
    pc0 = [prior_dim, 800, 800, label_dim]
    pn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    pn_params['spawn_configs'] = [sc0, sc1]
    pn_params['spawn_weights'] = [0.5, 0.5]
    # Set remaining params
    pn_params['activation'] = relu_actfun
    pn_params['init_scale'] = 2.0
    pn_params['ear_type'] = 6
    pn_params['lam_l2a'] = 1e-3
    pn_params['vis_drop'] = 0.0
    pn_params['hid_drop'] = 0.5
    # Initialize the base networks for this GIPair
    GN = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    IN = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    PN = PeaNet(rng=rng, Xd=Xd, params=pn_params)
    # Initialize biases in GN, IN, and PN
    GN.init_biases(0.0)
    IN.init_biases(0.0)
    PN.init_biases(0.1)
    # Initialize the GIStack
    GIS = GIStack(rng=rng, \
            Xd=Xd, Yd=Yd, Xc=Xc, Xm=Xm, \
            g_net=GN, i_net=IN, p_net=PN, \
            data_dim=data_dim, prior_dim=prior_dim, \
            label_dim=label_dim, batch_size=batch_size, \
            params=None, shared_param_dicts=None)
    # set weighting parameters for the various costs...
    GIS.set_lam_nll(1.0)
    GIS.set_lam_kld(1.0)
    GIS.set_lam_cat(0.0)
    GIS.set_lam_pea(0.0)
    GIS.set_lam_ent(0.0)
    # Set initial learning rate and basic SGD hyper parameters
    num_updates = hyper_params['num_updates']
    learn_rate = hyper_params['learn_rate']
    lam_pea = hyper_params['lam_pea']
    lam_cat = hyper_params['lam_cat']
    lam_ent = hyper_params['lam_ent']
    lam_l2w = hyper_params['lam_l2w']
    out_name = hyper_params['out_name']
    out_file = open(out_name, 'wb')
    out_file.write("**TODO: More informative output, and maybe a real log**\n")
    out_file.write("sup_count: {0:d}\n".format(sup_count))
    out_file.write("learn_rate: {0:.4f}\n".format(learn_rate))
    out_file.write("lam_pea: {0:.4f}\n".format(lam_pea))
    out_file.write("lam_cat: {0:.4f}\n".format(lam_cat))
    out_file.write("lam_ent: {0:.4f}\n".format(lam_ent))
    out_file.write("lam_l2w: {0:.4f}\n".format(lam_l2w))
    out_file.flush()
    GIS.set_lam_l2w(lam_l2w)
    GIS.set_all_sgd_params(learn_rate=learn_rate, momentum=0.98)
    for i in range(num_updates):
        if (i < 100000):
            # start with some updates only for the VAE (InfNet and GenNet)
            scale = float(min(i+1, 50000)) / 50000.0
            lam_cat = 0.0
            lam_pea = 0.0
            lam_ent = 0.0
            learn_rate_pn = 0.0
        else:
            # move on to updates that include loss from the PeaNet
            scale = 1.0
            lam_cat = hyper_params['lam_cat']
            lam_pea = hyper_params['lam_pea']
            if i < 150000:
                lam_ent = float(i - 99999) * hyper_params['lam_ent']
            else:
                lam_ent = hyper_params['lam_ent']
            learn_rate_pn = learn_rate
        # NOTE: fixed operator precedence; (i+1 % 100000) never triggered
        if (((i+1) % 100000) == 0):
            learn_rate = learn_rate * 0.7
        # do a minibatch update using unlabeled data
        if True:
            # get some data to train with
            un_idx = npr.randint(low=0, high=un_samples, size=(batch_size,))
            Xd_un = binarize_data(Xtr_un.take(un_idx, axis=0))
            Yd_un = Ytr_un.take(un_idx, axis=0)
            Xc_un = 0.0 * Xd_un
            Xm_un = 0.0 * Xd_un
            # do a minibatch update of the model, and compute some costs
            GIS.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIS.set_pn_sgd_params(learn_rate=(scale*learn_rate_pn), momentum=0.98)
            GIS.set_lam_nll(1.0)
            GIS.set_lam_kld(0.01 + (0.99*scale))
            GIS.set_lam_cat(0.0)
            GIS.set_lam_pea(lam_pea)
            GIS.set_lam_ent(lam_ent)
            outputs = GIS.train_joint(Xd_un, Xc_un, Xm_un, Yd_un)
            joint_cost = 1.0 * outputs[0]
            data_nll_cost = 1.0 * outputs[1]
            post_kld_cost = 1.0 * outputs[2]
            post_cat_cost = 1.0 * outputs[3]
            post_pea_cost = 1.0 * outputs[4]
            post_ent_cost = 1.0 * outputs[5]
            other_reg_cost = 1.0 * outputs[6]
        # do another minibatch update incorporating label information
        if (i >= 100000):
            # get some data to train with
            su_idx = npr.randint(low=0, high=su_samples, size=(batch_size,))
            Xd_su = binarize_data(Xtr_su.take(su_idx, axis=0))
            Yd_su = Ytr_su.take(su_idx, axis=0)
            Xc_su = 0.0 * Xd_su
            Xm_su = 0.0 * Xd_su
            # update only based on the label-based classification cost
            GIS.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIS.set_pn_sgd_params(learn_rate=(scale*learn_rate_pn), momentum=0.98)
            GIS.set_lam_nll(0.0)
            GIS.set_lam_kld(0.0)
            GIS.set_lam_cat(lam_cat)
            GIS.set_lam_pea(lam_pea)
            GIS.set_lam_ent(0.0)
            outputs = GIS.train_joint(Xd_su, Xc_su, Xm_su, Yd_su)
            post_cat_cost = 1.0 * outputs[3]
        assert(not (np.isnan(joint_cost)))
        if ((i % 500) == 0):
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, nll: {2:.4f}, kld: {3:.4f}, cat: {4:.4f}, pea: {5:.4f}, ent: {6:.4f}, other_reg: {7:.4f}".format( \
                    i, joint_cost, data_nll_cost, post_kld_cost, post_cat_cost, post_pea_cost, post_ent_cost, other_reg_cost)
            print(o_str)
            out_file.write("{}\n".format(o_str))
        if ((i % 1000) == 0):
            # check classification error on training and validation set
            train_err = GIS.classification_error(Xtr_su, Ytr_su)
            va_err = GIS.classification_error(Xva, Yva)
            o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
            print(o_str)
            out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 5000) == 0):
            file_name = "GIS_SAMPLES_b{0:d}.png".format(i)
            va_idx = npr.randint(low=0, high=va_samples, size=(5,))
            Xd_samps = np.vstack([Xd_un[0:5,:], binarize_data(Xva[va_idx,:])])
            Xd_samps = np.repeat(Xd_samps, 3, axis=0)
            sample_lists = GIS.sample_gis_from_data(Xd_samps, loop_iters=10)
            Xs = np.vstack(sample_lists["data samples"])
            Ys = GIS.class_probs(Xs)
            Xs = mnist_prob_embed(Xs, Ys)
            utils.visualize_samples(Xs, file_name)
    print("TESTING COMPLETE!")
    out_file.close()
    return
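def run_test_gi_stack():
    # Minimal usage sketch for test_gi_stack(). The key names mirror the
    # hyper_params lookups inside that function; the values and the output
    # file name are illustrative assumptions, not settings from any
    # recorded experiment.
    hyper_params = {'num_updates': 300000, \
                    'learn_rate': 0.002, \
                    'lam_pea': 2.0, \
                    'lam_cat': 2.0, \
                    'lam_ent': 2.0, \
                    'lam_l2w': 1e-4, \
                    'out_name': 'GIS_TEST_RESULTS.txt'}
    test_gi_stack(hyper_params=hyper_params, sup_count=600, rng_seed=1234)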
def test_gi_trip(hyper_params=None, sup_count=600, rng_seed=1234):
    assert(not (hyper_params is None))
    # Initialize a source of randomness
    rng = np.random.RandomState(rng_seed)
    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
    Xtr_su = datasets[0][0].get_value(borrow=False)
    Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
    Xtr_un = datasets[1][0].get_value(borrow=False)
    Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
    # get the joint labeled and unlabeled data
    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
    Ytr_un = 0 * Ytr_un # KEEP CATS FIXED OR FREE? YES/NO?
    # get the labeled data
    Xtr_su = Xtr_su.astype(theano.config.floatX)
    Ytr_su = Ytr_su[:,np.newaxis]
    # get observations and labels for the validation set
    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
    Yva = Yva[:,np.newaxis] # numpy is dumb
    # get size information for the data
    un_samples = Xtr_un.shape[0]
    su_samples = Xtr_su.shape[0]
    va_samples = Xva.shape[0]
    # set up some symbolic variables for input to the GITrip
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    Yd = T.icol('Yd_base')
    # set some "shape" parameters for the networks
    data_dim = Xtr_un.shape[1]
    label_dim = 10
    prior_dim = 50
    prior_sigma = 1.0
    batch_size = 150
    # set parameters for the generator network
    gn_params = {}
    gn_config = [(prior_dim + label_dim), 500, 500, data_dim]
    gn_params['mlp_config'] = gn_config
    gn_params['activation'] = softplus_actfun
    gn_params['lam_l2a'] = 1e-3
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 500, 500]
    top_config = [shared_config[-1], prior_dim]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = softplus_actfun
    in_params['init_scale'] = 1.0
    in_params['lam_l2a'] = 1e-3
    in_params['vis_drop'] = 0.2
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.1
    in_params['input_noise'] = 0.1
    # choose some parameters for the categorical inferencer
    pn_params = {}
    pc0 = [data_dim, (200, 4), (200, 4), label_dim]
    pn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    pn_params['spawn_configs'] = [sc0, sc1]
    pn_params['spawn_weights'] = [0.5, 0.5]
    # Set remaining params
    pn_params['activation'] = relu_actfun
    pn_params['ear_type'] = 6
    pn_params['lam_l2a'] = 1e-3
    pn_params['vis_drop'] = 0.2
    pn_params['hid_drop'] = 0.5
    # Initialize the base networks for this GITrip
    GN = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    IN = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    PN = PeaNet(rng=rng, Xd=Xd, params=pn_params)
    # Initialize biases in GN, IN, and PN
    GN.init_biases(0.0)
    IN.init_biases(0.0)
    PN.init_biases(0.1)
    # Initialize the GITrip
    git_params = {}
    GIT = GITrip(rng=rng, \
            Xd=Xd, Yd=Yd, Xc=Xc, Xm=Xm, \
            g_net=GN, i_net=IN, p_net=PN, \
            data_dim=data_dim, prior_dim=prior_dim, \
            label_dim=label_dim, batch_size=batch_size, \
            params=git_params, shared_param_dicts=None)
    # set weighting parameters for the various costs...
    GIT.set_lam_nll(1.0)
    GIT.set_lam_kld(1.0)
    GIT.set_lam_cat(0.0)
    GIT.set_lam_pea(0.0)
    GIT.set_lam_ent(0.0)
    # Set initial learning rate and basic SGD hyper parameters
    num_updates = hyper_params['num_updates']
    learn_rate = hyper_params['learn_rate']
    lam_cat = hyper_params['lam_cat']
    lam_pea = hyper_params['lam_pea']
    cat_prior = hyper_params['cat_prior']
    lam_l2w = hyper_params['lam_l2w']
    out_name = hyper_params['out_name']
    out_file = open(out_name, 'wb')
    out_file.write("**TODO: More informative output, and maybe a real log**\n")
    out_file.write("sup_count: {0:d}\n".format(sup_count))
    out_file.write("learn_rate: {0:.4f}\n".format(learn_rate))
    out_file.write("lam_pea: {0:.4f}\n".format(lam_pea))
    out_file.write("lam_cat: {0:.4f}\n".format(lam_cat))
    out_file.write("lam_l2w: {0:.4f}\n".format(lam_l2w))
    out_file.write("cat_prior: {0:s}\n".format(str(cat_prior)))
    out_file.flush()
    GIT.set_lam_l2w(lam_l2w)
    GIT.set_all_sgd_params(learn_rate=learn_rate, momentum=0.98)
    for i in range(num_updates):
        if i < 75000:
            scale = float(i + 1) / 75000.0
            lam_ent = -1.0
            lam_dir = 0.0
        else:
            scale = 1.0
            lam_ent = cat_prior['lam_ent']
            lam_dir = cat_prior['lam_dir']
        # NOTE: fixed operator precedence; (i+1 % 100000) never triggered
        if (((i+1) % 100000) == 0):
            learn_rate = learn_rate * 0.75
        # do a minibatch update using unlabeled data
        if True:
            # get some data to train with
            un_idx = npr.randint(low=0, high=un_samples, size=(batch_size,))
            Xd_un = binarize_data(Xtr_un.take(un_idx, axis=0))
            Yd_un = Ytr_un.take(un_idx, axis=0)
            Xc_un = 0.0 * Xd_un
            Xm_un = 0.0 * Xd_un
            # do a minibatch update of the model, and compute some costs
            GIT.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIT.set_lam_nll(1.0)
            GIT.set_lam_kld(0.1 + (0.9 * scale))
            GIT.set_lam_cat(0.0)
            GIT.set_lam_pea(lam_pea)
            GIT.set_lam_ent(lam_ent)
            GIT.set_lam_dir(lam_dir)
            outputs = GIT.train_joint(Xd_un, Xc_un, Xm_un, Yd_un)
            joint_cost = 1.0 * outputs[0]
            data_nll_cost = 1.0 * outputs[1]
            post_kld_cost = 1.0 * outputs[2]
            post_cat_cost = 1.0 * outputs[3]
            post_pea_cost = 1.0 * outputs[4]
            post_ent_cost = 1.0 * outputs[5]
            post_dir_cost = 1.0 * outputs[6]
            other_reg_cost = 1.0 * outputs[7]
        # do another minibatch update incorporating label information
        if True:
            # get some data to train with
            su_idx = npr.randint(low=0, high=su_samples, size=(batch_size,))
            Xd_su = binarize_data(Xtr_su.take(su_idx, axis=0))
            Yd_su = Ytr_su.take(su_idx, axis=0)
            Xc_su = 0.0 * Xd_su
            Xm_su = 0.0 * Xd_su
            # update only based on the label-based classification cost
            GIT.set_all_sgd_params(learn_rate=(scale*learn_rate), momentum=0.98)
            GIT.set_lam_nll(0.0)
            GIT.set_lam_kld(0.0)
            GIT.set_lam_cat(lam_cat)
            GIT.set_lam_pea(lam_pea)
            GIT.set_lam_ent(0.0)
            GIT.set_lam_dir(0.0)
            outputs = GIT.train_joint(Xd_su, Xc_su, Xm_su, Yd_su)
            joint_2 = 1.0 * outputs[0]
            data_nll_2 = 1.0 * outputs[1]
            post_kld_2 = 1.0 * outputs[2]
            post_cat_cost = 1.0 * outputs[3]
            post_pea_2 = 1.0 * outputs[4]
            post_ent_2 = 1.0 * outputs[5]
            other_reg_cost = 1.0 * outputs[7] # index 6 is the dir cost
        assert(not (np.isnan(joint_cost)))
        if ((i % 500) == 0):
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, nll: {2:.4f}, kld: {3:.4f}, cat: {4:.4f}, pea: {5:.4f}, ent: {6:.4f}, dir: {7:.4f}, other_reg: {8:.4f}".format( \
                    i, joint_cost, data_nll_cost, post_kld_cost, post_cat_cost, post_pea_cost, post_ent_cost, post_dir_cost, other_reg_cost)
            print(o_str)
            out_file.write("{}\n".format(o_str))
        if ((i % 1000) == 0):
            # check classification error on training and validation set
            train_err = GIT.classification_error(Xtr_su, Ytr_su)
            va_err = GIT.classification_error(Xva, Yva)
            o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
            print(o_str)
            out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 5000) == 0):
            # sample the VAE loop freely
            file_name = "GIT_CHAIN_SAMPLES_b{0:d}.png".format(i)
            va_idx = npr.randint(low=0, high=va_samples, size=(5,))
            Xd_samps = np.vstack([Xd_un[0:5,:], binarize_data(Xva[va_idx,:])])
            Xd_samps = np.repeat(Xd_samps, 3, axis=0)
            sample_lists = GIT.sample_git_from_data(Xd_samps, loop_iters=15)
            Xs = np.vstack(sample_lists["data samples"])
            Ys = GIT.class_probs(Xs)
            Xs = mnist_prob_embed(Xs, Ys)
            utils.visualize_samples(Xs, file_name, num_rows=15)
            # sample the VAE loop with some labels held fixed
            file_name = "GIT_SYNTH_SAMPLES_b{0:d}.png".format(i)
            Xd_samps = Xd_su[0:10,:]
            Xd_samps = np.repeat(Xd_samps, 3, axis=0)
            Yd_samps = Yd_su[0:10,:].reshape((10,1))
            Yd_samps = np.repeat(Yd_samps, 3, axis=0)
            SAMPS = GIT.sample_synth_labels(Xd_samps, Yd_samps, loop_iters=15, binarize=True)
            Xs = np.vstack(SAMPS["X_syn"])
            Ys = one_hot_np(np.vstack(SAMPS["Y_syn"]), cat_dim=11)
            Ys = Ys[:,1:]
            Xs = mnist_prob_embed(Xs, Ys)
            utils.visualize_samples(Xs, file_name, num_rows=15)
            # draw samples freely from the generative model's prior
            file_name = "GIT_PRIOR_SAMPLES_b{0:d}.png".format(i)
            Xs = GIT.sample_from_prior(20*15)
            utils.visualize_samples(Xs, file_name, num_rows=15)
            # draw categorical inferencer's weights
            file_name = "GIT_PN_WEIGHTS_b{0:d}.png".format(i)
            utils.visualize_net_layer(GIT.PN.proto_nets[0][0], file_name)
            # draw continuous inferencer's weights
            file_name = "GIT_IN_WEIGHTS_b{0:d}.png".format(i)
            utils.visualize_net_layer(GIT.IN.shared_layers[0], file_name)
            # draw generator net final layer weights
            file_name = "GIT_GN_WEIGHTS_b{0:d}.png".format(i)
            utils.visualize_net_layer(GIT.GN.mlp_layers[-1], file_name, use_transpose=True)
    print("TESTING COMPLETE!")
    out_file.close()
    return
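def run_test_gi_trip():
    # Minimal usage sketch for test_gi_trip(). The key names (including the
    # cat_prior sub-dict with 'lam_ent' and 'lam_dir') mirror the lookups
    # inside that function; the values and output file name are illustrative
    # assumptions, not settings from any recorded experiment.
    hyper_params = {'num_updates': 300000, \
                    'learn_rate': 0.002, \
                    'lam_cat': 2.0, \
                    'lam_pea': 2.0, \
                    'cat_prior': {'lam_ent': 2.0, 'lam_dir': 0.0}, \
                    'lam_l2w': 1e-4, \
                    'out_name': 'GIT_TEST_RESULTS.txt'}
    test_gi_trip(hyper_params=hyper_params, sup_count=600, rng_seed=1234)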