def gen_z(z_dist, nbatch):
    """Sample a (nbatch, Nz) latent batch from the module-level RNG.

    Column j is drawn i.i.d. from N(z_mean[j], z_std[j]), one column per
    RNG call so the draw order matches the original implementation.

    NOTE(review): `z_dist` is unused here -- presumably kept for interface
    compatibility with other noise samplers; confirm with callers.
    """
    samples = np.empty((nbatch, Nz))
    for col in xrange(Nz):
        samples[:, col] = np_rng.normal(z_mean[col], z_std[col], nbatch)
    return samples
def __call__(self, shape, name=None):
    """He-style Gaussian initializer: std = sqrt(2 / fan_in).

    fan_in is shape[0] for 2-D (dense) weights and prod(shape[1:]) for
    4-D (conv) weights; any other rank is rejected.
    """
    ndim = len(shape)
    if ndim == 2:
        fan_in = shape[0]
    elif ndim == 4:
        fan_in = np.prod(shape[1:])
    else:
        raise NotImplementedError
    std = np.sqrt(2. / fan_in)
    return sharedX(np_rng.normal(size=shape, scale=std), name=name)
def __call__(self, shape, name=None):
    """Orthogonal initializer: SVD of a standard-Gaussian matrix,
    scaled by self.scale (Saxe et al. style)."""
    print('called orthogonal init with shape', shape)
    flat_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np_rng.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(gaussian, full_matrices=False)
    # one of u/v has exactly flat_shape depending on which dim is larger
    basis = u if u.shape == flat_shape else v
    basis = basis.reshape(shape)
    # the [:shape[0], :shape[1]] slice is a no-op after reshape; kept for parity
    return sharedX(self.scale * basis[:shape[0], :shape[1]], name=name)
def rand_gen(size, noise_type='normal'):
    """Draw a noise tensor of the given shape, cast via floatX.

    Parameters
    ----------
    size : tuple
        Output shape forwarded to the RNG.
    noise_type : str
        'normal' for N(0, 1) samples, 'uniform' for U(-1, 1) samples.

    Raises
    ------
    ValueError
        If `noise_type` is not recognized.  (Was a bare
        ``assert False`` before, which silently disappears under
        ``python -O``.)
    """
    if noise_type == 'normal':
        r_vals = floatX(np_rng.normal(size=size))
    elif noise_type == 'uniform':
        r_vals = floatX(np_rng.uniform(size=size, low=-1.0, high=1.0))
    else:
        raise ValueError("unrecognized noise type: %r" % (noise_type,))
    return r_vals
def weights(self, shape, stddev=None, reparameterize=reparam, nin_axis=None, exp_reparam=exp_reparam):
    """Create (and register) a weight parameter of the given shape.

    Weights are drawn from N(0, stddev) (all-zeros when stddev == 0);
    when `reparameterize` is set, an additional per-output scale
    parameter `g` is registered and the weights are wrapped by
    `reparameterized_weights` (exp-parameterized when `exp_reparam`).

    NOTE(review): `assert isinstance(nin_axis, list)` runs
    unconditionally, so callers must always pass `nin_axis` (int or
    list) even when `reparameterize` is False -- confirm this is
    intended.
    """
    if stddev is None:
        # fall back to the layer's configured default init scale
        stddev = self.weight_init
    print 'weights: initializing weights with stddev = %f' % stddev
    if stddev == 0:
        value = np.zeros(shape)
    else:
        value = np_rng.normal(loc=0, scale=stddev, size=shape)
    w = self.add_param(value, prefix='w')
    if isinstance(nin_axis, int):
        nin_axis = [nin_axis]
    assert isinstance(nin_axis, list)
    if reparameterize:
        # scale parameter spans every axis that is NOT a fan-in axis
        g_shape = [dim for axis, dim in enumerate(shape) if axis not in nin_axis]
        # exp reparameterization starts at exp(0) == 1, plain starts at 1
        f_init = np.zeros if exp_reparam else np.ones
        g = self.add_param(f_init(g_shape, dtype=theano.config.floatX), prefix='w_scale')
        w = reparameterized_weights(w, g, exp=exp_reparam, nin_axis=nin_axis)
    return w
n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() # FOR EACH EPOCH for epoch in range(niter): # FOR EACH BATCH train_batch_iters = tr_stream.get_epoch_iterator() for b, train_batch_data in enumerate(train_batch_iters): # GET NORMALIZED INPUT DATA imb = transform(train_batch_data[0]) # GET NOISE DATA nmb = floatX(np_rng.normal(loc=0., scale=0.01, size=imb.shape)) # GET INPUT RANDOM DATA FOR SAMPLING zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz))) # UPDATE MODEL flag = None if n_updates % 2 == 1: cost = _train_g(imb, nmb, zmb, epoch+1) flag = 'generator_update' else: cost = _train_d(imb, nmb, zmb, epoch+1) flag = 'energy_update' n_updates += 1 n_examples += len(imb) if (b)%100==0: print 'EPOCH #{}'.format(epoch),' : batch #{}'.format(b), desc, ' ', flag print '================================================================'
def train_model(model_name, data_stream, num_hiddens, num_epochs, optimizer):
    """Train a VGG-feature-based VAE/moment-matching model.

    Builds the feature extractor, encoder mean/variance networks and
    generator, compiles the updater and sampler, then iterates the data
    stream for `num_epochs`, logging every 10 batches and saving sample
    grids plus cost arrays every 100 batches.
    """
    # set models
    print "LOADING VGG"
    t = time()
    feature_extractor, encoder_feat_params = load_vgg_feature_extractor()
    encoder_mean, encoder_mean_params = set_encoder_mean_model(num_hiddens)
    encoder_variance, encoder_var_params = set_encoder_variance_model(num_hiddens)
    print "%.2f SEC " % (time() - t)
    sample_generator, generator_parameters = set_generator_model(num_hiddens)
    print "COMPILING UPDATER AND SAMPLER"
    t = time()
    updater_function = set_updater_function(
        feature_extractor,
        encoder_mean,
        encoder_variance,
        sample_generator,
        encoder_feat_params + encoder_mean_params + encoder_var_params,
        generator_parameters,
        optimizer,
    )
    sampling_function = set_sampling_function(sample_generator)
    print "%.2f SEC " % (time() - t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.normal(size=(16 * 16, num_hiddens)))
    print "START TRAINING"
    # for each epoch
    moment_cost_list = []
    vae_cost_list = []
    batch_count = 0
    for e in xrange(num_epochs):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs (fresh Gaussian noise per batch)
            input_data = transform(batch_data[0])
            positive_random = floatX(np_rng.normal(size=(input_data.shape[0], num_hiddens)))
            negative_hidden = floatX(np_rng.normal(size=(input_data.shape[0], num_hiddens)))
            updater_inputs = [input_data, positive_random, negative_hidden]
            updater_outputs = updater_function(*updater_inputs)
            moment_cost_list.append(updater_outputs[0])
            vae_cost_list.append(updater_outputs[1].mean())
            # batch count up
            batch_count += 1
            if batch_count % 10 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " moment matching cost : ", moment_cost_list[-1]
                print "----------------------------------------------------------------"
                print " vae cost : ", vae_cost_list[-1]
                print "================================================================"
            if batch_count % 100 == 0:
                # sample data and persist running cost curves
                save_as = samples_dir + "/" + model_name + "_SAMPLES{}.png".format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + "/" + model_name + "_MOMENT_COST", arr=np.asarray(moment_cost_list))
                np.save(file=samples_dir + "/" + model_name + "_VAE_COST", arr=np.asarray(vae_cost_list))
def train_model(data_stream, model_optimizer, model_config_dict, model_test_name):
    """Train an encoder/decoder with a moment-matching objective.

    Logs recon/moment/model costs every batch, saves sample grids and
    cost arrays every 100 batches, and checkpoints decoder parameters
    every 1000 batches.
    """
    encoder_model = set_encoder_model(model_config_dict["hidden_size"], model_config_dict["min_num_gen_filters"])
    encoder_function = encoder_model[0]
    encoder_parameters = encoder_model[1]
    decoder_model = set_decoder_model(model_config_dict["hidden_size"], model_config_dict["min_num_eng_filters"])
    decoder_function = decoder_model[0]
    decoder_parameters = decoder_model[1]
    # compile functions
    print "COMPILING UPDATER FUNCTION"
    t = time()
    updater_function = set_updater_function(
        encoder_function=encoder_function,
        decoder_function=decoder_function,
        encoder_params=encoder_parameters,
        decoder_params=decoder_parameters,
        optimizer=model_optimizer,
    )
    print "%.2f SEC " % (time() - t)
    print "COMPILING SAMPLING FUNCTION"
    t = time()
    sampling_function = set_sampling_function(decoder_function=decoder_function)
    print "%.2f SEC " % (time() - t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.normal(size=(model_config_dict["num_display"], model_config_dict["hidden_size"])))
    print "START TRAINING"
    # for each epoch
    recon_cost_list = []
    moment_match_cost_list = []
    model_cost_list = []
    batch_count = 0
    for e in xrange(model_config_dict["epochs"]):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            positive_visible_data = transform(batch_data[0])
            negative_hidden_data = floatX(
                np_rng.normal(size=(positive_visible_data.shape[0], model_config_dict["hidden_size"]))
            )
            moment_cost_weight = 1.0
            updater_inputs = [positive_visible_data, negative_hidden_data, moment_cost_weight]
            updater_outputs = updater_function(*updater_inputs)
            recon_cost = updater_outputs[0].mean()
            moment_match_cost = updater_outputs[1].mean()
            model_cost = updater_outputs[2].mean()
            recon_cost_list.append(recon_cost)
            moment_match_cost_list.append(moment_match_cost)
            model_cost_list.append(model_cost)
            # batch count up
            batch_count += 1
            # % 1 == 0 means log every batch (kept as-is; tune if too chatty)
            if batch_count % 1 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_test_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " recon cost : ", recon_cost_list[-1]
                print "----------------------------------------------------------------"
                print " moment cost : ", moment_match_cost_list[-1]
                print "----------------------------------------------------------------"
                print " model cost : ", model_cost_list[-1]
                print "================================================================"
            if batch_count % 100 == 0:
                # sample data
                sample_data = sampling_function(fixed_hidden_data)[0]
                save_as = samples_dir + "/" + model_test_name + "_SAMPLES(NEGATIVE){}.png".format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                # save costs
                np.save(file=samples_dir + "/" + model_test_name + "_recon_cost", arr=np.asarray(recon_cost_list))
                np.save(
                    file=samples_dir + "/" + model_test_name + "_moment_cost", arr=np.asarray(moment_match_cost_list)
                )
                np.save(file=samples_dir + "/" + model_test_name + "_model_cost", arr=np.asarray(model_cost_list))
            if batch_count % 1000 == 0:
                # checkpoint decoder only; encoder params are not saved here
                save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl"
                save_model(tensor_params_list=decoder_parameters, save_to=save_as)
def train_model(data_stream, energy_optimizer, generator_optimizer, generator_bn_optimizer, model_config_dict, model_test_name):
    """Train an energy-based GAN: each batch runs one generator update
    (with its own BN optimizer) followed by one energy update.

    Input noise decays geometrically: scale = 0.01 * 0.99**(batch//100).
    Logs every batch; every 1000 batches saves two sample grids
    (train/test BN modes), the energy traces, and a model checkpoint.
    """
    [generator_function, generator_params, generator_bn_params] = set_generator_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['expert_size'], model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_bn_params=generator_bn_params,
                                                      generator_optimizer=generator_optimizer,
                                                      generator_bn_optimizer=generator_bn_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # generator step (fresh decayed noise)
            noise_data = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)), size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = generator_updater(*updater_inputs)
            # energy step (noise re-drawn; energies below come from this step)
            noise_data = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)), size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = energy_updater(*updater_inputs)
            # get output values
            input_energy = updater_outputs[0].mean()
            sample_energy = updater_outputs[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            # % 1 == 0 means log every batch (kept as-is)
            if batch_count%1==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy_list[-1]
                print '================================================================'
            if batch_count%1000==0:
                # sample data in both BN modes (train/test)
                [sample_data_t, sample_data_f] = sampling_function(fixed_hidden_data)
                sample_data_t = np.asarray(sample_data_t)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_t).transpose([0,2,3,1]), (16, 16), save_as)
                sample_data_f = np.asarray(sample_data_f)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TEST){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_f).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy', arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy', arr=np.asarray(sample_energy_list))
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params + generator_bn_params + energy_params, save_to=save_as)
def continue_train_model(last_batch_idx, data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Resume training an energy-based GAN from a saved checkpoint.

    Loads model (and optimizer) parameters from the first *.pkl found
    in samples_dir, recompiles updaters/sampler, then fast-forwards the
    stream by skipping batches until `last_batch_idx` is reached.

    NOTE(review): model_list[0] relies on glob ordering -- with several
    checkpoints in samples_dir this may not pick the newest one; verify.
    """
    model_list = glob.glob(samples_dir +'/*.pkl')
    # load parameters
    model_param_dicts = unpickle(model_list[0])
    generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'],
                                            model_params_dict=model_param_dicts)
    generator_function = generator_models[0]
    generator_params = generator_models[1]
    energy_models = load_energy_model(num_experts=model_config_dict['expert_size'],
                                      model_params_dict=model_param_dicts)
    feature_function = energy_models[0]
    # norm_function = energy_models[1]
    expert_function = energy_models[1]
    # prior_function = energy_models[3]
    energy_params = energy_models[2]
    # compile functions (optimizer state is restored from the checkpoint too)
    print 'COMPILING MODEL UPDATER'
    t=time()
    generator_updater, generator_optimizer_params = set_generator_update_function(energy_feature_function=feature_function,
                                                                                  # energy_norm_function=norm_function,
                                                                                  energy_expert_function=expert_function,
                                                                                  # energy_prior_function=prior_function,
                                                                                  generator_function=generator_function,
                                                                                  generator_params=generator_params,
                                                                                  generator_optimizer=generator_optimizer,
                                                                                  init_param_dict=model_param_dicts)
    energy_updater, energy_optimizer_params = set_energy_update_function(energy_feature_function=feature_function,
                                                                         # energy_norm_function=norm_function,
                                                                         energy_expert_function=expert_function,
                                                                         # energy_prior_function=prior_function,
                                                                         generator_function=generator_function,
                                                                         energy_params=energy_params,
                                                                         energy_optimizer=energy_optimizer,
                                                                         init_param_dict=model_param_dicts)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # batch count up; skip batches already trained before the restart
            batch_count += 1
            if batch_count<last_batch_idx:
                continue
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # generator step (noise re-drawn for the energy step below)
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [hidden_data, noise_data]
            update_output = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost = update_output[2].mean()
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [input_data, hidden_data, noise_data]
            update_output = energy_updater(*update_input)
            input_energy = update_output[0].mean()
            sample_energy = update_output[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' entropy weight : ', entropy_weights
                print '----------------------------------------------------------------'
                print ' entropy cost : ', entropy_cost
                print '================================================================'
            if batch_count%100==0:
                # sample data; also persist energies and a full checkpoint
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy', arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy', arr=np.asarray(sample_energy_list))
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params[0] + generator_params[1] + energy_params + generator_optimizer_params + energy_optimizer_params, save_to=save_as)
# Compile the Theano train/reconstruct/encode/decode functions, then run
# the VAE training loop over X_train for niter + niter_decay epochs,
# reporting the per-epoch average of the decoder cost (logpxz).
print X_train.shape, X_valid.shape, X_test.shape
print 'COMPILING'
t = time()
_train = theano.function([X, num_z], decost, updates=gupdates)
_reconstruct = theano.function([X], func_res_x)
_encoder = theano.function([X], func_z)
_decoder = theano.function([func_z], func_res_x)
print '%.2f seconds to compile theano functions'%(time()-t)
n_updates = 0
t = time()
# fixed noise and a fixed test minibatch (presumably for visualization
# further down -- not used inside this loop)
zmb = floatX(np_rng.normal(0, 1, size=(100, nz)))
xmb = floatX(shuffle(X_test)[:100])
number_z = 5
for epoch in range(1, niter+niter_decay+1):
    # reshuffle training data each epoch
    X_train = shuffle(X_train)
    logpxz = 0
    for imb in tqdm(iter_data(X_train, size=nbatch), total=ntrain/nbatch):
        imb = floatX(imb)
        # accumulate batch cost weighted by batch size
        logpxz += _train(imb, number_z) * len(imb)
        n_updates+=1
    print epoch, 'logpxz', logpxz / ntrain
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Train an energy-based GAN with an entropy-regularized generator.

    Input noise decays per epoch: init_noise * noise_decay**epoch.
    Logs every 100 batches; every 1000 batches saves a sample grid and
    the energy traces (no model checkpoint in this variant).
    """
    [generator_function, generator_params, generator_entropy_params] = set_generator_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'], model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_entropy_params=generator_entropy_params,
                                                      generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t=time()
    # NOTE(review): evaluation_function is compiled but never called in
    # this function -- possibly used by a validation variant; confirm.
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function,
                                                               energy_function=energy_function,
                                                               generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # per-epoch decayed input noise
            noise_data = np_rng.normal(size=input_data.shape)
            noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))
            # update generator
            generator_update_inputs = [input_data, hidden_data, noise_data, e]
            [input_energy_val, sample_energy_val, entropy_cost] = generator_updater(*generator_update_inputs)
            # update energy function (overwrites the energies logged below)
            energy_update_inputs = [input_data, hidden_data, e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)
            # get output values
            input_energy = input_energy_val.mean()
            sample_energy = sample_energy_val.mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            if batch_count%100==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy
                print '----------------------------------------------------------------'
                print ' entropy cost : ', entropy_cost
                print '================================================================'
            if batch_count%1000==0:
                # sample data and persist the energy traces
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy', arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy', arr=np.asarray(sample_energy_list))
def train_model(data_stream, model_optimizer, model_config_dict, model_test_name):
    """Train a VAE (encoder feature/mean/var + decoder) with an extra
    moment-matching cost.

    Logs vae/recon/kl/moment costs every 10 batches; every 500 batches
    saves sample, original and reconstruction grids plus cost arrays;
    every 1000 batches checkpoints the decoder parameters.
    """
    encoder_model = set_encoder_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters'])
    encoder_feature_function = encoder_model[0]
    encoder_mean_function = encoder_model[1]
    encoder_var_function = encoder_model[2]
    encoder_parameters = encoder_model[3]
    decoder_model = set_decoder_model(model_config_dict['hidden_size'], model_config_dict['min_num_eng_filters'])
    decoder_function = decoder_model[0]
    decoder_parameters = decoder_model[1]
    # compile functions
    print 'COMPILING UPDATER FUNCTION'
    t=time()
    updater_function = set_updater_function(encoder_feature_function=encoder_feature_function,
                                            encoder_mean_function=encoder_mean_function,
                                            encoder_var_function=encoder_var_function,
                                            decoder_function=decoder_function,
                                            encoder_params=encoder_parameters,
                                            decoder_params=decoder_parameters,
                                            optimizer=model_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(decoder_function=decoder_function)
    print '%.2f SEC '%(time()-t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(np_rng.normal(size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    vae_cost_list = []
    recon_cost_list = []
    kl_cost_list = []
    moment_match_cost_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs (fresh Gaussian hidden samples)
            positive_visible_data = transform(batch_data[0])
            positive_hidden_data = floatX(np_rng.normal(size=(positive_visible_data.shape[0], model_config_dict['hidden_size'])))
            negative_hidden_data = floatX(np_rng.normal(size=(positive_visible_data.shape[0], model_config_dict['hidden_size'])))
            moment_cost_weight = 1.0
            updater_inputs = [positive_visible_data, positive_hidden_data, negative_hidden_data, moment_cost_weight]
            updater_outputs = updater_function(*updater_inputs)
            vae_cost = updater_outputs[0].mean()
            recon_cost = updater_outputs[1].mean()
            kl_cost = updater_outputs[2].mean()
            moment_match_cost = updater_outputs[3].mean()
            recon_samples = updater_outputs[4]
            vae_cost_list.append(vae_cost)
            recon_cost_list.append(recon_cost)
            kl_cost_list.append(kl_cost)
            moment_match_cost_list.append(moment_match_cost)
            # batch count up
            batch_count += 1
            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' vae cost : ', vae_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' recon cost : ', recon_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' kl cost : ', kl_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' moment cost : ', moment_match_cost_list[-1]
                print '================================================================'
            if batch_count%500==0:
                # sample data (prior samples use the 16x16 grid)
                sample_data = sampling_function(fixed_hidden_data)[0]
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(NEGATIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                # recon data (12x12 grid -- presumably matches the batch size; confirm)
                save_as = samples_dir + '/' + model_test_name + '_ORIGINALS(POSITIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(positive_visible_data).transpose([0,2,3,1]), (12, 12), save_as)
                save_as = samples_dir + '/' + model_test_name + '_RECONSTRUCTIONS(POSITIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(recon_samples).transpose([0,2,3,1]), (12, 12), save_as)
                # save costs
                np.save(file=samples_dir + '/' + model_test_name +'_vae_cost', arr=np.asarray(vae_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_recon_cost', arr=np.asarray(recon_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_kl_cost', arr=np.asarray(kl_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_moment_cost', arr=np.asarray(moment_match_cost_list))
            if batch_count%1000==0:
                # checkpoint decoder only; encoder params are not saved here
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=decoder_parameters, save_to=save_as)
def __call__(self, shape, name=None):
    """Gaussian init rescaled so the whole tensor has Frobenius norm
    sqrt(shape[1])."""
    raw = np_rng.normal(loc=0, scale=0.01, size=shape)
    frob_norm = np.sqrt(np.sum(raw ** 2))
    rescaled = raw / frob_norm * np.sqrt(shape[1])
    return sharedX(rescaled, name=name)
def __call__(self, shape, name=None):
    """Element-wise Gaussian init: N(self.loc, self.scale)."""
    values = np_rng.normal(loc=self.loc, scale=self.scale, size=shape)
    return sharedX(values, name=name)
def rand_gen(size):
    """Standard-normal noise of the given shape, cast via floatX.

    (A uniform(-1, 1) variant was previously tried and left commented out.)
    """
    return floatX(np_rng.normal(size=size))
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Train an expert-based energy GAN: one generator update then one
    energy update per batch, with fresh 0.01-scale Gaussian input noise
    for each step.

    Logs every 10 batches; every 100 batches saves a sample grid, the
    energy traces, and a model checkpoint.
    """
    generator_models = set_generator_model(
        num_hiddens=model_config_dict["hidden_size"], min_num_gen_filters=model_config_dict["min_num_gen_filters"]
    )
    generator_function = generator_models[0]
    generator_params = generator_models[1]
    energy_models = set_energy_model(
        num_experts=model_config_dict["expert_size"], min_num_eng_filters=model_config_dict["min_num_eng_filters"]
    )
    feature_function = energy_models[0]
    # norm_function = energy_models[1]
    expert_function = energy_models[1]
    # prior_function = energy_models[3]
    energy_params = energy_models[2]
    # compile functions
    print "COMPILING MODEL UPDATER"
    t = time()
    generator_updater = set_generator_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        generator_params=generator_params,
        generator_optimizer=generator_optimizer,
    )
    energy_updater = set_energy_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        energy_params=energy_params,
        energy_optimizer=energy_optimizer,
    )
    print "%.2f SEC " % (time() - t)
    print "COMPILING SAMPLING FUNCTION"
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print "%.2f SEC " % (time() - t)
    # fixed hidden data so sample grids are comparable across checkpoints
    fixed_hidden_data = floatX(
        np_rng.uniform(
            low=-model_config_dict["hidden_distribution"],
            high=model_config_dict["hidden_distribution"],
            size=(model_config_dict["num_display"], model_config_dict["hidden_size"]),
        )
    )
    print "START TRAINING"
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict["epochs"]):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(
                np_rng.uniform(
                    low=-model_config_dict["hidden_distribution"],
                    high=model_config_dict["hidden_distribution"],
                    size=(num_data, model_config_dict["hidden_size"]),
                )
            )
            # generator step (noise re-drawn for the energy step below)
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [hidden_data, noise_data]
            update_output = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost = update_output[2].mean()
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [input_data, hidden_data, noise_data]
            update_output = energy_updater(*update_input)
            input_energy = update_output[0].mean()
            sample_energy = update_output[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            if batch_count % 10 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_test_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " input energy : ", input_energy_list[-1]
                print "----------------------------------------------------------------"
                print " sample energy : ", sample_energy_list[-1]
                print "----------------------------------------------------------------"
                print " entropy weight : ", entropy_weights
                print "----------------------------------------------------------------"
                print " entropy cost : ", entropy_cost
                print "================================================================"
            if batch_count % 100 == 0:
                # sample data; also persist energies and a full checkpoint
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + "/" + model_test_name + "_SAMPLES(TRAIN){}.png".format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + "/" + model_test_name + "_input_energy", arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + "/" + model_test_name + "_sample_energy", arr=np.asarray(sample_energy_list))
                save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl"
                save_model(
                    tensor_params_list=generator_params[0] + generator_params[1] + energy_params, save_to=save_as
                )
def train_model(train_stream, valid_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Train an energy-based generator/energy model pair, epoch by epoch.

    Builds the generator and energy networks, compiles the (Theano-style)
    update/evaluation/sampling functions, then for each epoch runs a training
    pass with alternating generator and energy updates, a validation pass,
    a console report, and a dump of sample images from fixed hidden codes.

    Parameters:
        train_stream, valid_stream: data streams exposing
            ``get_epoch_iterator()`` (Fuel-style); each batch's first element
            is the raw input array fed through ``transform``.
        energy_optimizer, generator_optimizer: optimizer objects forwarded to
            the update-function factories.
        model_config_dict: dict of hyper-parameters; keys read here:
            'hidden_size', 'min_num_gen_filters', 'min_num_eng_filters',
            'hidden_distribution', 'num_display', 'epochs', 'init_noise',
            'noise_decay'.
        model_test_name: str tag used in console output and sample filenames.

    Side effects: prints progress to stdout and writes PNG sample grids under
    the module-level ``samples_dir``. Returns None.
    """
    # Build the generator and the energy model (feature extractor + energy head).
    [generator_function, generator_params] = set_generator_model(model_config_dict['hidden_size'], model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'], model_config_dict['min_num_eng_filters'])
    # compile functions (each compile is timed and reported)
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, energy_params=energy_params, energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function, generator_params=generator_params, generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t=time()
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function, energy_function=energy_function, generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # set fixed hidden data for sampling: one uniform code batch drawn once so
    # the per-epoch sample grids are directly comparable across epochs
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    for e in xrange(model_config_dict['epochs']):
        # train phase: accumulate batch-mean energies, then average at the end
        epoch_train_input_energy = 0.
        epoch_train_sample_energy = 0.
        epoch_train_count = 0.
        train_batch_iters = train_stream.get_epoch_iterator()
        # for each batch
        for b, train_batch_data in enumerate(train_batch_iters):
            # set update function inputs
            input_data = transform(train_batch_data[0])
            num_data = input_data.shape[0]
            # hidden codes: uniform in [-hidden_distribution, +hidden_distribution]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size'])))
            # instance noise, annealed geometrically per epoch:
            # scale = init_noise * noise_decay**e
            noise_data = np_rng.normal(size=input_data.shape)
            noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))
            # update generator
            generator_update_inputs = [input_data, hidden_data, noise_data, e]
            [input_energy_val, sample_energy_val, ] = generator_updater(*generator_update_inputs)
            # update energy function
            # NOTE(review): this overwrites the energies returned by the
            # generator update above, so only the energy-update values are
            # accumulated below — confirm that is intended.
            energy_update_inputs = [input_data, hidden_data, e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)
            # get output values (batch means of per-sample energies)
            epoch_train_input_energy += input_energy_val.mean()
            epoch_train_sample_energy += sample_energy_val.mean()
            epoch_train_count += 1.
        # NOTE(review): an empty epoch iterator leaves epoch_train_count at 0
        # and this division raises ZeroDivisionError — assumed streams are
        # never empty.
        epoch_train_input_energy /= epoch_train_count
        epoch_train_sample_energy /= epoch_train_count
        # validation phase: no parameter updates, evaluation only
        epoch_valid_input_energy = 0.
        epoch_valid_sample_energy = 0.
        epoch_valid_count = 0.
        valid_batch_iters = valid_stream.get_epoch_iterator()
        for b, valid_batch_data in enumerate(valid_batch_iters):
            # set function inputs (fresh hidden codes per validation batch)
            input_data = transform(valid_batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(num_data, model_config_dict['hidden_size'])))
            # evaluate model: outputs[0] = input energy, outputs[1] = sample energy
            evaluation_input = [input_data, hidden_data]
            outputs = evaluation_function(*evaluation_input)
            epoch_valid_input_energy += outputs[0].mean()
            epoch_valid_sample_energy += outputs[1].mean()
            epoch_valid_count += 1.
        epoch_valid_input_energy /= epoch_valid_count
        epoch_valid_sample_energy /= epoch_valid_count
        # per-epoch console report
        print '================================================================'
        print 'EPOCH #{}'.format(e), model_test_name
        print '================================================================'
        print ' TRAIN RESULTS'
        print '================================================================'
        print ' input energy : ', epoch_train_input_energy
        print '----------------------------------------------------------------'
        print ' sample energy : ', epoch_train_sample_energy
        print '================================================================'
        print ' VALID RESULTS'
        print '================================================================'
        print ' input energy : ', epoch_valid_input_energy
        print '----------------------------------------------------------------'
        print ' sample energy : ', epoch_valid_sample_energy
        print '================================================================'
        # # plot curve data
        # save_as = model_test_name + '_ENERGY_CURVE.png'
        # plot_learning_curve(cost_values=[train_input_energy,
        #                                  train_sample_energy,
        #                                  valid_input_energy,
        #                                  valid_sample_energy],
        #                     cost_names=['Input Energy (train)',
        #                                 'Sample Energy (train)',
        #                                 'Input Energy (valid)',
        #                                 'Sample Energy (valid)'],
        #                     save_as=save_as)
        # sample data: decode the fixed hidden codes and save a 16x16 image grid
        # (transpose suggests samples come back NCHW and are saved NHWC — TODO confirm)
        save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(e+1)
        sample_data = sampling_function(fixed_hidden_data)[0]
        sample_data = np.asarray(sample_data)
        color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)