def test_model(model_config_dict, model_test_name): import glob model_list = glob.glob(samples_dir +'/*.pkl') # load parameters model_param_dicts = unpickle(model_list[0]) # load generator generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'], model_params_dict=model_param_dicts) generator_function = generator_models[0] print 'COMPILING SAMPLING FUNCTION' t=time() sampling_function = set_sampling_function(generator_function=generator_function) print '%.2f SEC '%(time()-t) print 'START SAMPLING' for s in xrange(model_config_dict['num_sampling']): print '{} sampling'.format(s) hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'], high=model_config_dict['hidden_distribution'], size=(model_config_dict['num_display'], model_config_dict['hidden_size']))) sample_data = sampling_function(hidden_data)[0] sample_data = inverse_transform(np.asarray(sample_data)).transpose([0,2,3,1]) save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(s+1) color_grid_vis(sample_data, (16, 16), save_as)
def train_vgan(fns, iter_data, opt, **kwargs): desc = opt['desc'] samples_dir = 'samples/%s' % desc if not os.path.exists(samples_dir): os.makedirs(samples_dir) for sample_imb in iter_data(subset='test', size=100): break color_grid_vis(sample_imb.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/inputs.png' % (desc)) print desc.upper() sys.stdout.flush() n_updates = 0 n_updates = 0 n_examples = 0 t = time() energy_x = 0 energy_gx = 0 entropy_x = 0 entropy_gx = 0 for epoch in range(opt['niter'] + opt['niterdecay']): if epoch <= opt['niter']: lr = opt['lr'] else: lr = opt['lr'] * (opt['niter'] + opt['niterdecay'] - epoch + 1.) / opt['niterdecay'] for imb in iter_data(size=2 * opt['nbatch'], shuffle=True): this_x = imb[:len(imb) / 2] this_z = imb[len(imb) / 2:] for _ in range(opt['k']): this_energy_x, this_energy_gx = fns['train_g'](this_x, this_z, lr) energy_gx = 0.9 * energy_gx + 0.1 * this_energy_gx this_energy_x, this_energy_gx = fns['train_d'](this_x, this_z, lr) energy_x = 0.9 * energy_x + 0.1 * this_energy_x energy_gx = 0.9 * energy_gx + 0.1 * this_energy_gx n_updates += 1 n_examples += len(imb) samples = np.asarray(fns['gen'](sample_imb)) color_grid_vis(samples.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/%d.png' % (desc, epoch)) recons = np.asarray(fns['recon'](sample_imb)) color_grid_vis(recons.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/recon%d.png' % (desc, epoch)) print 'epoch %d, energy_gx %.4f' % (epoch, energy_gx) print 'epoch %d, energy_x %.4f\n' % (epoch, energy_x) sys.stdout.flush()
def train_vgan(fns, iter_data, opt, **kwargs): desc = opt['desc'] samples_dir = 'samples/%s'%desc if not os.path.exists(samples_dir): os.makedirs(samples_dir) for sample_imb in iter_data(subset='test', size=100): break color_grid_vis(sample_imb.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/inputs.png'%(desc)) print desc.upper() sys.stdout.flush() n_updates = 0 n_updates = 0 n_examples = 0 t = time() energy_x = 0 energy_gx = 0 entropy_x = 0 entropy_gx = 0 for epoch in range(opt['niter'] + opt['niterdecay']): if epoch <= opt['niter']: lr = opt['lr'] else: lr = opt['lr'] * (opt['niter'] + opt['niterdecay'] - epoch + 1.) / opt['niterdecay'] for imb in iter_data(size=2*opt['nbatch'], shuffle=True): this_x = imb[:len(imb)/2] this_z = imb[len(imb)/2:] for _ in range(opt['k']): this_energy_x, this_energy_gx = fns['train_g'](this_x, this_z, lr) energy_gx = 0.9 * energy_gx + 0.1 * this_energy_gx this_energy_x, this_energy_gx = fns['train_d'](this_x, this_z, lr) energy_x = 0.9 * energy_x + 0.1 * this_energy_x energy_gx = 0.9 * energy_gx + 0.1 * this_energy_gx n_updates += 1 n_examples += len(imb) samples = np.asarray(fns['gen'](sample_imb)) color_grid_vis(samples.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/%d.png'%(desc, epoch)) recons = np.asarray(fns['recon'](sample_imb)) color_grid_vis(recons.transpose(0, 2, 3, 1), (10, 10), 'samples/%s/recon%d.png'%(desc, epoch)) print 'epoch %d, energy_gx %.4f' % (epoch, energy_gx) print 'epoch %d, energy_x %.4f\n' % (epoch, energy_x) sys.stdout.flush()
def train_model(model_name, data_stream, num_hiddens, num_epochs, optimizer):
    """Train the VGG-feature VAE/moment-matching model.

    Builds the feature extractor, encoder mean/variance nets and the
    generator, compiles the updater and sampler, then iterates over
    ``data_stream`` for ``num_epochs`` epochs, periodically printing
    costs and dumping sample grids and cost arrays to ``samples_dir``.
    """
    # set models
    print "LOADING VGG"
    t = time()
    feature_extractor, encoder_feat_params = load_vgg_feature_extractor()
    encoder_mean, encoder_mean_params = set_encoder_mean_model(num_hiddens)
    encoder_variance, encoder_var_params = set_encoder_variance_model(num_hiddens)
    print "%.2f SEC " % (time() - t)
    sample_generator, generator_parameters = set_generator_model(num_hiddens)
    print "COMPILING UPDATER AND SAMPLER"
    t = time()
    # single update function trains encoder-side and generator-side
    # parameter lists jointly
    updater_function = set_updater_function(
        feature_extractor,
        encoder_mean,
        encoder_variance,
        sample_generator,
        encoder_feat_params + encoder_mean_params + encoder_var_params,
        generator_parameters,
        optimizer,
    )
    sampling_function = set_sampling_function(sample_generator)
    print "%.2f SEC " % (time() - t)
    # set fixed hidden data for sampling (256 codes -> 16x16 display grid)
    fixed_hidden_data = floatX(np_rng.normal(size=(16 * 16, num_hiddens)))
    print "START TRAINING"
    # for each epoch
    moment_cost_list = []
    vae_cost_list = []
    batch_count = 0
    for e in xrange(num_epochs):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs; two independent latent draws
            # (presumably one for the VAE term, one for the negative /
            # moment-matching term -- TODO confirm against set_updater_function)
            input_data = transform(batch_data[0])
            positive_random = floatX(np_rng.normal(size=(input_data.shape[0], num_hiddens)))
            negative_hidden = floatX(np_rng.normal(size=(input_data.shape[0], num_hiddens)))
            updater_inputs = [input_data, positive_random, negative_hidden]
            updater_outputs = updater_function(*updater_inputs)
            moment_cost_list.append(updater_outputs[0])
            vae_cost_list.append(updater_outputs[1].mean())
            # batch count up
            batch_count += 1
            # progress report every 10 batches
            if batch_count % 10 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " moment matching cost : ", moment_cost_list[-1]
                print "----------------------------------------------------------------"
                print " vae cost : ", vae_cost_list[-1]
                print "================================================================"
            # every 100 batches: dump a sample grid and the cost histories
            if batch_count % 100 == 0:
                # sample data
                save_as = samples_dir + "/" + model_name + "_SAMPLES{}.png".format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                # NCHW -> NHWC for display
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + "/" + model_name + "_MOMENT_COST", arr=np.asarray(moment_cost_list))
                np.save(file=samples_dir + "/" + model_name + "_VAE_COST", arr=np.asarray(vae_cost_list))
def train_model(data_stream, model_optimizer, model_config_dict, model_test_name):
    """Train an encoder/decoder model with reconstruction + moment-matching costs.

    Iterates over ``data_stream`` for ``model_config_dict['epochs']`` epochs,
    printing the three costs every batch, dumping sample grids and cost
    arrays every 100 batches, and saving decoder parameters every 1000.
    """
    # NOTE(review): the encoder is built with 'min_num_gen_filters' and the
    # decoder with 'min_num_eng_filters' -- these look possibly swapped;
    # confirm against set_encoder_model/set_decoder_model.
    encoder_model = set_encoder_model(model_config_dict["hidden_size"], model_config_dict["min_num_gen_filters"])
    encoder_function = encoder_model[0]
    encoder_parameters = encoder_model[1]
    decoder_model = set_decoder_model(model_config_dict["hidden_size"], model_config_dict["min_num_eng_filters"])
    decoder_function = decoder_model[0]
    decoder_parameters = decoder_model[1]
    # compile functions
    print "COMPILING UPDATER FUNCTION"
    t = time()
    updater_function = set_updater_function(
        encoder_function=encoder_function,
        decoder_function=decoder_function,
        encoder_params=encoder_parameters,
        decoder_params=decoder_parameters,
        optimizer=model_optimizer,
    )
    print "%.2f SEC " % (time() - t)
    print "COMPILING SAMPLING FUNCTION"
    t = time()
    sampling_function = set_sampling_function(decoder_function=decoder_function)
    print "%.2f SEC " % (time() - t)
    # set fixed hidden data for sampling (same codes reused every dump)
    fixed_hidden_data = floatX(np_rng.normal(size=(model_config_dict["num_display"], model_config_dict["hidden_size"])))
    print "START TRAINING"
    # for each epoch
    recon_cost_list = []
    moment_match_cost_list = []
    model_cost_list = []
    batch_count = 0
    for e in xrange(model_config_dict["epochs"]):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs: real data plus a fresh latent draw
            positive_visible_data = transform(batch_data[0])
            negative_hidden_data = floatX(
                np_rng.normal(size=(positive_visible_data.shape[0], model_config_dict["hidden_size"]))
            )
            moment_cost_weight = 1.0
            updater_inputs = [positive_visible_data, negative_hidden_data, moment_cost_weight]
            updater_outputs = updater_function(*updater_inputs)
            recon_cost = updater_outputs[0].mean()
            moment_match_cost = updater_outputs[1].mean()
            model_cost = updater_outputs[2].mean()
            recon_cost_list.append(recon_cost)
            moment_match_cost_list.append(moment_match_cost)
            model_cost_list.append(model_cost)
            # batch count up
            batch_count += 1
            # `% 1` means: report every single batch
            if batch_count % 1 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_test_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " recon cost : ", recon_cost_list[-1]
                print "----------------------------------------------------------------"
                print " moment cost : ", moment_match_cost_list[-1]
                print "----------------------------------------------------------------"
                print " model cost : ", model_cost_list[-1]
                print "================================================================"
            # every 100 batches: dump sample grid + cost histories
            if batch_count % 100 == 0:
                # sample data
                sample_data = sampling_function(fixed_hidden_data)[0]
                save_as = samples_dir + "/" + model_test_name + "_SAMPLES(NEGATIVE){}.png".format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                # save costs
                np.save(file=samples_dir + "/" + model_test_name + "_recon_cost", arr=np.asarray(recon_cost_list))
                np.save(
                    file=samples_dir + "/" + model_test_name + "_moment_cost", arr=np.asarray(moment_match_cost_list)
                )
                np.save(file=samples_dir + "/" + model_test_name + "_model_cost", arr=np.asarray(model_cost_list))
            # every 1000 batches: checkpoint the decoder parameters only
            if batch_count % 1000 == 0:
                save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl"
                save_model(tensor_params_list=decoder_parameters, save_to=save_as)
def run(self):
    """CLI entry point: parse hyper-parameters and train a (DC)GAN on STL-10,
    optionally with an auxiliary classifier, logging losses and periodically
    dumping samples and model parameters.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--gendim", type = int, default = 100)
    #parser.add_argument("--dataset", type = str, default = 'stl10')
    parser.add_argument("--batch_size", type = int, default = 128)
    parser.add_argument("--n_epochs", type = int, default = 100)
    parser.add_argument("--k_iter", type = int, default = 1)
    parser.add_argument("--monitor_size", type = int, default = 196)
    parser.add_argument("--init_scale", type = float, default = 0.02)
    parser.add_argument("--folds", type = int, default = 5)
    parser.add_argument("--valid_fold", type = int, default = 0)
    parser.add_argument("--iter_save", type = int, default = 100)
    parser.add_argument('--classify', action='store_true')
    parser.add_argument("--img_size", type = int, default = 64)
    args = parser.parse_args()
    print args
    gen_dim = args.gendim
    n_epochs = args.n_epochs
    batch_size = args.batch_size
    #dataset = args.dataset
    k_iter = args.k_iter
    monitor_size = args.monitor_size
    init_scale = args.init_scale
    folds = args.folds
    valid_fold = args.valid_fold
    iter_save = args.iter_save
    classify = args.classify
    img_size = args.img_size
    # choose the trainer class depending on whether a classifier head is trained
    if classify:
        from src.gan_class import GAN_trainer
    else:
        from src.gan import GAN_trainer
    model = self.model_module.GAN_model(img_shape=(img_size,img_size),gen_dim=gen_dim,init_scale=init_scale)
    trainer = GAN_trainer(model)
    data = dataset.stl10()
    # experiment bookkeeping: output directories
    desc = 'dcgan'
    model_dir = 'models/%s'%desc
    samples_dir = 'samples/%s'%desc
    if not os.path.exists('logs/'):
        os.makedirs('logs/')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)
    # fixed real batch and fixed latent batch for comparable monitoring grids
    X_sample = data.get_unlab_batch(0,monitor_size)
    X_sample = data.center_crop(X_sample,img_size)
    color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc)
    Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim)))
    print desc.upper()
    print "starting training"
    # start fresh log files with a header line
    with open('errors.log', 'w') as f:
        f.write('# iter data_seen epoch dis_loss g_loss')
        if classify:
            f.write(' c_loss c_val_err c_test_err\n')
        else:
            f.write('\n')
    if classify:
        with open('best.log', 'w') as f:
            f.write('# iter data_seen epoch c_val_err c_test_err\n')
    n_iter = n_epochs*(data.unlab_size/batch_size+1)
    best_err = 1e6
    last_it = 0
    t = time()
    for it in xrange(n_iter):
        epoch = it*batch_size/data.unlab_size
        # one GAN update: generator then discriminator on a fresh batch
        X_batch = data.get_unlab_batch(it,batch_size)
        X_batch = data.scale_data(data.center_crop(X_batch,img_size))
        Z_batch = floatX(np_rng.uniform(-1., 1., size=(len(X_batch), model.gen_dim)))
        gen_loss = trainer.train_generator_on_batch(Z_batch)
        dis_loss = trainer.train_discriminator_on_batch(X_batch, Z_batch)
        if classify:
            # one supervised classifier update per iteration
            X_batch, y_batch = data.get_train_batch(it,batch_size)
            X_batch = data.scale_data(data.center_crop(X_batch,img_size))
            cls_loss = trainer.train_classifier_on_batch(X_batch, y_batch)
        # periodic evaluation/logging (denser during the first iter_save iters)
        if (it % iter_save == 0) or (it % 10 == 0 and it < iter_save):
            if classify:
                # mean classification error over the test split
                cls_test_err = 0.0
                for it2 in xrange(data.test_size/batch_size):
                    X_batch, y_batch = data.get_test_batch(it2,batch_size)
                    X_batch = data.scale_data(data.center_crop(X_batch,img_size))
                    cls_test_err += trainer._cls_error(X_batch, y_batch)
                cls_test_err /= data.test_size/batch_size
                # mean classification error over the validation split
                cls_valid_err = 0.0
                for it2 in xrange(data.valid_size/batch_size):
                    X_batch, y_batch = data.get_valid_batch(it2,batch_size)
                    X_batch = data.scale_data(data.center_crop(X_batch,img_size))
                    cls_valid_err += trainer._cls_error(X_batch, y_batch)
                cls_valid_err /= data.valid_size/batch_size
            # dump a sample grid from the fixed latent batch
            samples = np.asarray(trainer._gen(Z_sample))
            color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (14, 14), 'samples/%s/%d.png'%(desc, it))
            with open('errors.log', 'a') as f:
                f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ")
                f.write( " ".join(map(str, (dis_loss,gen_loss) ))+" ")
                if classify:
                    f.write( " ".join(map(str, (cls_loss,cls_valid_err,cls_test_err) ))+"\n")
                else:
                    f.write("\n")
            # keep the generator parameters of the best validation error
            if classify and cls_valid_err<best_err:
                best_err = cls_valid_err
                with open('best.log', 'a') as f:
                    f.write( " ".join(map(str, (it,it*batch_size,epoch) ))+" ")
                    f.write( " ".join(map(str, (cls_valid_err,cls_test_err) ))+"\n")
                model.dump('models/%s/best_gen_params.jl'%(desc))
            # timing: t2 is the wall time since the previous report
            t2 = time()-t
            t += t2
            print "iter:%d/%d; epoch:%d; %f sec. per iteration"%(it,n_iter,epoch,t2/(1+it-last_it))
            last_it = it+1
        # checkpoint at selected epoch boundaries (first iter of that epoch)
        if epoch in [1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 75, 100, 200, n_epochs]:
            if (it*batch_size)%data.unlab_size<batch_size:
                model_dir = 'models/%s/%d'%(desc, it)
                if not os.path.exists(model_dir):
                    os.makedirs(model_dir)
                model.dump('%s/params.jl'%(model_dir))
    # final checkpoint after the last iteration
    model_dir = 'models/%s/last'%(desc)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    model.dump('%s/params.jl' % (model_dir))
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Train an energy-based GAN: alternating generator and energy updates.

    Each batch: one generator update (on latent + small Gaussian pixel
    noise), then one energy-model update (on data + latent + fresh noise).
    Every 100 batches the routine dumps a sample grid, the energy
    histories, and a parameter pickle into ``samples_dir``.
    """
    generator_models = set_generator_model(
        num_hiddens=model_config_dict["hidden_size"], min_num_gen_filters=model_config_dict["min_num_gen_filters"]
    )
    generator_function = generator_models[0]
    generator_params = generator_models[1]
    energy_models = set_energy_model(
        num_experts=model_config_dict["expert_size"], min_num_eng_filters=model_config_dict["min_num_eng_filters"]
    )
    feature_function = energy_models[0]
    # norm_function = energy_models[1]
    expert_function = energy_models[1]
    # prior_function = energy_models[3]
    energy_params = energy_models[2]
    # compile functions
    print "COMPILING MODEL UPDATER"
    t = time()
    generator_updater = set_generator_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        generator_params=generator_params,
        generator_optimizer=generator_optimizer,
    )
    energy_updater = set_energy_update_function(
        energy_feature_function=feature_function,
        # energy_norm_function=norm_function,
        energy_expert_function=expert_function,
        # energy_prior_function=prior_function,
        generator_function=generator_function,
        energy_params=energy_params,
        energy_optimizer=energy_optimizer,
    )
    print "%.2f SEC " % (time() - t)
    print "COMPILING SAMPLING FUNCTION"
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print "%.2f SEC " % (time() - t)
    # set fixed hidden data for sampling (same codes reused for every dump)
    fixed_hidden_data = floatX(
        np_rng.uniform(
            low=-model_config_dict["hidden_distribution"],
            high=model_config_dict["hidden_distribution"],
            size=(model_config_dict["num_display"], model_config_dict["hidden_size"]),
        )
    )
    print "START TRAINING"
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict["epochs"]):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(
                np_rng.uniform(
                    low=-model_config_dict["hidden_distribution"],
                    high=model_config_dict["hidden_distribution"],
                    size=(num_data, model_config_dict["hidden_size"]),
                )
            )
            # generator step: latent codes + small Gaussian pixel noise
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [hidden_data, noise_data]
            update_output = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost = update_output[2].mean()
            # energy step: fresh noise, plus the real data this time
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [input_data, hidden_data, noise_data]
            update_output = energy_updater(*update_input)
            input_energy = update_output[0].mean()
            sample_energy = update_output[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            # progress report every 10 batches
            if batch_count % 10 == 0:
                print "================================================================"
                print "BATCH ITER #{}".format(batch_count), model_test_name
                print "================================================================"
                print " TRAIN RESULTS"
                print "================================================================"
                print " input energy : ", input_energy_list[-1]
                print "----------------------------------------------------------------"
                print " sample energy : ", sample_energy_list[-1]
                print "----------------------------------------------------------------"
                print " entropy weight : ", entropy_weights
                print "----------------------------------------------------------------"
                print " entropy cost : ", entropy_cost
                print "================================================================"
            # every 100 batches: dump samples, energy histories and parameters
            if batch_count % 100 == 0:
                # sample data
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + "/" + model_test_name + "_SAMPLES(TRAIN){}.png".format(batch_count)
                # NCHW -> NHWC for display
                color_grid_vis(inverse_transform(sample_data).transpose([0, 2, 3, 1]), (16, 16), save_as)
                np.save(file=samples_dir + "/" + model_test_name + "_input_energy", arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + "/" + model_test_name + "_sample_energy", arr=np.asarray(sample_energy_list))
                save_as = samples_dir + "/" + model_test_name + "_MODEL.pkl"
                save_model(
                    tensor_params_list=generator_params[0] + generator_params[1] + energy_params, save_to=save_as
                )
kld_qtiles = np.percentile(vae_klds, [50., 80., 90., 95.]) str4 = " [q50, q80, q90, q95, max](vae-kld): {0:.2f}, {1:.2f}, {2:.2f}, {3:.2f}, {4:.2f}".format( kld_qtiles[0], kld_qtiles[1], kld_qtiles[2], kld_qtiles[3], np.max(vae_klds)) kld_strs = ["{0:s}: {1:.2f},".format(ln, lk) for ln, lk in zip(vae_layer_names, epoch_layer_klds)] str5 = " module kld -- {}".format(" ".join(kld_strs)) joint_str = "\n".join([str1, str2, str3, str4, str5]) print(joint_str) out_file.write(joint_str + "\n") out_file.flush() ###################### # DRAW SOME PICTURES # ###################### if (epoch < 20) or (((epoch - 1) % 20) == 0): # sample some reconstructions directly from the conditional model xg_gen, xm_gen, xg_inf, xm_inf = make_model_input(Xtr[:100, :]) xg_rec = sample_func(xg_gen, xm_gen, inf_gen_model) # stripe data for nice display (each reconstruction next to its target) tr_vis_batch = np.zeros((200, nc, npx, npx)) for rec_pair in range(100): idx_in = 2 * rec_pair idx_out = 2 * rec_pair + 1 tr_vis_batch[idx_in, :, :, :] = xg_gen[rec_pair, :, :, :] tr_vis_batch[idx_out, :, :, :] = xg_rec[rec_pair, :, :, :] # draw images... color_grid_vis(draw_transform(tr_vis_batch), (10, 20), "{}/gen_tr_{}.png".format(result_dir, epoch)) ############## # EYE BUFFER # ##############
def run(self): parser = argparse.ArgumentParser() parser.add_argument("--model", type=str) parser.add_argument("--gendim", type=int, default=100) parser.add_argument("--batch_size", type=int, default=128) parser.add_argument("--n_samples_row", type=int, default=16) parser.add_argument("--n_iter", type=int, default=100000) parser.add_argument("--iter_save", type=int, default=5000) parser.add_argument("--img_size", type = int, default = 64) args = parser.parse_args() print args model_file = args.model gen_dim = args.gendim n_iter = args.n_iter batch_size = args.batch_size n_samples_row = args.n_samples_row #dataset = args.dataset iter_save = args.iter_save img_size = args.img_size data = dataset.stl10() model = self.model_module.GAN_model(img_shape=(img_size,img_size), gen_dim=gen_dim) model.load(model_file) desc = 'dcgan' model_dir = 'models/%s'%desc samples_dir = 'samples/%s'%desc if not os.path.exists('logs/'): os.makedirs('logs/') if not os.path.exists(model_dir): os.makedirs(model_dir) if not os.path.exists(samples_dir): os.makedirs(samples_dir) reconstructor = partial_reconstructor(model, self.layer, self.layer_shape, batch_size) print "starting training" best_err = 1e6 last_it = 0 t = time() reconstructor.set_H_uniform() X_batch, _ = data.get_test_batch(0, batch_size) X_batch = self.process_data(X_batch) X_batch = data.scale_data(data.center_crop(X_batch, img_size)) color_grid_vis(data.inv_scale_data(X_batch).transpose(0, 2, 3, 1), (batch_size/n_samples_row, n_samples_row), 'samples/%s/reconstruction_objective.png' % (desc)) for it in xrange(n_iter): loss = reconstructor.train_h_on_batch(X_batch) if (it % iter_save == 0) or (it % 1000 == 0 and it < iter_save): samples = reconstructor.reconstruct() color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (batch_size/n_samples_row, n_samples_row), 'samples/%s/reconstruction_%d.png' % (desc, it)) joblib.dump(reconstructor.get_H_value(), 'models/%s/H_%d.jl' % (desc, it)) with open('rec_errors.log', 'a') 
as f: f.write( " ".join(map(str, (it,it*batch_size) ))+" ") f.write( " ".join(str(loss))+"\n") t2 = time()-t t += t2 print "iter:%d/%d; %f sec. per iteration"%(it,n_iter,t2/(1+it-last_it)) last_it = it+1 samples = reconstructor.reconstruct() color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (batch_size / n_samples_row, n_samples_row), 'samples/%s/reconstruction_last.png' % (desc)) joblib.dump(reconstructor.get_H_value(), 'models/%s/H_last.jl' % (desc))
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(discrim_params, d_cost) g_updates = g_updater(gen_params, g_cost) updates = d_updates + g_updates print 'COMPILING' t = time() _train_g = theano.function([X, Z], cost, updates=g_updates) _train_d = theano.function([X, Z], cost, updates=d_updates) _gen = theano.function([Z], gX) print '%.2f seconds to compile theano functions' % (time() - t) vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis) vaX_vis = inverse_transform(vaX[vis_idxs]) color_grid_vis(vaX_vis, (14, 14), 'samples/%s_etl_test.png' % desc) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz))) def gen_samples(n, nbatch=128): samples = [] n_gen = 0 for i in range(n / nbatch): zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz))) xmb = _gen(zmb) samples.append(xmb) n_gen += len(xmb) n_left = n - n_gen zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz))) xmb = _gen(zmb)
def draw_transform(X): # transform vectorized observations into drawable images X = (X + 1.0) * 127.0 return X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1) def rand_gen(size): #r_vals = floatX(np_rng.uniform(-1., 1., size=size)) r_vals = floatX(np_rng.normal(size=size)) return r_vals # draw some examples from training set color_grid_vis(draw_transform(Xtr[0:200]), (10, 20), "{}/Xtr.png".format(sample_dir)) tanh = activations.Tanh() sigmoid = activations.Sigmoid() bce = T.nnet.binary_crossentropy gifn = inits.Normal(scale=0.02) difn = inits.Normal(scale=0.02) # # Define some modules to use in the generator # gen_module_1 = \ GenUniModule( rand_dim=nz0, out_dim=(ngf*8*2*2),
print '%.2f seconds to compile theano functions.' % ( time( ) - t )
# DO THE JOB.
# Validation pass: for every batch, pair each source image with a random
# image from a *different* identity (pset mismatch) and visualize the
# network output next to the inputs.
numVal = 0
save_dir = './save/'
t = time( )
num_batch = int( np.ceil( len( sset_val) / float( batch_size)))
for bi in range( num_batch ):
    # batch boundaries (last batch may be smaller)
    bis = bi * batch_size
    bie = min( bi*batch_size + batch_size, len( sset_val) )
    this_bsize = bie - bis
    Pb = pset_val[ bis:bie ]
    ISb = ims_LAB[ sset_val[ bis:bie] ]
    # build a "source" batch drawn from entries whose pset label differs
    ISb_sr = np.zeros( ISb.shape, ISb.dtype )
    for b in range (this_bsize):
        iid = tset_val[ np_rng.choice( ( pset_val != Pb[b] ).nonzero( )[ 0 ], 1) ]
        ISb_sr[b] = ims_LAB[ iid ]
    ISb_input,Gnd = ConvertGenInput(ISb, ISb_sr)
    results = _test_ced(ISb_input)
    numVal += 1
    # VAL: network output vs ground truth; PER / CLO: the two input batches
    # (images appear to be LAB-encoded -- itf presumably converts for display)
    color_grid_vis( itf( ConvertGenOutput( Gnd, results), npx, 'LAB' ), ( nvis, nvis ), os.path.join( save_dir, 'VAL%03d.png' %numVal))
    color_grid_vis( itf( ISb, npx, 'LAB'), ( nvis, nvis ), os.path.join( save_dir, 'PER%03d.png' %numVal))
    color_grid_vis( itf( ISb_sr, npx, 'LAB'), ( nvis, nvis ), os.path.join( save_dir, 'CLO%03d.png' %numVal))
    print( 'Test) Iteration : %d, (BATCHSIZE : %d of %d' %( numVal, bi, num_batch))
# Pick a random nvis*nvis subset of training pairs for visualization.
vis_tr = np_rng.permutation( len( sset_tr ) )
vis_tr = vis_tr[ 0 : nvis ** 2 ]
vis_ims_tr_s = ims_LAB[ sset_tr[ vis_tr ] ]  # images indexed by sset (source set)
vis_ims_tr_t = ims_LAB[ tset_tr[ vis_tr ] ]  # images indexed by tset (target set)
vis_tr_input, vis_tr_gnd = ConvertGenInput(vis_ims_tr_s, vis_ims_tr_t)
vis_ims_tr_t_hat = _test_ced( vis_tr_input )  # network prediction for the pairs
#pdb.set_trace()
#color_grid_vis( itf( vis_ims_tr_s ,npx, 'LAB'), ( nvis, nvis), os.path.join( sample_dir, 'TR_S1.png') )
#color_grid_vis( itf( vis_ims_tr_t ,npx, 'LAB'), ( nvis, nvis), os.path.join( sample_dir, 'TR_S2.png') )
# TR_S: combined view of the input pair; TR_T: ground truth; TR000T: prediction
color_grid_vis( itf( MakeVisual( vis_ims_tr_s, vis_ims_tr_t ), npx, 'LAB'), ( nvis, nvis), os.path.join( sample_dir, 'TR_S.png') )
color_grid_vis( itf( ConvertGenOutput( vis_tr_gnd, vis_ims_tr_s),npx,'LAB' ), (nvis, nvis), os.path.join( sample_dir, 'TR_T.png') )
color_grid_vis( itf( ConvertGenOutput(vis_ims_tr_s, vis_ims_tr_t_hat),npx,'LAB' ), (nvis, nvis), os.path.join( sample_dir, 'TR000T.png') )
# Same random-subset selection for the validation split.
vis_val = np_rng.permutation( len( sset_val ) )
vis_val = vis_val[ 0 : nvis ** 2 ]
vis_ims_val_s = ims_LAB[ sset_val[ vis_val ] ]
vis_ims_val_t = ims_LAB[ tset_val[ vis_val ] ]
vis_val_input, vis_val_gnd = ConvertGenInput(vis_ims_val_s, vis_ims_val_t)
vis_ims_val_t_hat = _test_ced( vis_val_input )
def discrim(X, w, w2, g2, b2, w3, g3, b3, w4, g4, b4, w5, g5, b5, w6, g6, b6, wy):
    """DCGAN-style discriminator: six leaky-ReLU conv layers (batchnorm on
    all but the first, stride 2 on every other layer) followed by a sigmoid
    readout; returns the per-image probability score."""
    h = lrelu(dnn_conv(X, w, subsample=(1, 1), border_mode=(1, 1)))
    h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(1, 1)), g=g2, b=b2))
    h3 = lrelu(batchnorm(dnn_conv(h2, w3, subsample=(1, 1), border_mode=(1, 1)), g=g3, b=b3))
    h4 = lrelu(batchnorm(dnn_conv(h3, w4, subsample=(2, 2), border_mode=(1, 1)), g=g4, b=b4))
    h5 = lrelu(batchnorm(dnn_conv(h4, w5, subsample=(1, 1), border_mode=(1, 1)), g=g5, b=b5))
    h6 = lrelu(batchnorm(dnn_conv(h5, w6, subsample=(2, 2), border_mode=(1, 1)), g=g6, b=b6))
    h6 = T.flatten(h6, 2)
    y = sigmoid(T.dot(h6, wy))
    return y

def inverse_transform(X):
    """Map generator output from [-1, 1] back to [0, 1] and reshape
    flat vectors to NHWC images for display."""
    X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1)+1.)/2.
    return X

# Compile generator/discriminator, draw 400 samples, and save the grid
# sorted by discriminator score (best-scoring samples first).
Z = T.matrix()
X = T.tensor4()
gX = gen(Z, *gen_params)
dX = discrim(X, *discrim_params)
_gen = theano.function([Z], gX)
_discrim = theano.function([X], dX)
sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256)))
samples = _gen(sample_zmb)
scores = _discrim(samples)
# descending order: highest discriminator score first
sort = np.argsort(scores.flatten())[::-1]
samples = samples[sort]
color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')
# Experiment bookkeeping: output directories for models, samples and logs.
desc = 'dcgan'
model_dir = 'models/%s'%desc
samples_dir = 'samples/%s'%desc
if not os.path.exists('logs/'):
    os.makedirs('logs/')
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
if not os.path.exists(samples_dir):
    os.makedirs(samples_dir)
# Fixed real-image batch, cropped to the model input size, saved once
# as a reference grid for visual monitoring.
X_sample = data.get_unlab_batch(0,monitor_size)
X_sample = data.center_crop(X_sample,img_size)
color_grid_vis(X_sample.transpose(0, 2, 3, 1), (14, 14), 'samples/%s_etl_test.png'%desc)
# Fixed latent batch so generated grids are comparable across iterations.
Z_sample = floatX(np_rng.uniform(-1., 1., size=(monitor_size, model.gen_dim)))
print desc.upper()
print "starting training"
# Start fresh log files with header lines.
with open('errors.log', 'w') as f:
    f.write('# iter data_seen epoch dis_loss g_loss')
    f.write(' c_loss c_val_err c_test_err\n')
with open('best.log', 'w') as f:
    f.write('# iter data_seen epoch c_val_err c_test_err\n')
'n_updates', 'n_examples', 'n_seconds', '1k_va_nnd', '10k_va_nnd', '100k_va_nnd', 'g_cost', 'd_cost', ] tr_data, te_data, tr_stream, val_stream, te_stream = faces(ntrain=ntrain) # Only tr_data/tr_stream are used. tr_handle = tr_data.open() vaX, = tr_data.get_data(tr_handle, slice(0, 10000)) vaX = transform(vaX) vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis) vaX_vis = inverse_transform(vaX[vis_idxs]) color_grid_vis(vaX_vis, (14, 14), 'samples/%s_etl_test.png'%desc) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz))) vaX = vaX.reshape(len(vaX), -1) # DEFINE NETWORKS. relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() tanh = activations.Tanh() bce = T.nnet.binary_crossentropy gifn = inits.Normal(scale=0.02) difn = inits.Normal(scale=0.02) gain_ifn = inits.Normal(loc=1., scale=0.02) bias_ifn = inits.Constant(c=0.) gw = gifn((nz, ngf*8*4*4), 'gw') gg = gain_ifn((ngf*8*4*4), 'gg')
idxs = idx * batch_size idxe = min( idx * batch_size + batch_size, ims.shape[ 0 ] ) ITb = transform( ims[ np.arange( idxs, idxe ) ], npx_t ) Zb = floatX( np_rng.uniform( -1., 1., size = ( len( ITb ), nz, 1, 1 ) ) ) if num_update % 2 == 0: cost = _train_c( Zb, ITb ) else: cost = _train_d( Zb, ITb ) num_update += 1 num_example += len( Zb ) c_cost = float( cost[ 0 ] ) d_cost = float( cost[ 1 ] ) if np.mod( idx, num_batches / 20 ) == 0: prog = np.round( idx * 100. / num_batches ) print( 'Epoch %02d: %03d%% (batch %06d / %06d), c_cost = %.4f, d_cost = %.4f' % ( num_epoch, prog, idx + 1, num_batches, c_cost, d_cost ) ) # Leave logs. c_cost = float( cost[ 0 ] ) d_cost = float( cost[ 1 ] ) log = [ num_epoch, num_update, num_example, time( ) - t, c_cost, d_cost ] f_log.write( json.dumps( dict( zip( log_fields, log ) ) ) + '\n' ) f_log.flush( ) # Sample visualization. IT_hat_vis = np.asarray( _convert( Zb_vis ) ) color_grid_vis( inverse_transform( IT_hat_vis, npx_t ), ( nvis, nvis ), os.path.join( sample_dir, '%03d.png' % num_epoch ) ) # Save network. print( 'Epoch %02d: Save.' % num_epoch ) np.save( mpath_c, [ p.get_value( ) for p in converter_params ] ) np.save( mpath_d, [ p.get_value( ) for p in discrim_params ] )
def train_model(train_stream,
                valid_stream,
                energy_optimizer,
                generator_optimizer,
                model_config_dict,
                model_test_name):
    """Train an energy-based generative model with a train/valid split.

    Per epoch: for every training batch, run one generator update followed by
    one energy (discriminator) update; then sweep the validation stream to
    compute mean input/sample energies, print a report, and save a 16x16 grid
    of samples drawn from a fixed latent batch.

    NOTE(review): depends on module-level helpers defined elsewhere in this
    file (set_generator_model, set_energy_model, set_*_update_function,
    transform, inverse_transform, floatX, np_rng, samples_dir,
    color_grid_vis) -- Python 2 code.
    """
    # Build generator and energy networks from config sizes.
    [generator_function, generator_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                 model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'],
                                                                         model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t=time()
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function,
                                                               energy_function=energy_function,
                                                               generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    # (held constant across epochs so successive sample grids are comparable)
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    for e in xrange(model_config_dict['epochs']):
        # train phase: running sums for per-epoch mean energies
        epoch_train_input_energy = 0.
        epoch_train_sample_energy = 0.
        epoch_train_count = 0.
        train_batch_iters = train_stream.get_epoch_iterator()
        # for each batch
        for b, train_batch_data in enumerate(train_batch_iters):
            # set update function inputs
            input_data = transform(train_batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # instance noise added to inputs, annealed as noise_decay**epoch
            noise_data = np_rng.normal(size=input_data.shape)
            noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))
            # update generator
            generator_update_inputs = [input_data, hidden_data, noise_data, e]
            [input_energy_val, sample_energy_val, ] = generator_updater(*generator_update_inputs)
            # update energy function
            # (these outputs overwrite the generator-update outputs above, so
            #  the accumulated energies are the post-energy-update values)
            energy_update_inputs = [input_data, hidden_data, e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)
            # get output values
            epoch_train_input_energy += input_energy_val.mean()
            epoch_train_sample_energy += sample_energy_val.mean()
            epoch_train_count += 1.
        # per-epoch means (assumes the stream yields at least one batch)
        epoch_train_input_energy /= epoch_train_count
        epoch_train_sample_energy /= epoch_train_count

        # validation phase
        epoch_valid_input_energy = 0.
        epoch_valid_sample_energy = 0.
        epoch_valid_count = 0.
        valid_batch_iters = valid_stream.get_epoch_iterator()
        for b, valid_batch_data in enumerate(valid_batch_iters):
            # set function inputs
            input_data = transform(valid_batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # evaluate model (no parameter updates here)
            evaluation_input = [input_data, hidden_data]
            outputs = evaluation_function(*evaluation_input)
            epoch_valid_input_energy += outputs[0].mean()
            epoch_valid_sample_energy += outputs[1].mean()
            epoch_valid_count += 1.
        epoch_valid_input_energy /= epoch_valid_count
        epoch_valid_sample_energy /= epoch_valid_count

        # per-epoch report
        print '================================================================'
        print 'EPOCH #{}'.format(e), model_test_name
        print '================================================================'
        print ' TRAIN RESULTS'
        print '================================================================'
        print ' input energy : ', epoch_train_input_energy
        print '----------------------------------------------------------------'
        print ' sample energy : ', epoch_train_sample_energy
        print '================================================================'
        print ' VALID RESULTS'
        print '================================================================'
        print ' input energy : ', epoch_valid_input_energy
        print '----------------------------------------------------------------'
        print ' sample energy : ', epoch_valid_sample_energy
        print '================================================================'

        # # plot curve data
        # save_as = model_test_name + '_ENERGY_CURVE.png'
        # plot_learning_curve(cost_values=[train_input_energy,
        #                                  train_sample_energy,
        #                                  valid_input_energy,
        #                                  valid_sample_energy],
        #                     cost_names=['Input Energy (train)',
        #                                 'Sample Energy (train)',
        #                                 'Input Energy (valid)',
        #                                 'Sample Energy (valid)'],
        #                     save_as=save_as)

        # sample data: draw from the fixed latent batch and save a 16x16 grid
        save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(e+1)
        sample_data = sampling_function(fixed_hidden_data)[0]
        sample_data = np.asarray(sample_data)
        color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
def mnistGANcond():
    """
    Load a pre-trained conditional DCGAN for 28x28 MNIST digits, generate 200
    class-conditioned samples (20 per digit class), sort them by the
    discriminator's probability of being real, render them to 'samples.png',
    and return the (inverse-transformed) samples plus the one-hot labels.

    NOTE(review): the samples are re-ordered by score but `sample_ymb` is
    returned in its original order, so the returned labels do not line up with
    the sorted samples -- confirm intended before relying on the pairing.
    """
    # Hyper-parameters of the pre-trained model (must match the saved weights).
    # The original code defined several of these twice with conflicting values
    # and carried a dozen unused training constants; only the ones actually
    # read below are kept.
    nc = 1     # # of channels in image
    npx = 28   # # of pixels width/height of images
    ny = 10    # # of classes
    nz = 100   # # of dim for Z
    ngf = 64   # # of gen filters in first conv layer

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()

    model_path = 'dcgan_code-master/mnist/models/cond_dcgan/'
    gen_params = [
        sharedX(p) for p in joblib.load(model_path + '200_gen_params.jl')
    ]
    discrim_params = [
        sharedX(p) for p in joblib.load(model_path + '200_discrim_params.jl')
    ]

    def gen(Z, Y, w, w2, w3, wx):
        # Conditional generator: the one-hot label Y is concatenated into
        # every layer (dense layers directly, conv layers via conv_cond_concat).
        yb = Y.dimshuffle(0, 1, 'x', 'x')
        Z = T.concatenate([Z, Y], axis=1)
        h = relu(batchnorm(T.dot(Z, w)))
        h = T.concatenate([h, Y], axis=1)
        h2 = relu(batchnorm(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf * 2, 7, 7))
        h2 = conv_cond_concat(h2, yb)
        h3 = relu(
            batchnorm(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        h3 = conv_cond_concat(h3, yb)
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, Y, w, w2, w3, wy):
        # Conditional discriminator: mirror of gen() with leaky ReLUs;
        # returns the probability that X is a real image of class Y.
        yb = Y.dimshuffle(0, 1, 'x', 'x')
        X = conv_cond_concat(X, yb)
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h = conv_cond_concat(h, yb)
        h2 = lrelu(
            batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h2 = T.concatenate([h2, Y], axis=1)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        h3 = T.concatenate([h3, Y], axis=1)
        y = sigmoid(T.dot(h3, wy))
        return y

    def inverse_transform(X):
        # Map generator output from [-1, 1] NCHW back to [0, 1] NHWC images.
        X = (X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1) + 1.) / 2.
        return X

    # Compile the sampling and scoring functions.
    Z = T.matrix()
    X = T.tensor4()
    Y = T.matrix()
    gX = gen(Z, Y, *gen_params)
    dX = discrim(X, Y, *discrim_params)
    _gen = theano.function([Z, Y], gX)
    _discrim = theano.function([X, Y], dX)

    # 200 latent vectors; labels are 20 copies of each of the 10 digits.
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
    sample_ymb = floatX(
        OneHot(
            np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(),
            ny))
    samples = _gen(sample_zmb, sample_ymb)
    scores = _discrim(samples, sample_ymb)
    print(scores[1:10])
    # Sort from most to least "real" according to the discriminator.
    sort = np.argsort(scores.flatten())[::-1]
    samples = samples[sort]
    print(np.shape(inverse_transform(samples)))
    print(min(scores))
    print(max(scores))
    # NOTE(review): only 200 samples are drawn but the grid is 20x20 (400
    # cells), so half the grid is empty -- confirm intended.
    color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png')
    return inverse_transform(samples), sample_ymb
os.makedirs(samples_dir) layer = 1 layer_shape = (model.ngf*2*2*2, img_size/(2*2*2*model.visible_subsamp[0]), img_size/(2*2*2*model.visible_subsamp[1])) reconstructor = partial_reconstructor(model, layer, layer_shape, batch_size) print "starting training" best_err = 1e6 last_it = 0 t = time() reconstructor.set_H_uniform() X_batch, _ = data.get_test_batch(0, batch_size) X_batch = data.scale_data(data.center_crop(X_batch, img_size)) color_grid_vis(data.inv_scale_data(X_batch).transpose(0, 2, 3, 1), (batch_size/n_samples_row, n_samples_row), 'samples/%s/reconstruction_objective.png' % (desc)) for it in xrange(n_iter): loss = reconstructor.train_h_on_batch(X_batch) if (it % iter_save == 0) or (it % 1000 == 0 and it < iter_save): samples = reconstructor.reconstruct() color_grid_vis(data.inv_scale_data(samples).transpose(0, 2, 3, 1), (batch_size/n_samples_row, n_samples_row), 'samples/%s/reconstruction_%d.png' % (desc, it)) with open('rec_errors.log', 'a') as f: f.write( " ".join(map(str, (it,it*batch_size) ))+" ") f.write( " ".join(str(loss))+"\n") t2 = time()-t
xmb, pg, pc = _genscore(zmb) pgs.append(pg) pcs.append(pc) for i in range(args.batch): if pg[i] >= args.generate_d and pc[i] >= args.generate_c: zmbs.append(zmb[i]) samples.append(xmb[i]) t.next() if len(zmbs) >= nvis: break pgs = np.concatenate(pgs) pcs = np.concatenate(pcs) print 'generate_d',pgs.mean(),pgs.std(),'generate_c',pcs.mean(),pcs.std() samples = np.asarray(samples) color_grid_vis(inverse_transform(samples), (nvis2, nvis2), '%s/Z_%03d.png'%(samples_dir,0)) if args.generate_v is None: sample_zmb0 = np.array(zmbs) sample_zmb1 = np.roll(sample_zmb0, 1, axis=0) for i in tqdm(range(1,ngif)): z = abs(1.-2.*i/(ngif-1.)) # from 1 to 0 and back to almost 1 sample_zmb = z * sample_zmb0 + (1-z) * sample_zmb1 samples = np.asarray(_gen(sample_zmb)) color_grid_vis(inverse_transform(samples), (nvis2, nvis2), '%s/Z_%03d.png'%(samples_dir,i)) else: sample_zmb = np.array(zmbs) v = gen_z(nvis) for i in tqdm(range(1,ngif)): sample_zmb += args.generate_v * v
cost_batch_vgd = _reconstruction_cost(floatX(samples)) cost_batch_data = _reconstruction_cost(imb) # weight decay decay = 1.0 - np.maximum(1. * (epoch - 50) / (niter - 50), 0.) g_lrt.set_value(floatX(g_lr * decay)) d_lrt.set_value(floatX(d_lr * decay)) if cost_batch_data > cost_batch_vgd: d_lrt.set_value(floatX(5. * d_lrt.get_value())) balance_weight.set_value(0.3) else: balance_weight.set_value(0.1) # Freezing learning if cost_batch_vgd > cost_batch_data + .5: n_updates = n_updates + k + 1 - (n_updates) % (k + 1) samples = np.asarray(_gen(sample_zmb, sample_ymb)) color_grid_vis(inverse_transform(samples), (10, 20), 'samples/%s/vgd_gan-%d.png' % (desc, epoch)) if (epoch + 1) % 20 == 0: joblib.dump([p.get_value() for p in gen_params], 'models/%s/%d_gen_params.jl' % (desc, epoch)) joblib.dump([p.get_value() for p in discrim_params], 'models/%s/%d_discrim_params.jl' % (desc, epoch)) print '%.2f seconds to train the generative model' % (time() - t) print 'DONE'
zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz))) samples = floatX(_gen(zmb)) grad, vgd_grad, dxkxy = _svgd_gradient(samples) _train_g(zmb, floatX(vgd_grad)) _train_d(imb, samples) n_updates += 1 cost_batch_vgd = _reconstruction_cost(floatX(samples)) cost_batch_data = _reconstruction_cost(imb) if n_updates % 50 == 0: print desc, cost_batch_data, cost_batch_vgd if cost_batch_data > cost_batch_vgd: d_lrt.set_value(5e-4) else: d_lrt.set_value(1e-4) color_grid_vis(inverse_transform(_ae(imb)), (10, 10), 'samples/%s/ae-%d.png'%(desc, epoch)) samples = np.asarray(_gen(sample_zmb)) color_grid_vis(inverse_transform(samples), (14, 14), 'samples/%s/gan-%d.png' % (desc, epoch)) if epoch % 2 == 0: joblib.dump([p.get_value() for p in gen_params], 'models/%s/%d_gen_params.jl'%(desc, epoch)) joblib.dump([p.get_value() for p in discrim_params], 'models/%s/%d_discrim_params.jl'%(desc, epoch)) print '%.2f seconds to train the generative model' % (time()-t) print 'DONE'
X = T.tensor4() Xx = X.astype('float32') gX = gen(Zz, *gen_params32) #%% dX = discrim(Xx, *discrim_params) _gen = theano.function([Zz], gX) _discrim = theano.function([Xx], dX) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(400, 256))) samples = _gen(sample_zmb) scores = _discrim(samples) sort = np.argsort(scores.flatten())[::-1] samples = samples[sort] color_grid_vis(inverse_transform(samples), (20, 20), 'samples.png') #%% #%% def calculate_b_u_b_s(X, g=None, b=None, u=None, s=None, a=1., e=1e-8): if X.ndim == 4: if u is not None and s is not None: b_u = u.dimshuffle('x', 0, 'x', 'x') b_s = s.dimshuffle('x', 0, 'x', 'x') else: b_u = np.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x') b_s = np.mean(T.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x') if a != 1: b_u = (1. - a) * 0. + a * b_u
n_updates += 1 cost_batch_vgd = _reconstruction_cost(floatX(samples)) cost_batch_data = _reconstruction_cost(imb) if n_updates % 10 == 0: print desc, cost_batch_data, cost_batch_vgd if cost_batch_data > cost_batch_vgd: balance_weight.set_value(0.3) else: balance_weight.set_value(0.1) samples = np.asarray(_gen(sample_zmb, sample_ymb)) color_grid_vis(inverse_transform(samples), (10, 20), 'samples/%s/gan-%d.png' % (desc, epoch)) color_grid_vis(inverse_transform(_reconstruction(imb)), (10, 10), 'samples/%s/ae-%d.png' % (desc, epoch)) n_epochs += 1 if epoch % 50 == 0: joblib.dump([p.get_value() for p in gen_params], 'models/%s/%d_gen_params.jl' % (desc, epoch)) joblib.dump([p.get_value() for p in discrim_params], 'models/%s/%d_discrim_params.jl' % (desc, epoch)) print '%.2f seconds to train the generative model' % (time() - t) print 'DONE'
def train_model(data_stream,
                energy_optimizer,
                generator_optimizer,
                generator_bn_optimizer,
                model_config_dict,
                model_test_name):
    """Train a generator/energy-function pair on a single data stream.

    Per batch: one generator update (its batch-norm parameters driven by a
    separate optimizer) followed by one energy update. Logs energies every
    batch; every 1000 batches dumps two sample grids, the energy traces, and
    a model pickle.

    NOTE(review): depends on module-level helpers/globals defined elsewhere
    in this file (set_generator_model, set_energy_model, transform,
    inverse_transform, floatX, np_rng, num_channels, input_shape,
    samples_dir, color_grid_vis, save_model).
    """
    [generator_function, generator_params, generator_bn_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                                      model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['expert_size'],
                                                                          model_config_dict['min_num_eng_filters'])
    # compile functions
    print 'COMPILING ENERGY UPDATER'
    t=time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t=time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_bn_params=generator_bn_params,
                                                      generator_optimizer=generator_optimizer,
                                                      generator_bn_optimizer=generator_bn_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    # (held constant so successive sample grids are comparable)
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # instance noise, annealed by 0.99 every 100 batches
            noise_data = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)),
                                              size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = generator_updater(*updater_inputs)
            # fresh noise for the energy update
            noise_data = floatX(np_rng.normal(scale=0.01*(0.99**int(batch_count/100)),
                                              size=(num_data, num_channels, input_shape, input_shape)))
            updater_inputs = [input_data, hidden_data, noise_data, batch_count]
            updater_outputs = energy_updater(*updater_inputs)
            # get output values (taken after the energy update; the
            # generator-update outputs above are overwritten)
            input_energy = updater_outputs[0].mean()
            sample_energy = updater_outputs[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            if batch_count%1==0:  # i.e. every batch
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy_list[-1]
                print '================================================================'
            if batch_count%1000==0:
                # sample data: two outputs of the sampling function
                # (presumably train-mode vs test-mode batch-norm -- confirm)
                [sample_data_t, sample_data_f] = sampling_function(fixed_hidden_data)
                sample_data_t = np.asarray(sample_data_t)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TRAIN){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_t).transpose([0,2,3,1]), (16, 16), save_as)
                sample_data_f = np.asarray(sample_data_f)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(TEST){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data_f).transpose([0,2,3,1]), (16, 16), save_as)
                # persist energy traces and a model snapshot
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params + generator_bn_params + energy_params,
                           save_to=save_as)
def train_model(data_stream,
                model_optimizer,
                model_config_dict,
                model_test_name):
    """Train a VAE-style encoder/decoder with an extra moment-matching cost.

    Per batch: one joint update of encoder and decoder parameters; logs the
    vae / reconstruction / KL / moment-matching costs every 10 batches,
    saves sample + reconstruction grids and cost traces every 500 batches,
    and a decoder pickle every 1000 batches.

    NOTE(review): depends on module-level helpers defined elsewhere in this
    file (set_encoder_model, set_decoder_model, set_updater_function,
    transform, inverse_transform, floatX, np_rng, samples_dir,
    color_grid_vis, save_model).
    """
    # Encoder returns [feature fn, mean fn, var fn, params].
    encoder_model = set_encoder_model(model_config_dict['hidden_size'],
                                      model_config_dict['min_num_gen_filters'])
    encoder_feature_function = encoder_model[0]
    encoder_mean_function = encoder_model[1]
    encoder_var_function = encoder_model[2]
    encoder_parameters = encoder_model[3]
    # Decoder returns [decoder fn, params].
    decoder_model = set_decoder_model(model_config_dict['hidden_size'],
                                      model_config_dict['min_num_eng_filters'])
    decoder_function = decoder_model[0]
    decoder_parameters = decoder_model[1]
    # compile functions
    print 'COMPILING UPDATER FUNCTION'
    t=time()
    updater_function = set_updater_function(encoder_feature_function=encoder_feature_function,
                                            encoder_mean_function=encoder_mean_function,
                                            encoder_var_function=encoder_var_function,
                                            decoder_function=decoder_function,
                                            encoder_params=encoder_parameters,
                                            decoder_params=decoder_parameters,
                                            optimizer=model_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t=time()
    sampling_function = set_sampling_function(decoder_function=decoder_function)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling (standard normal prior, held
    # constant so successive sample grids are comparable)
    fixed_hidden_data = floatX(np_rng.normal(size=(model_config_dict['num_display'],
                                                   model_config_dict['hidden_size'])))

    print 'START TRAINING'
    # for each epoch
    vae_cost_list = []
    recon_cost_list = []
    kl_cost_list = []
    moment_match_cost_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs: real data plus two standard-normal
            # latent batches (reparameterization noise / prior samples)
            positive_visible_data = transform(batch_data[0])
            positive_hidden_data = floatX(np_rng.normal(size=(positive_visible_data.shape[0],
                                                              model_config_dict['hidden_size'])))
            negative_hidden_data = floatX(np_rng.normal(size=(positive_visible_data.shape[0],
                                                              model_config_dict['hidden_size'])))
            moment_cost_weight = 1.0
            updater_inputs = [positive_visible_data,
                              positive_hidden_data,
                              negative_hidden_data,
                              moment_cost_weight]
            updater_outputs = updater_function(*updater_inputs)
            # unpack costs and reconstructions
            vae_cost = updater_outputs[0].mean()
            recon_cost = updater_outputs[1].mean()
            kl_cost = updater_outputs[2].mean()
            moment_match_cost = updater_outputs[3].mean()
            recon_samples = updater_outputs[4]
            vae_cost_list.append(vae_cost)
            recon_cost_list.append(recon_cost)
            kl_cost_list.append(kl_cost)
            moment_match_cost_list.append(moment_match_cost)
            # batch count up
            batch_count += 1
            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' vae cost : ', vae_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' recon cost : ', recon_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' kl cost : ', kl_cost_list[-1]
                print '----------------------------------------------------------------'
                print ' moment cost : ', moment_match_cost_list[-1]
                print '================================================================'
            if batch_count%500==0:
                # sample data: decode the fixed prior batch
                sample_data = sampling_function(fixed_hidden_data)[0]
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES(NEGATIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                # recon data: current batch originals and their reconstructions
                save_as = samples_dir + '/' + model_test_name + '_ORIGINALS(POSITIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(positive_visible_data).transpose([0,2,3,1]), (12, 12), save_as)
                save_as = samples_dir + '/' + model_test_name + '_RECONSTRUCTIONS(POSITIVE){}.png'.format(batch_count)
                color_grid_vis(inverse_transform(recon_samples).transpose([0,2,3,1]), (12, 12), save_as)
                # save costs
                np.save(file=samples_dir + '/' + model_test_name +'_vae_cost',
                        arr=np.asarray(vae_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_recon_cost',
                        arr=np.asarray(recon_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_kl_cost',
                        arr=np.asarray(kl_cost_list))
                np.save(file=samples_dir + '/' + model_test_name +'_moment_cost',
                        arr=np.asarray(moment_match_cost_list))
            if batch_count%1000==0:
                # snapshot decoder parameters only
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=decoder_parameters,
                           save_to=save_as)
samples = _gen(zmb) vgd_grad = _vgd_gradient(samples, samples) if n_updates % (k + 1) == 0: _train_g(zmb, floatX(vgd_grad)) else: _train_d(imb, samples) n_updates += 1 cost_batch_vgd = _reconstruction_cost(floatX(samples)) cost_batch_data = _reconstruction_cost(imb) if cost_batch_data > cost_batch_vgd: d_lrt.set_value(5e-4) else: d_lrt.set_value(1e-4) samples = np.asarray(_gen(sample_zmb)) color_grid_vis(inverse_transform(samples), (14, 14), 'samples/%s/SteinGAN-%d.png' % (desc, epoch)) if epoch + 1 % 5 == 0: joblib.dump([p.get_value() for p in gen_params], 'models/%s/%d_gen_params.jl' % (desc, epoch)) joblib.dump([p.get_value() for p in discrim_params], 'models/%s/%d_discrim_params.jl' % (desc, epoch)) print '%.2f seconds to train the generative model' % (time() - t) print 'DONE'
def train_model(model_name,
                data_stream,
                num_hiddens,
                num_epochs,
                generator_optimizer):
    """Train a generator by matching VGG feature moments of real data.

    Per batch: one generator update driven by the moment-matching cost; logs
    the cost every 10 batches and, every 100 batches, saves a 16x16 sample
    grid and the full cost trace.

    NOTE(review): depends on module-level helpers defined elsewhere in this
    file (load_vgg_feature_extractor, set_generator_model,
    set_updater_function, set_sampling_function, transform,
    inverse_transform, floatX, np_rng, samples_dir, color_grid_vis).
    """
    # set models
    print 'LOADING VGG'
    t=time()
    feature_extractor = load_vgg_feature_extractor()
    print '%.2f SEC '%(time()-t)
    sample_generator , generator_parameters = set_generator_model(num_hiddens)
    print 'COMPILING UPDATER AND SAMPLER'
    t=time()
    updater_function = set_updater_function(feature_extractor,
                                            sample_generator,
                                            generator_parameters,
                                            generator_optimizer)
    sampling_function = set_sampling_function(sample_generator)
    print '%.2f SEC '%(time()-t)

    # set fixed hidden data for sampling
    # (16*16 latents to fill the 16x16 sample grid below)
    fixed_hidden_data = floatX(np_rng.uniform(low=-1.0,
                                              high=1.0,
                                              size=(16*16, num_hiddens)))

    print 'START TRAINING'
    # for each epoch
    moment_cost_list = []
    batch_count = 0
    for e in xrange(num_epochs):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs: real images + uniform latents
            input_data = transform(batch_data[0])
            hidden_data = floatX(np_rng.uniform(low=-1.0,
                                                high=1.0,
                                                size=(input_data.shape[0], num_hiddens)))
            updater_inputs = [input_data, hidden_data]
            updater_outputs = updater_function(*updater_inputs)
            moment_cost_list.append(updater_outputs[0])
            # batch count up
            batch_count += 1
            if batch_count%10==0:
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' moment matching cost : ', moment_cost_list[-1]
                print '================================================================'
            if batch_count%100==0:
                # sample data: decode the fixed latent batch to a 16x16 grid
                save_as = samples_dir + '/' + model_name + '_SAMPLES{}.png'.format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                # persist the cost trace
                np.save(file=samples_dir + '/' + model_name +'_MOMENT_COST',
                        arr=np.asarray(moment_cost_list))
def main(dataset='cifar10',
         option='stochatic-16-8',
         arch='nin',
         vis_layer=0,
         lr=1,
         num_epochs=200,
         lowerlr_at=-1,
         load_model='none',
         **kwargs):
    """Train an inversion (reconstruction) head on top of a frozen
    pre-trained classifier and visualize static vs stochastic reconstructions.

    Loads classifier weights from `load_model` (an .npz of arrays), trains
    only the inversion-network parameters with adadelta on squared
    reconstruction error for `num_epochs` epochs (skipped entirely when
    num_epochs is 0), then renders a class-balanced test sample alongside its
    deterministic and stochastic reconstructions to PNG grids.

    NOTE(review): depends on module-level helpers defined elsewhere in this
    file (load_data, get_iterator, build_net, color_grid_vis) plus lasagne /
    theano imports.
    """
    print "Loading data..."
    data = load_data(dataset)
    X_train = data['X_train']
    Y_train = data['Y_train']
    X_test = data['X_test']
    Y_test = data['Y_test']
    # number of classes per dataset
    ny = {'cifar10': 10, 'cifar100': 100, 'svhn': 10, 'food101': 101}
    iterate_minibatches = get_iterator(dataset)
    input_var, target_var, network = build_net(option=option,
                                               arch=arch,
                                               ny=ny[dataset],
                                               visualize=True)
    pred_net = network[0]
    # network[1:] are the per-layer inversion heads; pick the requested one
    inv_net = network[vis_layer + 1]
    if num_epochs:
        # training setup: load frozen classifier weights, build the loss over
        # only the inversion-head parameters
        params = lasagne.layers.get_all_params(pred_net, trainable=True)
        print 'loading from', load_model
        sys.stdout.flush()
        with np.load(load_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(pred_net, param_values)
        sh_lr = theano.shared(lasagne.utils.floatX(lr))
        # training-time reconstruction: batch statistics, updating averages
        recon = get_output(inv_net,
                           deterministic=True,
                           batch_norm_use_averages=False,
                           batch_norm_update_averages=True)
        inv_params = get_all_params(inv_net, trainable=True)
        # exclude the classifier's own parameters -- train only the head
        inv_params = [p for p in inv_params if p not in params]
        loss = lasagne.objectives.squared_error(input_var, recon).mean(axis=0).sum()
        updates = lasagne.updates.adadelta(loss, inv_params, learning_rate=sh_lr)
        train_fn = theano.function([input_var], loss, updates=updates)
    # evaluation-time reconstructions: deterministic vs stochastic batch-norm
    recon_static = get_output(inv_net, deterministic=True)
    recon_stochastic = get_output(inv_net,
                                  deterministic=False,
                                  batch_norm_use_averages=True,
                                  batch_norm_update_averages=False)
    static_recon_fn = theano.function([input_var], recon_static)
    stochastic_recon_fn = theano.function([input_var], recon_stochastic)
    if num_epochs:
        # launch the training loop
        print "Starting training..."
        sys.stdout.flush()
        # We iterate over epochs:
        for epoch in range(num_epochs):
            if epoch == lowerlr_at:
                # one-shot learning-rate drop at the requested epoch
                new_lr = sh_lr.get_value() * 0.1
                print "reducing lr to " + str(new_lr)
                sh_lr.set_value(lasagne.utils.floatX(new_lr))
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, Y_train, 128, flip=False, crop=False):
                inputs, targets = batch
                err = train_fn(inputs)
                train_err += err
                train_batches += 1
            # Then we print the results for this epoch:
            print "Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time)
            print " training loss:\t\t{:.6f}".format(train_err / train_batches)
            sys.stdout.flush()
    # Build a class-balanced visualization batch: 5 test images per class
    # (searched within the first 1000 test labels).
    sample = []
    for i in range(ny[dataset]):
        idx = np.where(Y_test[:1000] == i)[0][:5]
        sample.append(X_test[idx])
    sample = np.concatenate(sample, axis=0)
    color_grid_vis(sample.transpose(0, 2, 3, 1), (10, 5),
                   arch + '_sample%d.png' % (vis_layer))
    color_grid_vis(
        static_recon_fn(sample).transpose(0, 2, 3, 1), (10, 5),
        arch + '_static_recon%d.png' % (vis_layer))
    color_grid_vis(
        stochastic_recon_fn(sample).transpose(0, 2, 3, 1), (10, 5),
        arch + '_stochastic_recon%d.png' % (vis_layer))
# Experiment setup: derive a tag from the running script's filename
# (strip the trailing '.py'), create model/sample output directories under
# `dataout`, and render source/target/converted image grids for fixed
# train and validation subsets.
desc = ( sys.argv[ 0 ][ 0 : -3 ] ).upper( )
model_dir = os.path.join( dataout, desc, 'models'.upper( ) )
sample_dir = os.path.join( dataout, desc, 'samples'.upper( ) )
if not os.path.exists( model_dir ):
    os.makedirs( model_dir )
if not os.path.exists( sample_dir ):
    os.makedirs( sample_dir )

# PLOT SOURCE/TARGET SAMPLE IMAGES.
# Fixed seed so the same nvis**2 pairs are visualized on every run.
np_rng = np.random.RandomState( 1 )
vis_tr = np_rng.permutation( len( sset_tr ) )
vis_tr = vis_tr[ 0 : nvis ** 2 ]
vis_ims_tr_s = ims_st[ sset_tr[ vis_tr ] ]
vis_ims_tr_t = ims_st[ tset_tr[ vis_tr ] ]
# _test_ced converts source images to predicted targets (defined elsewhere).
vis_ims_tr_t_hat = _test_ced( vis_ims_tr_s )
color_grid_vis( itf( vis_ims_tr_s, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'TR_S.png' ) )
color_grid_vis( itf( vis_ims_tr_t, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'TR_T.png' ) )
color_grid_vis( itf( vis_ims_tr_t_hat, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'TR000T.png' ) )
# Same three grids for a fixed validation subset.
vis_val = np_rng.permutation( len( sset_val ) )
vis_val = vis_val[ 0 : nvis ** 2 ]
vis_ims_val_s = ims_st[ sset_val[ vis_val ] ]
vis_ims_val_t = ims_st[ tset_val[ vis_val ] ]
vis_ims_val_t_hat = _test_ced( vis_ims_val_s )
color_grid_vis( itf( vis_ims_val_s, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'VAL_S.png' ) )
color_grid_vis( itf( vis_ims_val_t, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'VAL_T.png' ) )
color_grid_vis( itf( vis_ims_val_t_hat, npx ), ( nvis, nvis ),
                os.path.join( sample_dir, 'VAL000T.png' ) )
pgs.append(pg) pcs.append(pc) for i in range(args.batch): if pg[i] >= args.generate_d and pc[i] >= args.generate_c: zmbs.append(zmb[i]) samples.append(xmb[i]) t.next() if len(zmbs) >= nvis: break pgs = np.concatenate(pgs) pcs = np.concatenate(pcs) print 'generate_d', pgs.mean(), pgs.std(), 'generate_c', pcs.mean( ), pcs.std() samples = np.asarray(samples) color_grid_vis(inverse_transform(samples), (nvis2, nvis2), '%s/Z_%03d.png' % (samples_dir, 0)) if args.generate_v is None: sample_zmb0 = np.array(zmbs) sample_zmb1 = np.roll(sample_zmb0, 1, axis=0) for i in tqdm(range(1, ngif)): z = abs(1. - 2. * i / (ngif - 1.)) # from 1 to 0 and back to almost 1 sample_zmb = z * sample_zmb0 + (1 - z) * sample_zmb1 samples = np.asarray(_gen(sample_zmb)) color_grid_vis(inverse_transform(samples), (nvis2, nvis2), '%s/Z_%03d.png' % (samples_dir, i)) else: sample_zmb = np.array(zmbs) v = gen_z(nvis) for i in tqdm(range(1, ngif)):
def train_model(data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Train the generator / energy-function pair from scratch.

    data_stream         : Fuel-style stream; get_epoch_iterator() yields
                          batches whose first element is the raw image batch
                          (exact layout depends on transform() -- defined
                          elsewhere in this file).
    energy_optimizer    : optimizer forwarded to set_energy_update_function.
    generator_optimizer : optimizer forwarded to set_generator_update_function.
    model_config_dict   : hyper-parameter dict; keys read here:
                          'hidden_size', 'min_num_gen_filters',
                          'min_num_eng_filters', 'hidden_distribution',
                          'num_display', 'epochs', 'init_noise', 'noise_decay'.
    model_test_name     : filename prefix for sample grids / energy traces
                          written under the module-level `samples_dir`.
    """
    # build fresh generator and energy networks from the config
    [generator_function, generator_params, generator_entropy_params] = set_generator_model(model_config_dict['hidden_size'],
                                                                                           model_config_dict['min_num_gen_filters'])
    [feature_function, energy_function, energy_params] = set_energy_model(model_config_dict['hidden_size'],
                                                                         model_config_dict['min_num_eng_filters'])
    # compile functions (Theano compilation is slow, hence the timers)
    print 'COMPILING ENERGY UPDATER'
    t = time()
    energy_updater = set_energy_update_function(feature_function=feature_function,
                                                energy_function=energy_function,
                                                generator_function=generator_function,
                                                energy_params=energy_params,
                                                energy_optimizer=energy_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING GENERATOR UPDATER'
    t = time()
    generator_updater = set_generator_update_function(feature_function=feature_function,
                                                      energy_function=energy_function,
                                                      generator_function=generator_function,
                                                      generator_params=generator_params,
                                                      generator_entropy_params=generator_entropy_params,
                                                      generator_optimizer=generator_optimizer)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING EVALUATION FUNCTION'
    t = time()
    # NOTE(review): evaluation_function is compiled below but never called in
    # this function -- dead weight unless callers rely on the compile side
    # effects; consider removing.
    evaluation_function = set_evaluation_and_sampling_function(feature_function=feature_function,
                                                               energy_function=energy_function,
                                                               generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # set fixed hidden data for sampling -- reused for every sample grid so
    # successive checkpoints are visually comparable
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            # additive input noise, annealed per epoch: init_noise * noise_decay**e
            noise_data = np_rng.normal(size=input_data.shape)
            noise_data = floatX(noise_data*model_config_dict['init_noise']*(model_config_dict['noise_decay']**e))
            # update generator
            generator_update_inputs = [input_data, hidden_data, noise_data, e]
            [input_energy_val, sample_energy_val, entropy_cost] = generator_updater(*generator_update_inputs)
            # update energy function -- note this intentionally overwrites the
            # generator step's energy outputs; only entropy_cost survives from
            # the generator update
            energy_update_inputs = [input_data, hidden_data, e]
            [input_energy_val, sample_energy_val, ] = energy_updater(*energy_update_inputs)
            # get output values (batch means of the per-example energies)
            input_energy = input_energy_val.mean()
            sample_energy = sample_energy_val.mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            # batch count up
            batch_count += 1
            if batch_count%100==0:
                # periodic console report
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy
                print '----------------------------------------------------------------'
                print ' entropy cost : ', entropy_cost
                print '================================================================'
            if batch_count%1000==0:
                # sample data: save a 16x16 grid from the fixed latent batch and
                # dump the running energy traces as .npy files
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))
def continue_train_model(last_batch_idx, data_stream, energy_optimizer, generator_optimizer, model_config_dict, model_test_name):
    """Resume training from a .pkl checkpoint found in `samples_dir`.

    last_batch_idx : global batch-counter value at which to resume; earlier
                     batches of the stream are skipped without training.
    Remaining parameters mirror train_model; model_config_dict additionally
    needs 'expert_size' here.
    """
    # NOTE(review): glob order is unspecified, so with more than one .pkl
    # present model_list[0] is an arbitrary checkpoint -- verify uniqueness.
    model_list = glob.glob(samples_dir +'/*.pkl')
    # load parameters
    model_param_dicts = unpickle(model_list[0])
    generator_models = load_generator_model(min_num_gen_filters=model_config_dict['min_num_gen_filters'],
                                            model_params_dict=model_param_dicts)
    generator_function = generator_models[0]
    generator_params = generator_models[1]
    energy_models = load_energy_model(num_experts=model_config_dict['expert_size'],
                                      model_params_dict=model_param_dicts)
    feature_function = energy_models[0]
    # norm_function = energy_models[1]
    expert_function = energy_models[1]
    # prior_function = energy_models[3]
    energy_params = energy_models[2]
    # compile functions (both updaters also receive the checkpoint dict so the
    # optimizer state resumes where it left off)
    print 'COMPILING MODEL UPDATER'
    t = time()
    generator_updater, generator_optimizer_params = set_generator_update_function(energy_feature_function=feature_function,
                                                                                  # energy_norm_function=norm_function,
                                                                                  energy_expert_function=expert_function,
                                                                                  # energy_prior_function=prior_function,
                                                                                  generator_function=generator_function,
                                                                                  generator_params=generator_params,
                                                                                  generator_optimizer=generator_optimizer,
                                                                                  init_param_dict=model_param_dicts)
    energy_updater, energy_optimizer_params = set_energy_update_function(energy_feature_function=feature_function,
                                                                         # energy_norm_function=norm_function,
                                                                         energy_expert_function=expert_function,
                                                                         # energy_prior_function=prior_function,
                                                                         generator_function=generator_function,
                                                                         energy_params=energy_params,
                                                                         energy_optimizer=energy_optimizer,
                                                                         init_param_dict=model_param_dicts)
    print '%.2f SEC '%(time()-t)
    print 'COMPILING SAMPLING FUNCTION'
    t = time()
    sampling_function = set_sampling_function(generator_function=generator_function)
    print '%.2f SEC '%(time()-t)
    # set fixed hidden data for sampling -- fixed so successive sample grids
    # are visually comparable
    fixed_hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                              high=model_config_dict['hidden_distribution'],
                                              size=(model_config_dict['num_display'], model_config_dict['hidden_size'])))
    print 'START TRAINING'
    # for each epoch
    input_energy_list = []
    sample_energy_list = []
    batch_count = 0
    for e in xrange(model_config_dict['epochs']):
        # train phase
        batch_iters = data_stream.get_epoch_iterator()
        # for each batch
        for b, batch_data in enumerate(batch_iters):
            # batch count up
            batch_count += 1
            # fast-forward to the resume point; NOTE(review): skipped batches
            # also skip their RNG draws, so a resumed run is not bit-identical
            # to an uninterrupted one, and "<" means batch last_batch_idx
            # itself is re-trained -- confirm whether "<=" was intended
            if batch_count<last_batch_idx:
                continue
            # set update function inputs
            input_data = transform(batch_data[0])
            num_data = input_data.shape[0]
            hidden_data = floatX(np_rng.uniform(low=-model_config_dict['hidden_distribution'],
                                                high=model_config_dict['hidden_distribution'],
                                                size=(num_data, model_config_dict['hidden_size'])))
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            # generator update: outputs indexed 1 / 2 are entropy weights /
            # entropy cost (index 0 unused here)
            update_input = [hidden_data, noise_data]
            update_output = generator_updater(*update_input)
            entropy_weights = update_output[1].mean()
            entropy_cost = update_output[2].mean()
            # fresh noise for the energy-function update
            noise_data = floatX(np_rng.normal(scale=0.01, size=input_data.shape))
            update_input = [input_data, hidden_data, noise_data]
            update_output = energy_updater(*update_input)
            input_energy = update_output[0].mean()
            sample_energy = update_output[1].mean()
            input_energy_list.append(input_energy)
            sample_energy_list.append(sample_energy)
            if batch_count%10==0:
                # periodic console report
                print '================================================================'
                print 'BATCH ITER #{}'.format(batch_count), model_test_name
                print '================================================================'
                print ' TRAIN RESULTS'
                print '================================================================'
                print ' input energy : ', input_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' sample energy : ', sample_energy_list[-1]
                print '----------------------------------------------------------------'
                print ' entropy weight : ', entropy_weights
                print '----------------------------------------------------------------'
                print ' entropy cost : ', entropy_cost
                print '================================================================'
            if batch_count%100==0:
                # sample data: 16x16 grid from the fixed latents, energy traces,
                # and a full parameter + optimizer-state checkpoint
                sample_data = sampling_function(fixed_hidden_data)[0]
                sample_data = np.asarray(sample_data)
                save_as = samples_dir + '/' + model_test_name + '_SAMPLES{}.png'.format(batch_count)
                color_grid_vis(inverse_transform(sample_data).transpose([0,2,3,1]), (16, 16), save_as)
                np.save(file=samples_dir + '/' + model_test_name +'_input_energy',
                        arr=np.asarray(input_energy_list))
                np.save(file=samples_dir + '/' + model_test_name +'_sample_energy',
                        arr=np.asarray(sample_energy_list))
                save_as = samples_dir + '/' + model_test_name + '_MODEL.pkl'
                save_model(tensor_params_list=generator_params[0] + generator_params[1] + energy_params + generator_optimizer_params + energy_optimizer_params,
                           save_to=save_as)
# NOTE(review): fragment -- the statements down to the dangling "else:" are the
# body of an "if ..." whose header is above this excerpt.
labels_idx = tr_stream.dataset.provides_sources.index('labels')
patches_idx = tr_stream.dataset.provides_sources.index('patches')
data = tr_data.get_data(tr_handle, slice(0, tr_data.num_examples))
labels = data[labels_idx]
# select up to 196 (= 14*14 grid) examples of the requested class vc_num
vc_idx = np.where(labels == vc_num)[0]
vc_idx = vc_idx[:196]
# choose which feature source provides the latent codes
if 'orig' in desc:
    zmb_idx = tr_stream.dataset.provides_sources.index('feat_orig')
else:
    zmb_idx = tr_stream.dataset.provides_sources.index('feat_l2')
sample_zmb = data[zmb_idx][vc_idx,:]
patches = data[patches_idx][vc_idx,:]
patches = transform(patches, 64)
color_grid_vis(inverse_transform(patches, nc=3, npx=64), (14, 14), './patches.png')
else:
    # no dataset features: draw 196 uniform latent codes of dimension 100
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(196, 100)))
print 'COMPILING...'
_gen = theano.function([Z], gX)
recon = theano.function([gX,X], cost)
print 'Done!'
samples = np.asarray(_gen(sample_zmb))
# 'patches' is only bound on the dataset branch above; the locals() probe
# guards the random-latent branch (NOTE(review): fragile idiom -- an explicit
# flag would be safer)
if 'patches' in locals():
    recon_cost = recon(samples, patches)
    costs[ii,0] = recon_cost
    # NOTE(review): '%3f' is width-3 formatting, not precision -- '%.3f' was
    # probably intended; confirm before changing the log format
    print "Reconstruction Error: %3f" % (float(recon_cost))
def train_transform(X):
    # transform vectorized observations into convnet inputs (N, nc, npx, npx);
    # NOTE(review): transpose(0, 1, 2, 3) is the identity permutation -- a
    # no-op, presumably kept for symmetry with draw_transform
    return X.reshape(-1, nc, npx, npx).transpose(0, 1, 2, 3)

def draw_transform(X):
    # transform vectorized observations into drawable images (N, npx, npx, nc);
    # maps [-1, 1] onto [0, 254] -- presumably 127.5 was intended for the full
    # [0, 255] range; TODO confirm
    X = (X + 1.0) * 127.0
    return X.reshape(-1, nc, npx, npx).transpose(0, 2, 3, 1)

def rand_gen(size):
    # latent noise source: standard normal (a uniform variant was tried and
    # left commented out)
    #r_vals = floatX(np_rng.uniform(-1., 1., size=size))
    r_vals = floatX(np_rng.normal(size=size))
    return r_vals

# draw some examples from training set as a 10x20 grid
color_grid_vis(draw_transform(Xtr[0:200]), (10, 20), "{}/Xtr.png".format(sample_dir))

# activations, loss, and weight initialisers shared by the modules below
tanh = activations.Tanh()
sigmoid = activations.Sigmoid()
bce = T.nnet.binary_crossentropy
gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)

#
# Define some modules to use in the generator
#
gen_module_1 = \
    GenUniModule(
        rand_dim=nz0,
        # (argument list continues past this excerpt)
# NOTE(review): fragment -- this makedirs is the body of a guard along the
# lines of "if not os.path.exists(sample_dir):" above this excerpt.
os.makedirs( sample_dir )

# NDJSON training log: one record per update with the fields listed below
f_log = open( os.path.join( log_dir, '%s.ndjson' % desc ), 'wb' )
log_fields = [
    'num_epoch',
    'num_update',
    'num_example',
    't_spent',
    'cost',]

# PLOT SOURCE/TARGET SAMPLE IMAGES.
np_rng = np.random.RandomState( 1 )

# training split: nvis**2 random source/target pairs saved as image grids
vis_tr = np_rng.permutation( len( sset_tr ) )
vis_tr = vis_tr[ 0 : nvis ** 2 ]
vis_ims_tr_s = ims[ sset_tr[ vis_tr ] ]
vis_ims_tr_t = ims[ tset_tr[ vis_tr ] ]
color_grid_vis( vis_ims_tr_s, ( nvis, nvis ), os.path.join( sample_dir, 'TR000S.png' ) )
color_grid_vis( vis_ims_tr_t, ( nvis, nvis ), os.path.join( sample_dir, 'TR000T.png' ) )
# network-ready version of the source images (transform presumably rescales /
# normalises to npx -- TODO confirm against its definition)
vis_ims_tr_s = transform( ims[ sset_tr[ vis_tr ] ], npx )

# validation split: same treatment
vis_val = np_rng.permutation( len( sset_val ) )
vis_val = vis_val[ 0 : nvis ** 2 ]
vis_ims_val_s = ims[ sset_val[ vis_val ] ]
vis_ims_val_t = ims[ tset_val[ vis_val ] ]
color_grid_vis( vis_ims_val_s, ( nvis, nvis ), os.path.join( sample_dir, 'VAL000S.png' ) )
color_grid_vis( vis_ims_val_t, ( nvis, nvis ), os.path.join( sample_dir, 'VAL000T.png' ) )
vis_ims_val_s = transform( ims[ sset_val[ vis_val ] ], npx )

# DO THE JOB.
print desc.upper( )