# NOTE: these functions assume the surrounding module's imports (nn, gen, ly,
# ls, learner, ev, gnp) and helpers (load_toy_data, load_tfd_fold,
# load_tfd_all_folds, cat_list, write_config), plus the constants
# BEST_TFD_AUTOENCODER, BEST_TFD_CODE_SPACE_MODEL and OUTPUT_BASE_DIR.


def get_tfd_code_space_model():
    # Load the best pretrained autoencoder and the best code-space generative
    # net from disk, then attach the autoencoder to the net.
    ae = nn.AutoEncoder()
    ae.load_model_from_file(BEST_TFD_AUTOENCODER)
    net = gen.StochasticGenerativeNetWithAutoencoder()
    net.load_model_from_file(BEST_TFD_CODE_SPACE_MODEL)
    net.autoencoder = ae
    return net

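# A minimal usage sketch (hypothetical, not part of the original module):
# load the pretrained TFD code-space model and score it with the same KDE
# protocol used in tfd_mmd_code_space below. All calls used here appear
# elsewhere in this file.
def example_eval_pretrained_tfd_model():
    net = get_tfd_code_space_model()
    x_val = load_tfd_all_folds('val')
    log_prob, std, sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    return log_prob
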
def create_autoencoder(dropout_rate=0):
    # Small toy autoencoder: 3-d input, 2-d code (a layer size of 0 appears
    # to denote the output layer, whose size is the net's out_dim).
    in_dim = 3
    h_dim = 2

    net1 = nn.NeuralNet(in_dim, h_dim)
    net1.add_layer(2, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    net1.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)

    net2 = nn.NeuralNet(h_dim, in_dim)
    net2.add_layer(2, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net2.add_layer(1, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout_rate)
    net2.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=dropout_rate)
    net2.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=1.5)

    autoencoder = nn.AutoEncoder(net1, net2)
    return autoencoder

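# A hypothetical pretraining sketch for the toy autoencoder above, mirroring
# test_autoencoder_pretraining below; assumes x is an (N, 3) design matrix
# matching in_dim=3. Not part of the original module.
def example_pretrain_toy_autoencoder(x):
    ae = create_autoencoder(dropout_rate=0.1)
    pretrainer = learner.AutoEncoderPretrainer(ae)
    pretrainer.load_data(x)
    pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0,
            minibatch_size=10, max_grad_norm=10, max_iters=100, iprint=10)
    return ae
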
def test_autoencoder_pretraining():
    x_train, t_train, x_val, t_val = load_toy_data()
    in_dim = x_train.shape[1]
    h_dim = 5

    enc = nn.NeuralNet(in_dim, h_dim)
    enc.add_layer(30, nonlin_type=ly.NONLIN_NAME_SIGMOID, use_batch_normalization=True)
    enc.add_layer(20, nonlin_type=ly.NONLIN_NAME_TANH, use_batch_normalization=True)
    enc.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU, use_batch_normalization=True)
    enc.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)

    dec = nn.NeuralNet(h_dim, in_dim)
    dec.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU)
    dec.add_layer(20, nonlin_type=ly.NONLIN_NAME_TANH)
    dec.add_layer(30, nonlin_type=ly.NONLIN_NAME_SIGMOID)
    dec.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID)

    ae = nn.AutoEncoder(enc, dec)

    print ''
    print ae
    print ''

    pretrainer = learner.AutoEncoderPretrainer(ae)
    pretrainer.load_data(x_train)
    pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0,
            minibatch_size=10, max_grad_norm=10, max_iters=1000, iprint=50)

def tfd_mmd_code_space(ae_n_hids=[512, 512, 128], ae_dropout=[0.1, 0.1, 0.1],
        ae_learn_rate=1e-1, ae_momentum=0, mmd_n_hids=[10, 64, 256, 256, 1024],
        mmd_sigma=[1, 2, 5, 10, 20, 40], mmd_learn_rate=1e-1, mmd_momentum=0.9):
    """
    ae_n_hids: #hid for the encoder, bottom-up
    ae_dropout: the amount of dropout for each layer in the encoder, same order
    ae_learn_rate, ae_momentum: learning rate and momentum for fine-tuning
        the autoencoder
    mmd_n_hids: #hid for the generative net, top-down
    mmd_sigma: scales of the kernel
    mmd_learn_rate, mmd_momentum: learning rate and momentum for training
        the MMD net

    Returns the KDE log-likelihood on the validation set.
    """
    gnp.seed_rand(8)

    x_train, x_val, x_test = load_tfd_fold(0)

    common_output_base = OUTPUT_BASE_DIR + '/tfd/code_space'
    output_base = common_output_base + '/aeh_%s_dr_%s_aelr_%s_aem_%s_nh_%s_s_%s_lr_%s_m_%s' % (
            cat_list(ae_n_hids), cat_list(ae_dropout), str(ae_learn_rate), str(ae_momentum),
            cat_list(mmd_n_hids), cat_list(mmd_sigma), str(mmd_learn_rate), str(mmd_momentum))

    #######################
    # Auto-encoder training
    #######################

    n_dims = x_train.shape[1]
    h_dim = ae_n_hids[-1]

    encoder = nn.NeuralNet(n_dims, h_dim)
    for i in range(len(ae_n_hids) - 1):
        encoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[i])
    encoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[-1])

    decoder = nn.NeuralNet(h_dim, n_dims)
    for i in range(len(ae_n_hids) - 1)[::-1]:
        decoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.set_loss(ls.LOSS_NAME_BINARY_CROSSENTROPY, loss_weight=1)

    autoenc = nn.AutoEncoder(encoder=encoder, decoder=decoder)

    print ''
    print autoenc
    print ''

    learn_rate = ae_learn_rate
    final_momentum = ae_momentum
    max_iters = 15000
    #max_iters = 200

    # Greedy layer-wise pretraining; note this stage uses a fixed learning
    # rate, while ae_learn_rate only applies to the fine-tuning below.
    nn_pretrainer = learner.AutoEncoderPretrainer(autoenc)
    nn_pretrainer.load_data(x_train)
    nn_pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0,
            minibatch_size=100, max_grad_norm=10, max_iters=max_iters, iprint=100)

    # Fine-tune the full autoencoder on the reconstruction loss.
    nn_learner = learner.Learner(autoenc)
    nn_learner.set_output_dir(output_base + '/ae')
    nn_learner.load_data(x_train, x_train)

    def f_checkpoint(i_iter, w):
        nn_learner.save_checkpoint('%d' % i_iter)

    nn_learner.train_sgd(learn_rate=learn_rate, momentum=0, weight_decay=0,
            minibatch_size=100, learn_rate_schedule=None,
            momentum_schedule={50: 0.5, 200: final_momentum},
            max_grad_norm=10, learn_rate_drop_iters=0, decrease_type='linear',
            adagrad_start_iter=0, max_iters=max_iters, iprint=100,
            i_exe=2000, f_exe=f_checkpoint)
    nn_learner.save_checkpoint('best')

    ##################
    # Training MMD net
    ##################

    n_hids = mmd_n_hids
    in_dim = n_hids[0]
    out_dim = autoenc.encoder.out_dim

    net = gen.StochasticGenerativeNetWithAutoencoder(in_dim, out_dim, autoenc)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    sigma = mmd_sigma
    sigma_weights = [1] * len(sigma)
    learn_rate = mmd_learn_rate
    momentum = mmd_momentum
    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 48000
    #max_iters = 200
    i_checkpoint = 2000

    mmd_learner.set_output_dir(output_base + '/mmd')

    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True,
            sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    mmd_learner.train_sgd(minibatch_size=minibatch_size,
            n_samples_per_update=minibatch_size,
            n_sample_update_iters=n_sample_update_iters,
            learn_rate=learn_rate, momentum=momentum, weight_decay=0,
            learn_rate_schedule={10000: learn_rate / 10.0},
            momentum_schedule={10000: 1 - (1 - momentum) / 10.0},
            learn_rate_drop_iters=0, decrease_type='linear',
            adagrad_start_iter=0, max_iters=max_iters, iprint=100,
            i_exe=i_checkpoint, f_exe=f_checkpoint)
    mmd_learner.save_model()

    # Evaluation
    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    x_val = load_tfd_all_folds('val')
    x_test = load_tfd_all_folds('test')

    # Choose the KDE bandwidth on the validation set, then reuse it for test.
    log_prob, std, sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_tfd(net, x_test, sigma_range=[sigma], verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_base + '/params_and_results.cfg', {
            'ae_n_hids': ae_n_hids, 'ae_dropout': ae_dropout,
            'ae_learn_rate': ae_learn_rate, 'ae_momentum': ae_momentum,
            'mmd_n_hids': mmd_n_hids, 'mmd_sigma': mmd_sigma,
            'mmd_sigma_weights': sigma_weights,
            'mmd_learn_rate': mmd_learn_rate, 'mmd_momentum': mmd_momentum,
            'mmd_minibatch_size': minibatch_size,
            'mmd_n_sample_update_iters': n_sample_update_iters,
            'mmd_max_iters': max_iters, 'mmd_i_checkpoint': i_checkpoint,
            'val_log_prob': log_prob, 'val_std': std,
            'test_log_prob': test_log_prob, 'test_std': test_std})

    print '>>>> output_dir = %s' % output_base
    print ''

    return log_prob

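# A hypothetical driver sketch (not part of the original module): sweep the
# MMD kernel scales while keeping the other hyperparameters at their defaults,
# and keep the configuration with the best validation KDE log-likelihood
# returned by tfd_mmd_code_space. Each call trains a full model, so this is
# expensive; the scale sets below are illustrative assumptions.
def example_sweep_mmd_sigma():
    best = None
    for sigma in [[1, 2, 5, 10, 20, 40], [2, 5, 10, 20, 40, 80]]:
        log_prob = tfd_mmd_code_space(mmd_sigma=sigma)
        if best is None or log_prob > best[0]:
            best = (log_prob, sigma)
    return best
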
def load_model_from_stream(self, f):
    # Deserialize in the same order the model was written to the stream:
    # the convolutional net first, then the autoencoder.
    self.convnet = FixedConvolutionalNetwork()
    self.convnet.load_model_from_stream(f)
    self.ae = nn.AutoEncoder()
    self.ae.load_model_from_stream(f)

def f_create_void():
    return nn.AutoEncoder()