def create_default_rnn_on_nn(add_noise=False):
    in_dim = 3
    net_hid_dim = 2
    rnn_in_dim = 2
    hid_dim = 2
    out_dim = 3

    net = nn.NeuralNet(in_dim, hid_dim)
    net.add_layer(net_hid_dim, nonlin_type=ly.NONLIN_NAME_TANH,
                  dropout=(0.5 if add_noise else 0))
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID)

    rnn_net = rnn.RnnOnNeuralNet(
        net, rnn.RNN(in_dim=rnn_in_dim, out_dim=hid_dim, nonlin_type=ly.NONLIN_NAME_TANH))

    predict_net = nn.NeuralNet(hid_dim, out_dim)
    predict_net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    predict_net.set_loss(ls.LOSS_NAME_SQUARED)

    rnn_predict_net = rnn.RnnHybridNetwork(rnn_net, predict_net)
    return rnn_predict_net

def f_create():
    in_dim = 3
    out_dim = [2, 2, 2]
    n_cases = 5
    seed = 8
    dropout_rate = 0.5

    net1 = nn.NeuralNet(3, 2)
    net1.add_layer(2, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)
    net1.add_layer(0, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout_rate)
    net1.set_loss(ls.LOSS_NAME_SQUARED)

    net2 = nn.NeuralNet(out_dim[0], out_dim[1])
    net2.add_layer(0, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net2.set_loss(ls.LOSS_NAME_SQUARED)

    net3 = nn.NeuralNet(out_dim[0], out_dim[2])
    net3.add_layer(1, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)
    net3.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=0)
    net3.set_loss(ls.LOSS_NAME_SQUARED)

    return nn.YNeuralNet(net1, net2, net3)

def train_rnn_on_nn_ae():
    print ''
    print 'Training RNN autoencoder'
    print ''

    x_train, _ = generate_binary_add_data(50, int_max=64)
    x_val, _ = generate_binary_add_data(20, int_max=64)

    in_dim = x_train[0].shape[1]
    out_dim = in_dim
    hid_dim = 10
    out_hid_dim = 5
    in_hid_dim = 5

    # Decoder: an RNN whose hidden states are mapped back to the input space
    # by a small feed-forward net with squared loss.
    net = nn.NeuralNet(hid_dim, out_dim)
    net.add_layer(out_hid_dim, nonlin_type=ly.NONLIN_NAME_RELU)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)

    dec = rnn.RnnHybridNetwork(
        rnn.RNN(out_dim=hid_dim, nonlin_type=ly.NONLIN_NAME_RELU), net)

    # Encoder: a feed-forward net feeding into an RNN.
    enc_net = nn.NeuralNet(in_dim, in_hid_dim)
    enc_net.add_layer(0, nonlin_type=ly.NONLIN_NAME_RELU)
    enc = rnn.RnnOnNeuralNet(
        enc_net, rnn.RNN(in_dim=in_hid_dim, out_dim=hid_dim, nonlin_type=ly.NONLIN_NAME_RELU))

    ae = rnn.RnnAutoEncoder(encoder=enc, decoder=dec)
    print ae

    rnn_learner = rnn.SequenceLearner(ae)
    # rnn_learner.load_data(x_train, revert_sequence(x_train), x_val=x_val, t_val=revert_sequence(x_val))
    rnn_learner.load_data(x_train, x_train, x_val=x_val, t_val=x_val)

    # rnn_learner.train_gradient_descent(learn_rate=1e-2, momentum=0.5, iprint=10, max_iters=200, max_grad_norm=10)
    # rnn_learner.train_gradient_descent(learn_rate=1e-3, momentum=0.9, iprint=10, max_iters=200)
    # rnn_learner.train_gradient_descent(learn_rate=1e-2, momentum=0, iprint=10, max_iters=200, adagrad_start_iter=10)
    rnn_learner.train_sgd(minibatch_size=1, learn_rate=1e-1, momentum=0.9, iprint=100,
                          adagrad_start_iter=1, max_iters=10000, max_grad_norm=1)

    return ae

def create_autoencoder(dropout_rate=0):
    in_dim = 3
    h_dim = 2

    net1 = nn.NeuralNet(in_dim, h_dim)
    net1.add_layer(2, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    net1.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)

    net2 = nn.NeuralNet(h_dim, in_dim)
    net2.add_layer(2, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net2.add_layer(1, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout_rate)
    net2.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=dropout_rate)
    net2.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=1.5)

    autoencoder = nn.AutoEncoder(net1, net2)
    return autoencoder

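# A minimal usage sketch, not part of the original tests: training the small
# autoencoder with the learner.Learner / load_data / train_sgd interface used
# elsewhere in this file.  It assumes the remaining train_sgd options have
# sensible defaults; the toy data below is made up purely for illustration.
def _example_train_autoencoder():
    x = gnp.randn(100, 3)                      # hypothetical toy inputs, in_dim = 3
    ae = create_autoencoder(dropout_rate=0.2)
    ae_learner = learner.Learner(ae)
    ae_learner.load_data(x, x)                 # autoencoding: target == input
    ae_learner.train_sgd(learn_rate=1e-2, momentum=0.5, minibatch_size=10,
                         max_iters=100, iprint=10)
    return ae
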
def train_rnn_binary_add():
    print ''
    print 'Training RNN for binary add'
    print ''

    x_train, t_train = generate_binary_add_data(50, int_max=100)
    x_val, t_val = generate_binary_add_data(20, int_max=200)

    in_dim = x_train[0].shape[1]
    out_dim = t_train[0].shape[1]
    hid_dim = 20

    # Readout net mapping RNN hidden states to per-step outputs.
    net = nn.NeuralNet(hid_dim, out_dim)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)

    rnn_net = rnn.RnnHybridNetwork(
        rnn.RNN(in_dim, hid_dim, nonlin_type=ly.NONLIN_NAME_TANH), net)
    print rnn_net

    rnn_learner = rnn.SequenceLearner(rnn_net)
    rnn_learner.load_data(x_train, t_train, x_val=x_val, t_val=t_val)

    # rnn_learner.train_gradient_descent(learn_rate=1e-2, momentum=0.5, iprint=10, max_iters=200, max_grad_norm=10)
    # rnn_learner.train_gradient_descent(learn_rate=1e-3, momentum=0.9, iprint=10, max_iters=200)
    # rnn_learner.train_gradient_descent(learn_rate=1e-2, momentum=0, iprint=10, max_iters=200, adagrad_start_iter=10)
    rnn_learner.train_sgd(minibatch_size=1, learn_rate=1e-1, momentum=0.9, iprint=100,
                          adagrad_start_iter=1, max_iters=2000, max_grad_norm=1)

    return rnn_net

def create_stacked_net(in_dim, out_dim, dropout_rate):
    net1 = nn.NeuralNet(3, out_dim[0])
    net1.add_layer(2, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net1.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)
    net1.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=0.5)

    net2 = nn.NeuralNet(out_dim[0], out_dim[1])
    net2.add_layer(3, nonlin_type=ly.NONLIN_NAME_RELU, dropout=dropout_rate)
    net2.add_layer(0, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)

    net3 = nn.NeuralNet(out_dim[1], out_dim[2])
    net3.add_layer(1, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)
    net3.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=0)
    net3.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=1)

    return nn.StackedNeuralNet(net1, net2, net3)

def create_neuralnet(dropout_rate, loss_after_nonlin=False, use_batch_normalization=False):
    in_dim = 3
    out_dim = 2
    h1_dim = 2
    h2_dim = 2
    h3_dim = 2

    net = nn.NeuralNet(in_dim, out_dim)
    net.add_layer(h1_dim, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net.add_layer(h2_dim, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate,
                  use_batch_normalization=use_batch_normalization)
    #net.add_layer(h3_dim, nonlin_type=ly.NONLIN_NAME_RELU, dropout=dropout_rate)
    #net.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU, dropout=dropout_rate)
    #net.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU, dropout=dropout_rate)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=dropout_rate,
                  use_batch_normalization=use_batch_normalization)
    net.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=1.1, loss_after_nonlin=loss_after_nonlin)
    return net

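# A hedged sketch, not in the original file, of gradient-checking a net built
# by create_neuralnet, mirroring the pattern used in test_rnn below.  It
# assumes NeuralNet exposes the same get_param_vec / get_grad_vec /
# set_param_from_vec interface used for the RNN there; data sizes are arbitrary.
def _example_check_neuralnet_grad():
    net = create_neuralnet(dropout_rate=0)     # no dropout, so the check is deterministic
    x = gnp.randn(5, 3)                        # 5 cases, in_dim = 3
    t = gnp.randn(5, 2)                        # out_dim = 2
    net.load_target(t)

    net.clear_gradient()
    net.forward_prop(x, add_noise=False, compute_loss=True, is_test=False)
    net.backward_prop()
    backprop_grad = net.get_grad_vec()

    def f(w):
        net.clear_gradient()
        net.set_param_from_vec(w)
        net.forward_prop(x, add_noise=False, compute_loss=True, is_test=False)
        return net.get_loss()

    fdiff_grad = finite_difference_gradient(f, net.get_param_vec())
    return test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                         backprop_grad, 'Backpropagation Gradient')
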
def test_autoencoder_pretraining():
    x_train, t_train, x_val, t_val = load_toy_data()
    in_dim = x_train.shape[1]
    h_dim = 5

    # Encoder and decoder are mirror images around a 5-dimensional code.
    enc = nn.NeuralNet(in_dim, h_dim)
    enc.add_layer(30, nonlin_type=ly.NONLIN_NAME_SIGMOID, use_batch_normalization=True)
    enc.add_layer(20, nonlin_type=ly.NONLIN_NAME_TANH, use_batch_normalization=True)
    enc.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU, use_batch_normalization=True)
    enc.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)

    dec = nn.NeuralNet(h_dim, in_dim)
    dec.add_layer(10, nonlin_type=ly.NONLIN_NAME_RELU)
    dec.add_layer(20, nonlin_type=ly.NONLIN_NAME_TANH)
    dec.add_layer(30, nonlin_type=ly.NONLIN_NAME_SIGMOID)
    dec.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID)

    ae = nn.AutoEncoder(enc, dec)
    print ''
    print ae
    print ''

    pretrainer = learner.AutoEncoderPretrainer(ae)
    pretrainer.load_data(x_train)
    pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0, minibatch_size=10,
                                max_grad_norm=10, max_iters=1000, iprint=50)

def create_y_net(in_dim, out_dim, dropout_rate):
    net01 = nn.NeuralNet(3, 2)
    net01.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)

    net02 = nn.NeuralNet(2, out_dim[0])
    net02.add_layer(0, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout_rate)
    net02.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=0.5)

    net1 = nn.StackedNeuralNet(net01, net02)

    net2 = nn.NeuralNet(out_dim[0], out_dim[1])
    net2.add_layer(0, nonlin_type=ly.NONLIN_NAME_TANH, dropout=0)
    net2.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=0)

    net3 = nn.NeuralNet(out_dim[0], out_dim[2])
    net3.add_layer(1, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=dropout_rate)
    net3.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=0)
    net3.set_loss(ls.LOSS_NAME_SQUARED, loss_weight=1.5)

    ynet = nn.YNeuralNet(net1, net2, net3)
    return ynet

def create_default_rnn_ae(add_noise=False):
    in_dim = 3
    hid_dim = 2
    net_hid_dim = 2

    net = nn.NeuralNet(hid_dim, in_dim)
    net.add_layer(net_hid_dim, ly.NONLIN_NAME_TANH, dropout=(0.5 if add_noise else 0))
    net.add_layer(0, ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)

    dec = rnn.RnnHybridNetwork(rnn.RNN(out_dim=hid_dim), net)
    enc = rnn.RNN(in_dim=in_dim, out_dim=hid_dim)

    return rnn.RnnAutoEncoder(encoder=enc, decoder=dec)

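# A hedged usage sketch, not part of the original tests: training the small
# RNN autoencoder with rnn.SequenceLearner, following the same load_data /
# train_sgd pattern as train_rnn_on_nn_ae.  The random sequences below are
# made up for illustration only.
def _example_train_default_rnn_ae():
    x = [gnp.randn(10, 3) for _ in range(20)]   # 20 sequences of length 10, in_dim = 3
    ae = create_default_rnn_ae(add_noise=True)
    ae_learner = rnn.SequenceLearner(ae)
    ae_learner.load_data(x, x)                   # autoencoding: reconstruct the input sequence
    ae_learner.train_sgd(minibatch_size=1, learn_rate=1e-1, momentum=0.9, iprint=100,
                         adagrad_start_iter=1, max_iters=1000, max_grad_norm=1)
    return ae
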
def build_classification_net(in_dim, out_dim, dropout=0, use_batch_normalization=False):
    net = nn.NeuralNet(in_dim, out_dim)
    net.add_layer(128, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout,
                  use_batch_normalization=use_batch_normalization)
    net.add_layer(32, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout,
                  use_batch_normalization=use_batch_normalization)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=dropout)
    net.set_loss(ls.LOSS_NAME_CROSSENTROPY)
    return net

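# A hedged sketch, not in the original file, of training the classification
# net with learner.Learner.  It assumes the remaining train_sgd options have
# defaults and that t is a one-hot (or probability) target matrix matching
# the cross-entropy loss; all arguments are supplied by the caller.
def _example_train_classification_net(x, t):
    net = build_classification_net(x.shape[1], t.shape[1], dropout=0.5)
    clf_learner = learner.Learner(net)
    clf_learner.load_data(x, t)
    clf_learner.train_sgd(learn_rate=1e-2, momentum=0.9, weight_decay=0,
                          minibatch_size=100, max_grad_norm=10,
                          max_iters=5000, iprint=100)
    return net
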
def test_rnn():
    print 'Testing RNN'

    n_cases = 5
    in_dim = 3
    out_dim = 2
    label_dim = 2

    x = gnp.randn(n_cases, in_dim)
    t = gnp.randn(n_cases, label_dim)

    # Small readout net with squared loss on top of the RNN output.
    net = nn.NeuralNet(out_dim, label_dim)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)
    net.load_target(t)

    rnn_net = rnn.RNN(in_dim, out_dim)
    print rnn_net
    print net

    # Backpropagation gradient.
    rnn_net.clear_gradient()
    net.clear_gradient()

    h = rnn_net.forward_prop(x)
    net.forward_prop(h, add_noise=False, compute_loss=True, is_test=False)
    dh = net.backward_prop()
    rnn_net.backward_prop(dh)

    backprop_grad = rnn_net.get_grad_vec()

    # Compare against a finite-difference estimate of the gradient.
    def f(w):
        rnn_net.clear_gradient()
        rnn_net.set_param_from_vec(w)
        h = rnn_net.forward_prop(x)
        net.forward_prop(h, add_noise=False, compute_loss=True, is_test=False)
        return net.get_loss()

    fdiff_grad = finite_difference_gradient(f, rnn_net.get_param_vec())

    test_passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
                                backprop_grad, ' Backpropagation Gradient')
    print ''
    return test_passed

def create_default_rnn_hybrid(add_noise=False, has_input=True):
    in_dim = 3 if has_input else None
    hid_dim = 2
    out_dim = 2
    net_hid_dim = 2

    net = nn.NeuralNet(hid_dim, out_dim)
    net.add_layer(net_hid_dim, nonlin_type=ly.NONLIN_NAME_TANH,
                  dropout=(0 if not add_noise else 0.5))
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR)
    net.set_loss(ls.LOSS_NAME_SQUARED)

    rnn_net = rnn.RNN(in_dim, hid_dim)
    if not has_input:
        rnn_net.b = gnp.randn(hid_dim)

    return rnn.RnnHybridNetwork(rnn_net, net)

def tfd_mmd_code_space(ae_n_hids=[512, 512, 128], ae_dropout=[0.1, 0.1, 0.1],
                       ae_learn_rate=1e-1, ae_momentum=0,
                       mmd_n_hids=[10, 64, 256, 256, 1024], mmd_sigma=[1, 2, 5, 10, 20, 40],
                       mmd_learn_rate=1e-1, mmd_momentum=0.9):
    """
    ae_n_hids: #hid for the encoder, bottom-up
    ae_dropout: the amount of dropout for each layer in the encoder, same order
    ae_learn_rate, ae_momentum: .
    mmd_n_hids: #hid for the generative net, top-down
    mmd_sigma: scale of the kernel
    mmd_learn_rate, mmd_momentum: .

    Return KDE log_likelihood on the validation set.
    """
    gnp.seed_rand(8)
    x_train, x_val, x_test = load_tfd_fold(0)

    common_output_base = OUTPUT_BASE_DIR + '/tfd/code_space'
    output_base = common_output_base + '/aeh_%s_dr_%s_aelr_%s_aem_%s_nh_%s_s_%s_lr_%s_m_%s' % (
        cat_list(ae_n_hids), cat_list(ae_dropout), str(ae_learn_rate), str(ae_momentum),
        cat_list(mmd_n_hids), cat_list(mmd_sigma), str(mmd_learn_rate), str(mmd_momentum))

    #######################
    # Auto-encoder training
    #######################

    n_dims = x_train.shape[1]
    h_dim = ae_n_hids[-1]

    encoder = nn.NeuralNet(n_dims, h_dim)
    for i in range(len(ae_n_hids) - 1):
        encoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[i])
    encoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[-1])

    decoder = nn.NeuralNet(h_dim, n_dims)
    for i in range(len(ae_n_hids) - 1)[::-1]:
        decoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.set_loss(ls.LOSS_NAME_BINARY_CROSSENTROPY, loss_weight=1)

    autoenc = nn.AutoEncoder(encoder=encoder, decoder=decoder)

    print ''
    print autoenc
    print ''

    learn_rate = ae_learn_rate
    final_momentum = ae_momentum
    max_iters = 15000
    #max_iters = 200

    nn_pretrainer = learner.AutoEncoderPretrainer(autoenc)
    nn_pretrainer.load_data(x_train)
    nn_pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0, minibatch_size=100,
                                   max_grad_norm=10, max_iters=max_iters, iprint=100)

    nn_learner = learner.Learner(autoenc)
    nn_learner.set_output_dir(output_base + '/ae')
    nn_learner.load_data(x_train, x_train)

    def f_checkpoint(i_iter, w):
        nn_learner.save_checkpoint('%d' % i_iter)

    nn_learner.train_sgd(learn_rate=learn_rate, momentum=0, weight_decay=0, minibatch_size=100,
                         learn_rate_schedule=None,
                         momentum_schedule={50: 0.5, 200: final_momentum},
                         max_grad_norm=10, learn_rate_drop_iters=0, decrease_type='linear',
                         adagrad_start_iter=0, max_iters=max_iters, iprint=100,
                         i_exe=2000, f_exe=f_checkpoint)
    nn_learner.save_checkpoint('best')

    ##################
    # Training MMD net
    ##################

    n_hids = mmd_n_hids
    in_dim = n_hids[0]
    out_dim = autoenc.encoder.out_dim

    net = gen.StochasticGenerativeNetWithAutoencoder(in_dim, out_dim, autoenc)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    sigma = mmd_sigma
    sigma_weights = [1] * len(sigma)
    learn_rate = mmd_learn_rate
    momentum = mmd_momentum
    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 48000
    #max_iters = 200
    i_checkpoint = 2000

    mmd_learner.set_output_dir(output_base + '/mmd')

    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True,
                 sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    mmd_learner.train_sgd(minibatch_size=minibatch_size, n_samples_per_update=minibatch_size,
                          n_sample_update_iters=n_sample_update_iters, learn_rate=learn_rate,
                          momentum=momentum, weight_decay=0,
                          learn_rate_schedule={10000: learn_rate / 10.0},
                          momentum_schedule={10000: 1 - (1 - momentum) / 10.0},
                          learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
                          max_iters=max_iters, iprint=100, i_exe=i_checkpoint, f_exe=f_checkpoint)
    mmd_learner.save_model()

    # Evaluation
    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    x_val = load_tfd_all_folds('val')
    x_test = load_tfd_all_folds('test')

    log_prob, std, sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_tfd(net, x_test, sigma_range=[sigma], verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_base + '/params_and_results.cfg', {
        'ae_n_hids': ae_n_hids,
        'ae_dropout': ae_dropout,
        'ae_learn_rate': ae_learn_rate,
        'ae_momentum': ae_momentum,
        'mmd_n_hids': mmd_n_hids,
        'mmd_sigma': mmd_sigma,
        'mmd_sigma_weights': sigma_weights,
        'mmd_learn_rate': mmd_learn_rate,
        'mmd_momentum': mmd_momentum,
        'mmd_minibatch_size': minibatch_size,
        'mmd_n_sample_update_iters': n_sample_update_iters,
        'mmd_max_iters': max_iters,
        'mmd_i_checkpoint': i_checkpoint,
        'val_log_prob': log_prob,
        'val_std': std,
        'test_log_prob': test_log_prob,
        'test_std': test_std
    })

    print '>>>> output_dir = %s' % output_base
    print ''

    return log_prob

def create_databias_net(dropout_rate):
    net = nn.NeuralNet(3, 2)
    net.add_layer(2, nonlin_type=ly.NONLIN_NAME_TANH, dropout=dropout_rate)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_LINEAR, dropout=0)
    return net

def f_create_void():
    return nn.NeuralNet(0, 0)