def test_nn(X_train, y_train, X_test, y_test):
    """Train a small dense classifier and print its accuracy on the test set.

    The network is two 512-unit dense layers, each followed by a sigmoid and
    ``nn.DropoutLayer(0.3)``, and a 10-unit dense layer with a softmax on top.
    It is fitted for 20 epochs (learning rate 0.01, batch size 128) and the
    arg-max of its predictions is compared with the arg-max of ``y_test``.

    Args:
        X_train: Training inputs, passed straight to ``model.fit``.
        y_train: Training targets, passed straight to ``model.fit``.
        X_test: Test inputs, used both for monitoring in ``fit`` and for the
            final prediction.
        y_test: Test targets; reduced with ``argmax(axis=1)``, so presumably
            one-hot rows of shape (samples, classes) -- TODO confirm.

    Returns:
        None. The accuracy is written to stdout.
    """
    model = nn.NeuralNet()
    model.add(nn.DenseLayer(512))
    model.add(nn.SigmoidLayer())
    model.add(nn.DropoutLayer(0.3))
    model.add(nn.DenseLayer(512))
    model.add(nn.SigmoidLayer())
    model.add(nn.DropoutLayer(0.3))
    model.add(nn.DenseLayer(10))
    model.add(nn.SoftmaxLayer())

    # The value returned by fit() was never used, so it is not kept.
    model.fit(X_train, y_train, num_epochs=20,
              learning_rate=0.01, batch_size=128,
              X_test=X_test, y_test=y_test)

    predictions = model.predict(X_test)
    # NOTE(review): predictions are reduced along axis 0 while the labels are
    # reduced along axis 1. That is only consistent if predict() returns a
    # (classes, samples) array -- confirm against nn.NeuralNet.predict.
    predictions = np.argmax(predictions, axis=0)
    labels = np.argmax(y_test, axis=1)

    # print() with a single parenthesised argument behaves the same on
    # Python 2 and Python 3; the bare print statement only parsed on Python 2.
    print("accuracy of my model: {}".format(
        sum(predictions == labels) * 1.0 / len(predictions)))
def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size,
                           train_ex_cnt):
    """Train a feature-matching GAN classifier using few labels per class.

    A dense generator and a dense classifier ("discriminator") with
    ``lab_cnt`` outputs are built with Theano/Lasagne. Every epoch the
    classifier is trained on a small labeled subset plus the full training
    set used as unlabeled data, and the generator is trained to match the
    mean activations of ``layers[-3]`` between generated and real batches.
    Per-epoch losses and error rates are printed to stdout.

    Hyper-parameters (``--seed``, ``--seed_data``, ``--unlabeled_weight``,
    ``--batch_size``, ``--count``, ``--iter_limit``) are read with
    ``argparse`` from the process command line, not from the arguments of
    this function.

    Args:
        trainx: Training inputs; reshaped to ``(-1, inp_size)`` and cast to
            ``th.config.floatX``.
        trainy: Training labels; cast to int32.
        testx: Test inputs; reshaped and cast like ``trainx``.
        testy: Test labels; cast to int32.
        lab_cnt: Number of classes (size of the classifier output layer).
        inp_size: Flattened size of one input example.
        train_ex_cnt: Expected number of training examples after reshaping.

    Returns:
        None.

    Raises:
        AssertionError: If ``train_ex_cnt`` differs from the number of rows
            of the reshaped ``trainx``.
    """
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # Two independent copies of the training inputs serve as unlabeled data:
    # one feeds the classifier update, the other the generator update.
    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data: after shuffling, take the first `count` examples
    # of class (_j % lab_cnt) for _j in 0..9. With lab_cnt < 10 some classes
    # are therefore included more than once.
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.l2normalize(
            LL.DenseLayer(gen_layers[-1],
                          num_units=inp_size,
                          nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=lab_cnt,
                      nonlinearity=None,
                      train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    # The two init=True passes are evaluated only for their effect on the
    # layers: the `init_updates` attributes collected right below are
    # presumably populated by them -- TODO confirm in nn. Their outputs are
    # not needed.
    LL.get_output(gen_layers[-1], init=True)
    LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [
        u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])
    ]

    output_before_softmax_lab = LL.get_output(layers[-1],
                                              x_lab,
                                              deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1],
                                               gen_dat,
                                               deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    # Indexing with arange(batch_size) means every labeled batch must hold
    # exactly args.batch_size rows.
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
        T.nnet.softplus(
            nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(
                T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

    train_err = T.mean(
        T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

    # Feature matching: squared distance between the batch-mean activations
    # of layers[-3] on generated and on real unlabeled data.
    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1],
                                          x_lab,
                                          deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params,
                                         loss_lab +
                                         args.unlabeled_weight * loss_unl,
                                         lr=lr,
                                         mom1=0.5)
    # Running average of the classifier parameters (step 0.0001 per update);
    # the test function substitutes these averages for the live parameters.
    disc_param_avg = [
        th.shared(np.cast[th.config.floatX](0. * p.get_value()))
        for p in disc_params
    ]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)
    init_param = th.function(inputs=[x_lab],
                             outputs=None,
                             updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates +
                                   disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr],
                                  outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels],
                             outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization

    # //////////// perform training //////////////
    # From here on lr, loss_lab, loss_unl, train_err and test_err are plain
    # numbers; the symbolic variables of the same names are already compiled
    # into the functions above.
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches: tile reshuffled copies of
        # the labeled subset until it is about as long as the unlabeled set.
        trainx = []
        trainy = []
        # Floor division keeps the repeat count an int on Python 3, where
        # "/" would hand range() a float and raise TypeError.
        for t in range(trainx_unl.shape[0] // txs.shape[0]):
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size], lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            # The generator loss value is not reported, so it is discarded.
            train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size],
                lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print(
            "Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, train err = %.4f, test err = %.4f"
            % (epoch, time.time() - begin, loss_lab, loss_unl, train_err,
               test_err))
        sys.stdout.flush()
# Hyper-parameters of the triplet-embedding training script.
batch_size = 100
learning_rate = 0.0003
seed = 1
n_epochs = 200
# File name presumably used when the trained weights are saved; the save
# call is not in this part of the file -- TODO confirm.
save_model_as = 'triplet_extractor.npz'
# Widths of the three dense layers built below; the commented-out lines are
# alternative configurations.
#setting = [4048, 4048, 1024]
#setting = [2048, 1048, 100]
setting = [4048, 4048, 2048]
# The bare string below is not assigned to anything; it only documents the
# two accepted values of l_type.
''' '' if we use loss from https://arxiv.org/abs/1704.02227 'L2' if we use loss max(d_+ - d_- + \lambda, 0), where \lambda=10.0'''
l_type = 'L2'
# Embedding network: 2048-d input, then three dense layers sized by
# `setting`, each preceded by a Gaussian-noise layer (sigma 0.3 on the input,
# 0.5 afterwards).
layers = [LL.InputLayer(shape=(None, 2048))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[0]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[1]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[2]))
# Training inputs come from get_data(); only the labels of load() are kept.
# Both helpers are defined outside this part of the file.
trainx = get_data('cifar_train_x.npz')
_, trainy = load(DATA_DIR, subset='train')
print(trainx.shape)
# Symbolic input batch and the network output for it (deterministic=False).
x_lab = T.matrix()
output_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
def get_triplets(prediction, size):
nonlinearity=ln.softplus, name='gen-2'), name='gen-3')) gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-4')) gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=ln.softplus, name='gen-5'), name='gen-6')) gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-7')) gen_layers.append(nn.l2normalize(ll.DenseLayer(gen_layers[-1], num_units=28 ** 2, nonlinearity=gen_final_non, name='gen-8'))) dis_in_x = ll.InputLayer(shape=(None, 28 ** 2)) dis_in_y = ll.InputLayer(shape=(None,)) dis_layers = [dis_in_x] dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D_data, name='dis-1')) dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-2')) dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=1000, name='dis-3')) dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-4')) dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-5')) dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-6')) dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=500, name='dis-7')) dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-8')) dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-9')) dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-10')) dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-11')) dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-12')) dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-13'))
# Derive the Theano and Lasagne random streams from `rng` (created outside
# this part of the file); data shuffling gets its own generator seeded from
# args.seed_data.
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
data_rng = np.random.RandomState(args.seed_data)
# specify generative model
# 100-d uniform noise -> two 500-unit softplus layers wrapped in
# nn.batch_norm -> 28**2 sigmoid outputs wrapped in nn.l2normalize.
noise = theano_rng.uniform(size=(args.batch_size, 100))
gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=28**2, nonlinearity=T.nnet.sigmoid)))
gen_dat = LL.get_output(gen_layers[-1], deterministic=False)
# specify supervised model
# Dense layers of 1000, 500, 250, 250 and 250 units, each preceded by a
# Gaussian-noise layer, then a 10-unit output layer with no nonlinearity.
layers = [LL.InputLayer(shape=(None, 28**2))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=1000))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=500))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))
# costs
# Symbolic inputs: integer class labels, a labeled batch and an unlabeled
# batch.
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()
# Reading the data outputs. Check the 'extract_features.py' script for extracting the features & preparing the outputs of the dataset. data_outputs = numpy.load("outputs.npy") # The number of inputs (i.e. feature vector length) per sample num_inputs = data_inputs.shape[1] # Number of outputs per sample num_outputs = 4 HL1_neurons = 150 HL2_neurons = 60 # Building the network architecture. input_layer = nn.InputLayer(num_inputs) hidden_layer1 = nn.DenseLayer(num_neurons=HL1_neurons, previous_layer=input_layer, activation_function="relu") hidden_layer2 = nn.DenseLayer(num_neurons=HL2_neurons, previous_layer=hidden_layer1, activation_function="relu") output_layer = nn.DenseLayer(num_neurons=num_outputs, previous_layer=hidden_layer2, activation_function="sigmoid") # Training the network. nn.train_network(num_epochs=10, last_layer=output_layer, data_inputs=data_inputs, data_outputs=data_outputs, learning_rate=0.01)
# Conditional discriminator trunk. Before every convolution the label input
# dis_in_y is merged into the feature maps with ConvConcatLayer.
# NOTE(review): several name= strings repeat ('dis-02', 'dis-03', 'dis-23',
# 'dis-19'). If anything looks layers or parameters up by name this will
# collide -- confirm whether unique names were intended.
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-20'))
# 64-filter 4x4 convolution, stride 2, leaky ReLU, wrapped in nn.batch_norm,
# followed by dropout with p=0.2.
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 64, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))
dis_layers.append(ll.DropoutLayer(dis_layers[-1], p=0.2, name='dis-23'))
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-30'))
# Same pattern with 128 filters.
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 128, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))
dis_layers.append(ll.DropoutLayer(dis_layers[-1], p=0.2, name='dis-23'))
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-40'))
# 256 filters, no dropout afterwards.
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 256, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))
# Flatten to 256*4*4 features per example and merge the label once more.
dis_layers.append(ll.ReshapeLayer(dis_layers[-1], (-1, 256*4*4), name='dis-03'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-70'))
# Three separate single-unit sigmoid heads on top of the shared trunk.
dis1 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]
dis2 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]
dis3 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]
# Full layer lists (shared trunk + one head each).
dis1_layers = dis_layers + dis1
dis2_layers = dis_layers + dis2
dis3_layers = dis_layers + dis3
''' objectives '''
# Generator output for the symbolic labels sym_y_g and noise sym_z_rand, and
# classifier output for the labeled batch sym_x_l (both non-deterministic).
gen_out_x = ll.get_output(layer_or_layers=gen_layers[-1], inputs={gen_in_y: sym_y_g, gen_in_z: sym_z_rand}, deterministic=False)
cla_out_y_l = ll.get_output(cla_layers[-1], sym_x_l, deterministic=False)
def main(num, seed, args):
    """Train a random field plus generator on one MNIST digit and score it.

    Only the training images whose label equals ``num`` are used for
    training. A dense generator proposes samples, the samples are "revised"
    by a few gradient steps on the random-field output (SGLD or SGHMC, on x
    only or jointly on x and the latent h), the random field is then updated
    on revised versus real data, and the generator is regressed onto the
    revised samples. After each epoch the scalar network output is used as a
    score to compute ROC-AUC for "test label == num".

    Args:
        num: Digit class treated as the positive class.
        seed: Seed for the NumPy/Theano/Lasagne random streams.
        args: Namespace-like object. Attributes read here: ``data_root``,
            ``seed_data``, ``revison_method`` (spelled as in the caller),
            ``L``, ``sig``, ``batch_size``, ``fxp``, ``lrd``, ``lrg``,
            ``beta``, ``gradient_coefficient``, ``noise_coefficient``,
            ``max_e``, ``load`` and ``sf``.

    Returns:
        The best ROC-AUC observed over all epochs.

    Raises:
        ValueError: If ``args.revison_method`` is not one of the four
            supported revision schemes. Previously this surfaced only as a
            NameError inside the first training batch.
    """
    import time
    import numpy as np
    import theano as th
    import theano.tensor as T
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import lasagne
    import lasagne.layers as ll
    from lasagne.init import Normal
    # NOTE(review): dnn is not referenced in this function; the import is
    # kept because removing it could change import-time behaviour -- confirm.
    from lasagne.layers import dnn
    import nn
    import sys
    from checkpoints import save_weights, load_weights
    # Hoisted out of the epoch loop, where they were re-executed each time.
    import os
    from sklearn.metrics import roc_auc_score

    # fixed random seeds
    rng = np.random.RandomState(seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))

    # load MNIST data
    data = np.load(args.data_root)
    trainx = np.concatenate([data['x_train'], data['x_valid']],
                            axis=0).astype(th.config.floatX)
    trainy = np.concatenate([data['y_train'],
                             data['y_valid']]).astype(np.int32)
    testx = data['x_test'].astype(th.config.floatX)
    testy = data['y_test'].astype(np.int32)

    rng_data = np.random.RandomState(args.seed_data)
    inds = rng_data.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    # Only images of the target digit are used as (unlabeled) training data.
    trainx_unl = trainx[trainy == num]

    # NOTE(review): this shuffle uses the global np.random state, not one of
    # the seeded generators above, so the test order is not reproducible.
    inds = np.arange(len(testy))[np.random.permutation(len(testy))]
    testx = testx[inds]
    testy = testy[inds]
    print(len(trainx_unl))

    # specify generator
    h = T.matrix()
    gen_layers = [ll.InputLayer(shape=(None, 100))]
    gen_layers.append(
        nn.batch_norm(ll.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    W=Normal(0.05),
                                    nonlinearity=T.nnet.softplus,
                                    name='g1'),
                      g=None,
                      name='g_b1'))
    gen_layers.append(
        nn.batch_norm(ll.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    W=Normal(0.05),
                                    nonlinearity=T.nnet.softplus,
                                    name='g2'),
                      g=None,
                      name='g_b2'))
    gen_layers.append(
        nn.l2normalize(
            ll.DenseLayer(gen_layers[-1],
                          num_units=28**2,
                          W=Normal(0.05),
                          nonlinearity=T.nnet.sigmoid,
                          name='g3')))
    gen_dat = ll.get_output(gen_layers[-1], h, deterministic=False)

    # specify random field: dense stack ending in one linear output unit
    layers = [ll.InputLayer(shape=(None, 28**2))]
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=1000,
                      theta=Normal(0.05),
                      name='d_1'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=500,
                      theta=Normal(0.05),
                      name='d_2'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_3'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_4'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_5'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=1,
                      theta=Normal(0.05),
                      nonlinearity=None,
                      train_scale=True,
                      name='d_6'))

    # revision method: unroll args.L symbolic update steps and compile them
    # into `revision`.
    if args.revison_method == 'revision_x_sgld':
        # only x will be revised, SGLD
        x_revised = gen_dat
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        for i in range(args.L):
            loss_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            x_revised = (x_revised + gradient_coefficient * gradient_x +
                         noise_coefficient *
                         theano_rng.normal(size=T.shape(x_revised)))

        revision = th.function(
            inputs=[h, gradient_coefficient, noise_coefficient],
            outputs=x_revised)
    elif args.revison_method == 'revision_x_sghmc':
        # only x will be revised, SGHMC
        x_revised = gen_dat + args.sig * theano_rng.normal(
            size=T.shape(gen_dat))
        gradient_coefficient = T.scalar()
        beta = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
        for i in range(args.L):
            loss_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            v_x = beta * v_x + gradient_coefficient * gradient_x
            x_revised = (x_revised + v_x + noise_coefficient *
                         theano_rng.normal(size=T.shape(x_revised)))
            # NOTE(review): the flattened source does not show whether this
            # clip ran on every step or once after the loop; per-step is
            # assumed here -- confirm against the original layout.
            x_revised = T.clip(x_revised, 0., 1.)

        revision = th.function(
            inputs=[h, beta, gradient_coefficient, noise_coefficient],
            outputs=x_revised,
            on_unused_input='ignore')
    elif args.revison_method == 'revision_joint_sgld':
        # x and h will be revised jointly, SGLD
        x_revised = gen_dat
        h_revised = h
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        for i in range(args.L):
            loss_x_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            x_revised = (x_revised + gradient_coefficient * gradient_x +
                         noise_coefficient *
                         theano_rng.normal(size=T.shape(x_revised)))
            if i == 0:
                # First step: differentiate with respect to the input h.
                loss_h_revision = T.sum(T.square(x_revised - gen_dat)) + T.sum(
                    T.square(h)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h])[0]
                h_revised = (h - gradient_coefficient * gradient_h +
                             noise_coefficient *
                             theano_rng.normal(size=T.shape(h)))
            else:
                # Later steps use the generator output of the previous
                # h_revised, computed at the end of the preceding iteration.
                loss_h_revision = T.sum(
                    T.square(x_revised - gen_dat_h_revised)) + T.sum(
                        T.square(h_revised)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                h_revised = (h_revised - gradient_coefficient * gradient_h +
                             noise_coefficient *
                             theano_rng.normal(size=T.shape(h)))
            gen_dat_h_revised = ll.get_output(gen_layers[-1],
                                              h_revised,
                                              deterministic=False)

        revision = th.function(
            inputs=[h, gradient_coefficient, noise_coefficient],
            outputs=[x_revised, h_revised])
    elif args.revison_method == 'revision_joint_sghmc':
        # x and h will be revised jointly, SGHMC
        x_revised = gen_dat
        h_revised = h
        beta = T.scalar()
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
        for i in range(args.L):
            loss_x_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            v_x = (v_x * beta + gradient_coefficient * gradient_x +
                   noise_coefficient *
                   theano_rng.normal(size=T.shape(x_revised)))
            x_revised = x_revised + v_x
            if i == 0:
                loss_h_revision = T.sum(T.square(x_revised - gen_dat)) + T.sum(
                    T.square(h)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h])[0]
                v_h = (gradient_coefficient * gradient_h + noise_coefficient *
                       theano_rng.normal(size=T.shape(h)))
                h_revised = h - v_h
            else:
                loss_h_revision = T.sum(
                    T.square(x_revised - gen_dat_h_revised)) + T.sum(
                        T.square(h_revised)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                v_h = (v_h * beta + gradient_coefficient * gradient_h +
                       noise_coefficient *
                       theano_rng.normal(size=T.shape(h)))
                h_revised = h_revised - v_h
            gen_dat_h_revised = ll.get_output(gen_layers[-1],
                                              h_revised,
                                              deterministic=False)

        revision = th.function(
            inputs=[h, beta, gradient_coefficient, noise_coefficient],
            outputs=[x_revised, h_revised])
    else:
        # Fail before compiling anything else instead of hitting a NameError
        # on `revision` in the first training batch.
        raise ValueError('unknown revison_method: %r' %
                         (args.revison_method, ))

    # From here on x_revised is a plain symbolic input: the revised samples
    # are computed by `revision` and fed back in as data.
    x_revised = T.matrix()
    x_unl = T.matrix()

    # Evaluated only for its effect on the layers; the init_updates gathered
    # below are presumably populated by this init=True pass -- TODO confirm.
    ll.get_output(layers[-1], x_unl, deterministic=False, init=True)
    init_updates = [u for l in layers for u in getattr(l, 'init_updates', [])]

    output_before_softmax_unl = ll.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_revised = ll.get_output(layers[-1],
                                                  x_revised,
                                                  deterministic=False)

    u_unl = T.mean(output_before_softmax_unl)
    u_revised = T.mean(output_before_softmax_revised)

    # unsupervised loss: raise the output on real data, lower it on revised
    # samples, plus a squared-output penalty weighted by args.fxp
    loss_unl = u_revised - u_unl + T.mean(output_before_softmax_unl**
                                          2) * args.fxp

    # Theano functions for training the random field
    lr = T.scalar()
    RF_params = ll.get_all_params(layers, trainable=True)
    RF_param_updates = lasagne.updates.rmsprop(loss_unl,
                                               RF_params,
                                               learning_rate=lr)
    train_RF = th.function(inputs=[x_revised, x_unl, lr],
                           outputs=[loss_unl, u_unl],
                           updates=RF_param_updates)

    # weight norm initialization
    init_param = th.function(inputs=[x_unl],
                             outputs=None,
                             updates=init_updates)

    # prediction on test data
    output_before_softmax = ll.get_output(layers[-1],
                                          x_unl,
                                          deterministic=True)
    test_batch = th.function(inputs=[x_unl], outputs=output_before_softmax)

    # loss on generator: regress generated samples onto the revised ones
    loss_G = T.sum(T.square(x_revised - gen_dat))

    # Theano functions for training the generator
    gen_params = ll.get_all_params(gen_layers, trainable=True)
    gen_param_updates = lasagne.updates.rmsprop(loss_G,
                                                gen_params,
                                                learning_rate=lr)
    train_G = th.function(inputs=[h, x_revised, lr],
                          outputs=None,
                          updates=gen_param_updates)

    # //////////// perform training //////////////
    # The names below now hold plain numbers; the symbolic variables of the
    # same names are already baked into the compiled functions.
    lr_D = args.lrd
    lr_G = args.lrg
    beta = args.beta
    gradient_coefficient = args.gradient_coefficient
    noise_coefficient = args.noise_coefficient
    acc_all = []
    best_acc = 0
    nr_batches_train = len(trainx_unl) // args.batch_size
    nr_batches_test = int(np.ceil(len(testy) / float(args.batch_size)))

    for epoch in range(args.max_e):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]

        if epoch == 0:
            init_param(trainx[:500])  # data based initialization
            if args.load:
                load_weights('mnist_model/mnist_jrf_' + args.load + '.npy',
                             layers + gen_layers)

        # train
        loss_unl = 0.
        f_unl_all = 0.
        for t in range(nr_batches_train):
            h = np.cast[th.config.floatX](rng.uniform(size=(args.batch_size,
                                                            100)))
            if args.revison_method == 'revision_x_sgld':
                x_revised = revision(h, gradient_coefficient,
                                     noise_coefficient)
            elif args.revison_method == 'revision_x_sghmc':
                x_revised = revision(h, beta, gradient_coefficient,
                                     noise_coefficient)
            elif args.revison_method == 'revision_joint_sgld':
                x_revised, h = revision(h, gradient_coefficient,
                                        noise_coefficient)
            elif args.revison_method == 'revision_joint_sghmc':
                x_revised, h = revision(h, beta, gradient_coefficient,
                                        noise_coefficient)

            ran_from = t * args.batch_size
            ran_to = (t + 1) * args.batch_size

            # update random field
            lo_unl, f_unl = train_RF(x_revised, trainx_unl[ran_from:ran_to],
                                     lr_D)
            loss_unl += lo_unl
            f_unl_all += f_unl

            # update generator
            train_G(h, x_revised, lr_G)
        loss_unl /= nr_batches_train
        f_unl_all /= nr_batches_train

        # test: score every test image; the last batch is aligned to the end
        # of the array so that it is always full-sized.
        test_pred = np.zeros((len(testy), 1), dtype=th.config.floatX)
        for t in range(nr_batches_test):
            last_ind = np.minimum((t + 1) * args.batch_size, len(testy))
            first_ind = last_ind - args.batch_size
            test_pred[first_ind:last_ind] = test_batch(
                testx[first_ind:last_ind])
        test_pred = test_pred[:, 0]

        # Despite its name, test_err holds the ROC-AUC (higher is better).
        test_err = roc_auc_score(testy == num, test_pred)
        acc_all.append(test_err)
        if acc_all[-1] > best_acc:
            best_acc = acc_all[-1]

        # NOTE(review): the flattened source does not show whether only the
        # 'best acc' line or the whole report was guarded by this check; the
        # whole report is kept inside it here -- confirm.
        if (epoch + 1) % 10 == 0:
            print('best acc:', best_acc, test_err)
            f_test_all = np.mean(test_pred)
            print(
                "epoch %d, time = %ds, loss_unl = %.4f, f unl = %.4f, f test = %.4f "
                % (epoch + 1, time.time() - begin, loss_unl, f_unl_all,
                   f_test_all))
            sys.stdout.flush()

        # checkpoint every 50 epochs
        if (epoch + 1) % 50 == 0:
            if not os.path.exists('mnist_model'):
                os.mkdir('mnist_model')
            params = ll.get_all_params(layers + gen_layers)
            save_weights(
                'mnist_model/nrf_dec_ep%d_num%d_seed%d_%s.npy' %
                (epoch + 1, num, seed, args.sf), params)

        # stop once the unsupervised loss has dropped below -100
        if loss_unl < -100:
            break
    return best_acc
def create_network(num_neurons_input,
                   num_neurons_output,
                   num_neurons_hidden_layers=[],
                   output_activation="relu",
                   hidden_activations="relu",
                   parameters_validated=False):
    """
    Creates a neural network as a linked list between the input, hidden, and output layers where the layer at index N (which is the last/output layer) references the layer at index N-1 (which is a hidden layer) using its previous_layer attribute. The input layer does not reference any layer because it is the last layer in the linked list.

    In addition to the parameters_validated parameter, this function accepts the same parameters passed to the constructor of the gann.GANN class except for the num_solutions parameter because only a single network is created out of the create_network() function.

    The lists passed in num_neurons_hidden_layers and hidden_activations are only read; they are not modified, so the same lists can be reused to create several networks.

    num_neurons_input: Number of neurons in the input layer.
    num_neurons_output: Number of neurons in the output layer.
    num_neurons_hidden_layers=[]: A list holding the number of neurons in the hidden layer(s). If empty [], then no hidden layers are used. For each int value it holds, a hidden layer is created with the number of hidden neurons specified by the corresponding int value. For example, num_neurons_hidden_layers=[10] creates a single hidden layer with 10 neurons. num_neurons_hidden_layers=[10, 5] creates 2 hidden layers with 10 neurons for the first and 5 neurons for the second hidden layer.
    output_activation="relu": The name of the activation function of the output layer which defaults to "relu".
    hidden_activations="relu": The name(s) of the activation function(s) of the hidden layer(s). It defaults to "relu". If passed as a string, the specified activation function is used across all the hidden layers. If passed as a list, then it must have the same length as the num_neurons_hidden_layers list; a one-to-one mapping between the two lists is used.
    parameters_validated=False: If False, then the parameters are not validated and a call to the validate_network_parameters() function is made.

    Returns the reference to the last layer in the network architecture which is the output layer. Based on such reference, all network layers can be fetched.

    Raises IndexError if hidden_activations is a list shorter than num_neurons_hidden_layers and validation was skipped.
    """

    # When parameters_validated is False, then the parameters are not yet validated and a call to validate_network_parameters() is required.
    if not parameters_validated:
        # Validating the passed parameters before creating the network.
        hidden_activations = validate_network_parameters(
            num_neurons_input=num_neurons_input,
            num_neurons_output=num_neurons_output,
            num_neurons_hidden_layers=num_neurons_hidden_layers,
            output_activation=output_activation,
            hidden_activations=hidden_activations)

    # Work on a private copy so that neither the caller's list nor the shared
    # default [] is ever changed (the layers used to be pop()-ed off it).
    hidden_sizes = list(num_neurons_hidden_layers)

    # A single activation name applies to every hidden layer. This matters
    # when validation was skipped and the string was passed through as is.
    if isinstance(hidden_activations, str):
        hidden_activations = [hidden_activations] * len(hidden_sizes)

    # Creating the input layer as an instance of the nn.InputLayer class.
    previous_layer = nn.InputLayer(num_neurons_input)

    # Each hidden layer is connected to the layer created just before it; the
    # first one is connected to the input layer.
    for layer_idx, num_neurons in enumerate(hidden_sizes):
        previous_layer = nn.DenseLayer(
            num_neurons=num_neurons,
            previous_layer=previous_layer,
            activation_function=hidden_activations[layer_idx])

    # The output layer is connected to the last hidden layer or, when there
    # are no hidden layers, directly to the input layer.
    output_layer = nn.DenseLayer(num_neurons=num_neurons_output,
                                 previous_layer=previous_layer,
                                 activation_function=output_activation)

    # Returning the reference to the last layer in the network architecture which is the output layer. Based on such reference, all network layers can be fetched.
    return output_layer