Example 1
import numpy as np
import nn  # project-local neural-network module (NeuralNet, DenseLayer, ...)


def test_nn(X_train, y_train, X_test, y_test):
    model = nn.NeuralNet()
    model.add(nn.DenseLayer(512))
    model.add(nn.SigmoidLayer())
    model.add(nn.DropoutLayer(0.3))

    model.add(nn.DenseLayer(512))
    model.add(nn.SigmoidLayer())
    model.add(nn.DropoutLayer(0.3))

    model.add(nn.DenseLayer(10))
    model.add(nn.SoftmaxLayer())

    my_history = model.fit(X_train, y_train, num_epochs=20,
                           learning_rate=0.01, batch_size=128,
                           X_test=X_test, y_test=y_test)
    predictions = model.predict(X_test)
    # argmax over the class axis, assuming predict() returns (num_samples, num_classes)
    predictions = np.argmax(predictions, axis=1)
    labels = np.argmax(y_test, axis=1)
    print "accuracy of my model: {}".format(sum(predictions == labels)*1.0/len(predictions))
Example 2
import argparse
import sys
import time

import numpy as np
import theano as th
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as LL
import nn  # project-local layers/utilities (batch_norm, l2normalize, log_sum_exp, adam_updates, ...)


def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size,
                           train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])

    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.l2normalize(
            LL.DenseLayer(gen_layers[-1],
                          num_units=inp_size,
                          nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=lab_cnt,
                      nonlinearity=None,
                      train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

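    # Two forward passes with init=True trigger the data-dependent (weight-norm style)
    # initialization of the generator and classifier; the resulting updates are
    # collected from each layer's init_updates attribute below.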
    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [
        u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])
    ]

    output_before_softmax_lab = LL.get_output(layers[-1],
                                              x_lab,
                                              deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1],
                                               gen_dat,
                                               deterministic=False)

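    # Semi-supervised GAN objective: the labeled loss is cross-entropy written with
    # log-sum-exp; the unlabeled loss pushes the log-sum-exp of the class logits up
    # for real unlabeled data and down (via softplus) for generated samples.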
    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
        T.nnet.softplus(
            nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(
                T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

    train_err = T.mean(
        T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

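    # Generator loss: feature matching -- match the mean activations of an intermediate
    # classifier layer (layers[-3]) between generated and real unlabeled data.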
    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1],
                                          x_lab,
                                          deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params,
                                         loss_lab +
                                         args.unlabeled_weight * loss_unl,
                                         lr=lr,
                                         mom1=0.5)
    disc_param_avg = [
        th.shared(np.cast[th.config.floatX](0. * p.get_value()))
        for p in disc_params
    ]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)
    init_param = th.function(inputs=[x_lab],
                             outputs=None,
                             updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates +
                                   disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr],
                                  outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels],
                             outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx = []
        trainy = []
        for t in range(trainx_unl.shape[0] // txs.shape[0]):  # integer division so range() gets an int
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size], lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size], lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print(
            "Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, train err = %.4f, test err = %.4f"
            % (epoch, time.time() - begin, loss_lab, loss_unl, train_err,
               test_err))
        sys.stdout.flush()
Example 3
batch_size = 100
learning_rate = 0.0003
seed = 1
n_epochs = 200

save_model_as = 'triplet_extractor.npz'
#setting = [4048, 4048, 1024]
#setting = [2048, 1048, 100]
setting = [4048, 4048, 2048]
# l_type selects the training loss:
#   ''   : the loss from https://arxiv.org/abs/1704.02227
#   'L2' : the margin loss max(d_+ - d_- + lambda, 0) with lambda = 10.0
l_type = 'L2'
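
As a reference, here is a minimal sketch of the 'L2' margin loss described above, written on Theano tensors (the helper name and variables are illustrative, not part of this script):

import theano.tensor as T

def triplet_l2_loss(anchor, positive, negative, lam=10.0):
    # squared Euclidean distances from the anchor to the positive / negative embeddings
    d_pos = T.sum(T.square(anchor - positive), axis=1)
    d_neg = T.sum(T.square(anchor - negative), axis=1)
    # hinge on the margin: max(d_+ - d_- + lambda, 0), averaged over the batch
    return T.mean(T.maximum(d_pos - d_neg + lam, 0.0))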

layers = [LL.InputLayer(shape=(None, 2048))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[0]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[1]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[2]))

trainx = get_data('cifar_train_x.npz')
_, trainy = load(DATA_DIR, subset='train')

print(trainx.shape)

x_lab = T.matrix()
output_lab = LL.get_output(layers[-1], x_lab, deterministic=False)


def get_triplets(prediction, size):
Example 4
                                              nonlinearity=ln.softplus, name='gen-2'), name='gen-3'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-4'))

gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=ln.softplus, name='gen-5'), name='gen-6'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-7'))
gen_layers.append(nn.l2normalize(ll.DenseLayer(gen_layers[-1], num_units=28 ** 2,
                                               nonlinearity=gen_final_non, name='gen-8')))

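# The discriminator below is conditioned on the label: dis_in_y is injected through
# MLPConcatLayer before every dense layer, mirroring the class conditioning used in
# the generator above.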
dis_in_x = ll.InputLayer(shape=(None, 28 ** 2))
dis_in_y = ll.InputLayer(shape=(None,))
dis_layers = [dis_in_x]

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D_data, name='dis-1'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-2'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=1000, name='dis-3'))

dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-4'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-5'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-6'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=500, name='dis-7'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-8'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-9'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-10'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-11'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-12'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-13'))
Example 5
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
data_rng = np.random.RandomState(args.seed_data)

# specify generative model
noise = theano_rng.uniform(size=(args.batch_size, 100))
gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=28**2, nonlinearity=T.nnet.sigmoid)))
gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

# specify supervised model
layers = [LL.InputLayer(shape=(None, 28**2))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=1000))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=500))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))

# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()
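# (This is essentially the MNIST instantiation of the architecture in Example 2:
#  the input size is fixed to 28**2 pixels and the classifier has 10 output units.)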
Example 6
import numpy
import nn  # local module providing InputLayer, DenseLayer, and train_network (import path assumed)

# Reading the data inputs/outputs. Check the 'extract_features.py' script for extracting
# the features and preparing the outputs of the dataset.
data_inputs = numpy.load("dataset_features.npy")  # assumed filename for the features prepared by extract_features.py
data_outputs = numpy.load("outputs.npy")

# The number of inputs (i.e. feature vector length) per sample
num_inputs = data_inputs.shape[1]
# Number of outputs per sample
num_outputs = 4

HL1_neurons = 150
HL2_neurons = 60

# Building the network architecture.
input_layer = nn.InputLayer(num_inputs)
hidden_layer1 = nn.DenseLayer(num_neurons=HL1_neurons,
                              previous_layer=input_layer,
                              activation_function="relu")
hidden_layer2 = nn.DenseLayer(num_neurons=HL2_neurons,
                              previous_layer=hidden_layer1,
                              activation_function="relu")
output_layer = nn.DenseLayer(num_neurons=num_outputs,
                             previous_layer=hidden_layer2,
                             activation_function="sigmoid")

# Training the network.
nn.train_network(num_epochs=10,
                 last_layer=output_layer,
                 data_inputs=data_inputs,
                 data_outputs=data_outputs,
                 learning_rate=0.01)
Example 7
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-20'))
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 64, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))

dis_layers.append(ll.DropoutLayer(dis_layers[-1], p=0.2, name='dis-23'))
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-30'))
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 128, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))

dis_layers.append(ll.DropoutLayer(dis_layers[-1], p=0.2, name='dis-23'))
dis_layers.append(ConvConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-40'))
dis_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(dis_layers[-1], 256, filter_size=4, stride=(2,2), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu, name='dis-02'), name='dis-03'))

dis_layers.append(ll.ReshapeLayer(dis_layers[-1], (-1, 256*4*4), name='dis-03'))

dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-70'))

dis1 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]
dis2 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]
dis3 = [nn.DenseLayer(dis_layers[-1], num_units=1, nonlinearity=ln.sigmoid, name='dis-19')]

dis1_layers = dis_layers + dis1
dis2_layers = dis_layers + dis2
dis3_layers = dis_layers + dis3
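# dis1_layers/dis2_layers/dis3_layers share the convolutional trunk defined above and
# differ only in their final one-unit sigmoid head, giving three discriminator outputs
# over the same features.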


'''
objectives
'''
gen_out_x = ll.get_output(layer_or_layers=gen_layers[-1], inputs={gen_in_y: sym_y_g, gen_in_z: sym_z_rand},
                          deterministic=False)

cla_out_y_l = ll.get_output(cla_layers[-1], sym_x_l, deterministic=False)
Example 8
def main(num, seed, args):
    import time
    import numpy as np
    import theano as th
    import theano.tensor as T
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import lasagne
    import lasagne.layers as ll
    from lasagne.init import Normal
    from lasagne.layers import dnn
    import nn
    import sys
    from checkpoints import save_weights, load_weights

    # fixed random seeds
    rng = np.random.RandomState(seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))

    #logsoftmax for computing entropy
    def logsoftmax(x):
        xdev = x - T.max(x, 1, keepdims=True)
        lsm = xdev - T.log(T.sum(T.exp(xdev), 1, keepdims=True))
        return lsm

    #load MNIST data
    data = np.load(args.data_root)
    trainx = np.concatenate([data['x_train'], data['x_valid']],
                            axis=0).astype(th.config.floatX)
    trainy = np.concatenate([data['y_train'],
                             data['y_valid']]).astype(np.int32)
    testx = data['x_test'].astype(th.config.floatX)
    testy = data['y_test'].astype(np.int32)
    rng_data = np.random.RandomState(args.seed_data)
    inds = rng_data.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]

    trainx_unl = trainx[trainy == num]

    inds = np.arange(len(testy))[np.random.permutation(len(testy))]
    testx = testx[inds]
    testy = testy[inds]

    print(len(trainx_unl))

    # specify generator
    h = T.matrix()
    gen_layers = [ll.InputLayer(shape=(None, 100))]
    gen_layers.append(
        nn.batch_norm(ll.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    W=Normal(0.05),
                                    nonlinearity=T.nnet.softplus,
                                    name='g1'),
                      g=None,
                      name='g_b1'))
    gen_layers.append(
        nn.batch_norm(ll.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    W=Normal(0.05),
                                    nonlinearity=T.nnet.softplus,
                                    name='g2'),
                      g=None,
                      name='g_b2'))
    gen_layers.append(
        nn.l2normalize(
            ll.DenseLayer(gen_layers[-1],
                          num_units=28**2,
                          W=Normal(0.05),
                          nonlinearity=T.nnet.sigmoid,
                          name='g3')))
    gen_dat = ll.get_output(gen_layers[-1], h, deterministic=False)

    # specify random field
    layers = [ll.InputLayer(shape=(None, 28**2))]
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=1000,
                      theta=Normal(0.05),
                      name='d_1'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=500,
                      theta=Normal(0.05),
                      name='d_2'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_3'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_4'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=250,
                      theta=Normal(0.05),
                      name='d_5'))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=1,
                      theta=Normal(0.05),
                      nonlinearity=None,
                      train_scale=True,
                      name='d_6'))

    #revision method
    if args.revison_method == 'revision_x_sgld':  #only x will be revised, SGLD
        x_revised = gen_dat
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
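        # SGLD-style revision: take L gradient-ascent steps on the random-field
        # score of x_revised, adding Gaussian noise at each step.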
        for i in range(args.L):
            loss_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            x_revised = x_revised + gradient_coefficient * gradient_x + noise_coefficient * theano_rng.normal(
                size=T.shape(x_revised))

        revision = th.function(
            inputs=[h, gradient_coefficient, noise_coefficient],
            outputs=x_revised)

    elif args.revison_method == 'revision_x_sghmc':  #only x will be revised, SGHMC
        x_revised = gen_dat + args.sig * theano_rng.normal(
            size=T.shape(gen_dat))
        gradient_coefficient = T.scalar()
        beta = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
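        # SGHMC-style revision: v_x is a momentum buffer, damped by beta and driven
        # by the score gradient plus injected Gaussian noise; x_revised is clipped
        # back to [0, 1] after each step.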
        for i in range(args.L):
            # x_revised=x_revised
            loss_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            v_x = beta * v_x + gradient_coefficient * gradient_x
            x_revised = x_revised + v_x + noise_coefficient * theano_rng.normal(
                size=T.shape(x_revised))
            x_revised = T.clip(x_revised, 0., 1.)

        revision = th.function(
            inputs=[h, beta, gradient_coefficient, noise_coefficient],
            outputs=x_revised,
            on_unused_input='ignore')
    elif args.revison_method == 'revision_joint_sgld':  #x and h will be revised jointly, SGLD
        x_revised = gen_dat
        h_revised = h
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        for i in range(args.L):

            loss_x_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            x_revised = x_revised + gradient_coefficient * gradient_x + noise_coefficient * theano_rng.normal(
                size=T.shape(x_revised))
            if i == 0:
                loss_h_revision = T.sum(T.square(x_revised - gen_dat)) + T.sum(
                    T.square(h)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h])[0]
                h_revised = h - gradient_coefficient * gradient_h + noise_coefficient * theano_rng.normal(
                    size=T.shape(h))
            else:
                loss_h_revision = T.sum(
                    T.square(x_revised - gen_dat_h_revised)) + T.sum(
                        T.square(h_revised)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                h_revised = h_revised - gradient_coefficient * gradient_h + noise_coefficient * theano_rng.normal(
                    size=T.shape(h))
            gen_dat_h_revised = ll.get_output(gen_layers[-1],
                                              h_revised,
                                              deterministic=False)

        revision = th.function(
            inputs=[h, gradient_coefficient, noise_coefficient],
            outputs=[x_revised, h_revised])
    elif args.revison_method == 'revision_joint_sghmc':  #x and h will be revised jointly, SGHMC
        x_revised = gen_dat
        h_revised = h
        beta = T.scalar()
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
        for i in range(args.L):

            loss_x_revision = T.sum(
                ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            v_x = v_x * beta + gradient_coefficient * gradient_x + noise_coefficient * theano_rng.normal(
                size=T.shape(x_revised))
            x_revised = x_revised + v_x

            if i == 0:
                loss_h_revision = T.sum(T.square(x_revised - gen_dat)) + T.sum(
                    T.square(h)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h])[0]
                v_h = gradient_coefficient * gradient_h + noise_coefficient * theano_rng.normal(
                    size=T.shape(h))
                h_revised = h - v_h

            else:
                loss_h_revision = T.sum(
                    T.square(x_revised - gen_dat_h_revised)) + T.sum(
                        T.square(h_revised)) / args.batch_size
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                v_h = v_h * beta + gradient_coefficient * gradient_h + noise_coefficient * theano_rng.normal(
                    size=T.shape(h))
                h_revised = h_revised - v_h
                gen_dat_h_revised = ll.get_output(gen_layers[-1],
                                                  h_revised,
                                                  deterministic=False)

        revision = th.function(
            inputs=[h, beta, gradient_coefficient, noise_coefficient],
            outputs=[x_revised, h_revised])

    x_revised = T.matrix()

    x_unl = T.matrix()
    temp = ll.get_output(layers[-1], x_unl, deterministic=False, init=True)
    init_updates = [u for l in layers for u in getattr(l, 'init_updates', [])]

    output_before_softmax_unl = ll.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_revised = ll.get_output(layers[-1],
                                                  x_revised,
                                                  deterministic=False)

    u_unl = T.mean(output_before_softmax_unl)
    u_revised = T.mean(output_before_softmax_revised)

    #unsupervised loss
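    # train_RF minimizes this, i.e. it raises the field output on real unlabeled data,
    # lowers it on revised (generated) samples, and quadratically regularizes the real
    # outputs with weight args.fxp.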
    loss_unl = u_revised - u_unl + T.mean(output_before_softmax_unl**
                                          2) * args.fxp

    # Theano functions for training the random field
    lr = T.scalar()
    RF_params = ll.get_all_params(layers, trainable=True)
    RF_param_updates = lasagne.updates.rmsprop(loss_unl,
                                               RF_params,
                                               learning_rate=lr)
    # RF_param_updates = lasagne.updates.adam(loss_unl, RF_params, learning_rate=lr,beta1=0.5)
    train_RF = th.function(inputs=[x_revised, x_unl, lr],
                           outputs=[loss_unl, u_unl],
                           updates=RF_param_updates)

    # weight-norm initialization
    init_param = th.function(inputs=[x_unl],
                             outputs=None,
                             updates=init_updates)
    # prediction on test data
    output_before_softmax = ll.get_output(layers[-1],
                                          x_unl,
                                          deterministic=True)
    test_batch = th.function(inputs=[x_unl], outputs=output_before_softmax)

    #loss on generator
    loss_G = T.sum(T.square(x_revised - gen_dat))
    # Theano functions for training the generator
    gen_params = ll.get_all_params(gen_layers, trainable=True)
    gen_param_updates = lasagne.updates.rmsprop(loss_G,
                                                gen_params,
                                                learning_rate=lr)
    # gen_param_updates = lasagne.updates.adam(loss_G, gen_params, learning_rate=lr,beta1=0.5)
    train_G = th.function(inputs=[h, x_revised, lr],
                          outputs=None,
                          updates=gen_param_updates)

    # select labeled data

    # //////////// perform training //////////////
    lr_D = args.lrd
    lr_G = args.lrg
    beta = args.beta
    gradient_coefficient = args.gradient_coefficient
    noise_coefficient = args.noise_coefficient
    supervised_loss_weight = args.supervised_loss_weight
    entropy_loss_weight = 0.
    acc_all = []
    best_acc = 0
    nr_batches_train = len(trainx_unl) // args.batch_size
    nr_batches_test = int(np.ceil(len(testy) / float(args.batch_size)))
    for epoch in range(args.max_e):
        begin = time.time()
        # construct randomly permuted minibatches
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]

        if epoch == 0:
            init_param(trainx[:500])  # data based initialization
            if args.load:
                load_weights('mnist_model/mnist_jrf_' + args.load + '.npy',
                             layers + gen_layers)
        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        f_unl_all = 0.
        for t in range(nr_batches_train):
            h = np.cast[th.config.floatX](rng.uniform(size=(args.batch_size,
                                                            100)))
            if args.revison_method == 'revision_x_sgld':
                x_revised = revision(h, gradient_coefficient,
                                     noise_coefficient)
            elif args.revison_method == 'revision_x_sghmc':
                x_revised = revision(h, beta, gradient_coefficient,
                                     noise_coefficient)
            elif args.revison_method == 'revision_joint_sgld':
                x_revised, h = revision(h, gradient_coefficient,
                                        noise_coefficient)
            elif args.revison_method == 'revision_joint_sghmc':
                x_revised, h = revision(h, beta, gradient_coefficient,
                                        noise_coefficient)
            ran_from = t * args.batch_size
            ran_to = (t + 1) * args.batch_size
            # update the random field
            lo_unl, f_unl = train_RF(x_revised, trainx_unl[ran_from:ran_to],
                                     lr_D)

            loss_unl += lo_unl
            f_unl_all += f_unl
            # update the generator
            train_G(h, x_revised, lr_G)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train
        f_unl_all /= nr_batches_train
        # test

        test_pred = np.zeros((len(testy), 1), dtype=th.config.floatX)

        for t in range(nr_batches_test):
            last_ind = np.minimum((t + 1) * args.batch_size, len(testy))
            first_ind = last_ind - args.batch_size
            test_pred[first_ind:last_ind] = test_batch(
                testx[first_ind:last_ind])
        test_pred = test_pred[:, 0]

        from sklearn.metrics import roc_auc_score

        test_err = roc_auc_score(testy == num, test_pred)

        acc_all.append(test_err)

        if acc_all[-1] > best_acc:
            best_acc = acc_all[-1]
        if (epoch + 1) % 10 == 0:
            print('best acc:', best_acc, test_err)
            f_test_all = np.mean(test_pred)
            print(
                "epoch %d, time = %ds, loss_unl = %.4f, f unl = %.4f, f test = %.4f "
                % (epoch + 1, time.time() - begin, loss_unl, f_unl_all,
                   f_test_all))
            sys.stdout.flush()

        if (epoch + 1) % 50 == 0:
            import os
            if not os.path.exists('mnist_model'):
                os.mkdir('mnist_model')
            params = ll.get_all_params(layers + gen_layers)
            save_weights(
                'mnist_model/nrf_dec_ep%d_num%d_seed%d_%s.npy' %
                (epoch + 1, num, seed, args.sf), params)
        if loss_unl < -100:
            break
    return best_acc
Example 9
def create_network(num_neurons_input,
                   num_neurons_output,
                   num_neurons_hidden_layers=[],
                   output_activation="relu",
                   hidden_activations="relu",
                   parameters_validated=False):
    """
    Creates a neural network as a linked list of the input, hidden, and output layers, where the layer at index N (which is the last/output layer) references the layer at index N-1 (a hidden layer) through its previous_layer attribute. The input layer does not reference any layer because it is the last node reached when following the previous_layer references.

    In addition to the parameters_validated parameter, this function accepts the same parameters passed to the constructor of the gann.GANN class, except for the num_solutions parameter, because create_network() builds only a single network.

    num_neurons_input: Number of neurons in the input layer.
    num_neurons_output: Number of neurons in the output layer.
    num_neurons_hidden_layers=[]: A list holding the number of neurons in the hidden layer(s). If empty [], no hidden layers are used. For each int value it holds, a hidden layer is created with that number of neurons. For example, num_neurons_hidden_layers=[10] creates a single hidden layer with 10 neurons, and num_neurons_hidden_layers=[10, 5] creates 2 hidden layers with 10 neurons in the first and 5 neurons in the second.
    output_activation="relu": The name of the activation function of the output layer, which defaults to "relu".
    hidden_activations="relu": The name(s) of the activation function(s) of the hidden layer(s). It defaults to "relu". If passed as a string, the specified activation function is used across all the hidden layers. If passed as a list, it must have the same length as the num_neurons_hidden_layers list; an exception is raised if their lengths differ. When hidden_activations is a list, a one-to-one mapping between the num_neurons_hidden_layers and hidden_activations lists applies.
    parameters_validated=False: If False, the parameters have not been validated yet, so validate_network_parameters() is called before building the network.

    Returns a reference to the last layer in the network architecture, which is the output layer. From this reference, all network layers can be fetched.
    """

    # When parameters_validated is False, then the parameters are not yet validated and a call to validate_network_parameters() is required.
    if not parameters_validated:
        # Validating the passed parameters before creating the network.
        hidden_activations = validate_network_parameters(
            num_neurons_input=num_neurons_input,
            num_neurons_output=num_neurons_output,
            num_neurons_hidden_layers=num_neurons_hidden_layers,
            output_activation=output_activation,
            hidden_activations=hidden_activations)

    # Creating the input layer as an instance of the nn.InputLayer class.
    input_layer = nn.InputLayer(num_neurons_input)

    if len(num_neurons_hidden_layers) > 0:
        # If there are hidden layers, then the first hidden layer is connected to the input layer.
        hidden_layer = nn.DenseLayer(
            num_neurons=num_neurons_hidden_layers.pop(0),
            previous_layer=input_layer,
            activation_function=hidden_activations.pop(0))
        # For the other hidden layers, each hidden layer is connected to its preceding hidden layer.
        for hidden_layer_idx in range(len(num_neurons_hidden_layers)):
            hidden_layer = nn.DenseLayer(
                num_neurons=num_neurons_hidden_layers.pop(0),
                previous_layer=hidden_layer,
                activation_function=hidden_activations.pop(0))

        # The last hidden layer is connected to the output layer.
        # The output layer is created as an instance of the nn.DenseLayer class.
        output_layer = nn.DenseLayer(num_neurons=num_neurons_output,
                                     previous_layer=hidden_layer,
                                     activation_function=output_activation)

    # If there are no hidden layers, then the output layer is connected directly to the input layer.
    elif len(num_neurons_hidden_layers) == 0:
        # The output layer is created as an instance of the nn.DenseLayer class.
        output_layer = nn.DenseLayer(num_neurons=num_neurons_output,
                                     previous_layer=input_layer,
                                     activation_function=output_activation)

    # Returning the reference to the last layer in the network architecture, which is the output layer. From this reference, all network layers can be fetched.
    return output_layer
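
A minimal usage sketch (the layer sizes echo Example 6 and are purely illustrative; validate_network_parameters() is assumed to be available in the same module):

# input (360 features, illustrative) -> hidden (150, relu) -> hidden (60, relu) -> output (4, sigmoid)
output_layer = create_network(num_neurons_input=360,
                              num_neurons_output=4,
                              num_neurons_hidden_layers=[150, 60],
                              output_activation="sigmoid",
                              hidden_activations="relu")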