Example #1
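All of these examples are excerpts from larger training scripts. A minimal set of imports they assume, inferred from the names used in the snippets (a sketch rather than the files' exact headers), is:

from collections import OrderedDict
import math

import numpy as np
import theano
import theano.tensor as T
import lasagne
import binary_net  # BinaryNet / BNN-PYNQ training helpers used throughout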
def genLfc(input, num_outputs, learning_parameters):
    # A function to generate the lfc network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    if num_outputs < 1 or num_outputs > 64:
        error("num_outputs should be in the range of 1 to 64.")
    stochastic = False
    binary = True
    H = 1
    num_units = 1024
    n_hidden_layers = 3
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha
    dropout_in = learning_parameters.dropout_in
    dropout_hidden = learning_parameters.dropout_hidden

    mlp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)

    for k in range(n_hidden_layers):

        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)

        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)
    return mlp
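A hypothetical usage sketch (not from the original file): genLfc expects a 4-D Theano input variable and a learning_parameters object exposing the attributes read above. For example:

import theano.tensor as T
from argparse import Namespace

# hypothetical hyper-parameter container; the values mirror the other examples
learning_parameters = Namespace(W_LR_scale="Glorot", epsilon=1e-4, alpha=.1,
                                dropout_in=.2, dropout_hidden=.5)

input = T.tensor4('inputs')
mlp = genLfc(input, num_outputs=10, learning_parameters=learning_parameters)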
Example #2
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    # 64C3-64C3-P2
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 128C3-128C3-P2
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=128,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=128,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 256C3-256C3
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=256,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=256,
                                 filter_size=(3, 3),
                                 pad='valid',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # print(cnn.output_shape)

    # 512FP-512FP-<num_outputs>FP
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
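As with genLfc, a hypothetical call to genCnv for CIFAR-10-sized inputs (3x32x32) could look like the sketch below; only W_LR_scale, epsilon and alpha are read from learning_parameters here.

import theano.tensor as T
from argparse import Namespace

# assumed values, matching the other examples in this listing
learning_parameters = Namespace(W_LR_scale="Glorot", epsilon=1e-4, alpha=.1)

input = T.tensor4('inputs')
cnn = genCnv(input, num_outputs=10, learning_parameters=learning_parameters)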
Example #3
File: vae.py  Project: MinahilRaza/BNN-PYNQ
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    # Encoder
    cnn = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # 1st Layer
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(4, 4),
                                 pad='valid',
                                 stride=(2, 2),
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)

    print(cnn.output_shape)
    # 2nd Layer
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(4, 4),
                                 pad='valid',
                                 stride=(2, 2),
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)

    print(cnn.output_shape)
    # 3rd Layer
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(4, 4),
                                 pad='valid',
                                 stride=(1, 1),
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=0.2)

    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)

    print(cnn.output_shape)

    # FC Layer
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=256)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    print(cnn.output_shape)

    # Decoder
    cnn = lasagne.layers.ReshapeLayer(cnn, shape=(-1, 64, 2, 2))

    print(cnn.output_shape)
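    # Note: with 28x28 inputs the encoder ends at 64 feature maps of size 2x2
    # (= 256 values), which is why the 256-unit FC output can be reshaped back
    # to a (64, 2, 2) volume above.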

    # 1st Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=64,
        filter_size=(4, 4),
        crop='valid',
        stride=(2, 2),
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    print(cnn.output_shape)

    # 2nd Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=64,
        filter_size=(4, 4),
        crop='valid',
        stride=(2, 2),
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    print(cnn.output_shape)

    # 3rd Deconv Layer
    cnn = binary_net.Deconv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=1,
        filter_size=(4, 4),
        crop='valid',
        stride=(2, 2),
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    print(cnn.output_shape)

    cnn = lasagne.layers.flatten(cnn)

    print(cnn.output_shape)

    # Last FC layer
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    print(cnn.output_shape)

    return cnn
Example #4
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD, FINTUNE_SNAPSHOT, FINTUNE_SCALE):
    # BN parameters
    batch_size = 97
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    #NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    #N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    #LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    #LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...
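    # Note: with LR_fin == LR_start above, LR_decay works out to exactly 1.0,
    # so the learning rate stays constant at LR_start for the whole run.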

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x, train_y, val_x, val_y] = SFEW2.load_train_val()
    print(train_x.shape)
    print(train_y.shape)
    print(val_x.shape)
    print(val_y.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] - train_x.shape[0] // batch_size * batch_size) +
          ' / ' + str(batch_size))
    print(
        'the last training minibatch should not be too small (unless it is 0); '
        'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatches size: ' + str(batch_size))
    print('suggested minibatches size: ' + str(
        math.ceil(
            float(train_x.shape[0]) /
            math.ceil(float(train_x.shape[0]) / 100))))

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)

    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)

    for k in range(N_HIDDEN_LAYERS):

        # pretrain-finetune
        if (k == 0):
            # fixed num_units
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=1500)

            # scale down the LR of the transferred dense layer
            print('scale down the LR of the transferred dense layer from',
                  str(mlp.W_LR_scale))
            mlp.W_LR_scale *= np.float32(FINTUNE_SCALE)
            print('to', str(mlp.W_LR_scale))
        else:
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)

        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune
        # only restore the first layer group
        if (k == 0):
            if (FINTUNE_SNAPSHOT != 0):
                print('Load ./W-%d.npz' % FINTUNE_SNAPSHOT)
                with np.load('./W-%d.npz' % FINTUNE_SNAPSHOT) as f:
                    param_values = [
                        f['arr_%d' % i] for i in range(len(f.files))
                    ]
                param_values = param_values[0:6]
                lasagne.layers.set_all_param_values(mlp, param_values)

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=7)

    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output BN or SGN
    if OUTPUT_TYPE == 'C':
        pass  #
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert (False)

    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x == -1)).mean(0)
    LOSS_WEIGHT_1 = 1. + input * (2. * SPARSITY - 1)
    LOSS_WEIGHT_1 /= 4 * SPARSITY * (1 - SPARSITY)  # fixed 1->-1:5 -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1. + input * (2. * SPARSITY_MAP - 1)
    LOSS_WEIGHT_2 /= 4 * SPARSITY_MAP * (1 - SPARSITY_MAP)  # weights considering element's prior probability
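    # Worked example: with SPARSITY = 0.9, an input of +1 gets weight
    # (1 + 0.8) / 0.36 = 5 and an input of -1 gets (1 - 0.8) / 0.36 = 5/9,
    # matching the "1->-1:5 -1->1:5/9" note above.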

    # train loss nodes
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE == 'SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * train_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * train_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0., 1. - target * train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(
            T.maximum(0., (1. - target * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(
            T.maximum(0., (1. - target * train_output)) * LOSS_WEIGHT_2)
    else:
        assert (False)

    # + sparse penalty
    if LAMBDA > 0:
        train_pixel_wise_density = T.mean(T.reshape(
            (train_output + 1.) / 2.,
            [train_output.shape[0], train_output.shape[1] // 10, 10]),
                                          axis=2)
        train_penalty = LAMBDA * T.mean(
            T.sqr(train_pixel_wise_density - (1. - SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty
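    # The penalty above maps outputs from {-1,+1} to {0,1}, averages them over
    # consecutive groups of 10 units, and pushes each group's mean density
    # toward 1 - SPARSITY (0.1 here), i.e. it encourages sparse binary outputs.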

    # acc
    train_acc = T.mean(T.eq(T.argmax(train_output, axis=1),
                            T.argmax(target, axis=1)),
                       dtype=theano.config.floatX)

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)
        updates.update(
            lasagne.updates.adam(loss_or_grads=train_loss,
                                 params=params,
                                 learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss,
                                       params=params,
                                       learning_rate=LR)

    # val loss nodes
    # must be created after grad nodes
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE == 'SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - target * val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * val_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * val_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0., 1. - target * val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(
            T.maximum(0., (1. - target * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(
            T.maximum(0., (1. - target * val_output)) * LOSS_WEIGHT_2)

    # + sparse penalty
    if LAMBDA > 0:
        val_pixel_wise_density = T.mean(T.reshape(
            (val_output + 1.) / 2.,
            [val_output.shape[0], val_output.shape[1] // 10, 10]),
                                        axis=2)
        val_penalty = LAMBDA * T.mean(
            T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    # acc
    val_acc = T.mean(T.eq(T.argmax(val_output, axis=1), T.argmax(target,
                                                                 axis=1)),
                     dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training train_loss:
    train_fn = theano.function(
        [input, target, LR],
        [train_loss, train_penalty, train_acc, train_output],
        updates=updates)

    # Compile a second function computing the validation train_loss and accuracy:
    val_fn = theano.function([input, target],
                             [val_loss, val_penalty, val_acc, val_output])

    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, train_y, val_x, val_y)
Example #5
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128
    print("batch_size = " + str(batch_size))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    num_epochs = 150
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...
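    # With these values LR_decay = (LR_fin / LR_start)**(1/150) = (1e-4)**(1/150),
    # i.e. the learning rate shrinks by roughly a factor of 0.94 per epoch.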

    train_set_size = 40000
    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))

    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    print('Loading CIFAR-10 dataset...')
    cifar = CifarReader("./data/cifar-10-batches-py/")

    train_X, train_y = cifar.get_train_data(n_samples=train_set_size,
                                            noise=noise,
                                            alpha=nalpha)
    valid_X, valid_y = cifar.get_validation_data()
    test_X, test_y = cifar.get_test_data()
    print("train_set_size = " + str(train_y.shape[0]))
    print("validation_set_size = " + str(valid_y.shape[0]))
    print("test_set_size = " + str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = " + str(train_y.shape[0]), file=l)
        print("validation_set_size = " + str(valid_y.shape[0]), file=l)
        print("test_set_size = " + str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_X = np.reshape(np.subtract(np.multiply(2. / 255., train_X), 1.),
                         (-1, 3, 32, 32))
    valid_X = np.reshape(np.subtract(np.multiply(2. / 255., valid_X), 1.),
                         (-1, 3, 32, 32))
    test_X = np.reshape(np.subtract(np.multiply(2. / 255., test_X), 1.),
                        (-1, 3, 32, 32))

    # flatten targets
    train_y = np.hstack(train_y)
    valid_y = np.hstack(valid_y)
    test_y = np.hstack(test_y)

    # Onehot the targets
    train_y = np.float32(np.eye(10)[train_y])
    valid_y = np.float32(np.eye(10)[valid_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    valid_y = 2 * valid_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_in)

    # 32C3-64C3-P2
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=32,
                                 filter_size=(3, 3),
                                 pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    # 128FP-10FP
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=128)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=10)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:

        # W updates
        W = lasagne.layers.get_all_params(cnn, binary=True)
        W_grads = binary_net.compute_grads(loss, cnn)
        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, cnn)

        # other parameters updates
        params = lasagne.layers.get_all_params(cnn,
                                               trainable=True,
                                               binary=False)
        updates.update(
            lasagne.updates.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     train_X,
                     train_y,
                     valid_X,
                     valid_y,
                     test_X,
                     test_y,
                     shuffle_parts=shuffle_parts,
                     result_path=result_path)
Example #6
    mlp = lasagne.layers.InputLayer(
            #shape=(None, 1, 28, 28),
            shape=(None, n_inputs_per_sample),
            input_var=input)
            
    mlp = lasagne.layers.DropoutLayer(
            mlp, 
            p=dropout_in)
    
    for k in range(n_hidden_layers):

        mlp = binary_net.DenseLayer(
                mlp, 
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=n_neurons_per_hiddenlayer)                  
        
        mlp = lasagne.layers.BatchNormLayer(
                mlp,
                epsilon=epsilon, 
                alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(
                mlp,
                nonlinearity=activation)
                
        mlp = lasagne.layers.DropoutLayer(
                mlp,
                p=dropout_hidden)
Example #7
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, 1, input_size, input_size),
                                    input_var=input)

    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)

    for k in range(n_hidden_layers):

        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)

        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
Example #8
    def makeGenerator_encoder(self, layer_Z, layer_Y, layer_YGH):
        #(G2)
        # as the Conditional GAN style
        gen = ll.ConcatLayer([layer_Z, layer_YGH], axis=1)

        #(G3)
        #(G3-1) Full Connect (w)
        out_G3_1 = None
        for k in range(self.NUM_HIDDEN_LAYERS):
            if self.IS_USE_B_FC:
                gen = binary_net.DenseLayer(
                    gen,
                    binary=True,
                    stochastic=IS_STOCHASTIC,
                    H=H,
                    W_LR_scale=W_LR_scale,
                    b=None,  #No Bias
                    nonlinearity=None,
                    num_units=self.NUM_FC_UNITS)
            else:
                gen = ll.DenseLayer(gen,
                                    num_units=self.NUM_FC_UNITS,
                                    nonlinearity=None)

            print('G3-1:gen.shape', gen.input_shape, gen.output_shape)
            if out_G3_1 is None:
                out_G3_1 = ll.get_output(gen)

            #(G3-2) Batch Norm
            if self.IS_USE_B_BNA_1:
                # This layer includes the activation process
                gen = binary_net_ex.BatchNormLayer(gen,
                                                   epsilon=EPSILON,
                                                   alpha=ALPHA,
                                                   H=1)
                print('G3-2:gen.shape', gen.input_shape, gen.output_shape)
            else:
                gen = ll.BatchNormLayer(gen, epsilon=EPSILON, alpha=ALPHA)
                print('G3-2:gen.shape', gen.input_shape, gen.output_shape)

                #(G3-3) Activation: Binary tanh
                gen = ll.NonlinearityLayer(
                    gen, nonlinearity=binary_net.binary_tanh_unit)
                print('G3-3:gen.shape', gen.input_shape, gen.output_shape)

            out_G3_2 = ll.get_output(gen)

            #END for

        #(G4) Concat
        gen = ll.ConcatLayer([gen, layer_Y], axis=1)

        #(G5)
        #(G5-1) Full connect (w2)
        if self.IS_USE_B_FC:
            gen = binary_net.DenseLayer(
                gen,
                binary=True,
                stochastic=IS_STOCHASTIC,
                H=H,
                W_LR_scale=W_LR_scale,
                b=None,  #No Bias
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=(self.NUM_GEN_FILTERS * 7 * 7))
        else:
            gen = ll.DenseLayer(gen,
                                num_units=(self.NUM_GEN_FILTERS * 7 * 7),
                                nonlinearity=None)

        print('G5-1:gen.shape', gen.input_shape, gen.output_shape)  # (128, 3136)

        #(G5-2) Batch Norm
        if self.IS_USE_B_BNA_1:
            gen = binary_net_ex.BatchNormLayer(gen,
                                               epsilon=EPSILON,
                                               alpha=ALPHA,
                                               H=1)
        else:
            gen = ll.BatchNormLayer(gen, epsilon=EPSILON, alpha=ALPHA)

            #(G5-3) Activation: Binary tanh
            gen = ll.NonlinearityLayer(
                gen, nonlinearity=binary_net.binary_tanh_unit)
            print('G5-3:gen.shape', gen.input_shape, gen.output_shape)

        #(G6) Reshape
        gen = ll.ReshapeLayer(
            gen,
            # shape [0] denoting to use the size of the 0-th input dimension
            shape=([0], self.NUM_GEN_FILTERS, 7, 7)  # TODO: constant var.
        )

        return gen, out_G3_1, out_G3_2
Example #9
def build_net(input, binary, stochastic=False, H=1.0, W_LR_scale="Glorot",
              activation=binary_net.binary_tanh_unit, epsilon=1e-4, alpha=.1,
              patch_size=32, channels=3, num_filters=256):

    cnn = lasagne.layers.InputLayer(
            shape=(None, channels, patch_size, patch_size),
            input_var=input)
#1        
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation) 
 
    print(cnn.output_shape)
    
#2        
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation) 
 
    print(cnn.output_shape)
    
 
#3           
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation) 

    print(cnn.output_shape)
 
#4 
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation) 

    print(cnn.output_shape)
 
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    print(cnn.output_shape)
 
#5                  
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation)

    print(cnn.output_shape)
 
#6              
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation) 

    print(cnn.output_shape)
 
    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    print(cnn.output_shape)
 
#7                  
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation)
 
    print(cnn.output_shape)
 
#8
    cnn = binary_net.Conv2DLayer(
            cnn, 
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=num_filters, 
            filter_size=(2, 2),
            pad='valid',
            nonlinearity=lasagne.nonlinearities.identity)
    
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)
                
    cnn = lasagne.layers.NonlinearityLayer(
            cnn,
            nonlinearity=activation)
 
    print(cnn.output_shape)
 
    cnn = binary_net.DenseLayer(
                cnn, 
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=2)      
                  
    cnn = lasagne.layers.BatchNormLayer(
            cnn,
            epsilon=epsilon, 
            alpha=alpha)    

    return cnn
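A hypothetical usage sketch (not from the original file): build_net only needs a 4-D Theano input variable and the binary flag; the remaining arguments keep the defaults declared in the signature above.

import theano.tensor as T

input = T.tensor4('inputs')
cnn = build_net(input, binary=True)  # 3x32x32 patches, 2 output units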
Example #10
    #mlp = lasagne.layers.InputLayer(
    #        shape=(None, 1, 10, 10),
			#shape=(None, 1, 28, 28), 
    #       input_var=input)
            
    #mlp = lasagne.layers.DropoutLayer(
    #        mlp, 
    #        p=dropout_in)
    
    l1_in = lasagne.layers.InputLayer(shape=(None, 1, 10, 10),
                                      input_var=input)
    # Jintao: tear up the layers to get the binarized weights/outputs for testing.
    l1_dl = binary_net.DenseLayer(l1_in,
                                  binary=binary,
                                  stochastic=stochastic,
                                  H=H,
                                  W_LR_scale=W_LR_scale,
                                  nonlinearity=lasagne.nonlinearities.identity,
                                  num_units=num_units)

    l1_bn = lasagne.layers.BatchNormLayer(l1_dl,
                                          epsilon=epsilon,
                                          alpha=alpha)

    l1_nl = lasagne.layers.NonlinearityLayer(l1_bn,
                                             nonlinearity=activation)

    l2_dl = binary_net.DenseLayer(l1_nl,
                                  binary=binary,
                                  stochastic=stochastic,
                                  H=H,
Example #11
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD):
    # BN parameters
    batch_size = 100
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    #NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    #N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    #LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    #LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x] = SFEW2.load_lfw()
    assert (train_x.shape[0] == 26404)
    train_x = train_x[0:26400, :]
    [val_x, _, _, _] = SFEW2.load_train_val()

    print(train_x.shape)
    print(val_x.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] - train_x.shape[0] // batch_size * batch_size) +
          ' / ' + str(batch_size))
    print(
        'the last training minibatch should not be too small (unless it is 0); '
        'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatches size: ' + str(batch_size))
    print('suggested minibatches size: ' + str(
        math.ceil(
            float(train_x.shape[0]) /
            math.ceil(float(train_x.shape[0]) / 100))))

    ##############################################################################################

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)

    mlp = lasagne.layers.DropoutLayer(
        mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer

    for k in range(N_HIDDEN_LAYERS):
        if (k == 0):
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)
        elif (k == 1):
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS * 2)
        else:
            assert (False)

        #if(k==0):
        #    print('scale down the LR of transfered dense layer from', str(mlp.W_LR_scale))
        #    mlp.W_LR_scale = 0
        #    print('to', str(mlp.W_LR_scale))

        if (k == 0):
            # BAE1 encoder: BN
            mlp = lasagne.layers.BatchNormLayer(mlp,
                                                epsilon=epsilon,
                                                alpha=alpha)
        elif (k == 1):
            # BAE2 encoder: do not use BN for encouraging sparsity
            pass
        else:
            # further layer use BN
            mlp = lasagne.layers.BatchNormLayer(mlp,
                                                epsilon=epsilon,
                                                alpha=alpha)

        # the mid-activation is taken before the hard tanh;
        # the encoder and decoder should not use BatchNorm;
        # an "l1 reg"-style penalty is applied to the mid-activation
        if (k == 1):
            mlp_midactivation = mlp

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        if (k == 0):
            mlp = lasagne.layers.DropoutLayer(
                mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer
        else:
            mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune
        # only restore the first layer group
        if (k == 0):
            print('Load ./W-1168.npz')
            with np.load('./W-1168.npz') as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            param_values = param_values[0:6]
            lasagne.layers.set_all_param_values(mlp, param_values)

            mlp_groundtruth = mlp

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=1500)

    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output BN or SGN
    if OUTPUT_TYPE == 'C':
        pass  #
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert (False)
    '''
    # equal transform validation
    # 1 set AE transform to I
    # 1 modify AE DenseLayer.get_output_for() to use W(0 1) instead of Wb(+1 -1)
    # 2 set encoder's dropout=0
    # 3 comment out encoder's and decoder's BatchNormLayer, modify set_all_param_values
    # will see train loss = 0
    pv = lasagne.layers.get_all_param_values(mlp)
    pv[2] = np.identity(1500, np.float64)
    pv[4] = np.identity(1500, np.float64)
    lasagne.layers.set_all_param_values(mlp, pv)
    '''
    '''
    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x==-1)).mean(0)
    LOSS_WEIGHT_1 = 1.+input*(2.*SPARSITY-1)
    LOSS_WEIGHT_1 /= 4*SPARSITY*(1 - SPARSITY)# fixed 1->-1:5 -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1.+input*(2.*SPARSITY_MAP-1)#
    LOSS_WEIGHT_2 /= 4*SPARSITY_MAP*(1 - SPARSITY_MAP)# weights considering element's prior probability
    '''

    # train loss nodes
    '''
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE=='SH':
        train_loss = T.mean(T.sqr(T.maximum(0.,1.-input*train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(T.sqr(T.maximum(0., (1. - input * train_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(T.sqr(T.maximum(0., (1. - input * train_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0.,1.-input*train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(T.maximum(0., (1. - input * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(T.maximum(0., (1. - input * train_output)) * LOSS_WEIGHT_2)
    else:
        assert(False)
    '''
    [
        train_output_mlp_groundtruth, train_output_mlp_midactivation,
        train_output
    ] = lasagne.layers.get_output([mlp_groundtruth, mlp_midactivation, mlp],
                                  deterministic=False)
    train_loss = T.mean(
        T.maximum(0., 1. - train_output_mlp_groundtruth * train_output))

    # + sparse penalty
    '''
    if LAMBDA>0:
        train_pixel_wise_density = T.mean(T.reshape((train_output+1.)/2., [train_output.shape[0], train_output.shape[1]/10, 10]), axis=2)
        train_penalty = LAMBDA*T.mean(T.sqr(train_pixel_wise_density - (1.-SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty
    '''
    if LAMBDA > 0:
        train_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + train_output_mlp_midactivation))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty
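    # max(0, 1 + x) is zero only when the mid-activation x <= -1, so this hinge
    # term pushes the binary mid-layer pre-activations toward the -1 ("off")
    # state, acting as the "l1 reg"-style sparsity penalty noted above.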

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)

        # untrainable W1
        assert (len(W) == 3)
        assert (len(W_grads) == 3)
        W = W[1:len(W)]
        W_grads = W_grads[1:len(W_grads)]
        assert (len(W) == 2)
        assert (len(W_grads) == 2)

        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)

        # untrainable b1 bn1
        assert (len(params) == 7)
        assert (params[0].name == 'b')  # fix
        assert (params[1].name == 'beta')  # fix
        assert (params[2].name == 'gamma')  # fix
        assert (params[3].name == 'b')
        assert (params[4].name == 'b')
        assert (params[5].name == 'beta')
        assert (params[6].name == 'gamma')
        params = params[3:len(params)]
        assert (len(params) == 4)

        updates.update(
            lasagne.updates.adam(loss_or_grads=train_loss,
                                 params=params,
                                 learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss,
                                       params=params,
                                       learning_rate=LR)

    ##############################################################################################

    # val loss nodes
    # must be created after grad nodes
    '''
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE=='SH':
        val_loss = T.mean(T.sqr(T.maximum(0.,1.-input*val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(T.sqr(T.maximum(0., (1. - input * val_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(T.sqr(T.maximum(0., (1. - input * val_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0.,1.-input*val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(T.maximum(0., (1. - input * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(T.maximum(0., (1. - input * val_output)) * LOSS_WEIGHT_2)
    '''
    [val_output_mlp_groundtruth, val_output_mlp_midactivation, val_output
     ] = lasagne.layers.get_output([mlp_groundtruth, mlp_midactivation, mlp],
                                   deterministic=True)
    val_loss = T.mean(
        T.maximum(0., 1. - val_output_mlp_groundtruth * val_output))

    # + sparse penalty
    '''
    if LAMBDA>0:
        val_pixel_wise_density = T.mean(T.reshape((val_output + 1.) / 2., [val_output.shape[0], val_output.shape[1] / 10, 10]), axis=2)
        val_penalty = LAMBDA*T.mean(T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty
    '''
    if LAMBDA > 0:
        val_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + val_output_mlp_midactivation))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    ##############################################################################################

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, LR], [
        train_loss, train_penalty, train_output_mlp_groundtruth,
        train_output_mlp_midactivation, train_output
    ],
                               updates=updates)

    ##############################################################################################

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input], [
        val_loss, val_penalty, val_output_mlp_groundtruth,
        val_output_mlp_midactivation, val_output
    ])

    ##############################################################################################

    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, val_x, mlp)

    print('Save W')
    np.savez('./W.npz', *lasagne.layers.get_all_param_values(
        mlp))  # W b BN BN BN BN W b BN BN BN BN
Example #12
0
    #        shape=(None, 1, 28, 28),
    #        input_var=input)
    mlp = lasagne.layers.InputLayer(
             shape=(None, ins),
             input_var=input)
            
    mlp = lasagne.layers.DropoutLayer(
            mlp, 
            p=dropout_in)
    
    for k in range(n_hidden_layers):

        mlp = binary_net.DenseLayer(
                mlp, 
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=num_units)  # /(k+1): don't divide; it is kept this way for a sort of autoencoder
        
        mlp = lasagne.layers.BatchNormLayer(
                mlp,
                epsilon=epsilon, 
                alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(
                mlp,
                nonlinearity=activation)
                
        mlp = lasagne.layers.DropoutLayer(
                mlp,
                p=dropout_hidden)
Example #13
0
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    if num_outputs < 1 or num_outputs > 64:
        error("num_outputs should be in the range of 1 to 64.")
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    cnn = lasagne.layers.InputLayer(shape=(None, 5, 64, 64), input_var=input)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 64x64 -> 32x32

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 32x32 -> 16x16

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv conv maxpool
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=128,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=128,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 16x16 -> 8x8

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=256,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 8x8 -> 4x4

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv maxpool
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=256,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))  # 4x4 -> 2x2

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)
    cnn = lasagne.layers.DropoutLayer(cnn, p=0.6)

    print(cnn.output_shape)

    # FC1
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC 2
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # output
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
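
A minimal usage sketch for the topology above (a hedged illustration: lp stands in for whatever the surrounding training script passes as learning_parameters, and the values shown are placeholders):

import theano.tensor as T
from argparse import Namespace

lp = Namespace(W_LR_scale="Glorot", epsilon=1e-4, alpha=0.1)  # illustrative values
input_var = T.tensor4('inputs')  # matches the (None, 5, 64, 64) InputLayer
cnn = genCnv(input_var, num_outputs=10, learning_parameters=lp)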
Example #14
0
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128  # default: 100
    print("batch_size = " + str(batch_size))

    # alpha is the exponential moving average factor
    alpha = .1  # default: .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4  # default: 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    num_units = 300  # default: 4096
    print("num_units = " + str(num_units))
    n_hidden_layers = 1  # default: 3
    print("n_hidden_layers = " + str(n_hidden_layers))

    # Training parameters
    num_epochs = 500  # default: 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False  # default: False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.  # default: 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # default: "Glorot"
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005  # default: .003
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # default: 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
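    # Geometric schedule: LR at epoch t is LR_start * LR_decay**t, and by
    # construction LR_start * LR_decay**num_epochs == LR_fin.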
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might also be good for the BN moving average...

    save_path = None  # default: "mnist_parameters.npz"
    print("save_path = " + str(save_path))

    # Load the dataset (https://github.com/mnielsen/neural-networks-and-deep-learning)
    print('Loading MNIST dataset...')
    mnist = MnistReader("./data/mnist.pkl.gz")

    shuffle_parts = 1  # default: 1
    print("shuffle_parts = " + str(shuffle_parts))

    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    train_set_size = 50000  # default: 50000
    train_X, train_y = mnist.get_train_data(n_samples=train_set_size, noise=noise, alpha=nalpha)
    validation_X, validation_y = mnist.get_validation_data()
    test_X, test_y = mnist.get_test_data()
    print("train_set_size = "+str(train_y.shape[0]))
    print("validation_set_size = "+str(validation_y.shape[0]))
    print("test_set_size = "+str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_units = " + str(num_units), file=l)
        print("n_hidden_layers = " + str(n_hidden_layers), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("save_path = " + str(save_path), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = "+str(train_y.shape[0]), file=l)
        print("validation_set_size = "+str(validation_y.shape[0]), file=l)
        print("test_set_size = "+str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_X = 2 * train_X.reshape(-1, 1, 28, 28) - 1.
    validation_X = 2 * validation_X.reshape(-1, 1, 28, 28) - 1.
    test_X = 2 * test_X.reshape(-1, 1, 28, 28) - 1.

    # flatten targets
    train_y = np.hstack(train_y)
    validation_y = np.hstack(validation_y)
    test_y = np.hstack(test_y)

    # Onehot the targets
    train_y = np.float32(np.eye(10)[train_y])
    validation_y = np.float32(np.eye(10)[validation_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    validation_y = 2 * validation_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the MLP...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(
        shape=(None, 1, 28, 28),
        input_var=input)

    mlp = lasagne.layers.DropoutLayer(
        mlp,
        p=dropout_in)

    for k in range(n_hidden_layers):
        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)

        mlp = lasagne.layers.BatchNormLayer(
            mlp,
            epsilon=epsilon,
            alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(
            mlp,
            nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(
            mlp,
            p=dropout_hidden)

    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10)

    mlp = lasagne.layers.BatchNormLayer(
        mlp,
        epsilon=epsilon,
        alpha=alpha)

    train_output = lasagne.layers.get_output(mlp, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))
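    # Squared hinge: with the {-1,+1}-coded targets prepared above, each
    # class unit pays (max(0, 1 - t*o))**2, e.g. t = +1, o = 0.2 gives
    # 0.8**2 = 0.64, and 0 once the margin t*o >= 1 is met.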

    if binary:

        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp, trainable=True, binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)), dtype=theano.config.floatX)
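    # test_err is the misclassification rate: the fraction of samples whose
    # argmax over the 10 outputs differs from the argmax of the target row
    # (the +1 position of the hinge-coded one-hot label).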

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(
        train_fn, val_fn,
        mlp,
        batch_size,
        LR_start, LR_decay,
        num_epochs,
        train_X, train_y,
        validation_X, validation_y,
        test_X, test_y,
        save_path,
        shuffle_parts,
        result_path)
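
A hedged usage sketch for run(). Note that result_path is concatenated directly with "params.txt" in the logging block above, so it must end with a path separator; the argument values below are illustrative.

run(binary=True, noise=None, nalpha=0, result_path='./results/')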
Example #15
0
def buildCNN(networkType,
             dataType,
             input,
             epsilon,
             alpha,
             activation,
             binary,
             stochastic,
             H,
             W_LR_scale,
             oneHot=True):
    if oneHot:
        print("identity")
        denseOut = lasagne.nonlinearities.identity
    else:
        print("softmax")
        denseOut = lasagne.nonlinearities.softmax

    if dataType == 'TCDTIMIT':
        nbClasses = 39
        cnn = lasagne.layers.InputLayer(shape=(None, 1, 120, 120),
                                        input_var=input)
    elif dataType == 'cifar10':
        nbClasses = 10
        cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32),
                                        input_var=input)
    else:
        raise ValueError("unsupported dataType: " + str(dataType))

    if networkType == 'google':
        # conv 1
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=128,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 2
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=256,
            filter_size=(3, 3),
            stride=(2, 2),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv3
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=512,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 4
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=512,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # conv 5
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=512,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)
        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))
        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # FC layer
        cnn = binary_net.DenseLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=denseOut,  #TODO was identity
            num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    elif networkType == 'cifar10':
        # 128C3-128C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=128,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=128,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 256C3-256C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=256,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=256,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # 512C3-512C3-P2
        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=512,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.Conv2DLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            num_filters=512,
            filter_size=(3, 3),
            pad=1,
            nonlinearity=lasagne.nonlinearities.identity)

        cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        # print(cnn.output_shape)

        # 1024FP-1024FP-10FP
        cnn = binary_net.DenseLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=1024)

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.DenseLayer(
            cnn,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=1024)

        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

        cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

        cnn = binary_net.DenseLayer(cnn,
                                    binary=binary,
                                    stochastic=stochastic,
                                    H=H,
                                    W_LR_scale=W_LR_scale,
                                    nonlinearity=denseOut,
                                    num_units=nbClasses)
        cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
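
A hedged usage sketch for buildCNN, assuming binary_net is importable as in the snippets above; the hyperparameter values are illustrative, mirroring the defaults printed in the other examples here.

import theano.tensor as T

input_var = T.tensor4('inputs')
cnn = buildCNN(networkType='cifar10', dataType='cifar10', input=input_var,
               epsilon=1e-4, alpha=0.1, activation=binary_net.binary_tanh_unit,
               binary=True, stochastic=False, H=1., W_LR_scale="Glorot")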
Example #16
0
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    out_layers = []
    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(inp,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=16,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # 1x1 conv
    cnn_1x1 = binary_net.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=32,
        filter_size=(1, 1),
        pad='valid',
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn_1x1 = lasagne.layers.MaxPool2DLayer(cnn_1x1, pool_size=(2, 2))

    cnn_1x1 = lasagne.layers.BatchNormLayer(cnn_1x1,
                                            epsilon=epsilon,
                                            alpha=alpha)

    cnn_1x1 = lasagne.layers.NonlinearityLayer(cnn_1x1,
                                               nonlinearity=activation)

    out_layers.append(cnn_1x1)

    # 3x3 conv layer
    cnn_3x3 = binary_net.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=32,
        filter_size=(3, 3),
        pad='same',
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn_3x3 = lasagne.layers.MaxPool2DLayer(cnn_3x3, pool_size=(2, 2))

    cnn_3x3 = lasagne.layers.BatchNormLayer(cnn_3x3,
                                            epsilon=epsilon,
                                            alpha=alpha)

    cnn_3x3 = lasagne.layers.NonlinearityLayer(cnn_3x3,
                                               nonlinearity=activation)
    out_layers.append(cnn_3x3)

    # 5x5 conv layer
    cnn_5x5 = binary_net.Conv2DLayer(
        cnn,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        num_filters=32,
        filter_size=(5, 5),
        pad='same',
        flip_filters=False,
        nonlinearity=lasagne.nonlinearities.identity)

    cnn_5x5 = lasagne.layers.MaxPool2DLayer(cnn_5x5, pool_size=(2, 2))

    cnn_5x5 = lasagne.layers.BatchNormLayer(cnn_5x5,
                                            epsilon=epsilon,
                                            alpha=alpha)

    cnn_5x5 = lasagne.layers.NonlinearityLayer(cnn_5x5,
                                               nonlinearity=activation)

    out_layers.append(cnn_5x5)

    cnn = lasagne.layers.concat(out_layers)
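    # concat joins the three branches along the channel axis (axis=1 by
    # default): 32 + 32 + 32 = 96 feature maps, each 7x7 for the 28x28
    # inputs declared above (28 -> 14 after the first pool, 14 -> 7 in
    # each branch's pool).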

    # FC layer 1
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)
    return cnn
Example #17
0
File: cifar10.py  Project: hterada/b-dcgan
                                 pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # print(cnn.output_shape)

    # 1024FP-1024FP-10FP
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=1024)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=1024)
Example #18
0
def genCnv(input, num_outputs, learning_parameters):
    # A function to generate the cnv network topology which matches the overlay for the Pynq board.
    # WARNING: If you change this file, it's likely the resultant weights will not fit on the Pynq overlay.
    stochastic = False
    binary = True
    H = 1
    activation = binary_net.binary_tanh_unit
    W_LR_scale = learning_parameters.W_LR_scale
    epsilon = learning_parameters.epsilon
    alpha = learning_parameters.alpha

    inp = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input)

    # first conv
    cnn = binary_net.Conv2DLayer(inp,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    residual = cnn

    # conv 1 in Res block
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # conv 2 in Res block (its output is summed with the residual below)
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad='same',
                                 flip_filters=False,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.ElemwiseSumLayer([residual, cnn], coeffs=1)
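    # ElemwiseSumLayer needs identical input shapes; both the skip path and
    # the two-conv path are 64 maps of 14x14 here ('same' padding preserves
    # the spatial size), and coeffs=1 makes this a plain sum.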

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 1
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=512)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    # FC layer 2
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=num_outputs)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    return cnn