Example #1
    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     train_set.X,
                     train_set.y,
                     valid_set.X,
                     valid_set.y,
                     test_set.X,
                     test_set.y,
                     shuffle_parts=shuffle_parts)
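All of these examples end by handing the compiled train_fn/val_fn pair to binary_net.train. As a rough illustration of the kind of loop such a call drives (a sketch under assumptions, not the actual binary_net implementation; iterate_minibatches is a hypothetical helper):

import numpy as np

def train_loop(train_fn, val_fn, batch_size, LR_start, LR_decay, num_epochs,
               X_train, y_train, X_val, y_val):
    # hypothetical stand-in for binary_net.train's shuffling/minibatching
    def iterate_minibatches(X, y, bs):
        idx = np.random.permutation(len(X))
        for start in range(0, len(X) - bs + 1, bs):
            batch = idx[start:start + bs]
            yield X[batch], y[batch]

    LR = np.float32(LR_start)
    for epoch in range(num_epochs):
        losses = [train_fn(Xb, yb, LR)            # one Adam step at the current LR
                  for Xb, yb in iterate_minibatches(X_train, y_train, batch_size)]
        val_loss, val_err = val_fn(X_val, y_val)  # deterministic pass, no updates
        print("epoch %d: train loss %.4f, val loss %.4f, val err %.4f"
              % (epoch, np.mean(losses), val_loss, val_err))
        LR *= np.float32(LR_decay)                # geometric LR decay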
Example #2
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(
        [input, target],
        [test_loss, test_err, test_1_when_0, test_0_when_1, test_0_when_0])

    if training:
        print('Training...')
        binary_net.train(train_fn,
                         val_fn,
                         cnn,
                         batch_size,
                         LR_start,
                         LR_,
                         LR_decay,
                         num_filters,
                         run_name,
                         num_epochs,
                         train_set_X,
                         train_set_Y,
                         valid_set_X,
                         valid_set_Y,
                         test_set_X,
                         test_set_Y,
                         save_path=save_path,
                         shuffle_parts=shuffle_parts)

    label = lasagne.layers.get_output(cnn, deterministic=True)
    forward_pass_fn = theano.function([input], [label])

    if testing:
        with np.load(load_path) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
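The example is cut off right after param_values is read; a hedged continuation sketch (assuming, as in the other examples, that the saved arrays match the network and that test_set_X holds the inputs to classify):

        # sketch only: restore the weights, then run the deterministic forward pass
        lasagne.layers.set_all_param_values(cnn, param_values)
        [predictions] = forward_pass_fn(test_set_X)       # theano.function returns a one-element list here
        predicted_classes = np.argmax(predictions, axis=1)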
Example #3
def main():
    # BN parameters
    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    W_LR_scale = 1.
    #W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Training parameters
    num_epochs = 50
    print("num_epochs = " + str(num_epochs))

    # Decaying LR
    LR_start = 0.01
    print("LR_start = " + str(LR_start))
    LR_fin = 0.000003

    # LR_start = 0.01
    # print("LR_start = " + str(LR_start))
    # LR_fin = 1e-6
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
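    # geometric decay: LR_start * LR_decay**num_epochs == LR_fin
    # (binary_net.train presumably multiplies LR by LR_decay once per epoch)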
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))

    networkType = 'google'
    dataType = 'TCDTIMIT'

    #networkType='cifar10'
    #dataType='cifar10'

    # these batch sizes work for a GTX 1060 (6GB)
    if dataType == 'TCDTIMIT':
        if networkType == 'google': batch_size = 100
        else: batch_size = 24
    elif dataType == 'cifar10' and networkType == 'cifar10': batch_size = 400
    elif dataType == 'cifar10' and networkType == 'google': batch_size = 1000

    model_name = os.path.expanduser('~/TCDTIMIT/lipreading/TCDTIMIT/results/CNN_binaryNet/lipspeakers_') \
                 + networkType + "_phoneme39_binary" + "_" + dataType
    model_path = model_name + ".npz"
    if not os.path.exists(os.path.dirname(model_path)):
        os.makedirs(os.path.dirname(model_path))

    print('Building the CNN...')
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = buildCNN(networkType, dataType, input, epsilon, alpha, activation,
                   binary, stochastic, H, W_LR_scale)

    # restore network weights
    if os.path.exists(model_path):
        with np.load(model_path) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            # the saved file may contain either one array per parameter or a single list
            try:
                lasagne.layers.set_all_param_values(cnn, *param_values)
            except Exception:
                lasagne.layers.set_all_param_values(cnn, param_values)
            print("\n\n\t Loaded model " + model_path)

    print('Loading ' + dataType + ' dataset...')
    X_train, y_train, X_val, y_val, X_test, y_test = loadDataset(dataType)

    print("Building Functions...")
    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    # W updates
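    # assumption: binary_net's custom layers tag their weight tensors as 'binary',
    # so get_all_params(cnn, binary=True) returns only those weights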
    W = lasagne.layers.get_all_params(cnn, binary=True)
    W_grads = binary_net.compute_grads(loss, cnn)
    updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                   params=W,
                                   learning_rate=LR)
    updates = binary_net.clipping_scaling(updates, cnn)

    # other parameters updates
    params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(
        loss_or_grads=loss, params=params, learning_rate=LR).items())

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     X_train,
                     y_train,
                     X_val,
                     y_val,
                     X_test,
                     y_test,
                     save_name=model_name,
                     shuffle_parts=shuffle_parts,
                     justTest=justTest)
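The squared hinge loss used above expects one-hot targets rescaled to {-1,+1} (Examples #6, #8 and #13 do this explicitly with 2*y - 1). A small NumPy illustration of the same expression, for intuition only:

import numpy as np

target = np.float32(2 * np.eye(3)[[0, 2]] - 1)   # two samples, classes 0 and 2, encoded as +/-1
output = np.float32([[0.9, -0.8, -0.7],          # scores close to the margin of 1 -> small loss
                     [0.2, -0.1, 0.3]])          # weak scores far from the margin -> larger loss
loss = np.mean(np.square(np.maximum(0., 1. - target * output)))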
Example #4
        # other parameters updates
        params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
        updates = OrderedDict(updates.items() + lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR).items())
        
    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0.,1.-target*test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),dtype=theano.config.floatX)
    
    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary) 
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    
    binary_net.train(
            train_fn,val_fn,
            cnn,
            batch_size,
            LR_start,LR_decay,
            num_epochs,
            train_set.X,train_set.y,
            valid_set.X,valid_set.y,
            test_set.X,test_set.y,
            shuffle_parts=shuffle_parts)
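Note that merging the two update dictionaries with OrderedDict(updates.items() + ...) (as here and in Examples #3 and #7-#10) relies on Python 2, where dict.items() returns a list. Examples #6 and #13 use updates.update(...) instead, which is equivalent and also works under Python 3:

    # drop-in replacement for the OrderedDict(updates.items() + ...) line above
    updates.update(lasagne.updates.adam(loss_or_grads=loss,
                                        params=params,
                                        learning_rate=LR))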
Example #5
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     train_set.X,
                     train_set.y,
                     valid_set.X,
                     valid_set.y,
                     test_set.X,
                     test_set.y,
                     save_path=save_path,
                     shuffle_parts=shuffle_parts,
                     rotations=random_rot_range)
Example #6
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128
    print("batch_size = " + str(batch_size))

    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # Training parameters
    num_epochs = 150
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    train_set_size = 40000
    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))

    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    print('Loading CIFAR-10 dataset...')
    cifar = CifarReader("./data/cifar-10-batches-py/")

    train_X, train_y = cifar.get_train_data(n_samples=train_set_size,
                                            noise=noise,
                                            alpha=nalpha)
    valid_X, valid_y = cifar.get_validation_data()
    test_X, test_y = cifar.get_test_data()
    print("train_set_size = " + str(train_y.shape[0]))
    print("validation_set_size = " + str(valid_y.shape[0]))
    print("test_set_size = " + str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = " + str(train_y.shape[0]), file=l)
        print("validation_set_size = " + str(valid_y.shape[0]), file=l)
        print("test_set_size = " + str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_X = np.reshape(np.subtract(np.multiply(2. / 255., train_X), 1.),
                         (-1, 3, 32, 32))
    valid_X = np.reshape(np.subtract(np.multiply(2. / 255., valid_X), 1.),
                         (-1, 3, 32, 32))
    test_X = np.reshape(np.subtract(np.multiply(2. / 255., test_X), 1.),
                        (-1, 3, 32, 32))

    # flatten targets
    train_y = np.hstack(train_y)
    valid_y = np.hstack(valid_y)
    test_y = np.hstack(test_y)

    # Onehot the targets
    train_y = np.float32(np.eye(10)[train_y])
    valid_y = np.float32(np.eye(10)[valid_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    valid_y = 2 * valid_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = lasagne.layers.InputLayer(shape=(None, 3, 32, 32), input_var=input)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_in)

    # 32C3-64C3-P2
    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=32,
                                 filter_size=(3, 3),
                                 pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = binary_net.Conv2DLayer(cnn,
                                 binary=binary,
                                 stochastic=stochastic,
                                 H=H,
                                 W_LR_scale=W_LR_scale,
                                 num_filters=64,
                                 filter_size=(3, 3),
                                 pad=1,
                                 nonlinearity=lasagne.nonlinearities.identity)

    cnn = lasagne.layers.MaxPool2DLayer(cnn, pool_size=(2, 2))

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    # 128FP-10FP
    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=128)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(cnn, nonlinearity=activation)

    cnn = lasagne.layers.DropoutLayer(cnn, p=dropout_hidden)

    cnn = binary_net.DenseLayer(cnn,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=10)

    cnn = lasagne.layers.BatchNormLayer(cnn, epsilon=epsilon, alpha=alpha)

    cnn = lasagne.layers.NonlinearityLayer(
        cnn, nonlinearity=lasagne.nonlinearities.softmax)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:

        # W updates
        W = lasagne.layers.get_all_params(cnn, binary=True)
        W_grads = binary_net.compute_grads(loss, cnn)
        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, cnn)

        # other parameters updates
        params = lasagne.layers.get_all_params(cnn,
                                               trainable=True,
                                               binary=False)
        updates.update(
            lasagne.updates.adam(loss_or_grads=loss,
                                 params=params,
                                 learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(cnn, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(cnn, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     train_X,
                     train_y,
                     valid_X,
                     valid_y,
                     test_X,
                     test_y,
                     shuffle_parts=shuffle_parts,
                     result_path=result_path)
Example #7
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD, FINTUNE_SNAPSHOT, FINTUNE_SCALE):
    # BN parameters
    batch_size = 97
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    #NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    #N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    #LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    #LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x, train_y, val_x, val_y] = SFEW2.load_train_val()
    print(train_x.shape)
    print(train_y.shape)
    print(val_x.shape)
    print(val_y.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] % batch_size) + ' / ' + str(batch_size))
    print('the last training minibatch should not be too small (unless it is 0); '
          'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatch size: ' + str(batch_size))
    print('suggested minibatch size: ' + str(
        math.ceil(float(train_x.shape[0]) /
                  math.ceil(float(train_x.shape[0]) / 100))))

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)

    mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_in)

    for k in range(N_HIDDEN_LAYERS):

        # pretrain-finetune
        if (k == 0):
            # fixed num_units
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=1500)

            # scale down the LR of transfered dense layer
            print('scale down the LR of transfered dense layer from',
                  str(mlp.W_LR_scale))
            mlp.W_LR_scale *= np.float32(FINTUNE_SCALE)
            print('to', str(mlp.W_LR_scale))
        else:
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)

        mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune
        # only restore the first layer group
        if (k == 0):
            if (FINTUNE_SNAPSHOT != 0):
                print('Load ./W-%d.npz' % FINTUNE_SNAPSHOT)
                with np.load('./W-%d.npz' % FINTUNE_SNAPSHOT) as f:
                    param_values = [
                        f['arr_%d' % i] for i in range(len(f.files))
                    ]
                param_values = param_values[0:6]
                lasagne.layers.set_all_param_values(mlp, param_values)

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=7)

    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output BN or SGN
    if OUTPUT_TYPE == 'C':
        pass  #
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert (False)

    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x == -1)).mean(0)
    LOSS_WEIGHT_1 = 1. + input * (2. * SPARSITY - 1)
    LOSS_WEIGHT_1 /= 4 * SPARSITY * (1 - SPARSITY)  # fixed 1->-1:5 -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1. + input * (2. * SPARSITY_MAP - 1)
    LOSS_WEIGHT_2 /= 4 * SPARSITY_MAP * (1 - SPARSITY_MAP)  # weights considering each element's prior probability

    # train loss nodes
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE == 'SH':
        train_loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * train_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * train_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0., 1. - target * train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(
            T.maximum(0., (1. - target * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(
            T.maximum(0., (1. - target * train_output)) * LOSS_WEIGHT_2)
    else:
        assert (False)

    # + sparse penalty
    if LAMBDA > 0:
        train_pixel_wise_density = T.mean(T.reshape(
            (train_output + 1.) / 2.,
            [train_output.shape[0], train_output.shape[1] / 10, 10]),
                                          axis=2)
        train_penalty = LAMBDA * T.mean(
            T.sqr(train_pixel_wise_density - (1. - SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty

    # acc
    train_acc = T.mean(T.eq(T.argmax(train_output, axis=1),
                            T.argmax(target, axis=1)),
                       dtype=theano.config.floatX)

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(updates.items() + lasagne.updates.adam(
            loss_or_grads=train_loss, params=params, learning_rate=LR).items())

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss,
                                       params=params,
                                       learning_rate=LR)

    # val loss nodes
    # must be created after grad nodes
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE == 'SH':
        val_loss = T.mean(T.sqr(T.maximum(0., 1. - target * val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * val_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(
            T.sqr(T.maximum(0., (1. - target * val_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0., 1. - target * val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(
            T.maximum(0., (1. - target * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(
            T.maximum(0., (1. - target * val_output)) * LOSS_WEIGHT_2)

    # + sparse penalty
    if LAMBDA > 0:
        val_pixel_wise_density = T.mean(T.reshape(
            (val_output + 1.) / 2.,
            [val_output.shape[0], val_output.shape[1] / 10, 10]),
                                        axis=2)
        val_penalty = LAMBDA * T.mean(
            T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    # acc
    val_acc = T.mean(T.eq(T.argmax(val_output, axis=1),
                          T.argmax(target, axis=1)),
                     dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training train_loss:
    train_fn = theano.function(
        [input, target, LR],
        [train_loss, train_penalty, train_acc, train_output],
        updates=updates)

    # Compile a second function computing the validation train_loss and accuracy:
    val_fn = theano.function([input, target],
                             [val_loss, val_penalty, val_acc, val_output])

    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, train_y, val_x, val_y)
Example #8
def main():
    # BN parameters
    batch_size = 200
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Training parameters
    num_epochs = 500
    print("num_epochs = " + str(num_epochs))

    # Decaying LR
    LR_start = 0.01
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    train_set_size = 45000
    print("train_set_size = " + str(train_set_size))
    shuffle_parts = 1
    print("shuffle_parts = " + str(shuffle_parts))

    print('\nLoading CIFAR-10 dataset...')

    train_set = CIFAR10(which_set="train", start=0, stop=train_set_size)
    valid_set = CIFAR10(which_set="train", start=train_set_size, stop=50000)
    test_set = CIFAR10(which_set="test")

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., train_set.X), 1.), (-1, 3, 32, 32))
    valid_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., valid_set.X), 1.), (-1, 3, 32, 32))
    test_set.X = np.reshape(
        np.subtract(np.multiply(2. / 255., test_set.X), 1.), (-1, 3, 32, 32))

    # flatten targets
    train_set.y = np.hstack(train_set.y)
    valid_set.y = np.hstack(valid_set.y)
    test_set.y = np.hstack(test_set.y)

    if oneHot:
        #  Onehot the targets
        train_set.y = np.float32(np.eye(10)[train_set.y])
        valid_set.y = np.float32(np.eye(10)[valid_set.y])
        test_set.y = np.float32(np.eye(10)[test_set.y])

        # for hinge loss
        train_set.y = 2 * train_set.y - 1.
        valid_set.y = 2 * valid_set.y - 1.
        test_set.y = 2 * test_set.y - 1.
    else:
        train_set.y = np.int32(train_set.y)
        valid_set.y = np.int32(valid_set.y)
        test_set.y = np.int32(test_set.y)

    #import pdb;pdb.set_trace()

    print('\nBuilding the CNN...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    if oneHot: target = T.matrix('targets')
    else: target = T.ivector('targets')

    LR = T.scalar('LR', dtype=theano.config.floatX)

    cnn = buildCNN(dataType='cifar10',
                   networkType='cifar10',
                   oneHot=oneHot,
                   input=input,
                   epsilon=epsilon,
                   alpha=alpha,
                   activation=activation,
                   binary=binary,
                   stochastic=stochastic,
                   H=H,
                   W_LR_scale=W_LR_scale)

    train_output = lasagne.layers.get_output(cnn, deterministic=False)

    # squared hinge loss
    if oneHot: loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))
    else:
        loss = LO.categorical_crossentropy(train_output, target)
        loss = loss.mean()

    # W updates
    W = lasagne.layers.get_all_params(cnn, binary=True)
    W_grads = binary_net.compute_grads(loss, cnn)
    updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                   params=W,
                                   learning_rate=LR)
    updates = binary_net.clipping_scaling(updates, cnn)

    # other parameters updates
    params = lasagne.layers.get_all_params(cnn, trainable=True, binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(
        loss_or_grads=loss, params=params, learning_rate=LR).items())

    test_output = lasagne.layers.get_output(cnn, deterministic=True)

    if oneHot:
        test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
        test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                                T.argmax(target, axis=1)),
                          dtype=theano.config.floatX)
    else:
        test_loss = LO.categorical_crossentropy(test_output, target)
        test_loss = test_loss.mean()
        # target is an int vector of class indices here, so compare against it directly
        test_err = T.mean(T.neq(T.argmax(test_output, axis=1), target),
                          dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn,
                     val_fn,
                     cnn,
                     batch_size,
                     LR_start,
                     LR_decay,
                     num_epochs,
                     train_set.X,
                     train_set.y,
                     valid_set.X,
                     valid_set.y,
                     test_set.X,
                     test_set.y,
                     shuffle_parts=shuffle_parts)
Example #9
        # other parameters updates
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(updates.items() + lasagne.updates.adam(
            loss_or_grads=loss, params=params, learning_rate=LR).items())

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn, val_fn, mlp, batch_size, LR_start, LR_decay,
                     num_epochs, X_train, y_train, X_val, y_val, X_test,
                     y_test, save_path, shuffle_parts)
Example #10
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)
        updates = OrderedDict(updates.items() + lasagne.updates.adam(
            loss_or_grads=loss, params=params, learning_rate=LR).items())

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1),
                            T.argmax(target, axis=1)),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(train_fn, val_fn, mlp, batch_size, LR_start, LR_decay,
                     num_epochs, train_setX, train_setY, valid_setX,
                     valid_setY, test_setX, test_setY, save_path,
                     shuffle_parts)
def trial(N_HIDDEN_LAYERS, NUM_UNITS, OUTPUT_TYPE, MAIN_LOSS_TYPE, LAMBDA,
          FOLD):
    # BN parameters
    batch_size = 100
    print("batch_size = " + str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    #NUM_UNITS = 25
    print("NUM_UNITS = " + str(NUM_UNITS))
    #N_HIDDEN_LAYERS = 1
    print("N_HIDDEN_LAYERS = " + str(N_HIDDEN_LAYERS))

    # Training parameters
    num_epochs = 1000000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")

    # BinaryConnect
    binary = True
    print("binary = " + str(binary))
    stochastic = False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    #LR_start = .003
    LR_start = 0.000003
    print("LR_start = " + str(LR_start))
    #LR_fin = 0.0000003
    LR_fin = LR_start
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start)**(1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    # replace the dataset
    print('Loading SFEW2 dataset...')
    [train_x] = SFEW2.load_lfw()
    assert (train_x.shape[0] == 26404)
    train_x = train_x[0:26400, :]
    [val_x, _, _, _] = SFEW2.load_train_val()

    print(train_x.shape)
    print(val_x.shape)
    print('last training minibatch size: ' +
          str(train_x.shape[0] % batch_size) + ' / ' + str(batch_size))
    print('the last training minibatch should not be too small (unless it is 0); '
          'try decreasing the batch_size rather than adding more minibatches.')
    print('minibatch size: ' + str(batch_size))
    print('suggested minibatch size: ' + str(
        math.ceil(float(train_x.shape[0]) /
                  math.ceil(float(train_x.shape[0]) / 100))))

    ##############################################################################################

    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.matrix('inputs')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(shape=(None, train_x.shape[1]),
                                    input_var=input)

    mlp = lasagne.layers.DropoutLayer(
        mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer

    for k in range(N_HIDDEN_LAYERS):
        if (k == 0):
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS)
        elif (k == 1):
            mlp = binary_net.DenseLayer(
                mlp,
                binary=binary,
                stochastic=stochastic,
                H=H,
                W_LR_scale=W_LR_scale,
                nonlinearity=lasagne.nonlinearities.identity,
                num_units=NUM_UNITS * 2)
        else:
            assert (False)

        #if(k==0):
        #    print('scale down the LR of transfered dense layer from', str(mlp.W_LR_scale))
        #    mlp.W_LR_scale = 0
        #    print('to', str(mlp.W_LR_scale))

        if (k == 0):
            # BAE1 encoder: BN
            mlp = lasagne.layers.BatchNormLayer(mlp,
                                                epsilon=epsilon,
                                                alpha=alpha)
        elif (k == 1):
            # BAE2 encoder: do not use BN for encouraging sparsity
            pass
        else:
            # further layer use BN
            mlp = lasagne.layers.BatchNormLayer(mlp,
                                                epsilon=epsilon,
                                                alpha=alpha)

        # midactivation place before hard tanh
        # encoder and decoder should not use BatchNorm
        # "l1 reg" on midactivation
        if (k == 1):
            mlp_midactivation = mlp

        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)

        if (k == 0):
            mlp = lasagne.layers.DropoutLayer(
                mlp, p=0)  # train BAE-2: no dropout on input & BAE-1 layer
        else:
            mlp = lasagne.layers.DropoutLayer(mlp, p=dropout_hidden)

        # pretrain-finetune
        # only restore the first layer group
        if (k == 0):
            print('Load ./W-1168.npz')
            with np.load('./W-1168.npz') as f:
                param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            param_values = param_values[0:6]
            lasagne.layers.set_all_param_values(mlp, param_values)

            mlp_groundtruth = mlp

    mlp = binary_net.DenseLayer(mlp,
                                binary=binary,
                                stochastic=stochastic,
                                H=H,
                                W_LR_scale=W_LR_scale,
                                nonlinearity=lasagne.nonlinearities.identity,
                                num_units=1500)

    mlp = lasagne.layers.BatchNormLayer(mlp, epsilon=epsilon, alpha=alpha)

    # network output BN or SGN
    if OUTPUT_TYPE == 'C':
        pass  #
    elif OUTPUT_TYPE == 'D':
        mlp = lasagne.layers.NonlinearityLayer(mlp, nonlinearity=activation)
    else:
        assert (False)
    '''
    # equal transform validation
    # 1 set AE transform to I
    # 1 modift AE DenseLayer.get_output_for() use W(0 1) instead of Wb(+1 -1)
    # 2 set encoder's dropout=0
    # 3 comment out encoder's and decoder's BatchNormLayer, modify set_all_param_values
    # will see train loss = 0
    pv = lasagne.layers.get_all_param_values(mlp)
    pv[2] = np.identity(1500, np.float64)
    pv[4] = np.identity(1500, np.float64)
    lasagne.layers.set_all_param_values(mlp, pv)
    '''
    '''
    # loss weight nodes
    SPARSITY = 0.9
    SPARSITY_MAP = (np.float32(train_x==-1)).mean(0)
    LOSS_WEIGHT_1 = 1.+input*(2.*SPARSITY-1)
    LOSS_WEIGHT_1 /= 4*SPARSITY*(1 - SPARSITY)# fixed 1->-1:5 -1->1:5/9 weights
    LOSS_WEIGHT_2 = 1.+input*(2.*SPARSITY_MAP-1)#
    LOSS_WEIGHT_2 /= 4*SPARSITY_MAP*(1 - SPARSITY_MAP)# weights considering element's prior probability
    '''

    # train loss nodes
    '''
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    if MAIN_LOSS_TYPE=='SH':
        train_loss = T.mean(T.sqr(T.maximum(0.,1.-input*train_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        train_loss = T.mean(T.sqr(T.maximum(0., (1. - input * train_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        train_loss = T.mean(T.sqr(T.maximum(0., (1. - input * train_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        train_loss = T.mean(T.maximum(0.,1.-input*train_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        train_loss = T.mean(T.maximum(0., (1. - input * train_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        train_loss = T.mean(T.maximum(0., (1. - input * train_output)) * LOSS_WEIGHT_2)
    else:
        assert(False)
    '''
    [
        train_output_mlp_groundtruth, train_output_mlp_midactivation,
        train_output
    ] = lasagne.layers.get_output([mlp_groundtruth, mlp_midactivation, mlp],
                                  deterministic=False)
    train_loss = T.mean(
        T.maximum(0., 1. - train_output_mlp_groundtruth * train_output))

    # + sparse penalty
    '''
    if LAMBDA>0:
        train_pixel_wise_density = T.mean(T.reshape((train_output+1.)/2., [train_output.shape[0], train_output.shape[1]/10, 10]), axis=2)
        train_penalty = LAMBDA*T.mean(T.sqr(train_pixel_wise_density - (1.-SPARSITY)))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty
    '''
    if LAMBDA > 0:
        train_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + train_output_mlp_midactivation))
    else:
        train_penalty = T.constant(0.)
    train_loss = train_loss + train_penalty

    # grad nodes
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(train_loss, mlp)

        # untrainable W1
        assert (len(W) == 3)
        assert (len(W_grads) == 3)
        W = W[1:len(W)]
        W_grads = W_grads[1:len(W_grads)]
        assert (len(W) == 2)
        assert (len(W_grads) == 2)

        updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                       params=W,
                                       learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp,
                                               trainable=True,
                                               binary=False)

        # untrainable b1 bn1
        assert (len(params) == 7)
        assert (params[0].name == 'b')  # fix
        assert (params[1].name == 'beta')  # fix
        assert (params[2].name == 'gamma')  # fix
        assert (params[3].name == 'b')
        assert (params[4].name == 'b')
        assert (params[5].name == 'beta')
        assert (params[6].name == 'gamma')
        params = params[3:len(params)]
        assert (len(params) == 4)

        updates = OrderedDict(updates.items() + lasagne.updates.adam(
            loss_or_grads=train_loss, params=params, learning_rate=LR).items())

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=train_loss,
                                       params=params,
                                       learning_rate=LR)

    ##############################################################################################

    # val loss nodes
    # must be created after grad nodes
    '''
    val_output = lasagne.layers.get_output(mlp, deterministic=True)
    if MAIN_LOSS_TYPE=='SH':
        val_loss = T.mean(T.sqr(T.maximum(0.,1.-input*val_output)))
    elif MAIN_LOSS_TYPE == 'W1SH':
        val_loss = T.mean(T.sqr(T.maximum(0., (1. - input * val_output))) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2SH':
        val_loss = T.mean(T.sqr(T.maximum(0., (1. - input * val_output))) * LOSS_WEIGHT_2)
    elif MAIN_LOSS_TYPE == 'H':
        val_loss = T.mean(T.maximum(0.,1.-input*val_output))
    elif MAIN_LOSS_TYPE == 'W1H':
        val_loss = T.mean(T.maximum(0., (1. - input * val_output)) * LOSS_WEIGHT_1)
    elif MAIN_LOSS_TYPE == 'W2H':
        val_loss = T.mean(T.maximum(0., (1. - input * val_output)) * LOSS_WEIGHT_2)
    '''
    [val_output_mlp_groundtruth, val_output_mlp_midactivation, val_output
     ] = lasagne.layers.get_output([mlp_groundtruth, mlp_midactivation, mlp],
                                   deterministic=True)
    val_loss = T.mean(
        T.maximum(0., 1. - val_output_mlp_groundtruth * val_output))

    # + sparse penalty
    '''
    if LAMBDA>0:
        val_pixel_wise_density = T.mean(T.reshape((val_output + 1.) / 2., [val_output.shape[0], val_output.shape[1] / 10, 10]), axis=2)
        val_penalty = LAMBDA*T.mean(T.sqr(val_pixel_wise_density - (1. - SPARSITY)))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty
    '''
    if LAMBDA > 0:
        val_penalty = LAMBDA * T.mean(
            T.maximum(0., 1. + val_output_mlp_midactivation))
    else:
        val_penalty = T.constant(0.)
    val_loss = val_loss + val_penalty

    ##############################################################################################

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training train_loss:
    train_fn = theano.function([input, LR], [
        train_loss, train_penalty, train_output_mlp_groundtruth,
        train_output_mlp_midactivation, train_output
    ],
                               updates=updates)

    ##############################################################################################

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input], [
        val_loss, val_penalty, val_output_mlp_groundtruth,
        val_output_mlp_midactivation, val_output
    ])

    ##############################################################################################

    print('Training...')
    train_x = binary_net.MoveParameter(train_x)
    binary_net.train(train_fn, val_fn, batch_size, LR_start, LR_decay,
                     num_epochs, train_x, val_x, mlp)

    print('Save W')
    # saved parameter order: W b BN BN BN BN W b BN BN BN BN
    np.savez('./W.npz', *lasagne.layers.get_all_param_values(mlp))
Example #12
    get_intermediate_activation = theano.function([input], [Output1, Output2, Output3])
    Outputs1, Outputs2, Outputs3 = get_intermediate_activation(test_set.X)
	
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    if save_path is not None:
        os.mkdir(save_path)
	
    binary_net.train(
            train_fn, get_intermediate_activation, val_fn,
            mlp,
            batch_size,
            LR_start,LR_decay,
            num_epochs,
            train_set.X,train_set.y,
            valid_set.X,valid_set.y,
            test_set.X,test_set.y,
            save_path,
            shuffle_parts)
			
filename = "mnist_"
#os.mkdir(filename)
#os.chdir(filename)
#Params_val = lasagne.layers.get_all_param_values(mlp)
#Params  = lasagne.layers.get_all_params(mlp)
#Output1 = lasagne.layers.get_output(l1, deterministic=True)

#Out1Func = theano.function([input], Output1)
#Outputs1 = Out1Func(test_set.X)
Example #13
def run(binary=False, noise=None, nalpha=0, result_path=None):
    # BN parameters
    batch_size = 128  # default: 100
    print("batch_size = " + str(batch_size))

    # alpha is the exponential moving average factor
    alpha = .1  # default: .1
    print("alpha = " + str(alpha))
    epsilon = 1e-4  # default: 1e-4
    print("epsilon = " + str(epsilon))

    # MLP parameters
    num_units = 300  # default: 4096
    print("num_units = " + str(num_units))
    n_hidden_layers = 1  # default: 3
    print("n_hidden_layers = " + str(n_hidden_layers))

    # Training parameters
    num_epochs = 500  # default: 1000
    print("num_epochs = " + str(num_epochs))

    # Dropout parameters
    dropout_in = .2  # default: .2
    print("dropout_in = " + str(dropout_in))
    dropout_hidden = .5  # default: .5
    print("dropout_hidden = " + str(dropout_hidden))

    # BinaryOut
    if binary:
        activation = binary_net.binary_tanh_unit
        print("activation = binary_net.binary_tanh_unit")
    else:
        activation = lasagne.nonlinearities.tanh
        print("activation = lasagne.nonlinearities.tanh")

    # BinaryConnect
    print("binary = " + str(binary))
    stochastic = False  # default: False
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.  # default: 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # default: "Glorot"
    print("W_LR_scale = " + str(W_LR_scale))

    # Decaying LR
    LR_start = 0.005  # default: .003
    print("LR_start = " + str(LR_start))
    LR_fin = 0.0000005  # default: 0.0000003
    print("LR_fin = " + str(LR_fin))
    LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    print("LR_decay = " + str(LR_decay))
    # BTW, LR decay might be good for the BN moving average...

    save_path = None  # default: "mnist_parameters.npz"
    print("save_path = " + str(save_path))

    # Load the dataset (https://github.com/mnielsen/neural-networks-and-deep-learning)
    print('Loading MNIST dataset...')
    mnist = MnistReader("./data/mnist.pkl.gz")

    shuffle_parts = 1  # default: 1
    print("shuffle_parts = " + str(shuffle_parts))

    print("noise = " + str(noise))
    print("nalpha = " + str(nalpha))

    train_set_size = 50000  # default: 50000
    train_X, train_y = mnist.get_train_data(n_samples=train_set_size, noise=noise, alpha=nalpha)
    validation_X, validation_y = mnist.get_validation_data()
    test_X, test_y = mnist.get_test_data()
    print("train_set_size = "+str(train_y.shape[0]))
    print("validation_set_size = "+str(validation_y.shape[0]))
    print("test_set_size = "+str(test_y.shape[0]))

    # Log output
    with open(result_path + "params.txt", "a+") as l:
        print("batch_size = " + str(batch_size), file=l)
        print("alpha = " + str(alpha), file=l)
        print("epsilon = " + str(epsilon), file=l)
        print("num_units = " + str(num_units), file=l)
        print("n_hidden_layers = " + str(n_hidden_layers), file=l)
        print("num_epochs = " + str(num_epochs), file=l)
        print("dropout_in = " + str(dropout_in), file=l)
        print("dropout_hidden = " + str(dropout_hidden), file=l)
        if binary:
            print("activation = binary_net.binary_tanh_unit", file=l)
        else:
            print("activation = lasagne.nonlinearities.tanh", file=l)
        print("binary = " + str(binary), file=l)
        print("stochastic = " + str(stochastic), file=l)
        print("H = " + str(H), file=l)
        print("W_LR_scale = " + str(W_LR_scale), file=l)
        print("LR_start = " + str(LR_start), file=l)
        print("LR_fin = " + str(LR_fin), file=l)
        print("LR_decay = " + str(LR_decay), file=l)
        print("save_path = " + str(save_path), file=l)
        print("shuffle_parts = " + str(shuffle_parts), file=l)
        print("noise = " + str(noise), file=l)
        print("nalpha = " + str(nalpha), file=l)
        print("train_set_size = "+str(train_y.shape[0]), file=l)
        print("validation_set_size = "+str(validation_y.shape[0]), file=l)
        print("test_set_size = "+str(test_y.shape[0]), file=l)

    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_X = 2 * train_X.reshape(-1, 1, 28, 28) - 1.
    validation_X = 2 * validation_X.reshape(-1, 1, 28, 28) - 1.
    test_X = 2 * test_X.reshape(-1, 1, 28, 28) - 1.

    # flatten targets
    train_y = np.hstack(train_y)
    validation_y = np.hstack(validation_y)
    test_y = np.hstack(test_y)

    # Onehot the targets
    train_y = np.float32(np.eye(10)[train_y])
    validation_y = np.float32(np.eye(10)[validation_y])
    test_y = np.float32(np.eye(10)[test_y])

    # for hinge loss
    train_y = 2 * train_y - 1.
    validation_y = 2 * validation_y - 1.
    test_y = 2 * test_y - 1.

    print('Building the MLP...')

    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)

    mlp = lasagne.layers.InputLayer(
        shape=(None, 1, 28, 28),
        input_var=input)

    mlp = lasagne.layers.DropoutLayer(
        mlp,
        p=dropout_in)

    for k in range(n_hidden_layers):
        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)

        mlp = lasagne.layers.BatchNormLayer(
            mlp,
            epsilon=epsilon,
            alpha=alpha)

        mlp = lasagne.layers.NonlinearityLayer(
            mlp,
            nonlinearity=activation)

        mlp = lasagne.layers.DropoutLayer(
            mlp,
            p=dropout_hidden)

    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10)

    mlp = lasagne.layers.BatchNormLayer(
        mlp,
        epsilon=epsilon,
        alpha=alpha)

    train_output = lasagne.layers.get_output(mlp, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - target * train_output)))

    if binary:

        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(loss, mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = binary_net.clipping_scaling(updates, mlp)

        # other parameters updates
        params = lasagne.layers.get_all_params(mlp, trainable=True, binary=False)
        updates.update(lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR))

    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)

    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0., 1. - target * test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)), dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')

    binary_net.train(
        train_fn, val_fn,
        mlp,
        batch_size,
        LR_start, LR_decay,
        num_epochs,
        train_X, train_y,
        validation_X, validation_y,
        test_X, test_y,
        save_path,
        shuffle_parts,
        result_path)
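A hedged usage sketch for this run() (values are illustrative; note that result_path is concatenated directly with "params.txt" inside the function, so it should end with a path separator and the directory should already exist):

if __name__ == "__main__":
    run(binary=True,                    # binarized activations and weights
        noise=None, nalpha=0,           # forwarded to MnistReader.get_train_data
        result_path="./results/run1/")  # hypothetical output directory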
Example #14
    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss,
                               updates=updates)  #XXX

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])
    print('Theano Compiling...DONE')

    print('Training...')

    #
    # Run training & evaluation
    #
    binary_net.train(
        train_fn,  #XXX (input, target, LR) -> loss
        val_fn,
        mlp,
        batch_size,
        LR_start,
        LR_decay,
        num_epochs,
        train_set.X,
        train_set.y,  #XXX target
        valid_set.X,
        valid_set.y,
        test_set.X,
        test_set.y,
        save_path,
        shuffle_parts)
Example #15
    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary) 
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])

    print('Training...')
    
    binary_net.train(
            train_fn,val_fn,
            mlp,
            batch_size,
            LR_start,LR_decay,
            num_epochs,
            X_train, y_train,
            X_train, y_train,
            X_test, y_test,
            #train_set.X,train_set.y,
            #valid_set.X,valid_set.y,
            #test_set.X,test_set.y,
            save_path,
            shuffle_parts)


    # Load parameters
    with np.load('mnist_parameters.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    #for i in range(len(f.files)):
    #    print('arr_%d ' % i)
    lasagne.layers.set_all_param_values(mlp, param_values)
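With the parameters restored, a hedged sketch of how the network could then be evaluated (assuming X_test and y_test from the earlier call, with y_test one-hot encoded as in the other MNIST examples):

    # sketch only: compile a deterministic forward pass with the restored parameters
    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    predict_fn = theano.function([input], test_output)
    predicted = np.argmax(predict_fn(X_test), axis=1)
    error_rate = np.mean(predicted != np.argmax(y_test, axis=1))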