Example #1
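The snippet assumes the usual imports for this kind of Theano script, plus `pandas` and `pickle` for logging results; `HighwayNetwork`, `RMSprop`, and `train_nn` are helpers defined elsewhere in the source project:

import pickle

import numpy
import pandas as pd
import theano
import theano.tensor as T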
def test_Highway(datasets,
                 learning_rate=0.1,
                 rho=0.9,
                 n_epochs=200,
                 n_hidden=10,
                 n_hiddenLayers=1,
                 n_highwayLayers=5,
                 activation_hidden=T.nnet.relu,
                 activation_highway=T.nnet.sigmoid,
                 b_T=-5,
                 L1_reg=0,
                 L2_reg=0,
                 batch_size=500,
                 verbose=False):

    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_in = train_set_x.get_value(borrow=True).shape[1]

    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    itr = T.fscalar()  # iteration counter (unused; see on_unused_input='ignore' below)

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    highway_net = HighwayNetwork(rng=rng,
                                 input=x,
                                 n_in=n_in,
                                 n_hidden=n_hidden,
                                 n_out=10,
                                 n_hiddenLayers=n_hiddenLayers,
                                 n_highwayLayers=n_highwayLayers,
                                 activation_hidden=activation_hidden,
                                 activation_highway=activation_highway,
                                 b_T=b_T)
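
    # `HighwayNetwork` is defined elsewhere in this project. For reference,
    # each highway layer computes y = H(x)*T(x) + x*(1 - T(x)), where the
    # sigmoid transform gate T(x) has its bias initialized to b_T (negative,
    # so layers initially carry their input through). A hypothetical
    # single-layer sketch, not the class used above:
    def highway_layer_sketch(rng, x, n_in, activation=T.nnet.relu,
                             gate_activation=T.nnet.sigmoid, b_T=-5):
        def init_W():
            W = rng.uniform(-0.1, 0.1, (n_in, n_in))
            return theano.shared(numpy.asarray(W, dtype=theano.config.floatX))
        W_H, W_T = init_W(), init_W()
        b_H = theano.shared(numpy.zeros(n_in, dtype=theano.config.floatX))
        # a negative gate bias pushes T(x) toward 0, i.e. toward the carry path
        b_gate = theano.shared(numpy.full(n_in, b_T, dtype=theano.config.floatX))
        H = activation(T.dot(x, W_H) + b_H)
        T_gate = gate_activation(T.dot(x, W_T) + b_gate)
        return H * T_gate + x * (1 - T_gate)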

    # the cost we minimize during training is the negative log likelihood of
    # the model; the L1/L2 regularization terms are left commented out below
    # (L1_reg and L2_reg default to 0). The cost is expressed here symbolically.
    cost = (
        highway_net.logRegressionLayer.negative_log_likelihood(y)
        # + L1_reg * L1
        # + L2_reg * L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=highway_net.logRegressionLayer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    updates = RMSprop(cost, highway_net.params, lr=learning_rate, rho=rho)
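
    # `RMSprop` is a project helper; a minimal sketch matching its call
    # signature (keeps a running average of squared gradients; `epsilon`
    # is an assumed numerical-stability constant):
    def rmsprop_sketch(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
        grads = T.grad(cost=cost, wrt=params)
        updates = []
        for p, g in zip(params, grads):
            acc = theano.shared(p.get_value() * 0.)  # running average of g**2
            acc_new = rho * acc + (1 - rho) * g ** 2
            updates.append((acc, acc_new))
            updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
        return updates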

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index, itr],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='ignore')

    result = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose)

    res = pd.DataFrame([
        result.RunningTime, result.BestXEntropy, result.TestPerformance,
        result.BestValidationScore, n_epochs, result.N_Epochs,
        activation_hidden, activation_highway, L2_reg, L1_reg, batch_size,
        result.N_Iterations, n_hidden, n_hiddenLayers, n_highwayLayers,
        learning_rate, rho, result.Patience
    ],
                       index=[
                           'Running time', 'XEntropy', 'Test performance',
                           'Best Validation score', 'Max epochs', 'N epochs',
                           'Activation function - hidden',
                           'Activation function - highway', 'L2_reg parameter',
                           'L1_reg parameter', 'Batch size', 'Iterations',
                           'Hidden neurons per layer', 'Hidden Layers',
                           'Highway Layers', 'Learning rate', 'Rho', 'Patience'
                       ]).transpose()

    # append this run to Results.csv and use the last row index as a run id
    res.to_csv('Results.csv', mode='a', index=False, header=False)
    idx = pd.read_csv('Results.csv').index.values[-1]

    pickle.dump(result.XEntropy, open("cross_entropy" + str(idx) + ".p", "wb"))
    print('Cross entropy is stored in cross_entropy' + str(idx) + '.p')
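To inspect a stored cross-entropy curve later, assuming the same file-naming convention (`idx` is the run id written to Results.csv):

with open('cross_entropy' + str(idx) + '.p', 'rb') as f:
    xent = pickle.load(f)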
Example #2
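As above, the snippet assumes its imports; `load_data`, `errors`, `train_nn`, and `all_CNN_C` are helpers defined elsewhere in the source project:

import theano
import theano.tensor as T
import lasagne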
def run_experiment(lr=0.01, num_epochs=128, nkerns=[96, 192, 10], lambda_decay=1e-3, conv_arch=all_CNN_C, n_class=10,
                   batch_size=128, verbose=False, filter_size=(3,3)):
    """
    Wrapper function for testing the all convolutional networks implemented here

    :type lr: float
    :param lr: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_szie: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type filter_size: tuple(int)
    :param filter_size: size of the filters.

    :type conv_arch: function
    :param verbose: Convolutional Network to run

    :type weight_decay: float
    :param weight_decay: L2 regularization parameter

    :type n_class: int
    :param n_class: Number of classes/output units of final layer (10 vs. 100)

    """
    # load_data is assumed to return CIFAR-style train/valid/test splits:
    # simple=True for the 10-class variant, simple=False for 100 classes
    datasets = load_data(simple=(n_class != 100))

    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    n_train_batches = X_train.get_value(borrow=True).shape[0]
    n_valid_batches = X_val.get_value(borrow=True).shape[0]
    n_test_batches = X_test.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    index = T.lscalar()  # index to a [mini]batch

    x = T.tensor4('x')
    y = T.ivector('y')

    channel = 3
    imsize = 32

    data_size = X_train.get_value(borrow=True).shape[0]
    tdata_size = X_test.get_value(borrow=True).shape[0]
    vdata_size = X_val.get_value(borrow=True).shape[0]

    # reshape the flattened images to (batch, channel, height, width);
    # reshaping a shared variable yields a symbolic view used in `givens` below
    X_train = X_train.reshape((data_size, channel, imsize, imsize))
    X_test = X_test.reshape((tdata_size, channel, imsize, imsize))
    X_val = X_val.reshape((vdata_size, channel, imsize, imsize))

    # Building the all-convolutional network (the `conv_arch` argument was
    # previously ignored here; it is now actually used)

    network = conv_arch(x, filter_size=filter_size, n_class=n_class)
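
    # `all_CNN_C` (the default conv_arch) is defined elsewhere; below is a
    # minimal Lasagne sketch in the spirit of All-CNN-C (Springenberg et al.,
    # 2015): stride-2 convolutions replace max pooling, and the classifier is
    # global average pooling over n_class feature maps. Layer sizes here are
    # illustrative assumptions.
    def all_cnn_c_sketch(input_var, filter_size=(3, 3), n_class=10):
        net = lasagne.layers.InputLayer((None, 3, 32, 32), input_var=input_var)
        for n_filters in (96, 192):
            net = lasagne.layers.Conv2DLayer(net, n_filters, filter_size, pad='same')
            net = lasagne.layers.Conv2DLayer(net, n_filters, filter_size, pad='same')
            # stride-2 convolution in place of max pooling
            net = lasagne.layers.Conv2DLayer(net, n_filters, filter_size, stride=2, pad='same')
        net = lasagne.layers.Conv2DLayer(net, 192, filter_size, pad='same')
        net = lasagne.layers.Conv2DLayer(net, 192, (1, 1))
        net = lasagne.layers.Conv2DLayer(net, n_class, (1, 1))
        net = lasagne.layers.GlobalPoolLayer(net)  # global average pooling
        return lasagne.layers.NonlinearityLayer(net, lasagne.nonlinearities.softmax)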

    # Loss and prediction calculation.
    # The training loss is the categorical cross-entropy between the
    # network's predictions and the targets.

    train_prediction = lasagne.layers.get_output(network)
    train_loss = lasagne.objectives.categorical_crossentropy(train_prediction, y)
    train_loss = train_loss.mean()

    # Regularization
    l2_penalty = lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2)
    train_loss += lambda_decay * l2_penalty

    params = lasagne.layers.get_all_params(network, trainable=True)

    # Updates to the parameters are defined here

    updates = lasagne.updates.nesterov_momentum(
        train_loss, params, learning_rate=lr, momentum=0.9)

    # deterministic=True disables stochastic layers such as dropout at eval time
    val_prediction = lasagne.layers.get_output(network, deterministic=True)
    val_loss = errors(val_prediction, y)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = errors(test_prediction, y)
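
    # `errors` is a small project helper; presumably the mean misclassification
    # rate over the minibatch, e.g.:
    def errors_sketch(prediction, y):
        return T.mean(T.neq(T.argmax(prediction, axis=1), y))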


    # Training, validation and test models are defined here

    train_fn = theano.function(
        [index],
        train_loss,
        updates=updates,
        givens={
            x: X_train[index * batch_size: (index + 1) * batch_size],
            y: y_train[index * batch_size: (index + 1) * batch_size]
        }
    )

    val_fn = theano.function(
        [index],
        val_loss,
        givens={
            x: X_val[index * batch_size: (index + 1) * batch_size],
            y: y_val[index * batch_size: (index + 1) * batch_size]
        }
    )

    test_fn = theano.function(
        [index],
        test_loss,
        givens={
            x: X_test[index * batch_size: (index + 1) * batch_size],
            y: y_test[index * batch_size: (index + 1) * batch_size]
        }
    )

    train_nn(train_fn, val_fn, test_fn,
             n_train_batches, n_valid_batches, n_test_batches, num_epochs,
             verbose=verbose)
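
`train_nn` comes from the project's utilities; the real helper also implements early stopping and returns a result record (see Example #1, where its train function additionally receives an iteration counter). A minimal, hypothetical sketch of the loop shape used here:

def train_nn_sketch(train_fn, val_fn, test_fn,
                    n_train_batches, n_valid_batches, n_test_batches,
                    n_epochs, verbose=False):
    best_val = test_score = float('inf')
    for epoch in range(n_epochs):
        for i in range(n_train_batches):
            train_fn(i)
        val = sum(val_fn(i) for i in range(n_valid_batches)) / n_valid_batches
        if val < best_val:
            best_val = val
            test_score = sum(test_fn(i)
                             for i in range(n_test_batches)) / n_test_batches
        if verbose:
            print('epoch %d: validation error %f' % (epoch + 1, val))
    return best_val, test_score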