Beispiel #1
0
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=(20, 50),
                    batch_size=500):
    """Train a LeNet-style convolutional network on MNIST with minibatch SGD.

    Builds two convolution/pooling layers, one fully-connected tanh layer
    and a logistic-regression output layer, then trains them with early
    stopping driven by the validation error.  Progress and the final
    scores are printed; nothing is returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing
                    (MNIST here); passed unchanged to ``load_data``

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each convolutional layer.  The
                   default is a tuple rather than a list so that no mutable
                   object is shared between calls; lists are still accepted.

    :type batch_size: int
    :param batch_size: number of examples per minibatch; trailing examples
                       that do not fill a whole minibatch are not used
    """

    rng = numpy.random.RandomState(23455)

    # load_data is expected to return three (inputs, labels) pairs:
    # training, validation and test.
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of whole minibatches for training, validation and
    # testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the hidden layer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with tanh activation
    # NOTE(review): the class name is spelled `HidedenLayer` here while the
    # comments call it HiddenLayer -- confirm it matches the name actually
    # defined/imported in this project before renaming.
    layer2 = HidedenLayer(rng,
                          input=layer2_input,
                          n_input=nkerns[1] * 4 * 4,
                          n_output=500,
                          activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    # run at least this many minibatch iterations regardless
    patience = 10000
    # when a significantly better model is found at iteration i, keep
    # going until at least iteration i * patience_increase
    patience_increase = 2
    # a validation loss below best * improvement_threshold is considered
    # a significant improvement
    improvement_threshold = 0.995
    # go through this many minibatches before checking the network on the
    # validation set: once per epoch, or every patience // 2 minibatches
    # if that is smaller
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # zero-based count of minibatch updates over all epochs
            # (named `iteration` so the builtin `iter` is not shadowed)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            # one SGD step; the returned minibatch cost is not needed here
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough;
                    # this must be tested before best_validation_loss is
                    # overwritten below
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # early stopping: patience has run out
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # `%` binds tighter than `+`, so only the last literal is formatted
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train a multilayer perceptron on MNIST with minibatch SGD.

    The cost is the negative log-likelihood plus L1 and L2 regularisation
    terms.  Training uses early stopping driven by the validation error.
    Progress and the final scores are printed; nothing is returned.

    The body only uses constructs that are valid on both Python 2 and
    Python 3 (single-argument ``print(...)``, ``//`` and ``range``).

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: weight of the L1 term added to the cost

    :type L2_reg: float
    :param L2_reg: weight of the squared-L2 term added to the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path of the dataset, passed unchanged to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch; trailing examples
                       that do not fill a whole minibatch are not used

    :type n_hidden: int
    :param n_hidden: number of hidden units
    """
    # load the dataset: three (inputs, labels) pairs are expected
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral on Python 3 as well;
    # with `/` they become floats there and cannot be passed to range().
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    # random number generator handed to the model
    rng = numpy.random.RandomState(1234)
    # build the MLP network
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )
    # compute the cost: NLL plus the weighted regularisation terms
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # functions computing the error on one test / validation minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of the cost with respect to every parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # SGD update rule: pair every parameter with its updated expression
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # returns the cost of one minibatch and applies `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 2  # on a significant improvement at iteration i,
    # keep going until at least i * patience_increase
    improvement_threshold = 0.995  # loss below best * threshold counts as
    # a significant improvement
    # validate once per epoch, or every patience // 2 minibatches if that
    # is smaller
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # one SGD step; the returned minibatch cost is not needed here
            train_model(minibatch_index)
            # zero-based count of minibatch updates over all epochs
            # (named `iteration` so the builtin `iter` is not shadowed)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # mean zero-one loss over the validation minibatches
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough;
                    # tested before best_validation_loss is overwritten
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # evaluate the new best model on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # early stopping: patience has run out
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # Written directly to stderr so the line works on Python 2 and 3 alike
    # (replaces the Python-2-only `print >> sys.stderr, ...`).
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
Beispiel #3
0
def test_mlp(learning_rate = 0.01,
            L1_reg = 0.00,
            L2_reg = 0.0001,
            n_epochs = 1000,
            dataset = 'mnist.pkl.gz',
            batch_size = 20,
            n_hidden=500):
    """
    This method implements stochastic gradient descent for an MLP.
    This is demonstrated on MNIST.

    Progress and the final scores are printed; nothing is returned.
    The body only uses constructs that are valid on both Python 2 and
    Python 3.

    :param learning_rate: Learning rate
    :type learning_rate: float

    :param L1_reg: l1 norm's weight when added to the cost
    :type L1_reg: float

    :param L2_reg: l2 norm's weight when added to the cost
    :type L2_reg: float

    :param n_epochs: maximum number of epochs to run the optimizer
    :type n_epochs: int

    :param dataset: Path to the dataset, here MNIST.
    :type dataset: string

    :param batch_size: size of the minibatches
    :type batch_size: int

    :param n_hidden: number of hidden units in the hidden layer
    :type n_hidden: int
    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    #Compute number of minibatches for training, validation and testing.
    #Floor division keeps the counts integral on Python 3 as well; trailing
    #   examples that do not fill a whole minibatch are not used.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size


    ###############
    #-BUILD MODEL-#
    ###############

    print('.........BUILDING MODEL..............')

    #Allocate symbolic variables for the data
    index = T.lscalar() #Index to a minibatch
    x = T.matrix('x') #MNIST data is present as rasterized images
    y = T.ivector('y') #Labels are presented as a vector of 1D integers

    rng = np.random.RandomState(1234)

    #Construct the MLP class
    classifier = MLP(rng = rng, input=x,
                    n_in = 28*28, n_hidden = n_hidden,n_out = 10)

    #The cost we minimize during training is the negative log likelihood
    #   plus the l1 and l2 regularization terms.
    #Here, the cost is expressed symbolically
    cost = classifier.negative_log_likelihood(y) \
        + L1_reg*classifier.L1 \
        + L2_reg*classifier.L2_sqr


    #Compiling a theano function that computes the mistakes that are made
    #   on a minibatch by the model
    test_model = theano.function(inputs = [index],
                outputs = classifier.errors(y),
                givens={
                    x: test_set_x[index*batch_size : (index+1)*batch_size],
                    y: test_set_y[index*batch_size : (index+1)*batch_size]})

    validate_model = theano.function(inputs = [index],
                    outputs = classifier.errors(y),
                    givens={
                        x: valid_set_x[index*batch_size : (index+1)*batch_size],
                        y: valid_set_y[index*batch_size : (index+1)*batch_size]})

    #Compute the gradient of the cost with respect to every parameter of
    #   the model; gparams[i] is the gradient for classifier.params[i]
    gparams = [T.grad(cost, param) for param in classifier.params]

    #Specify how to update the parameters of the model as a list of
    #   (variable, update expression) pairs: one plain SGD step each
    updates = [(param, param - learning_rate*gparam)
               for param, gparam in zip(classifier.params, gparams)]

    #Compiling a theano function 'train_model' that returns the cost but
    #   at the same time updates the parameter of the model based on the
    #   rules in 'updates'
    train_model = theano.function(inputs=[index],outputs=cost,
                    updates=updates,
                    givens={
                        x: train_set_x[index*batch_size:(index+1)*batch_size],
                        y: train_set_y[index*batch_size:(index+1)*batch_size]})


    ###############
    #-TRAIN MODEL-#
    ###############

    print('...........TRAINING.............')

    #Early stopping parameters
    patience = 10000 #Run at least this many minibatch iterations
    patience_increase = 2 #On a significant improvement at iteration i,
                          #   keep going until at least i*patience_increase
    improvement_threshold = 0.995 #A validation loss below
                                  #   best*improvement_threshold is
                                  #   considered a significant improvement

    #Go through this many minibatches before computing the error on the
    #   validation set: once per epoch, or every patience//2 minibatches
    #   if that is smaller
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            #One SGD step; the returned minibatch cost is not needed here
            train_model(minibatch_index)
            #Zero-based count of minibatch updates performed over all
            #   epochs (named 'iteration' so the builtin 'iter' is not
            #   shadowed)
            iteration = (epoch-1)*n_train_batches + minibatch_index

            if (iteration+1) % validation_frequency == 0:
                #Compute the zero-one loss on the validation set
                validation_losses = [validate_model(i) for i in
                                    range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i,validation error %f %%' %
                        (epoch,minibatch_index+1,n_train_batches,
                            this_validation_loss*100.))

                #If this is the best validation loss so far:
                if this_validation_loss < best_validation_loss:
                    #Improve patience if loss improvement is good enough.
                    #This comparison must use the PREVIOUS best loss, so it
                    #   has to run before best_validation_loss is
                    #   overwritten; otherwise it can never be true and
                    #   patience would never grow.
                    if this_validation_loss < best_validation_loss \
                                                *improvement_threshold:
                        patience = max(patience, iteration*patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    #Test it on the test set
                    test_losses = [test_model(i) for i in
                                    range(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print('epoch %i,minibatch %i/%i, test error of '
                            'best model so far %f %%' %
                         (epoch,minibatch_index+1,n_train_batches,test_score*100))

            #Early stopping: patience has run out
            if patience <= iteration:
                done_looping = True
                break

    #Report the validation loss as a percentage (the format string labels
    #   it with %) and the iteration 1-based, matching the per-minibatch
    #   messages above
    print('Optimization Complete. Best validation score of %f %% '
          'obtained at iteration %i, with test performance of %f %%' %
        (best_validation_loss*100., best_iter+1, test_score*100))
def test_MLP(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):
    """Train an MLP classifier with minibatch SGD and early stopping.

    The optimised cost is the classifier's negative log-likelihood plus
    ``L1_reg * classifier.L1`` and ``L2_reg * classifier.L2_sqr``.
    Progress, the best validation/test errors and the run time are
    printed; nothing is returned.

    :type learning_rate: float
    :param learning_rate: step size of each gradient-descent update

    :type L1_reg: float
    :param L1_reg: weight of the L1 term in the cost

    :type L2_reg: float
    :param L2_reg: weight of the squared-L2 term in the cost

    :type n_epochs: int
    :param n_epochs: maximal number of passes over the training set

    :type dataset: string
    :param dataset: dataset path, passed unchanged to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: number of hidden units handed to ``MLP``
    """
    (train_set_x, train_set_y), (valid_set_x, valid_set_y), \
        (test_set_x, test_set_y) = load_data(dataset)[:3]

    # number of whole minibatches available in each split
    n_train_batches, n_valid_batches, n_test_batches = [
        split.get_value(borrow=True).shape[0] // batch_size
        for split in (train_set_x, valid_set_x, test_set_x)
    ]

    print("... building the model")

    index = T.lscalar()  # minibatch number
    x = T.matrix('x')    # minibatch inputs
    y = T.ivector('y')   # minibatch labels

    classifier = MLP(rng=numpy.random.RandomState(1234),
                     input=x,
                     n_input=28 * 28,
                     n_hidden=n_hidden,
                     n_output=10)

    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    def _minibatch(inputs, labels):
        # substitute x / y by the `index`-th minibatch of one data split
        return {
            x: inputs[index * batch_size:(index + 1) * batch_size],
            y: labels[index * batch_size:(index + 1) * batch_size]
        }

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=_minibatch(test_set_x, test_set_y))

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=_minibatch(valid_set_x, valid_set_y))

    # plain SGD: one (parameter, new value) pair per model parameter
    updates = []
    for param in classifier.params:
        gradient = T.grad(cost, param)
        updates.append((param, param - learning_rate * gradient))

    # returns the minibatch cost and applies `updates` as a side effect
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=_minibatch(train_set_x, train_set_y))

    print('... training')

    # early-stopping state
    patience = 10000               # minimum number of iterations to run
    patience_increase = 2          # growth factor applied on a clear win
    improvement_threshold = 0.995  # relative loss that counts as a clear win
    # validate once per epoch, or every patience // 2 minibatches if smaller
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    keep_training = True

    while keep_training and epoch < n_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            # number of updates done so far, counted from zero
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                # mean zero-one loss over all validation minibatches
                this_validation_loss = numpy.mean(
                    [validate_model(i) for i in range(n_valid_batches)])

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # compare against the previous best before replacing it
                    clear_win = (this_validation_loss <
                                 best_validation_loss * improvement_threshold)
                    if clear_win:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # score the new best model on the test split
                    test_score = numpy.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    print('     epoch %i, minibatch %i/%i, test error of '
                          'best model %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                # out of patience: leave both loops
                keep_training = False
                break

    end_time = timeit.default_timer()
    print('Optimization complete. Best validation score of %f %% '
          'obtained at iteration %i, with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    elapsed_minutes = (end_time - start_time) / 60.
    script_name = os.path.split(__file__)[1]
    print('The code for file ' + script_name +
          ' ran for %.2fm' % elapsed_minutes,
          file=sys.stderr)
Beispiel #5
0
def evaluate_lenet5(
    learning_rate = 0.1,
    n_epochs = 200,
    dataset = 'mnist.pkl.gz',
    nkerns = [20, 50],
    batch_size = 500):
    """Train a LeNet-style convolutional network on MNIST with minibatch SGD.

    The model is two convolution + max-pooling layers, one fully-connected
    tanh layer and a logistic-regression output layer. Training uses
    patience-based early stopping driven by the validation error; every time
    a new best validation error is reached, the test error is measured and
    the weights of the first three layers are pickled to 'bestCNNModel.pkl'.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing
                    (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer; only
                   nkerns[0] and nkerns[1] are read

    :type batch_size: int
    :param batch_size: number of examples per minibatch; it is baked into the
                       symbolic shapes of the convolutional layers
    """

    # Random number generator used to initialise the layer weights
    rng = numpy.random.RandomState(23455)

    ## --------------------------------------------------------------------------------------
    ##  Load MNIST data (using load_data() and the dataset path)
    ## --------------------------------------------------------------------------------------
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]  # training set
    valid_set_x, valid_set_y = datasets[1]  # validation set
    test_set_x, test_set_y = datasets[2]  # test set

    # Number of whole minibatches in each split (a trailing partial batch is dropped)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    #########################################################################################
    #                                    BUILD THE MODEL                                    #
    #########################################################################################
    print('... building the model')

    # Symbolic inputs; x and y stand for one minibatch
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    ## --------------------------------------------------------------------------------------
    ##  FIRST layer: convolution + max-pooling
    ## --------------------------------------------------------------------------------------

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) to a 4D tensor,
    # compatible with LeNetConvPoolLayer. (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ##  SECOND layer: convolution + max-pooling
    ## --------------------------------------------------------------------------------------

    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    ## --------------------------------------------------------------------------------------
    ##  THIRD layer: fully-connected hidden layer
    ## --------------------------------------------------------------------------------------

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels). flatten(2) produces a matrix of shape
    # (batch_size, nkerns[1] * 4 * 4), i.e. (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # Fully-connected layer with tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    ## --------------------------------------------------------------------------------------
    ##  FOURTH layer: logistic-regression classifier over the 10 digit classes
    ## --------------------------------------------------------------------------------------
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    ## --------------------------------------------------------------------------------------
    ##  Define cost and test functions
    ## --------------------------------------------------------------------------------------
    cost = layer3.negative_log_likelihood(y)  # quantity minimised by SGD

    # Compiled functions returning the model's error on minibatch `index`
    # of the test set ...
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # ... and of the validation set
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # All model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # Gradient of the cost with respect to each parameter
    grads = T.grad(cost, params)

    # Plain SGD update rule, built by pairing every parameter with its
    # gradient instead of writing one rule per parameter by hand.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # `train_model` returns the cost on minibatch `index` and, as a side
    # effect, applies `updates` to the model parameters.
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    #########################################################################################
    #                                       TRAIN MODEL                                     #
    #########################################################################################
    print('... training the model')

    ## --------------------------------------------------------------------------------------
    ##  Early-stopping parameters
    ## --------------------------------------------------------------------------------------
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # Validate after this many minibatches: once per epoch, or more often if
    # an epoch is longer than half the initial patience.
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    ## --------------------------------------------------------------------------------------
    ##  Main loop: epochs over minibatches
    ## --------------------------------------------------------------------------------------
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        for minibatch_index in range(n_train_batches):

            # 0-based count of minibatch updates since the start of training
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # Progress message every 100 iterations
            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            # One SGD step on EVERY minibatch. This call must stay outside
            # the progress-message conditional above, otherwise the model is
            # only updated once per 100 iterations.
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # Mean zero-one loss over the validation set
                validation_losses = [
                    validate_model(i)
                    for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (
                          epoch,
                          minibatch_index + 1,
                          n_train_batches,
                          this_validation_loss * 100.
                      )
                )

                # New best validation score
                if this_validation_loss < best_validation_loss:

                    # Extend patience only for a significant improvement;
                    # compared against the PREVIOUS best, so this must run
                    # before best_validation_loss is overwritten below.
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Measure the test error of this new best model
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                    ## -----------------------------------------------------------------
                    ##  Save model parameters using cPickle
                    ## -----------------------------------------------------------------
                    # Written as consecutive pickles in the order
                    # layer0.W, layer0.b, layer1.W, layer1.b, layer2.W, layer2.b.
                    # NOTE(review): layer3 (the logistic-regression output
                    # layer) is not written, so the file alone does not hold
                    # the whole trained model - confirm this is intended.
                    with open('bestCNNModel.pkl', 'wb') as save_file:
                        for layer in (layer0, layer1, layer2):
                            cPickle.dump(layer.W, save_file)
                            cPickle.dump(layer.b, save_file)

            # Stop once the patience budget is used up
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()

    # Final summary
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
Beispiel #6
0
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):
    """
    Train a multilayer perceptron on MNIST with minibatch stochastic
    gradient descent and patience-based early stopping.

    :param learning_rate: Learning rate
    :type learning_rate: float

    :param L1_reg: l1 norm's weight when added to the cost
    :type L1_reg: float

    :param L2_reg: l2 norm's weight when added to the cost
    :type L2_reg: float

    :param n_epochs: maximum number of epochs to run the optimizer
    :type n_epochs: int

    :param dataset: Path to the dataset, here MNIST.
    :type dataset: string

    :param batch_size: size of the minibatches
    :type batch_size: int

    :param n_hidden: number of hidden units in the hidden layer
    :type n_hidden: int
    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches per split. Floor division keeps these
    # integers on Python 3 as well (they are used as range() bounds).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ###############
    #-BUILD MODEL-#
    ###############

    print('.........BUILDING MODEL..............')

    # Allocate symbolic variables for the data
    index = T.lscalar()  # Index to a minibatch
    x = T.matrix('x')  # MNIST data is presented as rasterized images
    y = T.ivector('y')  # Labels are presented as a 1D vector of integers

    rng = np.random.RandomState(1234)

    # Construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)

    # The cost we minimize during training is the negative log likelihood
    #   plus the l1 and l2 regularization terms.
    # Here, the cost is expressed symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Compiled functions that compute the mistakes made by the model on
    #   minibatch `index` of the test / validation set
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Gradient of the cost with respect to each model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # SGD update rule as a list of (variable, update expression) pairs;
    #   zip pairs each parameter with its own gradient.
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # 'train_model' returns the cost on minibatch `index` and at the same
    #   time updates the parameters of the model based on 'updates'
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    #-TRAIN MODEL-#
    ###############

    print('...........TRAINING.............')

    # Early stopping parameters
    patience = 10000  # Run at least this many minibatch iterations.
    patience_increase = 2  # Wait this much longer when a new best is found
    improvement_threshold = 0.995  # A relative improvement of this much is
    # considered significant

    # Validate once per epoch, or every patience / 2 minibatches when an
    #   epoch is longer than that, so that the validation error is measured
    #   before the initial patience can run out.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_model(minibatch_index)

            # 0-based count of minibatch updates since training started:
            #   full epochs already completed times batches per epoch, plus
            #   the position inside the current epoch.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i,validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # If this is the best validation loss so far:
                if this_validation_loss < best_validation_loss:

                    # Extend patience if the improvement is significant.
                    # This must be tested against the PREVIOUS best, i.e.
                    # before best_validation_loss is overwritten; otherwise
                    # the condition is x < 0.995 * x and never holds.
                    # When it holds, training is allowed to run until at
                    # least patience_increase times the current iteration.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(
                        'epoch %i,minibatch %i/%i, test error of '
                        'best model so far %f %%' %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100))

            # Stop once the patience budget is used up
            if patience <= iteration:
                done_looping = True
                break

    # Both scores are fractions in [0, 1]; scale them to percent to match
    #   the '%%' in the message. The iteration is reported 1-based.
    print(
        'Optimization Complete. Best validation score of %f %% '
        'obtained at iteration %i, with test performance of %f %%' %
        (best_validation_loss * 100, best_iter + 1, test_score * 100))
Beispiel #7
0
def perceptronTraining(
    learning_rate = 0.01,
    L1_reg = 0.00,
    L2_reg = 0.0001,
    n_epochs = 1000,
    dataset = 'mnist.pkl.gz',
    batch_size = 20,
    n_hidden = 500):
    """
    Fit a multilayer perceptron to MNIST by minibatch stochastic gradient
    descent, stopping early when the validation error stops improving.

    Each time a new best validation error is reached, the test error is
    measured and `classifier.params` is pickled to 'bestMLPModel.pkl'.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: weight of the L1-norm regularization term in the total
    cost

    :type L2_reg: float
    :param L2_reg: weight of the squared L2-norm regularization term in the
    total cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: value forwarded to MLP as its n_hidden argument
    """

    # ---- Data ---------------------------------------------------------------
    splits = load_data(dataset)
    train_set_x, train_set_y = splits[0]
    valid_set_x, valid_set_y = splits[1]
    test_set_x, test_set_y = splits[2]

    def count_batches(shared_x):
        # Whole minibatches available in a split (remainder is dropped)
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    # ---- Model --------------------------------------------------------------
    print('... building the model')

    batch_idx = T.lscalar()  # which minibatch to use
    x = T.matrix('x')  # rasterized images of one minibatch
    y = T.ivector('y')  # integer labels of one minibatch

    # Seeded generator handed to the MLP constructor
    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden, n_out=10
    )

    # Training objective: negative log likelihood plus both weighted
    # regularization terms, expressed symbolically.
    nll = classifier.negative_log_likelihood(y)
    cost = nll + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    def minibatch_of(shared_var):
        # Symbolic slice selecting minibatch `batch_idx` of a shared dataset
        return shared_var[batch_idx * batch_size:(batch_idx + 1) * batch_size]

    def compile_error_fn(set_x, set_y):
        # Function of the minibatch index returning the classifier's errors
        return theano.function(
            inputs=[batch_idx],
            outputs=classifier.errors(y),
            givens={x: minibatch_of(set_x), y: minibatch_of(set_y)},
        )

    test_model = compile_error_fn(test_set_x, test_set_y)
    validate_model = compile_error_fn(valid_set_x, valid_set_y)

    # One gradient per parameter, then the matching plain-SGD update pairs
    gradients = [T.grad(cost, p) for p in classifier.params]
    sgd_updates = []
    for p, g in zip(classifier.params, gradients):
        sgd_updates.append((p, p - learning_rate * g))

    # Returns the cost of a minibatch and applies the SGD updates
    train_model = theano.function(
        inputs=[batch_idx],
        outputs=cost,
        updates=sgd_updates,
        givens={
            x: minibatch_of(train_set_x),
            y: minibatch_of(train_set_y),
        },
    )

    # ---- Training -----------------------------------------------------------
    print('... training the model')

    # Early-stopping state
    patience = 10000  # minimum number of minibatch iterations to run
    patience_increase = 2  # factor applied to the iteration on a good new best
    improvement_threshold = 0.995  # relative gain regarded as significant
    # Number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    keep_going = True
    while keep_going and epoch < n_epochs:
        epoch += 1
        # Iterations completed in all previous epochs
        epoch_offset = (epoch - 1) * n_train_batches

        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            step = epoch_offset + minibatch_index  # 0-based iteration number

            if (step + 1) % validation_frequency == 0:
                # Mean zero-one loss over the validation minibatches
                this_validation_loss = numpy.mean(
                    [validate_model(b) for b in range(n_valid_batches)]
                )
                print('epoch %i, minibatch %i/%i, validation error %f %%' % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.,
                ))

                if this_validation_loss < best_validation_loss:
                    # Compare with the previous best before overwriting it
                    significant = (
                        this_validation_loss
                        < best_validation_loss * improvement_threshold
                    )
                    if significant:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # Test error of the new best model
                    test_score = numpy.mean(
                        [test_model(b) for b in range(n_test_batches)]
                    )
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of '
                         'best model %f %%') % (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.,
                        )
                    )

                    # Persist the parameters of the best model so far
                    with open('bestMLPModel.pkl', 'wb') as out:
                        pickle.dump(classifier.params, out)

            # Patience exhausted: leave both loops
            if patience <= step:
                keep_going = False
                break

    end_time = timeit.default_timer()

    # ---- Summary ------------------------------------------------------------
    summary = (
        'Optimization complete. Best validation score of %f %% '
        'obtained at iteration %i, with test performance %f %%'
    )
    print(summary % (
        best_validation_loss * 100., best_iter + 1, test_score * 100.))

    elapsed_minutes = (end_time - start_time) / 60.
    print(('The code for file ' +
           os.path.basename(__file__) +
           ' ran for %.2fm' % elapsed_minutes), file=sys.stderr)