Example #1
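Constructor of a one-hidden-layer MLP: a tanh `HiddenLayer` from `hlv_layers` feeds a `LogisticRegression` output layer, and the L1/L2 regularization terms and the combined parameter list are assembled from both layers.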
     def __init__(self, rng, input, n_in, n_hidden, n_out):
         """Initialize the parameters for the multilayer perceptron
 
         :type rng: numpy.random.RandomState
         :param rng: a random number generator used to initialize weights
 
         :type input: theano.tensor.TensorType
         :param input: symbolic variable that describes the input of the
         architecture (one minibatch)
 
         :type n_in: int
         :param n_in: number of input units, the dimension of the space in
         which the datapoints lie
 
         :type n_hidden: int
         :param n_hidden: number of hidden units
 
         :type n_out: int
         :param n_out: number of output units, the dimension of the space in
         which the labels lie
 
         """
 
         # Since we are dealing with a one hidden layer MLP, this will translate
         # into a HiddenLayer with a tanh activation function connected to the
         # LogisticRegression layer; the activation function can be replaced by
         # sigmoid or any other nonlinear function
         self.hiddenLayer = hlv_layers.HiddenLayer(rng=rng, input=input,
                                        n_in=n_in, n_out=n_hidden,
                                        activation=T.tanh)
 
         # The logistic regression layer gets as input the hidden units
         # of the hidden layer
         self.logRegressionLayer = hlv_layers.LogisticRegression(
             input=self.hiddenLayer.output,
             n_in=n_hidden,
             n_out=n_out)
 
         # L1 norm ; one regularization option is to enforce L1 norm to
         # be small
         self.L1 = abs(self.hiddenLayer.W).sum() \
                 + abs(self.logRegressionLayer.W).sum()
 
         # square of L2 norm ; one regularization option is to enforce
         # square of L2 norm to be small
         self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                     + (self.logRegressionLayer.W ** 2).sum()
 
         # negative log likelihood of the MLP is given by the negative
         # log likelihood of the output of the model, computed in the
         # logistic regression layer
         self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
         # same holds for the function computing the number of errors
         self.errors = self.logRegressionLayer.errors
 
         # the parameters of the model are the parameters of the two layers it
         # is made of
         self.params = self.hiddenLayer.params + self.logRegressionLayer.params
Example #2
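MLP with a configurable stack of hidden layers. Besides building the layers and the L1/L2 regularization terms, the constructor compiles Theano functions for full-batch training, validation, and testing, plus a minibatch training function.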
    def __init__(self,
                 n_in,
                 n_out,
                 data,
                 batch_size,
                 rng,
                 n_hidden,
                 learning_rate,
                 activation=T.tanh,
                 L1_reg=0.,
                 L2_reg=0.0001):
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')
        y = T.ivector('y')
        X = T.matrix('X')
        Y = T.ivector('Y')

        if (rng is None):
            rng = np.random.RandomState(1234)
        self.batch_size = batch_size
        self.n_train_batches = data.train.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_test_batches = data.test.x.get_value(
            borrow=True).shape[0] / batch_size

        self.x = x
        self.y = y
        self.data = data
        # Define the layers
        layer_input = x
        layer_n_in = n_in
        self.hiddenLayers = []
        for i in range(len(n_hidden) - 1):
            layer_n_out = n_hidden[i + 1]
            hiddenLayer = hlv_layers.HiddenLayer(rng=rng,
                                                 input=layer_input,
                                                 n_in=layer_n_in,
                                                 n_out=layer_n_out,
                                                 activation=activation)
            self.hiddenLayers.append(hiddenLayer)
            layer_input = hiddenLayer.output
            layer_n_in = layer_n_out

        self.logRegressionLayer = hlv_layers.LogisticRegression(
            input=layer_input, n_in=layer_n_in, n_out=n_out)

        # Define regularization
        self.L1 = 0
        self.L2_sqr = 0
        # define parameters
        self.params = self.logRegressionLayer.params

        for HL in self.hiddenLayers:
            self.L1 = self.L1 + abs(HL.W).sum()
            self.L2_sqr = self.L2_sqr + (HL.W ** 2).sum()

            self.params = self.params + HL.params

        self.L1 = self.L1 + abs(self.logRegressionLayer.W).sum()
        self.L2_sqr = self.L2_sqr + (self.logRegressionLayer.W**2).sum()

        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors

        # define the cost
        self.cost = self.negative_log_likelihood(y) \
             + L1_reg * self.L1 \
             + L2_reg * self.L2_sqr

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        self.updates = [(param, param - learning_rate * grad)
                        for param, grad in zip(self.params, self.grads)]

        # compile a Theano function `train_model` that returns the error and,
        # at the same time, updates the model parameters based on the rules
        # defined in `updates`

        self.train_model = theano.function(inputs=[X, Y],
                                           outputs=self.errors(y),
                                           updates=self.updates,
                                           givens={
                                               x: X,
                                               y: Y
                                           })
        self.test_model = theano.function(inputs=[],
                                          outputs=self.errors(y),
                                          givens={
                                              x: self.data.test.x,
                                              y: self.data.test.y
                                          })

        self.validate_model = theano.function(inputs=[],
                                              outputs=self.errors(y),
                                              givens={
                                                  x: self.data.valid.x,
                                                  y: self.data.valid.y
                                              })
        self.train_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.data.train.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.train.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
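A minimal driver sketch for the constructor above. The class name `MLP` and the `load_data_wrapper` helper are assumptions for illustration; the snippet itself only assumes a `data` object whose `train`/`valid`/`test` members expose Theano shared variables `.x`/`.y`.

# Hypothetical usage sketch; `MLP` and `load_data_wrapper` are assumed names,
# not shown in the snippet above.
import numpy as np

data = load_data_wrapper()                # assumed helper returning shared train/valid/test sets
mlp = MLP(n_in=28 * 28, n_out=10, data=data, batch_size=20,
          rng=np.random.RandomState(1234),
          n_hidden=[28 * 28, 500],        # n_hidden[0] is skipped, so this builds one 500-unit layer
          learning_rate=0.01)

for epoch in range(10):
    for i in range(int(mlp.n_train_batches)):
        mlp.train_model_minibatch(i)      # one SGD step on minibatch i
    print('epoch %d, validation error %f' % (epoch, mlp.validate_model()))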
Example #3
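The same MLP with dropout: a `DropOutLayer` sits between the hidden layer and a dropout-trained `LogisticRegression` layer, while a second `LogisticRegression` layer sharing the same weights (but fed the undropped hidden output) is used to compute errors; the dropout mask is resampled through the update list on every training step.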
    def __init__(self,
                 n_in,
                 n_out,
                 data,
                 batch_size,
                 rng,
                 n_hidden,
                 learning_rate,
                 activation=T.tanh,
                 L1_reg=0.,
                 L2_reg=0.0001,
                 prob_drop=0.2):
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')
        y = T.ivector('y')
        X = T.matrix('X')
        Y = T.ivector('Y')

        if (rng is None):
            rng = np.random.RandomState(1234)
        self.batch_size = batch_size
        self.n_train_batches = data.train.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_test_batches = data.test.x.get_value(
            borrow=True).shape[0] / batch_size

        self.x = x
        self.y = y
        self.data = data
        # Define the layers
        self.hiddenLayer = hlv_layers.HiddenLayer(rng=rng,
                                                  input=x,
                                                  n_in=n_in,
                                                  n_out=n_hidden,
                                                  activation=activation)
        self.dropOutLayer = hlv_layers.DropOutLayer(self.hiddenLayer.output,
                                                    n_hidden)
        self.logRegressionLayer_do = hlv_layers.LogisticRegression(
            input=self.dropOutLayer.output, n_in=n_hidden, n_out=n_out)
        self.logRegressionLayer = hlv_layers.LogisticRegression(
            input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)
        # Make the vanilla logistic regression layer have the same parameters
        #    as the dropout layer
        self.logRegressionLayer.W = self.logRegressionLayer_do.W
        self.logRegressionLayer.b = self.logRegressionLayer_do.b
        self.logRegressionLayer.p_y_given_x = T.nnet.softmax(
            T.dot(self.hiddenLayer.output, self.logRegressionLayer.W) +
            self.logRegressionLayer.b)
        self.logRegressionLayer.y_pred = T.argmax(
            self.logRegressionLayer.p_y_given_x, axis=1)

        # Define regularization
        self.L1 = abs(self.hiddenLayer.W).sum() \
                + abs(self.logRegressionLayer_do.W).sum()
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                    + (self.logRegressionLayer_do.W ** 2).sum()

        self.negative_log_likelihood = self.logRegressionLayer_do.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors

        # define the cost
        self.cost = self.negative_log_likelihood(y) \
             + L1_reg * self.L1 \
             + L2_reg * self.L2_sqr

        # define parameters
        self.params = self.hiddenLayer.params + self.logRegressionLayer_do.params

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        self.updates = [(param, param - learning_rate * grad)
                        for param, grad in zip(self.params, self.grads)]

        #  Just dropout stuff
        ##########################
        self.prob_drop = prob_drop
        self.srng = theano.tensor.shared_randomstreams.RandomStreams(0)

        def random_drop_mask():
            # p=1-p because 1's indicate keep and p is prob of dropping
            mask = self.srng.binomial(n=1,
                                      p=1 - prob_drop,
                                      size=self.dropOutLayer.drop_mask.shape)
            # The cast is important because
            # int * float32 = float64 which pulls things off the gpu
            output = T.cast(mask, theano.config.floatX)
            return output

        self.dropOutLayer.drop_mask.set_value(random_drop_mask().eval())
        self.updates.append((self.dropOutLayer.drop_mask, random_drop_mask()))

        # compile a Theano function `train_model` that returns the error and,
        # at the same time, updates the model parameters based on the rules
        # defined in `updates`

        self.train_model = theano.function(inputs=[X, Y],
                                           outputs=self.errors(y),
                                           updates=self.updates,
                                           givens={
                                               x: X,
                                               y: Y
                                           })
        self.test_model = theano.function(inputs=[],
                                          outputs=self.errors(y),
                                          givens={
                                              x: self.data.test.x,
                                              y: self.data.test.y
                                          })

        self.validate_model = theano.function(inputs=[],
                                              outputs=self.errors(y),
                                              givens={
                                                  x: self.data.valid.x,
                                                  y: self.data.valid.y
                                              })
        self.train_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.data.train.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.train.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
Example #4
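Plain logistic regression classifier trained with SGD; the constructor compiles Theano functions for full-batch training on raw arrays, validation, testing, and minibatch training.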
    def __init__(self, n_in, n_out, data, learning_rate=0.001, batch_size=1):
        index = T.lscalar()  # index to a [mini]batch
        X = T.matrix()
        Y = T.ivector()
        x = T.matrix('x')
        y = T.ivector('y')

        self.batch_size = batch_size
        self.n_train_batches = data.train.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_test_batches = data.test.x.get_value(
            borrow=True).shape[0] / batch_size

        self.x = x
        self.y = y
        self.data = data
        self.classifier = hlv_layers.LogisticRegression(input=self.x,
                                                        n_in=n_in,
                                                        n_out=n_out)
        self.cost = self.classifier.negative_log_likelihood(y)

        self.params = [self.classifier.W, self.classifier.b]

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        self.updates = [(param, param - learning_rate * grad)
                        for param, grad in zip(self.params, self.grads)]

        # Model operations (training/testing)
        self.train_model = theano.function(inputs=[X, Y],
                                           outputs=self.cost,
                                           updates=self.updates,
                                           givens={
                                               x: X,
                                               y: Y
                                           })
        self.test_model = theano.function(inputs=[],
                                          outputs=self.classifier.errors(y),
                                          givens={
                                              x: self.data.test.x,
                                              y: self.data.test.y
                                          })

        self.validate_model = theano.function(
            inputs=[],
            outputs=self.classifier.errors(y),
            givens={
                x: self.data.valid.x,
                y: self.data.valid.y
            })
        self.train_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.data.train.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.train.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
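A hedged call sketch for the full-batch `train_model` compiled above, which takes raw NumPy arrays. The class name `LogReg` is an assumption (the snippet only shows `__init__`), and `data` stands for the same dataset wrapper used throughout these examples.

# Hypothetical usage; `LogReg`, `data`, and the array shapes are assumptions.
import numpy as np

clf = LogReg(n_in=28 * 28, n_out=10, data=data, learning_rate=0.001, batch_size=600)
X_batch = np.zeros((600, 28 * 28), dtype='float32')  # placeholder feature batch
Y_batch = np.zeros(600, dtype='int32')               # placeholder label batch
cost = clf.train_model(X_batch, Y_batch)             # one SGD step on the given arrays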
Example #5
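Convolutional network for 64x64 single-channel images: two convolution/pooling stages feed a 500-unit hidden layer and a `LogisticRegression` output; only minibatch Theano functions for training, validation, and testing are compiled.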
    def __init__(self,
                 n_in,
                 n_out,
                 data,
                 n_hidden,
                 batch_size,
                 rng,
                 learning_rate,
                 activation=T.tanh,
                 L1_reg=0.,
                 L2_reg=0.0001):

        # Define classifier independent stuff
        #####################################

        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')
        y = T.ivector('y')
        X = T.matrix('X')
        Y = T.ivector('Y')

        if (rng is None):
            rng = np.random.RandomState(23455)
        self.batch_size = batch_size
        self.n_train_batches = data.train.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_test_batches = data.test.x.get_value(
            borrow=True).shape[0] / batch_size

        self.x = x
        self.y = y
        self.data = data

        # Define the model structure
        ############################
        """
        layerClasses = {
            'conv': hlv_layers.ConvLayer,
            'pool': hlv_layers.PoolingLayer,
            'hidden': hlv_layers.HiddenLayer,
            'logistic': hlv_layers.LogisticRegression,
            'flatten': hlv_layers.FlattenLayer
            'reshape': hlv_layers.ReshapeLayer
            }
        
        Layers = []
        layer_input = x
        layer_n_input = n_in
        for layer_idx in range(len(layerSpecs)):
            layerType, layerConfig = layerSpecs[layer_idx]            
            layerClass = layerClasses[layerType]
            new_layer = layerClass(        input = layer_input,
                                           shape_in = layer_n_in,                                           
                                           data = data,
                                           rng = rng, 
                                           **config)
        """

        self.convLayer1 = hlv_layers.ConvLayer(rng=rng,
                                               input=x.reshape(
                                                   (-1, 1, 64, 64)),
                                               filter_shape=(n_hidden[0], 1,
                                                             13, 13),
                                               image_shape=(batch_size, 1, 64,
                                                            64),
                                               activation=activation,
                                               poolsize=(4, 4))
        self.poolingLayer1 = hlv_layers.PoolingLayer(
            rng=rng,
            input=self.convLayer1.output,
            input_shape=(0),
            poolsize=(4, 4))
        self.convLayer2 = hlv_layers.ConvLayer(
            rng=rng,
            input=self.poolingLayer1.output.reshape((-1, n_hidden[0], 13, 13)),
            filter_shape=(n_hidden[1], n_hidden[0], 4, 4),
            image_shape=(batch_size, n_hidden[0], 13, 13),
            activation=activation,
            poolsize=(2, 2))
        self.poolingLayer2 = hlv_layers.PoolingLayer(
            rng=rng,
            input=self.convLayer2.output,
            input_shape=(0),
            poolsize=(2, 2))
        self.hiddenLayer = hlv_layers.HiddenLayer(
            rng=rng,
            input=self.poolingLayer2.output.flatten(2),
            n_in=n_hidden[1] * 5 * 5,
            n_out=500,
            activation=activation)

        self.logRegressionLayer = hlv_layers.LogisticRegression(
            input=self.hiddenLayer.output, n_in=500, n_out=n_out)

        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors

        # define the cost
        self.cost = self.negative_log_likelihood(y)
        #\
        # + L1_reg * self.L1 \
        # + L2_reg * self.L2_sqr

        # define parameters
        self.params = self.convLayer1.params + self.convLayer2.params + self.hiddenLayer.params + self.logRegressionLayer.params
        #self.params = self.logRegressionLayer.params

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        self.updates = [(param, param - learning_rate * grad)
                        for param, grad in zip(self.params, self.grads)]

        # the full-batch `train_model`/`test_model`/`validate_model` functions
        # are left disabled below (kept as a docstring); only the minibatch
        # functions that follow are compiled
        """
        self.train_model = theano.function(inputs=[X, Y],
                                            outputs=self.errors(y),
                                            updates=self.updates,
                                            givens={
                                                x: X,
                                                y: Y})
        self.test_model = theano.function(  inputs=[],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.test.x,
                                                y: self.data.test.y})
                                                
        self.validate_model = theano.function(inputs=[],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.valid.x,
                                                y: self.data.valid.y}) 
        """

        self.test_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.errors(y),
            givens={
                x:
                self.data.test.x[index * self.batch_size:(index + 1) *
                                 self.batch_size],
                y:
                self.data.test.y[index * self.batch_size:(index + 1) *
                                 self.batch_size]
            })

        self.validate_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.errors(y),
            givens={
                x:
                self.data.valid.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.valid.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
        self.train_model_minibatch = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.data.train.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.train.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
Example #6
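End-to-end training script: logistic regression on MNIST, trained with minibatch SGD and early stopping on the validation error.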
def logistic_sgd():
    learning_rate = 0.13
    n_epochs = 100
    dataset = 'mnist.pkl.gz'
    batch_size = 600

    # get the data ready
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                           # [int] labels
    classifier = hlv_layers.LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compile a Theano function `train_model` that returns the cost and, at the
    # same time, updates the model parameters based on the rules defined in
    # `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = np.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           ' with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print 'The code ran for %.1fs' % (end_time - start_time)
    if (test_score*100 < 7.5):
        print >> sys.stderr, ('Test OK (logistic_sgd)')
    else:
        print >> sys.stderr, ('Test FAILED!')