Example #1
# Assumed imports for this snippet; HiddenLayer, DenoisingAutoEncoder and
# LogisticRegression are defined elsewhere in the same project.
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


class StackedDenoisingAutoEncoders(object):
    def __init__(self,
                 random_generator,
                 theano_random_generator=None,
                 x_dim=28 * 28,
                 y_dim=10,
                 hidden_layer_sizes=[500, 500],
                 corruption_levels=[0.1, 0.1]):
        """
        """
        # Declare empty sigmoid layer array for MLP
        self.sigmoid_layers = []

        # Declare an empty array of DenoisingAutoEncoder
        self.autoencoder_layers = []

        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        if theano_random_generator is None:
            self.theano_random_generator = RandomStreams(
                random_generator.randint(2**30))
        else:
            self.theano_random_generator = theano_random_generator

        # Inputs using Theano
        self.x = T.matrix("x")
        self.y = T.ivector("y")

        # Initialize all parameters
        for i in range(self.n_layers):
            # Define x and y dimensions
            if i == 0:
                internal_x_dim = x_dim
            else:
                internal_x_dim = hidden_layer_sizes[i - 1]
            internal_y_dim = hidden_layer_sizes[i]

            # Find inputs
            if i == 0:
                internal_input = self.x
            else:
                internal_input = self.sigmoid_layers[i - 1].output

            # Define Sigmoid Layer
            self.sigmoid_layers.append(
                HiddenLayer(internal_input,
                            internal_x_dim,
                            internal_y_dim,
                            random_generator,
                            activation=T.nnet.sigmoid))

            # Define the denoising autoencoder for this layer, sharing its
            # weights with the corresponding sigmoid layer
            self.autoencoder_layers.append(
                DenoisingAutoEncoder(random_generator,
                                     self.theano_random_generator,
                                     internal_x_dim,
                                     internal_y_dim,
                                     internal_input,
                                     W=self.sigmoid_layers[i].W,
                                     b=self.sigmoid_layers[i].b))

            # Update parameters
            self.params.extend(self.sigmoid_layers[i].params)

        # Finally add logistic layer
        self.logistic_layer = LogisticRegression(
            self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)

        self.params.extend(self.logistic_layer.params)

        # These are two important costs
        # Finetuning after pretraining individual AutoEncoders
        self.finetune_cost = self.logistic_layer.negative_log_likelihood(
            self.y)

        # Error from prediction
        self.error = self.logistic_layer.error(self.y)

    def pretrain(self, train_x, batch_size):
        """Generates a list of functions, each of them implementing one
        step in trainnig the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
        for training the dA
        
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        
        :type learning_rate: float
        :param learning_rate: learning rate used during training for any of
        the dA layer
        """
        index = T.iscalar("index")
        corruption_level = T.scalar("corruption_level")
        learning_rate = T.scalar("learning_rate")

        pretrain_functions = []
        for autoencoder in self.autoencoder_layers:

            # Find cost and updates for the layer
            cost, updates = autoencoder.cost_updates(corruption_level,
                                                     learning_rate)

            f = theano.function(inputs=[
                index,
                theano.Param(corruption_level, default=0.2),
                theano.Param(learning_rate, default=0.1)
            ],
                                outputs=cost,
                                updates=updates,
                                givens={
                                    self.x:
                                    train_x[index * batch_size:(index + 1) *
                                            batch_size]
                                })

            pretrain_functions.append(f)

        return pretrain_functions

    def finetune(self, train_x, train_y, valid_x, valid_y, test_x, test_y,
                 batch_size, learning_rate):
        """Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set
        
        :type batch_size: int
        :param batch_size: size of a minibatch
        
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        """
        # Define index
        index = T.iscalar("index")

        # Cost and updates in SGD
        grad = T.grad(self.finetune_cost, wrt=self.params)
        updates = list()
        for i in range(len(self.params)):
            updates.append(
                (self.params[i], self.params[i] - learning_rate * grad[i]))

        # Define train, valid and test models
        train_model = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: valid_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_y[index * batch_size:(index + 1) * batch_size]
            })

        test_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: test_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_y[index * batch_size:(index + 1) * batch_size]
            })

        return (train_model, valid_model, test_model)
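The docstrings above describe the intended driving loop: obtain the per-layer
pretraining functions, iterate each one over all minibatch indexes, then build
and drive the finetuning functions. The sketch below is a hypothetical driver,
not part of the original code; it borrows the `util.load` /
`util.create_theano_shared` helpers from Example #2, and the batch size,
epoch counts, corruption level and learning rates are arbitrary assumptions.

import numpy
import util  # assumed helper module, as used in Example #2

random_generator = numpy.random.RandomState(1)
sda = StackedDenoisingAutoEncoders(random_generator)

train, valid, test = util.load()
train_x, train_y = util.create_theano_shared(train)
valid_x, valid_y = util.create_theano_shared(valid)
test_x, test_y = util.create_theano_shared(test)

batch_size = 20
n_train_batches = train[0].shape[0] // batch_size

# Layer-wise pretraining: one compiled function per denoising autoencoder
pretrain_fns = sda.pretrain(train_x, batch_size)
for layer, f in enumerate(pretrain_fns):
    for epoch in range(15):  # assumed number of pretraining epochs
        costs = [f(i, corruption_level=0.1, learning_rate=0.001)
                 for i in range(n_train_batches)]
        print "pretrain layer %d, epoch %d, cost %f" % (layer, epoch, numpy.mean(costs))

# Supervised finetuning of the whole stack
train_model, valid_model, test_model = sda.finetune(
    train_x, train_y, valid_x, valid_y, test_x, test_y,
    batch_size, learning_rate=0.1)
for epoch in range(30):  # assumed number of finetuning epochs
    for i in range(n_train_batches):
        train_model(i)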
Example #2
# Assumed imports for this snippet; util, LeNetConvPoolLayer, HiddenLayer and
# LogisticRegression are defined elsewhere in the same project.
import numpy
import theano
import theano.tensor as T


def sgd_optimize(learning_rate=0.1,
                 n_epochs=200,
                 batch_size=500,
                 nkerns=[20, 50]):
    # Load input
    train, valid, test = util.load()
    print "loading 0 - ", train[0].shape[0], " train inputs in gpu memory"
    train_x, train_y = util.create_theano_shared(train)

    print "loading 0 - ", valid[0].shape[0], " validation inputs in gpu memory"
    valid_x, valid_y = util.create_theano_shared(valid)

    print "loading 0 - ", test[0].shape[0], " test inputs in gpu memory"
    test_x, test_y = util.create_theano_shared(test)

    # Define symbolic input matrices
    print "Building Model..."
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    random_generator = numpy.random.RandomState(1)

    # Create Layer0 of the LeNet model
    layer0_input = x.reshape( (batch_size, 1, 28, 28) )
    filter_shape0 = (nkerns[0], 1, 5, 5)
    image_shape0 = (batch_size, 1, 28, 28) 
    layer0 = LeNetConvPoolLayer(layer0_input, filter_shape0, image_shape0, random_generator)
    
    # Create Layer1 of the LeNet model
    filter_shape1 = (nkerns[1], nkerns[0], 5, 5)
    image_shape1 = (batch_size, nkerns[0], 12, 12)
    layer1 = LeNetConvPoolLayer(layer0.output, filter_shape1, image_shape1, random_generator)

    # Create Layer2 which is a simple MLP hidden layer
    layer2_input = layer1.output.flatten(2)
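    # Shape check (assuming 5x5 "valid" convolutions followed by 2x2 max
    # pooling, which is what the image_shape values above imply):
    #   28x28 -> conv 5x5 -> 24x24 -> pool 2x2 -> 12x12   (layer0)
    #   12x12 -> conv 5x5 ->  8x8  -> pool 2x2 ->  4x4    (layer1)
    # so the flattened layer1 output has nkerns[1] * 4 * 4 features per example.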
    layer2 = HiddenLayer(layer2_input, nkerns[1] * 4 * 4, 500, random_generator)

    # Finally, Layer3 is LogisticRegression layer
    layer3 = LogisticRegression(layer2.output, 500, 10)

    # Define error
    error = layer3.error(y)

    # Create cost function
    cost = layer3.negative_log_likelihood(y)

    # Gradient and update functions
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, wrt=params)
    updates = list()
    for i in range(len(params)):
        updates.append( (params[i], params[i] - learning_rate * grads[i]) )

    # Train model
    train_model = theano.function(
                    inputs=[index],
                    outputs=cost,
                    updates=updates,
                    givens = {
                       x: train_x[index*batch_size : (index+1)*batch_size],
                       y: train_y[index*batch_size : (index+1)*batch_size]
                    })

    # Valid model
    valid_model = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens = {
                       x: valid_x[index*batch_size : (index+1)*batch_size],
                       y: valid_y[index*batch_size : (index+1)*batch_size]
                    })
    
    # Test Model 
    test_model  = theano.function(
                    inputs=[index],
                    outputs=error,
                    givens={
                       x: test_x[index*batch_size : (index+1)*batch_size],
                       y: test_y[index*batch_size : (index+1)*batch_size]
                    })

    # Compute the number of minibatches (use integer division)
    n_train_batches = train[0].shape[0] // batch_size
    n_valid_batches = valid[0].shape[0] // batch_size
    n_test_batches = test[0].shape[0] // batch_size

    # Finally, main loop for training
    util.train_test_model(n_epochs, train_model, valid_model, test_model,
                          n_train_batches, n_valid_batches, n_test_batches)
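For completeness, a hypothetical entry point (not part of the original
snippet) would simply call the function with its defaults:

if __name__ == "__main__":
    sgd_optimize()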