Beispiel #1
0
def faces_data(resize_factor = None):
    print "Loading faces data ..."
    t0 = time.time()
    faces, labels = q.load_from_pkl("../data/faces.bzpkl")
    if resize_factor is not None:
        new_size = (int(64*resize_factor), int(64*resize_factor))
        print new_size
        for i in range(len(faces)):
            print i
            #t = cv2.resize(faces[i].reshape(64,64),new_size).ravel()
    labels = np.asarray(labels, dtype=np.int32)
    faces = np.asarray(faces, dtype=np.float32)
    
    
    data = q.newObject()
    data.train = q.newObject()
    data.valid = q.newObject()
    data.test = q.newObject()
    
    mean = 128
    stdev = 75
    
    faces = (faces - mean) / stdev
    np.random.seed(10)
    np.random.shuffle(faces)
    np.random.seed(10)
    np.random.shuffle(labels)
    
    data.train.x = np.vstack((faces[labels==1,:][    :1500], faces[labels==-1,:][    :1500]))
    data.valid.x = np.vstack((faces[labels==1,:][1500:2000], faces[labels==-1,:][1500:2000]))
    data.test.x =  np.vstack((faces[labels==1,:][2000:2400], faces[labels==-1,:][2000:2400]))
    
    
    
    data.train.y = np.hstack((np.ones((1500, ), dtype=np.int32),
                              np.zeros((1500, ),dtype=np.int32)))
    data.valid.y = np.hstack((np.ones((500, ),dtype=np.int32),
                              np.zeros((500, ),dtype=np.int32)))
    data.test.y  = np.hstack((np.ones((400, ),dtype=np.int32),
                              np.zeros((400, ),dtype=np.int32)))
    


    data.train.x = theano.shared(data.train.x, borrow=True)
    data.train.y = theano.shared(data.train.y, borrow=True)
    data.valid.x = theano.shared(data.valid.x, borrow=True)
    data.valid.y = theano.shared(data.valid.y, borrow=True)
    data.test.x  = theano.shared(data.test.x , borrow=True)
    data.test.y  = theano.shared(data.test.y , borrow=True)
    data.dim = (64, 64)
    data_digits.dim_out = 2
    def show(img):
        q.show(img.reshape((64,64)))
        cv2.destroyAllWindows()
        for k in range(10):
            cv2.waitKey(10)
    data.show = show
    t1 = time.time()
    print "Data loaded in %0.2f seconds" % ( t1-t0)
    return data
Beispiel #2
0
    def compile_model_ops(self):
        """Compile minibatch train/validate/test theano functions.

        Caches the overall cost, gradients and SGD update pairs on
        ``self`` and fills ``self.model_ops.minibatch`` with three theano
        functions that each take a minibatch ``index`` and slice the
        shared train/valid/test data.
        """
        self.model_ops = q.newObject()
        # Define the model training and testing functions for both
        # minibatches and arbitrary data
        index = self.model.index
        x = self.model.x
        y = self.model.y
        # X/Y only appear inside the disabled code kept in the string
        # literal below; they are otherwise unused here.
        X = self.model.X
        Y = self.model.Y
        """
        self.model_ops.train_model = theano.function(inputs=[X, Y],
                                            outputs=self.model.errors(y),
                                            updates=self.model.updates,
                                            givens={
                                                x: X,
                                                y: Y})
                                                "
        self.model_ops.test_model2 = theano.function(  inputs=[],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.test.x,
                                                y: self.model.data.test.y})
                                                
        self.model_ops.validate_model2 = theano.function(inputs=[],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.valid.x,
                                                y: self.model.data.valid.y})                                                 
                                                """
        self.params = self.model.params
        
        # Overall cost = model's accuracy cost + weighted L1/L2 penalties
        # summed over every parameter.
        self.acc_cost = self.model.acc_cost
        self.L1 = np.sum([abs(param).sum() for param in self.params])
        self.L2 = np.sum([(param**2).sum() for param in self.params])
        self.reg_cost = self.L1_reg * self.L1 + self.L2_reg * self.L2
        self.cost = self.acc_cost + self.reg_cost

        self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]    
        
        # Plain SGD updates: param <- param - lr * grad.
        self.updates = [(param, param - self.lr * grad) for
                    param, grad in zip(self.params, self.grads)]
        
        self.model_ops.minibatch = q.newObject()        
        # Each function maps a minibatch index to the matching slice of
        # the shared data via ``givens``.
        self.model_ops.minibatch.test = theano.function(  inputs=[index],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.test.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.model.data.test.y[index * self.batch_size:(index + 1) * self.batch_size]})
                                                
        self.model_ops.minibatch.validate = theano.function(inputs=[index],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.valid.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.model.data.valid.y[index * self.batch_size:(index + 1) * self.batch_size]}) 
        self.model_ops.minibatch.train = theano.function(inputs=[index],
                                            outputs=self.cost,
                                            updates=self.updates,
                                            givens={
                                                x: self.model.data.train.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.model.data.train.y[index * self.batch_size:(index + 1) * self.batch_size]})            
Beispiel #3
0
def get_faces(resize_factor=None):
    print "Loading faces data ..."
    t0 = time.time()
    faces, labels = q.load_from_pkl("../data/faces.bzpkl")
    if resize_factor is not None:
        new_size = (int(64 * resize_factor), int(64 * resize_factor))
        print new_size
        for i in range(len(faces)):
            print i
            #t = cv2.resize(faces[i].reshape(64,64),new_size).ravel()
    labels = np.asarray(labels, dtype=np.int32)
    faces = np.asarray(faces, dtype=np.float32)

    data = q.newObject()
    data.train = q.newObject()
    data.valid = q.newObject()
    data.test = q.newObject()

    mean = 128
    stdev = 75

    faces = (faces - mean) / stdev
    np.random.seed(10)
    np.random.shuffle(faces)
    np.random.seed(10)
    np.random.shuffle(labels)

    data.train.x = np.vstack(
        (faces[labels == 1, :][:1500], faces[labels == -1, :][:1500]))
    data.valid.x = np.vstack(
        (faces[labels == 1, :][1500:2000], faces[labels == -1, :][1500:2000]))
    data.test.x = np.vstack(
        (faces[labels == 1, :][2000:2400], faces[labels == -1, :][2000:2400]))

    data.train.y = np.hstack((np.ones(
        (1500, ), dtype=np.int32), np.zeros((1500, ), dtype=np.int32)))
    data.valid.y = np.hstack((np.ones(
        (500, ), dtype=np.int32), np.zeros((500, ), dtype=np.int32)))
    data.test.y = np.hstack((np.ones(
        (400, ), dtype=np.int32), np.zeros((400, ), dtype=np.int32)))

    data.train.x = theano.shared(data.train.x, borrow=True)
    data.train.y = theano.shared(data.train.y, borrow=True)
    data.valid.x = theano.shared(data.valid.x, borrow=True)
    data.valid.y = theano.shared(data.valid.y, borrow=True)
    data.test.x = theano.shared(data.test.x, borrow=True)
    data.test.y = theano.shared(data.test.y, borrow=True)
    data.dim = (64, 64)
    data.dim_out = 2

    def show(img):
        q.show(img.reshape((64, 64)))
        cv2.destroyAllWindows()
        for k in range(10):
            cv2.waitKey(10)

    data.show = show
    t1 = time.time()
    print "Data loaded in %0.2f seconds" % (t1 - t0)
    return data
Beispiel #4
0
def faces_data():
    """Load the faces dataset from a fixed local path and split it.

    Builds class-balanced train (1000/class), valid (500/class) and test
    (500/class) sets, each wrapped in theano shared variables. Labels are
    1 for the positive class and 0 for the negative class.
    """
    imgs, labels = q.load_from_pkl("/home/tc/faces.bzpkl")
    labels = np.asarray(labels, dtype=np.float64)
    imgs = np.asarray(imgs, dtype=np.float64)

    data = q.newObject()
    data.train = q.newObject()
    data.valid = q.newObject()
    data.test = q.newObject()

    pos = imgs[labels == 1, :]
    neg = imgs[labels == -1, :]

    # (target split, slice start, slice end, per-class count); the
    # positive class is stacked on top of the negative class.
    for part, lo, hi, count in ((data.train, 0, 1000, 1000),
                                (data.valid, 1000, 1500, 500),
                                (data.test, 1500, 2000, 500)):
        part.x = theano.shared(np.vstack((pos[lo:hi], neg[lo:hi])),
                               borrow=True)
        part.y = theano.shared(
            np.hstack((np.ones((count, ), dtype=np.int32),
                       np.zeros((count, ), dtype=np.int32))),
            borrow=True)

    # No shuffling is applied to the splits in this variant.

    def show(img):
        # Display a flattened 64x64 image and pump the cv2 event loop.
        q.show(img.reshape((64, 64)))
        cv2.destroyAllWindows()
        for _ in range(10):
            cv2.waitKey(10)

    data.show = show
    return data
Beispiel #5
0
def get_mnist(dataset='mnist.pkl.gz'):
    t0 = time.time()
    print "Loading digits data ..."
    dataset = "../data/" + dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()

    def shared_dataset(data_xy, borrow=True):

        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                                            dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y, dtype=np.int32),
                                 borrow=borrow)
        return shared_x, shared_y

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    data_digits = q.newObject()
    data_digits.train = q.newObject()
    data_digits.valid = q.newObject()
    data_digits.test = q.newObject()

    data_digits.train.x = train_set_x
    data_digits.valid.x = valid_set_x
    data_digits.test.x = test_set_x
    data_digits.train.y = train_set_y
    data_digits.valid.y = valid_set_y
    data_digits.test.y = test_set_y
    data_digits.dim = (28, 28)
    data_digits.dim_out = 10
    t1 = time.time()
    print "Data loaded in %0.2f seconds" % (t1 - t0)
    return data_digits
Beispiel #6
0
def mnist_data(dataset='mnist.pkl.gz'):
    t0 = time.time()
    print "Loading digits data ..."
    dataset = "../data/"+dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    def shared_dataset(data_xy, borrow=True):

        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x,
                    dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y,
                    dtype=np.int32), borrow=borrow)
        return shared_x, shared_y

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    data_digits = q.newObject()
    data_digits.train = q.newObject()
    data_digits.valid = q.newObject()
    data_digits.test = q.newObject()
    
    data_digits.train.x = train_set_x
    data_digits.valid.x = valid_set_x
    data_digits.test.x  = test_set_x
    data_digits.train.y = train_set_y
    data_digits.valid.y = valid_set_y
    data_digits.test.y  = test_set_y
    data_digits.dim = (28, 28)
    data_digits.dim_out = 10
    t1 = time.time()
    print "Data loaded in %0.2f seconds" % ( t1-t0)
    return data_digits
Beispiel #7
0
    def compile_model_ops(self):
        """Compile the theano minibatch train/validate/test functions.

        Caches cost, gradients and SGD update pairs on ``self`` and fills
        ``self.model_ops.minibatch`` with three theano functions, each
        taking a minibatch ``index`` and slicing the shared datasets.
        """
        self.model_ops = q.newObject()
        # Define the model training and testing functions for both
        # minibatches and arbitrary data
        index = self.model.index
        x = self.model.x
        y = self.model.y
        # X/Y only appear inside the disabled code kept in the string
        # literal below; they are otherwise unused here.
        X = self.model.X
        Y = self.model.Y
        """
        self.model_ops.train_model = theano.function(inputs=[X, Y],
                                            outputs=self.model.errors(y),
                                            updates=self.model.updates,
                                            givens={
                                                x: X,
                                                y: Y})
                                                "
        self.model_ops.test_model2 = theano.function(  inputs=[],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.test.x,
                                                y: self.model.data.test.y})
                                                
        self.model_ops.validate_model2 = theano.function(inputs=[],
                                            outputs=self.model.errors(y),
                                            givens={
                                                x: self.model.data.valid.x,
                                                y: self.model.data.valid.y})                                                 
                                                """
        self.params = self.model.params

        # Overall cost = model's accuracy cost + weighted L1/L2 penalties
        # summed over every parameter.
        self.acc_cost = self.model.acc_cost
        self.L1 = np.sum([abs(param).sum() for param in self.params])
        self.L2 = np.sum([(param**2).sum() for param in self.params])
        self.reg_cost = self.L1_reg * self.L1 + self.L2_reg * self.L2
        self.cost = self.acc_cost + self.reg_cost

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        # Plain SGD updates: param <- param - lr * grad.
        self.updates = [(param, param - self.lr * grad)
                        for param, grad in zip(self.params, self.grads)]

        self.model_ops.minibatch = q.newObject()
        # Each function maps a minibatch index to the matching slice of
        # the shared data via ``givens``.
        self.model_ops.minibatch.test = theano.function(
            inputs=[index],
            outputs=self.model.errors(y),
            givens={
                x:
                self.model.data.test.x[index * self.batch_size:(index + 1) *
                                       self.batch_size],
                y:
                self.model.data.test.y[index * self.batch_size:(index + 1) *
                                       self.batch_size]
            })

        self.model_ops.minibatch.validate = theano.function(
            inputs=[index],
            outputs=self.model.errors(y),
            givens={
                x:
                self.model.data.valid.x[index * self.batch_size:(index + 1) *
                                        self.batch_size],
                y:
                self.model.data.valid.y[index * self.batch_size:(index + 1) *
                                        self.batch_size]
            })
        self.model_ops.minibatch.train = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.model.data.train.x[index * self.batch_size:(index + 1) *
                                        self.batch_size],
                y:
                self.model.data.train.y[index * self.batch_size:(index + 1) *
                                        self.batch_size]
            })
Beispiel #8
0
    def __init__(self,
                 n_in,
                 n_out,
                 data,
                 layerSpecs,
                 batch_size,
                 rng,
                 learning_rate,
                 activation=T.tanh,
                 L1_reg=0.,
                 L2_reg=0.0001):
        """Assemble a layered network and compile its theano functions.

        :param n_in: input dimensionality handed to the InputLayer.
        :param n_out: output dimensionality.
            NOTE(review): not referenced in this constructor body — confirm.
        :param data: container whose ``train``/``valid``/``test`` members
            expose theano shared variables ``x`` and ``y``.
        :param layerSpecs: sequence of ``(layerType, layerConfig)`` pairs;
            ``layerType`` selects a class from the ``layerClasses`` table.
        :param batch_size: minibatch size used for batch counts and slicing.
        :param rng: numpy RandomState; a fixed-seed default is created
            when None.
        :param learning_rate: initial SGD step size, stored as the shared
            variable ``self.lr``.
        :param activation: NOTE(review): unused in this constructor — confirm.
        :param L1_reg: weight of the L1 penalty in the cost.
        :param L2_reg: weight of the L2 penalty in the cost.
        """

        # Define classifier independent stuff
        # preliminaries
        #######################################################################

        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')
        y = T.ivector('y')
        X = T.matrix('X')
        Y = T.ivector('Y')
        self.lr = theano.shared(learning_rate)

        if (rng is None):
            rng = np.random.RandomState(23455)

        self.batch_size = batch_size
        # Batches per split; Python 2 ``/`` on ints floors, so any
        # trailing partial batch is dropped.
        self.n_train_batches = data.train.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(
            borrow=True).shape[0] / batch_size
        self.n_test_batches = data.test.x.get_value(
            borrow=True).shape[0] / batch_size

        self.data = data

        # Define the model structure
        #######################################################################

        # Registry mapping spec names to layer constructors.
        layerClasses = {
            'conv': voo.layers.ConvLayer,
            'hidden': voo.layers.FullyConnectedLayer,
            'hidden_decomp': voo.layers.decomp.FullyConnectedDecompLayer,
            'logistic': voo.layers.LogisticRegressionLayer,
            'pooling': voo.layers.PoolingLayer,
            'dropout': voo.layers.DropOutLayer
        }

        Layers = []

        input_layer = voo.layers.InputLayer(x, n_in, batch_size)
        Layers.append(input_layer)
        prev_layer = input_layer
        # Chain the layers: each new layer is built on top of the previous.
        for layer_idx in range(len(layerSpecs)):
            # First we get the layer class and specs
            layerType, layerConfig = layerSpecs[layer_idx]
            layerClass = layerClasses[layerType]
            new_layer = voo.layers.GenLayer(layerClass, prev_layer, batch_size,
                                            rng, layerConfig)

            Layers.append(new_layer)
            prev_layer = new_layer

        self.Layers = Layers

        #
        # Define all the model specifics that relate to the parameters:
        # vector of parameters, gradients, errors, updates, accuracy cost
        # regularization cost, overall cost
        ######################################################################

        self.params = []
        for layer in Layers:
            self.params.extend(layer.params)

        # Cost = negative log-likelihood of the last layer plus weighted
        # L1/L2 penalties over all parameters.
        self.acc_cost = self.Layers[-1].negative_log_likelihood(y)
        self.L1 = np.sum([abs(param).sum() for param in self.params])
        self.L2 = np.sum([(param**2).sum() for param in self.params])
        self.reg_cost = L1_reg * self.L1 + L2_reg * self.L2
        self.cost = self.acc_cost + self.reg_cost

        self.grads = [
            T.grad(cost=self.cost, wrt=param) for param in self.params
        ]

        # Plain SGD update: param <- param - lr * grad.
        self.updates = [(param, param - self.lr * grad)
                        for param, grad in zip(self.params, self.grads)]

        # Define the model training and testing functions for both
        # minibatches and arbitrary data

        self.train_model = theano.function(inputs=[X, Y],
                                           outputs=self.errors(y),
                                           updates=self.updates,
                                           givens={
                                               x: X,
                                               y: Y
                                           })
        self.test_model2 = theano.function(inputs=[],
                                           outputs=self.errors(y),
                                           givens={
                                               x: self.data.test.x,
                                               y: self.data.test.y
                                           })

        self.validate_model2 = theano.function(inputs=[],
                                               outputs=self.errors(y),
                                               givens={
                                                   x: self.data.valid.x,
                                                   y: self.data.valid.y
                                               })

        # Minibatch variants: the integer ``index`` selects a slice of
        # the shared datasets via ``givens``.
        self.minibatch = q.newObject()
        self.minibatch.test = theano.function(
            inputs=[index],
            outputs=self.errors(y),
            givens={
                x:
                self.data.test.x[index * self.batch_size:(index + 1) *
                                 self.batch_size],
                y:
                self.data.test.y[index * self.batch_size:(index + 1) *
                                 self.batch_size]
            })

        self.minibatch.validate = theano.function(
            inputs=[index],
            outputs=self.errors(y),
            givens={
                x:
                self.data.valid.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.valid.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
        self.minibatch.train = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=self.updates,
            givens={
                x:
                self.data.train.x[index * self.batch_size:(index + 1) *
                                  self.batch_size],
                y:
                self.data.train.y[index * self.batch_size:(index + 1) *
                                  self.batch_size]
            })
    def __init__(self, n_in, n_out, data, layerSpecs, batch_size, rng, learning_rate, activation=T.tanh, L1_reg=0., L2_reg=0.0001):
        """Build the layer stack and compile theano train/eval functions.

        ``layerSpecs`` is a sequence of ``(layerType, layerConfig)`` pairs;
        the type selects a constructor from the ``layerClasses`` table.
        NOTE(review): ``n_out`` and ``activation`` are accepted but not
        used anywhere in this constructor — confirm intent.
        """

        # Define classifier independent stuff
        # preliminaries
        #######################################################################

        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')
        y = T.ivector('y')
        X = T.matrix('X')
        Y = T.ivector('Y')
        self.lr = theano.shared(learning_rate)

        if (rng is None):
            rng = np.random.RandomState(23455)

        self.batch_size = batch_size
        # Python 2 integer division: trailing partial batches are dropped.
        self.n_train_batches = data.train.x.get_value(borrow=True).shape[0] / batch_size
        self.n_valid_batches = data.valid.x.get_value(borrow=True).shape[0] / batch_size
        self.n_test_batches  = data.test.x.get_value (borrow=True).shape[0] / batch_size

        self.data = data


        # Define the model structure
        #######################################################################

        # Registry mapping layer-spec names to layer constructors.
        layerClasses = {
            'conv': voo.layers.ConvLayer,
            'hidden': voo.layers.FullyConnectedLayer,
            'hidden_decomp': voo.layers.decomp.FullyConnectedDecompLayer,
            'logistic': voo.layers.LogisticRegressionLayer,
            'pooling': voo.layers.PoolingLayer,
            'dropout': voo.layers.DropOutLayer
            }

        Layers = []

        input_layer = voo.layers.InputLayer(x, n_in, batch_size)
        Layers.append(input_layer)
        prev_layer = input_layer
        # Build each configured layer on top of the previous one.
        for layer_idx in range(len(layerSpecs)):
            # First we get the layer class and specs
            layerType, layerConfig = layerSpecs[layer_idx]
            layerClass = layerClasses[layerType]
            new_layer = voo.layers.GenLayer(layerClass, prev_layer, batch_size, rng, layerConfig)

            Layers.append(new_layer)
            prev_layer = new_layer

        self.Layers=Layers

        #
        # Define all the model specifics that relate to the parameters:
        # vector of parameters, gradients, errors, updates, accuracy cost
        # regularization cost, overall cost
        ######################################################################


        self.params = []
        for layer in Layers:
            self.params.extend(layer.params)


        # Cost = last layer's negative log-likelihood + weighted L1/L2
        # penalties over all parameters.
        self.acc_cost = self.Layers[-1].negative_log_likelihood(y)
        self.L1 = np.sum([abs(param).sum() for param in self.params])
        self.L2 = np.sum([(param**2).sum() for param in self.params])
        self.reg_cost = L1_reg * self.L1 + L2_reg * self.L2
        self.cost = self.acc_cost + self.reg_cost



        self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]

        # Plain SGD updates: param <- param - lr * grad.
        self.updates = [(param, param - self.lr * grad) for
                    param, grad in zip(self.params, self.grads)]




        # Define the model training and testing functions for both
        # minibatches and arbitrary data

        self.train_model = theano.function(inputs=[X, Y],
                                            outputs=self.errors(y),
                                            updates=self.updates,
                                            givens={
                                                x: X,
                                                y: Y})
        self.test_model2 = theano.function(  inputs=[],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.test.x,
                                                y: self.data.test.y})
                                                
        self.validate_model2 = theano.function(inputs=[],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.valid.x,
                                                y: self.data.valid.y}) 

        # Minibatch variants slice the shared datasets by ``index``.
        self.minibatch = q.newObject()
        self.minibatch.test = theano.function(  inputs=[index],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.test.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.data.test.y[index * self.batch_size:(index + 1) * self.batch_size]})
                                                
        self.minibatch.validate = theano.function(inputs=[index],
                                            outputs=self.errors(y),
                                            givens={
                                                x: self.data.valid.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.data.valid.y[index * self.batch_size:(index + 1) * self.batch_size]}) 
        self.minibatch.train = theano.function(inputs=[index],
                                            outputs=self.cost,
                                            updates=self.updates,
                                            givens={
                                                x: self.data.train.x[index * self.batch_size:(index + 1) * self.batch_size],
                                                y: self.data.train.y[index * self.batch_size:(index + 1) * self.batch_size]})                  
Beispiel #10
0
            epoch = epoch + 1
            if epoch == max_epochs or patience <= iteration:
                done_looping = True
        end_time = time.clock()
        print(('Optimization complete with best validation score of %f %%,'
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))
        print 'The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time))
        print('The code ran for %.1fs' % ((end_time - start_time)))


# ---------------------------------------------------------------------------
# Script body: load the faces dataset and MNIST, then repackage MNIST into
# the same train/valid/test container layout used elsewhere in this file.
# ---------------------------------------------------------------------------
data_faces = faces_data()
# NOTE(review): ``load_data`` is not defined in this file — presumably a
# MNIST loader; confirm against its defining module.
mnist = load_data('mnist.pkl.gz')

data_digits = q.newObject()
data_digits.train = q.newObject()
data_digits.valid = q.newObject()
data_digits.test = q.newObject()

# Index layout as consumed here: mnist[split][0] = features,
# mnist[split][1] = labels, with splits ordered train/valid/test.
data_digits.train.x = mnist[0][0]
data_digits.valid.x = mnist[1][0]
data_digits.test.x = mnist[2][0]

data_digits.train.y = mnist[0][1]
data_digits.valid.y = mnist[1][1]
data_digits.test.y = mnist[2][1]

print "Ready, Cap\'n!"

M = LogisticRegression_model(28 * 28,