class MLP:
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=tanh)

        # construct log_layer
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        # forward hidden_layer
        layer_input = self.hidden_layer.forward()

        self.log_layer.train(input=layer_input)

        # backward hidden_layer
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(input=x)
        return self.log_layer.predict(x)
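# Minimal usage sketch for the MLP class above (an assumption, not part of the
# original source): it presumes HiddenLayer, LogisticRegression and tanh are
# importable from the same module and that labels are one-hot encoded.
import numpy

x = numpy.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = numpy.array([[1, 0], [0, 1], [0, 1], [1, 0]])   # one-hot XOR labels

mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2)
for epoch in xrange(500):   # each call to train() is one forward/backward pass
    mlp.train()
print mlp.predict(x)        # rows of class probabilities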
def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """

    # Since we are dealing with a one hidden layer MLP, this will
    # translate into a TanhLayer connected to the LogisticRegression
    # layer; this can be replaced by a SigmoidalLayer, or a layer
    # implementing any other nonlinearity
    self.hiddenLayer = HiddenLayer(rng=rng, input=input,
                                   n_in=n_in, n_out=n_hidden,
                                   activation=T.tanh)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out)

    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
            + abs(self.logRegressionLayer.W).sum()

    # square of L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
                + (self.logRegressionLayer.W ** 2).sum()

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it is
    # made out of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
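# Hedged sketch (not from the original file) of how the attributes defined
# above are typically combined downstream into a regularized training cost.
# Here `classifier` is an instance of this MLP, `y` a T.ivector of labels, and
# L1_reg / L2_reg are hypothetical regularization coefficients:
#
#     cost = (classifier.negative_log_likelihood(y)
#             + L1_reg * classifier.L1
#             + L2_reg * classifier.L2_sqr)
#     gparams = [T.grad(cost, param) for param in classifier.params]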
def cloicTest(self):
    trainDataFile = codecs.open(
        "../data/LRTestData/horseColicTraining.txt", 'r', 'utf-8')
    trainDataset = []
    trainDataLabel = []
    '''
    Load the training data into the dataset, train on it,
    and obtain the logistic regression weights.
    '''
    for line in trainDataFile.readlines():
        curLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(curLine[i]))
        trainDataset.append(lineArr)
        trainDataLabel.append(float(curLine[-1]))
    trainWeights = LogisticRegression().randomGradAscent(
        array(trainDataset), trainDataLabel, 150)
    print trainWeights
    '''
    Use the test set to evaluate the regression model
    and compute its error rate.
    '''
    errorCount = 0.0
    numTestVec = 0.0
    frTest = codecs.open("../data/LRTestData/horseColicTest.txt", 'r', 'utf-8')
    for line in frTest.readlines():
        numTestVec += 1
        curLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(curLine[i]))
        if int(self.checkVector(array(lineArr), trainWeights)) != int(
                curLine[-1]):
            errorCount += 1
    errorRate = float(errorCount / numTestVec)
    print "the error rate is %f" % errorRate
    return errorRate
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='emotion',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on the MNIST / emotion datasets

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: dataset used for training / testing ('mnist' or 'emotion')

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = Ld.load_share(dataset)

    if dataset == 'mnist':
        ishape = (28, 28)  # this is the size of MNIST images
        num_label = 10
    elif dataset == 'emotion':
        ishape = (48, 48)  # this is the size of the emotion images
        num_label = 7

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, ishape[0], ishape[1]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, ishape[0], ishape[1]),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    if dataset == 'emotion':
        layer05 = LeNetConvPoolLayer(rng, input=layer0.output,
                                     image_shape=(batch_size, nkerns[0], 22, 22),
                                     filter_shape=(nkerns[0], nkerns[0], 3, 3),
                                     poolsize=(2, 2))
        layer1 = LeNetConvPoolLayer(rng, input=layer05.output,
                                    image_shape=(batch_size, nkerns[0], 10, 10),
                                    filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                    poolsize=(2, 2))
    elif dataset == 'mnist':
        layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                    image_shape=(batch_size, nkerns[0], 12, 12),
                                    filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                    poolsize=(2, 2))

    # the TanhLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=num_label)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index], layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index], layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    if dataset == 'emotion':
        params += layer05.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function(
        [index], cost, updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000               # look at this many examples regardless
    patience_increase = 2          # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation set;
                                   # in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
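# Hedged usage note (assumption: Ld.load_share can locate the chosen dataset):
# the experiment above would typically be launched as a script, for example:
if __name__ == '__main__':
    evaluate_lenet5(dataset='mnist', n_epochs=200, batch_size=500)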
class SdA:
    def __init__(self, input=None, label=None,
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.dA_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct dA_layer
            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(layer_input)

            da = self.dA_layers[i]

            for epoch in xrange(epochs):
                da.train(lr=lr, corruption_level=corruption_level,
                         input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)
            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        return self.log_layer.predict(layer_input)
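# Minimal usage sketch for SdA (an assumption, not part of the original code):
# it presumes the dA, HiddenLayer, LogisticRegression and sigmoid helpers used
# above are available in this module, and that labels are one-hot encoded.
import numpy

x = numpy.array([[1, 1, 1, 0, 0, 0],
                 [1, 0, 1, 0, 0, 0],
                 [0, 0, 1, 1, 1, 0],
                 [0, 0, 1, 1, 0, 0]])
y = numpy.array([[1, 0], [1, 0], [0, 1], [0, 1]])

sda = SdA(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2)
sda.pretrain(lr=0.1, corruption_level=0.3, epochs=50)   # layer-wise dA training
sda.finetune(lr=0.1, epochs=50)                         # supervised top layer
print sda.predict(x)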
def checkVector(self, inX, weights):
    prob = LogisticRegression().sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
def __init__(self, n_conv_layers=2,
             filter_shapes=((20, 1, 5, 5), (50, 20, 5, 5)),
             image_shape=(500, 1, 28, 28),
             poolsize=(2, 2),
             n_hidden_neurons=500,
             learning_rate=0.1,
             dataset_name='mnist.pkl.gz'):
    """Initializes the network according to the given parameters.

    :type n_conv_layers: int > 0
    :param n_conv_layers: number of convolutional layers

    :type filter_shapes: tuple of length n_conv_layers, consisting of
                         filter descriptions (tuples of length 4)
    :param filter_shapes: each filter_shape has the format
                          (number of filters, number of input channels,
                          filter height, filter width); the number of
                          filters equals the number of output channels

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, number of input channels (feature maps),
                        image height, image width)

    :param poolsize: tuple of length 2

    :type n_hidden_neurons: int > 0
    :param n_hidden_neurons: number of neurons in the fully-connected hidden layer

    :type learning_rate: double
    :param learning_rate: step size used for gradient-descent training
    """
    # Validate the input parameters
    assert len(image_shape) == 4
    assert len(filter_shapes) == n_conv_layers
    assert len(poolsize) == 2

    self.n_conv_layers = n_conv_layers
    self.batch_size = image_shape[0]
    self.rng = numpy.random.RandomState(23455)

    # allocate symbolic variables for the data
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    layer_input = x.reshape(image_shape)

    # list of the hidden layers' parameters
    params = []

    # Initialize the convolutional layers according to the architecture
    # given in the parameters
    for i in xrange(n_conv_layers):
        batch_size, n_filter_maps, image_height, image_weight = image_shape
        n_filters, n_input_filter_maps, filer_heigth, filter_weight = filter_shapes[i]
        pool_height, pool_weight = poolsize

        # Build a convolutional layer + pooling
        conv_layer = LeNetConvPoolLayer(
            self.rng,
            input=layer_input,
            image_shape=image_shape,
            filter_shape=filter_shapes[i],
            poolsize=poolsize
        )
        layer_input = conv_layer.output

        # Save the layer's parameters
        params.append(conv_layer.params)

        # Filtering reduces the image size, e.g. (28-5+1, 28-5+1) = (24, 24)
        image_height = image_height - filer_heigth + 1
        image_weight = image_weight - filter_weight + 1
        # maxpooling reduces it further, e.g. (24/2, 24/2) = (12, 12)
        image_height /= pool_height
        image_weight /= pool_weight
        # The new image tensor shape is thus (batch_size, n_filters, 12, 12)
        image_shape = (batch_size, n_filters, image_height, image_weight)

    # The result of passing the images through the convolutional layers is
    # stored in layer_input; the data shape now matches image_shape.
    # With two layers and the default parameters, the output has the shape:
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, 50, 4, 4)
    batch_size, n_filter_maps, image_height, image_weight = image_shape

    # The fully-connected hidden layer takes a 2D matrix as input, but we have
    # a 4D tensor, so we flatten the tensor into a matrix of shape
    # (batch_size, n_filters * image_height * image_weight)
    fully_connected_layer_input = layer_input.flatten(2)
    n_filter_maps = image_shape[1]

    # n_in: dimensionality of the input
    # n_out: number of neurons in the hidden layer
    # Build the fully-connected layer
    # TODO: ideally the activation function could be passed in as a parameter
    fully_connected_layer = HiddenLayer(
        self.rng,
        input=fully_connected_layer_input,
        n_in=n_filter_maps * image_height * image_weight,
        n_out=n_hidden_neurons,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    logistic_regression_layer = LogisticRegression(input=fully_connected_layer.output,
                                                   n_in=n_hidden_neurons,
                                                   n_out=10)

    index = T.lscalar()      # minibatch index
    x_set = T.matrix('x_set')

    # Function that classifies images (used after training)
    self.prediction_model = theano.function(
        [index, x_set],
        outputs=logistic_regression_layer.y_pred,
        givens={
            x: x_set[index * self.batch_size: (index + 1) * self.batch_size]
        }
    )

    # the cost we minimize during training is the NLL of the model
    cost = logistic_regression_layer.negative_log_likelihood(y)

    self.load(dataset_name=dataset_name)

    # Create functions that compute the model's error
    self.test_model = theano.function(
        [index],
        logistic_regression_layer.errors(y),
        givens={
            x: self.test_set_x[index * self.batch_size: (index + 1) * self.batch_size],
            y: self.test_set_y[index * self.batch_size: (index + 1) * self.batch_size]
        }
    )

    self.validate_model = theano.function(
        [index],
        logistic_regression_layer.errors(y),
        givens={
            x: self.valid_set_x[index * self.batch_size: (index + 1) * self.batch_size],
            y: self.valid_set_y[index * self.batch_size: (index + 1) * self.batch_size]
        }
    )

    self.inverted_params = logistic_regression_layer.params + fully_connected_layer.params
    for i in xrange(n_conv_layers - 1, -1, -1):
        self.inverted_params += params[i]

    # Create a list of gradients for all model parameters
    grads = T.grad(cost, self.inverted_params)

    # train_model is a function that updates the model parameters via SGD.
    # Since the model has many parameters, it would be tedious to create an
    # update rule for each one by hand, so we build the updates list by
    # automatically looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(self.inverted_params, grads)
    ]

    self.train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: self.train_set_x[index * self.batch_size: (index + 1) * self.batch_size],
            y: self.train_set_y[index * self.batch_size: (index + 1) * self.batch_size]
        }
    )

    set_x = T.matrix("set_x")
    self.predict = theano.function(
        [set_x],
        logistic_regression_layer.y_pred,
        givens={
            x: set_x
        }
    )
class DBN:
    def __init__(self, input=None, label=None,
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,  # W, b are shared
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
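# Minimal usage sketch for DBN (an assumption, not part of the original code):
# same data conventions as the SdA example above, but with RBM-based layer-wise
# pre-training via contrastive divergence (k Gibbs steps) before fine-tuning.
import numpy

x = numpy.array([[1, 1, 1, 0, 0, 0],
                 [1, 0, 1, 0, 0, 0],
                 [0, 0, 1, 1, 1, 0],
                 [0, 0, 1, 1, 0, 0]])
y = numpy.array([[1, 0], [1, 0], [0, 1], [0, 1]])

dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2)
dbn.pretrain(lr=0.1, k=1, epochs=100)
dbn.finetune(lr=0.1, epochs=100)
print dbn.predict(x)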
class CNN:
    def __init__(self, N, label, n_hidden, n_out,
                 image_size, channel,
                 n_kernels, kernel_sizes, pool_sizes,
                 rng=None, activation=ReLU):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        self.N = N
        self.n_hidden = n_hidden
        self.n_kernels = n_kernels
        self.pool_sizes = pool_sizes

        self.conv_layers = []
        self.conv_sizes = []

        # construct 1st conv_layer
        conv_layer0 = ConvPoolLayer(N, image_size, channel,
                                    n_kernels[0], kernel_sizes[0], pool_sizes[0],
                                    rng, activation)
        self.conv_layers.append(conv_layer0)

        conv_size = [(image_size[0] - kernel_sizes[0][0] + 1) / pool_sizes[0][0],
                     (image_size[1] - kernel_sizes[0][1] + 1) / pool_sizes[0][1]]
        self.conv_sizes.append(conv_size)

        # construct 2nd conv_layer
        conv_layer1 = ConvPoolLayer(N, conv_size, n_kernels[0],
                                    n_kernels[1], kernel_sizes[1], pool_sizes[1],
                                    rng, activation)
        self.conv_layers.append(conv_layer1)

        conv_size = [(conv_size[0] - kernel_sizes[1][0] + 1) / pool_sizes[1][0],
                     (conv_size[1] - kernel_sizes[1][1] + 1) / pool_sizes[1][1]]
        self.conv_sizes.append(conv_size)

        # construct hidden_layer
        self.hidden_layer = HiddenLayer(None,
                                        n_kernels[-1] * conv_size[0] * conv_size[1],
                                        n_hidden,
                                        None, None,
                                        rng, activation)

        # construct log_layer
        self.log_layer = LogisticRegression(None, label, n_hidden, n_out)

    def train(self, epochs, learning_rate, input, test_input=None):
        for epoch in xrange(epochs):

            if (epoch + 1) % 5 == 0:
                print 'iter = %d/%d' % (epoch + 1, epochs)
                print
                print '------------------'
                print 'TEST PROCESSING...'
                print self.predict(test_input)
                print '------------------'
                print

            # forward first conv layer
            pooled_X = self.conv_layers[0].forward(input=input)

            # forward second conv layer
            pooled_X = self.conv_layers[1].forward(input=pooled_X)

            # flatten input
            layer_input = self.flatten(pooled_X)

            # forward hidden layer
            layer_input = self.hidden_layer.forward(input=layer_input)

            # forward & backward logistic layer
            self.log_layer.train(lr=learning_rate, input=layer_input)

            # backward hidden layer
            self.hidden_layer.backward(prev_layer=self.log_layer, lr=learning_rate)

            flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][0] * self.conv_sizes[-1][1]

            delta_flatten = numpy.zeros((self.N, flatten_size))
            for n in xrange(self.N):
                for i in xrange(flatten_size):
                    for j in xrange(self.n_hidden):
                        delta_flatten[n][i] += self.hidden_layer.W[i][j] * self.hidden_layer.d_y[n][j]

            # unflatten delta
            delta = numpy.zeros((len(delta_flatten), self.n_kernels[-1],
                                 self.conv_sizes[-1][0], self.conv_sizes[-1][1]))
            for n in xrange(len(delta)):
                index = 0
                for k in xrange(self.n_kernels[-1]):
                    for i in xrange(self.conv_sizes[-1][0]):
                        for j in xrange(self.conv_sizes[-1][1]):
                            delta[n][k][i][j] = delta_flatten[n][index]
                            index += 1

            # backward second conv layer
            delta = self.conv_layers[1].backward(delta, self.conv_sizes[1], learning_rate)

            # backward first conv layer
            self.conv_layers[0].backward(delta, self.conv_sizes[0], learning_rate)

    def flatten(self, input):
        flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][0] * self.conv_sizes[-1][1]

        flattened_input = numpy.zeros((len(input), flatten_size))

        for n in xrange(len(flattened_input)):
            index = 0
            for k in xrange(self.n_kernels[-1]):
                for i in xrange(self.conv_sizes[-1][0]):
                    for j in xrange(self.conv_sizes[-1][1]):
                        flattened_input[n][index] = input[n][k][i][j]
                        index += 1

        return flattened_input

    def predict(self, x):
        pooled_X = self.conv_layers[0].forward(input=x)
        pooled_X = self.conv_layers[1].forward(input=pooled_X)

        layer_input = self.flatten(pooled_X)
        x = self.hidden_layer.output(input=layer_input)
        return self.log_layer.predict(x)
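# Minimal usage sketch for the numpy CNN above (an assumption, not part of the
# original code): ConvPoolLayer, HiddenLayer, LogisticRegression and ReLU are
# presumed to be defined in this module, and labels are one-hot encoded.
# Sizes are chosen so each (conv, pool) stage divides evenly:
# 14 -> (14-3+1)/2 = 6 -> (6-3+1)/2 = 2.
import numpy

rng = numpy.random.RandomState(1234)
N = 10
images = rng.rand(N, 1, 14, 14)                  # N single-channel 14x14 images
labels = numpy.eye(2)[rng.randint(2, size=N)]    # one-hot labels for 2 classes

cnn = CNN(N=N, label=labels, n_hidden=20, n_out=2,
          image_size=[14, 14], channel=1,
          n_kernels=[4, 8], kernel_sizes=[[3, 3], [3, 3]],
          pool_sizes=[[2, 2], [2, 2]])
cnn.train(epochs=10, learning_rate=0.1, input=images, test_input=images)
print cnn.predict(images)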