def __init__(self, batch_size, num_kernels, kernel_sizes, channel):
    # Note: `x` (the symbolic input, indexed by channel here) and `rng` are assumed
    # to be defined in the enclosing scope; the second variant below takes `x` as an
    # explicit argument.

    # Layer 0 - first convolutional layer
    self.layer0_input_size = (batch_size, 1, 100, 100)  # fixed size from the data
    self.edge0 = (100 - kernel_sizes[0][0] + 1) / 2
    self.layer0_output_size = (batch_size, num_kernels[0], self.edge0, self.edge0)

    # check that the convolved edge is an even multiple of 2 before pooling
    assert ((100 - kernel_sizes[0][0] + 1) % 2) == 0

    # The actual input is the placeholder x reshaped to the input size of the network
    self.layer0_input = x[channel].reshape(self.layer0_input_size)
    self.layer0 = LeNetConvPoolLayer(rng,
                                     input=self.layer0_input,
                                     image_shape=self.layer0_input_size,
                                     subsample=(1, 1),
                                     filter_shape=(num_kernels[0], 1) + kernel_sizes[0],
                                     poolsize=(2, 2))

    # Layer 1 - second convolutional layer
    # The second layer takes the first layer's output, convolves it with
    # num_kernels[1] filters of size kernel_sizes[1], and then downsamples (via
    # maxpooling) in a 2x2 region, so each side shrinks to
    # (edge0 - kernel_sizes[1][0] + 1) / 2.
    self.layer1_input_size = self.layer0_output_size
    self.edge1 = (self.edge0 - kernel_sizes[1][0] + 1) / 2
    self.layer1_output_size = (batch_size, num_kernels[1], self.edge1, self.edge1)

    # check that the convolved edge is an even multiple of 2 before pooling
    assert ((self.edge0 - kernel_sizes[1][0] + 1) % 2) == 0

    self.layer1 = LeNetConvPoolLayer(rng,
                                     input=self.layer0.output,
                                     image_shape=self.layer1_input_size,
                                     subsample=(1, 1),
                                     filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
                                     poolsize=(2, 2))
def __init__(self, batch_size, num_kernels, kernel_sizes, channel, x, y):
    # Layer 0 - first convolutional layer (3x3 pooling)
    self.layer0_input_size = (batch_size, 1, 100, 100)  # input size from the data
    self.edge0 = (100 - kernel_sizes[0][0] + 1) / 3     # edge after conv + 3x3 pooling
    self.layer0_output_size = (batch_size, num_kernels[0], self.edge0, self.edge0)
    assert ((100 - kernel_sizes[0][0] + 1) % 3) == 0    # convolved edge must divide the pool size

    # Initialize Layer 0
    self.layer0_input = x.reshape(self.layer0_input_size)
    self.layer0 = LeNetConvPoolLayer(rng,
                                     input=self.layer0_input,
                                     image_shape=self.layer0_input_size,
                                     subsample=(1, 1),
                                     filter_shape=(num_kernels[0], 1) + kernel_sizes[0],
                                     poolsize=(3, 3))

    # Layer 1 - second convolutional layer (2x2 pooling)
    self.layer1_input_size = self.layer0_output_size
    self.edge1 = (self.edge0 - kernel_sizes[1][0] + 1) / 2
    self.layer1_output_size = (batch_size, num_kernels[1], self.edge1, self.edge1)
    assert ((self.edge0 - kernel_sizes[1][0] + 1) % 2) == 0

    # Initialize Layer 1
    self.layer1 = LeNetConvPoolLayer(rng,
                                     input=self.layer0.output,
                                     image_shape=self.layer1_input_size,
                                     subsample=(1, 1),
                                     filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
                                     poolsize=(2, 2))
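# A minimal sketch (not part of the original classes) of the edge arithmetic the two
# constructors above rely on. The name conv_pool_edge is hypothetical; it assumes a
# 'valid' convolution followed by non-overlapping max pooling, which is what the
# asserts above are checking for.
def conv_pool_edge(input_edge, filter_edge, pool_edge):
    conv_edge = input_edge - filter_edge + 1   # 'valid' convolution
    assert conv_edge % pool_edge == 0          # same check as the asserts above
    return conv_edge // pool_edge              # non-overlapping max pooling

# Example with the 100x100 input and kernel_sizes = [(9, 9), (5, 5)]:
#   edge0 = conv_pool_edge(100, 9, 2)  -> 46
#   edge1 = conv_pool_edge(46, 5, 2)   -> 21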
def load_trained_model():
    global if_load_trained_model
    global train_model_route
    global layer0_input
    global layer0
    global layer1
    global layer2_input
    global layer2
    global layer3
    global test_results

    if_load_trained_model = 1
    print "loading trained model for the first time"

    trained_model_pkl = open(train_model_route, 'r')
    trained_model_state_list = cPickle.load(trained_model_pkl)
    trained_model_state_array = numpy.load(trained_model_pkl)
    layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array

    ishape = (50, 50)  # size of the input images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-10+1, 50-10+1) = (41, 41)
    # maxpooling (ignoring the odd border) reduces this further to (20, 20)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 20, 20)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 50, 50),
                                filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2),
                                W=layer0_state[0], b=layer0_state[1])

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (20-5+1, 20-5+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 20, 20),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2),
                                W=layer1_state[0], b=layer1_state[1])

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8,
                         n_out=100, activation=T.tanh,
                         W=layer2_state[0], b=layer2_state[1])

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=3,
                                W=layer3_state[0], b=layer3_state[1])

    test_results = theano.function(inputs=[x], outputs=layer3.y_pred)
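# A usage sketch for the globals that load_trained_model() fills in. It assumes the
# module-level symbols the function relies on (x, batch_size, rng, nkerns,
# train_model_route) exist as in the rest of this file, and `images` is a
# hypothetical (batch_size, 50*50) float32 array of rasterized 50x50 images.
load_trained_model()
predicted_labels = test_results(images)
print predicted_labels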
def convLayer0(input2, nkerns=[20, 50]):
    rng = numpy.random.RandomState(23455)

    x = T.matrix('x')  # the data is presented as rasterized images
    layer0_input = x.reshape((1, 1, 50, 50))
    print(type(layer0_input))
    print(layer0_input.ndim)

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (50-5+1, 50-5+1) = (46, 46)
    # maxpooling reduces this further to (46/2, 46/2) = (23, 23)
    # 4D output tensor is thus of shape (1, nkerns[0], 23, 23)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(1, 1, IMAGE_WIDTH, IMAGE_HEIGHT),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    f = theano.function([x], layer0.output)

    print(type(input2))
    output = f(input2)
    print(output.shape)

    # rescale the activations to the 0-255 range before writing them out as images
    for k in range(20):
        for i in range(23):
            for j in range(23):
                output[0][k][i][j] = output[0][k][i][j] * 256

    for i in range(20):
        cv2.imwrite(Constants.IMG_DIR_TENCENT_SPLIT + str(i) + "111.jpg", output[0][i])

    for i in range(22):
        for j in range(22):
            print(output[0][0][i][j])
f = open('model.dat', 'rb')
params = cPickle.load(f)
f.close()

input = T.matrix('input')
label = T.ivector('label')

nkerns = [20, 50]
rng = np.random.RandomState(3510)
batch_size = 1

layer0_input = input.reshape((batch_size, 1, 50, 50))

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 1, 50, 50),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(2, 2),
    stride=(3, 3),
    W=params[6].get_value(),
    b=params[7].get_value(),
)

layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 8, 8),
    filter_shape=(nkerns[1], nkerns[0], 4, 4),
    poolsize=(1, 1),
    stride=(1, 1),
    W=params[4].get_value(),
    b=params[5].get_value(),
)
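# A minimal sketch of querying the two pretrained layers built above; the name
# extract_features is hypothetical, and the stated output shape assumes the stride
# argument of LeNetConvPoolLayer behaves as the image_shape values above suggest
# ((50-5)/3 + 1 = 16, pooled to 8, then 8-4+1 = 5 with no pooling).
extract_features = theano.function([input], layer1.output)

# For a (1, 50*50) float32 image this returns an array of shape
# (batch_size, nkerns[1], 5, 5), i.e. (1, 50, 5, 5) for the settings above.
# features = extract_features(single_image)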
def main_ver1_sqeu_2(learning_rate=0.05, weight_decay=0.001, n_epochs=200, nkerns=[20, 30],batch_size=500): name = 'Sequence_' rng = numpy.random.RandomState(23455) datasets = loaddata_mnist() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train = train_set_x.get_value(borrow=True).shape[0] n_valid = valid_set_x.get_value(borrow=True).shape[0] n_test = test_set_x.get_value(borrow=True).shape[0] # print(str(n_train), str(n_valid),str(n_test)) test_set_x = test_set_x.reshape((n_test, 1, 28, 28)) valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28)) train_set_x = train_set_x.reshape((n_train, 1, 28, 28)) n_train_batches = n_train // batch_size n_valid_batches = n_valid // batch_size n_test_batches = n_test // batch_size x = T.matrix('x') y = T.ivector('y') index = T.lscalar() print('... loading the model') layer0_input = x.reshape((batch_size, 1, 28, 28)) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2) ) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression_nonzeroini(rng, input=layer2.output, n_in=500, n_out=10) cost = layer3.negative_log_likelihood(y) params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) updates = [ (param_i, param_i - learning_rate * grad_i)# + weight_decay * param_i) for param_i, grad_i in zip(params, grads)] patience_increase = 4 improvement_threshold = 0.00001 start_time = timeit.default_timer() print('... training') temp_time_1 = timeit.default_timer() best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
patience = 200000 validation_frequency = min(n_train_batches, patience // 2) epoch = 0 done_looping = False error_line = numpy.zeros(n_epochs) test_model = theano.function( [index], layer3.errors(y), givens={ layer0.input: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) validate_model = theano.function( [index], layer3.errors(y), givens={ layer0.input: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) train_model = theano.function( [index], cost, updates=updates, givens={ layer0.input: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss*100)) error_line[epoch-1] = this_validation_loss if this_validation_loss < best_validation_loss: if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f') % (epoch, minibatch_index + 1, n_train_batches, test_score*100)) [t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3] = \ [layer0, layer1, layer2_input, layer2, layer3] temp_model = [layer0, layer1, layer2_input, layer2, layer3] with open(name + str(epoch) + '.pkl', 'wb') as f: pickle.dump(temp_model, f) if patience <= iter: done_looping = True break with open(name + 'final.pkl', 'wb') as f: pickle.dump([t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3], f) error_line = error_line[0:epoch-1]/100 scipy.io.savemat('Sqeuence.mat', mdict={'Error_Spectrum': error_line}) temp_time_2 = timeit.default_timer() print('%.2fm' % ((temp_time_2 - temp_time_1) / 60.)) end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f obtained at iteration %i, ' 'with test performance %f ' % (best_validation_loss, best_iter + 1, test_score)) print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
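# The patience-based early stopping used in the training loop above, distilled into
# a standalone sketch. train_one_pass() and validation_error() are hypothetical
# stand-ins for the compiled Theano functions; the default constants mirror the
# values used above.
def train_with_patience(train_one_pass, validation_error, n_train_batches,
                        n_epochs=200, patience=200000, patience_increase=4,
                        improvement_threshold=0.00001):
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = float('inf')
    done_looping = False
    epoch = 0
    while epoch < n_epochs and not done_looping:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            train_one_pass(minibatch_index)
            if (iteration + 1) % validation_frequency == 0:
                this_loss = validation_error()
                if this_loss < best_validation_loss:
                    # only extend patience for a sufficiently large improvement
                    if this_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_loss
            if patience <= iteration:
                done_looping = True
                break
    return best_validation_loss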
def random_epoch_train_begining(learning_rate=0.05, weight_decay=0.001, nkerns=[20, 50], n_epochs=200, batch_size=500, dataset='mnist.pkl.gz', name_given='test'): #name = 'FashionMnist_'+str(learning_rate)+'_'+str(weight_decay) + '_' + str(nkerns) + 'Rand_Trans_Relu2_Begin' name = name_given rng = numpy.random.RandomState(23455) datasets = loaddata_mnist(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train = train_set_x.get_value(borrow=True).shape[0] n_valid = valid_set_x.get_value(borrow=True).shape[0] n_test = test_set_x.get_value(borrow=True).shape[0] test_set_x = test_set_x.reshape((n_test, 1, 28, 28)) valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28)) train_set_x = train_set_x.reshape((n_train, 1, 28, 28)) temp_train_set_x = theano.shared(numpy.zeros(train_set_x.shape.eval(), dtype=theano.config.floatX), borrow=True) temp_train_set_xx = T.Rebroadcast((1, True))(temp_train_set_x) temp_valid_set_x = theano.shared(numpy.zeros(valid_set_x.shape.eval(), dtype=theano.config.floatX), borrow=True) temp_valid_set_xx = T.Rebroadcast((1, True))(temp_valid_set_x) temp_test_set_x = theano.shared(numpy.zeros(test_set_x.shape.eval(), dtype=theano.config.floatX), borrow=True) temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x) n_train_batches = n_train // batch_size n_valid_batches = n_valid // batch_size n_test_batches = n_test // batch_size x = T.matrix('x') y = T.ivector('y') index = T.lscalar() dummy = T.ftensor4('dummy') update_train = (temp_train_set_x, dummy) update_valid = (temp_valid_set_x, dummy) update_test = (temp_test_set_x, dummy) replace_train = theano.function([dummy], temp_train_set_x, updates=[update_train]) replace_valid = theano.function([dummy], temp_valid_set_x, updates=[update_valid]) replace_test = theano.function([dummy], temp_test_set_x, updates=[update_test]) print('... 
loading the model') layer0_input = x.reshape((batch_size, 1, 28, 28)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) cost = layer3.negative_log_likelihood(y) params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) updates = [(param_i, param_i - learning_rate * (grad_i + weight_decay * param_i)) for param_i, grad_i in zip(params, grads)] patience_increase = 2 improvement_threshold = 0.995 start_time = timeit.default_timer() rand_trans_x = numpy.random.random_integers(-10, 10, 200) rand_trans_y = numpy.random.random_integers(-10, 10, 200) numpy.save('rand_trans_x.npy', rand_trans_x) numpy.save('rand_trans_y.npy', rand_trans_y) error_line = numpy.zeros(n_epochs) test_model = theano.function( [index], layer3.errors(y), givens={ layer0.input: temp_test_set_xx[index * 500:(index + 1) * 500], y: test_set_y[index * 500:(index + 1) * 500] }) validate_model = theano.function( [index], layer3.errors(y), givens={ layer0.input: temp_valid_set_xx[index * 500:(index + 1) * 500], y: valid_set_y[index * 500:(index + 1) * 500] }) train_model = theano.function( [index], cost, updates=updates, givens={ layer0.input: temp_train_set_xx[index * 500:(index + 1) * 500], y: train_set_y[index * 500:(index + 1) * 500] }) print('... training') best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
patience = 20000 validation_frequency = min(n_train_batches, patience // 2) epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): horizontal = rand_trans_x[epoch] vertical = rand_trans_y[epoch] tran_test_set_x = theano_translation_updating(test_set_x, horizontal, vertical).reshape( (-1, 1, 28, 28)) tran_valid_set_x = theano_translation_updating(valid_set_x, horizontal, vertical).reshape( (-1, 1, 28, 28)) tran_train_set_x = theano_translation_updating(train_set_x, horizontal, vertical).reshape( (-1, 1, 28, 28)) replace_test(tran_test_set_x) replace_valid(tran_valid_set_x) replace_train(tran_train_set_x) epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('Horizontal Shift:', horizontal, 'Vertical Shift:', vertical) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) error_line[epoch - 1] = this_validation_loss # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break [t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3] = \ [layer0, layer1, layer2_input, layer2, layer3] with open(name + '.pkl', 'wb') as f: pickle.dump([t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3], f) error_line = error_line[0:epoch - 1] * 100 scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line}) end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
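# theano_translation_updating() is not shown in this file; the sketch below is a
# NumPy analogue of what the per-epoch augmentation above needs: shift every 28x28
# image by (horizontal, vertical) pixels and zero-fill the uncovered border. The
# function name translate_images is illustrative only.
import numpy

def translate_images(images, horizontal, vertical):
    # images: (n, 1, 28, 28) array
    shifted = numpy.roll(numpy.roll(images, vertical, axis=2), horizontal, axis=3)
    # zero out the wrapped-around rows/columns so the shift is a true translation
    if vertical > 0:
        shifted[:, :, :vertical, :] = 0
    elif vertical < 0:
        shifted[:, :, vertical:, :] = 0
    if horizontal > 0:
        shifted[:, :, :, :horizontal] = 0
    elif horizontal < 0:
        shifted[:, :, :, horizontal:] = 0
    return shifted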
def run():
    preProcess = PreProcess(load_in=True)
    data = preProcess.run()

    train_set_x, train_set_y = data[0], data[3]
    valid_set_x, valid_set_y = data[1], data[4]
    test_set_x, test_set_y = data[2], data[5]

    # network parameters
    num_kernels = [10, 10]
    kernel_sizes = [(9, 9), (5, 5)]
    sigmoidal_output_size = 20

    # training parameters
    learning_rate = 0.1
    batch_size = 50

    # Setup 2: compute batch counts for train/test/validation
    # borrow=True gets us the value of the variable without making a copy.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_test_batches /= batch_size
    n_valid_batches /= batch_size

    # Setup 3.
    # Declare inputs to network - x and y are placeholders
    # that will be used in the training/testing/validation functions below.
    x = T.matrix('x')   # input image data
    y = T.ivector('y')  # input label data

    # ## Layer 0 - First convolutional layer
    # The first layer takes (batch_size, 1, 100, 100) as input, convolves it with
    # num_kernels[0] = 10 different 9x9 filters, and then downsamples (via maxpooling)
    # in a 2x2 region. Each filter/maxpool combination produces an output of size
    # (100-9+1)/2 = 46 on a side, so the first layer's output is
    # (batch_size, 10, 46, 46).
    layer0_input_size = (batch_size, 1, 100, 100)  # fixed size from the data
    edge0 = (100 - kernel_sizes[0][0] + 1) / 2
    layer0_output_size = (batch_size, num_kernels[0], edge0, edge0)

    # check that we have an even multiple of 2 before pooling
    assert ((100 - kernel_sizes[0][0] + 1) % 2) == 0

    # The actual input is the placeholder x reshaped to the input size of the network
    # (rng is assumed to be defined at module level)
    layer0_input = x.reshape(layer0_input_size)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=layer0_input_size,
                                filter_shape=(num_kernels[0], 1) + kernel_sizes[0],
                                poolsize=(2, 2))

    # ## Layer 1 - Second convolutional layer
    # The second layer takes (batch_size, 10, 46, 46) as input, convolves it with
    # num_kernels[1] = 10 different 10x5x5 filters, and then downsamples (via
    # maxpooling) in a 2x2 region. Each filter/maxpool combination produces an
    # output of size (46-5+1)/2 = 21 on a side, so the second layer's output is
    # (batch_size, 10, 21, 21).
    layer1_input_size = layer0_output_size
    edge1 = (edge0 - kernel_sizes[1][0] + 1) / 2
    layer1_output_size = (batch_size, num_kernels[1], edge1, edge1)

    # check that we have an even multiple of 2 before pooling
    assert ((edge0 - kernel_sizes[1][0] + 1) % 2) == 0

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=layer1_input_size,
                                filter_shape=(num_kernels[1], num_kernels[0]) + kernel_sizes[1],
                                poolsize=(2, 2))

    # ## Layer 2 - Fully connected sigmoidal layer
    # The sigmoidal layer takes a vector as input. We flatten all but the first
    # dimension, to get an input of size (batch_size, num_kernels[1] * edge1 * edge1).
    # raw_random = raw_random.RandomStreamsBase()
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))

    # def rectify(X):
    #     return T.maximum(X, 0.)
def dropout(X, p=0.5): if p > 0: retain_prob = 1 - p X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) X /= retain_prob return X layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng, input=dropout(layer2_input), n_in=num_kernels[1] * edge1 * edge1, n_out=sigmoidal_output_size, activation=T.tanh) # ## Layer 3 - Logistic regression output layer # A fully connected logistic regression layer converts the sigmoid's layer output to a class label. layer3 = LogisticRegression(input=layer2.output, n_in=sigmoidal_output_size, n_out=sport_n) # # Training the network # To train the network, we have to define a cost function. We'll use the Negative Log Likelihood of the model, relative to the true labels **`y`**. # The cost we minimize during training is the NLL of the model. # Recall: y is a placeholder we defined above cost = layer3.negative_log_likelihood(y) # ### Gradient descent # We will train with Stochastic Gradient Descent. To do so, we need the gradient of the cost relative to the parameters of the model. We can get the parameters for each label via the **`.params`** attribute. # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # ## Update updates = [ (param_i, param_i - learning_rate * grad_i) # <=== SGD update step for param_i, grad_i in zip(params, grads) ] index = T.lscalar( ) # index to a batch of training/validation/testing examples train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], # <=== batching y: train_set_y[index * batch_size:(index + 1) * batch_size] # <=== batching }) # ## Validation function # To track progress on a held-out set, we count the number of misclassified examples in the validation set. validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # ## Test function # After training, we check the number of misclassified examples in the test set. test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # # Training loop # We use SGD for a fixed number of iterations over the full training set (an "epoch"). Usually, we'd use a more complicated rule, such as iterating until a certain number of epochs fail to produce improvement in the validation set. for epoch in range(30): costs = [train_model(i) for i in xrange(n_train_batches)] validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] print "Epoch {} NLL {:.2} %err in validation set {:.1%}".format( epoch + 1, np.mean(costs), np.mean(validation_losses)) # ## Learned features #filters = tile_raster_images(layer0.W.get_value(borrow=True), img_shape=(9, 9), tile_shape=(1,10), tile_spacing=(3, 3), # scale_rows_to_unit_interval=True, # output_pixel_vals=True) #plt.imshow(filters) #plt.show() # ## Check performance on the test set test_errors = [test_model(i) for i in range(n_test_batches)] print "test errors: {:.1%}".format(np.mean(test_errors))
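# The dropout() helper above uses "inverted" dropout: activations are zeroed with
# probability p and the survivors are rescaled by 1/(1-p), so the expected value of
# each unit is unchanged and no rescaling is needed at test time. A small NumPy
# illustration of that property (the variable names here are illustrative only):
import numpy as np

rng_np = np.random.RandomState(0)
activations = rng_np.rand(100000)
p = 0.5
mask = rng_np.binomial(1, 1 - p, size=activations.shape)
dropped = activations * mask / (1 - p)
# The two means agree up to sampling noise:
#   activations.mean() ~= dropped.mean()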
input = T.matrix('input')
label = T.ivector('label')

batch_size = 100
nkerns = [10, 25]

layer0_input = input.reshape((batch_size, 1, 50, 50))

# layer 0: convolution
#   input:      batch_size * 1 * 50 * 50
#   conv width: (50 - 5)/3 + 1 = 16
#   pool width: 16 / 2 = 8
#   output:     batch_size * nkerns[0] * 8 * 8
layer0 = LeNetConvPoolLayer(rng,
                            input=layer0_input,
                            image_shape=(batch_size, 1, 50, 50),
                            filter_shape=(nkerns[0], 1, 5, 5),
                            poolsize=(2, 2),
                            stride=(3, 3))

# layer 1: convolution
#   input:      batch_size * nkerns[0] * 8 * 8
#   conv width: (8 - 4) + 1 = 5
#   pool width: 5 / 1 = 5
#   output:     batch_size * nkerns[1] * 5 * 5
layer1 = LeNetConvPoolLayer(rng,
                            input=layer0.output,
                            image_shape=(batch_size, nkerns[0], 8, 8),
                            filter_shape=(nkerns[1], nkerns[0], 4, 4),
                            poolsize=(1, 1),
                            stride=(1, 1))
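# A sketch of the size bookkeeping the comments above spell out, assuming a 'valid'
# convolution with the given stride followed by non-overlapping pooling.
# conv_pool_width is a hypothetical helper, not part of the original code.
def conv_pool_width(input_width, filter_width, conv_stride, pool_width):
    conv_width = (input_width - filter_width) // conv_stride + 1
    return conv_width // pool_width

# layer 0: conv_pool_width(50, 5, 3, 2) -> 8
# layer 1: conv_pool_width(8, 4, 1, 1)  -> 5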
def evaluate_lenet5(learning_rate, n_epochs, nkerns, batch_size): """ Demonstrates lenet on a small sample of the cacophony dataset using a network consisting of: - two (convolutional + max pool) layers - one fully connected hidden layer - logistic regression to determine the final class from the hidden layer outputs :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels in each layer Adapted from convolutional_mlp::evaluate_lenet5 """ filter_size = 5 # number of pixels across for the convolutional filter rng = numpy.random.RandomState( 23455) # Use this one for the same result each time # rng = numpy.random.RandomState() datasets = load_data() # Image list, classification list train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.vector('y', "int64") # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 48 * 64) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (48, 64) is the size of cacophony small images. (height, width) layer0_input = x.reshape( (batch_size, 1, IMAGE_HEIGHTS[0], IMAGE_WIDTHS[0])) # Construct the first convolutional pooling layer: # filtering reduces the image size to (48-5+1 , 64-5+1) = (44, 60) # maxpooling reduces this further to (44/2, 60/2) = (22, 30) # 4D output tensor is thus of shape (batch_size, nkerns[0], 22, 30) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, IMAGE_HEIGHTS[0], IMAGE_WIDTHS[0]), filter_shape=(nkerns[0], 1, FILTER_SIZES[0], FILTER_SIZES[0]), poolsize=(MAX_POOLING_SIZES[0], MAX_POOLING_SIZES[0])) # Construct the second convolutional pooling layer # filtering reduces the image size to (22-5+1, 30-5+1) = (18, 26) # maxpooling reduces this further to (18/2, 26/2) = (9, 13) # 4D output tensor is thus of shape (batch_size, nkerns[1], 9, 13) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=( batch_size, nkerns[0], IMAGE_HEIGHTS[1], IMAGE_WIDTHS[1]), # previous layer generated 22*30 sized images filter_shape=(nkerns[1], nkerns[0], FILTER_SIZES[1], FILTER_SIZES[1]), poolsize=(MAX_POOLING_SIZES[1], MAX_POOLING_SIZES[1])) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 9 * 13), # or (1, 50 * 9 * 13) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * IMAGE_HEIGHTS[2] * IMAGE_WIDTHS[ 2], # 9*13 is the number of pixels in the "image" from the previous layer n_out=batch_size, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=5) # n_out is the number of classes # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) # end-snippet-1 ############### # TRAIN MODEL # ############### print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch += 1 for minibatch_index in range(n_train_batches): iterator = (epoch - 1) * n_train_batches + minibatch_index if iterator % 100 == 0: print('training @ iterator = ', iterator) cost_ij = train_model(minibatch_index) if (iterator + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iterator * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iterator # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iterator: done_looping = True break end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print( ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) display_output(test_set_x, batch_size, layer0, nkerns[0]) # display the final filters for the convolutional layers display_conv_filters("Layer 0", layer0) display_conv_filters("Layer 1", layer1)
def __init__(self, datasets, nkerns=[32, 48], batch_size=1000, normalized_width=20, distortion=0, cuda_convnet=1, params=[None, None, None, None, None, None, None, None]): """ Demonstrates Ciresan 2012 on MNIST dataset Some minor differences here: --- - Ciresan initializes Conv layers with: "uniform random distribution in the range [−0.05, 0.05]." (Ciresan IJCAI 2011) - Ciresan uses a sigma of 6 - Ciresan uses nkerns=[20, 40] which were increased here to be nkerns=[32, 48] in order to be compatible with cuda_convnet :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer :type params: list of None or Numpy matricies/arrays :param params: W/b weights in the order: layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b """ layer3W, layer3b, layer2W, layer2b, layer1W, layer1b, layer0W, layer0b = params rng = numpy.random.RandomState(23455) # TODO: could make this a theano sym variable to abstract # loaded data from column instantiation train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # TODO: could move this to train method # compute number of minibatches for training, validation and testing self.n_train_batches = train_set_x.get_value(borrow=True).shape[0] self.n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] self.n_test_batches = test_set_x.get_value(borrow=True).shape[0] self.n_train_batches /= batch_size self.n_valid_batches /= batch_size self.n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch learning_rate = T.fscalar() # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the column' if distortion: distortion_layer = ElasticLayer(x.reshape((batch_size, 29, 29)), 29, magnitude=ALPHA, sigma=SIGMA) network_input = distortion_layer.output.reshape( (batch_size, 1, 29, 29)) else: network_input = x.reshape((batch_size, 1, 29, 29)) if cuda_convnet: layer0_input = network_input.dimshuffle(1, 2, 3, 0) else: layer0_input = network_input layer0_imageshape = (1, 29, 29, batch_size) if cuda_convnet else (batch_size, 1, 29, 29) layer0_filtershape = (1, 4, 4, nkerns[0]) if cuda_convnet else (nkerns[0], 1, 4, 4) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=layer0_imageshape, filter_shape=layer0_filtershape, poolsize=(2, 2), cuda_convnet=cuda_convnet, W=layer0W, b=layer0b) layer1_imageshape = (nkerns[0], 13, 13, batch_size) if cuda_convnet else (batch_size, nkerns[0], 13, 13) layer1_filtershape = (nkerns[0], 5, 5, nkerns[1]) if cuda_convnet else (nkerns[1], nkerns[0], 5, 5) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=layer1_imageshape, filter_shape=layer1_filtershape, poolsize=(3, 3), cuda_convnet=cuda_convnet, W=layer1W, b=layer1b) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
if cuda_convnet: layer2_input = layer1.output.dimshuffle(3, 0, 1, 2).flatten(2) else: layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 3 * 3, n_out=150, W=layer2W, b=layer2b, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=150, n_out=10, W=layer3W, b=layer3b) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model self.test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # create a function to compute probabilities of all output classes self.test_output_batch = theano.function( [index], layer3.p_y_given_x, givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size] }) self.validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent self.params = layer3.params + layer2.params + layer1.params + layer0.params self.column_params = [ nkerns, batch_size, normalized_width, distortion, cuda_convnet ] # create a list of gradients for all model parameters grads = T.grad(cost, self.params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - (learning_rate) * grad_i) for param_i, grad_i in zip(self.params, grads)] # Suggested by Alex Krizhevsky, found on: # http://yyue.blogspot.com/2015/01/a-brief-overview-of-deep-learning.html optimal_ratio = 0.001 # should show what multiple current learning rate is of optimal learning rate grads_L1 = sum([abs(grad).sum() for grad in grads]) params_L1 = sum([abs(param).sum() for param in self.params]) update_ratio = (learning_rate / (optimal_ratio)) * (grads_L1 / params_L1) self.train_model = theano.function( [index, learning_rate], [cost, update_ratio], updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] })
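# The update_ratio computed above follows Krizhevsky's rule of thumb that the ratio
# of the (L1) update magnitude to the (L1) parameter magnitude should sit near 1e-3.
# A NumPy sketch of the same check outside the Theano graph; the function name and
# arguments below are illustrative only.
import numpy as np

def update_to_param_ratio(param_values, grad_values, learning_rate):
    update_l1 = sum(np.abs(learning_rate * g).sum() for g in grad_values)
    param_l1 = sum(np.abs(p).sum() for p in param_values)
    # aim for roughly 1e-3; dividing by 1e-3 gives the multiple of the "optimal" ratio
    return update_l1 / param_l1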
def runDeepLearning(): ### Loading training set and separting it into training set and testing set myDataset = Dataset() preprocess = 0 datasets = myDataset.loadTrain(preprocess) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] dataset_test = myDataset.loadTest(preprocess) test_set_x, test_set_y, test_set_y_array = dataset_test[0] # temporary solution to get the ground truth of sample out to test_set_y_array. # the reason is that after T.cast, test_set_y becomes TensorVariable, which I do not find way to output its # value...anyone can help? ### Model parameters learning_rate = 0.02 n_epochs = 3000 nkerns = [ 30, 40, 40 ] # number of kernal at each layer, current best performance is 50.0% on testing set, kernal number is [30,40,40] batch_size = 500 # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (48, 48) # size of input images nClass = 7 rng = np.random.RandomState(23455) ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, ishape[0], ishape[0])) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, ishape[0], ishape[0]), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 22, 22), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(nkerns[0], nkerns[1], 9, 9), filter_shape=(nkerns[2], nkerns[1], 2, 2), poolsize=(2, 2)) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). 
# This will generate a matrix of shape (20,32*4*4) = (20,512) layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=nClass) # the cost we minimize during training is the NLL of the model cost = layer4.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) test_model = theano.function( [index], layer4.errorsLabel(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = np.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set #test_losses = [test_model(i) for i in xrange(n_test_batches)] test_output = [ test_model(i) for i in xrange(n_test_batches) ] test_losses = [item[0] for item in test_output] #test_y_gt = [label[0] for label in item[1] for item in test_output] # test_y_pred = np.array( [label for label in item[1] for item in test_output]) test_y_gt = np.array( [label for label in item[2] for item in test_output]) #test_y_pred = np.array([item[1] for item in test_output] ) ## the predicted_labels for the input ### it seems that the batchsize cannot be change in Theano.function while training model ### #test_label = reduce(lambda x,y: x+y,test_label) #print test_y_pred #print test_y_gt #print test_set_y_array errorNum = np.count_nonzero(test_y_gt - test_y_pred) errorSampleIndex = [ i for i in range(len(test_y_pred)) if test_y_pred[i] != test_set_y_array[i] ] #print errorNum, len(errorSampleIndex) test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) print((' on all test sample %f %%') % ((float(errorNum) / float(len(test_y_pred)) * 100.))) if patience <= iter: done_looping = True break end_time = time.clock() print('Optimization complete.') #TODO: write the code to save the trained model and test the trained model on test data print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) # save the misclassified samples myDataset.plotSample(test_set_x.get_value(), test_set_y, [i for i in range(0, 100)])
def Buildnet(params, nkerns=[20, 50], batch_size=500): rng = numpy.random.RandomState(23455) datasets = load_data(0) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (28, 28) # this is the size of MNIST images ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=3) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) f = theano.function( inputs=[index], outputs=[layer2.output, layer3.y_pred, y], givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # numepoch = len(params) layer3.W.set_value(params[-1][0]) layer3.b.set_value(params[-1][1]) layer2.W.set_value(params[-1][2]) layer2.b.set_value(params[-1][3]) layer1.W.set_value(params[-1][4]) layer1.b.set_value(params[-1][5]) layer0.W.set_value(params[-1][6]) layer0.b.set_value(params[-1][7]) outputvectors = numpy.zeros((10000, 500)) labels = numpy.zeros((10000, 1)) reallabels = numpy.zeros((10000, 1)) for minibatch_index in xrange(n_test_batches): vector, label, reallabel = f(minibatch_index) outputvectors[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] = vector labels[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, 0] = label reallabels[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, 0] = reallabel return [outputvectors, labels, reallabels]
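# A usage sketch for the arrays Buildnet() returns: outputvectors holds the
# 500-dimensional hidden-layer activations for the 10000 test samples, labels the
# predicted classes, and reallabels the ground truth. It assumes `params` holds the
# per-epoch weight snapshots saved during training (only params[-1] is used); the
# accuracy computation below is illustrative only.
vectors, predicted, truth = Buildnet(params)
accuracy = numpy.mean(predicted == truth)
print 'test accuracy: %f %%' % (accuracy * 100.)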
def perturb_bfgs(perturbation, params, shape, oldoutput, c=1, nkerns=[20, 50], batch_size=1):
    # print '... building the model'
    rng = numpy.random.RandomState(23455)

    x = T.tensor4()

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=3)

    f = theano.function(inputs=[x], outputs=[layer2.output, layer3.y_pred])

    # load the trained weights of the final epoch
    layer3.W.set_value(params[-1][0])
    layer3.b.set_value(params[-1][1])
    layer2.W.set_value(params[-1][2])
    layer2.b.set_value(params[-1][3])
    layer1.W.set_value(params[-1][4])
    layer1.b.set_value(params[-1][5])
    layer0.W.set_value(params[-1][6])
    layer0.b.set_value(params[-1][7])

    perturbed = shape
    oldoutputs = oldoutput
    distances = 0
    perturblength = numpy.sqrt(numpy.sum(perturbation ** 2))

    shapes = perturbed + perturbation
    outputs, labels = f(shapes.reshape(1, 1, 28, 28))
    print labels

    # average hidden-layer distance between the perturbed output and the originals
    for o in oldoutputs:
        distances += numpy.sqrt(numpy.sum((outputs - o) ** 2))
    distances /= len(oldoutputs)

    # combined objective: perturbation length plus mean representation distance
    return c * perturblength + distances
def perturb_random(params, shape, oldoutput, nkerns=[20, 50], batch_size=500): print '... building the model' rng = numpy.random.RandomState(23455) x = T.tensor4() # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 28, 28)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) f = theano.function(inputs=[x], outputs=[layer2.output, layer3.y_pred]) layer3.W.set_value(params[-1][0]) layer3.b.set_value(params[-1][1]) layer2.W.set_value(params[-1][2]) layer2.b.set_value(params[-1][3]) layer1.W.set_value(params[-1][4]) layer1.b.set_value(params[-1][5]) layer0.W.set_value(params[-1][6]) layer0.b.set_value(params[-1][7]) # perturb 500 shapes at each iteration, with ptimes iterations perturbed = numpy.tile(shape[0], (500, 1)) oldoutputs = numpy.tile(oldoutput, (500, 1)) label = shape[1] ptimes = 500 imagelength = numpy.sqrt(numpy.sum(shape[0]**2)) outputlength = numpy.sqrt(numpy.sum(oldoutput**2)) p = [] s = [] for i in range(ptimes): print 'perturbing ' + str(i) + ' ......' perturbation = numpy.random.normal(0, 0.15, perturbed.shape) perturblength = numpy.sqrt(numpy.sum(perturbation**2, axis=1)) shapes = perturbed + perturbation outputs, labels = f(shapes.reshape(500, 1, 28, 28)) distances = numpy.sum((outputs - oldoutputs)**2, axis=1) pos = numpy.argmax(distances) print 'distance ' + str(numpy.sqrt(distances[pos])) pert = {} pert['perturbation'] = perturbation[pos] pert['plength'] = perturblength[pos] pert['ilength'] = imagelength pert['olength'] = outputlength pert['distance'] = numpy.sqrt(distances[pos]) pert['output'] = outputs[pos] pert['label'] = labels[pos] p.append(pert) if len(numpy.nonzero(labels != label)[0]) != 0: print 'success!' + str(label) + ' ' pos = numpy.nonzero(labels != label)[0][0] print labels[pos] pert = {} pert['perturbation'] = perturbation[pos] pert['plength'] = perturblength[pos] pert['ilength'] = imagelength pert['olength'] = outputlength pert['distance'] = numpy.sqrt(distances[pos]) pert['output'] = outputs[pos] pert['label'] = labels[pos] s.append(pert) return p, s
def __init__(self, n_basket, n_hidden, n_vocabulary, n_embedding_dimension): # n_window groups several input records together (which may strengthen the sense of sequence), so that x becomes an n_embedding_dimension * n_window-dimensional vector # The embedding matrix of the item vectors (loosely, the attribute vector of each item) iscnn = False iscostplus = True nkerns = n_embedding_dimension # apparently these cannot differ; keep them equal for now filter_shape = (nkerns, 1, n_basket, 1) # rng = np.random.RandomState(23455) rng = np.random.RandomState(23456) poolsize = (1, n_embedding_dimension) print "1. Neuron parameter construction ............", embedding = np.random.uniform(-0.5, 0.5, (n_vocabulary, n_embedding_dimension)).astype(theano.config.floatX) embedding[-1] = 0. self.embedding = theano.shared( value=embedding.astype(theano.config.floatX), name='embedding', borrow=True ) # Using index -1 as the padding item is not ideal, but no better scheme comes to mind. # Input-to-hidden weights u (x dot u) self.u = theano.shared( value=np.random.uniform(-0.5, 0.5, (nkerns, n_hidden)).astype(theano.config.floatX), # this dimension should match the vector dimension of the CNN output name='u', borrow=True ) # Hidden-to-hidden weights w (h dot w) self.w = theano.shared( value=np.random.uniform(-0.5, 0.5, (n_hidden, n_hidden)).astype(theano.config.floatX), name='w', borrow=True ) self.hidden_lay0 = theano.shared( value=np.zeros(n_hidden, dtype=theano.config.floatX), name='hidden_lay0', borrow=True ) fan_in = np.prod(filter_shape[1:]) fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) // np.prod(poolsize)) # initialize weights with random weights W_bound = np.sqrt(6. / (fan_in + fan_out)) self.w_cnn = theano.shared(np.asarray(rng.uniform(low=-W_bound, high=W_bound, size=filter_shape), dtype=theano.config.floatX), borrow=True) b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX) self.b_cnn = theano.shared(value=b_values, borrow=True) input_item_id = T.lmatrix('input_item_id') # the input matrix of item ids input_size = T.lvector('input_size') neg_item_id = T.lmatrix('neg_item_id') x = self.embedding[input_item_id].reshape((input_item_id.shape[0], n_basket, n_embedding_dimension)) x.name = 'x' # y = self.embedding[next_item_id].reshape((1, n_window * n_embedding_dimension))[0] neg = self.embedding[neg_item_id].reshape((neg_item_id.shape[0], n_basket, n_embedding_dimension)) neg.name = 'neg' # Pass the embedded feature matrix through a CNN (convolution first, then pooling) if iscnn: cnn_x = LeNetConvPoolLayer( rng, input=x.reshape((x.shape[0], 1, n_basket, n_embedding_dimension)), image_shape=(None, 1, n_basket, n_embedding_dimension), # image_shape has almost no effect here; the first dimension can be left unspecified filter_shape=filter_shape, W=self.w_cnn, b=self.b_cnn, poolsize=poolsize ) cnn_x_output = cnn_x.output.flatten(2) self.param = (self.embedding, self.u, self.w, self.w_cnn, self.b_cnn) # , self.v) self.name = ('embedding', 'u', 'w', 'w_cnn', 'b_cnn') else: def pooling_max(abasker_t, basket_size_t): pool_result_t = T.max(abasker_t[: basket_size_t], axis=0) return pool_result_t pool_result, _ = theano.scan(fn=pooling_max, sequences=[x.reshape((x.shape[0], n_basket, n_embedding_dimension)), input_size]) cnn_x = pool_result cnn_x_output = cnn_x.flatten(2) self.param = (self.embedding, self.u, self.w) self.name = ('embedding', 'u', 'w') print "done" print "2. Loss function construction ..............", def recurrence(x_t, h_tml): # the recurrence applied at each step of the scan h_t = T.nnet.sigmoid(T.dot(x_t, self.u) + T.dot(h_tml, self.w)) return h_t h, _ = theano.scan( fn=recurrence, sequences=cnn_x_output, outputs_info=[self.hidden_lay0] ) h.name = 'h' self.user_feature = h[-1, :] # self.user_feature.name = 'user_feature' # Loss function if iscostplus: def cla_cost(x_t, h_t): s_tt = T.dot((x[x_t+1][:input_size[x_t+1]] - neg[x_t+1][:input_size[x_t+1]]), h_t) s_t = T.sum(T.log(1 + T.exp(-s_tt))) return s_t s, _ = theano.scan( fn=cla_cost, sequences=[T.arange(x.shape[0]-1), h] ) cost = T.sum(s) else: cost_temp = T.dot(x[-1][:input_size[-1]], h[-2]) - T.dot(neg[-1][:input_size[-1]], h[-2]) cost = T.sum(T.log(1 + T.exp(-cost_temp))) print "done" print "3. Stochastic gradient descent update formula ......", learning_rate = T.dscalar('learning_rate') lamda = T.dscalar('lamda') gradient = T.grad(cost, self.param) updates = [(p, p - learning_rate * (g + p * lamda)) for p, g in zip(self.param, gradient)] print "done" print "4. Prediction function definition ..............", y_pred = T.argsort(T.dot(self.embedding, self.user_feature)) # that -6 takes the top 5 in ascending order self.predict = theano.function(inputs=[input_item_id, input_size], outputs=y_pred) print "done" print "5. Training function definition ..............", self.train = theano.function(inputs=[input_item_id, neg_item_id, input_size, learning_rate, lamda], outputs=cost, updates=updates) print "done" print "6. Evaluation function definition ..............", self.evaluation_recall_6 = theano.function(inputs=[input_item_id, input_size], outputs=y_pred) print "done" self.normalize = theano.function(inputs=[], updates={self.embedding:\ self.embedding/T.sqrt((self.embedding**2).sum(axis=1)).dimshuffle(0, 'x')*10})
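# A hypothetical training loop for the class above. The class name (BasketRNN) and the data layout (padded item-id matrices, per-basket sizes, sampled negatives) are assumptions for illustration; only train/normalize/predict come from the original code.
model = BasketRNN(n_basket=20, n_hidden=50, n_vocabulary=10000, n_embedding_dimension=50)  # class name assumed
for epoch in range(10):
    total_cost = 0.
    for input_ids, sizes, neg_ids in training_sequences:      # assumed iterator over one user's basket sequence
        total_cost += model.train(input_ids, neg_ids, sizes, 0.05, 1e-4)
        model.normalize()                                      # keep embedding rows at a fixed norm
    print 'epoch %d, cost %f' % (epoch, total_cost)
ranking = model.predict(input_ids, sizes)                      # item ids sorted by ascending score
top5 = ranking[-5:][::-1]                                      # highest-scoring 5 items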
batch_size = 1 # allocate symbolic variables for the data #index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images # Reshape matrix of rasterized images of shape (1, 50*50) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 50, 50)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (50-10+1,50-10+1)=(41,41) # maxpooling reduces this further to (41/2,41/2) = (20,20), rounding down # 4D output tensor is thus of shape (batch_size,nkerns[0],20,20) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \ image_shape=(batch_size, 1, 50, 50), \ filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2) \ ) # Construct the second convolutional pooling layer # filtering reduces the image size to (20-5+1,20-5+1)=(16,16) # maxpooling reduces this further to (16/2,16/2) = (8,8) # 4D output tensor is thus of shape (batch_size,nkerns[1],8,8) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 20, 20), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) \ ) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (1, nkerns[1]*8*8) = (1, 3200) layer2_input = layer1.output.flatten(2)
def display(params, digit, epoch, mode = 'mat', size = (56, 56)): #epoch contains a list of numbers to show #for example, epoch = [0, 2, 4] can show epoch 0 (original stage) and epoch 2 4 #after running the CNN, params can be used directly, and can also use numpy.load('params.npy') to get #digit is a single digit of image set, for example, digit = train_set_x.get_value()[number] nkerns=[20, 50] rng = numpy.random.RandomState(23455) #show original digit if os.path.exists('digit') == 0: os.mkdir('digit') if mode == 'png': plt.figure(1) plt.gray() plt.axis('off') plt.imshow(digit.reshape(size)) plt.savefig('digit/activity of layer0 (original digit).png') digit = digit.reshape(1, 1, size[0], size[1]) inputdigit = T.tensor4() #building CNN with exactly the same parameters print '...building layer1' layer0_input = inputdigit layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(1, 1, size[0], size[1]), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) print '...building layer2' layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(1, nkerns[0], (size[0] - 4) / 2, (size[1] - 4) / 2), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) print '...building layer3' layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * (size[0] / 4 - 3) * (size[1] / 4 - 3), n_out=500, activation=T.tanh) print '...building layer4' layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) f = theano.function(inputs = [inputdigit], outputs = [layer0.conv_out, layer0.output, layer1.conv_out, layer1.output, layer2.output, layer3.p_y_given_x, layer3.y_pred]) #export filters and activity in different epochs for num in epoch: print '...epoch ' + str(num) layer3.W.set_value(params[num][0]) layer3.b.set_value(params[num][1]) layer2.W.set_value(params[num][2]) layer2.b.set_value(params[num][3]) layer1.W.set_value(params[num][4]) layer1.b.set_value(params[num][5]) layer0.W.set_value(params[num][6]) layer0.b.set_value(params[num][7]) [conv0, output0, conv1, output1, output2, output3, y] = f(digit) if mode == 'png': plt.figure(2) plt.gray() for i in range(nkerns[0]): plt.subplot(4, 5, i + 1) plt.axis('off') plt.imshow(layer0.W.get_value()[i, 0]) plt.savefig('digit/filter of layer1 in epoch ' + str(num) + '.png') plt.figure(3) plt.gray() for i in range(nkerns[1]): plt.subplot(5, 10, i + 1) plt.axis('off') plt.imshow(layer1.W.get_value()[i, 0]) plt.savefig('digit/filter of layer2 in epoch ' + str(num) + '.png') plt.figure(4) plt.gray() plt.axis('off') plt.imshow(layer2.W.get_value()) plt.savefig('digit/filter of layer3 in epoch ' + str(num) + '.png') plt.figure(5) plt.gray() plt.axis('off') plt.imshow(layer3.W.get_value()) plt.savefig('digit/filter of layer4 in epoch ' + str(num) + '.png') plt.figure(6) plt.gray() for i in range(nkerns[0]): plt.subplot(4, 5, i + 1) plt.axis('off') plt.imshow(output0[0, i]) plt.savefig('digit/activity of layer1 after downsampling in epoch ' + str(num) + '.png') plt.figure(7) plt.gray() plt.axis('off') for i in range(nkerns[1]): plt.subplot(5, 10, i + 1) plt.axis('off') plt.imshow(conv1[0, i]) plt.savefig('digit/activity of layer2 before downsampling in epoch ' + str(num) + '.png') plt.figure(8) plt.gray() plt.axis('off') for i in range(nkerns[0]): plt.subplot(4, 5, i + 1) plt.axis('off') plt.imshow(conv0[0, i]) plt.savefig('digit/activity of layer1 before downsampling in epoch ' + str(num) + '.png') plt.figure(9) plt.gray() for i in range(nkerns[1]): plt.subplot(5, 10, i + 1) plt.axis('off') 
plt.imshow(output1[0, i]) plt.savefig('digit/activity of layer2 after downsampling in epoch ' + str(num) + '.png') plt.figure(10) plt.gray() plt.axis('off') plt.imshow(numpy.tile(output2, (10, 1))) plt.savefig('digit/activity of layer3 in epoch ' + str(num) + '.png') plt.figure(11) plt.gray() plt.axis('off') plt.imshow(numpy.tile(output3, (10, 1))) plt.savefig('digit/activity of layer4 in epoch ' + str(num) + '.png') if mode == 'mat': sio.savemat('digit in epoch ' + str(num) + '.mat', {'ActivityOfLayer0' : digit.reshape(size), 'ActivityOfLayer1before' : conv0[0], 'ActivityOfLayer1after' : output0[0], 'ActivityOfLayer2before' : conv1[0], 'ActivityOfLayer2after' : output1[0], 'ActivityOfLayer3' : output2, 'ActivityOfLayer4' : output3, 'FilterOfLayer1' : layer0.W.get_value()[:, 0, :, :], 'FilterOfLayer2' : layer1.W.get_value()[:, 0, :, :], 'FilterOfLayer3' : layer2.W.get_value(), 'FilterOfLayer4' : layer3.W.get_value(), 'y_predict' : y}) return y
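# An illustrative call of display() above. 'params.npy' and the MNIST-style loader are assumptions; size must match the images the parameters were trained on, and (28, 28) is only an example value.
import numpy
params = numpy.load('params.npy')            # list of per-epoch parameter snapshots (assumed file)
digit = train_set_x.get_value()[42]          # any single image from the training set (assumed loader)
y = display(params, digit, epoch=[0, 2, 4], mode='mat', size=(28, 28))
print 'predicted label per exported epoch:', y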
def evaluate_lenet5(learning_rate=0.05, n_epochs=10, nkerns=[20, 50], batch_size=50): global train_dataset_route global valid_dataset_route global train_limit global valid_limit print train_dataset_route, type(train_dataset_route) """ Demonstrates a LeNet-style convolutional network on the SPC dataset loaded via load_data.load_spc_data :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) datasets = load_data.load_spc_data(train_dataset_route, valid_dataset_route, train_limit, valid_limit) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (100, 100) # this is the size of the input images ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 100*100) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 100, 100)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (100-40+1,100-40+1)=(61,61) # maxpooling reduces this further to (61/2,61/2) = (30,30), rounding down # 4D output tensor is thus of shape (batch_size,nkerns[0],30,30) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 100, 100), filter_shape=(nkerns[0], 1, 40, 40), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (30-15+1,30-15+1)=(16,16) # maxpooling reduces this further to (16/2,16/2) = (8,8) # 4D output tensor is thus of shape (batch_size,nkerns[1],8,8) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 30, 30), filter_shape=(nkerns[1], nkerns[0], 15, 15), poolsize=(2, 2)) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
# This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8, n_out=100, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model ''' test_model = theano.function([index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) test_results = theano.function(inputs=[index], outputs= layer3.y_pred, givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size]}) ''' validate_model = theano.function([index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function([index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter , ' patience = ' , patience cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter layer0_state = layer0.__getstate__() layer1_state = layer1.__getstate__() layer2_state = layer2.__getstate__() layer3_state = layer3.__getstate__() trained_model_list = [layer0_state, layer1_state, layer2_state, layer3_state] trained_model_array = numpy.asarray(trained_model_list) classifier_file = open(train_model_route, 'w') cPickle.dump([1,2,3], classifier_file, protocol=2) numpy.save(classifier_file, trained_model_array) classifier_file.close() if patience <= iter: done_looping = True print patience , iter break end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
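# The size arithmetic repeated in the comments above (a 'valid' convolution followed by 2x2 max-pooling with ignore_border) can be captured in a small helper. This is a sketch; the function name is illustrative and not part of the original code.
def conv_pool_output_size(img, filt, pool=2):
    conv = img - filt + 1          # 'valid' convolution
    return conv // pool            # max-pooling, rounding down (ignore_border)

# For the 100x100 network above, with 40x40 and then 15x15 filters:
edge0 = conv_pool_output_size(100, 40)    # -> 30, matching layer1's image_shape
edge1 = conv_pool_output_size(edge0, 15)  # -> 8, hence n_in = nkerns[1] * 8 * 8 for the hidden layer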
def create_cf_cnn(n_in, n_out, nkerns=[20, 50]): params_DB = shelve.open('params_cnn.dat') best_params = params_DB['params'] params_DB.close() x = T.vector('x') rng = numpy.random.RandomState(1234) # Reshape matrix of rasterized images of shape (1, 30*50) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((1, 1, 30, 50)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (30-5+1,50-5+1)=(26,46) # maxpooling reduces this further to (26/2,46/2) = (13,23) # 4D output tensor is thus of shape (1,nkerns[0],13,23) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(1, 1, 30, 50), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) # Construct the second convolutional pooling layer # filtering reduces the image size to (13-5+1,23-5+1)=(9,19) # maxpooling reduces this further to (9/2,19/2) = (4,9), rounding down # 4D output tensor is thus of shape (1,nkerns[1],4,9) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(1, nkerns[0], 13, 23), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (1, nkerns[1]*4*9) = (1, 1800) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 9, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2) params = layer3.params + layer2.params + layer1.params + layer0.params #print best_params[0].get_value() for i in range(len(best_params)): try: inc = T.vector('inc') setvalue = theano.function([inc], params[i], updates=[(params[i], inc)]) setvalue(best_params[i]) except: try: inc = T.matrix('inc') setvalue = theano.function([inc], params[i], updates=[(params[i], inc)]) setvalue(best_params[i]) except: inc = T.tensor4('inc') setvalue = theano.function([inc], params[i], updates=[(params[i], inc)]) setvalue(best_params[i]) #print classifier.params[0].get_value() #print classifier.logRegressionLayer.W.get_value() vect = T.vector('vect') cf = theano.function(inputs=[vect], outputs=layer3.p_y_given_x, givens={x: vect}) return cf
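# The try/except chain above compiles a throwaway Theano function of the right tensor type just to copy values into each parameter. Since every entry of params is a shared variable in the tutorial's layer classes, a simpler alternative could be a direct set_value; this is a sketch and assumes best_params holds plain numpy arrays (if they are shared variables, use best.get_value() first).
for param, best in zip(params, best_params):
    param.set_value(numpy.asarray(best, dtype=theano.config.floatX), borrow=True)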
def evaluate_lenet5(dataset_route=DataHome+"DogVsCat_test_feature_2500.csv", \ nkerns=[20, 50], batch_size=5): """ Runs the trained LeNet-style model on the Dogs vs. Cats test set and writes the predicted labels to CSV :type dataset_route: string :param dataset_route: path to the CSV file containing the test features :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_size: number of test images per prediction batch """ rng = numpy.random.RandomState(23455) trained_model_pkl = open(ModelHome + train_model_route, 'r') trained_model_state_list = cPickle.load(trained_model_pkl) trained_model_state_array = numpy.load(trained_model_pkl) layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array test_set = tdtf.read_data_to_ndarray(dataset_route, limit=None, header_n=0) test_set_x, id_arr = test_set datasets = load_data.shared_dataset(test_set) test_set_x, test_set_y = datasets print test_set_x.shape, test_set_y.shape # compute number of minibatches for training, validation and testing n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ishape = (50, 50) # this is the size of the input images ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 50*50) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 50, 50)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (50-10+1,50-10+1)=(41,41) # maxpooling reduces this further to (41/2,41/2) = (20,20), rounding down # 4D output tensor is thus of shape (batch_size,nkerns[0],20,20) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \ image_shape=(batch_size, 1, 50, 50), \ filter_shape=(nkerns[0], 1, 10, 10), poolsize=(2, 2), \ W=layer0_state[0], b=layer0_state[1] \ ) # Construct the second convolutional pooling layer # filtering reduces the image size to (20-5+1,20-5+1)=(16,16) # maxpooling reduces this further to (16/2,16/2) = (8,8) # 4D output tensor is thus of shape (batch_size,nkerns[1],8,8) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 20, 20), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2), \ W=layer1_state[0], b=layer1_state[1] \ ) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
# This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 8 * 8, n_out=100, activation=T.tanh,\ W=layer2_state[0], b=layer2_state[1] \ ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2, \ W=layer3_state[0], b=layer3_state[1] \ ) print "predicting" start_time = time.clock() # create a function to compute the mistakes that are made by the model test_results = theano.function( inputs=[index], outputs=layer3.y_pred, givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]}) test_res = [test_results(i) for i in xrange(n_test_batches)] print test_res id_l = [] label_l = [] index = 0 for arr in test_res: for label in arr: label_l.append(label) id_l.append(id_arr[index]) index += 1 tdtf.wr_to_csv(header=['id', 'label'], id_list=id_l, pred_list=label_l, filename=test_label_route) end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
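# The model file read at the top of this function is produced by the training code elsewhere in this document: a small cPickle header followed by a numpy array of per-layer (W, b) states. A sketch of that save/load pairing; 'model.pkl' style file names are placeholders.
import cPickle, numpy

def save_model_states(route, layer_states):
    f = open(route, 'wb')
    cPickle.dump([1, 2, 3], f, protocol=2)        # header, as written by the training loop
    numpy.save(f, numpy.asarray(layer_states))    # array of [W, b] per layer
    f.close()

def load_model_states(route):
    f = open(route, 'rb')
    cPickle.load(f)                               # skip the header
    states = numpy.load(f)
    f.close()
    return states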
def train(learning_rate=0.1, n_epochs=10, kernel_shapes = [7,5], nkerns=[15,15], batch_size=1000, batch_type = 'fast', mynet = 'best', representation='raw', momentum=0, history=4): # TODO: implement history of boards rng = numpy.random.RandomState(42) trainP = 0.998 validP = 0.001 testP = 0.001 print "... Reading cached values ..." (trainCumLengths,validCumLengths,testCumLengths,filenames) = pickle.load(open("results/lengths.cache",'r')) print "... Getting filenames ..." datasetKGS = "../../go-data" datasetPro = "../../pro-GoGod" # use both datasets, test and valid set are only Pro games # fn1 = readGame.getFilenames(datasetKGS,1,0,1)[0] # random.shuffle(fn1) # fn2 = readGame.getFilenames(datasetPro,1,0,1)[0] # NOTE: last 5% of professional games never used! # fn2 = fn2[:int(len(fn2)*0.95)] # random.shuffle(fn2) # filenames = fn2 #fn1 + fn2 n = len(filenames) print "... Learning set contains " + str(n) + " games" print "... Computing cumulative game lengths ..." trainNames = filenames[:int(trainP*n)] validNames = filenames[int(trainP*n):int(trainP*n+validP*n)] testNames = filenames[int(trainP*n+validP*n):int(trainP*n+validP*n+testP*n)] # random.shuffle(trainNames) # trainCumLengths = readGame.getCumGameLengths(trainNames) # validCumLengths = readGame.getCumGameLengths(validNames) # testCumLengths = readGame.getCumGameLengths(testNames) # fw = open("results/"lengths.cache","wb") # pickle.dump((trainCumLengths,validCumLengths,testCumLengths,filenames),fw) # fw.close() print "... Preprocessing initial batches ..." minn = batch_size / 80 +1 temp = time.time() test_batch_x, test_batch_y = utils.shared_dataset(readGame.processSGFs(testNames[:minn],representation),batch_size=batch_size) train_batch_x, train_batch_y = utils.shared_dataset(readGame.processSGFs(trainNames[:minn],representation),batch_size=batch_size) valid_batch_x, valid_batch_y = utils.shared_dataset(readGame.processSGFs(validNames[:minn],representation),batch_size=batch_size) print " average processing time per game: " + str((time.time()-temp)/18.0) + " seconds, per epoch: " + str(int((time.time()-temp)/18*n/60/60)) + " hours" # compute number of minibatches for training, validation and testing n_train_batches = trainCumLengths[-1] n_valid_batches = validCumLengths[-1] n_test_batches = testCumLengths[-1] n_train_batches /= batch_size n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data iteration = T.lscalar() # iteration number of a minibatch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels gs = 19 # size of the go board ishape = (gs, gs) # this is the size of MNIST images fw = open("results/"+mynet+"_"+str(learning_rate)+"_"+str(nkerns[0])+".res","w") ###################### # BUILD ACTUAL MODEL # ###################### print '... Building the model ...' 
nc = 2 if representation=='raw' else 6 # if raw nc *= 1+history if mynet == "default": # default is 7x7, regular 3 kernels layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, 7, 7), poolsize=(1, 1)) layer2_input = layer0.output.flatten(2) layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[0] * 13 * 13, n_out=500, activation=T.tanh) layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=361) cost = layer3.negative_log_likelihood(y) # prevGrads = [theano.shared(numpy.zeros((500,361),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((361,),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0] *13*13,500), dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((500,),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0],nc,7,7),dtype=theano.config.floatX),borrow=True), # theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True), # ] params = layer3.params + layer2.params + layer0.params if mynet == "best": ks = kernel_shapes sp1= gs-ks[0]+1 sp2= sp1-ks[1]+1 layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], sp1, sp1), filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1)) layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs) cost = layer3.negative_log_likelihood(y) prevGrads = [theano.shared(numpy.zeros((nkerns[1]*9*9,361),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((gs*gs,),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],nkerns[1],ks[1],ks[1]), dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[1],),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],nc,ks[0],ks[0]),dtype=theano.config.floatX),borrow=True), theano.shared(numpy.zeros((nkerns[0],),dtype=theano.config.floatX),borrow=True), ] params = layer3.params + layer1.params + layer0.params if mynet == "padded": # TODO: add zero padding test deeper architectures ks = kernel_shapes sp1= gs-ks[0]+1 sp2= sp1-ks[1]+1 layer0_input = x.reshape((batch_size, nc, gs, gs)) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, nc, gs, gs), filter_shape=(nkerns[0], nc, ks[0], ks[0]), poolsize=(1, 1)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], sp1, sp1), filter_shape=(nkerns[1], nkerns[0], ks[1], ks[1]), poolsize=(1, 1)) layer3 = LogisticRegression(input=layer1.output.flatten(2), n_in=nkerns[1]*sp2*sp2, n_out=gs*gs) cost = layer3.negative_log_likelihood(y) params = layer3.params + layer1.params + layer0.params # create a function to compute the mistakes that are made by the model test_model = theano.function([], layer3.errors(y), givens={ x: test_batch_x, y: T.cast(test_batch_y, 'int32')}) validate_model = theano.function([], layer3.errors(y), givens={ x: valid_batch_x, y: T.cast(valid_batch_y, 'int32')}) predictions = theano.function([], layer3.get_predictions(), givens={ x: valid_batch_x}) conditional_dist = theano.function([], layer3.get_conditional_dist(), givens={ x: valid_batch_x}) # create a list of gradients for all model parameters 
grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] #adjusted_rate = learning_rate - iteration*(learning_rate/(float(n_epochs) * n_train_batches)) adjusted_rate = learning_rate if T.lt(iteration,3000*200) else 0.1*learning_rate for param_i, grad_i in zip(params, grads):#, prev_grad_i , prevGrads): updates.append((param_i, param_i - adjusted_rate * grad_i))# - momentum * prev_grad_i)) #for i,grad in enumerate(grads): # updates.append((prevGrads[i], grad)) train_model = theano.function([iteration], cost, updates=updates, givens={ x: train_batch_x, y: T.cast(train_batch_y, 'int32')},on_unused_input='ignore') ############### # TRAIN MODEL # ############### print '... Training ...' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.999 # a relative improvement of this much is # considered significant validation_frequency = 10000 # min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = time.clock() epoch = 0 done_looping = False stime = time.time() while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 1000 == 0: print 'training @ iter = ', iter pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) if iter ==5: print 'estimated train time per epoch = '+ str((time.time() - stime) * n_train_batches/60.0/iter/60.0) + " hours" ax,ay = getBatch(trainNames, minibatch_index, trainCumLengths, batch_size,representation,batchType=batch_type,history=history) train_batch_x.set_value(ax) train_batch_y.set_value(ay) cost_ij = train_model(iter) if (iter + 1) % validation_frequency == 0 or iter==5: # compute zero-one loss on validation set validation_losses = [] for i in xrange(n_valid_batches): vx,vy = getBatch(validNames, i, validCumLengths, batch_size,representation,batchType='fast',history=history) valid_batch_x.set_value(vx) valid_batch_y.set_value(vy) validation_losses.append(validate_model()) this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses=[] for i in xrange(n_test_batches): tx,ty = getBatch(testNames, i, testCumLengths, batch_size,representation,batchType='fast',history=history) test_batch_x.set_value(tx) test_batch_y.set_value(ty) test_losses.append(test_model()) test_score = numpy.mean(test_losses) print((' epoch 
%i, minibatch %i/%i, test error of best ' 'model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) fw.write("Epoch "+str(epoch) + ": " +str((1-this_validation_loss)*100.)+ "%\n") pickle.dump((updates,cost,layer0,layer1,layer3,test_model,predictions,conditional_dist),open("results/"+str(batch_size)+representation+str(history)+".model","w")) #if patience <= iter: # done_looping = True # break fw.close() end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
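# The momentum argument above is accepted but the momentum terms are commented out of the update rule. One conventional way it could be wired is classical momentum with a velocity per parameter; this is a sketch, not the original author's update, and it reuses params, grads, momentum and learning_rate from the function above.
velocities = [theano.shared(numpy.zeros_like(p.get_value()), borrow=True) for p in params]
updates = []
for param_i, grad_i, vel_i in zip(params, grads, velocities):
    v_new = momentum * vel_i - learning_rate * grad_i   # velocity update
    updates.append((vel_i, v_new))
    updates.append((param_i, param_i + v_new))           # parameter step along the velocity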
# allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # Reshape matrix of rasterized images of shape (1, 50*50) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1])) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \ image_shape=(batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]), \ filter_shape=(nkerns[0], 1, filter0_shape[0], filter0_shape[1]), poolsize=(2, 2) \ ) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], layer1_input_img_size[0], layer1_input_img_size[1]), filter_shape=(nkerns[1], nkerns[0], filter1_shape[0], filter1_shape[1]), poolsize=(2, 2) \ ) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2)
def load_trained_model(): global if_load_trained_model global train_model_route global layer0_input global layer0 global layer1 global layer2_input global layer2 global layer3 global test_results global layer0_input_img_size # ishape global filter0_shape global layer1_input_img_size global filter1_shape global layer2_input_img_size global layer2_out if_load_trained_model = True print "loading trained model for the first time" trained_model_pkl = open(train_model_route, 'r') trained_model_state_list = cPickle.load(trained_model_pkl) trained_model_state_array = numpy.load(trained_model_pkl) layer0_state, layer1_state, layer2_state, layer3_state = trained_model_state_array ###################### # BUILD ACTUAL MODEL # ###################### print '... loading the model' # Reshape matrix of rasterized images of shape (1, 50*50) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1])) # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, \ image_shape=(batch_size, 1, layer0_input_img_size[0], layer0_input_img_size[1]), \ filter_shape=(nkerns[0], 1, filter0_shape[0], filter0_shape[1]), poolsize=(2, 2), \ W=layer0_state[0], b=layer0_state[1] \ ) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], layer1_input_img_size[0], layer1_input_img_size[1]), filter_shape=(nkerns[1], nkerns[0], filter1_shape[0], filter1_shape[1]), poolsize=(2, 2), \ W=layer1_state[0], b=layer1_state[1] \ ) # the TanhLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * layer2_input_img_size[0] * layer2_input_img_size[1], n_out=layer2_out, activation=T.tanh, \ W=layer2_state[0], b=layer2_state[1] \ ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=layer2_out, n_out=N_OUT, \ W=layer3_state[0], b=layer3_state[1] \ ) test_results = theano.function(inputs=[x], \ outputs= layer3.y_pred)
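# The size globals used above are not independent: layer1_input_img_size and layer2_input_img_size follow from layer0_input_img_size and the filter shapes, given a 'valid' convolution and 2x2 pooling. A sketch of that derivation; the helper name is illustrative, the globals are the ones defined above.
def after_conv_pool(size, filt, pool=2):
    return ((size[0] - filt[0] + 1) // pool, (size[1] - filt[1] + 1) // pool)

layer1_input_img_size = after_conv_pool(layer0_input_img_size, filter0_shape)
layer2_input_img_size = after_conv_pool(layer1_input_img_size, filter1_shape)
# Once the model is loaded, prediction on a batch of rasterized images could then look like:
# labels = test_results(images_2d)   # images_2d shaped (batch_size, H*W)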
###################### print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 64 * 64) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (64, 64) is the size of the input images. layer0_input = x.reshape((batch_size, 1, 64,64)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (64-5+1 , 64-5+1) = (60, 60) # maxpooling reduces this further to (60/2, 60/2) = (30, 30) # 4D output tensor is thus of shape (batch_size, nkerns[0], 30, 30) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 1, 64, 64), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2) ) # Construct the second convolutional pooling layer # filtering reduces the image size to (30-5+1, 30-5+1) = (26, 26) # maxpooling reduces this further to (26/2, 26/2) = (13,13) # 4D output tensor is thus of shape (batch_size, nkerns[1], 13, 13) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 30, 30), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) )
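# Continuing the arithmetic in the comments above, the flattened layer1 output for this 64x64 network would provide nkerns[1] * 13 * 13 features per image. A quick check, assuming the same 'valid' convolution and 2x2 pooling:
edge0 = (64 - 5 + 1) // 2     # 30
edge1 = (edge0 - 5 + 1) // 2  # 13
n_in_hidden = edge1 * edge1   # so a following HiddenLayer would use n_in = nkerns[1] * 13 * 13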