def test_RMSLE():
    """Check ``cnn.RMSLE`` for constant inputs of 2 against constant inputs of 1.

    The reference value is ``sqrt((log(2) - log(3)) ** 2)``, i.e.
    ``|log 2 - log 3|``.  The result must lie within 0.001 of it in *either*
    direction; the previous one-sided check (``cost - expected < 0.001``)
    also accepted any cost far below the reference value.
    """
    A = np.ones((2, 1))
    B = np.ones((2, 1))
    A[0, 0] = 2
    A[1, 0] = 2
    y, y_pred, yy = load_data.shared_dataset(A, B, sample_size=2)
    cost = cnn.RMSLE(y, y_pred)
    expected = np.sqrt((np.log(2) - np.log(3)) ** 2)
    # Two-sided tolerance: compare the absolute deviation.
    assert abs(cost.eval() - expected) < 0.001
def test_MSE():
    """Check that ``cnn.MSE`` of all-ones against all-zeros is 1 (within 1e-4)."""
    ones_column = np.array([[1.0], [1.0]])
    zeros_column = np.zeros((2, 1))
    y, y_pred, yy = load_data.shared_dataset(ones_column, zeros_column,
                                             sample_size=2)
    mse_value = cnn.MSE(y, y_pred).eval()
    # Same open interval as before: strictly between 0.9999 and 1.0001.
    assert mse_value < 1.0001
    assert mse_value > 0.9999
def test_shared_dataset():
    """Check that ``load_data.shared_dataset`` round-trips its input arrays.

    Three inputs are covered: a 2x2 matrix ``A``, a 2x1 column ``B`` and a
    1-D vector ``C``.  The third return value (the list of input arrays) is
    not tested.

    The second call previously passed ``B`` again, so the 1-D case ``C`` was
    never exercised even though ``Cs`` was compared against ``C``.
    """
    tol = 0.01
    A = np.ones((2, 2))
    B = np.ones((2, 1))
    C = np.ones((2, ))
    As, Bs, AB = load_data.shared_dataset(A, B, sample_size=2)
    # Use C here so that the 1-D target path is actually tested.
    As, Cs, AC = load_data.shared_dataset(A, C, sample_size=2)

    a_val = As.get_value(borrow=True)
    b_val = Bs.get_value(borrow=True)
    c_val = Cs.get_value(borrow=True)

    same = True
    for i in range(2):
        for j in range(2):
            if np.abs(A[i, j] - a_val[i, j]) > tol:
                same = False
    for i in range(2):
        if np.abs(B[i, 0] - b_val[i, 0]) > tol:
            same = False
        # np.any keeps the check valid whether c_val[i] is a scalar or a
        # length-1 row.
        if np.any(np.abs(C[i] - c_val[i]) > tol):
            same = False
    assert same
def TrainCNN():
    """Build the CNN, train it by mini-batches and record progress.

    Data come from ``datapar`` and settings from ``hyppar``.  The function
    compiles Theano functions for training, validation and prediction, then
    trains for ``hyppar.Nepoch`` epochs.  Along the way it hands parameters
    and activations to ``statistics`` and writes validation predictions to
    ``output/E_pred_<iteration>.txt`` every 20 iterations.  Nothing is
    returned.
    """
    # Shared datasets (validation, training, test -- in that order).
    valid_set_x, valid_set_y, valid_set = load_data.shared_dataset(
        datapar.Xval, datapar.Yval, sample_size=hyppar.Nval)
    train_set_x, train_set_y, train_set = load_data.shared_dataset(
        datapar.Xtrain, datapar.Ytrain, sample_size=hyppar.Ntrain)
    test_set_x = load_data.shared_testset(datapar.Xtest)

    # Hyperparameters
    learning_rate = hyppar.learning_rate
    num_epochs = hyppar.Nepoch
    num_filters = hyppar.Nchannel
    mini_batch_size = hyppar.mbs
    reg = hyppar.reg

    # Random validation samples whose activations are followed in training.
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0]
    rset = rd.sample(range(n_valid_samples), mini_batch_size)
    print(rset)

    # Seeded random number generator handed to the network constructor.
    rng = np.random.RandomState(23455)

    # Number of whole mini-batches in each set.
    n_train_batches = (
        train_set_x.get_value(borrow=True).shape[0] // mini_batch_size)
    n_valid_batches = (
        valid_set_x.get_value(borrow=True).shape[0] // mini_batch_size)
    n_test_batches = (
        test_set_x.get_value(borrow=True).shape[0] // mini_batch_size)
    print('train: %d batches, validation: %d batches'
          % (n_train_batches, n_valid_batches))

    mb_index = T.lscalar()  # mini-batch index
    x = T.matrix('x')       # network inputs
    y = T.matrix('y')       # targets

    def minibatch(shared_var):
        # Symbolic slice selecting mini-batch number ``mb_index``.
        start = mb_index * mini_batch_size
        stop = (mb_index + 1) * mini_batch_size
        return shared_var[start:stop]

    print('***** Constructing model ***** ')

    # Inputs become a 4-D tensor: mini_batch_size x 1 x 80 x 80.
    layer0_input = x.reshape((mini_batch_size, 1, 80, 80))
    E_pred, cn_output, params = CNNStructure(layer0_input, mini_batch_size,
                                             rng)

    # Minimised cost: MSE plus a regularisation term over the parameters.
    data_cost = cnn.MSE(y, E_pred)
    l2_penalty = 0
    for param in params:
        l2_penalty = l2_penalty + T.mean(T.sqr(param[0]))
    cost = data_cost + reg * l2_penalty

    # ``givens`` substitutes the symbolic inputs with slices of the shared
    # datasets, so each compiled function only needs the mini-batch index.
    valid_model = theano.function(
        [mb_index], cost,
        givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)})
    predict = theano.function(
        [mb_index], E_pred, givens={x: minibatch(valid_set_x)})
    test_model = theano.function(
        [mb_index], E_pred, givens={x: minibatch(test_set_x)})
    get_activations = theano.function(
        [], cn_output, givens={x: valid_set_x[rset]})

    # Parameter updates, then the training function that applies them.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)
    train_model = theano.function(
        [mb_index], cost, updates=updates,
        givens={x: minibatch(train_set_x), y: minibatch(train_set_y)})

    def gather_predictions(model_fn, n_batches):
        # Stack the per-batch outputs of ``model_fn`` into one column.
        stacked = np.zeros((n_batches * mini_batch_size, 1))
        row = 0
        for batch in range(n_batches):
            batch_out = model_fn(batch)
            for k in range(mini_batch_size):
                stacked[row, 0] = batch_out[k]
                row = row + 1
        return stacked

    iteration = 0
    epoch = 0
    cost_ij = 0
    valid_losses = [valid_model(b) for b in range(n_valid_batches)]
    valid_score = np.mean(valid_losses)
    train_error = []
    valid_error = []
    statistics.saveParameters(params)

    print('***** Training model *****')
    while epoch < num_epochs:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            # Total number of mini-batches executed so far.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # One training step; returns the cost of this mini-batch.
            cost_ij = train_model(minibatch_index)

            if iteration % 10 == 0:
                statistics.saveParameters(params)
            if iteration % 2 == 0:
                statistics.saveActivations(get_activations())

            # Save training error
            train_error.append(float(cost_ij))

            # Mean validation cost across all validation mini-batches.
            valid_losses = [valid_model(b) for b in range(n_valid_batches)]
            valid_score = np.mean(valid_losses)
            valid_error.append(valid_score)

            progress = "".join([
                "Iteration: ", str(iteration + 1),
                "/", str(num_epochs * n_train_batches),
                ", training error: ", str(cost_ij),
                ", validation error: ", str(valid_score),
            ])
            print(progress)

            if iteration % 20 == 0:
                # Predicted values for the validation set.
                E = gather_predictions(predict, n_valid_batches)
                np.savetxt('output/E_pred_' + str(iteration) + '.txt', E)

    # Predictions for the test set (computed but not stored anywhere).
    E_test = gather_predictions(test_model, n_test_batches)

    statistics.writeActivations()
    statistics.saveParameters(params)
    statistics.writeParameters()
def cnnText(initial_learning_rate=0.0001, initial_momentum=0.5, n_epochs=100, dataset='mnist.pkl.gz', nkerns=[64, 64], batch_size=5): """ :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) print 'loading data' datasets = load_ag_news() print 'finished' train_x, train_y = datasets[0] #valid_set_x, valid_set_y = datasets[1] test_x, test_y = datasets[1] train_set_x, train_set_y = shared_dataset([train_x, train_y]) test_set_x, test_set_y = shared_dataset([test_x, test_y]) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size #n_train_batches = 2000 / batch_size #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size #n_valid_batches = 1000 / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size #n_test_batches = 1000 / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels is_train = T.iscalar( 'is_train' ) # pseudo boolean for switching between training and prediction print '... building the model' # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. 
total_len = 231 print 'total_len = ', total_len layer0_input = x.reshape((batch_size, 1, total_len, 70)) layer_conv0 = ConvLayer(rng, input=layer0_input, image_shape=(batch_size, 1, total_len, 70), filter_shape=(256, 1, 7, 1)) layer_pool0 = PoolLayer(input=layer_conv0.output, poolsize=(3, 1)) layer_conv1 = ConvLayer(rng, input=layer_pool0.output, image_shape=(batch_size, 256, (total_len - 6) / 3, 70), filter_shape=(256, 256, 7, 1)) layer_pool1 = PoolLayer(input=layer_conv1.output, poolsize=(3, 1)) layer_conv20 = ConvLayer(rng, input=layer_pool1.output, image_shape=(batch_size, 256, ((total_len - 6) / 3 - 6) / 3, 70), filter_shape=(256, 256, 3, 1)) layer_conv21 = ConvLayer(rng, input=layer_conv20.output, image_shape=(batch_size, 256, ((total_len - 6) / 3 - 6) / 3 - 2, 70), filter_shape=(256, 256, 3, 1)) layer_conv22 = ConvLayer(rng, input=layer_conv21.output, image_shape=(batch_size, 256, ((total_len - 6) / 3 - 6) / 3 - 4, 70), filter_shape=(256, 256, 3, 1)) layer_conv23 = ConvLayer(rng, input=layer_conv22.output, image_shape=(batch_size, 256, ((total_len - 6) / 3 - 6) / 3 - 6, 70), filter_shape=(256, 256, 3, 1)) layer_pool2 = PoolLayer(input=layer_conv23.output, poolsize=(3, 1)) layer_fc0_input = layer_pool2.output.flatten(2) layer_fc0 = fcLayer(rng, is_train=is_train, input=layer_fc0_input, n_in=(((total_len - 6) / 3 - 6) / 3 - 8) / 3 * 70 * 256, n_out=1024, activation=ReLu, dropout_rate=0.5) layer_fc1 = fcLayer(rng, is_train=is_train, input=layer_fc0.output, n_in=1024, n_out=1024, activation=ReLu, dropout_rate=0.5) # classify the values of the fully-connected sigmoidal layer layer_softmax = LogisticRegression(input=layer_fc1.output, n_in=1024, n_out=4) # the cost we minimize during training is the NLL of the model cost = layer_softmax.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer_softmax.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], 
y: test_set_y[index * batch_size:(index + 1) * batch_size], is_train: numpy.cast['int32'](0) }) ''' validate_model = theano.function( [index], layer_softmax.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size], is_train: numpy.cast['int32'](0) } ) ''' # create a list of all model parameters to be fit by gradient descent params = layer_conv0.params + layer_conv1.params + layer_conv20.params + layer_conv21.params + layer_conv22.params + layer_conv23.params\ + layer_fc0.params + layer_fc1.params + layer_softmax.params learning_rate = theano.shared( numpy.cast[theano.config.floatX](initial_learning_rate)) initial_learning_rate_val = initial_learning_rate # momentum method assert initial_momentum >= 0. and initial_momentum < 1. momentum = theano.shared( numpy.cast[theano.config.floatX](initial_momentum), name='momentum') updates = [] for param in params: param_update = theano.shared(param.get_value() * numpy.cast[theano.config.floatX](0.)) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (numpy.cast[theano.config.floatX](1.) - momentum) * T.grad(cost, param))) train_model = theano.function( [index], layer_softmax.errors(y), updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], is_train: numpy.cast['int32'](1) }) print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False train = [] valid = [] test = [] while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 train_losses = 0. for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index train_losses = train_losses + train_model(minibatch_index) # test it on the test set test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' ' model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) test.append((epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) # print('epoch %i, training error %f %%' % (epoch, train_losses * 100. / n_train_batches)) train.append(train_losses * 100) if momentum.get_value() < 0.99: new_momentum = 1. - (1. - momentum.get_value()) * 0.98 momentum.set_value(numpy.cast[theano.config.floatX](new_momentum)) # adaption of learning rate new_learning_rate = learning_rate.get_value() * 0.985 #new_learning_rate = initial_learning_rate_val*1.1 /(1+0.1*epoch) #get from tutorial p48 learning_rate.set_value( numpy.cast[theano.config.floatX](new_learning_rate)) end_time = time.clock() print('Optimization complete.') print( 'Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) print train print test f = file('log_BN1_wb.txt', 'wb') cPickle.dump((train, test), f) f.close() f1 = file('log_BN1_w.txt', 'b') cPickle.dump((train, test), f1) f1.close() ''' params = [layer_conv0.W.get_value(), layer_conv0.b.get_value(), layer_conv1.W.get_value(), layer_conv1.b.get_value(), layer_conv2.W.get_value(), layer_conv2.b.get_value(), layer_conv3.W.get_value(), layer_conv3.b.get_value(), layer_fc0.W.get_value(), layer_fc0.b.get_value(), 
layer_fc1.W.get_value(), layer_fc1.b.get_value(), layer_softmax.W.get_value(), layer_softmax.b.get_value(), random_l.W.get_value()] ''' f = file('bn0.save', 'wb') cPickle.dump(params, f) spath = 'mao_drop.txt' fii = open(spath, "w") cPickle.dump(best_validation_loss, fii) cPickle.dump(best_iter + 1, fii) cPickle.dump(test_score * 100., fii) cPickle.dump((end_time - start_time) / 60., fii) f.close()
def evaluate(init_learning_rate=0.1, n_epochs=200,
             datasets='Trace' ,nkerns=[256, 256], n_train_batch=10,
             trans='euc', active_func=T.tanh, window_size = 0.2,
             ada_flag = False, pool_factor = 2, slice_ratio = 1 ):
    """Train the multi-branch shapelet/pooling network and return the best validation error.

    ``datasets`` is indexed as a sequence:
    ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y), ori_len,
    slice_ratio]``.  The training series are augmented with down-sampled
    and moving-average variants, one ``ShapeletPoolLayer`` is built per
    variant, and their outputs feed a hidden layer and a logistic
    regression.  Validation/test examples are processed in groups of
    ``increase_num`` rows and scored by majority vote over each group.

    NOTE(review): the default ``datasets='Trace'`` is a string, but the
    body indexes it as the sequence described above -- callers presumably
    always pass the loaded data; confirm.
    NOTE(review): the ``slice_ratio`` argument is overwritten by
    ``datasets[4]`` before it is used.

    Returns the best validation error observed (0..1).
    """
    rng = numpy.random.RandomState(23455) #set random seed
    learning_rate = theano.shared(numpy.asarray(init_learning_rate,dtype=theano.config.floatX)) #used for learning_rate decay
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    ori_len = datasets[3]
    slice_ratio = datasets[4]
    valid_num = valid_set_x.shape[0]
    increase_num = ori_len - int(ori_len * slice_ratio) + 1 #this can be used as the batch size
    print "increase factor is ", increase_num, ', ori len', ori_len
    # Validation/test rows are consumed in groups of increase_num rows.
    valid_num_batch = valid_num / increase_num
    test_num = test_set_x.shape[0]
    test_num_batch = test_num / increase_num
    length_train = train_set_x.shape[1] #length after slicing.
    num_of_categories = int(train_set_y.max()) + 1
    # A window_size below 1 is a fraction of the series length; otherwise
    # it is used as an absolute length.
    window_size = int(length_train * window_size) if window_size < 1 else int(window_size)
    #*******set up the ma and ds********#
    ma_base,ma_step,ma_num = 5, 6, 0
    ds_base,ds_step, ds_num = 2, 1, 4
    ds_num_max = length_train / (pool_factor * window_size)
    ds_num = min(ds_num, ds_num_max)
    #*******set up the ma and ds********#
    (ma_train, ma_valid, ma_test , ma_lengths) = batch_movingavrg(train_set_x, valid_set_x, test_set_x, ma_base, ma_step, ma_num)
    (ds_train, ds_valid, ds_test , ds_lengths) = batch_downsample(train_set_x, valid_set_x, test_set_x, ds_base, ds_step, ds_num)
    #concatenate directly
    # data_lengths records the length of every segment concatenated along
    # axis 1, so the segments can be sliced apart again below.
    data_lengths = [length_train]
    #downsample part:
    if ds_lengths != []:
        data_lengths += ds_lengths
        train_set_x = numpy.concatenate([train_set_x, ds_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ds_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ds_test], axis = 1)
    #moving average part
    if ma_lengths != []:
        data_lengths += ma_lengths
        train_set_x = numpy.concatenate([train_set_x, ma_train], axis = 1)
        valid_set_x = numpy.concatenate([valid_set_x, ma_valid], axis = 1)
        test_set_x = numpy.concatenate([test_set_x, ma_test], axis = 1)
    train_set_x, train_set_y = shared_dataset(train_set_x, train_set_y)
    valid_set_x = shared_data_x(valid_set_x)
    test_set_x = shared_data_x(test_set_x)
    #compute number of minibatches for training, validation and testing
    n_train_size = train_set_x.get_value(borrow=True).shape[0]
    n_valid_size = valid_set_x.get_value(borrow=True).shape[0]
    n_test_size = test_set_x.get_value(borrow=True).shape[0]
    batch_size = n_train_size / n_train_batch
    n_train_batches = n_train_size / batch_size
    data_dim = train_set_x.get_value(borrow=True).shape[1]
    print 'train size', n_train_size, ',valid size', n_valid_size, ' test size', n_test_size
    print 'batch size ', batch_size
    print 'n_train_batches is ', n_train_batches
    print 'data dim is ', data_dim
    print '---------------------------'
    # allocate symbolic variables for the data
    index = T.lscalar('index') # index to a [mini]batch
    x = T.matrix('x') # training mini-batch
    y = T.ivector('y') # training labels
    x_vote = T.matrix('xvote') # one voting group of validation/test rows

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print 'building the model...'
    # Reshape the (rows, data_dim) matrices into 4D tensors of shape
    # (rows, 1, data_dim, 1); rows is batch_size for training and
    # increase_num for voting.
    layer0_input = []
    inputs = x.reshape((batch_size, 1, data_dim, 1))
    layer0_input_vote = []
    inputs_vote = x_vote.reshape((increase_num, 1, data_dim, 1))
    # Split the concatenated axis back into one segment per entry of
    # data_lengths.
    ind = 0
    for i in xrange(len(data_lengths)):
        layer0_input.append(inputs[:,:,ind : ind + data_lengths[i],:])
        layer0_input_vote.append(inputs_vote[:,:,ind : ind + data_lengths[i],:])
        ind += data_lengths[i]
    layer0 = []
    layer0_vote = []
    feature_map_size = 0
    for i in xrange(len(layer0_input)):
        # NOTE(review): pool_size becomes 0 (and the next line divides by
        # zero) if data_lengths[i] - window_size + 1 < pool_factor.
        pool_size = (data_lengths[i] - window_size + 1) / pool_factor
        feature_map_size += (data_lengths[i] - window_size + 1) / pool_size
        layer0.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input[i],
            image_shape=(batch_size, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            trans = trans,
            active_func=active_func
        ))
        # The voting copy receives the training layer's W.
        layer0_vote.append(ShapeletPoolLayer(
            numpy.random.RandomState(23455 + i),
            input=layer0_input_vote[i],
            image_shape=(increase_num, 1, data_lengths[i], 1),
            filter_shape=(nkerns[0], 1, window_size, 1),
            poolsize=(pool_size , 1),
            W = layer0[i].W,
            trans = trans,
            active_func=active_func
        ))
    # Concatenate the flattened outputs of all branches along axis 1.
    layer1_input = layer0[0].output.flatten(2)
    layer1_vote_input = layer0_vote[0].output.flatten(2)
    for i in xrange(1, len(data_lengths)):
        layer1_input = T.concatenate([layer1_input, layer0[i].output.flatten(2)], axis = 1)
        layer1_vote_input = T.concatenate([layer1_vote_input, layer0_vote[i].output.flatten(2)], axis = 1)
    # construct a fully-connected layer (activation given by active_func)
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None
    )
    # construct the matching fully-connected layer for prediction; it is
    # given layer1's W and b
    layer1_vote = HiddenLayer(
        rng,
        input=layer1_vote_input,
        n_in=nkerns[0] * feature_map_size,
        n_out=nkerns[1],
        activation=active_func,
        previous_layer = None,
        W = layer1.W,
        b = layer1.b
    )
    # classify the values of the fully-connected layer
    layer2 = LogisticRegression(input=layer1.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None)
    layer2_vote = LogisticRegressionVote(input=layer1_vote.output, n_in=nkerns[1], n_out= num_of_categories , previous_layer = None, W = layer2.W, b = layer2.b)
    # the cost we minimize during training is the NLL of the model
    cost = layer2.negative_log_likelihood(y)
    # function for the test set. Returns the prediction for one voting group
    test_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : test_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )
    # function for validation set. Return the prediction value
    validate_model = theano.function(
        [index],
        layer2_vote.prediction(),
        givens={
            x_vote : valid_set_x[index * (increase_num) : (index + 1) * (increase_num)]
        }
    )
    # create a list of all model parameters to be fit by gradient descent
    params = layer2.params + layer1.params
    for i in xrange(len(layer0_input)):
        params += layer0[i].params
    # Adagradient part
    grads = T.grad(cost, params)
    import copy
    # NOTE(review): G holds 2 + len(layer0_input) accumulators; if params
    # has more entries than that, zip() below silently truncates and the
    # remaining parameters are not updated when ada_flag is set -- confirm.
    G = []
    for i in xrange(2 + len(layer0_input)):
        G.append( theano.shared( numpy.zeros(params[i].shape.eval(), dtype=theano.config.floatX ), borrow=True ))
    # parameter update methods
    if ada_flag == True:
        updates = [
            (param_i, param_i - learning_rate * (grad_i / (T.sqrt(G_i) + 1e-5) ))
            for param_i, grad_i, G_i in zip(params, grads, G)
        ]
    else:
        updates = [
            (param_i, param_i - learning_rate * grad_i )
            for param_i, grad_i in zip(params, grads)
        ]
    # Adds the squared gradients of one training mini-batch to G.
    update_G = theano.function(inputs=[index], outputs = G,
        updates=[(G_i, G_i + T.sqr(grad_i) ) for G_i, grad_i in zip(G,grads)],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    # Sets every G_i to grad_i - grad_i, i.e. zeros of the gradient's shape.
    reset_G = theano.function(inputs=[index],outputs = G,
        updates=[(G_i, grad_i - grad_i) for G_i, grad_i in zip(G,grads)],
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    #Our training function, return value: NLL cost and training error
    train_model = theano.function(
        [index],
        [cost, layer2.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # Multiplies the learning rate by 1e-4 per call; its only call site in
    # the loop below is commented out.
    decrease_learning_rate = theano.function(inputs=[], outputs = learning_rate, updates={learning_rate: learning_rate * 1e-4})

    ###############
    # TRAIN MODEL #
    ###############
    print 'training...'
    # early-stopping parameters
    patience = 10000 # look as this many examples regardless
    patience_increase = 2 # wait this much longer when a new best is found
    improvement_threshold = 0.995 # a relative improvement of this much is considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience / 2)
    best_validation_loss = numpy.inf
    best_iter = 0
    best_test_iter = 0
    best_test_loss = numpy.inf
    test_patience = 200
    valid_loss = 0.
    test_loss = 0.
    start_time = time.clock()
    epoch = 0
    done_looping = False
    last_train_err = 1
    last_avg_err = float('inf')
    first_layer_prev = 0
    num_no_update_epoch = 0
    epoch_avg_cost = float('inf')
    epoch_avg_err = float('inf')
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epoch_train_err = 0.
        epoch_cost = 0.
        if ada_flag:
            reset_G(0)
        # Give up after 500 consecutive epochs without a validation result
        # at least as good as the best (the counter is reset below).
        num_no_update_epoch += 1
        if num_no_update_epoch == 500:
            break
        for minibatch_index in xrange(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if ada_flag:
                update_G(minibatch_index)
            [cost_ij,train_err] = train_model(minibatch_index)
            epoch_train_err = epoch_train_err + train_err
            epoch_cost = epoch_cost + cost_ij
            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                # validation set loss
                valid_results = [validate_model(i) for i in xrange(valid_num_batch)]
                valid_losses = []
                for i in xrange(valid_num_batch):
                    # Majority vote over the predictions of one group:
                    # error 0 if the true label is the unique most frequent
                    # prediction, 0.5 if it ties for most frequent, else 1.
                    y_pred = valid_results[i]
                    label = valid_set_y[i * increase_num]
                    unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                    unique_value = unique_value.tolist()
                    curr_err = 1.
                    if label in unique_value:
                        target_ind = unique_value.index(label)
                        count = count.tolist()
                        sorted_count = sorted(count)
                        if count[target_ind] == sorted_count[-1]:
                            if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                curr_err = 0.5 #tie
                            else:
                                curr_err = 0.
                    valid_losses.append(curr_err)
                valid_loss = sum(valid_losses) / float(len(valid_losses))
                print('...epoch %i, valid err: %.5f |' % (epoch, valid_loss)),
                # if we got the best validation score until now
                if valid_loss <= best_validation_loss:
                    num_no_update_epoch = 0
                    #improve patience if loss improvement is good enough
                    if valid_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = valid_loss
                    best_iter = iteration
                    # test it on the test set (same voting rule as above)
                    test_results = [test_model(i) for i in xrange(test_num_batch)]
                    test_losses = []
                    for i in xrange(test_num_batch):
                        y_pred = test_results[i]
                        label = test_set_y[i * increase_num]
                        unique_value, sub_ind, correspond_ind, count = numpy.unique(y_pred, True, True, True)
                        unique_value = unique_value.tolist()
                        curr_err = 1
                        if label in unique_value:
                            target_ind = unique_value.index(label)
                            count = count.tolist()
                            sorted_count = sorted(count)
                            if count[target_ind] == sorted_count[-1]:
                                if len(sorted_count) > 1 and sorted_count[-1] == sorted_count[-2]:
                                    curr_err = 0.5 # tie
                                else:
                                    curr_err = 0.
                        test_losses.append(curr_err)
                    test_loss = sum(test_losses) / float(len(test_losses))
                    print(('test err: %.5f |') % (test_loss)),
                    best_test_loss = test_loss
                    test_patience = 200
                #test_patience -= 1
                #if test_patience <= 0:
                #    break
            if patience <= iteration:
                done_looping = True
                break
        epoch_avg_cost = epoch_cost/n_train_batches
        epoch_avg_err = epoch_train_err/n_train_batches
        #curr_lr = decrease_learning_rate()
        last_avg_err = epoch_avg_cost
        print ('train err %.5f, cost %.4f' %(epoch_avg_err,epoch_avg_cost))
        # Stop early once the average training cost is exactly zero.
        if epoch_avg_cost == 0:
            break
    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test error: %f %%' %
          (best_validation_loss * 100., best_iter + 1, best_test_loss * 100.))
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    return best_validation_loss
def train_by_lenet5(tr_start_index, tr_limit, vl_start_index, vl_limit, output_filename="tmp.file", learning_rate=0.13, n_epochs=5000): global train_dataset_route global valid_dataset_route output_file = open(output_filename, 'w') print train_dataset_route, type(train_dataset_route) """ :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer """ train_set = tdtf.read_data_patch_to_ndarray(train_dataset_route, tr_start_index, tr_limit) datasets = load_data.shared_dataset(train_set) train_set_x, train_set_y = datasets valid_set = tdtf.read_data_patch_to_ndarray(valid_dataset_route, vl_start_index, vl_limit) print valid_set[1] datasets = load_data.shared_dataset(valid_set) valid_set_x, valid_set_y = datasets # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size n_valid_batches /= batch_size # allocate symbolic variables for the data ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model validate_model = theano.function([index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size]}) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) train_model = theano.function([index], [cost, layer3.errors(y), layer3.params[0][0][0]], updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size]}) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 50000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False min_train_cost = 10000 decreasing_num = 0 last_train_err = 1 last_train_cost = 1 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print 'training @ iter = ', iter , ' patience = ' , patience cost_ij, train_err, par = train_model(minibatch_index) decreasing_rate = (last_train_err - train_err) / (last_train_err) * 100. last_train_err = train_err if last_train_err == 0: last_train_err += 0.0000001 c_d_rate = (last_train_cost - cost_ij) / (last_train_cost) * 100. last_train_cost = cost_ij print >> output_file, ('epoch %i, minibatch %i/%i, train_cost %f , train_error %.2f %%, decreasing rate %f %%, cost_decreasing rate %f %%, W00 ' % \ (epoch, minibatch_index + 1, n_train_batches, cost_ij, train_err* 100. ,decreasing_rate ,c_d_rate)) #print layer1.params[0:1][0][0:3] #print layer2.params[0:1][0][0:3] if cost_ij < min_train_cost: decreasing_num = 0 min_train_cost = cost_ij layer0_state = layer0.__getstate__() layer1_state = layer1.__getstate__() layer2_state = layer2.__getstate__() layer3_state = layer3.__getstate__() trained_model_list = [layer0_state, layer1_state, layer2_state, layer3_state] trained_model_array = numpy.asarray(trained_model_list) classifier_file = open(train_model_route, 'w') cPickle.dump([1,2,3], classifier_file, protocol=2) numpy.save(classifier_file, trained_model_array) classifier_file.close() else: print "decreasing" decreasing_num += 1 if decreasing_num > 100: done_looping = True break if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, \ this_validation_loss * 100.)) # if 
we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter if patience <= iter: done_looping = True print patience , iter break end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) print >> output_file, ('Optimization complete.') print >> output_file, ('Best validation score of %f %% obtained at iteration %i,'\ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) output_file.close()
def fit(self, X, Y):
    """Train ``self.rbm`` on ``[Y, X]`` and keep the best-scoring model.

    ``Y`` and ``X`` are stacked column-wise (labels first) to form the
    visible layer, wrapped by ``shared_dataset`` and fed to the RBM in
    mini-batches of ``self.batch_size`` rows. After every epoch the mean
    mini-batch cost is used as the score; whenever it exceeds the best
    score seen so far, ``clone`` is called on a second RBM with the current
    one as argument (presumably copying its parameters -- confirm against
    ``RBM.clone``). When training ends ``self.rbm`` is replaced by that
    second RBM.

    When ``self.do_report`` is true, the hyper-parameters, the per-epoch
    mean cost and the total training time are saved with ``np.save`` to
    ``self.report_folder + '/' + self.report_name``; the folder is created
    if it does not exist.

    :param X: feature array, one row per sample (assumed 2-D -- TODO
        confirm against callers).
    :param Y: label array that ``np.hstack`` can stack in front of ``X``.
    """
    # The report only exists when reporting is enabled; every later use is
    # guarded so that do_report=False no longer raises NameError.
    report = None
    if self.do_report:
        report = {"learning_rate": self.learning_rate,
                  "training_epochs": self.training_epochs,
                  "batch_size": self.batch_size,
                  "n_chains": self.n_chains,
                  "n_samples": self.n_samples,
                  "n_hidden": self.n_hidden,
                  "k": self.k,
                  "costs": np.zeros(self.training_epochs),
                  "pretraining_time": 0}

    train_data = np.hstack([Y, X])
    n_visible = train_data.shape[1]

    # Theano shared variable holding the whole training matrix.
    train_set = shared_dataset(train_data)

    # Number of full mini-batches; floor division keeps this an int on
    # both Python 2 and Python 3.
    n_train_batches = (train_set.get_value(borrow=True).shape[0]
                       // self.batch_size)

    # Symbolic inputs: mini-batch index and the data matrix.
    index = T.lscalar()
    x = T.matrix('x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Storage for the persistent chain (state = hidden layer of the chain).
    persistent_chain = theano.shared(
        np.zeros((self.batch_size, self.n_hidden),
                 dtype=theano.config.floatX),
        borrow=True)

    self.rbm = RBM(input=x, n_visible=n_visible, n_labels=self.n_labels,
                   n_hidden=self.n_hidden, np_rng=rng,
                   theano_rng=theano_rng)

    # Cost and parameter updates for one training step.
    cost, updates = self.rbm.get_cost_updates(lr=self.learning_rate,
                                              persistent=persistent_chain,
                                              k=self.k)

    # train_rbm returns the cost of one mini-batch and, through `updates`,
    # modifies the RBM parameters as a side effect.
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set[index * self.batch_size:
                         (index + 1) * self.batch_size]
        },
        name='train_rbm'
    )

    start_time = timeit.default_timer()

    # Second RBM that receives a copy of the best model seen so far.
    max_score = -np.inf
    argmax_score = RBM(input=x, n_visible=n_visible,
                       n_labels=self.n_labels, n_hidden=self.n_hidden,
                       np_rng=rng, theano_rng=theano_rng)

    for epoch in range(self.training_epochs):
        batch_costs = [train_rbm(batch_index)
                       for batch_index in range(n_train_batches)]
        score = np.mean(batch_costs)

        # Two spaces after "is" reproduce the output of the former
        # comma-separated print statement.
        print('Training epoch %d, cost is  %s' % (epoch, score))

        # A higher mean cost counts as better here.
        if score > max_score:
            max_score = score
            argmax_score.clone(self.rbm)

        if report is not None:
            report["costs"][epoch] = score

    end_time = timeit.default_timer()

    self.rbm = argmax_score

    if report is not None:
        report['pretraining_time'] = end_time - start_time
        # Create the folder up front: on Python 2 a missing folder makes
        # np.save raise IOError, which `except OSError` never caught.
        if not os.path.isdir(self.report_folder):
            os.makedirs(self.report_folder)
        np.save(self.report_folder + '/' + self.report_name, report)
def evaluate_lenet5(dataset_route=DataHome + "DogVsCat_test_feature_2500.csv",
                    nkerns=[20, 50], batch_size=5):
    """Label a test set with a previously trained LeNet-style network.

    The four layer states stored in ``ModelHome + train_model_route`` are
    loaded and used as the weights of a network made of two
    convolution/pooling layers, a tanh hidden layer and a logistic
    regression layer. Every full mini-batch of the test set is classified
    and the resulting ``(id, label)`` pairs are written to
    ``test_label_route`` through ``tdtf.wr_to_csv``. Samples left over
    after the last full mini-batch are not predicted.

    :type dataset_route: string
    :param dataset_route: path of the CSV file holding the test features

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer; only
        read, never modified

    :type batch_size: int
    :param batch_size: number of samples classified per Theano call
    """
    rng = numpy.random.RandomState(23455)

    # The model file holds a pickled header followed by a NumPy array.
    # Both are binary, so open in 'rb' and make sure the handle is closed.
    with open(ModelHome + train_model_route, 'rb') as trained_model_pkl:
        # The header carries no information but must be consumed so that
        # numpy.load starts at the array.
        cPickle.load(trained_model_pkl)
        trained_model_state_array = numpy.load(trained_model_pkl)
    (layer0_state, layer1_state,
     layer2_state, layer3_state) = trained_model_state_array

    test_set = tdtf.read_data_to_ndarray(dataset_route, limit=None,
                                         header_n=0)
    # Only the ids are needed from the raw data; the features are used
    # through the shared variables created below.
    _raw_features, id_arr = test_set
    test_set_x, test_set_y = load_data.shared_dataset(test_set)
    print('%s %s' % (test_set_x.shape, test_set_y.shape))

    # Number of full mini-batches (floor division keeps it an int).
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic inputs: mini-batch index and the rasterized images.
    index = T.lscalar()
    x = T.matrix('x')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images to a 4D tensor of shape
    # (batch_size, 1, 50, 50), as expected by LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # First convolutional pooling layer:
    # filtering reduces the image size to (50-10+1, 50-10+1) = (41, 41),
    # 2x2 max-pooling reduces this further to (20, 20).
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 50, 50),
                                filter_shape=(nkerns[0], 1, 10, 10),
                                poolsize=(2, 2),
                                W=layer0_state[0], b=layer0_state[1])

    # Second convolutional pooling layer:
    # filtering reduces the image size to (20-5+1, 20-5+1) = (16, 16),
    # 2x2 max-pooling reduces this further to (8, 8).
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 20, 20),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2),
                                W=layer1_state[0], b=layer1_state[1])

    # The hidden layer is fully connected and works on a 2D matrix of
    # shape (batch_size, nkerns[1] * 8 * 8).
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=nkerns[1] * 8 * 8, n_out=100,
                         activation=T.tanh,
                         W=layer2_state[0], b=layer2_state[1])

    # Two-class logistic regression on top of the hidden layer.
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2,
                                W=layer3_state[0], b=layer3_state[1])

    print("predicting")
    start_time = time.clock()

    # Function returning the predicted labels of one mini-batch.
    test_results = theano.function(
        inputs=[index],
        outputs=layer3.y_pred,
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_res = [test_results(i) for i in range(n_test_batches)]
    print(test_res)

    # Pair every prediction with the id of the sample it belongs to.
    id_l = []
    label_l = []
    sample_idx = 0
    for batch_pred in test_res:
        for label in batch_pred:
            label_l.append(label)
            id_l.append(id_arr[sample_idx])
            sample_idx += 1

    tdtf.wr_to_csv(header=['id', 'label'], id_list=id_l, pred_list=label_l,
                   filename=test_label_route)

    end_time = time.clock()
    elapsed_msg = ('The code for file ' + os.path.split(__file__)[1] +
                   ' ran for %.2fm' % ((end_time - start_time) / 60.))
    sys.stderr.write(elapsed_msg + '\n')
def evaluate_lenet5(dataset_route=DataHome + "DogVsCat_test_feature_2500.csv",
                    nkerns=[20, 50], batch_size=5):
    """Label a test set with a previously trained LeNet-style network.

    The four layer states stored in ``ModelHome + train_model_route`` are
    loaded and used as the weights of a network made of two
    convolution/pooling layers, a tanh hidden layer and a logistic
    regression layer. Every full mini-batch of the test set is classified
    and the resulting ``(id, label)`` pairs are written to
    ``test_label_route`` through ``tdtf.wr_to_csv``. Samples left over
    after the last full mini-batch are not predicted.

    :type dataset_route: string
    :param dataset_route: path of the CSV file holding the test features

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer; only
        read, never modified

    :type batch_size: int
    :param batch_size: number of samples classified per Theano call
    """
    rng = numpy.random.RandomState(23455)

    # The model file holds a pickled header followed by a NumPy array.
    # Both are binary, so open in 'rb' and make sure the handle is closed.
    with open(ModelHome + train_model_route, 'rb') as trained_model_pkl:
        # The header carries no information but must be consumed so that
        # numpy.load starts at the array.
        cPickle.load(trained_model_pkl)
        trained_model_state_array = numpy.load(trained_model_pkl)
    (layer0_state, layer1_state,
     layer2_state, layer3_state) = trained_model_state_array

    test_set = tdtf.read_data_to_ndarray(dataset_route, limit=None,
                                         header_n=0)
    # Only the ids are needed from the raw data; the features are used
    # through the shared variables created below.
    _raw_features, id_arr = test_set
    test_set_x, test_set_y = load_data.shared_dataset(test_set)
    print('%s %s' % (test_set_x.shape, test_set_y.shape))

    # Number of full mini-batches (floor division keeps it an int).
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic inputs: mini-batch index and the rasterized images.
    index = T.lscalar()
    x = T.matrix('x')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images to a 4D tensor of shape
    # (batch_size, 1, 50, 50), as expected by LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 50, 50))

    # First convolutional pooling layer:
    # filtering reduces the image size to (50-10+1, 50-10+1) = (41, 41),
    # 2x2 max-pooling reduces this further to (20, 20).
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 50, 50),
                                filter_shape=(nkerns[0], 1, 10, 10),
                                poolsize=(2, 2),
                                W=layer0_state[0], b=layer0_state[1])

    # Second convolutional pooling layer:
    # filtering reduces the image size to (20-5+1, 20-5+1) = (16, 16),
    # 2x2 max-pooling reduces this further to (8, 8).
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 20, 20),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2),
                                W=layer1_state[0], b=layer1_state[1])

    # The hidden layer is fully connected and works on a 2D matrix of
    # shape (batch_size, nkerns[1] * 8 * 8).
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=nkerns[1] * 8 * 8, n_out=100,
                         activation=T.tanh,
                         W=layer2_state[0], b=layer2_state[1])

    # Two-class logistic regression on top of the hidden layer.
    layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2,
                                W=layer3_state[0], b=layer3_state[1])

    print("predicting")
    start_time = time.clock()

    # Function returning the predicted labels of one mini-batch.
    test_results = theano.function(
        inputs=[index],
        outputs=layer3.y_pred,
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    test_res = [test_results(i) for i in range(n_test_batches)]
    print(test_res)

    # Pair every prediction with the id of the sample it belongs to.
    id_l = []
    label_l = []
    sample_idx = 0
    for batch_pred in test_res:
        for label in batch_pred:
            label_l.append(label)
            id_l.append(id_arr[sample_idx])
            sample_idx += 1

    tdtf.wr_to_csv(header=['id', 'label'], id_list=id_l, pred_list=label_l,
                   filename=test_label_route)

    end_time = time.clock()
    elapsed_msg = ('The code for file ' + os.path.split(__file__)[1] +
                   ' ran for %.2fm' % ((end_time - start_time) / 60.))
    sys.stderr.write(elapsed_msg + '\n')
def visualize_MISTtraining():
    '''
    Demonstrate how one convolutional and one fully connected layer are
    used to train a CNN that labels MNIST digits.

    The same network is trained in the test suite without any output.
    The data file ``mnist.pkl.gz`` is downloaded if it is not present in
    the working directory. Progress is printed to the terminal and, once
    training has finished, the per-mini-batch training cost and the
    validation error are plotted.

    Benchmark error on test set with current settings: 0.0445
    '''
    dataset = 'mnist.pkl.gz'
    rd.seed(23455)

    # Download the file from MILA if not present
    if not os.path.isfile(dataset):
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # NOTE(review): the file is fetched over plain HTTP and then unpickled;
    # unpickling can execute arbitrary code, so only use a trusted copy.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() has no `encoding` argument.
            train_set, valid_set, test_set = pickle.load(f)
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set

    # Load data into tensors. The call order (test, valid, train) is kept
    # as it was, in case shared_dataset draws from the seeded generator.
    train_size = 6000
    test_set_x, test_set_y_float, test_set = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    valid_set_x, valid_set_y_float, valid_set = load_data.shared_dataset(
        valid_x, valid_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)
    train_set_y = T.cast(train_set_y_float, 'int32')
    valid_set_y = T.cast(valid_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    # Training set dimension: 6000 x 784
    print('Training set: %d samples'
          % (train_set_x.get_value(borrow=True).shape[0]))
    # Test set dimension: 2000 x 784
    print('Test set: %d samples'
          % (test_set_x.get_value(borrow=True).shape[0]))
    # Validation set dimension: 2000 x 784
    print('Validation set: %d samples'
          % (valid_set_x.get_value(borrow=True).shape[0]))
    print('The training set looks like this: ')
    print(train_set[0])
    print('The labels looks like this:')
    print(train_set[1])

    # Learning rate handed to the Adam update rule
    learning_rate = 0.005
    # Number of training epochs
    num_epochs = 4
    # Number of kernels for each convolution layer,
    # e.g. for 2 layers - [20, 50]: layer1 = 20, layer2 = 50
    num_filters = [9]
    # Mini-batch size
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Number of full mini-batches in each set
    n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                       // mini_batch_size)
    n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0]
                       // mini_batch_size)
    n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                      // mini_batch_size)
    print('train: %d batches, test: %d batches, validation: %d batches'
          % (n_train_batches, n_test_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    print('***** Constructing model ***** ')

    # Reshape the matrix of mini_batch_size images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # Convolution and pooling layer; the 4D output tensor has shape
    # mini_batch_size x 9 x 12 x 12
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)

    # Flatten the output into dimensions: mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer maps the 1296 features to the 10 classes;
    # the softmax is applied just below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0] * 12 * 12,
        num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Cost that is minimised during stochastic descent.
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Theano function computing the mistakes on one validation mini-batch.
    # `givens` substitutes the symbolic inputs x and y with the slice of
    # the shared dataset selected by the mini-batch index mb_index, which
    # keeps the model description separate from the data it runs on.
    valid_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x: valid_set_x[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size],
            y: valid_set_y[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size]
        })

    # Theano function computing the mistakes on one test mini-batch.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x: test_set_x[mb_index * mini_batch_size:
                          (mb_index + 1) * mini_batch_size],
            y: test_set_y[mb_index * mini_batch_size:
                          (mb_index + 1) * mini_batch_size]
        })

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params

    # Parameter updates are produced by the Adam helper; a plain SGD rule
    # would be (param, param - learning_rate * grad) for every parameter.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Theano function performing one training step on a mini-batch.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size],
            y: train_set_y[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size]
        })

    # Values collected for the plots.
    train_costs = []
    valid_errors = []

    print('***** Training model *****')
    epoch = 0
    while epoch < num_epochs:
        print('epoch: ' + str(epoch))
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Total number of mini-batches executed so far.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # One training step; returns the cost of this mini-batch.
            cost_ij = train_model(minibatch_index)
            print('iter: ' + str(iteration) + ', cost_ij: ' + str(cost_ij))
            train_costs.append(cost_ij)

        # NOTE(review): validation is assumed to run once per epoch; the
        # layout this function was recovered from did not make the nesting
        # explicit -- confirm against the intended plot resolution.
        valid_losses = [valid_model(i) for i in range(n_valid_batches)]
        # Mean prediction error across all validation mini-batches.
        valid_errors.append(np.mean(valid_losses))

    print('***** Training Complete *****')

    test_losses = [test_model(i) for i in range(n_test_batches)]
    # Mean prediction error across all test mini-batches.
    test_score = np.mean(test_losses)
    # errors() yields a misclassification rate (lower is better), so the
    # value is reported as an error rate rather than an accuracy.
    print('Error rate on the test set: ' + str(test_score))

    fig, (ax1, ax2) = plt.subplots(2)
    ax1.plot(train_costs)
    ax2.plot(valid_errors)
    plt.show()
def test_convLayer():
    '''
    Train a CNN with one convolutional and one fully connected layer on
    MNIST and check the resulting test error.

    The same network, with terminal output and plots, is built by the
    visualisation script. The data file ``mnist.pkl.gz`` is downloaded if
    it is not present in the working directory.

    Test: error rate on the test mini-batches < 0.08
          (digit labeling accuracy > 92%)

    NOTE: The validation set is not used here.
    '''
    dataset = 'mnist.pkl.gz'
    rd.seed(23455)

    # Download the file from MILA if not present
    if not os.path.isfile(dataset):
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # NOTE(review): the file is fetched over plain HTTP and then unpickled;
    # unpickling can execute arbitrary code, so only use a trusted copy.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, _valid_set, test_set = pickle.load(
                f, encoding='latin1')
        except TypeError:
            # Python 2: pickle.load() has no `encoding` argument.
            train_set, _valid_set, test_set = pickle.load(f)
    train_x, train_y = train_set
    test_x, test_y = test_set

    # Load data into tensors. The call order (test, then train) is kept
    # as it was, in case shared_dataset draws from the seeded generator.
    train_size = 6000
    test_set_x, test_set_y_float, _ = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, _ = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)
    train_set_y = T.cast(train_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    # Learning rate handed to the Adam update rule
    learning_rate = 0.005
    # Number of training epochs
    num_epochs = 4
    # Number of kernels for each convolution layer,
    # e.g. for 2 layers - [20, 50]: layer1 = 20, layer2 = 50
    num_filters = [9]
    # Mini-batch size
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Number of full mini-batches in each set
    n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                       // mini_batch_size)
    n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                      // mini_batch_size)

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    # Reshape the matrix of mini_batch_size images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # Convolution and pooling layer; the 4D output tensor has shape
    # mini_batch_size x 9 x 12 x 12
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)

    # Flatten the output into dimensions: mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer maps the 1296 features to the 10 classes;
    # the softmax is applied just below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0] * 12 * 12,
        num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Cost that is minimised during stochastic descent.
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Theano function computing the mistakes on one test mini-batch.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x: test_set_x[mb_index * mini_batch_size:
                          (mb_index + 1) * mini_batch_size],
            y: test_set_y[mb_index * mini_batch_size:
                          (mb_index + 1) * mini_batch_size]
        })

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Theano function performing one training step on a mini-batch.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size],
            y: train_set_y[mb_index * mini_batch_size:
                           (mb_index + 1) * mini_batch_size]
        })

    # Train: num_epochs passes over all training mini-batches. The cost
    # returned by each step is not needed here.
    for _epoch in range(num_epochs):
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

    # Mean error over all test mini-batches must stay below 8 %.
    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)
    assert test_score < 0.08
if datatype=='line': data_path = '../synthetic_data/line_data.save' elif datatype=='circle': data_path = '../synthetic_data/circle_data.save' elif datatype=='spiral': data_path = '../synthetic_data/spiral_data.save' print 'opening data' f = open(data_path) train_data, valid_data, test_data = pickle.load(f) f.close() N, D = train_data[0].shape Nv = valid_data[0].shape[0] numpy_rng=numpy.random.RandomState() train_set = ld.shared_dataset(train_data); test_set = ld.shared_dataset(test_data); valid_set = ld.shared_dataset(valid_data); save = [] sym_test_eucs = [] sym_test_angs = [] logs=[] print '---Trained : ' + model_type + '---Noise Type: ' + noise_type for tmp in [1]: #for num_epoch in [200]: #for corrupt in [0.1,0.2]: #for epsilon in [0.01,0.06, 0.03, 0.001]: #for epsilon in [0.01,0.05, 0.03]: #for epsilon in [0.005, 0.003]: #for lam in [0.0005, 0.001, 0.005]:
if datatype == 'line': data_path = '../synthetic_data/line_data.save' elif datatype == 'circle': data_path = '../synthetic_data/circle_data.save' elif datatype == 'spiral': data_path = '../synthetic_data/spiral_data.save' print 'opening data' f = open(data_path) train_data, valid_data, test_data = pickle.load(f) f.close() N, D = train_data[0].shape Nv = valid_data[0].shape[0] numpy_rng = numpy.random.RandomState() train_set = ld.shared_dataset(train_data) test_set = ld.shared_dataset(test_data) valid_set = ld.shared_dataset(valid_data) save = [] sym_test_eucs = [] sym_test_angs = [] logs = [] print '---Trained : ' + model_type + '---Noise Type: ' + noise_type for tmp in [1]: #for num_epoch in [200]: #for corrupt in [0.1,0.2]: #for epsilon in [0.01,0.06, 0.03, 0.001]: #for epsilon in [0.01,0.05, 0.03]: #for epsilon in [0.005, 0.003]: #for lam in [0.0005, 0.001, 0.005]:
def train_by_lenet5(tr_start_index, tr_limit, vl_start_index, vl_limit,
                    output_filename="tmp.file", learning_rate=0.13,
                    n_epochs=5000):
    """Train the LeNet-style network with SGD and log progress to a file.

    A patch of the training data and a patch of the validation data are
    read, wrapped by ``load_data.shared_dataset`` and used to minimise the
    negative log-likelihood of ``layer3``. Every time a mini-batch reaches
    a new minimum training cost the states of the four layers are written
    to ``train_model_route``. Training stops after ``n_epochs`` epochs,
    when patience runs out, or after more than 100 consecutive
    mini-batches without a new minimum cost.

    NOTE(review): ``batch_size``, ``index``, ``x``, ``y``,
    ``layer0`` .. ``layer3``, ``train_model_route``, ``train_dataset_route``
    and ``valid_dataset_route`` are not defined in this function and must
    exist at module level -- confirm against the rest of the file.

    :type tr_start_index: int
    :param tr_start_index: start index handed to
        ``tdtf.read_data_patch_to_ndarray`` for the training file

    :type tr_limit: int
    :param tr_limit: limit handed to ``tdtf.read_data_patch_to_ndarray``
        for the training file

    :type vl_start_index: int
    :param vl_start_index: start index for the validation file

    :type vl_limit: int
    :param vl_limit: limit for the validation file

    :type output_filename: string
    :param output_filename: path of the text log written during training

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    """
    # The log file is closed even if training raises.
    with open(output_filename, 'w') as output_file:
        print('%s %s' % (train_dataset_route, type(train_dataset_route)))

        train_set = tdtf.read_data_patch_to_ndarray(
            train_dataset_route, tr_start_index, tr_limit)
        train_set_x, train_set_y = load_data.shared_dataset(train_set)

        valid_set = tdtf.read_data_patch_to_ndarray(
            valid_dataset_route, vl_start_index, vl_limit)
        print(valid_set[1])
        valid_set_x, valid_set_y = load_data.shared_dataset(valid_set)

        # Number of full mini-batches (floor division keeps these ints on
        # both Python 2 and Python 3).
        n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
        n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0]
                           // batch_size)

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print('... building the model')

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # function computing the mistakes made on a validation mini-batch
        validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        # all model parameters to be fit by gradient descent
        params = (layer3.params + layer2.params +
                  layer1.params + layer0.params)
        grads = T.grad(cost, params)

        # plain SGD: one (parameter, new value) pair per parameter
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(params, grads)]

        # One training step; besides cost and error it returns one row of
        # the first layer3 parameter, which is currently not logged.
        train_model = theano.function(
            [index],
            [cost, layer3.errors(y), layer3.params[0][0][0]],
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # early-stopping parameters
        patience = 50000  # look at this many mini-batches regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this
                                       # much is considered significant
        # go through this many mini-batches before checking the network on
        # the validation set; in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated here, so the reported test performance
        # is always 0.
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        min_train_cost = 10000
        decreasing_num = 0  # consecutive mini-batches without a new minimum
        last_train_err = 1
        last_train_cost = 1

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if iteration % 100 == 0:
                    # Spacing reproduces the former comma-separated print.
                    print('training @ iter =  %s  patience =  %s'
                          % (iteration, patience))

                cost_ij, train_err, _w00 = train_model(minibatch_index)

                # Relative change (in percent) of error and cost compared
                # with the previous mini-batch.
                decreasing_rate = ((last_train_err - train_err) /
                                   (last_train_err) * 100.)
                last_train_err = train_err
                if last_train_err == 0:
                    # avoid dividing by zero on the next mini-batch
                    last_train_err += 0.0000001
                c_d_rate = ((last_train_cost - cost_ij) /
                            (last_train_cost) * 100.)
                last_train_cost = cost_ij

                output_file.write(
                    'epoch %i, minibatch %i/%i, train_cost %f , '
                    'train_error %.2f %%, decreasing rate %f %%, '
                    'cost_decreasing rate %f %%, W00 '
                    % (epoch, minibatch_index + 1, n_train_batches, cost_ij,
                       train_err * 100., decreasing_rate, c_d_rate)
                    + '\n')

                if cost_ij < min_train_cost:
                    # New minimum training cost: snapshot the four layers.
                    decreasing_num = 0
                    min_train_cost = cost_ij
                    trained_model_list = [
                        layer0.__getstate__(), layer1.__getstate__(),
                        layer2.__getstate__(), layer3.__getstate__()
                    ]
                    trained_model_array = numpy.asarray(trained_model_list)
                    # Pickle protocol 2 and numpy.save emit binary data,
                    # so the file must be opened in 'wb'.
                    with open(train_model_route, 'wb') as classifier_file:
                        # Placeholder header; readers load and skip it.
                        cPickle.dump([1, 2, 3], classifier_file, protocol=2)
                        numpy.save(classifier_file, trained_model_array)
                else:
                    print("decreasing")
                    decreasing_num += 1
                    if decreasing_num > 100:
                        done_looping = True
                        break

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    output_file.write(
                        'epoch %i, minibatch %i/%i, validation error %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.)
                        + '\n')

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if the improvement is good enough
                        if (this_validation_loss <
                                best_validation_loss * improvement_threshold):
                            patience = max(patience,
                                           iteration * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                if patience <= iteration:
                    done_looping = True
                    print('%s %s' % (patience, iteration))
                    break

        end_time = time.clock()
        elapsed_msg = ('The code for file ' + os.path.split(__file__)[1] +
                       ' ran for %.2fm' % ((end_time - start_time) / 60.))
        sys.stderr.write(elapsed_msg + '\n')

        output_file.write('Optimization complete.' + '\n')
        output_file.write(
            'Best validation score of %f %% obtained at iteration %i,'
            'with test performance %f %%'
            % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
            + '\n')
def _percent_decrease(previous, current):
    """Return the relative drop from *previous* to *current*, in percent.

    A zero *previous* is nudged to 1e-7 so the ratio stays finite instead of
    turning into inf/nan in the training log.
    """
    if previous == 0:
        previous = previous + 0.0000001
    return (previous - current) / previous * 100.


def _load_saved_classifier_state(model_path):
    """Read the first saved layer state from the model file at *model_path*.

    The file is expected to hold a pickled header object followed by a NumPy
    array of layer states; element 0 of that array is returned.
    """
    # NOTE: unpickling executes arbitrary code -- only load model files that
    # this project wrote itself.
    with open(model_path, 'rb') as model_file:
        # The header carries no information, but it has to be consumed so
        # that numpy.load starts reading at the array.
        cPickle.load(model_file)
        state_array = numpy.load(model_file)
    return state_array[0]


def _run_logistic_sgd(output_file, tr_start_index, tr_limit, vl_start_index,
                      vl_limit, learning_rate, n_epochs):
    """Build the classifier, run the SGD loop and log to *output_file*."""
    # Symbolic inputs shared by the compiled Theano functions.
    index = T.lscalar()  # index of a minibatch
    x = T.matrix('x')    # input rows
    y = T.ivector('y')   # integer labels, one per row

    in_shape = layer0_input_shape[0] * layer0_input_shape[1]
    # The minibatch size equals the number of training rows requested.
    batch_size = tr_limit

    train_set = tdtf.read_data_patch_to_ndarray(train_dataset_route,
                                                tr_start_index, tr_limit)
    train_set_x, train_set_y = load_data.shared_dataset(train_set)

    valid_set = tdtf.read_data_patch_to_ndarray(valid_dataset_route,
                                                vl_start_index, vl_limit)
    print(valid_set[1])
    valid_set_x, valid_set_y = load_data.shared_dataset(valid_set)

    # Number of whole minibatches available in each set.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    # NOTE(review): the saved model is loaded when `if_load_trained_model` is
    # false, which reads as inverted relative to the flag's name. Behaviour is
    # kept as-is here -- confirm the intended meaning before flipping it.
    if not if_load_trained_model:
        classifier_state = _load_saved_classifier_state(train_model_route)
        # presumably the saved state is a (W, b) pair -- verify against the
        # classifier's __getstate__
        classifier = LogisticRegression(input=x, n_in=in_shape,
                                        n_out=layer0_output_shape,
                                        W=classifier_state[0],
                                        b=classifier_state[1])
    else:
        # Fresh model with the classifier's own default initialisation.
        classifier = LogisticRegression(input=x, n_in=in_shape,
                                        n_out=layer0_output_shape)

    # Training objective: negative log likelihood of the labels.
    cost = classifier.negative_log_likelihood(y)

    # Error of the model on one validation minibatch.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Plain gradient descent on theta = (W, b).
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # One call performs a parameter update and returns (cost, error) for the
    # selected training minibatch.
    train_model = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # Early-stopping parameters.
    patience = 50000               # minimum number of iterations to run
    patience_increase = 2          # patience multiplier on a new best score
    improvement_threshold = 0.995  # relative improvement deemed significant
    # Validate once per epoch, or sooner if patience is very small.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_train_loss = numpy.inf
    # No test set is evaluated in this function, so this stays at 0 and is
    # only reported in the final summary line.
    test_score = 0.

    start_time = time.clock()
    done_looping = False
    epoch = 0
    last_train_err = 1
    last_train_cost = 1

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost, train_err = train_model(minibatch_index)

            decreasing_rate = _percent_decrease(last_train_err, train_err)
            last_train_err = train_err
            c_d_rate = _percent_decrease(last_train_cost, minibatch_avg_cost)
            last_train_cost = minibatch_avg_cost

            output_file.write(
                ('epoch %i, minibatch %i/%i, train_cost %f , train_error %.2f %%, decreasing rate %f %%, cost_decreasing rate %f %%' %
                 (epoch, minibatch_index + 1, n_train_batches,
                  minibatch_avg_cost, train_err * 100.,
                  decreasing_rate, c_d_rate)) + '\n')

            if best_train_loss > train_err:
                best_train_loss = train_err

            # Global iteration counter across epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                output_file.write(
                    ('epoch %i, minibatch %i/%i, validation error %f %%' %
                     (epoch, minibatch_index + 1, n_train_batches,
                      this_validation_loss * 100.)) + '\n')

                if this_validation_loss < best_validation_loss:
                    # Extend patience only for a significant improvement.
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Checkpointing of the best model and test-set
                    # evaluation are not performed here.

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    elapsed = end_time - start_time
    # Avoid a ZeroDivisionError when the loop finished within clock resolution.
    epochs_per_sec = 1. * epoch / elapsed if elapsed > 0 else float('inf')

    output_file.write(
        (('Optimization complete with best validation score of %f %%,'
          'with test performance %f %%'
          'with best train_performance %f %%') %
         (best_validation_loss * 100., test_score * 100.,
          best_train_loss * 100.)) + '\n')
    output_file.write(
        'The code run for %d epochs, with %f epochs/sec' % (
            epoch, epochs_per_sec) + '\n')
    sys.stderr.write(('The code for file ' + os.path.split(__file__)[1] +
                      ' ran for %.1fs' % (elapsed)) + '\n')


def sgd_optimization_mnist(tr_start_index=1, tr_limit=5000, vl_start_index=1,
                           vl_limit=5000, learning_rate=0.015, n_epochs=5000,
                           output_filename="ls.out"):
    """Train a logistic-regression classifier with SGD and log the progress.

    Training and validation data are read through
    ``tdtf.read_data_patch_to_ndarray`` from the module-level
    ``train_dataset_route`` / ``valid_dataset_route``. Depending on the
    module-level ``if_load_trained_model`` flag the classifier is either
    initialised from the file at ``train_model_route`` or built fresh.
    Per-minibatch training statistics, per-epoch validation errors and a
    final summary are written to ``output_filename``; the total run time is
    also written to stderr.

    Args:
        tr_start_index: start index passed to the training-data reader.
        tr_limit: limit passed to the training-data reader; also used as the
            minibatch size for both training and validation.
        vl_start_index: start index passed to the validation-data reader.
        vl_limit: limit passed to the validation-data reader.
        learning_rate: step size of the gradient-descent updates.
        n_epochs: maximum number of passes over the training minibatches.
        output_filename: path of the log file (truncated on open).

    Returns:
        None.
    """
    # The context manager guarantees the log is flushed and closed even when
    # data loading, compilation or training raises.
    with open(output_filename, 'w') as output_file:
        _run_logistic_sgd(output_file, tr_start_index, tr_limit,
                          vl_start_index, vl_limit, learning_rate, n_epochs)