def predict():
    """Load the pickled MLP and print its predictions for ten test rows.

    Reads the model from ``best_mlp_model.pkl`` and the data from
    ``mnist.pkl.gz``, compiles a Theano function that maps the classifier's
    input to ``logRegressionLayer.y_pred``, and prints the result for the
    first ten rows of the third split returned by ``load_data``.
    """
    print("Michael Holt\nAssignment 4")
    # The continuation lines are deliberately not indented: any leading
    # whitespace after a backslash-newline becomes part of the string.
    print("-------------------------------------------------------------------------\n\
 Part 2 \n\
 -------------------------------------------------------------------------\n")
    print("... loading the saved mlp model")

    # Load the saved model; the context manager closes the handle even when
    # unpickling fails (the original left the file open).
    with open('best_mlp_model.pkl', 'rb') as model_file:
        classifier = pickle.load(model_file)

    # Compile a predictor function.
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred)

    # Try it on some examples from the test split.
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, _ = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
def build_confusion_matrix():
    """Print a 10x10 confusion matrix for the saved logistic model.

    Loads the classifier from ``best_logistic_model.pkl``, predicts every
    row of the third split returned by ``lsgd.load_data('mnist.pkl.gz')``
    and counts (true label, predicted label) pairs. Rows are indexed by the
    true label and columns by the prediction.
    """
    confusionMatrix = np.zeros((10, 10))

    # Pickle data must be read in binary mode; the context manager also
    # closes the handle, which the original never did.
    with open('best_logistic_model.pkl', 'rb') as model_file:
        classifier = pickle.load(model_file)

    # Predictor function.
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    dataset = 'mnist.pkl.gz'
    datasets = lsgd.load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.eval()
    test_set_y = test_set_y.eval()

    predictions = predict_model(test_set_x)

    print("Confusion Matrix for digit classification: \n")
    for actual, predicted in zip(test_set_y, predictions):
        confusionMatrix[actual][predicted] += 1

    # Suppress scientific notation so the counts print as plain numbers.
    np.set_printoptions(suppress=True)
    print(confusionMatrix)
def minifyDataset(input='data/mnist.pkl.gz', output='data/minified.pkl.gz'):
    """Write a small prefix of each dataset split to a gzipped pickle.

    Keeps the first 500 training, 100 validation and 100 test examples and
    stores them as ``((train_x, train_y), (valid_x, valid_y),
    (test_x, test_y))``.

    :param input: path handed to ``load_data``
    :param output: path of the gzipped pickle to create
    """
    print('read ' + input)
    datasets = load_data(input)

    split_lengths = (500, 100, 100)
    minified = []
    for (set_x, set_y), length in zip((datasets[0], datasets[1], datasets[2]),
                                      split_lengths):
        # Each split's labels come from that split. The original sliced the
        # validation and test labels out of the *training* labels.
        minified.append((theanoTensor2NumpyArray(set_x[0:length:1]),
                         theanoTensor2NumpyArray(set_y[0:length:1])))
    data = tuple(minified)

    f = gzip.open(output, 'wb')
    try:
        cPickle.dump(data, f, -1)
    finally:
        # Close the archive even if pickling fails.
        f.close()
    print('output to ' + output)
def mlp_test(test_set, Model, n_input=2030, n_output=150, n_hidden=50):
    """Run a pre-trained MLP on the first sample of ``test_set``.

    :param test_set: dataset location; passed to ``load_data`` three times
    :param Model: stored parameters forwarded to the ``MLP`` constructor
    :param n_input: size of the input layer
    :param n_output: size of the output layer
    :param n_hidden: size of the hidden layer
    :return: ``classifier.predictAll`` for sample 0, converted with ``tolist()``
    """
    # Only the first split returned by load_data is used.
    splits = load_data(test_set, test_set, test_set)
    samples, _labels = splits[0]

    sample_index = T.lscalar()  # which sample to feed in
    sample = T.vector('x')      # one input vector

    network = MLP(
        rng=numpy.random.RandomState(1234),
        input=sample,
        n_in=n_input,
        n_hidden=n_hidden,
        n_out=n_output,
        Model=Model,
    )

    run_network = theano.function(
        inputs=[sample_index],
        outputs=network.predictAll,
        givens={sample: samples[sample_index]},
    )
    return run_network(0).tolist()
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, dataset='or.pkl.gz', batch_size = 3, n_hidden=500): # inputs [[1,0,0], [1,0,1], [1,1,0], [1,1,1], [0,1,1], [0,1,0], [0,0,1], [0,0,0]] # outputs [[1], [1], [1], [1], [1], [1], [1], [0]] datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] print train_set_x print '...building the model' index = T.lscalar() x = T.ivector('x') y = T.ivector('y') rng = numpy.random.RandomState(1234) classifier = MLP(rng=rng, input=x, n_in=3, n_hidden=3, n_out=1) cost = ( classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr ) #index = 0 test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } )
def train_autos_l2(corruption=0): #load data dataset='mnist.pkl.gz' datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] #train dA's dAs = [] n_hiddens = [10,25,50,100] x = T.matrix('x') rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) for n_hidden in n_hiddens: path_da = '../data/dA_l2/dA_l2_nhid'+str(n_hidden)+'_corr'+str(corruption)+'.p' if not os.path.isfile(path_da): print 'defining for n_hidden = ',n_hidden da = dA( numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=n_hidden ) print 'training for n_hidden = ',n_hidden train_dA(da,train_set_x = train_set_x, train_set_y = train_set_y, corruption=corruption) print 'storing dA to file' da.dump(open(path_da,'w'))
def test_mlp(dataset = 'mnist.pkl.gz', batch_size = 128): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for testing n_test = test_set_x.get_value(borrow=True).shape[0] n_test_batches = n_test / batch_size ###################### # LOAD ACTUAL MODEL # ###################### print '... loading the model' # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # construct the MLP class classifier = MLP( input=x, n_in=28 * 28, n_hidden1=394, n_hidden2=196, n_out=10 ) f = open('/home/dmitry/Projects/DNN-develop/theano/results/mlp',"rb") classifier.hiddenLayer1.W.set_value(cPickle.load(f), borrow=True) classifier.hiddenLayer1.b.set_value(cPickle.load(f), borrow=True) classifier.hiddenLayer2.W.set_value(cPickle.load(f), borrow=True) classifier.hiddenLayer2.b.set_value(cPickle.load(f), borrow=True) classifier.logRegressionLayer.W.set_value(cPickle.load(f), borrow=True) classifier.logRegressionLayer.b.set_value(cPickle.load(f), borrow=True) test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] } ) print('batch_size ' + str(batch_size)) print('n_test ' + str(n_test)) print('n_test_batches ' + str(n_test_batches)) wtime = time.clock() test_losses = [test_model(i) for i in xrange(n_test_batches)] test_score = numpy.mean(test_losses) wtime = (time.clock() - wtime) / n_test * 1000.; print('for 1 sample needed ' + str(wtime) + ' msec') print('test score ' + str(test_score * 100.))
def test(dataset = 'mnist.pkl.gz', output_folder = 'plots'): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) input = T.matrix('input') output = get_corrupted_input_gaussian(theano_rng = theano_rng, input = input) corrupt = theano.function([input], output) mnist_noise = corrupt(train_set_x.get_value(borrow = True)) mnist_noise = theano.shared(value=mnist_noise, name='mnist_noise', borrow = True) # print train_set_x.get_value(borrow=True)[0] # print mnist_noise.get_value(borrow=True)[0] image_clean = Image.fromarray(tile_raster_images(X = train_set_x.get_value(borrow = True), img_shape=(28, 28), tile_shape=(1, 6), tile_spacing=(1,1))) image_clean.save('clean_6.png') image_noise = Image.fromarray(tile_raster_images(X = mnist_noise.get_value(borrow = True), img_shape=(28, 28), tile_shape=(1, 6), tile_spacing=(1,1))) image_noise.save('noise_6.png') print 'Done!'
def test_autos_l2(corruption=0):
    """Render validation-set reconstructions for each stored dA.

    For every hidden size in ``[10, 25, 50, 100]`` the matching model file
    is loaded, the validation inputs are reconstructed, and a 10x10 tile is
    saved under ``../data/dA_l2/pics/``. A tile of the unmodified validation
    inputs is saved as ``original.png``.

    :param corruption: corruption level; used only to build file names
    """
    # Load data.
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    valid_set_x, valid_set_y = datasets[1]

    # Test against validation set.
    n_hiddens = [10, 25, 50, 100]
    x = T.matrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    for n_hidden in n_hiddens:
        # Load model.
        da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                n_visible=28 * 28, n_hidden=n_hidden)
        model_path = '../data/dA_l2/dA_l2_nhid' + str(n_hidden) + '_corr' + str(corruption) + '.p'
        # Close the model file once loaded; the original leaked the handle.
        with open(model_path, 'r') as model_file:
            da.load(model_file)

        reconstructed = da.get_reconstructed_input(input=valid_set_x)
        image = Image.fromarray(tile_raster_images(
            X=reconstructed.eval(), img_shape=(28, 28),
            tile_shape=(10, 10), tile_spacing=(1, 1)))
        image.save('../data/dA_l2/pics/dAs_reconstructed_nhid' + str(da.n_hidden) + '_corr' + str(corruption) + '.png')

    # The reference picture does not depend on the model, so it is written once.
    image = Image.fromarray(tile_raster_images(
        X=valid_set_x.get_value(), img_shape=(28, 28),
        tile_shape=(10, 10), tile_spacing=(1, 1)))
    image.save('../data/dA_l2/pics/original.png')
def setData(self, datasetPath):
    """Load the dataset at ``datasetPath`` and install its three splits.

    The loaded object is kept in ``self._datasets``; entries 0, 1 and 2 are
    passed to the training, validation and test setters, in that order.
    """
    self._datasets = load_data(datasetPath)

    installers = (
        self._setTrainingData,
        self._setValidateData,
        self._setTestData,
    )
    for position, install in enumerate(installers):
        install(self._datasets[position])

    print("... Triple DataSet in position. \n")
def test_stack_machine():
    """Compare a classifier on raw inputs against one on stacked-AE output.

    Loads the stacked auto-encoder saved under
    ``Saving/Stack_AE_theano/10-06_3L1000``, prints its reconstruction error
    on the test split, then fits the classifier named by ``classifier``
    twice (raw inputs, and inputs passed through encode + decode) and prints
    training and test accuracy for both.

    :return: the loaded stacked auto-encoder
    """
    # Load learning:
    save_path = "Saving/Stack_AE_theano"
    save_dir = "10-06_3L1000"
    load_dir = os.path.join(save_path, save_dir)
    stack_AE = SAE.load(load_dir)

    # Load the dataset:
    print("Loading dataset...")
    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    _, error = stack_AE.reconstruct(test_set_x)
    print("The error of reconstruction is: {0}".format(error.eval()), "%")

    # Classification:
    # Perform the feed-forward pass for train & testing sets:
    depth = stack_AE.architecture.shape[0]
    train_deconstructed_layer_value = stack_AE.forward_encoding(
        train_set_x, 0, depth)
    train_reconstructed_layer_value = stack_AE.forward_decoding(
        train_deconstructed_layer_value, 0, depth)
    test_deconstructed_layer_value = stack_AE.forward_encoding(
        test_set_x, 0, depth)
    test_reconstructed_layer_value = stack_AE.forward_decoding(
        test_deconstructed_layer_value, 0, depth)

    # Evaluate the dataset tensors once instead of on every fit/score call.
    train_x_values = train_set_x.eval()
    train_y_values = train_set_y.eval()

    # Classifiers:
    classifier = 'AdaBoostClassifier'
    print("Classifier used: ", classifier)

    print("Learning the logistic regression without stack...")
    logReg_withoutStack = stack_AE.supervized_classification(
        train_x_values, train_y_values, classification_method=classifier)

    # NOTE(review): the reconstructed-layer graphs are still evaluated per
    # call, as in the original; this block cannot tell whether they are
    # deterministic, so their results are not cached.
    print("Learning the logistic regression with stack...")
    logReg_afterStack = stack_AE.supervized_classification(
        train_reconstructed_layer_value.eval(), train_y_values,
        classification_method=classifier)

    test_x_values = test_set_x.eval()
    test_y_values = test_set_y.eval()

    # Performances:
    print("Without Stack_AE:")
    print("Accuracy training set:",
          logReg_withoutStack.score(train_x_values, train_y_values))
    print("Accuracy test set:",
          logReg_withoutStack.score(test_x_values, test_y_values))
    print("With Stack_AE:")
    print("Accuracy training set:",
          logReg_afterStack.score(train_reconstructed_layer_value.eval(),
                                  train_y_values))
    print("Accuracy test set:",
          logReg_afterStack.score(test_reconstructed_layer_value.eval(),
                                  test_y_values))
    return stack_AE
def predict(dataset):
    """Rebuild the two-hidden-layer MLP from saved parameters and predict.

    Loads six parameter objects from ``BEST_PICKLEJAR`` (W/b for hidden
    layer 1, hidden layer 2 and the logistic layer, in that order), prints
    their type and shape, and prints class probabilities, predicted labels
    and actual labels for the first 50 rows of the test split.

    :param dataset: path handed to ``load_data``
    """
    # Test on some examples from the test set.
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    # Load the saved weights and bias vectors. Pickles are read in binary
    # mode and the handle is closed, neither of which the original did.
    with open(BEST_PICKLEJAR, 'rb') as jar:
        pars = cPickle.load(jar)

    for i, p in enumerate(pars):
        print("Checking loaded parameter %i type and shape..." % i)
        print(type(p))
        print(p.eval().shape)

    # Symbolic vars for the data.
    x = T.matrix('x')  # rasterized image data

    rng = numpy.random.RandomState(1234)

    # Use our loaded params to init the model.
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=NINPUT,
        n_hidden1=NHIDDEN1,
        n_hidden2=NHIDDEN2,
        n_out=NOUT,
        W_hidden1=pars[0],
        b_hidden1=pars[1],
        W_hidden2=pars[2],
        b_hidden2=pars[3],
        W_logreg=pars[4],
        b_logreg=pars[5]
    )

    # y_pred is the argmax label; p_y_given_x is the full softmax array
    # prior to the argmax call.
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred
    )
    predict_model_show_probs = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.p_y_given_x
    )

    show_size = 50
    sample = test_set_x[:show_size]
    predicted_values_probs = predict_model_show_probs(sample)
    predicted_values = predict_model(sample)

    print("Predicted values (probs) for the first %d:" % (show_size))
    print(predicted_values_probs)
    print("Predicted values for the first %d:" % (show_size))
    print(predicted_values)
    print("Actual values:")
    print(T.cast(test_set_y, 'int32').eval()[:show_size])
def main(): dataset = '../subimages/cache.pkl.gz' datasets = load_data(dataset) epochs = 200 batch_size=500 rvals = [0.1, 0.05, 0.01, 0.005] kerns1vals = [10, 20] kerns2vals = [20, 50] hypers = list(itertools.product(rvals, kerns1vals, kerns2vals)) names = ['r', 'k1', 'k2'] trainerWrapper = lambda dim_in, r, k1, k2: Trainer(r, k1, k2, batch_size) xdata, ydata = datasets[0] xverify, yverify = datasets[1] xtest, ytest = datasets[2] xdata_ref = xdata.get_value(borrow=True) ydata_ref = ydata.eval() xverify_ref = xverify.get_value(borrow=True) yverify_ref = yverify.eval() xtest_ref = xtest.get_value(borrow=True) ytest_ref = ytest.eval() print 'Learning a convolutional neural network (CNN)' print ' dataset: ', dataset print ' training size: ', xdata_ref.shape print ' verify size: ', xverify_ref.shape print ' test size: ', xtest_ref.shape k = 3 cross_epochs = 5 print '... cross-validating' print ' k: ', k print ' cross-val epochs: ', cross_epochs print ' batch size: ', batch_size print ' params: ', names for i in xrange(len(names)): print ' {0:3s} values: '.format(names[i]), sorted(set([x[i] for x in hypers])) #best = crossvalidate(trainerWrapper, xdata_ref, ydata_ref, k, cross_epochs, batch_size, hypers, names) print 'Skipping cross-validation. Using best value from previous run' best = [0.05, 20, 50] print ' best hyper-params: ', names, ' = ', best classifier = trainerWrapper(0, *best) print '... training' print ' batch size: ', batch_size print ' epochs: ', epochs print ' hyper-params: ', names, ' = ', best classifier.train(xdata_ref, ydata_ref, epochs, batch_size, valid_set_x=xverify_ref, valid_set_y=yverify_ref, quiet=False) print '... testing' print ' training accuracy: ', 1 - classifier.errors(xdata_ref, ydata_ref) print ' validation accuracy: ', 1 - classifier.errors(xverify_ref, yverify_ref) print ' testing accuracy: ', 1 - classifier.errors(xtest_ref, ytest_ref) print '... 
pickling' print ' classifier: ', classifier filename = pickleSafely(classifier, 'classifier', '.pkl.gz') print ' saved to: ', filename
def main(dataset = 'mnist.pkl.gz',no_model = None, no_hid = None): # initiate hyper-parameters if no_model == None: no_model = 10 if no_hid == None: no_hid = 5 # data loading print 'loading data ...' datasets = load_data(dataset) x_tr, y_tr = datasets[0] x_va, y_va = datasets[1] x_te, y_te = datasets[2] x = T.matrix('x') y = T.ivector('y') no_train = x_tr.get_value(borrow=True).shape[0] dim = x_tr.get_value(borrow=True).shape[1] K = y_tr.eval().max() # 9 for mnsit # global variables weight = np.tile(1.0/no_train,(no_train,)) # weights for each instances alpha = np.zeros((no_model,)) # model nets = [] rng = np.random.RandomState(1234) for i in xrange(no_model): net = MLP( rng=rng, input=x, n_in=dim, n_hidden=no_hid, n_out=K+1 ) print('adding network %i into nets list')%(i) nets.append(net) # train model for i in xrange(no_model): # train model based on current weights # make prediction, compute weighted error/alpha # update weights return def train_mlp(): if __name__=='__main__': main()
def test_dA(learning_rate=0.1, training_epochs=15, dataset='./data/mnist.pkl.gz', batch_size=20, output_folder='dA_plots'): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size index = T.lscalar() x = T.matrix('x') if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28*28, n_hidden=500) cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=learning_rate) train_da = theano.function([index], cost, updates=updates, givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]}) start_time = time.clock(); # training for epoch in xrange(training_epochs): c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((training_time) / 60.)) image = PIL.Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') # training with corruption_level is 30% ...... os.chdir('../')
def test_DimentionalReduction(learning_rate=0.1, training_epochs=15, dataset="mnist.pkl.gz", batch_size=20): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size index = T.lscalar() # index to a [mini]batch x = T.matrix("x") # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=2) cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=learning_rate) train_da = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size : (index + 1) * batch_size]} ) for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print "Training epoch %d, cost " % epoch, numpy.mean(c) x = T.matrix("x") hidden_values_function = da.get_hidden_values2(x) result_function = theano.function(inputs=[x], outputs=hidden_values_function) colors = ["b", "g", "r", "c", "m", "y", "k", "w"] n = 0 for x, y in zip(result_function(train_set_x.get_value()), train_set_y.eval()): if y < len(colors): plt.scatter(x[0], x[1], c=colors[y]) n += 1 if n > 2000: break plt.show() n = 0 pca = PCA(n_components=2) for x, y in zip(pca.fit_transform(train_set_x.get_value()), train_set_y.eval()): if y < len(colors): plt.scatter(x[0], x[1], c=colors[y]) n += 1 if n > 2000: break plt.show()
def test_stacked_autoencoder( finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001, training_epochs=100, dataset="mnist.pkl.gz", batch_size=1, hidden_layers_sizes=[1000, 1000, 1000], corruption_levels=[0.1, 0.2, 0.3], pretrain_flag=True, testerr_file="test_error.txt", ): datasets = load_data("../data/mnist.pkl.gz") train_set_x = datasets[0][0] n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size numpy_rng = np.random.RandomState(89677) print "building the model ..." sda = StackedDenoisingAutoencoder(numpy_rng, 28 * 28, hidden_layers_sizes, 10, corruption_levels) # Pre-training if pretrain_flag: print "getting the pre-training functions ..." pretraining_functions = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print "pre-training the model ..." for i in xrange(sda.n_layers): for epoch in xrange(pretraining_epochs): c = [] for batch_index in xrange(n_train_batches): c.append( pretraining_functions[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr) ) print "Pre-training layer %i, epoch %d, cost %f" % (i, epoch, np.mean(c)) # Fine-tuning print "getting the fine-tuning functions ..." train_model, _, test_model = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) print "fine-tuning the model ..." epoch = 0 fp = open(testerr_file, "w") while epoch < training_epochs: epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): train_model(minibatch_index) test_losses = test_model() test_score = np.mean(test_losses) print "Fine-tuning, epoch %d, test error %f" % (epoch, test_score * 100) fp.write("%d\t%f\n" % (epoch, test_score * 100)) fp.close()
def my_test():
    """Save the first ten training images as a 1x10 tile, ``samples.png``.

    The data is loaded from ``../data/mnist.pkl.gz`` and the picture is
    written to the current working directory.
    """
    datasets = load_data('../data/mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]

    tiled = tile_raster_images(X=train_set_x.get_value(borrow=True)[0:10],
                               img_shape=(28, 28),
                               tile_shape=(1, 10),
                               tile_spacing=(1, 1))

    # Use the tiled array directly. The original copied it into a
    # pre-allocated 2-D buffer with `image_data[:,:,:] = ...`; three indices
    # on a 2-D array raise IndexError, so the function could never finish.
    image_data = numpy.asarray(tiled, dtype='uint8')

    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
def load2d(dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False): print 'loading data...' datasets = load_data(dataset, toShuffleInput, withZeroMeaning) train_set_x, train_set_y = datasets[0] # valid_set_x, valid_set_y = datasets[1] # test_set_x, test_set_y = datasets[2] train_set_x = train_set_x.reshape(-1, 1, input_width, input_height) print(train_set_x.shape[0], 'train samples') return train_set_x, train_set_y
def showDataset(dataset='data/mnist.pkl.gz'):
    """Pretty-print samples and basic statistics of a pickled dataset.

    Prints the first input row and the full label vector of each split
    (train, validation, test) together with its length, max and min, then
    warns when the inputs and labels of a split differ in length.

    :type dataset: string
    :param dataset: path to the pickled dataset
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    #type(train_set_x) => <class 'theano.tensor.sharedvar.TensorSharedVariable'>
    np_train_x = train_set_x.get_value(borrow=True)
    np_valid_x = valid_set_x.get_value(borrow=True)
    np_test_x = test_set_x.get_value(borrow=True)

    #type(train_set_y) => <class 'theano.tensor.basic.TensorVariable'>
    np_train_y = theanoTensor2NumpyArray(train_set_y)
    np_valid_y = theanoTensor2NumpyArray(valid_set_y)
    np_test_y = theanoTensor2NumpyArray(test_set_y)

    # First input row of each split.
    np_train_x0 = np_train_x[0]
    np_valid_x0 = np_valid_x[0]
    np_test_x0 = np_test_x[0]

    # (An earlier eval()-based variant of the printing loop was removed here.)

    vars = [np_train_x0, np_train_y, np_valid_x0, np_valid_y, np_test_x0, np_test_y]
    # getVarNames is handed this function's local-variable mapping, so the
    # labels it produces depend on the local names used above; do not
    # rename them without checking getVarNames.
    var_names = locals()
    for var in vars:
        name = getVarNames(var, var_names)
        print(str(name) + ' start')
        pprint(var)
        print("len:%d max:%f min:%f"%(len(var), numpy.max(var), numpy.min(var)))
        print(str(name) + ' end\n')

    # Sanity check: every split should have as many inputs as labels.
    pairs = [(np_train_x, np_train_y), (np_valid_x, np_valid_y), (np_test_x, np_test_y)]
    for pair in pairs:
        if not (len(pair[0]) == len(pair[1])):
            name_x = getVarNames(pair[0], locals())
            name_y = getVarNames(pair[1], locals())
            print("WARNING: the lengths of %s & %s are different" % (name_x, name_y))
def test_DimentionalReduction(dataset='mnist.pkl.gz', pretraining_epochs=50, pretrain_lr=0.01, batch_size=5): datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] numpy_rng = numpy.random.RandomState(89677) n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size print '... building the model' # construct the stacked denoising autoencoder class sda = SdA( numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[300, 50, 2], n_outs=2 ) print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) corruption_levels = [0., 0., 0.] for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) target = train_set_x.get_value() for dA_layer in sda.dA_layers: hidden_values_function = dA_layer.get_hidden_values2(sda.x) result_function = theano.function(inputs=[sda.x],outputs=hidden_values_function) target = result_function(target) print target colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','w'] n = 0 for x,y in zip(target,train_set_y.eval()): if y < len(colors): plt.scatter(x[0], x[1],c=colors[y]) n += 1 if n > 2000: break plt.show()
def test_columns(exclude_mode, models, valid_test='V'): dataset='mnist.pkl.gz' print '... Starting to test %i columns' % len(models) # create data hash that will be filled with data from different normalizations all_datasets = {} # instantiate multiple columns columns = [] for model in models: # load model params f = open('./models/'+model) params = cPickle.load(f) nkerns, batch_size, normalized_width, distortion = cPickle.load(f) if all_datasets.get(normalized_width): datasets = all_datasets[normalized_width] else: datasets = load_data(dataset, normalized_width, 29) all_datasets[normalized_width] = datasets # no distortion during testing columns.append(DNNColumn(datasets, nkerns, batch_size, normalized_width, 0, params)) print '... Forward propagating %i columns' % len(models) # call test on all of them recieving 10 outputs if valid_test=='V': model_outputs = [column.valid_outputs() for column in columns] position_ds = 1 else: model_outputs = [column.test_outputs() for column in columns] position_ds = 2 # average 10 outputs avg_output = numpy.mean(model_outputs, axis=0) # argmax over them predictions = numpy.argmax(avg_output, axis=1) # compare predictions with true labels pred = T.ivector('pred') all_true_labels_length = theano.function([], all_datasets.values()[0][position_ds][1].shape) remainder = all_true_labels_length() - len(predictions) if exclude_mode and remainder: print '... Excluding FIRST %i points' % remainder true_labels = all_datasets.values()[0][position_ds][1][remainder:] elif remainder: # TODO: remove this, doesn't seem to make sense since the predictions would be misaligned print '... Excluding LAST %i points' % remainder true_labels = all_datasets.values()[0][position_ds][1][:len(predictions)] else: true_labels = all_datasets.values()[0][position_ds][1][:] error = theano.function([pred], T.mean(T.neq(pred, true_labels))) acc = error(predictions.astype(dtype=numpy.int32)) print '....' 
print 'Error across %i columns: %f %%' % (len(models), 100*acc) return [predictions, acc]
def load2d(test=False, cols=None): print 'loading data...' datasets = load_data('ISH.pkl.gz',withZeroMeaning=False) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] batch_size = 5 input_width = 300 input_height = 140 train_set_x = train_set_x.reshape(-1, 1, input_width, input_height) return train_set_x, train_set_y
def run():
    """Train an ``MLPClassifier`` on the training split, then call ``fit``.

    ``train`` receives plain values taken from the first split; ``fit``
    receives the third split exactly as ``load_data`` returned it.
    """
    splits = load_data('mnist.pkl.gz')

    train_inputs, train_labels = splits[0]
    model = MLPClassifier(28 * 28, 10, n_epochs=10)
    model.train(train_inputs.get_value(), train_labels.eval())

    held_out_inputs, held_out_labels = splits[2]
    model.fit(held_out_inputs, held_out_labels)
def fromPickledData(zipName):
    """Combine the inputs of all three splits with ``+`` and return the result.

    :param zipName: path handed to ``load_data``
    :return: ``train_x + valid_x + test_x``
    """
    splits = load_data(zipName)
    train_x, _train_y = splits[0]
    valid_x, _valid_y = splits[1]
    test_x, _test_y = splits[2]

    # NOTE(review): what `+` does depends on the type load_data returns.
    # For sequences it concatenates; for tensor-like objects it is
    # presumably an element-wise sum -- confirm which is intended.
    return train_x + valid_x + test_x
def test_dA(learning_rate=0.1,training_epochs=15, dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """Train the dA twice: without corruption and with 30% corruption.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
        auto-encoder
    :type training_epochs: int
    :param training_epochs: number of epochs used for training
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: rows per minibatch
    :type output_folder: string
    :param output_folder: directory (created if missing) passed to
        ``train_mdl``
    """
    train_set_x, _ = load_data(dataset)[0]

    # Number of whole minibatches in the training split.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # First pass: no corruption. Second pass: 30% corruption.
    for corruption in (0., 0.3):
        train_da, da = build_mdl(n_train_batches, batch_size, corruption,
                                 learning_rate, train_set_x)
        train_mdl(train_da, da, training_epochs, n_train_batches,
                  corruption, output_folder)
def train_mcdnn_column(normalized_width=0, n_epochs=800, trail=0): print '... train %i column of normalization %i' % (trail, normalized_width) print '... num_epochs %i' % (n_epochs) # load data using logistic_sgd width size normalization param # if normalized_width == 0 then the data_set comes without changes on width digit # this method load_data reshape all images from 28x28 to 29x29 with padding method # is important to use dataset on dnncolumn class datasets = load_data(dataset='mnist.pkl.gz', digit_normalized_width=normalized_width, digit_out_image_size=29) # initialize dnn column with dataset above column = DNNColumn(ds=datasets, normalized_width=normalized_width) column.train_column(n_epochs=n_epochs, init_learning_rate=0.1) #save the model with params from train filename = 'mcdnn_nm%i_trail%i_Layers_time_%i' % (normalized_width, trail, int(time.time())) column.save(filename)
def load2d(num_labels,outputFile=None, input_width=300, input_height=140,end_index=16351,MULTI_POSITIVES=20,dropout_percent=0.1, dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False): print 'loading data...' datasets = load_data(dataset, toShuffleInput=toShuffleInput, withZeroMeaning=withZeroMeaning,end_index=end_index,MULTI_POSITIVES=MULTI_POSITIVES,dropout_percent=dropout_percent, labelset=num_labels) train_set_x, train_set_y = datasets[0] # valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] train_set_x = train_set_x.reshape(-1, 1, input_width, input_height) # valid_set_x = valid_set_x.reshape(-1, 1, input_width, input_height) test_set_x = test_set_x.reshape(-1, 1, input_width, input_height) print(train_set_x.shape[0], 'train samples') if outputFile is not None: outputFile.write("Number of training examples: "+str(train_set_x.shape[0]) + "\n\n") return train_set_x, train_set_y, test_set_x, test_set_y
def evaluate_lenet5(learning_rate=0.1, n_epochs=2, dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500): """ Demonstrates lenet on MNIST dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: path to the dataset used for training /testing (MNIST here) :type nkerns: list of ints :param nkerns: number of kernels on each layer """ datasets = load_data(dataset) (n_train_batches, n_valid_batches, n_test_batches, train_model, validate_model, test_model) = \ build_models(learning_rate, datasets, nkerns, batch_size) ############### # TRAIN MODEL # ############### print '... training' start_time = time.clock() (best_validation_loss, best_iter, test_score) = \ train_models(n_epochs, n_train_batches, n_valid_batches, n_test_batches, train_model, validate_model, test_model) end_time = time.clock() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def minifyDataset(input, output, minify_rate=0.01):
    """Write a shrunken copy of a pickled dataset.

    The dataset is read with ``load_data``; the leading ``minify_rate``
    fraction (rounded down) of each of its three splits is kept, and the
    result is pickled into a gzip file as
    ``((train_x, train_y), (valid_x, valid_y), (test_x, test_y))``.

    :param input: path of the dataset to read; must exist
    :param output: path of the file to create; must NOT exist yet
    :param minify_rate: fraction of each split to keep
    :return: ``True`` on success, ``False`` (after printing a message) when
        either path is empty, ``input`` is missing or ``output`` already
        exists
    """
    usage = 'usage: minify_dataset.py <input_file> <output_file>'

    if not input:
        print(usage)
        return False
    if not os.path.exists(input):
        print('file ' + input + ' not found')
        return False
    if not output:
        print(usage)
        return False
    if os.path.exists(output):
        print('output file ' + output + ' already exists')
        return False

    print('read ' + input)
    datasets = load_data(input)

    def _leading_part(split):
        # Keep the first floor(len * minify_rate) rows of one (x, y) split;
        # the length is measured on x and applied to both x and y.
        set_x, set_y = split
        full_len = len(set_x.get_value(borrow=True))
        keep = int(math.floor(full_len * minify_rate))
        return (theanoTensor2NumpyArray(set_x[0:keep]),
                theanoTensor2NumpyArray(set_y[0:keep]))

    data = (_leading_part(datasets[0]),
            _leading_part(datasets[1]),
            _leading_part(datasets[2]))

    # The context manager closes the gzip stream even if pickling fails,
    # so no handle leaks.
    with gzip.open(output, 'wb') as f:
        cPickle.dump(data, f, -1)

    print('output to ' + output)
    return True
def test_rbm(learning_rate=0.1, training_epochs=15, dataset='../data/mnist.pkl.gz', batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots', n_hidden=500): """ Demonstrate how to train and afterwards sample from it using Theano. This is demonstrated on MNIST. :param learning_rate: learning rate used for training the RBM :param training_epochs: number of epochs used for training :param dataset: path the the pickled dataset :param batch_size: size of a batch used to train the RBM :param n_chains: number of parallel Gibbs chains to be used for sampling :param n_samples: number of samples to plot for each chain """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) # initialize storage for the persistent chain (state = hidden layer of chain) persistent_chain = theano.shared( numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX)) # construct the RBM class rbm = RBM( input = x, n_visible=28*28, \ n_hidden = n_hidden, numpy_rng = rng, theano_rng = theano_rng) # get the cost and the gradient corresponding to one step of CD-15 cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15) ################################# # Training the RBM # ################################# if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # it is ok for a theano function to have no output # the purpose of train_rbm is solely to update the RBM parameters train_rbm = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * 
batch_size]}, name='train_rbm') plotting_time = 0. start_time = time.clock() # go through training epochs for epoch in xrange(training_epochs): # go through the training set mean_cost = [] for batch_index in xrange(n_train_batches): mean_cost += [train_rbm(batch_index)] print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) # Plot filters after each training epoch plotting_start = time.clock() # Construct image from the weight matrix image = PIL.Image.fromarray( tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = time.clock() plotting_time += (plotting_stop - plotting_start) end_time = time.clock() pretraining_time = (end_time - start_time) - plotting_time print('Training took %f minutes' % (pretraining_time / 60.)) ################################# # Sampling from the RBM # ################################# # find out the number of test samples number_of_test_samples = test_set_x.get_value(borrow=True).shape[0] # pick random test examples, with which to initialize the persistent chain test_idx = rng.randint(number_of_test_samples - n_chains) persistent_vis_chain = theano.shared( numpy.asarray(test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains], dtype=theano.config.floatX)) plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field) # define a function that does `plot_every` steps before returning the sample for # plotting [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = \ theano.scan(rbm.gibbs_vhv, outputs_info = [None, None,None,None,None,persistent_vis_chain], n_steps = plot_every) # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. 
# we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') # create a space to store the image for plotting ( we need to leave # room for the tile_spacing as well) image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8') for idx in xrange(n_samples): # generate `plot_every` intermediate samples that we discard, because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() print ' ... plotting sample ', idx image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)) # construct image image = PIL.Image.fromarray(image_data) image.save('samples.png') os.chdir('../')
def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): """Demonstrate conjugate gradient optimization of a log-linear model This is demonstrated on MNIST. :type n_epochs: int :param n_epochs: number of epochs to run the optimizer :type mnist_pkl_gz: string :param mnist_pkl_gz: the path of the mnist training file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ ############# # LOAD DATA # ############# datasets = load_data(mnist_pkl_gz) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] batch_size = 600 # size of the minibatch n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size n_in = 28 * 28 # number of input units n_out = 10 # number of output units ###################### # BUILD ACTUAL MODEL # ###################### print '... 
building the model' # allocate symbolic variables for the data minibatch_offset = T.lscalar() # offset to the start of a [mini]batch x = T.matrix() # the data is presented as rasterized images y = T.ivector() # the labels are presented as 1D vector of # [int] labels # construct the logistic regression class classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.negative_log_likelihood(y).mean() # compile a theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( [minibatch_offset], classifier.errors(y), givens={ x: test_set_x[minibatch_offset:minibatch_offset + batch_size], y: test_set_y[minibatch_offset:minibatch_offset + batch_size] }, name="test") validate_model = theano.function( [minibatch_offset], classifier.errors(y), givens={ x: valid_set_x[minibatch_offset:minibatch_offset + batch_size], y: valid_set_y[minibatch_offset:minibatch_offset + batch_size] }, name="validate") # compile a theano function that returns the cost of a minibatch batch_cost = theano.function( [minibatch_offset], cost, givens={ x: train_set_x[minibatch_offset:minibatch_offset + batch_size], y: train_set_y[minibatch_offset:minibatch_offset + batch_size] }, name="batch_cost") # compile a theano function that returns the gradient of the minibatch # with respect to theta batch_grad = theano.function( [minibatch_offset], T.grad(cost, classifier.theta), givens={ x: train_set_x[minibatch_offset:minibatch_offset + batch_size], y: train_set_y[minibatch_offset:minibatch_offset + batch_size] }, name="batch_grad") # creates a function that computes the average cost on the training set def train_fn(theta_value): classifier.theta.set_value(theta_value, borrow=True) train_losses = [ batch_cost(i * batch_size) for i in xrange(n_train_batches) ] return numpy.mean(train_losses) # creates a function that computes the 
average gradient of cost with # respect to theta def train_fn_grad(theta_value): classifier.theta.set_value(theta_value, borrow=True) grad = batch_grad(0) for i in xrange(1, n_train_batches): grad += batch_grad(i * batch_size) return grad / n_train_batches validation_scores = [numpy.inf, 0] # creates the validation function def callback(theta_value): classifier.theta.set_value(theta_value, borrow=True) #compute the validation loss validation_losses = [ validate_model(i * batch_size) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('validation error %f %%' % (this_validation_loss * 100., )) # check if it is better then best validation score got until now if this_validation_loss < validation_scores[0]: # if so, replace the old one, and compute the score on the # testing dataset validation_scores[0] = this_validation_loss test_losses = [ test_model(i * batch_size) for i in xrange(n_test_batches) ] validation_scores[1] = numpy.mean(test_losses) ############### # TRAIN MODEL # ############### # using scipy conjugate gradient optimizer import scipy.optimize print("Optimizing using scipy.optimize.fmin_cg...") start_time = timeit.default_timer() best_w_b = scipy.optimize.fmin_cg(f=train_fn, x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype), fprime=train_fn_grad, callback=callback, disp=0, maxiter=n_epochs) end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, with ' 'test performance %f %%') % (validation_scores[0] * 100., validation_scores[1] * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
def test_ssDA(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001, training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1): """ Demonstrates how to train and test a stochastic denoising autoencoder. This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used in the finetune stage (factor for the stochastic gradient) :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type n_iter: int :param n_iter: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset """ xtropy_fraction = 1 dir_pretrained = '../data/train_snapshots/stacked_sda/' path_finetuned_pre = '../data/train_snapshots/stacked_sda/stackedSDA_pretrainedxtropy2.p'#'/Users/vmisra/data/deepCompress_data/stackedSDA_xtropy1params.p'#../data/train_snapshots/stacked_sda/stackedSDA_pretrainedxtropy.p' path_finetuned_post = '../data/train_snapshots/stacked_sda/stackedSDA_finetunedxtropy2.p'#'/Users/vmisra/data/deepCompress_data/stackedSDA_prextropy1_postxtropy0_B.p'#../data/train_snapshots/stacked_sda/stackedSDA_prextropy1_postxtropy0.p' datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator # start-snippet-3 numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class ssda = ssDA( numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000, 15], f_load_SDA = open('../data/Stacked_DA_params.p','r'), xtropy_fraction=xtropy_fraction ) ######################### # PRETRAINING THE MODEL # ######################### print '... 
getting the pretraining functions' pretraining_fns = ssda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise corruption_levels = [.1, .2, .3, .3]#[0] #[.1, .2, .3] for i in xrange(ssda.n_layers): layerpath = dir_pretrained+ 'layer'+str(i)+'_snapshot_stacked_sda2.p' if os.path.isfile(layerpath): ssda.load(open(layerpath,'r')) continue # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) #COPY OVER PRETRAINED PARAMS FROM DA'S TO HIDDEN SIGMOIDS ssda.sigmoid_layers[i].W.set_value(ssda.dA_layers[i].W.eval()) ssda.sigmoid_layers[i].b.set_value(ssda.dA_layers[i].b.eval()) ssda.out_sigmoid_layers[-i-1].W.set_value(ssda.dA_layers[i].W.T.eval()) ssda.out_sigmoid_layers[-i-1].b.set_value(ssda.dA_layers[i].b_prime.get_value()) #dump snapshot ssda.dump(open(layerpath,'w')) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL ######################## #pre-load partially finetuned version, if it exists if os.path.isfile(path_finetuned_pre): ssda.load(open(path_finetuned_pre,'r')) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model, valid_xtropy_logloss = ssda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) # xtropy_logloss_loss = valid_xtropy_logloss() # xtropy_loss = [x[0] for x in xtropy_logloss_loss] print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100., )) ssda.dump(open(path_finetuned_post,'w')) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print( ( 'Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) 
) print >> sys.stderr, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ssda.dump(open(path_finetuned_post,'w')) return ssda
def test_ssDA_nopretraining(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001, training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1, data_dir = '../data/'): xtropy_fraction = 1 path_finetuned_pre = os.path.join(data_dir,'train_snapshots/stacked_sda/stackedSDA_nopretrained_ReLU.p') path_finetuned_post = os.path.join(data_dir,'train_snapshots/stacked_sda/stackedSDA_nopretrained_ReLU_post.p') path_stacked_da = os.path.join(data_dir,'Stacked_DA_params.p') datasets = load_data(os.path.join(data_dir,dataset)) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator # start-snippet-3 numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class ssda = ssDA( numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000, 15], f_load_SDA = open(path_stacked_da,'r'), xtropy_fraction=xtropy_fraction ) #finetune training #pre-load partially finetuned version, if it exists if os.path.isfile(path_finetuned_pre): ssda.load(open(path_finetuned_pre,'r')) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model, valid_xtropy_logloss = ssda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) print '... finetuning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) # xtropy_logloss_loss = valid_xtropy_logloss() # xtropy_loss = [x[0] for x in xtropy_logloss_loss] print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100., )) ssda.dump(open(path_finetuned_post,'w')) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print( ( 'Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) 
) print >> sys.stderr, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ssda.dump(open(path_finetuned_post,'w')) return ssda
def test_rbm(dataset, learning_rate=0.1, training_epochs=5,
             batch_size=4, n_chains=4, n_hidden=7):
    """Train an RBM and return the data projected through its weights.

    After training, the weight matrix and both bias vectors are printed, and
    ``dataset`` multiplied by the weight matrix is printed, written to
    ``enhancedFMatrix.csv`` and returned.

    :param dataset: numpy array; handed to ``load_data`` and later used
                    directly in the final matrix product
    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param batch_size: size of a batch used to train the RBM
    :param n_chains: number of parallel Gibbs chains set up for sampling
    :param n_hidden: number of hidden units (also used as ``n_visible``)
    :return: ``numpy.dot(dataset, W)`` with ``W`` the trained weight matrix
    """
    splits = load_data(dataset)

    train_set_x, train_set_y = splits[0]
    test_set_x, test_set_y = splits[2]

    # number of whole minibatches in the training split
    n_train_batches = train_set_x.shape[0] // batch_size

    # symbolic minibatch index and data matrix
    index = T.lscalar()
    x = T.matrix('x')
    shared_x = theano.shared(train_set_x)

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # storage for the persistent chain (one row per example in a batch)
    chain_init = numpy.zeros((batch_size, n_hidden),
                             dtype=theano.config.floatX)
    persistent_chain = theano.shared(chain_init, borrow=True)

    # NOTE(review): n_visible is set to n_hidden, so this presumably only
    # works when the data has exactly n_hidden columns -- confirm.
    rbm = RBM(input=x, n_visible=n_hidden, n_hidden=n_hidden,
              numpy_rng=rng, theano_rng=theano_rng)

    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    batch_rows = shared_x[index * batch_size: (index + 1) * batch_size]
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: batch_rows},
        name='train_rbm'
    )

    started = timeit.default_timer()

    for epoch in range(training_epochs):
        epoch_costs = [train_rbm(batch_index)
                       for batch_index in range(n_train_batches)]
        print('Training epoch %d, cost is ' % epoch, numpy.mean(epoch_costs))

    finished = timeit.default_timer()
    pretraining_time = finished - started
    print('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    number_of_test_samples = test_set_x.shape[0]
    print(number_of_test_samples)

    # random start position for the rows that seed the visible chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    seed_rows = numpy.asarray(test_set_x[test_idx:test_idx + n_chains],
                              dtype=theano.config.floatX)
    persistent_vis_chain = theano.shared(seed_rows)

    plot_every = 1000
    # `plot_every` Gibbs steps per call of the sampling function
    scan_outputs, updates = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every,
        name="gibbs_vhv"
    )
    (presig_hids, hid_mfs, hid_samples,
     presig_vis, vis_mfs, vis_samples) = scan_outputs

    # keep the persistent chain state across calls
    updates.update({persistent_vis_chain: vis_samples[-1]})

    # NOTE(review): sample_fn is compiled but never called in this function.
    sample_fn = theano.function(
        [],
        [vis_mfs[-1], vis_samples[-1]],
        updates=updates,
        name='sample_fn'
    )

    W = rbm.W.get_value(borrow=True).T
    H = rbm.hbias.get_value(borrow=True).T
    V = rbm.vbias.get_value(borrow=True).T
    for learned in (W, H, V):
        print(learned)

    print("\n\n\nEnhanced Feature Matrix: ")
    temp = numpy.dot(dataset, numpy.transpose(W))
    print(temp)

    dataframe = pd.DataFrame(data=temp.astype(float))
    dataframe.to_csv('enhancedFMatrix.csv', sep=' ', header=False,
                     float_format='%.4f', index=False)
    return temp
import cPickle n_ins, hidden_layers_sizes, n_outs, corruption_levels, params = cPickle.load(open(filename, 'rb')) nsda = SdA(n_ins = n_ins, hidden_layers_sizes = hidden_layers_sizes, n_outs = n_outs, corruption_levels = corruption_levels) for ele, data in zip(nsda.params, params): ele.set_value(data) print nsda return nsda if __name__ == '__main__': train_ = 0 if train_: datasets = load_data('../../../Data/mnist/mnist.pkl.gz', 1000) sda = SdA(n_ins=28 * 28, hidden_layers_sizes=[10, 10, 10], n_outs=10) sda.fit(datasets) raw_dump(sda, 'testraw.dat') raw_load('testraw.dat') test_ = 1 if test_: sda = raw_load('testraw.dat') sda. #here
def test_DBN(finetune_lr=0.1,pretraining_epochs=100, pretrain_lr=0.01,k=1,training_epoch=1000, dataset='../data/mnist.pkl.gz',batch_size=10): """ 定义训练和测试深度置信网络的函数 :param finetune_lr: float 微调阶段的学习率 :param pretraining_epochs: int 进行预训练的迭代次数 :param pretrain_lr: float 预训练阶段的学习率 :param training_epoch: int 进行训练的迭代次数 :param dataset: str 数据集的路径 :param batch_size: int minibatch的大小 :return: """ ######################### # 模型初始化过程 # ######################### datasets=load_data(dataset) train_set_x,train_set_y=datasets[0] valid_set_x,valid_set_y=datasets[1] test_set_x,test_set_y=datasets[2] #计算minibatch的数量 n_train_batches=train_set_x.get_value(borrow=True).shape[0]/batch_size #numpy生成的随机数种子 numpy_rng=numpy.random.RandomState(123) print '...building the model' #实例化DBN,有三个隐层 dbn=DBN(numpy_rng,n_ins=28*28,hidden_layers_sizes=[1000,1000,1000],n_outs=10) ######################### # 模型预训练过程 # ######################### print "...getting the pretraining functions" pretraining_fns=dbn.pretraining_functions(train_set_x=train_set_x,batch_size=batch_size,k=k) print "...pretraining the model" start_time=time.clock() #逐层预训练 for i in xrange(dbn.n_layers): #遍历训练次数 for epoch in xrange(pretraining_epochs): #遍历每个minibatch c=[] #定义储存RBM中cost的列表 for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index,lr=pretrain_lr)) print "pretraining layer %i, epoch %i,cost " %(i,epoch), print numpy.mean(c) end_time=time.clock() print >>sys.stderr,("The pretraining code for file "+ os.path.split(__file__)[1]+ " ran for %0.2fm")%((end_time-start_time)/60.) ######################### # 模型微调过程 # ######################### #构造微调过程的训练函数、验证函数和测试函数 print "...getting finetuning functions" train_fn,valid_model,test_model=dbn.build_finetune_function(datasets=datasets, batch_size=batch_size,learning_rate=finetune_lr) print "...finetuning the model" #提前结束的参数设置 patience=4*n_train_batches patience_increase=2. 
improvement_threshold=0.995 #每次优化效果阈值 #在验证集合检查minibatch, #该程序中每个epoch都要检查 validation_frequency=min(n_train_batches,patience/2) #微调过程初始参数设置 best_params=None best_validation_loss=numpy.inf test_score=0. start_time=time.clock() done_looping=False epoch=0 #设置终止条件:大于设定的迭代次数或者达到don_looping while(epoch<training_epoch)and(not done_looping): epoch+=1 for minibath_index in xrange(n_train_batches): minibath_avg_lost=train_fn(minibath_index) iter=(epoch-1)*n_train_batches+minibath_index #当前minibath的总索引 #判断是否达到validation_frequency if (iter+1)%validation_frequency==0: validation_losses=valid_model() this_validation_loss=numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%') %\ (epoch,minibath_index+1,n_train_batches,this_validation_loss*100) #如果当前代价值优于历史代价值 if this_validation_loss<best_validation_loss: if this_validation_loss<best_validation_loss*improvement_threshold: patience=max(patience,iter*patience_increase) #保存最优验证值和minibatch索引 best_validation_loss=this_validation_loss best_iter=iter #在测试集进行测试 test_losses=test_model() test_score=numpy.mean(test_losses) print "epoch %i, minibath %i/%i, test error of best model %f %%" %\ (epoch,minibath_index+1,n_train_batches,test_score*100) if patience<=iter: done_looping=True break end_time=time.clock() print "Optimizaiton complete with best validation score of %f %%,"\ "with best performance %f %%"%(best_validation_loss*100.,test_score*100.) print >> sys.stderr, ("The fine tuning code for file "+ os.path.split(__file__)[1]+ " ran for %.2fm")%((end_time-start_time)/60.)
def test_dA(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, output_folder='dA_plots'): """ This demo is tested on MNIST :type learning_rate: float :param learning_rate: learning rate used for training the DeNosing AutoEncoder :type training_epochs: int :param training_epochs: number of epochs used for training :type dataset: string :param dataset: path to the picked dataset """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # start-snippet-2 # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images # end-snippet-2 if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) #################################### # BUILDING THE MODEL NO CORRUPTION # #################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500) cost, updates = da.get_cost_updates(corruption_level=0., learning_rate=learning_rate) train_da = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((training_time) / 60.)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 
10), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') # start-snippet-3 ##################################### # BUILDING THE MODEL CORRUPTION 30% # ##################################### rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500) cost, updates = da.get_cost_updates(corruption_level=0.3, learning_rate=learning_rate) train_da = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in xrange(training_epochs): # go through trainng set c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, numpy.mean(c) end_time = time.clock() training_time = (end_time - start_time) print >> sys.stderr, ('The 30% corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % (training_time / 60.)) # end-snippet-3 # start-snippet-4 image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_corruption_30.png') # end-snippet-4 os.chdir('../')
def test_dA(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """Train a dA without and with input corruption and save the filters.

    A dA with 28 * 28 visible and 500 hidden units is trained first with
    corruption level 0 and then with corruption level 0.3.  The tiled
    weights of each model are written to ``filters_corruption_0.png`` and
    ``filters_corruption_30.png`` inside ``output_folder``.

    :type learning_rate: float
    :param learning_rate: learning rate passed to ``get_cost_updates``

    :type training_epochs: int
    :param training_epochs: number of passes over the training set

    :type dataset: string
    :param dataset: path handed to ``load_data``

    :type batch_size: int
    :param batch_size: number of training examples per minibatch

    :type output_folder: string
    :param output_folder: directory (created when missing) in which the
                          images are saved
    """
    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # Integer batch count computed once, instead of a float that has to be
    # cast again on every epoch.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()
    x = T.matrix('x')

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Save the starting directory so it can be restored exactly, even for
    # a nested/absolute output_folder or when training fails.
    start_dir = os.getcwd()
    os.chdir(output_folder)
    try:
        for level, description, png_name in (
                (0., 'no corruption', 'filters_corruption_0.png'),
                (0.3, '30% corruption', 'filters_corruption_30.png')):
            # Same seed for both models.
            rng = numpy.random.RandomState(123)
            theano_rng = RandomStreams(rng.randint(2 ** 30))

            da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                    n_visible=28 * 28, n_hidden=500)

            cost, updates = da.get_cost_updates(corruption_level=level,
                                                learning_rate=learning_rate)

            train_da = theano.function(
                [index],
                cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:
                                   (index + 1) * batch_size]
                })

            start_time = timeit.default_timer()
            for epoch in range(training_epochs):
                c = [train_da(batch_index)
                     for batch_index in range(n_train_batches)]
                print('Training epoch %d, cost ' % epoch, numpy.mean(c))
            training_time = timeit.default_timer() - start_time

            print(('The ' + description + ' code for file ' +
                   os.path.split(__file__)[1] +
                   ' ran for %.2fm' % (training_time / 60.)),
                  file=sys.stderr)

            image = Image.fromarray(
                tile_raster_images(X=da.W.get_value(borrow=True).T,
                                   img_shape=(28, 28), tile_shape=(10, 10),
                                   tile_spacing=(1, 1)))
            image.save(png_name)
    finally:
        os.chdir(start_dir)
def test_dA(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """Train two dAs on MNIST, save their filters and plot weight histograms.

    A dA with 28 * 28 visible and 500 hidden units is trained with a
    corruption level of 0 and then again with a corruption level of 0.3.
    For each model the tiled weights are saved as a PNG in
    ``output_folder`` and a 100-bin histogram of all weights is drawn
    (figure 2 for the first model, figure 3 for the second).  The figures
    are displayed with ``plt.show()`` once both models are trained.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: number of training examples per minibatch

    :type output_folder: string
    :param output_folder: directory (created when missing) that receives
                          the filter images
    """
    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # compute number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # '../' only undoes the chdir for a single-component relative folder;
    # restore the saved directory instead, also when an error occurs.
    original_cwd = os.getcwd()
    os.chdir(output_folder)
    try:
        runs = (
            (0., 'no corruption', 'filters_corruption_0.png', 2),
            (0.3, '30% corruption', 'filters_corruption_30.png', 3),
        )
        for corruption_level, label, image_name, figure_number in runs:
            rng = numpy.random.RandomState(123)
            theano_rng = RandomStreams(rng.randint(2 ** 30))

            da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                    n_visible=28 * 28, n_hidden=500)

            cost, updates = da.get_cost_updates(
                corruption_level=corruption_level,
                learning_rate=learning_rate)

            # `givens` substitutes one slice of the training matrix for x,
            # so the compiled function only takes `index` as an input.
            train_da = theano.function(
                [index],
                cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:
                                   (index + 1) * batch_size]
                })

            start_time = timeit.default_timer()

            # go through training epochs
            for epoch in range(training_epochs):
                # go through the training set
                c = [train_da(batch_index)
                     for batch_index in range(n_train_batches)]
                print('Training epoch %d, cost ' % epoch, numpy.mean(c))

            training_time = timeit.default_timer() - start_time

            print(('The ' + label + ' code for file ' +
                   os.path.split(__file__)[1] +
                   ' ran for %.2fm' % (training_time / 60.)),
                  file=sys.stderr)

            image = Image.fromarray(
                tile_raster_images(X=da.W.get_value(borrow=True).T,
                                   img_shape=(28, 28), tile_shape=(10, 10),
                                   tile_spacing=(1, 1)))
            image.save(image_name)

            # Histogram of every weight of the trained model.  `normed`
            # was removed from Matplotlib; `density` is its replacement.
            weights = da.W.get_value()
            plt.figure(figure_number)
            plt.hist(weights.ravel(), 100, density=True, facecolor='green')

        plt.show()
    finally:
        os.chdir(original_cwd)
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset=DataSet,
                    nkerns=[cls1, cls2], batch_size=100):
    """Train a one-conv-layer network with a side hidden layer and log results.

    The first ``iFMs * nFB * nFs`` columns of every input row are reshaped
    to ``(batch_size, iFMs, nFB, nFs)`` and fed to a LeNetConvPoolLayer;
    the remaining columns go through a tanh HiddenLayer (``n_in=14``).
    Both outputs are concatenated, passed through another tanh
    HiddenLayer and classified by a LogisticRegression layer.

    Training uses SGD with a step size that starts at 0.2.  Whenever a
    validation pass does not improve on the last accepted loss and more
    than 40 epochs have passed since the last reduction, the step size is
    halved and the best parameters seen so far are restored.  Training
    stops once the step size is at or below 1e-5 or ``n_epochs`` is
    reached.  A result line is appended to the module-level ``outFile``.

    :type learning_rate: float
    :param learning_rate: not used by this implementation; the step size
                          is controlled by the internal schedule above

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (only ``nkerns[0]`` is
                   read)

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print(type(train_set_x))

    # compute number of minibatches for training, validation and testing
    # (floor division so the counts stay ints on Python 3 too)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    print(n_test_batches)

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    Alr = T.scalar('Alr')  # step size, supplied on every training call
    x = T.matrix('x')      # one example per row
    y = T.ivector('y')     # the labels are presented as 1D vector of
                           # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Split every row into the part that is convolved and the remainder
    # that feeds the side hidden layer.
    dFeatureV = iFMs * nFB * nFs
    layer0_input = x[:, :dFeatureV].reshape((batch_size, iFMs, nFB, nFs))
    layer1H_input = x[:, dFeatureV:]

    # Convolution + pooling layer.
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, iFMs, nFB, nFs),
                                filter_shape=(nkerns[0], iFMs, fsx, fsy),
                                poolsize=(p, p))

    # Size of one pooled feature map.
    cl2x = (nFB - fsx + 1) // p
    cl2y = (nFs - fsy + 1) // p

    layer1H = HiddenLayer(rng, input=layer1H_input, n_in=14,
                          n_out=nhus, activation=T.tanh)

    hl1 = cl2x * cl2y

    # The fully-connected layer works on 2D matrices, so flatten the
    # feature maps and append the side layer's output.
    layer2_input = layer0.output.flatten(2)
    layer2_inputT = T.concatenate([layer2_input, layer1H.output], axis=1)

    layer2 = HiddenLayer(rng, input=layer2_inputT,
                         n_in=(nkerns[0] * hl1 * 1) + nhus,
                         n_out=nhu1, activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=nhu1, n_out=n_out)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # returns the error rate and the predicted labels of one test batch
    test_model = theano.function(
        [index],
        [layer3.errors(y), layer3.y_pred],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1H.params + layer0.params
    grads = T.grad(cost, params)

    # plain SGD step for every parameter, scaled by the symbolic Alr
    updates = [(param_i, param_i - Alr * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index, Alr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size][:],
            y: train_set_y[index * batch_size:(index + 1) * batch_size][:]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    patience = 10000               # only used to derive validation_frequency
    patience_increase = 2
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)

    best_params = [param.get_value() for param in params]
    best_validation_loss = numpy.inf
    prev_validation_loss = 200
    best_iter = 0
    test_score = 0.

    # Defaults for the reported statistics, so the final log line can be
    # written even if no validation pass ever improved.
    acc1 = acc2 = acc3 = acc4 = 0
    # confusion[t][q]: test examples with true label t predicted as q
    confusion = [[0] * 4 for _ in range(4)]

    Alrc = 0.2      # current step size
    AlrE = 0.00001  # stop once the step size has shrunk to this value
    epochC = 0      # epochs since the step size was last reduced
    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epochC = epochC + 1
        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index, Alrc)

            if (iteration + 1) % validation_frequency != 0:
                continue

            # compute zero-one loss on validation set
            validation_losses = [validate_model(i)
                                 for i in range(n_valid_batches)]
            this_validation_loss = numpy.mean(validation_losses)
            lossratio = ((this_validation_loss - prev_validation_loss) /
                         (prev_validation_loss + 1))
            print(lossratio)
            print('epoch %i, minibatch %i/%i, validation error %f, lr %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100., Alrc))

            # NOTE(review): the indentation of the original was lost; the
            # `else` below is assumed to belong to this `if` - confirm.
            if lossratio <= 0.0:
                # not worse than the last accepted loss: keep this model
                best_params = [param.get_value() for param in params]

                # improve patience if loss improvement is good enough
                if (this_validation_loss <
                        best_validation_loss * improvement_threshold):
                    patience = max(patience, iteration * patience_increase)

                # save best validation score and iteration number
                best_validation_loss = this_validation_loss
                prev_validation_loss = this_validation_loss
                best_iter = iteration

                # Evaluate every test batch once and keep both outputs.
                test_results = [test_model(i) for i in range(n_test_batches)]
                test_losses = [result[0] for result in test_results]
                # The leading empty float array keeps the float dtype and
                # handles n_test_batches == 0.
                yP = numpy.concatenate(
                    [numpy.asarray([])] +
                    [result[1] for result in test_results])
                print(yP.shape)
                test_score = numpy.mean(test_losses)

                # True labels: the first 3000 values of the variable that
                # test_set_y was derived from.
                y_true = test_set_y.owner.inputs[0].get_value()[:3000]
                print(yP.shape)
                print(y_true.shape)

                class_indices = [numpy.nonzero(y_true == label)[0]
                                 for label in range(4)]
                for indices in class_indices:
                    print(indices.shape)

                confusion = [
                    [numpy.nonzero(yP[indices] == predicted)[0].size
                     for predicted in range(4)]
                    for indices in class_indices
                ]

                acc1 = float(confusion[0][0]) / float(class_indices[0].size)
                acc2 = float(confusion[1][1]) / float(class_indices[1].size)
                if n_out in (3, 4):
                    acc3 = (float(confusion[2][2]) /
                            float(class_indices[2].size))
                else:
                    acc3 = 0
                # acc4 used to stay unassigned for n_out == 3, which made
                # the report below fail with a NameError.
                if n_out == 4:
                    acc4 = (float(confusion[3][3]) /
                            float(class_indices[3].size))
                else:
                    acc4 = 0

                flat_counts = tuple(count for row in confusion
                                    for count in row)
                print((' epoch %i, minibatch %i/%i, test error of '
                       'best model %f, acc1 = %f, acc2 = %f, acc3 = %f, '
                       'acc4 = %f, I11 = %i, I12 = %i, I13 = %i, I14 = %i, '
                       'I21 = %i, I22 = %i, I23 = %i, I24 = %i, '
                       'I31 = %i, I32 = %i, I33 = %i, I34 = %i, '
                       'I41 = %i, I42 = %i, I43 = %i, I44 = %i %%') %
                      ((epoch, minibatch_index + 1, n_train_batches,
                        test_score * 100., acc1 * 100., acc2 * 100.,
                        acc3 * 100, acc4 * 100) + flat_counts))
            else:
                if Alrc <= AlrE:
                    done_looping = True
                    break
                elif epochC > 40:
                    # halve the step size and restart from the best model
                    Alrc = Alrc / 2
                    for param, best_param in zip(params, best_params):
                        param.set_value(best_param)
                    epochC = 0

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    flat_counts = [count for row in confusion for count in row]
    # NOTE(review): this logs the module-level DataSet, not the `dataset`
    # argument - confirm that is intended.
    with open(outFile, 'a') as result_file:
        print(DataSet, n_out, fsx, fsy, p, cls1, cls2, nhu1, nFB, nFs, iFMs,
              nhus, batch_size, test_score * 100., acc1 * 100., acc2 * 100.,
              acc3 * 100, acc4 * 100, *flat_counts, file=result_file)
def mlp_run(train_set, valid_set, test_set, learning_rate=0.01, L1_reg=0.00,
            L2_reg=0.0001, n_epochs=1000, batch_size=20, n_hidden=500):
    """Train a two-class MLP with minibatch SGD, selecting on validation AUC.

    The model is an ``MLP`` with ``n_hidden`` hidden units and two
    outputs.  After every ``validation_frequency`` minibatches the
    validation error and the validation AUC are computed.  A new best AUC
    triggers a test-set evaluation; an improvement larger than
    ``FLAGS.min_improvement`` also extends the patience.  Training stops
    after ``n_epochs`` epochs or when the patience (initially
    ``FLAGS.iter``) is exhausted.

    :param train_set: passed to ``load_data`` to obtain the training
                      inputs and labels
    :param valid_set: passed to ``load_data`` to obtain the validation
                      data
    :param test_set: passed to ``load_data`` to obtain the test data; the
                     validation data is reused when it equals ``valid_set``

    :type learning_rate: float
    :param learning_rate: initial SGD step size; it is multiplied by 0.8
                          at epoch 10, by 0.5 at epoch 20 and set to 0.01
                          at epoch 30

    :type L1_reg: float
    :param L1_reg: weight of the L1 regularization term

    :type L2_reg: float
    :param L2_reg: weight of the squared L2 regularization term

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: number of hidden units of the MLP
    """
    print('loading ', train_set, ' for train')
    train_set_x, train_set_y = load_data(train_set)
    print('loading ', valid_set, ' for valid')
    valid_set_x, valid_set_y = load_data(valid_set)
    print('loading ', test_set, ' for test')
    if test_set != valid_set:
        test_set_x, test_set_y = load_data(test_set)
    else:
        test_set_x, test_set_y = valid_set_x, valid_set_y

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print("train_set_x size:", train_set_x.get_value(borrow=True).shape[0])
    print("batch_size:", batch_size)
    print("n_train_batches:", n_train_batches)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # one example per row
    y = T.ivector('y')   # labels, presented as 1D vector of [int] labels

    total_dim = train_set_x.get_value(borrow=True).shape[1]

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=total_dim,
                     n_hidden=n_hidden, n_out=2)

    # the cost we minimize during training is the negative log likelihood
    # of the model plus the regularization terms (L1 and L2)
    cost = (classifier.negative_log_likelihood(y) +
            L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    gparams = [T.grad(cost, param) for param in classifier.params]

    # The step size lives in a shared variable.  A plain Python float is
    # frozen into the graph when train_model is compiled, so the schedule
    # in the training loop could never take effect.
    shared_lr = theano.shared(
        numpy.asarray(learning_rate, dtype=theano.config.floatX),
        name='learning_rate')

    updates = [(param, param - shared_lr * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # error rate of the model on one minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # AUC over the complete validation / test set
    validate_auc = theano.function(
        inputs=[],
        outputs=classifier.auc(y),
        givens={x: valid_set_x, y: valid_set_y})

    test_auc = theano.function(
        inputs=[],
        outputs=classifier.auc(y),
        givens={x: test_set_x, y: test_set_y})

    # returns the cost and applies `updates` to the parameters
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')

    # early-stopping parameters
    patience = FLAGS.iter          # look as this many examples regardless
    patience_increase = 2          # wait this much longer when a new best
                                   # is found
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)
    print("n_train_batches:", n_train_batches)
    print("validation_frequency:", validation_frequency)

    best_validation_loss = numpy.inf
    best_auc = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        # step-size schedule
        if epoch == 10:
            learning_rate *= 0.8
        if epoch == 20:
            learning_rate *= 0.5
        if epoch == 30:
            learning_rate = 0.01
        if epoch in (10, 20, 30):
            # push the new value into the compiled graph
            shared_lr.set_value(
                numpy.asarray(learning_rate, dtype=theano.config.floatX))

        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                auc = numpy.mean([validate_auc()])
                print("current valid auc: ", auc, " best auc: ", best_auc,
                      " imporve: ", auc - best_auc, " significant?: ",
                      auc - best_auc > FLAGS.min_improvement)

                if auc > best_auc:
                    if auc - best_auc > FLAGS.min_improvement:
                        print('before patience:', patience,
                              ' iter:', iteration)
                        patience = max(patience,
                                       iteration * patience_increase)
                        print('after patience:', patience)
                    best_auc = auc

                    # Record the error rates of this best-AUC model; the
                    # final summary used to print "inf" and 0 because
                    # neither value was ever updated.
                    best_validation_loss = this_validation_loss
                    test_score = numpy.mean(
                        [test_model(i) for i in range(n_test_batches)])

                    testauc = numpy.mean([test_auc()])
                    print("test auc: ", testauc)

            if patience <= iteration:
                done_looping = True
                print("patience:", patience, "iter:", iteration,
                      "done_looping:", done_looping)
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    print('best valid auc is ', best_auc)
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.1fs' % ((end_time - start_time))),
          file=sys.stderr)
def test_SdA(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001,
             training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1):
    """Pre-train and fine-tune a stacked denoising autoencoder on MNIST.

    An SdA with three hidden layers of 1000 units (28 * 28 inputs, 10
    outputs) is pre-trained layer by layer with corruption levels 0.1,
    0.2 and 0.3, then fine-tuned with early stopping on the validation
    error.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage (factor
                        for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # compute number of minibatches for training (kept an int on Python 3)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print('... pre-training the model')
    # time.clock() no longer exists on Python >= 3.8.
    start_time = time.perf_counter()
    # Pre-train layer-wise; one corruption level per layer.
    corruption_levels = [.1, .2, .3]
    for i in range(sda.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index,
                                    corruption=corruption_levels[i],
                                    lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                  numpy.mean(c))

    end_time = time.perf_counter()

    print(('The pretraining code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print('... finetunning the model')
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.           # wait this much longer when a new best
                                     # is found
    improvement_threshold = 0.995    # a relative improvement of this much
                                     # is considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.perf_counter()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score
                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.perf_counter()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print(('The training code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def test_rbm(learning_rate=0.1, training_epochs=5,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='newRBM_plots',
             n_hidden=625, n_output=10):
    """Train a ``classRBM`` on MNIST and save its filters after every epoch.

    After each epoch the ``W`` and ``U`` weights are tiled into images and
    written to ``output_folder``.

    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param dataset: path to the pickled dataset
    :param batch_size: size of a batch used to train the RBM
    :param n_chains: not used by this function
    :param n_samples: not used by this function
    :param output_folder: directory (created when missing) that receives
                          the filter images
    :param n_hidden: number of hidden units of the RBM
    :param n_output: passed to ``classRBM`` as ``n_output``
    """
    datasets = load_data(dataset, 1)

    train_set_x, train_set_y = datasets[0]
    test_set_x, _ = datasets[2]

    # compute number of minibatches for training (kept an int on Python 3)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    print(number_of_test_samples)

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    y = T.matrix('y', dtype='int32')
    test_x = T.matrix('test_x')
    test_y = T.matrix('test_y', dtype='int32')

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # construct the RBM class
    rbm = classRBM(inputx=x, inputy=y, testx=test_x, testy=test_y,
                   n_visible=28 * 28, n_hidden=n_hidden, n_output=n_output,
                   numpy_rng=rng, theano_rng=theano_rng,
                   batch_size=batch_size, n_samples=number_of_test_samples)

    # cost and parameter updates for k=1
    cost, updates = rbm.get_cost_updates(lr=learning_rate, k=1)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    # NOTE(review): the working directory is never changed back, so the
    # process stays inside output_folder after this function returns -
    # confirm that callers rely on this before changing it.
    os.chdir(output_folder)

    # one training step on the minibatch selected by `index`
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='train_rbm')

    plotting_time = 0.
    # time.clock() no longer exists on Python >= 3.8.
    start_time = time.perf_counter()

    # go through training epochs
    for epoch in range(training_epochs):

        # go through the training set
        mean_cost = []
        print('number of batches = ', n_train_batches)
        print('Epoch = ', epoch)
        for batch_index in range(n_train_batches):
            if batch_index % 500 == 0:
                print(batch_index)
            mean_cost.append(train_rbm(batch_index))

        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))

        # Plot filters after each training epoch; this time is excluded
        # from the reported training time.
        plotting_start = time.perf_counter()
        # Construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(X=rbm.W.get_value(borrow=True).T,
                               img_shape=(28, 28), tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)

        # NOTE(review): (25, 25) matches the default n_hidden=625 only -
        # confirm the shape of rbm.U before using another n_hidden.
        image = Image.fromarray(
            tile_raster_images(X=rbm.U.get_value(borrow=True).T,
                               img_shape=(25, 25), tile_shape=(1, 10),
                               tile_spacing=(1, 1)))
        image.save('U_filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.perf_counter()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.perf_counter()

    pretraining_time = (end_time - start_time) - plotting_time

    print('Training took %f minutes' % (pretraining_time / 60.))
def test_static_activations(rng, batch_size=1, learning_rate=0.01,
                            n_epochs=1000, L1_reg=0.0, L2_reg=0.0001):
    """Train a two-layer ``HiddenBlockLayer`` network on MNIST with SGD.

    The per-layer input/output index patterns are read from
    ``ins_and_outs.pkl`` and stay fixed for the whole run. Training uses
    patience-based early stopping driven by the validation error.

    :param rng: accepted for interface compatibility; not read in this
        function
    :param batch_size: number of examples per minibatch
    :param learning_rate: step size of the plain SGD update
    :param n_epochs: maximum number of passes over the training set
    :param L1_reg: L1 weight handed to ``add_regularization``
    :param L2_reg: L2 weight handed to ``add_regularization``
    """
    # Function-scope import: time.clock() no longer exists on Python >= 3.8.
    import timeit

    def _as_3d_shared(shared_x, name):
        """Re-wrap a (rows, cols) shared dataset as (rows, 1, cols)."""
        raw = shared_x.get_value(borrow=True, return_internal_type=True)
        return shared(raw.reshape((raw.shape[0], 1, raw.shape[1])),
                      borrow=True, name=name)

    print("Loading data")
    print("... MNIST")
    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Feature width must be read before the data is reshaped to 3-D.
    x_size = train_set_x.shape[1].eval()
    train_set_x = _as_3d_shared(train_set_x, 'train_set_x')
    valid_set_x = _as_3d_shared(valid_set_x, 'valid_set_x')
    # Was labelled 'valid_set_x' by copy-paste; the name only affects
    # debugging/graph printing.
    test_set_x = _as_3d_shared(test_set_x, 'test_set_x')

    # Floor division: these counts are used as range() bounds.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    n_units_per = 32

    print("... Activation patterns")
    ins, outs = cnx.load('ins_and_outs.pkl')
    in_idxs = []
    out_idxs = []
    for i in range(len(ins)):
        in_idxs.append(
            shared(cnx.repeat(ins[i], batch_size), name='in_idxs_%i' % i))
        out_idxs.append(
            shared(cnx.repeat(outs[i], batch_size), name='out_idxs_%i' % i))

    print("Building model")
    index = T.lscalar('index')
    x = T.tensor3('x', dtype=config.floatX)
    y = T.ivector('y')

    layers = []
    n_in = 1
    n_out = 500
    layers.append(
        HiddenBlockLayer((n_in, x_size), (n_out, n_units_per),
                         in_idxs[0], out_idxs[0], batch_size,
                         activation=T.tanh,
                         name='layer_' + str(len(layers))))
    n_in = n_out
    n_out = 10
    layers.append(
        HiddenBlockLayer((n_in, n_units_per), (n_out, n_units_per),
                         in_idxs[1], out_idxs[1], batch_size,
                         None,
                         name='layer_' + str(len(layers))))
    # Start the output layer from all-zero weights.
    layers[-1].W.set_value(0 * layers[-1].W.get_value())

    print("... Building cost and error equations")
    activation = x
    for layer in layers:
        activation = layer.output(activation)
    activation = T.nnet.softmax(T.mean(activation, axis=2))
    cost = add_regularization(layers, layers[-1].cost(activation, y),
                              L1_reg, L2_reg)
    error = layers[-1].error(activation, y)

    print("... Building parameter updates")
    param_updates = []
    for layer in layers:
        for param in layer.params:
            gparam = T.grad(cost, param)
            param_updates.append((param, param - learning_rate * gparam))

    def _batch_givens(set_x, set_y):
        """Map the symbolic inputs to minibatch ``index`` of a dataset."""
        return {
            x: set_x[index * batch_size:(index + 1) * batch_size],
            y: set_y[index * batch_size:(index + 1) * batch_size]
        }

    print("... Compiling train function")
    train_model = function([index], cost, updates=param_updates,
                           givens=_batch_givens(train_set_x, train_set_y))

    print("... Compiling test function")
    test_model = function([index], error,
                          givens=_batch_givens(test_set_x, test_set_y))

    print("... Compiling validate function")
    validate_model = function([index], error,
                              givens=_batch_givens(valid_set_x, valid_set_y))

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # minimum number of minibatches to look at
    patience_increase = 100  # patience multiplier when a new best is found
    improvement_threshold = 0.995  # relative gain counted as significant
    # Validate after this many minibatches (once per epoch at most).
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    accum = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            accum = accum + minibatch_avg_cost

            # global minibatch counter
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                accum = accum / validation_frequency
                print("minibatch_avg_cost:  %s" % (accum,))
                # Restart the running sum; previously the old mean leaked
                # into every later average.
                accum = 0

                # zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # extend patience only for a significant improvement
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # score the new best model on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) return train_prob, test_prob, valid_prob, dbn.rbm_layers if __name__ == '__main__': dataset = 'mnist.pkl.gz' datasets = load_data(dataset) # train_set_x shape is 500000 *784 train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] numpy_rng = numpy.random.RandomState(123) train_prob, test_prob, valid_prob, rbm_layers = test_DBN( finetune_lr=0.1, pretraining_epochs=10, pretrain_lr=0.01, k=1, training_epochs=10, train_set_x=train_set_x, train_set_y=train_set_y, valid_set_x=valid_set_x, valid_set_y=valid_set_y,
def test_dA_sanity(learning_rate=0.001, training_epochs=500,
                   dataset='../datasets/da_sanity.pkl.gz', batch_size=15):
    """Train a denoising auto-encoder on the ``da_sanity`` dataset.

    Builds a ``dA`` with 100 visible and 80 hidden units, trains it with a
    corruption level of 0.3, and prints the mean minibatch cost per epoch.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
        auto-encoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    # Function-scope import: time.clock() no longer exists on Python >= 3.8.
    import timeit

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # Floor division: the count is used as a range() bound below.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    #####################################
    # BUILDING THE MODEL 30% CORRUPTION #
    #####################################
    rng = numpy.random.RandomState(42)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=100, n_hidden=80)

    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    for epoch in range(training_epochs):
        # one full pass over the training set
        batch_costs = [train_da(batch_index)
                       for batch_index in range(n_train_batches)]
        # Two spaces after "cost" reproduce the Python 2 print-statement
        # output of the original.
        print('Training epoch %d, cost  %s' % (epoch,
                                               numpy.mean(batch_costs)))

    end_time = timeit.default_timer()
    training_time = end_time - start_time

    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % (training_time / 60.) + '\n')
def evaluate_lenet5(
        learning_rate=0.1,
        n_epochs=200,
        dataset="mnist.pkl.gz",
        nkerns=(20, 50),
        batch_size=500,
):
    """Demonstrate LeNet on the MNIST dataset.

    Builds two convolution/pooling layers, a tanh hidden layer and a
    logistic-regression output, trains them with minibatch SGD and
    patience-based early stopping, pickles the layers and returns the
    recorded error curves.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing
        (MNIST here)

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each convolutional layer; only
        indexed, never mutated

    :type batch_size: int
    :param batch_size: number of examples per minibatch; also baked into
        the reshape of the network input

    :return: ``(err_train, err_valid, err_test)``. ``err_train`` has one
        entry per minibatch, ``err_valid`` one per validation pass and
        ``err_test`` one per new best validation score.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # display some chars:
    display_some(train_set_x, train_set_y.eval(), n=5, title="label=")

    # number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape the (batch_size, 28 * 28) matrix of rasterized images into the
    # 4D tensor expected by LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # First conv/pool layer: filtering gives (28-5+1, 28-5+1) = (24, 24),
    # max-pooling gives (12, 12); output is (batch_size, nkerns[0], 12, 12).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
    )

    # Second conv/pool layer: filtering gives (12-5+1, 12-5+1) = (8, 8),
    # max-pooling gives (4, 4); output is (batch_size, nkerns[1], 4, 4).
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # The fully-connected layer works on 2D input, so flatten to
    # (batch_size, nkerns[1] * 4 * 4).
    layer2_input = layer1.output.flatten(2)

    # fully-connected layer with tanh activation
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    def _batch_givens(set_x, set_y):
        """Map the symbolic inputs to minibatch ``index`` of a dataset."""
        return {
            x: set_x[index * batch_size:(index + 1) * batch_size],
            y: set_y[index * batch_size:(index + 1) * batch_size],
        }

    # test_model returns both the error rate and the predicted labels
    test_model = theano.function(
        [index],
        [layer3.errors(y), layer3.y_pred],
        givens=_batch_givens(test_set_x, test_set_y),
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens=_batch_givens(valid_set_x, valid_set_y),
    )

    # all model parameters to be fit by gradient descent, and their gradients
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    # plain SGD: one (param, new_value) pair per parameter
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        inputs=[index],
        outputs=[cost, layer3.errors(y)],
        updates=updates,
        givens=_batch_givens(train_set_x, train_set_y),
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    # early-stopping parameters
    patience = 10000  # minimum number of minibatches to look at
    patience_increase = 2  # patience multiplier when a new best is found
    improvement_threshold = 0.995  # relative gain counted as significant
    # Validate after this many minibatches (once per epoch at most).
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # error curves returned to the caller
    err_train = []  # train error, one entry per minibatch
    err_valid = []  # validation error, one entry per validation pass
    err_test = []  # test error, one entry per new best validation score

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)

            # train_model returns [cost, error]; only the error is recorded
            train_outputs = train_model(minibatch_index)
            err_train.append(train_outputs[1])

            if (iteration + 1) % validation_frequency == 0:

                # zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                err_valid.append(this_validation_loss)
                print("epoch %i, minibatch %i/%i, validation error %f %%" % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.0,
                ))

                if this_validation_loss < best_validation_loss:

                    # extend patience only for a significant improvement
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # score the new best model on the test set
                    test_losses = [
                        test_model(i)[0] for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    err_test.append(test_score)
                    print(("     epoch %i, minibatch %i/%i, test error of "
                           "best model %f %%") % (
                               epoch,
                               minibatch_index + 1,
                               n_train_batches,
                               test_score * 100.0,
                           ))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%" %
          (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    print(
        ("The code for file " + os.path.split(__file__)[1] +
         " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # NOTE: despite the file name, this pickles the layers as they are at the
    # END of training; no snapshot is taken at the best validation iteration.
    model = [layer0, layer1, layer2, layer3]
    with open("../doc/data/best_model.pkl", "wb") as f:
        pickle.dump(model, f)

    # predictions for the first minibatch of the test set
    test_pred_y = test_model(0)[1]
    # display some chars using the predictions (n must be < batch_size)
    display_some(test_set_x, test_pred_y, n=5, title="pred=")

    return err_train, err_valid, err_test
def test_DBN(finetune_lr=lr,
             pretraining_epochs=100,
             pretrain_lr=0.0025,
             k=2,
             training_epochs=NOfepoch,
             dataset=DataSet,
             batch_size=batchSize):
    """Pre-train and fine-tune a Deep Belief Network, then log the results.

    Besides its parameters, this function reads the module-level names
    ``n_hus``, ``n_hls``, ``n_out``, ``outFile``, ``DataSet``, ``lr``,
    ``batchSize`` and ``NOfepoch``. One summary line is appended to
    ``outFile`` when training ends.

    :type finetune_lr: float
    :param finetune_lr: learning rate handed to
        ``dbn.build_finetune_functions``. NOTE(review): each ``train_fn``
        call is additionally given a separate step size that starts at 0.1
        and is halved on rejected epochs -- confirm how the two interact.

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch

    :raises ZeroDivisionError: if a class that is scored (0 and 1 always,
        2 when ``n_out`` is 3 or 4, 3 when ``n_out`` is 4) has no example
        in the test labels.
    """
    n_tracked = 4  # labels 0..3 are tabulated regardless of n_out

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: the count is used as a range() bound below.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=552,
              hidden_layers_sizes=[n_hus] * n_hls,
              n_outs=n_out)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        for epoch in range(pretraining_epochs):
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer ', i, ' epoch ', epoch, 'cost ',
                  numpy.mean(c))

    ########################
    # FINETUNING THE MODEL #
    ########################
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model, gety_pred = \
        dbn.build_finetune_functions(datasets=datasets,
                                     batch_size=batch_size,
                                     learning_rate=finetune_lr)

    print('... finetunning the model')
    # Patience bookkeeping is kept from the tutorial, but nothing below reads
    # ``patience`` after ``validation_frequency`` is computed: the
    # patience-based stop is not active in this function.
    patience = 4 * n_train_batches
    patience_increase = 2.
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    prev_validation_loss = 200
    test_score = 0.

    step_lr = 0.1  # step size passed to train_fn; halved on rejection
    min_lr = 0.00001  # training stops once step_lr has decayed to this
    done_looping = False
    epoch = 0
    epochs_since_decay = 0

    # Snapshot of the best parameters seen so far.
    best_params = [param.get_value() for param in dbn.params]

    # Defined up front so the report and the final summary never hit an
    # unbound name (previously acc4 was unset for n_out == 3, and everything
    # was unset when no validation step improved).
    acc = [0] * n_tracked
    counts = [[0] * n_tracked for _ in range(n_tracked)]

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        epochs_since_decay = epochs_since_decay + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index, step_lr)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency != 0:
                continue

            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            lossratio = ((this_validation_loss - prev_validation_loss) /
                         (prev_validation_loss + 1))
            print(lossratio)
            print('epoch %i, minibatch %i/%i, validation error %f, lr %f %%' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100., step_lr))

            if lossratio <= 0.0:
                # Validation did not get worse: accept this state.
                best_params = [param.get_value() for param in dbn.params]

                if (this_validation_loss <
                        best_validation_loss * improvement_threshold):
                    patience = max(patience, iteration * patience_increase)

                best_validation_loss = this_validation_loss
                prev_validation_loss = this_validation_loss

                # test it on the test set
                test_losses = test_model()
                test_score = numpy.mean(test_losses)

                yP = gety_pred()
                y = test_set_y.owner.inputs[0].get_value()
                print(yP.shape)
                print(y.shape)

                # Indices of the test examples carrying each true label.
                class_idx = [numpy.nonzero(y == float(label))[0]
                             for label in range(n_tracked)]
                for idx in class_idx:
                    print(idx.shape)

                # counts[t][p]: examples with true label t predicted as p.
                counts = [[numpy.nonzero(yP[idx] == pred)[0].size
                           for pred in range(n_tracked)]
                          for idx in class_idx]

                # Classes 0 and 1 are always scored; class 2 only for
                # n_out in (3, 4); class 3 only for n_out == 4. Unscored
                # classes stay at integer 0, as in the original else-branch.
                if n_out == 4:
                    n_scored = 4
                elif n_out == 3:
                    n_scored = 3
                else:
                    n_scored = 2
                acc = [0] * n_tracked
                for label in range(n_scored):
                    acc[label] = (float(counts[label][label]) /
                                  float(class_idx[label].size))

                flat_counts = [n for row in counts for n in row]
                print((
                    ' epoch %i, minibatch %i/%i, test error of '
                    'best model %f, acc1 = %f, acc2 = %f, acc3 = %f, acc4 = %f, I11 = %i, I12 = %i, I13 = %i, I14 = %i, I21 = %i, I22 = %i, I23 = %i, I24 = %i, I31 = %i, I32 = %i, I33 = %i, I34 = %i, I41 = %i, I42 = %i, I43 = %i, I44 = %i %%'
                ) % ((epoch, minibatch_index + 1, n_train_batches,
                      test_score * 100., acc[0] * 100., acc[1] * 100.,
                      acc[2] * 100, acc[3] * 100) + tuple(flat_counts)))
            else:
                # Validation got worse.
                if step_lr <= min_lr:
                    done_looping = True
                    break
                elif epochs_since_decay > 40:
                    # Reject: halve the step size and roll the network back
                    # to the best parameters seen so far.
                    step_lr = step_lr / 2
                    for param, best_param in zip(dbn.params, best_params):
                        param.set_value(best_param)
                    epochs_since_decay = 0

    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))

    # NOTE: the summary logs the module-level DataSet/lr/batchSize/NOfepoch,
    # not the values actually passed as arguments.
    summary = [DataSet, lr, n_hls, n_hus, n_out, batchSize, NOfepoch, outFile,
               test_score * 100., acc[0] * 100., acc[1] * 100.,
               acc[2] * 100, acc[3] * 100]
    summary.extend(n for row in counts for n in row)
    with open(outFile, 'a') as out_handle:
        print(*summary, file=out_handle)
def test_mlp(learning_rate=0.05,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             split=0,
             batch_size=1,
             n_hidden=[100],
             rot=5,
             seuil=0.25):
    """Train an MLP whose output has the same width as its input.

    After each validation step the Jaccard score of the thresholded
    predictions is reported for the train and validation sets, and for the
    test set whenever the validation score improves. Picture mosaics are
    written under ``images/``.

    :param learning_rate: step size of the plain SGD update
    :param L1_reg: weight of ``classifier.L1`` in the cost
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost
    :param n_epochs: maximum number of passes over the training set
    :param split: argument forwarded to ``load_data``
    :param batch_size: number of examples per training minibatch
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``. The default is a
        shared list object; it is not modified in this function.
    :param rot: argument forwarded to ``rotate_data`` before each epoch
    :param seuil: threshold applied to the masked prediction to binarize it
    """
    # Function-scope import: time.clock() no longer exists on Python >= 3.8.
    import timeit

    datasets = load_data(split)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: the count is used as a range() bound below.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # network input
    y = T.matrix('y')  # target, a matrix like the input

    rng = numpy.random.RandomState(1234)
    shp = train_set_x.get_value().shape[1]

    # input and output layers share the same width
    classifier = MLP(rng=rng, input=x, n_in=shp, n_hidden=n_hidden, n_out=shp)

    # negative log likelihood plus the L1 and L2 regularization terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    def _single_sample_predictor(set_x, set_y):
        """Compile a function returning (prediction, target) for one row."""
        return theano.function(inputs=[index],
                               outputs=[classifier.y_pred, y],
                               givens={
                                   x: set_x[index:(index + 1)],
                                   y: set_y[index:(index + 1)]
                               })

    pred_test = _single_sample_predictor(test_set_x, test_set_y)
    pred_train = _single_sample_predictor(train_set_x, train_set_y)
    pred_valid = _single_sample_predictor(valid_set_x, valid_set_y)

    def evaluation(fn, d, ens, epoch, seuil, plot):
        """Return the mean Jaccard score of ``fn`` over every row of ``d``.

        ``ens`` labels the printed line and the saved image; ``plot``
        switches the picture mosaic on.
        """
        inputs = d.get_value()
        n_samples = inputs.shape[0]
        if plot:
            bigpic = []
        acc = []
        for i in range(n_samples):
            pred, true = fn(i)
            # zero the prediction wherever the input is not positive
            pred_mask = pred * (inputs[i] > 0)
            # builtin int: numpy.int has been removed from recent NumPy
            pred_out = (pred_mask >= seuil).astype(int)
            true_out = true.astype(int)
            acc += [jaccard(pred_out, true_out)]
            if plot:
                bigpic += [inputs[i], pred, pred_mask, pred_out, true_out]
        this_acc = numpy.mean(acc)
        std_acc = numpy.std(acc)
        print('epoch %i, %s error %f +- %f %%' %
              (epoch, ens, this_acc * 100., std_acc * 100.))
        if plot:
            bigpic = numpy.vstack(bigpic)
            tile = tile_raster_images(bigpic, (311, 457),
                                      (n_samples // 4, 5 * 4),
                                      output_pixel_vals=True)
            Im.fromarray(tile).convert("RGB").save("images/" + ens +
                                                   str(epoch) + ".png")
        return this_acc

    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    n_training_samples = train_set_x.get_value().shape[0]
    print('... training over %i training samples' % n_training_samples)

    # early-stopping parameters
    patience = 10000  # minimum number of minibatches to look at
    patience_increase = 2  # patience multiplier when a new best is found
    improvement_threshold = 1  # any improvement counts (tutorial used 0.995)
    # Validate after this many minibatches (once per epoch at most).
    validation_frequency = min(n_train_batches, patience // 2)

    # The score is maximized here, hence the -inf starting point.
    best_validation_loss = -numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    evaluation(pred_train, train_set_x, "train", epoch, seuil, True)
    print("training started...")
    while (epoch < n_epochs) and (not done_looping):
        rotate_data((train_set_x, train_set_y), rot)
        epoch = epoch + 1
        minibatch_avg_cost = []
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost += [train_model(minibatch_index)]
            # global minibatch counter
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Two spaces after "::" reproduce the Python 2
                # print-statement output of the original.
                print("mean avg cost over training ::  %s" %
                      (numpy.mean(minibatch_avg_cost),))
                evaluation(pred_train, train_set_x, "train", epoch, seuil,
                           True)
                val = evaluation(pred_valid, valid_set_x, "valid", epoch,
                                 seuil, True)

                # new best validation score
                if val > best_validation_loss:
                    if val > best_validation_loss * improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = val
                    best_iter = iteration

                    # Keep the test score; it used to be discarded, so the
                    # final summary always reported 0.
                    test_score = evaluation(pred_test, test_set_x, "test",
                                            epoch, seuil, True)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def test_stack_machine():
    """Compare a classifier on raw MNIST against one on reconstructed MNIST.

    Loads a saved stacked auto-encoder, prints its reconstruction error on
    the test set, runs train and test data through the encode/decode pass,
    fits one classifier on the raw inputs and one on the reconstructions,
    and prints the train/test accuracy of both.

    :return: the loaded stacked auto-encoder
    """
    # Load learning:
    save_path = "Saving/Stack_AE_theano"
    save_dir = "10-06_3L1000"
    load_dir = os.path.join(save_path, save_dir)
    stack_AE = SAE.load(load_dir)

    # Load the dataset:
    print("Loading dataset...")
    datasets = load_data('mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # Only the error of the reconstruction is reported here.
    _, error = stack_AE.reconstruct(test_set_x)
    print("The error of reconstruction is: {0}".format(error.eval()), "%")

    # Classification:
    # Symbolic encode -> decode pass over the whole stack for both sets.
    n_layers = stack_AE.architecture.shape[0]
    train_deconstructed_layer_value = stack_AE.forward_encoding(
        train_set_x, 0, n_layers)
    train_reconstructed_layer_value = stack_AE.forward_decoding(
        train_deconstructed_layer_value, 0, n_layers)
    test_deconstructed_layer_value = stack_AE.forward_encoding(
        test_set_x, 0, n_layers)
    test_reconstructed_layer_value = stack_AE.forward_decoding(
        test_deconstructed_layer_value, 0, n_layers)

    # Classifiers:
    classifier = 'AdaBoostClassifier'
    print("Classifier used: ", classifier)

    # Evaluate every symbolic value exactly once; each was previously
    # re-evaluated at every use (fit, train score, test score).
    train_x = train_set_x.eval()
    train_y = train_set_y.eval()
    test_x = test_set_x.eval()
    test_y = test_set_y.eval()

    print("Learning the logistic regression without stack...")
    logReg_withoutStack = stack_AE.supervized_classification(
        train_x, train_y, classification_method=classifier)

    print("Learning the logistic regression with stack...")
    train_reconstructed = train_reconstructed_layer_value.eval()
    logReg_afterStack = stack_AE.supervized_classification(
        train_reconstructed, train_y, classification_method=classifier)

    # Performances:
    print("Without Stack_AE:")
    print("Accuracy training set:",
          logReg_withoutStack.score(train_x, train_y))
    print("Accuracy test set:", logReg_withoutStack.score(test_x, test_y))

    print("With Stack_AE:")
    print("Accuracy training set:",
          logReg_afterStack.score(train_reconstructed, train_y))
    test_reconstructed = test_reconstructed_layer_value.eval()
    print("Accuracy test set:",
          logReg_afterStack.score(test_reconstructed, test_y))

    return stack_AE
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden1=500,
             n_hidden2=100, n_hidden3=50):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden1: int
    :type n_hidden2: int
    :type n_hidden3: int
    :param n_hidden1, n_hidden2, n_hidden3: forwarded unchanged to ``MLP``
    (presumably the widths of its three hidden layers -- confirm against the
    MLP class)
    """
    # Local import so the block does not depend on a file-level import;
    # time.clock() was removed in Python 3.8.
    import timeit

    datasets = load_data(dataset)

    # The learning rate lives in a shared variable and is used symbolically
    # in the update rule below, so changing its value (or adding an update
    # pair for it) actually affects training.
    ada_lr = theano.shared(np.float32(learning_rate), name="ada_lr")

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # floor division keeps the counts integral on Python 2 and Python 3
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=28 * 28,
                     n_hidden1=n_hidden1, n_hidden2=n_hidden2,
                     n_hidden3=n_hidden3, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling Theano functions that compute the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # gradient of the cost with respect to every parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD step for every (parameter, gradient) pair
    updates = [(param, param - ada_lr * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 25  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def evaluate_lenet5(sigma=0.01, learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    Gaussian noise with mean 0 and standard deviation ``sigma`` is added to
    the flattened output of the second convolution/pooling layer before it
    enters the fully-connected layer.  Whenever the validation error
    improves, the test error (in percent) is printed to stdout; the total
    running time is printed to stderr.

    :type sigma: float
    :param sigma: standard deviation in normal distribution

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (only read, never mutated)

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    # Local import so the block does not depend on a file-level import;
    # time.clock() was removed in Python 3.8.
    import timeit

    rng = numpy.random.RandomState(930508)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # floor division keeps the counts integral on Python 2 and Python 3
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, d * d)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer.
    # The side length d is derived from the data (28 for MNIST).  A plain
    # Python int is used: an int8 numpy scalar would overflow for d > 127
    # and would turn the divisions below into floats.
    n_feature = train_set_x.get_value(borrow=True).shape[1]
    matrix_dim = int(numpy.sqrt(n_feature))
    layer0_input = x.reshape((batch_size, 1, matrix_dim, matrix_dim))

    # Construct the first convolutional pooling layer:
    # for 28x28 inputs, filtering reduces the image size to
    # (28-5+1 , 28-5+1) = (24, 24) and maxpooling reduces this further to
    # (24/2, 24/2) = (12, 12).
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, matrix_dim, matrix_dim),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer:
    # for 28x28 inputs, filtering reduces the image size to
    # (12-5+1, 12-5+1) = (8, 8) and maxpooling reduces this further to
    # (8/2, 8/2) = (4, 4).
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    pooled0_dim = (matrix_dim - 5 + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], pooled0_dim, pooled0_dim),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)
    pooled1_dim = (pooled0_dim - 5 + 1) // 2
    n_flat = nkerns[1] * pooled1_dim * pooled1_dim

    # Add zero-mean Gaussian noise, one value per flattened feature.  The
    # vector length must follow nkerns[1]; a hard-coded 50 only matches the
    # default configuration.
    srng = RandomStreams(seed=508)
    variation = srng.normal((n_flat, ), 0, sigma)
    layer2_input = layer2_input + variation

    # construct a fully-connected layer with tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=n_flat,
        n_out=500,
        activation=T.tanh
    )

    # number of classes = span of the integer training labels; the label
    # array is evaluated once and reduced with numpy
    labels = train_set_y.eval()
    n_out = int(labels.max() - labels.min() + 1)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=n_out)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create functions to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # The updates list is built by looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    # only the test error (in percent) is reported
                    print(test_score * 100.)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('The code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)
def train_DBN(
        finetune_lr=0.1,
        pretraining_epochs=100,
        pretrain_lr=0.03,
        k=1,
        training_epochs=600,
        dataset='/Users/apple/Desktop/pattern_recognition_pr/data/mnist.pkl.gz',
        batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.  The input features are not taken from
    ``dataset``: they are read from ``train.npy``, ``validation.npy`` and
    ``test.npy`` in the current working directory, and only the labels come
    from ``dataset``.  After fine-tuning, the network is pickled to
    ``./model3``.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    # Labels come from the pickled dataset, features from the .npy files.
    datasets1 = load_data(dataset)
    datasets = [
        [theano.shared(numpy.load("train.npy")), datasets1[0][1]],
        [theano.shared(numpy.load("validation.npy")), datasets1[1][1]],
        [theano.shared(numpy.load("test.npy")), datasets1[2][1]],
    ]

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training; floor division keeps the
    # count integral on Python 2 and Python 3
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[2000, 1000, 500, 200],
              n_outs=10)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = timeit.default_timer()
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                  numpy.mean(c))

    end_time = timeit.default_timer()
    # end-snippet-2
    print('The pretraining code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many minibatches regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0  # defined up front so the summary never hits a NameError
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The fine tuning code for file ' +
          os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)

    # Pickle streams are bytes, so the file must be opened in binary mode;
    # the context manager closes it even if dumping fails.
    with open("./model3", "wb") as file_:
        pickle.dump(dbn, file_)
def test_SdA(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001,
             training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1):
    """Pre-train and fine-tune a stacked denoising auto-encoder.

    An ``SdA`` with three hidden layers of 1000 units is pre-trained layer
    by layer with corruption levels 0.1, 0.2 and 0.3, then fine-tuned with
    early stopping on the validation error.  Progress is printed to stdout
    and the running times to stderr.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer
    :type pretrain_lr: float
    :param pretrain_lr: learning rate used during pre-training
    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs
    :type dataset: string
    :param dataset: path of the dataset handed to ``load_data``
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: a float batch count would make the iteration counter
    # drift and the validation check misfire whenever batch_size does not
    # divide the number of training examples.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')

    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=10
    )

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print('... pre-training the model')
    start_time = timeit.default_timer()

    # one corruption level per hidden layer
    corruption_levels = [.1, .2, .3]
    for i in range(sda.n_layers):
        for epoch in range(pretraining_epochs):
            c = [pretraining_fns[i](index=batch_index,
                                    corruption=corruption_levels[i],
                                    lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                  end=' ')
            print(numpy.mean(c))

    end_time = timeit.default_timer()

    print(('The pretraining code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    ########################
    # FINETUNING THE MODEL #
    ########################
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetunning the model')
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many minibatches regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # validate once per epoch (or every patience // 2 minibatches if smaller)
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0  # defined up front so the summary never hits a NameError
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print(('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.)))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(((' epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
                            test_score * 100.)))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print((
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    ))
    print(('The training code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)