def test_mlp(learning_rate=0.01, L1_reg=0.0, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Fit an MLP classifier with minibatch SGD and patience-based early stopping.

    :param learning_rate: step size applied to every parameter gradient
    :param L1_reg: weight of ``classifier.L1`` in the training cost
    :param L2_reg: weight of ``classifier.L2_sqr`` in the training cost
    :param n_epochs: upper bound on the number of passes over the training set
    :param dataset: value handed to ``load_data``
    :param batch_size: number of rows per minibatch
    :param n_hidden: fourth positional argument of ``MLP`` (presumably the
        hidden-layer width -- confirm against the ``MLP`` class)

    Progress is printed to stdout; the run-time summary goes to stderr.
    Nothing is returned.
    """
    # ------------------------------------------------------------- data
    splits = load_data(dataset)
    train_set_x, train_set_y = splits[0]
    valid_set_x, valid_set_y = splits[1]
    test_set_x, test_set_y = splits[2]

    def count_batches(shared_x):
        # whole minibatches only; a trailing partial batch is ignored
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = count_batches(train_set_x)
    n_valid_batches = count_batches(valid_set_x)
    n_test_batches = count_batches(test_set_x)

    # ------------------------------------------------------------ model
    print('... building the model')

    index = T.lscalar()  # minibatch number
    x = T.matrix(name='x')
    y = T.ivector(name='y')

    rng = numpy.random.RandomState(1234)
    classifier = MLP(rng, x, 28*28, n_hidden, 10)

    # negative log-likelihood plus the two weighted regularisation terms
    nll = classifier.negative_log_likelihood(y)
    cost = (nll
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    def minibatch_givens(set_x, set_y):
        # substitute the symbolic inputs by the index-th slice of a dataset
        return {
            x: set_x[index*batch_size : (index+1)*batch_size],
            y: set_y[index*batch_size : (index+1)*batch_size],
        }

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch_givens(test_set_x, test_set_y))

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=minibatch_givens(valid_set_x, valid_set_y))

    # plain SGD: one (parameter, new value) pair per model parameter
    updates = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        updates.append((param, param - learning_rate * gparam))

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=minibatch_givens(train_set_x, train_set_y))

    # --------------------------------------------------------- training
    print('... training')

    patience = 10000              # minimum number of iterations to run
    patience_increase = 2         # patience multiplier on a significant best
    improvement_threshold = 0.995  # relative gain that counts as significant
    # number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    out_of_patience = False
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # global iteration counter (0-based)
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                # zero-one loss averaged over the validation minibatches
                this_validation_loss = numpy.mean(
                    [validate_model(b) for b in range(n_valid_batches)])

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (epoch, minibatch_index + 1, n_train_batches,
                     this_validation_loss * 100.)
                )

                if this_validation_loss < best_validation_loss:
                    # extend patience only when the gain is large enough
                    significant = (this_validation_loss <
                                   best_validation_loss * improvement_threshold)
                    if significant:
                        patience = max(patience, step * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = step

                    # score the new best model on the test set
                    test_score = numpy.mean(
                        [test_model(b) for b in range(n_test_batches)])

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                out_of_patience = True
                break

        if out_of_patience:
            break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
print('error: %f' % sum(error[(len(error) - 100):])) if seePred: acc = float(correct) / (showProgress / batch_size) print('accuracy: ' + str(acc)) correct = 0 i += 1 # end for loop: loop through all file if (k + 1) % 1 == 0: if doDrop: model.setDropoutOff() # redefine trainer since the argument has changed X = np.array(cv_feat_new, dtype=theano.config.floatX) Y = np.array(cv_y, dtype=theano.config.floatX) pred_max = theano.function([x], model.predict_max(x)) #print(cv_y) cv_error = model.errors( X, np.array(np.where(Y == 1)[1], dtype=theano.config.floatX)) cve.append(cv_error) if doDrop: model.setDropoutOn() print('.........................') print('CV error rate:' + str(cv_error)) print('.........................') # manually interupt the model learning except KeyboardInterrupt: temp_epoch = k print("Ctrl+C detect") break # cross validation # end for loop: epoch t2 = time()
# print(model.layers[2].mask) print('error: %f' % sum(error[ (len(error)-100) :])) if seePred: acc = float(correct) / (showProgress/batch_size) print('accuracy: '+str(acc)) correct = 0 i += 1 # end for loop: loop through all file if (k+1) % 1 == 0: if doDrop: model.setDropoutOff() # redefine trainer since the argument has changed X = np.array(cv_feat_new,dtype = theano.config.floatX) Y = np.array(cv_y,dtype = theano.config.floatX) #print(cv_y) cv_error = model.errors( X, np.array(np.where(Y==1)[1],dtype = theano.config.floatX) ) if doDrop: model.setDropoutOn() print('.........................') print('CV error rate:' + str(cv_error)) print('.........................') # manually interupt the model learning except KeyboardInterrupt: temp_epoch = k print("Ctrl+C detect") break # cross validation # end for loop: epoch t2 = time()
def trainLeNet(train_x, train_y, validation_x, validation_y, test_x, test_y,
               convolution_layer_size=None, rate=0.1, batch_size=500,
               n_epochs=200):
    """Train a two-stage convolutional network followed by an MLP using SGD.

    The network reshapes each minibatch to ``(batch_size, 1, 28, 28)``, applies
    two ``LeNetConvPoolLayer`` stages (5x5 filters, 2x2 pooling) and feeds the
    flattened result to ``MLP``.  Whenever the validation error improves, the
    parameter list is pickled to ``LeNet_params.pkl`` and the test error is
    printed.

    :param train_x, train_y: training inputs / labels; indexed symbolically and
        queried with ``get_value()``, so they are expected to be Theano shared
        variables (labels presumably already cast to int -- confirm at caller)
    :param validation_x, validation_y: validation inputs / labels, same form
    :param test_x, test_y: test inputs / labels, same form
    :param convolution_layer_size: sequence of two ints, the number of kernels
        in the first and second convolution stage.  Required despite the
        ``None`` default.
    :param rate: SGD learning rate
    :param batch_size: rows per minibatch (fixed into the compiled graph)
    :param n_epochs: maximum number of passes over the training set
    :raises TypeError: if ``convolution_layer_size`` is ``None``
    """
    if convolution_layer_size is None:
        # The original failed later with an opaque "'NoneType' object is not
        # subscriptable"; same exception type, clearer message.
        raise TypeError(
            'convolution_layer_size must be a sequence of two kernel counts')
    n_kernels_0 = convolution_layer_size[0]
    n_kernels_1 = convolution_layer_size[1]

    rng = np.random.RandomState(274563533)
    x = T.matrix('x')
    y = T.ivector('y')

    # 28x28 input -> conv 5x5 -> 24x24 -> pool 2x2 -> 12x12
    layer_0_input = x.reshape((batch_size, 1, 28, 28))
    layer_0 = LeNetConvPoolLayer(rng,
                                 input=layer_0_input,
                                 layer_shape=(n_kernels_0, 1, 5, 5),
                                 input_shape=(batch_size, 1, 28, 28),
                                 pool_size=(2, 2))
    # 12x12 -> conv 5x5 -> 8x8 -> pool 2x2 -> 4x4
    layer_1 = LeNetConvPoolLayer(rng,
                                 input=layer_0.output,
                                 layer_shape=(n_kernels_1, n_kernels_0, 5, 5),
                                 input_shape=(batch_size, n_kernels_0, 12, 12),
                                 pool_size=(2, 2))
    MLP_input = layer_1.output.flatten(2)
    layer_final = MLP(MLP_input, n_kernels_1 * 4 * 4, 500, 10)

    cost = layer_final.negativeLogLikelihood(y)
    error = layer_final.errors(y)
    index = T.lscalar('index')

    def batch_givens(data_x, data_y):
        # replace x / y by the index-th minibatch of the given dataset
        return {x: data_x[index * batch_size: (index + 1) * batch_size],
                y: data_y[index * batch_size: (index + 1) * batch_size]}

    validation_model = function([index], error,
                                givens=batch_givens(validation_x, validation_y))
    test_model = function([index], error,
                          givens=batch_givens(test_x, test_y))

    params = layer_final.params + layer_1.params + layer_0.params
    param_grad = T.grad(cost, params)
    updates = [(p, p - rate * pg) for p, pg in zip(params, param_grad)]
    train_model = function([index], cost,
                           givens=batch_givens(train_x, train_y),
                           updates=updates)

    n_train_batches = train_x.get_value().shape[0] // batch_size
    n_test_batches = test_x.get_value().shape[0] // batch_size
    n_validation_batches = validation_x.get_value().shape[0] // batch_size

    epoch = 0
    best_validation_cost = np.inf  # np.Inf alias no longer exists in NumPy 2
    patience = 1000000
    improvement_thread = 0.995
    patience_increase = 2
    # floor division keeps this an int on Python 3 as well
    validation_frequency = min(n_train_batches, patience // 2)
    loop_done = False

    # strict '<' so that exactly n_epochs epochs run ('<=' ran one extra)
    while epoch < n_epochs and not loop_done:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter =  %i' % iteration)

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validation_model(i)
                                     for i in range(n_validation_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i / %i, validation error %f %%'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         this_validation_loss * 100))

                if this_validation_loss < best_validation_cost:
                    # pickle writes bytes, so the file must be binary
                    with open('LeNet_params.pkl', 'wb') as serial:
                        pickle.dump(params, serial)

                    if this_validation_loss < best_validation_cost * improvement_thread:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_cost = this_validation_loss

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_loss = np.mean(test_losses)
                    print('test error: %f %%' % (test_loss * 100))

            if patience <= iteration:
                loop_done = True
                break
def evaluate_lenet5(learning_rate=0.33, n_epochs=200, dataset="mnist.pkl.gz", nkerns=(32, 32, 32), batch_size=500):
    """Train a three-stage LeNet-style network on CIFAR-10 with minibatch SGD.

    The CIFAR-10 python batches are read from ``cifar-10-batches-py/``.  The
    first 7500 training rows are held out for validation, the remaining 42500
    are used for training, and all three splits are standardised with the
    per-feature mean and standard deviation of the training split.

    :type learning_rate: float
    :param learning_rate: initial SGD step size; at epoch 20 it is reset to
        0.1 and afterwards decayed by 0.9 per epoch while it stays at or
        above ``0.1 * 0.9 ** 6``

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept for interface compatibility

    :type nkerns: sequence of three ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: rows per minibatch (fixed into the compiled graph)

    Side effects: prints progress, rewrites ``epoc_cost.csv`` and
    ``epoc_val_error.csv`` at the start of every epoch after the third, and
    shows two matplotlib plots when training ends.  Returns nothing.
    """
    rng = numpy.random.RandomState(23455)
    floatX = theano.config.floatX

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, labels)`` pair in Theano shared variables.

        Both arrays are stored as ``floatX``; the labels are returned through
        a symbolic cast to ``int32`` because they are used as indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, "int32")

    def as_table(rows):
        """Turn a list of 3-element records into an ``(n, 3)`` float array."""
        return numpy.asarray(rows, dtype=float).reshape(len(rows), 3)

    def to_floatX(value):
        """Cast a scalar to ``floatX`` so it can be stored in ``lr_shared``."""
        return numpy.asarray(value, dtype=floatX)

    # ------------------------------------------------------------------ data
    train_batches = [unpickle("cifar-10-batches-py/data_batch_%d" % i) for i in range(1, 6)]
    test = unpickle("cifar-10-batches-py/test_batch")

    X_train = numpy.concatenate([batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    test_data = test["data"]
    # use the test set's own row count (the original borrowed it from the
    # first training batch, which only works when both have the same size)
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining 42500 rows for training
    Ytr = y_train[7500:50000]

    # Standardise with statistics of the TRAINING split only.  The original
    # took the standard deviation from the test set, which leaks test
    # information and is inconsistent with the training mean.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # Keep the step size in a floatX shared variable so it can be changed
    # during training; a plain Python float would become float64 and make the
    # updates mismatch float32 parameters.
    lr_shared = theano.shared(to_floatX(learning_rate))

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # number of whole minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # rasterized images, one per row
    y = T.ivector("y")  # labels as a 1D vector of ints

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape the rasterized rows (batch_size, 3 * 32 * 32) into the 4D
    # tensor expected by LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer:
    # filtering gives (32-5+1, 32-5+1) = (28, 28), pooling gives (14, 14);
    # output shape (batch_size, nkerns[0], 14, 14)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2),
    )

    # Second convolutional pooling layer:
    # filtering gives (14-5+1, 14-5+1) = (10, 10), pooling gives (5, 5);
    # output shape (batch_size, nkerns[1], 5, 5)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # Third convolutional pooling layer:
    # filtering gives (5-2+1, 5-2+1) = (4, 4), pooling gives (2, 2);
    # output shape (batch_size, nkerns[2], 2, 2)
    layer2conv = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 5, 5),
        filter_shape=(nkerns[2], nkerns[1], 2, 2),
        poolsize=(2, 2),
    )

    # The fully-connected layers work on 2D input, so flatten to
    # (batch_size, nkerns[2] * 2 * 2).
    layer3_input = layer2conv.output.flatten(2)
    print(layer3_input.shape)

    # fully-connected layer with the file's ``relu`` activation, then an MLP
    # that produces the 10-way classification
    layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * 2 * 2, n_out=64, activation=relu)
    layer3_1 = MLP(rng, input=layer3.output, n_in=64, n_hidden=200, n_out=10)

    # Cost = negative log-likelihood + L2 penalty.  Each weight matrix is
    # counted once, and ``layer3_1.L2_sqr`` is used as-is (the original added
    # layer0 twice and squared the MLP term a second time).
    L2_reg = 0.005
    L2_sqr_model = (
        (layer0.W ** 2).sum()
        + (layer1.W ** 2).sum()
        + (layer2conv.W ** 2).sum()
        + (layer3.W ** 2).sum()
        + layer3_1.L2_sqr
    )
    cost = layer3_1.negative_log_likelihood(y) + L2_reg * L2_sqr_model

    def batch_givens(set_x, set_y):
        # substitute x / y by the index-th minibatch of the given split
        return {
            x: set_x[index * batch_size : (index + 1) * batch_size],
            y: set_y[index * batch_size : (index + 1) * batch_size],
        }

    # functions computing the mistakes made by the model on one minibatch
    test_model = theano.function([index], layer3_1.errors(y), givens=batch_givens(test_set_x, test_set_y))
    validate_model = theano.function([index], layer3_1.errors(y), givens=batch_givens(valid_set_x, valid_set_y))

    # all parameters fitted by gradient descent, and their gradients
    params = layer3_1.params + layer3.params + layer2conv.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    # plain SGD update for every (parameter, gradient) pair
    updates = [(param_i, param_i - lr_shared * grad_i) for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y),
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    # early-stopping parameters
    patience = 10000  # run at least this many iterations
    patience_increase = 2  # patience multiplier when a new best is found
    improvement_threshold = 0.995  # relative improvement considered significant
    # minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    epoch_loss_list = []  # [iteration, epoch, training cost] per minibatch
    epoch_val_list = []  # [iteration, epoch, validation error] per check

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1

        # learning-rate schedule
        if epoch == 20:
            lr_shared.set_value(to_floatX(0.1))
        if epoch >= 21 and lr_shared.get_value() >= 0.1 * (0.9 ** 6):
            lr_shared.set_value(to_floatX(lr_shared.get_value() * 0.9))

        # periodically dump the history collected so far
        if epoch > 3:
            numpy.savetxt(fname="epoc_cost.csv", X=as_table(epoch_loss_list), fmt="%1.3f")
            numpy.savetxt(fname="epoc_val_error.csv", X=as_table(epoch_val_list), fmt="%1.3f")

        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)
            cost_ij = train_model(minibatch_index)
            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:
                # zero-one loss on the validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )
                epoch_val_list.append([iteration, epoch, this_validation_loss])

                # best validation score so far?
                if this_validation_loss < best_validation_loss:
                    # extend patience if the improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # evaluate the new best model on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        (" epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print(
        "Best validation score of %f %% obtained at iteration %i, "
        "with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # per-epoch averages of the training cost and validation error
    epoch_loss = pandas.DataFrame(as_table(epoch_loss_list), columns=["iter", "epoch", "cost"])
    epoch_vall = pandas.DataFrame(as_table(epoch_val_list), columns=["iter", "epoch", "val_error"])
    epoc_avg_loss = epoch_loss.groupby("epoch")["cost"].mean().reset_index()
    epoc_avg_loss_val = epoch_vall.groupby("epoch")["val_error"].mean().reset_index()

    for frame, column in ((epoc_avg_loss, "cost"), (epoc_avg_loss_val, "val_error")):
        if frame.empty:
            # nothing was recorded (e.g. n_epochs == 0); skip the plot
            continue
        frame.plot(kind="line", x="epoch", y=column)
        plt.show()
def MLP_demo(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1, n_hidden=309):
    """Train an MLP on the data returned by ``load_multi`` with early stopping.

    :param learning_rate: passed to ``classifier.get_cost_updates``
    :param L1_reg: passed to ``classifier.get_cost_updates``
    :param L2_reg: passed to ``classifier.get_cost_updates``
    :param n_epochs: maximum number of passes over the training set
    :param dataset: unused; the data always comes from ``load_multi()``
    :param batch_size: rows per minibatch
    :param n_hidden: ``n_hidden`` argument of ``MLP``

    Progress goes to stdout, the run-time summary to stderr.  Returns nothing.

    NOTE(review): this file defines ``MLP_demo`` twice; the later definition
    replaces the earlier one at import time.
    """
    # local import: default_timer replaces time.clock, which no longer exists
    # on Python >= 3.8
    import timeit

    datasets = load_multi()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # floor division so the counts stay ints (usable with range) on Python 3
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = np.random.RandomState(1234)
    classifier = MLP(rng, x, y, n_in=103, n_hidden=n_hidden, n_out=9)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate,
                                                L1_reg=L1_reg,
                                                L2_reg=L2_reg)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')
    patience = 10000               # run at least this many iterations
    patience_increase = 2          # patience multiplier on a significant best
    improvement_threshold = 0.995  # relative gain that counts as significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # score the new best model on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # write() works on Python 2 and 3 alike, unlike ``print >> sys.stderr``
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def MLP_demo(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1, n_hidden=309):
    """Run minibatch training of ``MLP`` on ``load_multi()`` data.

    Validation error is measured every ``validation_frequency`` minibatches;
    each new best validation score triggers a test-set evaluation, and
    training stops once the iteration count reaches ``patience``.

    :param learning_rate: forwarded to ``classifier.get_cost_updates``
    :param L1_reg: forwarded to ``classifier.get_cost_updates``
    :param L2_reg: forwarded to ``classifier.get_cost_updates``
    :param n_epochs: upper bound on the number of epochs
    :param dataset: not used by this function (data comes from ``load_multi``)
    :param batch_size: number of rows in each minibatch
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``

    Prints progress to stdout and the elapsed time to stderr; returns nothing.

    NOTE(review): ``MLP_demo`` appears twice in this file; only the last
    definition is effective once the module is loaded.
    """
    # time.clock was removed in Python 3.8; timeit.default_timer is available
    # on both Python 2 and 3.
    import timeit

    datasets = load_multi()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    def n_batches(shared_x):
        # '//' keeps the result an int on Python 3 so it can drive range()
        return shared_x.get_value(borrow=True).shape[0] // batch_size

    n_train_batches = n_batches(train_set_x)
    n_valid_batches = n_batches(valid_set_x)
    n_test_batches = n_batches(test_set_x)

    print('... building the model')
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = np.random.RandomState(1234)
    classifier = MLP(rng, x, y, n_in=103, n_hidden=n_hidden, n_out=9)

    def minibatch(set_x, set_y):
        # givens mapping that feeds the index-th minibatch of one split
        return {x: set_x[index * batch_size:(index + 1) * batch_size],
                y: set_y[index * batch_size:(index + 1) * batch_size]}

    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(),
                                 givens=minibatch(test_set_x, test_set_y))
    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(),
                                     givens=minibatch(valid_set_x, valid_set_y))
    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate,
                                                L1_reg=L1_reg,
                                                L2_reg=L2_reg)
    train_model = theano.function(inputs=[index],
                                  outputs=cost,
                                  updates=updates,
                                  givens=minibatch(train_set_x, train_set_y))

    print('... training')
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            step = (epoch - 1) * n_train_batches + minibatch_index

            if (step + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # only a significant improvement extends the patience
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, step * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = step

                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= step:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    # portable replacement for the Python 2 ``print >> sys.stderr`` statement
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
class NeuralNet():
    """Train a Theano ``MLP`` on in-memory arrays and report metrics.

    Attributes:
        x: symbolic input matrix fed to the MLP
        y: symbolic int vector of labels
        metrics: dict of the last scores computed by ``test`` (keys ``F1``,
            ``Accuracy``, ``AUC``, ``Precision``, ``Recall``), or whatever
            object was passed as ``metric_list``
        mlp: the ``MLP`` instance; only set once ``train`` has been called
    """

    def __init__(self, n_hidden_units, batch_size, output_size, metric_list="none", learning_rate=1,
                 l1_term=0, l2_term=0, n_epochs=100, activation_function='tanh', train_p=.6,
                 dropout=False, dropout_rate=.5, momentum=False, momentum_term=.9,
                 adaptive_learning_rate=False):
        """Store the hyper-parameters; no model is built until ``train``.

        :param n_hidden_units: forwarded to ``MLP`` as ``n_hidden_sizes``
        :param batch_size: number of rows per minibatch
        :param output_size: forwarded to ``MLP`` as ``n_out``
        :param metric_list: ``"none"`` for a fresh zeroed metrics dict,
            otherwise the object to use as ``self.metrics``
        :param learning_rate: SGD step size
        :param l1_term: weight of the MLP's L1 term in the cost
        :param l2_term: weight of the MLP's squared-L2 term in the cost
        :param n_epochs: maximal number of epochs for ``train``
        :param activation_function: ``'relu'``, ``'tanh'`` or ``'sigmoid'``
        :param train_p: fraction of the rows used for training
        :param dropout: forwarded to ``MLP``; also scales weights in
            ``predict`` / ``transfer_learned_weights``
        :param dropout_rate: forwarded to ``MLP``; the weight scale factor
        :param momentum: use momentum updates in ``train`` when true
        :param momentum_term: momentum coefficient
        :param adaptive_learning_rate: accepted but not used by this class
        :raises ValueError: if ``activation_function`` is not recognised
        """
        # Symbolic variables for the data.
        self.x = T.matrix('x')
        self.y = T.ivector('y')  # labels as a 1D vector of ints

        self.dropout = dropout
        self.dropout_rate = dropout_rate

        if metric_list == "none":
            self.metrics = {"F1": 0, "Accuracy": 0, "AUC": 0,
                            "Precision": 0, "Recall": 0}
        else:
            self.metrics = metric_list

        self.learning_rate = learning_rate
        self.L1_reg = l1_term
        self.L2_reg = l2_term
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.train_percent = train_p

        def relu(x):
            return T.switch(x < 0, 0, x)

        if activation_function == 'relu':
            self.activation_function = relu
        elif activation_function == 'tanh':
            self.activation_function = T.tanh
        elif activation_function == 'sigmoid':
            self.activation_function = T.nnet.sigmoid
        else:
            # Previously the attribute was silently left unset and the
            # failure only surfaced later as an AttributeError in train().
            raise ValueError(
                "activation_function must be 'relu', 'tanh' or 'sigmoid', "
                "got %r" % (activation_function,))

        self.output_size = output_size
        self.hidden_layer_sizes = n_hidden_units
        self.momentum = momentum
        self.momentum_term = momentum_term

    def train(self, x_input, y_input):
        """Fit a new ``MLP`` with minibatch SGD and early stopping.

        The first ``train_p`` fraction of the rows is the training set; the
        validation set is taken from the rows after it. The fitted model is
        stored in ``self.mlp``; progress is printed to stdout and the run
        time to stderr.

        :param x_input: 2D array of features, one row per sample
        :param y_input: labels aligned with the rows of ``x_input``
        """
        index = T.lscalar('index')  # index to a [mini]batch

        n_samples = x_input.shape[0]
        # Slice bounds must be integers; the product with train_percent is
        # a float, which NumPy rejects as an index.
        train_size = int(n_samples * self.train_percent)
        max_size = n_samples - (n_samples % 10)

        train_set_x = x_input[:train_size, :]
        train_set_y = y_input[:train_size]
        # NOTE(review): the validation slice starts at train_size + 1, so
        # the row at index train_size is in neither set - confirm intent.
        valid_set_x = x_input[(train_size + 1):max_size, :]
        valid_set_y = y_input[(train_size + 1):max_size]

        # Number of whole minibatches in each set.
        n_train_batches = train_set_x.shape[0] // self.batch_size
        n_valid_batches = valid_set_x.shape[0] // self.batch_size
        number_in = train_set_x.shape[1]

        valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
        valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
        train_set_x = theano.shared(train_set_x, 'train_set_x')
        train_set_y = theano.shared(train_set_y, 'train_set_y')

        self.mlp = MLP(rng=numpy.random.RandomState(),
                       input=self.x,
                       n_in=number_in,
                       n_out=self.output_size,
                       a_function=self.activation_function,
                       n_hidden_sizes=self.hidden_layer_sizes,
                       dropout=self.dropout,
                       dropout_rate=self.dropout_rate)

        # Negative log likelihood plus the L1 / L2 regularisation terms,
        # expressed symbolically.
        cost = (self.mlp.negative_log_likelihood(self.y) +
                self.L1_reg * self.mlp.L1 +
                self.L2_reg * self.mlp.L2_sqr)

        batch_start = index * self.batch_size
        batch_stop = (index + 1) * self.batch_size

        # Error rate of the model on one validation minibatch.
        validate_model = theano.function(
            inputs=[index],
            outputs=self.mlp.errors(self.y),
            givens={
                self.x: valid_set_x[batch_start:batch_stop],
                self.y: valid_set_y[batch_start:batch_stop]
            })

        gradients = [T.grad(cost, param) for param in self.mlp.params]

        # (variable, update expression) pairs applied on every train step.
        updates = []
        if self.momentum:
            for param, gradient in zip(self.mlp.params, gradients):
                # zeros_like keeps the buffer's dtype equal to the
                # parameter's, so the update type-checks for any floatX.
                previous_step = theano.shared(
                    value=numpy.zeros_like(param.get_value()))
                step = (-self.learning_rate * gradient +
                        self.momentum_term * previous_step)
                updates.append((param, param + step))
                updates.append((previous_step, step))
        else:
            for param, gradient in zip(self.mlp.params, gradients):
                updates.append(
                    (param, param - self.learning_rate * gradient))

        # Returns the cost and applies `updates` on one training minibatch.
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch_start:batch_stop],
                self.y: train_set_y[batch_start:batch_stop]
            })

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # Early-stopping parameters.
        patience = 1000000            # run at least this many iterations
        patience_increase = 2         # patience grows to iteration * this
        improvement_threshold = 0.995  # relative improvement that counts
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated in this method, so this stays at 0 and
        # the "test" figures printed below are placeholders.
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    validation_losses = [
                        validate_model(i) for i in range(n_valid_batches)
                    ]
                    this_validation_loss = numpy.mean(validation_losses)

                    # Report the numeric minibatch cost; the symbolic
                    # `cost` expression cannot be formatted with %f.
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%, cost %f'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.,
                           float(minibatch_avg_cost)))

                    if this_validation_loss < best_validation_loss:
                        # Only a significant improvement extends patience.
                        if (this_validation_loss <
                                best_validation_loss * improvement_threshold):
                            patience = max(patience,
                                           iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        # Write to stderr; print(sys.stderr, ...) printed the stream object
        # itself to stdout.
        sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

    def setup_labels(self, y):
        """Give each task's binary labels their own pair of class ids.

        For the i-th array in ``y`` (0-based), label 0 becomes ``2 * (i + 1)``
        and label 1 becomes ``2 * (i + 1) + 1``; other values are kept. The
        relabelled arrays are joined with ``numpy.hstack``.

        :param y: sequence of NumPy label arrays, one per task
        :returns: ``numpy.intc`` array of the stacked labels (empty when
            ``y`` is empty)
        """
        # The original assert here had its message and condition swapped
        # and could never fail, so any number of tasks is still accepted.
        relabeled_parts = []
        for task_index, task_labels in enumerate(y):
            negative_example_label = 2 * (task_index + 1)
            positive_example_label = negative_example_label + 1
            # Work on a copy so the caller's arrays are left untouched.
            relabeled = numpy.array(task_labels)
            negatives = relabeled == 0
            positives = relabeled == 1
            relabeled[negatives] = negative_example_label
            relabeled[positives] = positive_example_label
            relabeled_parts.append(relabeled)

        if not relabeled_parts:
            return numpy.zeros(0, dtype=numpy.intc)
        return numpy.asarray(numpy.hstack(relabeled_parts), dtype=numpy.intc)

    def test(self, x, y):
        """Predict labels for ``x`` and store the scores in ``self.metrics``.

        :param x: feature array handed to ``predict``
        :param y: true labels compared against the predictions
        """
        prediction = self.predict(x)
        self.metrics["F1"] = f1_score(y, prediction)
        self.metrics["Precision"] = precision_score(y, prediction)
        self.metrics["Recall"] = recall_score(y, prediction)
        self.metrics["AUC"] = roc_auc_score(y, prediction)
        self.metrics["Accuracy"] = accuracy_score(y, prediction)

    def predict(self, x):
        """Return the arg-max class predicted by the trained MLP for ``x``.

        Requires ``train`` to have been called (reads ``self.mlp``).

        :param x: feature array; wrapped in a Theano shared variable
        :returns: result of the compiled arg-max-of-softmax function
        """
        layer_input = theano.shared(x, 'test_set_x')

        # Propagate through every hidden layer in order.
        for hidden_layer in self.mlp.hidden_layers:
            weights = hidden_layer.W
            if self.dropout:
                # At prediction time the weights are scaled by the dropout
                # rate rather than units being dropped.
                weights = weights * self.dropout_rate
            layer_input = self.activation_function(
                T.dot(layer_input, weights) + hidden_layer.b)

        # Softmax output layer. Using layer_input (not a loop-local name)
        # keeps this valid when the MLP has no hidden layers.
        output_layer = self.mlp.logRegressionLayer
        get_y_prediction = theano.function(
            inputs=[],
            outputs=T.argmax(
                T.nnet.softmax(
                    T.dot(layer_input, output_layer.W) + output_layer.b),
                axis=1),
            on_unused_input='ignore',
        )
        return get_y_prediction()

    def transfer_learned_weights(self, x):
        """Apply the last hidden layer's transform to ``x`` and evaluate it.

        Only the final entry of ``self.mlp.hidden_layers`` is used; its
        weights are scaled by ``dropout_rate`` when dropout is enabled.

        :param x: input for the dot product with the layer weights
        :returns: result of the compiled activation function
        """
        final_hidden_layer = self.mlp.hidden_layers[-1]
        weights = final_hidden_layer.W
        if self.dropout:
            weights = weights * self.dropout_rate

        transformation_function = theano.function(
            inputs=[],
            outputs=self.activation_function(
                T.dot(x, weights) + final_hidden_layer.b),
            on_unused_input='ignore',
        )
        return transformation_function()

    def __str__(self):
        """Return a multi-line summary of the stored metrics."""
        return "MLP:\nF1 Score: {}\nPrecision: {}\n" \
               "Recall: {}\nAccuracy: {}\nROC: {}\n".format(
                   self.metrics['F1'],
                   self.metrics['Precision'],
                   self.metrics['Recall'],
                   self.metrics['Accuracy'],
                   self.metrics['AUC'])
def evaluate_lenet5(learning_rate=0.33, n_epochs=200, dataset='mnist.pkl.gz',
                    nkerns=(32, 32, 32), batch_size=500):
    """Train a three-stage convolutional network on CIFAR-10.

    The data is read from the ``cifar-10-batches-py`` directory with
    ``unpickle``. Per-iteration cost and per-validation error are dumped to
    ``epoc_cost.csv`` / ``epoc_val_error.csv`` while training, and the
    per-epoch averages are plotted at the end.

    :type learning_rate: float
    :param learning_rate: initial SGD step size; set to 0.1 at epoch 20 and
        then decayed by 0.9 per epoch while it is at least ``0.1 * 0.9**6``

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :param dataset: accepted for signature compatibility; not read here

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each of the three conv layers

    :type batch_size: int
    :param batch_size: number of images per minibatch
    """
    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """Load a (data, labels) pair into Theano shared variables.

        Both arrays are stored as ``floatX``; the labels are returned cast
        to ``int32`` so they can be used as indices in the computation.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(
            numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(
            numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    train_batches = [
        unpickle('cifar-10-batches-py/data_batch_%d' % batch_number)
        for batch_number in range(1, 6)
    ]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate([b["data"] for b in train_batches], axis=0)
    y_train = numpy.concatenate([b["labels"] for b in train_batches])

    # Reshape the test data by its own row count (the original borrowed the
    # row count of the first training batch).
    test_data = test["data"]
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows for training
    Ytr = y_train[7500:50000]

    # Normalise every split with statistics of the TRAINING split only;
    # the original took the standard deviation from the test set.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # Shared so the schedule below can change it without recompiling.
    learning_rate = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # Number of whole minibatches for training, validation and testing.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # rasterized images, one per row
    y = T.ivector('y')   # labels as a 1D vector of ints

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Rows of 3 * 32 * 32 values become a (batch, channel, row, col) tensor.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First conv/pool stage: 5x5 filters, 32 -> 28, pooled by 2 -> 14
    # (sizes as stated in the image_shape arguments below).
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # Second conv/pool stage: 5x5 filters, 14 -> 10, pooled by 2 -> 5.
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # Third conv/pool stage: 2x2 filters, 5 -> 4, pooled by 2 -> 2.
    layer2conv = LeNetConvPoolLayer(rng,
                                    input=layer1.output,
                                    image_shape=(batch_size, nkerns[1], 5, 5),
                                    filter_shape=(nkerns[2], nkerns[1], 2, 2),
                                    poolsize=(2, 2))

    # Flatten to (batch_size, nkerns[2] * 2 * 2) for the dense layers.
    layer3_input = layer2conv.output.flatten(2)

    # Fully-connected layer with the relu activation.
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 2 * 2,
                         n_out=64,
                         activation=relu)

    layer3_1 = MLP(rng, input=layer3.output, n_in=64, n_hidden=200, n_out=10)

    # Cost: NLL plus an L2 penalty. Each weight matrix is counted once and
    # the MLP's own L2_sqr is added as-is (the original added layer0 twice
    # and squared L2_sqr a second time).
    L2_reg = 0.005
    L2_sqr_model = ((layer0.W**2).sum() + (layer1.W**2).sum() +
                    (layer2conv.W**2).sum() + (layer3.W**2).sum() +
                    layer3_1.L2_sqr)
    cost = layer3_1.negative_log_likelihood(y) + L2_reg * L2_sqr_model

    batch_start = index * batch_size
    batch_stop = (index + 1) * batch_size

    # Functions computing the mistakes made by the model on one minibatch.
    test_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens={
            x: test_set_x[batch_start:batch_stop],
            y: test_set_y[batch_start:batch_stop]
        })

    validate_model = theano.function(
        [index],
        layer3_1.errors(y),
        givens={
            x: valid_set_x[batch_start:batch_stop],
            y: valid_set_y[batch_start:batch_stop]
        })

    # All model parameters to be fit by gradient descent, and their grads.
    params = (layer3_1.params + layer3.params + layer2conv.params +
              layer1.params + layer0.params)
    grads = T.grad(cost, params)

    # Plain SGD: one (param, new value) pair per parameter.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[batch_start:batch_stop],
            y: train_set_y[batch_start:batch_stop]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # Early-stopping parameters.
    patience = 10000              # run at least this many iterations
    patience_increase = 2         # patience grows to iteration * this
    improvement_threshold = 0.995  # relative improvement that counts
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    epoch_loss_list = []  # rows of [iteration, epoch, cost]
    epoch_val_list = []   # rows of [iteration, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1

        # Learning-rate schedule.
        if epoch == 20:
            learning_rate.set_value(0.1)
        if epoch >= 21 and learning_rate.get_value() >= 0.1 * (0.9**6):
            learning_rate.set_value(learning_rate.get_value() * 0.9)

        # Checkpoint the curves collected so far.
        if epoch > 3:
            epoch_loss_np = numpy.reshape(epoch_loss_list,
                                          (len(epoch_loss_list), 3))
            epoch_val_np = numpy.reshape(epoch_val_list,
                                         (len(epoch_val_list), 3))
            numpy.savetxt(fname='epoc_cost.csv', X=epoch_loss_np, fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv', X=epoch_val_np,
                          fmt='%1.3f')

        for minibatch_index in range(n_train_batches):
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            cost_ij = train_model(minibatch_index)
            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:
                # Zero-one loss on the validation set.
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_list.append(
                    [iteration, epoch, this_validation_loss])

                if this_validation_loss < best_validation_loss:
                    # Only a significant improvement extends patience.
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Score the new best model on the test set.
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    # Average the collected curves per epoch and plot them.
    epoch_loss_np = numpy.reshape(epoch_loss_list, (len(epoch_loss_list), 3))
    epoch_val_np = numpy.reshape(epoch_val_list, (len(epoch_val_list), 3))

    epoch_loss = pandas.DataFrame({
        "iter": epoch_loss_np[:, 0],
        "epoch": epoch_loss_np[:, 1],
        "cost": epoch_loss_np[:, 2]
    })
    epoch_vall = pandas.DataFrame({
        "iter": epoch_val_np[:, 0],
        "epoch": epoch_val_np[:, 1],
        "val_error": epoch_val_np[:, 2]
    })

    epoc_avg_loss = pandas.DataFrame(
        epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(
        epoch_vall.groupby(['epoch']).mean()["val_error"])

    epoc_avg_loss = pandas.DataFrame({
        "epoch": epoc_avg_loss.index.values,
        "cost": epoc_avg_loss["cost"]
    })
    epoc_avg_loss_val = pandas.DataFrame({
        "epoch": epoc_avg_val.index.values,
        "val_error": epoc_avg_val["val_error"]
    })

    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
def _train_with_early_stopping(train_fn, validate_fn, test_fn, batch_counts,
                               validation_frequency, last_epoch, state,
                               patience_increase=2,
                               improvement_threshold=0.995):
    """Run SGD epochs until ``last_epoch`` or until patience runs out.

    ``batch_counts`` is ``(n_train, n_valid, n_test)``. ``state`` is a dict
    with the keys ``epoch``, ``patience``, ``best_validation_loss``,
    ``best_iter``, ``test_score`` and ``done_looping``; it is updated in
    place and returned so a later call can resume where this one stopped.
    """
    n_train_batches, n_valid_batches, n_test_batches = batch_counts

    while (state['epoch'] < last_epoch) and (not state['done_looping']):
        state['epoch'] += 1
        epoch = state['epoch']
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # Validate every `validation_frequency` iterations.
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_fn(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < state['best_validation_loss']:
                    # Only a significant improvement extends the patience.
                    if this_validation_loss < state['best_validation_loss'] * \
                            improvement_threshold:
                        state['patience'] = max(
                            state['patience'], iteration * patience_increase)

                    state['best_validation_loss'] = this_validation_loss
                    state['best_iter'] = iteration

                    # Score the new best model on the test set.
                    test_losses = [test_fn(i) for i in range(n_test_batches)]
                    state['test_score'] = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           state['test_score'] * 100.))

            if state['patience'] <= iteration:
                state['done_looping'] = True
                break
    return state


def test_pickle_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=10,
                    dataset='../data/mnist.pkl.gz', batch_size=20,
                    pickle_file='/scratch/z/zhaolei/lzamparo/gpu_tests/mlp_results/MLP_pickle.pkl',
                    n_hidden=500):
    """Interrupt the training of an MLP, pickle it, unpickle, and continue.

    Trains for ``n_epochs // 2`` epochs, writes the classifier to
    ``pickle_file``, loads it back, rebuilds the cost / gradients / compiled
    functions around the loaded object and trains on until ``n_epochs``.

    :param learning_rate: SGD step size
    :param L1_reg: weight of the classifier's L1 term in the cost
    :param L2_reg: weight of the classifier's squared-L2 term in the cost
    :param n_epochs: total number of epochs across both phases
    :param dataset: path handed to ``load_data``
    :param batch_size: number of rows per minibatch
    :param pickle_file: where the classifier is written and re-read
    :param n_hidden: number of hidden units handed to ``MLP``
    """
    # Local imports keep the block runnable on Python 2 and 3:
    # cPickle only exists on 2, and time.clock() is gone from 3.8+.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    import timeit

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches in each set.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    batch_counts = (n_train_batches, n_valid_batches, n_test_batches)

    ### Build the model ###
    print('... building the model')

    # Symbolic variables for the data.
    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')
    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                     n_out=10)

    def compile_functions(model):
        # Build cost, SGD updates and the train / validate / test functions
        # for `model`, all bound to that model's own parameters.
        cost = model.negative_log_likelihood(y) \
            + L1_reg * model.L1 \
            + L2_reg * model.L2_sqr

        lo = index * batch_size
        hi = (index + 1) * batch_size

        test_fn = theano.function(
            inputs=[index],
            outputs=model.errors(y),
            givens={x: test_set_x[lo:hi], y: test_set_y[lo:hi]})
        validate_fn = theano.function(
            inputs=[index],
            outputs=model.errors(y),
            givens={x: valid_set_x[lo:hi], y: valid_set_y[lo:hi]})

        gparams = [T.grad(cost, param) for param in model.params]
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(model.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={x: train_set_x[lo:hi], y: train_set_y[lo:hi]})
        return train_fn, validate_fn, test_fn

    train_model, validate_model, test_model = compile_functions(classifier)

    ### train the model ###
    print('... training')

    # Early-stopping state shared by both training phases.
    state = {
        'epoch': 0,
        'patience': 10000,  # run at least this many iterations
        'best_validation_loss': numpy.inf,
        'best_iter': 0,
        'test_score': 0.,
        'done_looping': False,
    }
    validation_frequency = min(n_train_batches, state['patience'] // 2)
    halfway_point = n_epochs // 2

    start_time = timeit.default_timer()
    _train_with_early_stopping(train_model, validate_model, test_model,
                               batch_counts, validation_frequency,
                               halfway_point, state)
    end_time = timeit.default_timer()

    print(('Halfway point reached.  Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (state['best_validation_loss'] * 100., state['best_iter'] + 1,
           state['test_score'] * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')

    print("Pickling model...")
    with open(pickle_file, 'wb') as f:
        pickle.dump(classifier, f, protocol=pickle.HIGHEST_PROTOCOL)

    print("Unpickling the model...")
    # NOTE: unpickling executes arbitrary code; pickle_file must be a path
    # this process controls (here it is the file written just above).
    with open(pickle_file, 'rb') as f:
        unpickled_classifier = pickle.load(f)
    unpickled_classifier.reconstruct_state(x, T.tanh)

    ### Re-establish the cost, grad, parameter updates ###
    # train_model is recompiled too: the original kept the function compiled
    # for the first classifier, so the second phase updated the old
    # parameters while validating the unpickled ones.
    train_model, validate_model, test_model = compile_functions(
        unpickled_classifier)

    print(("Continue training for %i epochs ") % (n_epochs - state['epoch']))
    start_time = timeit.default_timer()
    _train_with_early_stopping(train_model, validate_model, test_model,
                               batch_counts, validation_frequency,
                               n_epochs, state)
    end_time = timeit.default_timer()

    print(('End point reached.  Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (state['best_validation_loss'] * 100., state['best_iter'] + 1,
           state['test_score'] * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def train_model(filename):
    """Train an MLP on batches from ``DataLoader`` and dump its parameters.

    Runs a fixed number of SGD steps, printing the progress percentage and
    the error before / after each step, then writes the first six entries
    of ``classifier.params`` to ``W1.txt``, ``b1.txt``, ... ``b3.txt`` in the
    current directory (tab-separated rows for the even-indexed entries, one
    value per line for the odd-indexed ones).

    :param filename: path handed to ``DataLoader``
    """
    learning_rate = 0.05
    patience = 10000  # number of training steps
    size = 1000       # hidden units
    batch = 100       # batch size handed to the loader

    loader = DataLoader(filename, batch)
    rng = numpy.random.RandomState()

    print('... building the model')
    x = T.matrix('x')
    y = T.ivector('y')

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=12 * 12 * 5,
        n_hidden=size,
        n_out=12
    )

    cost = classifier.negative_log_likelihood(y)
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    print('... training')
    for i in range(patience):
        ip, op = loader.get_data()

        # NOTE(review): both functions are recompiled for every batch
        # because the batch is bound through `givens`. If get_data() returns
        # plain arrays, compiling once with inputs=[x, y] would be far
        # cheaper - confirm the loader's return type before changing.
        error_fn = theano.function(
            inputs=[],
            outputs=classifier.errors(y),
            givens={x: ip, y: op}
        )
        # Named train_step so it does not shadow this function's own name.
        train_step = theano.function(
            inputs=[],
            outputs=cost,
            updates=updates,
            givens={x: ip, y: op}
        )

        before = error_fn()
        train_step()
        after = error_fn()
        print('%s %% %s %s' % (100.0 * i / patience, before, after))

    # Fetch every value first so nothing is written if a parameter is
    # missing, then write one file per weight matrix / bias vector.
    values = [classifier.params[k].get_value() for k in range(6)]
    for layer in range(3):
        weights = values[2 * layer]
        biases = values[2 * layer + 1]

        with open('W%d.txt' % (layer + 1), 'w') as out:
            out.write('\n'.join('\t'.join('%.6f' % item for item in row)
                                for row in weights) + '\n')
        with open('b%d.txt' % (layer + 1), 'w') as out:
            out.write('\n'.join('%.6f' % item for item in biases) + '\n')
class NeuralNet():
    """Multilayer-perceptron classifier wrapping the Theano ``MLP`` class.

    Attributes:
        x: symbolic Theano matrix holding the input features
        y: symbolic Theano int vector holding the class labels
        metrics: dict of evaluation scores filled in by ``test``
        mlp: the underlying ``MLP``; only available after ``train``
    """

    def __init__(self, n_hidden_units, batch_size, output_size, metric_list="none", learning_rate=1, l1_term=0, l2_term=0, n_epochs=100, activation_function='tanh', train_p=.6, dropout=False, dropout_rate=.5, momentum=False, momentum_term=.9, adaptive_learning_rate=False):
        """Store the hyper-parameters; the network itself is built in ``train``.

        :param n_hidden_units: passed to ``MLP`` as ``n_hidden_sizes``
        :param batch_size: number of samples per minibatch
        :param output_size: passed to ``MLP`` as ``n_out``
        :param metric_list: metrics container; the string ``"none"`` selects
            a fresh dict with F1, Accuracy, AUC, Precision and Recall at 0
        :param learning_rate: gradient-descent step size
        :param l1_term: weight of the L1 term in the cost
        :param l2_term: weight of the squared-L2 term in the cost
        :param n_epochs: maximal number of training epochs
        :param activation_function: ``'relu'``, ``'tanh'``, ``'sigmoid'`` or
            a callable applied to the symbolic pre-activation
        :param train_p: fraction of the samples used for training
        :param dropout: whether dropout is enabled
        :param dropout_rate: dropout rate handed to ``MLP``; also used to
            scale the hidden weights at prediction time
        :param momentum: whether to use momentum in the updates
        :param momentum_term: momentum coefficient
        :param adaptive_learning_rate: accepted for interface compatibility;
            not used by this class
        :raises ValueError: if ``activation_function`` is neither a known
            name nor a callable
        """
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        self.dropout = dropout
        self.dropout_rate = dropout_rate

        if metric_list == "none":
            self.metrics = {"F1": 0, "Accuracy": 0, "AUC": 0, "Precision": 0, "Recall": 0}
        else:
            self.metrics = metric_list

        self.learning_rate = learning_rate
        self.L1_reg = l1_term
        self.L2_reg = l2_term
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.train_percent = train_p

        # ReLU expressed with Theano primitives
        def relu(x):
            return T.switch(x < 0, 0, x)

        known_activations = {
            'relu': relu,
            'tanh': T.tanh,
            'sigmoid': T.nnet.sigmoid,
        }
        if callable(activation_function):
            self.activation_function = activation_function
        else:
            try:
                self.activation_function = known_activations[activation_function]
            except (KeyError, TypeError):
                # Fail here instead of with an AttributeError deep in train().
                raise ValueError(
                    "unknown activation_function %r; expected one of %s "
                    "or a callable"
                    % (activation_function, sorted(known_activations)))

        self.output_size = output_size
        self.hidden_layer_sizes = n_hidden_units
        self.momentum = momentum
        self.momentum_term = momentum_term

    def _build_updates(self, cost):
        """Return the list of (shared variable, new value) training updates.

        Plain gradient descent, or gradient descent with momentum when
        ``self.momentum`` is set.
        """
        gradients = [T.grad(cost, param) for param in self.mlp.params]

        updates = []
        for param, gradient in zip(self.mlp.params, gradients):
            if self.momentum:
                # The buffer takes the parameter's shape AND dtype, so the
                # update expression keeps the parameter's type.
                previous_step = theano.shared(
                    value=numpy.zeros_like(param.get_value(borrow=True)))
                step = (-self.learning_rate * gradient
                        + self.momentum_term * previous_step)
                updates.append((param, param + step))
                updates.append((previous_step, step))
            else:
                updates.append((param, param - self.learning_rate * gradient))
        return updates

    def train(self, x_input, y_input):
        """Fit the MLP with minibatch gradient descent and early stopping.

        The first ``train_percent`` of the rows are used for training; the
        rows from there up to the largest multiple of 10 not exceeding the
        number of samples are used for validation.  Progress is printed to
        stdout and the total running time to stderr.

        :param x_input: 2-D array of features, one sample per row
        :param y_input: 1-D array of labels aligned with ``x_input``
            (presumably int32, as it is substituted for an ``ivector``)
        """
        n_samples = x_input.shape[0]
        # Slice bounds must be integers.
        train_size = int(n_samples * self.train_percent)
        # NOTE(review): rounding down to a multiple of 10 looks like a
        # leftover batch-size assumption -- confirm the intent.
        max_size = n_samples - (n_samples % 10)

        train_set_x = x_input[:train_size, :]
        train_set_y = y_input[:train_size]
        # Validation starts right after the training rows, so no sample is
        # dropped between the two sets.
        valid_set_x = x_input[train_size:max_size, :]
        valid_set_y = y_input[train_size:max_size]

        # compute number of minibatches for training and validation
        n_train_batches = train_set_x.shape[0] // self.batch_size
        n_valid_batches = valid_set_x.shape[0] // self.batch_size

        number_in = train_set_x.shape[1]

        valid_set_x = theano.shared(valid_set_x, 'valid_set_x')
        valid_set_y = theano.shared(valid_set_y, 'valid_set_y')
        train_set_x = theano.shared(train_set_x, 'train_set_x')
        train_set_y = theano.shared(train_set_y, 'train_set_y')

        index = T.lscalar('index')  # index to a [mini]batch

        self.mlp = MLP(
            rng=numpy.random.RandomState(),
            input=self.x,
            n_in=number_in,
            n_out=self.output_size,
            a_function=self.activation_function,
            n_hidden_sizes=self.hidden_layer_sizes,
            dropout=self.dropout,
            dropout_rate=self.dropout_rate
        )

        # the cost we minimize during training is the negative log likelihood
        # of the model plus the regularization terms (L1 and L2); cost is
        # expressed here symbolically
        cost = (
            self.mlp.negative_log_likelihood(self.y)
            + self.L1_reg * self.mlp.L1
            + self.L2_reg * self.mlp.L2_sqr
        )

        batch_start = index * self.batch_size
        batch_stop = (index + 1) * self.batch_size

        # compiling a Theano function that computes the mistakes that are
        # made by the model on a validation minibatch
        validate_model = theano.function(
            inputs=[index],
            outputs=self.mlp.errors(self.y),
            givens={
                self.x: valid_set_x[batch_start:batch_stop],
                self.y: valid_set_y[batch_start:batch_stop]
            }
        )

        # compiling a Theano function `train_model` that returns the cost,
        # but at the same time updates the parameters of the model
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=self._build_updates(cost),
            givens={
                self.x: train_set_x[batch_start:batch_stop],
                self.y: train_set_y[batch_start:batch_stop]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')

        # early-stopping parameters
        patience = 1000000  # look at this many minibatches regardless
        patience_increase = 2  # wait this much longer when a new best is
        # found
        improvement_threshold = 0.995  # a relative improvement of this much
        # is considered significant
        # go through this many minibatches before checking the network on
        # the validation set; in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        # No test set is evaluated in this method, so the test score that is
        # reported below stays at its initial value.
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_model(minibatch_index)
                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)

                    # Report the numeric cost of the last minibatch; the
                    # symbolic `cost` expression cannot be formatted.
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%, cost %f' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.,
                            minibatch_avg_cost
                        )
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience,
                                           iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('Optimization complete. Best validation score of %f %% '
               'obtained at iteration %i, with test performance %f %%') %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        # Timing goes to stderr; write() behaves the same on Python 2 and 3.
        sys.stderr.write('The code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

    def setup_labels(self, y):
        """Merge several binary label arrays into one multi-class vector.

        The i-th array (counting from 0) has its 0 entries replaced by
        ``2 * (i + 1)`` and its 1 entries by ``2 * (i + 1) + 1``; the
        relabeled arrays are then stacked horizontally.  The input arrays
        are left untouched.  No minimum number of label arrays is enforced.

        :param y: sequence of numpy arrays holding 0/1 labels
        :return: numpy array of dtype ``numpy.intc``; empty if ``y`` is empty
        """
        relabeled = []
        for i, yi in enumerate(y):
            negative_example_label = 2 * (i + 1)
            positive_example_label = negative_example_label + 1

            labels = numpy.array(yi)  # copy: do not modify the caller's data
            is_negative = labels == 0
            is_positive = labels == 1
            labels[is_negative] = negative_example_label
            labels[is_positive] = positive_example_label
            relabeled.append(labels)

        if not relabeled:
            return numpy.array([], dtype=numpy.intc)
        return numpy.hstack(relabeled).astype(numpy.intc)

    def test(self, x, y):
        """Predict labels for ``x`` and store the scores in ``self.metrics``.

        :param x: feature matrix handed to ``predict``
        :param y: true labels compared against the predictions
        """
        prediction = self.predict(x)

        self.metrics["F1"] = f1_score(y, prediction)
        self.metrics["Precision"] = precision_score(y, prediction)
        self.metrics["Recall"] = recall_score(y, prediction)
        self.metrics["AUC"] = roc_auc_score(y, prediction)
        self.metrics["Accuracy"] = accuracy_score(y, prediction)

    def predict(self, x):
        """Return the predicted class index for every row of ``x``.

        :param x: numpy feature matrix (must be usable as a Theano shared
            value)
        :return: array with the argmax of the softmax output per row
        """
        # Create a theano shared variable for the data to be predicted
        test_set_x = theano.shared(x, 'test_set_x')

        # Propagate through all the hidden layers in the MLP
        layer_input = test_set_x
        for hidden_layer in self.mlp.hidden_layers:
            weights = hidden_layer.W
            if self.dropout:
                # Scale the weights by the dropout rate at prediction time;
                # this builds a new expression and leaves the shared
                # variable itself unchanged.
                weights = weights * self.dropout_rate
            layer_input = self.activation_function(
                T.dot(layer_input, weights) + hidden_layer.b)

        # Get the weights and bias from the softmax output layer
        W = self.mlp.logRegressionLayer.W
        b = self.mlp.logRegressionLayer.b

        # compile the theano function for calculating the outputs from the
        # softmax layer
        get_y_prediction = theano.function(
            inputs=[],
            outputs=T.argmax(T.nnet.softmax(T.dot(layer_input, W) + b), axis=1),
            on_unused_input='ignore',
        )
        return get_y_prediction()

    def transfer_learned_weights(self, x):
        """Apply the last hidden layer's transformation to ``x``.

        Only the final hidden layer is used, so ``x`` must already have that
        layer's input dimension.
        NOTE(review): with several hidden layers the earlier ones are not
        applied -- confirm that callers expect this.

        :param x: input multiplied with the final hidden layer's weights
        :return: the activation of the final hidden layer for ``x``
        """
        final_hidden_layer = self.mlp.hidden_layers[-1]
        w = final_hidden_layer.W
        b = final_hidden_layer.b

        if self.dropout:
            w = w * self.dropout_rate

        transformation_function = theano.function(
            inputs=[],
            outputs=self.activation_function(T.dot(x, w) + b),
            on_unused_input='ignore',
        )
        return transformation_function()

    def __str__(self):
        """Return a multi-line summary of the stored metrics."""
        return "MLP:\nF1 Score: {}\nPrecision: {}\n" \
               "Recall: {}\nAccuracy: {}\nROC: {}\n".format(self.metrics['F1'],
                                                            self.metrics['Precision'],
                                                            self.metrics['Recall'],
                                                            self.metrics['Accuracy'],
                                                            self.metrics['AUC'])
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding an MLP
    classifier on top of the last sigmoid layer.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784, hidden_layers_sizes=None, n_outs=(None, None), continuous=False):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
            weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
            generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
            at least one value; defaults to [500, 500]

        :type n_outs: tuple of ints
        :param n_outs: (n_hidden, n_out) of the MLP placed on top of the
            last sigmoid layer; defaults to (10, 10)

        :type continuous: bool
        :param continuous: if True the first layer is paired with a CRBM
            instead of an RBM
        """
        if n_outs == (None, None):
            n_outs = (10, 10)
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [500, 500]

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if theano_rng is None:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector

        for i, layer_size in enumerate(hidden_layers_sizes):
            # The first layer reads the network input; every other layer
            # reads the output of the layer below it.
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=layer_size,
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_class = CRBM if (continuous and i == 0) else RBM
            rbm_layer = rbm_class(numpy_rng=numpy_rng,
                                  theano_rng=theano_rng,
                                  input=layer_input,
                                  n_visible=input_size,
                                  n_hidden=layer_size,
                                  W=sigmoid_layer.W,
                                  hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # Add the MLP classifier on top of the stacked sigmoid layers
        self.topLayer = MLP(
            rng=numpy_rng,
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_hidden=n_outs[0],
            n_out=n_outs[1])
        self.params.extend(self.topLayer.params)

        # cost for the second phase of training: the negative log likelihood
        # of the top layer
        self.finetune_cost = self.topLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.topLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        """Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k
        :return: list with one compiled function per RBM layer; each takes
            ``index`` and an optional ``lr`` (default 0.1)
        """
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            cost, updates = rbm.contrastive_divergence(learning_rate,
                                                       persistent=None, k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_function(self, train_x, train_y, batch_size, learning_rate):
        """Generates a function `train` that implements one step of
        finetuning on the minibatch with the given index.

        :param train_x: train dataset, theano.tensor.TensorType
        :param train_y: labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        :return: compiled function mapping a minibatch index to the
            finetuning cost, updating all parameters as a side effect
        """
        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(self.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_x[index * batch_size: (index + 1) * batch_size],
                self.y: train_y[index * batch_size: (index + 1) * batch_size]
            }
        )
        return train_fn

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contain all the datasets;
                        the has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        :return: tuple ``(train_fn, valid_score, test_score)``; the last two
            take no arguments and return the list of per-batch errors
        '''
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing; floor
        # division keeps the counts integral so they can be given to range()
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # The training step is the same one build_finetune_function creates.
        train_fn = self.build_finetune_function(
            train_x=train_set_x,
            train_y=train_set_y,
            batch_size=batch_size,
            learning_rate=learning_rate
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: test_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score

    def train(self, X, y, finetune_lr=1e-11, pretraining_epochs=0, pretrain_lr=0.01, k=1, training_epochs=5, batch_size=1000, verbose=False):
        """
        Train method: layer-wise pretraining followed by finetuning.

        :param verbose: verbosity level; when truthy the cost of the last
            minibatch is printed every `verbose` epochs
        :param X: data
        :param y: labels
        :type finetune_lr: float
        :param finetune_lr: learning rate used in the finetune stage
        :type pretraining_epochs: int
        :param pretraining_epochs: number of epoch to do pretraining
        :type pretrain_lr: float
        :param pretrain_lr: learning rate to be used during pre-training
        :type k: int
        :param k: number of Gibbs steps in CD/PCD
        :type training_epochs: int
        :param training_epochs: maximal number of iterations to run the
            optimizer
        :type batch_size: int
        :param batch_size: the size of a minibatch
        :return: self
        """
        train_x, train_y = shared_dataset((X, y))

        train_shape = train_x.get_value(borrow=True).shape
        # Messages are pre-formatted into one string so the output is the
        # same on Python 2 and Python 3.
        print("Train set shape: %s" % (train_shape,))

        # Floor division: a float count would break range() on Python 3.
        # NOTE: fewer samples than `batch_size` yields zero batches, in
        # which case no training step is performed.
        n_train_batches = train_shape[0] // batch_size

        print('... building the model')

        #########################
        # PRETRAINING THE MODEL #
        #########################
        print('... getting the pretraining functions')
        pretraining_fns = self.pretraining_functions(train_set_x=train_x,
                                                     batch_size=batch_size,
                                                     k=k)

        print('... pre-training the model')
        start_time = timeit.default_timer()
        # Pre-train layer-wise
        for i in range(self.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set
                c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                     for batch_index in range(n_train_batches)]
                header = 'Pre-training layer %i, epoch %d, cost ' % (i, epoch)
                print('%s %s' % (header, numpy.mean(c)))

        end_time = timeit.default_timer()
        sys.stderr.write('The pretraining code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

        ########################
        # FINETUNING THE MODEL #
        ########################

        # get the training function for the model
        print('... getting the finetuning function')
        train_fn = self.build_finetune_function(
            train_x=train_x,
            train_y=train_y,
            batch_size=batch_size,
            learning_rate=finetune_lr
        )

        print('... finetuning the model')

        # Stays None when there are no minibatches, so the verbose report
        # below cannot hit an unbound name.
        minibatch_avg_cost = None
        epoch = 0
        while epoch < training_epochs:
            epoch += 1
            for minibatch_index in range(n_train_batches):
                minibatch_avg_cost = train_fn(minibatch_index)

            if verbose and epoch % verbose == 0:
                print("Epoch {0}, cost: {1}".format(epoch, minibatch_avg_cost))

        return self

    def predict(self, X):
        """Return the top layer's predicted class for every row of ``X``."""
        predict_fn = theano.function(inputs=[self.sigmoid_layers[0].input],
                                     outputs=self.topLayer.y_pred)
        return predict_fn(X)

    def predict_proba(self, X):
        """Return the top layer's class probabilities for every row of ``X``."""
        predict_fn = theano.function(inputs=[self.sigmoid_layers[0].input],
                                     outputs=self.topLayer.p_y_given_x)
        return predict_fn(X)