import glob
import os
import timeit

import joblib  # older scikit-learn setups used: from sklearn.externals import joblib
import lasagne
import numpy as np
import theano
import theano.tensor as T
from lasagne.regularization import regularize_layer_params, l2
from sklearn.utils import shuffle

# Helpers expected from elsewhere in this project: pull_data(), normalize(),
# plot_curves().  The constants BATCH_SIZE, L2_REG, VALIDATION_FREQUENCY and
# FIGURE_SAVE_DIR are likewise assumed to be defined alongside the ones below.


def main(num_epochs=500):
    start_time = timeit.default_timer()

    # Load the dataset
    print("Loading data...")
    trainX, trainY, valX, valY, testX, testY = pull_data()
    trainX = normalize(trainX.reshape(trainX.shape[0], 1, DIM, DIM))
    valX = normalize(valX.reshape(valX.shape[0], 1, DIM, DIM))
    testX = normalize(testX.reshape(testX.shape[0], 1, DIM, DIM))
    # Labels arrive in 1..N_CLASSES; shift them to the 0-based range
    # that categorical_crossentropy expects.
    trainY = trainY - 1
    valY = valY - 1
    testY = testY - 1
    trainX, trainY = shuffle(trainX, trainY)
    valX, valY = shuffle(valX, valY)
    testX, testY = shuffle(testX, testY)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    output_var = T.ivector('targets')

    model = build_cnn(input_var)
    print("[X] CNN defined.")

    model_params = lasagne.layers.get_all_params(model, trainable=True)
    sh_lr = theano.shared(lasagne.utils.floatX(LEARNING_RATE))

    # Two output expressions over the same graph: the "noisy" one keeps
    # stochastic layers (e.g. dropout) active for training, while the
    # deterministic "true" one is used for scoring.
    noisy_output = lasagne.layers.get_output(model, input_var, deterministic=False)
    true_output = lasagne.layers.get_output(model, input_var, deterministic=True)
    # Hard class predictions (useful for the confusion-matrix plots
    # commented out below).
    noisy_prediction = T.argmax(noisy_output, 1)
    true_prediction = T.argmax(true_output, 1)

    l2_loss = regularize_layer_params(model, l2) * L2_REG

    # Loss expressions
    noisy_cost = T.mean(T.nnet.categorical_crossentropy(noisy_output, output_var)) + l2_loss
    true_cost = T.mean(T.nnet.categorical_crossentropy(true_output, output_var)) + l2_loss

    # Error expressions
    noisy_error = 1.0 - T.mean(lasagne.objectives.categorical_accuracy(noisy_output, output_var))
    true_error = 1.0 - T.mean(lasagne.objectives.categorical_accuracy(true_output, output_var))

    ## Plain stochastic gradient descent updates:
    #updates = lasagne.updates.sgd(noisy_cost, model_params, learning_rate=sh_lr)
    # Stochastic gradient descent with Nesterov momentum:
    updates = lasagne.updates.nesterov_momentum(
        noisy_cost, model_params, learning_rate=sh_lr, momentum=0.99)

    train = theano.function([input_var, output_var], [noisy_cost, noisy_error],
                            updates=updates, allow_input_downcast=True)
    get_score = theano.function([input_var, output_var], [true_cost, true_error],
                                allow_input_downcast=True)

    best_validation_cost = np.inf
    best_iter = 0
    n_train_batches = int(np.ceil(trainX.shape[0] / float(BATCH_SIZE)))

    plot_iters = []
    plot_train_cost = []
    plot_train_error = []
    plot_valid_cost = []
    plot_valid_error = []
    plot_test_cost = []
    plot_test_error = []

    epoch = 0
    print("[X] CNN begins its training.")
    try:
        # Train for at most num_epochs epochs; Ctrl-C stops early, and the
        # best parameters seen so far have already been saved to disk.
        while epoch < num_epochs:
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                iter = (epoch - 1) * n_train_batches + minibatch_index
                if iter % 100 == 0:
                    print("[O] Training at iteration %d." % iter)
                batch_end = np.minimum((minibatch_index + 1) * BATCH_SIZE, trainX.shape[0])
                cost_ij = train(trainX[minibatch_index * BATCH_SIZE:batch_end],
                                trainY[minibatch_index * BATCH_SIZE:batch_end])

                if (iter + 1) % VALIDATION_FREQUENCY == 0:
                    train_cost, train_error = get_score(trainX, trainY)
                    valid_cost, valid_error = get_score(valX, valY)
                    test_cost, test_error = get_score(testX, testY)
                    plot_train_cost.append(train_cost)
                    plot_train_error.append(train_error)
                    plot_valid_cost.append(valid_cost)
                    plot_valid_error.append(valid_error)
                    plot_test_cost.append(test_cost)
                    plot_test_error.append(test_error)
                    plot_iters.append(iter)

                    ## plotting functions
                    if not os.path.exists(FIGURE_SAVE_DIR):
                        os.makedirs(FIGURE_SAVE_DIR)
                    plot_curves(plot_iters, plot_train_cost, plot_valid_cost,
                                'Training Cost', 'Validation Cost', 'train_val_cost.pdf')
                    plot_curves(plot_iters, plot_train_error, plot_valid_error,
                                'Training Error', 'Validation Error', 'train_val_error.pdf')
                    #plot_cm(train_pred, trainY, 'Confusion Matrix on the Training Set', 'cm_train.pdf')
                    #plot_cm(valid_pred, valY, 'Confusion Matrix on the Validation Set', 'cm_valid.pdf')
                    #plot_cm(test_pred, testY, 'Confusion Matrix on the Test Set', 'cm_test.pdf')

                    print("--> Epoch %i, minibatch %i/%i has training true cost \t %f."
                          % (epoch, minibatch_index + 1, n_train_batches, train_cost))
                    print("--> Epoch %i, minibatch %i/%i has validation true cost \t %f and error of \t %f %%."
                          % (epoch, minibatch_index + 1, n_train_batches, valid_cost, valid_error * 100.))

                    if valid_cost < best_validation_cost:
                        print("----> New best score found!")
                        print("--> Test cost of %f and test error of %f." % (test_cost, test_error))
                        if not os.path.exists(PARAM_SAVE_DIR):
                            os.makedirs(PARAM_SAVE_DIR)
                        for f in glob.glob(PARAM_SAVE_DIR + '/*'):
                            os.remove(f)
                        all_param_values = lasagne.layers.get_all_param_values(model)
                        joblib.dump(all_param_values, os.path.join(PARAM_SAVE_DIR, 'params.pkl'))
                        print("----> Parameters saved.")
                        best_validation_cost = valid_cost
                        best_iter = iter
    except KeyboardInterrupt:
        pass

    end_time = timeit.default_timer()
    print("--> Best validation score of %f." % best_validation_cost)
    print("--> Total runtime %.2f minutes." % ((end_time - start_time) / 60.))

    print("[X] Saving the scores.")
    joblib.dump(plot_iters, os.path.join(PARAM_SAVE_DIR, "iters.pkl"))
    joblib.dump(plot_train_cost, os.path.join(PARAM_SAVE_DIR, "train_cost.pkl"))
    joblib.dump(plot_train_error, os.path.join(PARAM_SAVE_DIR, "train_error.pkl"))
    joblib.dump(plot_valid_cost, os.path.join(PARAM_SAVE_DIR, "valid_cost.pkl"))
    joblib.dump(plot_valid_error, os.path.join(PARAM_SAVE_DIR, "valid_error.pkl"))
    joblib.dump(plot_test_cost, os.path.join(PARAM_SAVE_DIR, "test_cost.pkl"))
    joblib.dump(plot_test_error, os.path.join(PARAM_SAVE_DIR, "test_error.pkl"))
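
# build_cnn() is called by main() but was not defined in this file. Below is a
# minimal sketch, assuming the same layer stack as the __main__ block further
# down; the project's real helper may differ (for instance it may insert
# dropout layers, which would explain main()'s noisy/deterministic split).
def build_cnn(input_var=None):
    # Input: batches of single-channel DIM x DIM images.
    network = lasagne.layers.InputLayer(shape=(None, 1, DIM, DIM),
                                        input_var=input_var)
    # Three conv/pool stages with shrinking filter sizes.
    network = lasagne.layers.Conv2DLayer(network, num_filters=16, filter_size=(8, 8))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(network, num_filters=48, filter_size=(5, 5))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(network, num_filters=60, filter_size=(2, 2))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    # Fully connected head ending in a softmax over the N_CLASSES classes.
    network = lasagne.layers.DenseLayer(network, num_units=256,
                                        W=lasagne.init.GlorotUniform())
    network = lasagne.layers.DenseLayer(network, num_units=N_CLASSES,
                                        nonlinearity=lasagne.nonlinearities.softmax,
                                        W=lasagne.init.GlorotUniform())
    return network
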
PARAM_SAVE_DIR = "./params"

# Data params:
IMAGE_WIDTH = 41
IMAGE_HEIGHT = 41
DIM = IMAGE_WIDTH  # main() works with square DIM x DIM images
N_CLASSES = 15

# Conv net params:
NUM_EPOCHS = 8000
LEARNING_RATE = 0.005

if __name__ == "__main__":
    trainX, trainY, valX, valY, testX, testY = pull_data()

    print("Building the conv net...")
    # 41x41 single-channel input.
    layer0 = lasagne.layers.InputLayer(shape=(None, 1, IMAGE_WIDTH, IMAGE_HEIGHT))
    # Three conv/pool stages.
    layer1 = lasagne.layers.Conv2DLayer(layer0, num_filters=16, filter_size=(8, 8))
    layer2 = lasagne.layers.MaxPool2DLayer(layer1, pool_size=(2, 2))
    layer3 = lasagne.layers.Conv2DLayer(layer2, num_filters=48, filter_size=(5, 5))
    layer4 = lasagne.layers.MaxPool2DLayer(layer3, pool_size=(2, 2))
    layer5 = lasagne.layers.Conv2DLayer(layer4, num_filters=60, filter_size=(2, 2))
    layer6 = lasagne.layers.MaxPool2DLayer(layer5, pool_size=(2, 2))
    # Fully connected head ending in a softmax over the 15 classes.
    layer7 = lasagne.layers.DenseLayer(layer6, num_units=256,
                                       W=lasagne.init.GlorotUniform())
    layer8 = lasagne.layers.DenseLayer(
        layer7, num_units=N_CLASSES,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.GlorotUniform())
    model = layer8
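
    # A sketch of reloading the snapshot that main() saves: rebuild the same
    # architecture (done above), then restore the stored parameter values.
    # The `param_file` path matches the 'params.pkl' written by main().
    param_file = os.path.join(PARAM_SAVE_DIR, 'params.pkl')
    if os.path.exists(param_file):
        lasagne.layers.set_all_param_values(model, joblib.load(param_file))
        print("Restored parameters from %s." % param_file)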