def colour_histogram_with_filters():
    """Evaluate 16-bin colour histograms with and without HSV preprocessing.

    One RF and one SVM model is built for each preprocessing variant, then
    all four are handed to ``run_training_and_tests`` on the 'kaggle' data.
    """
    # Factories rather than values so every model receives its own list.
    preprocessing_variants = (
        ('no preprocessing', list),
        ('hsv', lambda: [hsv_model]),
    )

    candidates = []
    for prefix, classifier in (('RF', rf.RF), ('SVM', svm.SVM)):
        for suffix, make_steps in preprocessing_variants:
            candidates.append(
                classifier(
                    label='{} with {}'.format(prefix, suffix),
                    preprocessing=make_steps(),
                    features=[colour_histogram(bins=16)],
                )
            )

    run_training_and_tests(
        'system_selection_2_colour_hist_preprocessing',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def svm_gamma():
    """Compare polynomial (degree 3) SVM models that differ only in gamma.

    The first model leaves ``gamma`` unspecified, so ``svm.SVM`` applies
    whatever default it defines; the others pin gamma to 0.001, 0.01, 0.1
    and 1.0. All use haralick features after ``hsv_saturation_threshold``
    preprocessing.
    """

    def build(label, **overrides):
        # A fresh ``features`` list is created for every model.
        return svm.SVM(
            label=label,
            preprocessing=hsv_saturation_threshold,
            features=[haralick],
            kernel='poly',
            degree=3,
            **overrides
        )

    # (text used in the label, numeric gamma value)
    fixed_gammas = (
        ('0.001', 0.001),
        ('0.01', 0.01),
        ('0.1', 0.1),
        ('1', 1.0),
    )

    candidates = [build('SVM with auto-selected gamma')]
    for text, value in fixed_gammas:
        candidates.append(build('SVM with {} gamma'.format(text), gamma=value))

    run_training_and_tests(
        'system_selection_3_svm_gamma',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def svm_kernel():
    """Compare an RBF-kernel SVM against polynomial kernels of several degrees.

    Every model uses haralick features after ``hsv_saturation_threshold``
    preprocessing; only the kernel settings differ (RBF, then polynomial
    degrees 1, 2, 3 and 5).
    """

    def build(label, **kernel_settings):
        # A fresh ``features`` list is created for every model.
        return svm.SVM(
            label=label,
            preprocessing=hsv_saturation_threshold,
            features=[haralick],
            **kernel_settings
        )

    candidates = [build('SVM with RBF kernel', kernel='rbf')]
    for degree in (1, 2, 3, 5):
        candidates.append(
            build(
                'SVM with polynomial kernel, degree {}'.format(degree),
                kernel='poly',
                degree=degree,
            )
        )

    run_training_and_tests(
        'system_selection_3_svm_kernel',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def main():
    """Train the models selected on the command line.

    Reads the training feature files and then either:

    * runs ``train_cross_validation`` when no development files were given, or
    * reads the development files and calls ``train`` with both data sets.
    """
    # Get the arguments
    args = parse_arguments()

    # Get the training parameters
    class_size = args.class_size if args.class_size is not None else DEFAULT_CLASS_SIZE

    # Get the train and dev feature files
    train_files = args.train_files
    dev_files = args.dev_files

    # Create the list of models to train
    models = []
    if args.svm:
        models.append(svm.SVM())
    if args.mlp:
        models.append(mlp.MLP(10))

    # The training features are needed on both paths, so read them once.
    train_data, train_labels = utils.read_features(train_files)

    if dev_files is None:
        # No development files: perform cross-validation instead.
        models = train_cross_validation(train_data, train_labels, models, class_size)
    else:
        # Otherwise evaluate against the development files.
        dev_data, dev_labels = utils.read_features(dev_files)
        # NOTE(review): `models` is not passed to train() here, so the models
        # selected via --svm / --mlp appear unused on this path. Confirm
        # against train()'s signature.
        train(train_data, train_labels, dev_data, dev_labels)
def run_experiment():
    """Run experiment one: the tuned SVM and RF models on the kaggle data.

    The SVM (polynomial kernel, degree 3, haralick features) is constructed
    first and the RF (100 estimators, max depth 100, 64-bin greyscale
    histogram) second; they are evaluated in the order RF, SVM.
    """
    svm_settings = dict(
        label='Optimised SVM model',
        preprocessing=hsv_saturation_threshold,
        features=[haralick],
        kernel='poly',
        degree=3,
    )
    tuned_svm = svm.SVM(**svm_settings)

    rf_settings = dict(
        label='Optimised RF model',
        preprocessing=hsv_saturation,
        features=[greyscale_histogram(bins=64)],
        n_estimators=100,
        max_depth=100,
    )
    tuned_rf = rf.RF(**rf_settings)

    run_training_and_tests(
        'experiment_1_NLM_performance',
        'kaggle',
        [tuned_rf, tuned_svm],
        n_iterations=10,
        n_training_images=5000,
        n_test_images=20000,
    )
def main():
    """Load a saved model and report its accuracy on the test feature files.

    Exits with status 1 (after printing an error) when no test files were
    supplied, when neither ``--svm`` nor ``--mlp`` selected a model, or when
    the model produced no predictions to score.
    """
    # Read the arguments
    args = parse_arguments()

    # Get the class size
    class_size = args.class_size if args.class_size is not None else DEFAULT_CLASS_SIZE

    # Ensure at least 1 test file is passed in
    test_files = args.test_files
    if test_files is None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Error. Please provide testing feature files')
        raise SystemExit(1)

    # Read the features from the test files
    test_data, test_labels = utils.read_features(test_files)
    # The third value returned by partition is unused here; the original
    # bound it to `map`, which shadowed the builtin.
    test_data, test_labels, _ = utils.partition(test_data, test_labels, class_size)

    # Read and load the model. Start from None so the check below works
    # instead of raising UnboundLocalError when no model flag is set.
    model = None
    if args.svm:
        model = svm.SVM()
        model.load(args.model)
    if args.mlp:
        # If both flags are given, the MLP replaces the SVM (as before).
        model = mlp.MLP(10)
        model.load(args.model)

    # Ensure a model was created
    if model is None:
        print('Error. Model invalid')
        raise SystemExit(1)

    # Test the model
    predictions = model.predict(test_data)
    n_predictions = len(predictions)
    if n_predictions == 0:
        # Avoid a ZeroDivisionError when there is nothing to score.
        print('Error. No predictions to score')
        raise SystemExit(1)

    n_correct = sum(
        1 for label, predict in zip(test_labels, predictions) if label == predict
    )
    # 1.0 * ... forces true division on Python 2.
    accuracy = 1.0 * n_correct / n_predictions

    # Output results (two spaces reproduce the old `print a, b` output).
    print('Accuracy is:  %s' % (accuracy,))
print("Testing error = %.4f" % te_error)
print("Runtime: %s seconds" % (time.time() - start_time))

if question == "svm":
    # Load the pickled MNIST splits. pickle can execute arbitrary code while
    # loading, so only use a trusted copy of this file.
    with gzip.open(os.path.join('data/mnist.pkl.gz'), 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding="latin1")

    X, y = train_set
    Xtest, ytest = test_set

    # Standardise with the exact training mean/std. The previous int(...)
    # casts truncated both statistics; for data scaled to [0, 1] they became
    # 0, so the division produced inf/nan.
    mu = np.mean(X)
    sigma = np.std(X)
    if sigma == 0:
        # Constant data: skip scaling instead of dividing by zero.
        sigma = 1.0
    X -= mu
    X /= sigma
    # Apply the training statistics to the test set as well, so the model
    # is evaluated on inputs scaled like the ones it was fitted on.
    Xtest = (Xtest - mu) / sigma

    binarizer = LabelBinarizer()
    Y = binarizer.fit_transform(y)

    start_time = time.time()

    model = svm.SVM(epochs=20, batchSize=2500)
    model.fit(X, y)

    # Error rate = fraction of predictions that differ from the labels.
    y_pred = model.predict(X)
    tr_error = np.mean(y_pred != y)

    y_pred = model.predict(Xtest)
    te_error = np.mean(y_pred != ytest)

    print("Training error = %.4f" % tr_error)
    print("Testing error = %.4f" % te_error)
    print("Runtime: %s seconds" % (time.time() - start_time))

if question == "mlp":
    with gzip.open(os.path.join('data/mnist.pkl.gz'), 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding="latin1")

    X, y = train_set
    Xtest, ytest = test_set
def initial_grey_histogram_bins_evaluation():
    """Evaluate how the greyscale-histogram bin count affects RF and SVM.

    Builds one RF and one SVM model per bin count (2, 4, 8, 16, 32, 64), with
    no preprocessing, and evaluates all RF models followed by all SVM models.
    """
    bin_counts = (2, 4, 8, 16, 32, 64)

    forest_models = [
        rf.RF(
            label='grey {} bin RF'.format(n_bins),
            preprocessing=[],
            features=[greyscale_histogram(bins=n_bins)],
        )
        for n_bins in bin_counts
    ]
    vector_models = [
        svm.SVM(
            label='grey {} bin SVM'.format(n_bins),
            preprocessing=[],
            features=[greyscale_histogram(bins=n_bins)],
        )
        for n_bins in bin_counts
    ]

    run_training_and_tests(
        'system_selection_1_hist_grey',
        'kaggle',
        forest_models + vector_models,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def hist_hu_moments_haralick_evaluation():
    """Compare haralick, hu moments, grey and colour histogram features.

    Each feature is paired with an RF and an SVM model (no preprocessing);
    the RF models are built and evaluated first, then the SVM models.
    """
    # (label suffix, factory producing the feature) -- factories so that the
    # histogram extractors are created anew for every model.
    feature_specs = (
        ('haralick', lambda: haralick),
        ('hu moments', lambda: hu_moments),
        ('grey hist', lambda: greyscale_histogram(bins=64)),
        ('colour hist', lambda: colour_histogram(bins=16)),
    )

    candidates = []
    for prefix, classifier in (('RF', rf.RF), ('SVM', svm.SVM)):
        for suffix, make_feature in feature_specs:
            candidates.append(
                classifier(
                    label='{} with {}'.format(prefix, suffix),
                    preprocessing=[],
                    features=[make_feature()],
                )
            )

    run_training_and_tests(
        'system_selection_1_hist_hu_haralick',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def initial_colour_histogram_bins_evaluation():
    """Evaluate how the colour-histogram bin count affects RF and SVM.

    RF models are built for 2, 4, 8, 16 and 32 bins; SVM models for 2, 4, 8
    and 16 bins only. No preprocessing is applied. RF models are evaluated
    before SVM models.
    """
    # The SVM sweep deliberately mirrors the original and stops at 16 bins.
    forest_bins = (2, 4, 8, 16, 32)
    vector_bins = (2, 4, 8, 16)

    candidates = []
    for n_bins in forest_bins:
        candidates.append(
            rf.RF(
                label='colour {} bin RF'.format(n_bins),
                preprocessing=[],
                features=[colour_histogram(bins=n_bins)],
            )
        )
    for n_bins in vector_bins:
        candidates.append(
            svm.SVM(
                label='colour {} bin SVM'.format(n_bins),
                preprocessing=[],
                features=[colour_histogram(bins=n_bins)],
            )
        )

    run_training_and_tests(
        'system_selection_1_hist_colour',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def greyscale_histogram_with_filters():
    """Evaluate 64-bin greyscale histograms under each preprocessing variant.

    For RF and then SVM, one model is built per preprocessing variant. The
    models are constructed in the order listed in ``variants`` but evaluated
    with the contrast variant ahead of the threshold variant.
    """
    # (label suffix, factory for the preprocessing argument), in construction
    # order. Factories keep the literal lists distinct per model.
    variants = (
        ('no preprocessing', lambda: []),
        ('hsv', lambda: [hsv_model]),
        ('hsv and isolate saturation', lambda: hsv_saturation),
        ('hsv, isolate saturation, threshold', lambda: hsv_saturation_threshold),
        ('hsv, isolate saturation, contrast', lambda: hsv_saturation_contrast),
    )
    # Indices into ``variants``: contrast (4) is evaluated before threshold (3).
    evaluation_order = (0, 1, 2, 4, 3)

    candidates = []
    for prefix, classifier in (('RF', rf.RF), ('SVM', svm.SVM)):
        built = []
        for suffix, make_steps in variants:
            built.append(
                classifier(
                    label='{} with {}'.format(prefix, suffix),
                    preprocessing=make_steps(),
                    features=[greyscale_histogram(bins=64)],
                )
            )
        candidates.extend(built[position] for position in evaluation_order)

    run_training_and_tests(
        'system_selection_2_grey_hist_preprocessing',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )
def haralick_with_filters():
    """Evaluate haralick texture features under each preprocessing variant.

    For RF and then SVM, one model is built per preprocessing variant. The
    models are constructed in the order listed in ``variants`` but evaluated
    with the contrast variant ahead of the threshold variant.
    """
    # (label suffix, factory for the preprocessing argument), in construction
    # order.
    # NOTE(review): the 'hsv' variant passes the name `hsv` directly rather
    # than a list literal; confirm `hsv` is the intended preprocessing chain.
    variants = (
        ('no preprocessing', lambda: []),
        ('hsv', lambda: hsv),
        ('hsv and isolate saturation', lambda: hsv_saturation),
        ('hsv, isolate saturation, threshold', lambda: hsv_saturation_threshold),
        ('hsv, isolate saturation, contrast', lambda: hsv_saturation_contrast),
    )
    # Indices into ``variants``: contrast (4) is evaluated before threshold (3).
    evaluation_order = (0, 1, 2, 4, 3)

    candidates = []
    for prefix, classifier in (('RF', rf.RF), ('SVM', svm.SVM)):
        built = []
        for suffix, make_steps in variants:
            built.append(
                classifier(
                    label='{} with {}'.format(prefix, suffix),
                    preprocessing=make_steps(),
                    features=[haralick],
                )
            )
        candidates.extend(built[position] for position in evaluation_order)

    run_training_and_tests(
        'system_selection_2_haralick_preprocessing',
        'kaggle',
        candidates,
        n_iterations=5,
        n_images=10000,
        training_split=0.5,
    )