def parameter_tuning(grid_search: bool = True):
    """Search for the best SVC hyperparameters and save the winning model.

    Loads and preprocesses the image data, fits the search object returned by
    ``get_grid_search_cv()`` on it, logs a report of the search results and
    writes the best estimator to ``config.MODELS_SUBDIR``.

    Args:
        grid_search: When True, run the exhaustive grid search. The
            randomized-search alternative is not implemented yet.

    Raises:
        NotImplementedError: If ``grid_search`` is False.
    """
    if not grid_search:
        # TODO: implement `get_randomized_search_cv()` and dispatch to it here.
        # Fail fast, before any data is loaded: this path used to fall through
        # with `search_cv` unbound and crash later with an UnboundLocalError.
        raise NotImplementedError(
            'Randomized search is not implemented yet; '
            'call parameter_tuning(grid_search=True) instead.'
        )

    # The search explores a parameter grid over the hyperparameters of SVC();
    # the grid itself is defined inside `get_grid_search_cv()`.
    clf = SVC()

    images, labels = utils.load_image_data()
    images = preprocess.preprocess(images)

    start = time.time()
    search_cv = get_grid_search_cv(clf)
    search_cv.fit(images, labels)

    logger.log_info(
        "GridSearchCV took %.2f seconds for %d candidate parameter settings."
        % (time.time() - start, len(search_cv.cv_results_['params']))
    )
    report(search_cv.cv_results_)

    # By default, sklearn refits the model with the best parameters on the
    # entire dataset; that refitted model is exposed as `best_estimator_`.
    best_clf = search_cv.best_estimator_
    best_score = search_cv.best_score_
    logger.log_info('Best clf (%.3f validation score):' % best_score)
    logger.log_info(best_clf)

    # Save the best classifier for later use (see the model path built in
    # `predict_images()`, which uses the same file-name pattern).
    model_path = os.path.join(config.MODELS_SUBDIR, f'svm_grid_search={grid_search}.clf')
    logger.log_info(f'Saving fitted model to {model_path}')
    utils.save_binary(best_clf, model_path)
def full_train():
    """Train a linear-kernel SVM on a train/test split and evaluate it.

    The image data is loaded, split, transformed by
    ``preprocess.process_data()`` and used to fit an ``SVC(kernel='linear')``.
    Test-set predictions are then passed to ``utils.evaluate_prediction()``
    and ``utils.error_analysis()``. Progress and timings are logged.
    """
    logger.log_info('>>>>>>>>>>>>>>>>Run svm.py')

    # Load the data and split it into training and test partitions.
    images, labels = utils.load_image_data()
    train_images, test_images, train_labels, test_labels = utils.prepare_data(
        X=images, y=labels
    )

    # Transform both partitions, timing the step.
    prev_time = time.time()
    logger.log_info("Start transforming data...")
    train_features, test_features = preprocess.process_data(train_images, test_images)
    logger.log_info(f'Finished transforming data in {round(time.time() - prev_time, 2)} seconds')

    # Fit an SVM with a linear kernel; every other hyperparameter keeps its
    # default value.
    prev_time = time.time()
    logger.log_info("Start fitting SVM classifier...")
    model = SVC(kernel='linear')
    model.fit(train_features, train_labels)
    logger.log_info(f'Finished training in {round(time.time() - prev_time, 2)} seconds')

    # Score the held-out partition; the error analysis receives the
    # untransformed test images.
    test_predictions = model.predict(test_features)
    utils.evaluate_prediction(predictions=test_predictions, y_test=test_labels)
    utils.error_analysis(test_images, test_predictions, test_labels)

    logger.log_info('>>>>>>>>>>>>>>>>End of svm.py')
def predict_images(grid_search: bool = True, noise_intensity: float = 2):
    """Evaluate the saved best model on a noise-corrupted test split.

    Assumes the model was already trained and saved by ``parameter_tuning()``
    called with the same ``grid_search`` value, because the model file name is
    derived from it.

    Args:
        grid_search: Selects which saved model file to load.
        noise_intensity: Multiplier applied to the standard-normal noise that
            is added to the test features. Defaults to 2, the value that was
            previously hard-coded.
    """
    images, labels = utils.load_image_data()
    images_train, images_test, _, y_test = utils.prepare_data(X=images, y=labels)
    _, X_test = preprocess.process_data(images_train, images_test)

    model_path = os.path.join(config.MODELS_SUBDIR, f'svm_grid_search={grid_search}.clf')
    clf = utils.load_binary(model_path)

    # To see how the best model generalizes to unseen data, corrupt the test
    # images with Gaussian noise so they are not exactly the same as the ones
    # the model has already seen. The noise is unseeded, so results vary
    # between runs.
    # Use the `np` alias like the rest of the module; the original referenced
    # the bare name `numpy` here.
    noise = np.random.normal(0, 1, X_test.shape) * noise_intensity
    # NOTE(review): clipping to [0, 255] assumes the processed features are
    # still on the raw pixel scale - confirm against preprocess.process_data().
    X_test = np.clip(X_test + noise, 0, 255)

    predictions = clf.predict(X_test)
    utils.evaluate_prediction(predictions=predictions, y_test=y_test)
def cross_validation():
    """Train and evaluate a linear-kernel SVM on every cross-validation fold.

    For each fold yielded by ``utils.prepare_cross_validation_data()`` the
    data is transformed, an ``SVC(kernel='linear')`` is fitted, and the value
    returned by ``utils.evaluate_prediction()`` is collected. The mean, min,
    max and standard deviation of the collected values are logged at the end.

    Raises:
        ValueError: If no folds were produced, so there is nothing to
            summarize.
    """
    images, labels = utils.load_image_data()
    fold_data = utils.prepare_cross_validation_data(X=images, y=labels)

    logger.log_info('>>>>>>>>>>>>>>>>Run svm_cross_validation.py')
    accuracies = []
    for fold_no, (images_train, images_test, y_train, y_test) in enumerate(fold_data, start=1):
        logger.log_info(f'Training and evaluating fold {fold_no}...')
        X_train, X_test = preprocess.process_data(images_train, images_test)

        # Fit an SVM with a linear kernel; every other hyperparameter keeps
        # its default value.
        prev_time = time.time()
        logger.log_info("Start fitting SVM classifier...")
        clf = SVC(kernel='linear')
        clf.fit(X_train, y_train)
        logger.log_info(f'Finished training in {round(time.time() - prev_time, 2)} seconds')

        predictions = clf.predict(X_test)
        # Presumably evaluate_prediction() returns the fold accuracy as a
        # number - verify against utils.
        acc = utils.evaluate_prediction(predictions=predictions, y_test=y_test)
        accuracies.append(acc)

    if not accuracies:
        # np.min / np.max fail on an empty sequence with an opaque reduction
        # error; raise the same exception type with a clear message instead.
        raise ValueError('No cross-validation folds were produced; cannot summarize accuracies.')

    logger.log_info(
        'mean accuracy: %.3f, min: %.3f, max: %.3f, std: %.3f'
        % (np.mean(accuracies), np.min(accuracies), np.max(accuracies), np.std(accuracies))
    )
    logger.log_info('>>>>>>>>>>>>>>>>End of svm_cross_validation.py')