예제 #1
0
def parameter_tuning(grid_search: bool = True):
    """Search for the best SVC hyperparameters and save the winning model.

    Loads and preprocesses the image data, fits a cross-validated search
    object on it, logs the timing and the search results, and stores the
    best estimator under ``config.MODELS_SUBDIR``.

    Args:
        grid_search: When truthy, use the search object returned by
            ``get_grid_search_cv``. The randomized-search alternative is
            not implemented yet.

    Raises:
        NotImplementedError: If ``grid_search`` is falsy.
    """
    if not grid_search:
        # TODO: implement `get_randomized_search_cv()` and build `search_cv` from it here.
        # Fail before any data is loaded: without a search object there is
        # nothing to fit, and continuing would only crash later on an
        # unbound `search_cv`.
        raise NotImplementedError(
            "Randomized search is not implemented yet: implement "
            "get_randomized_search_cv() or call parameter_tuning(grid_search=True)."
        )

    # The search object needs a base estimator; the parameter grid it explores
    # is defined by get_grid_search_cv() based on the hyperparameters of SVC().
    clf = SVC()
    images, labels = utils.load_image_data()
    images = preprocess.preprocess(images)

    # The timer covers both building the search object and fitting it.
    start = time.time()
    search_cv = get_grid_search_cv(clf)
    search_cv.fit(images, labels)

    logger.log_info(
        "GridSearchCV took %.2f seconds for %d candidate parameter settings." %
        (time.time() - start, len(search_cv.cv_results_['params'])))
    report(search_cv.cv_results_)

    # By default, sklearn will refit the model with the best parameters on the entire dataset
    # This refitted model is accessible by calling `search_cv.best_estimator_`
    best_clf = search_cv.best_estimator_
    best_score = search_cv.best_score_

    logger.log_info('Best clf (%.3f validation score):' % best_score)
    logger.log_info(best_clf)

    # Save the best classifier for later use; the file name encodes which
    # search strategy produced it.
    model_path = os.path.join(config.MODELS_SUBDIR,
                              f'svm_grid_search={grid_search}.clf')
    logger.log_info(f'Saving fitted model to {model_path}')
    utils.save_binary(best_clf, model_path)
예제 #2
0
def full_train():
    """Train a linear-kernel SVC on a train/test split and evaluate it.

    Loads the image data, splits it, preprocesses both splits, fits the
    classifier on the training part, then evaluates the predictions on the
    test part and runs the error analysis. Progress and timings are logged.
    """
    logger.log_info('>>>>>>>>>>>>>>>>Run svm.py')

    # Load the dataset and split it into training and test parts
    all_images, all_labels = utils.load_image_data()
    images_train, images_test, y_train, y_test = utils.prepare_data(
        X=all_images, y=all_labels)

    # Preprocess both image splits, timing the step
    transform_started = time.time()
    logger.log_info("Start transforming data...")
    X_train, X_test = preprocess.process_data(images_train, images_test)
    transform_seconds = round(time.time() - transform_started, 2)
    logger.log_info(f'Finished transforming data in {transform_seconds} seconds')

    # Fit an SVC with a linear kernel; no other hyperparameter is set explicitly
    fit_started = time.time()
    logger.log_info("Start fitting SVM classifier...")
    model = SVC(kernel='linear')
    model.fit(X_train, y_train)
    fit_seconds = round(time.time() - fit_started, 2)
    logger.log_info(f'Finished training in {fit_seconds} seconds')

    # Evaluate on the held-out split; the error analysis receives the
    # unprocessed test images alongside the predictions
    predictions = model.predict(X_test)
    utils.evaluate_prediction(predictions=predictions, y_test=y_test)
    utils.error_analysis(images_test, predictions, y_test)

    logger.log_info('>>>>>>>>>>>>>>>>End of svm.py')
예제 #3
0
def predict_images(grid_search: bool = True):
    """Evaluate a previously saved classifier on noise-corrupted test images.

    Args:
        grid_search: Selects which saved model file to load; the file name
            is built the same way ``parameter_tuning()`` builds it when
            saving, so the value must match the one used there.
    """
    images, labels = utils.load_image_data()
    # The training labels are not needed here, only the test labels.
    images_train, images_test, _, y_test = utils.prepare_data(X=images,
                                                              y=labels)

    # The training images are still passed in because process_data() takes
    # both splits; only the processed test split is used.
    _, X_test = preprocess.process_data(images_train, images_test)

    # assume the model is already trained and saved using the parameter_tuning() step above
    model_path = os.path.join(config.MODELS_SUBDIR,
                              f'svm_grid_search={grid_search}.clf')
    clf = utils.load_binary(model_path)

    # To see how our best model generalizes to unseen data,
    # let's corrupt the test images by some noise such that they are not exactly the same as the ones
    # the model has seen already
    noise_intensity = 2
    # Use the `np` alias for both calls so the function depends on a single
    # module name.
    noise = np.random.normal(0, 1, X_test.shape) * noise_intensity
    # NOTE(review): clipping to [0, 255] assumes process_data() leaves values
    # on the raw pixel-intensity scale - confirm against preprocess.
    X_test = np.clip(X_test + noise, 0, 255)
    predictions = clf.predict(X_test)

    utils.evaluate_prediction(predictions, y_test)
def cross_validation():
    """Train and evaluate a linear-kernel SVC on every cross-validation fold.

    For each fold produced by ``utils.prepare_cross_validation_data`` the
    images are preprocessed, a fresh classifier is fitted and its score on
    the fold's test part is collected. The mean, min, max and standard
    deviation of the collected scores are logged at the end.
    """
    # Load the dataset and split it into folds
    all_images, all_labels = utils.load_image_data()
    folds = utils.prepare_cross_validation_data(X=all_images, y=all_labels)

    logger.log_info('>>>>>>>>>>>>>>>>Run svm_cross_validation.py')
    fold_scores = []

    for fold_number, fold in enumerate(folds, start=1):
        images_train, images_test, y_train, y_test = fold
        logger.log_info(f'Training and evaluating fold {fold_number}...')

        X_train, X_test = preprocess.process_data(images_train, images_test)

        # Fit an SVC with a linear kernel; no other hyperparameter is set explicitly
        fit_started = time.time()
        logger.log_info("Start fitting SVM classifier...")
        model = SVC(kernel='linear')
        model.fit(X_train, y_train)
        fit_seconds = round(time.time() - fit_started, 2)
        logger.log_info(f'Finished training in {fit_seconds} seconds')

        fold_predictions = model.predict(X_test)
        fold_scores.append(
            utils.evaluate_prediction(predictions=fold_predictions,
                                      y_test=y_test))

    # Summarize the per-fold scores
    mean_score = np.mean(fold_scores)
    lowest_score = np.min(fold_scores)
    highest_score = np.max(fold_scores)
    score_std = np.std(fold_scores)
    summary = 'mean accuracy: %.3f, min: %.3f, max: %.3f, std: %.3f' % (
        mean_score, lowest_score, highest_score, score_std)
    logger.log_info(summary)

    logger.log_info('>>>>>>>>>>>>>>>>End of svm_cross_validation.py')