Example #1
def parameter_tuning(grid_search: bool = True):
    # Define the parameter grid over which we will exhaustively search for the best combination.
    # To do so, we need to understand which hyperparameters SVC() exposes.
    clf = SVC()
    images, labels = utils.load_image_data()
    images = preprocess.preprocess(images)

    start = time.time()
    if grid_search:
        search_cv = get_grid_search_cv(clf)
    else:
        # TODO: implement the `get_randomized_search_cv()` function above for this part
        raise NotImplementedError(
            'get_randomized_search_cv() is not implemented yet')

    search_cv.fit(images, labels)

    logger.log_info(
        "GridSearchCV took %.2f seconds for %d candidate parameter settings." %
        (time.time() - start, len(search_cv.cv_results_['params'])))
    report(search_cv.cv_results_)

    # By default, sklearn refits the model with the best parameters on the entire dataset.
    # This refitted model is accessible via `search_cv.best_estimator_`.
    best_clf = search_cv.best_estimator_
    best_score = search_cv.best_score_

    logger.log_info('Best clf (%.3f validation score):' % best_score)
    logger.log_info(best_clf)

    # let's save this best classifier for later use
    model_path = os.path.join(config.MODELS_SUBDIR,
                              f'svm_grid_search={grid_search}.clf')
    logger.log_info(f'Saving fitted model to {model_path}')
    utils.save_binary(best_clf, model_path)
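The TODO above leaves `get_randomized_search_cv()` unimplemented. A minimal sketch of what it could look like, using the `RandomizedSearchCV` class already imported in Example #7 below; the candidate values and `n_iter` setting are illustrative assumptions, not values from the original project:

from sklearn.model_selection import RandomizedSearchCV


def get_randomized_search_cv(clf):
    # Illustrative sketch: RandomizedSearchCV samples `n_iter` parameter
    # combinations from these candidate values instead of trying all of them.
    param_distributions = {
        "C": [0.1, 1, 5, 10, 100],
        "gamma": ['scale', 0.001, 0.01, 0.1],
        "kernel": ['linear', 'rbf'],
    }
    return RandomizedSearchCV(clf,
                              param_distributions=param_distributions,
                              n_iter=10,
                              random_state=0)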
Example #2
def error_analysis(images_test, predictions, y_test):
    errors = []
    for (i, image_test) in enumerate(images_test):
        if predictions[i] != y_test[i]:
            errors.append((image_test, predictions[i], y_test[i]))

    logger.log_info(f'Total errors: {len(errors)}')
    # Investigate the first 8 errors we made by showing the images with the label vs. prediction
    for (i, (image_test, pred, label)) in enumerate(errors[:8]):
        plt.subplot(2, 4, i + 1)
        plt.axis('off')
        plt.imshow(image_test, cmap=plt.cm.gray_r, interpolation='gaussian')
        plt.title(f'{pred} (truth: {label})')

    plt.show()
Example #3
def process_data(images_train: 'np.ndarray',
                 images_test: 'np.ndarray') -> 'tuple[np.ndarray, np.ndarray]':
    """
    Process the training and testing raw images into flattened matrices
    :param images_train: the raw images for training
    :param images_test: the raw images for testing
    :return: X_train, X_test
    """

    logger.log_info('Transforming training images...')
    images_train = preprocess(images=images_train)

    logger.log_info('\nTransforming test images...')
    images_test = preprocess(images=images_test)

    return images_train, images_test
Example #4
def preprocess(images: 'np.ndarray') -> 'np.ndarray':
    """
    The input `images` is an (n_samples, 8, 8) matrix.
    To apply a classifier on this data, we need to flatten each image, i.e.,
    turn the data into an (n_samples, n_dim) matrix, where n_dim = 8 * 8.

    :param images: a 3D matrix with shape (n_samples, 8, 8)
    :return: a flattened image matrix of shape (n_samples, 64)
    """

    # raw images, as pixels, are already in matrix format.
    # So a simple reshaping operation (to reshape images from 3D matrix to 2D)
    # is sufficient for this simple dataset.

    logger.log_info(f'Shape before preprocessing: {images.shape}')
    n_samples = images.shape[0]
    data = images.reshape((n_samples, -1))

    logger.log_info(f'Shape after preprocessing: {data.shape}')

    return data
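For the scikit-learn digits data used here, this reshape flattens each 8x8 image into a 64-dimensional row. A quick check, assuming the standard digits dataset shipped with scikit-learn (1797 samples):

from sklearn import datasets

digits = datasets.load_digits()
print(digits.images.shape)   # (1797, 8, 8)
flat = digits.images.reshape((digits.images.shape[0], -1))
print(flat.shape)            # (1797, 64)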
Example #5
def evaluate_prediction(predictions, y_test):
    accuracy = accuracy_score(y_test, predictions)
    logger.log_info(f'Accuracy: {round(accuracy, 3)}')

    return accuracy
Example #6
def load_image_data():
    digits = datasets.load_digits()
    logger.log_info(
        f'Loaded {len(digits.images)} images and {len(digits.target)} labels.')

    return digits.images, digits.target
Example #7
import os.path
import time
import numpy as np
import numpy.random
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

import svm_tuning.config as config
from svm_tuning.util import preprocess, utils, logger
"""
References: http://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py
"""

logger.log_info('>>>>>>>>>>>>>>>>Run svm_parameter_tuning.py')


def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")


def get_grid_search_cv(clf):
    param_grid = {
        "C": [0.1, 1, 5, 10, 100],
        # (the rest of the parameter grid is truncated in the source)
    }
    return GridSearchCV(clf, param_grid=param_grid)
Example #8
def cross_validation():
    # Load images data
    images, labels = utils.load_image_data()

    # Split the data into cross-validation folds
    fold_data = utils.prepare_cross_validation_data(X=images, y=labels)

    logger.log_info('>>>>>>>>>>>>>>>>Run svm_cross_validation.py')
    accuracies = []

    for (i, (images_train, images_test, y_train,
             y_test)) in enumerate(fold_data):
        logger.log_info(f'Training and evaluating fold {i+1}...')

        X_train, X_test = preprocess.process_data(images_train, images_test)

        # Fit a linear SVM classifier with all hyperparameters set to their default values
        prev_time = time.time()

        logger.log_info("Start fitting SVM classifier...")
        clf = SVC(kernel='linear')

        clf.fit(X_train, y_train)
        logger.log_info(
            f'Finished training in {round(time.time() - prev_time, 2)} seconds'
        )

        predictions = clf.predict(X_test)

        acc = utils.evaluate_prediction(predictions=predictions, y_test=y_test)
        accuracies.append(acc)

    logger.log_info('mean accuracy: %.3f, min: %.3f, max: %.3f, std: %.3f' %
                    (np.mean(accuracies), np.min(accuracies),
                     np.max(accuracies), np.std(accuracies)))

    logger.log_info('>>>>>>>>>>>>>>>>End of svm_cross_validation.py')
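The helper `utils.prepare_cross_validation_data()` is not shown in these examples. A minimal sketch of what it could look like, assuming a plain k-fold split with scikit-learn's `KFold` (the fold count, shuffling, and random_state are assumptions):

from sklearn.model_selection import KFold


def prepare_cross_validation_data(X, y, n_splits=5):
    # Yield (images_train, images_test, y_train, y_test) tuples, one per fold,
    # matching how cross_validation() above unpacks fold_data.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    for train_idx, test_idx in kf.split(X):
        yield X[train_idx], X[test_idx], y[train_idx], y[test_idx]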
Example #9
def full_train():
    # Load images data
    logger.log_info('>>>>>>>>>>>>>>>>Run svm.py')
    images, labels = utils.load_image_data()

    # Split into training and test sets
    images_train, images_test, y_train, y_test = utils.prepare_data(X=images, y=labels)

    # Preprocess training and test images
    prev_time = time.time()
    logger.log_info("Start transforming data...")

    X_train, X_test = preprocess.process_data(images_train, images_test)
    logger.log_info(f'Finished transforming data in {round(time.time() - prev_time, 2)} seconds')

    # Fit a linear SVM classifier with all hyperparameters set to their default values
    prev_time = time.time()
    logger.log_info("Start fitting SVM classifier...")
    clf = SVC(kernel='linear')

    clf.fit(X_train, y_train)
    logger.log_info(f'Finished training in {round(time.time() - prev_time, 2)} seconds')
    predictions = clf.predict(X_test)

    utils.evaluate_prediction(predictions=predictions, y_test=y_test)

    utils.error_analysis(images_test, predictions, y_test)
    logger.log_info('>>>>>>>>>>>>>>>>End of svm.py')
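The helper `utils.prepare_data()` is likewise not shown. A minimal sketch assuming a simple hold-out split with scikit-learn's `train_test_split` (the test fraction and random_state are assumptions):

from sklearn.model_selection import train_test_split


def prepare_data(X, y, test_size=0.25):
    # Returns images_train, images_test, y_train, y_test in the order
    # full_train() unpacks them; the split parameters are illustrative.
    return train_test_split(X, y, test_size=test_size, random_state=0)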