Example 1
def process(training_file, test_file, check, draw):
    # Load training data.
    with open(training_file, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)
    model = knn.KnnClassifier(labels, vstack((class_1, class_2)))

    # Load test data.
    with open(test_file, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)

    if check:
        n = class_1.shape[0]
        n_correct = 0
        for i in range(n):
            if model.classify(class_1[i]) == labels[i]: n_correct += 1
            if model.classify(class_2[i]) == labels[n + i]: n_correct += 1
        print('percent correct:', 100 * n_correct / float(2 * n))

    if draw:

        def classify(x, y, model=model):
            return array([model.classify([xx, yy]) for (xx, yy) in zip(x, y)])

        imtools.plot_2d_boundary([-6, 6, -6, 6], [class_1, class_2], classify,
                                 [1, -1])
        show()
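process() expects each pickle file to contain three objects dumped in the order class_1, class_2, labels, with label values 1 and -1 (the values later passed to plot_2d_boundary). As a minimal sketch of producing data in that layout, assuming two Gaussian point clouds; the point count and spread are illustrative, and the file name simply matches the pickle loaded in the last snippet:

import pickle
from numpy import random, hstack, ones

n = 200
class_1 = 0.6 * random.randn(n, 2)            # tight cluster around the origin
class_2 = 1.2 * random.randn(n, 2) + [5, 1]   # wider cluster, offset from the first
labels = hstack((ones(n), -ones(n)))          # 1 for class_1, -1 for class_2

# Dump in the same order process() reads them back.
with open('points_normal.pkl', 'wb') as f:
    pickle.dump(class_1, f)
    pickle.dump(class_2, f)
    pickle.dump(labels, f)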
Example 2
def readCommand(argv):
    "Processes the command used to run from the command line."
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes', 'perceptron',
                          'mira', 'minicontest', 'knn', 'neuralNetwork'
                      ],
                      default='mostFrequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-n',
                      '--neighbors',
                      help=default("Amount of neighbors to use in KNN"),
                      default=3,
                      type="int")
    options, otherjunk = parser.parse_args(argv)
    args = {}
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    # Set up variables according to the command line input.

    print("Doing classification")
    print("--------------------")
    print("data:\t\t") + options.data
    print("classifier:\t\t") + options.classifier
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t") + str(options.features)
    else:
        print("using minicontest feature extractor")
    print("training set size:\t") + str(options.training)

    if (options.data == "digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        if (options.classifier == 'minicontest'):
            featureFunction = contestFeatureExtractorDigit
    elif (options.data == "faces"):
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset"), options.data
        print(USAGE_STRING)
        sys.exit(2)

    if (options.data == "digits"):
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  ) % (options.label1, options.label2)
            print(USAGE_STRING)
            sys.exit(2)

    if (options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes"
                  ) % options.smoothing
    elif (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif (options.classifier == "mira"):
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if (options.autotune):
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif (options.classifier == 'minicontest'):
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    elif (options.classifier == 'knn'):
        import knn
        classifier = knn.KnnClassifier(legalLabels, options.neighbors)
    elif (options.classifier == 'neuralNetwork'):
        import neuralNetwork
        if (options.data == "faces"):
            #inputNum, hiddenNum, outputNum, dataNum, lamda
            classifier = neuralNetwork.NeuralNetworkClassifier(
                legalLabels, FACE_DATUM_WIDTH * FACE_DATUM_HEIGHT, 500, 1,
                options.training, 0.03)
        else:
            classifier = neuralNetwork.NeuralNetworkClassifier(
                legalLabels, DIGIT_DATUM_WIDTH * DIGIT_DATUM_HEIGHT, 50, 10,
                options.training, 3.5)

    else:
        print(USAGE_STRING)
        print("Unknown classifier:"), options.classifier

        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
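readCommand() returns a dictionary of prepared objects plus the parsed options. As a rough, hedged sketch of how a caller might consume that return value (the actual training/evaluation harness is not part of this example):

import sys

if __name__ == '__main__':
    args, options = readCommand(sys.argv[1:])
    classifier = args['classifier']
    featureFunction = args['featureFunction']
    printImage = args['printImage']
    # A real driver would now load the dataset, map featureFunction over the
    # raw data, and call something like classifier.train(...) before testing.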
Example 3

# Local modules used below (pca, knn, bayes) come from the surrounding project.
import numpy
import pca
import knn
import bayes

# read_gesture_feature_labels() and print_confusion() are helper functions
# defined elsewhere in the original script.
features, labels = read_gesture_feature_labels('out_hands/train')
test_features, test_labels = read_gesture_feature_labels('out_hands/test')

classnames = numpy.unique(labels)

# Reduce input dimensions.
V, S, m = pca.pca(features)
V = V[:50]  # Keep most important features.
features = numpy.array([numpy.dot(V, f - m) for f in features])
test_features = numpy.array([numpy.dot(V, f - m) for f in test_features])

# Test kNN.
k = 1
knn_classifier = knn.KnnClassifier(labels, features)

res = numpy.array([knn_classifier.classify(feat, k) for feat in test_features])
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print('kNN Accuracy:', acc)
print_confusion(res, test_labels, classnames)

# Test Bayes.
bc = bayes.BayesClassifier()
blist = [features[numpy.where(labels == c)[0]] for c in classnames]
bc.train(blist, classnames)

res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print('Bayes Accuracy:', acc)
print_confusion(res, test_labels, classnames)
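The snippet calls print_confusion() without showing it. Purely as an assumption about what that helper does (a predicted-versus-true confusion matrix over the class names), a minimal sketch could look like this:

# Hedged sketch of the print_confusion() helper used above; the original
# implementation is not included in this snippet.
def print_confusion(res, labels, classnames):
    n = len(classnames)
    class_ind = {classnames[i]: i for i in range(n)}  # class name -> index
    confuse = numpy.zeros((n, n))
    for i in range(len(labels)):
        confuse[class_ind[res[i]], class_ind[labels[i]]] += 1
    print('Confusion matrix for')
    print(classnames)
    print(confuse)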
"""
@author: user
"""
import sys
sys.path.append('../ch1/')
import pickle
import knn
import imtools
from pylab import *
from numpy import *
# load 2D points using Pickle
with open('points_normal.pkl', 'rb') as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)
model = knn.KnnClassifier(labels, vstack((class_1, class_2)))
# load test data using Pickle

with open('points_normal_test.pkl', 'rb') as f:
    class_1 = pickle.load(f)
    class_2 = pickle.load(f)
    labels = pickle.load(f)

# test on the first point
print(model.classify(class_1[0]))


# define function for plotting
def classify(x, y, model=model):
    return array([model.classify([xx, yy]) for (xx, yy) in zip(x, y)])
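

The classify() helper defined here is the same plotting hook used in Example 1; under that assumption, the test points and the learned decision boundary can be drawn the same way:

# Plot the decision boundary on the test points, exactly as in Example 1
# (the plot range and the label values [1, -1] are taken from that example).
imtools.plot_2d_boundary([-6, 6, -6, 6], [class_1, class_2], classify, [1, -1])
show()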