def _load_points(path):
    """Return ``(class_1, class_2, labels)`` read from the pickle file *path*.

    The file must contain three consecutive pickles, in that order.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # Pickle data is binary, so the file is opened in 'rb' mode (text mode
    # fails on Python 3 and can corrupt the stream on Windows).
    with open(path, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)
    return class_1, class_2, labels


def process(training_file, test_file, check, draw):
    """Train a kNN classifier from one pickle file and evaluate it on another.

    Both files hold three pickles: the points of class 1, the points of
    class 2, and the labels.  The labels are ordered like the rows of
    ``vstack((class_1, class_2))``, i.e. class 1 first, then class 2.

    Args:
        training_file: path of the pickle file used to build the classifier.
        test_file: path of the pickle file used for evaluation/plotting.
        check: if true, classify every test point and print the percentage
            of points whose predicted label matches the stored label.
        draw: if true, plot the test points and the decision boundary with
            ``imtools.plot_2d_boundary`` and show the figure.
    """
    # Load training data.
    class_1, class_2, labels = _load_points(training_file)
    model = knn.KnnClassifier(labels, vstack((class_1, class_2)))

    # Load test data.
    class_1, class_2, labels = _load_points(test_file)

    if check:
        # Stack the test points the same way the training points were
        # stacked so that points[i] lines up with labels[i] even when the
        # two classes do not have the same number of rows.
        points = vstack((class_1, class_2))
        n_correct = 0
        for point, label in zip(points, labels):
            if model.classify(point) == label:
                n_correct += 1
        percent = 100 * n_correct / float(len(points))
        # Single-argument call: prints the same text on Python 2 and 3.
        print('percent correct: %s' % (percent,))

    if draw:
        def classify(x, y, model=model):
            """Classify the points (x[i], y[i]) and return the labels as an array."""
            return array([model.classify([xx, yy]) for (xx, yy) in zip(x, y)])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [class_1, class_2], classify, [1, -1])
        show()
def readCommand(argv):
    """Process the command used to run from the command line.

    Parses *argv* with ``optparse``, prints a summary of the chosen
    configuration, validates the numeric options and instantiates the
    requested classifier.

    Args:
        argv: list of command-line arguments (without the program name).

    Returns:
        A tuple ``(args, options)`` where ``args`` is a dict with the keys
        ``'classifier'``, ``'featureFunction'`` and ``'printImage'`` and
        ``options`` is the ``optparse`` values object.

    Raises:
        Exception: if positional arguments are left over after parsing.
        SystemExit: with status 2 when an option value is invalid.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes', 'perceptron',
                          'mira', 'minicontest', 'knn', 'neuralNetwork'
                      ],
                      default='mostFrequent')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=100, type="int")
    parser.add_option('-f', '--features',
                      help=default('Whether to use enhanced features'),
                      default=False, action="store_true")
    parser.add_option('-o', '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False, action="store_true")
    parser.add_option('-1', '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option('-2', '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option('-w', '--weights',
                      help=default('Whether to print weights'),
                      default=False, action="store_true")
    parser.add_option(
        '-k', '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float", default=2.0)
    parser.add_option(
        '-a', '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False, action="store_true")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option('-n', '--neighbors',
                      help=default("Amount of neighbors to use in KNN"),
                      default=3, type="int")

    options, otherjunk = parser.parse_args(argv)
    args = {}
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    # Set up variables according to the command line input.
    # Every message is built completely *inside* the print(...) call:
    # print() returns None on Python 3, so "print(a) + b" / "print(a) % b"
    # raise TypeError there.  The single-argument form prints the same text
    # on Python 2 and Python 3.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    print("training set size:\t" + str(options.training))

    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        # The minicontest always uses its own extractor for digits.
        if options.classifier == 'minicontest':
            featureFunction = contestFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset " + str(options.data))
        print(USAGE_STRING)
        sys.exit(2)

    # list(...) keeps legalLabels a real list on Python 3 as well, which is
    # what range() returned when this code was written for Python 2.
    if options.data == "digits":
        legalLabels = list(range(10))
    else:
        legalLabels = list(range(2))

    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if (options.label1 not in legalLabels
                or options.label2 not in legalLabels):
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)"
                  % (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes"
                  % options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    elif options.classifier == 'knn':
        import knn
        classifier = knn.KnnClassifier(legalLabels, options.neighbors)
    elif options.classifier == 'neuralNetwork':
        import neuralNetwork
        if options.data == "faces":
            #inputNum, hiddenNum, outputNum, dataNum, lamda
            classifier = neuralNetwork.NeuralNetworkClassifier(
                legalLabels, FACE_DATUM_WIDTH * FACE_DATUM_HEIGHT, 500, 1,
                options.training, 0.03)
        else:
            classifier = neuralNetwork.NeuralNetworkClassifier(
                legalLabels, DIGIT_DATUM_WIDTH * DIGIT_DATUM_HEIGHT, 50, 10,
                options.training, 3.5)
    else:
        print(USAGE_STRING)
        print("Unknown classifier: " + str(options.classifier))
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
# Load the gesture features and labels of the training and the test set.
features, labels = read_gesture_feature_labels('out_hands/train')
test_features, test_labels = read_gesture_feature_labels('out_hands/test')
classnames = numpy.unique(labels)

# Reduce input dimensions.
V, S, m = pca.pca(features)
V = V[:50]  # Keep most important features.
# Project every sample onto the kept rows of V after subtracting m.
features = numpy.array([numpy.dot(V, f - m) for f in features])
test_features = numpy.array([numpy.dot(V, f - m) for f in test_features])

# Test kNN.
k = 1
knn_classifier = knn.KnnClassifier(labels, features)
res = numpy.array([knn_classifier.classify(feat, k) for feat in test_features])
# Fraction of test samples whose predicted label equals the true label.
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# Single-argument print(...) emits the same text on Python 2 and Python 3;
# the bare print statement is a syntax error on Python 3.
print('kNN Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)

# Test Bayes.
bc = bayes.BayesClassifier()
# One array of training samples per class, in the order of classnames.
blist = [features[numpy.where(labels == c)[0]] for c in classnames]
bc.train(blist, classnames)
res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
@author: user """ import sys sys.path.append('../ch1/') import pickle import knn import imtools from pylab import * from numpy import * # load 2D points using Pickle with open('points_normal.pkl', 'r') as f: class_1 = pickle.load(f) class_2 = pickle.load(f) labels = pickle.load(f) model = knn.KnnClassifier(labels, vstack((class_1, class_2))) # load test data using Pickle with open('points_normal_test.pkl', 'r') as f: class_1 = pickle.load(f) class_2 = pickle.load(f) labels = pickle.load(f) # test on the first point print model.classify(class_1[0]) # define function for plotting def classify(x, y, model=model): return array([model.classify([xx, yy]) for (xx, yy) in zip(x, y)])