def testClassic():
    """Evaluate three individual classifiers, then two ensembles built from them.

    Steps, in order:
      1. A k=10 KNN over ``analysis.mean`` features, trained on
         ``tr_images_S`` / ``tr_labels_O`` and evaluated on ``test_images_S``.
      2. A k=10 KNN over ``analysis.histogram`` features, trained on
         ``tr_images_O`` / ``tr_labels_O`` and evaluated on ``test_images_O``.
      3. The classifier returned by ``hogNN()``, evaluated on ``test_images_O``.
      4. An ``EnsembleClassifier`` (confidence_mode "average") holding the
         HOG and mean classifiers, evaluated on the concatenated test sets.
      5. The same ensemble after also adding ``hist()``, evaluated again.

    Nothing is returned; all results come from the ``evaluate*`` calls.
    """
    # Mean-feature KNN.
    mean_knn = classifiers.KNNClassifier(
        tr_images_S, tr_labels_O, lambda img: analysis.mean(img), k=10
    )
    mean_knn.evaluate(test_images_S, test_labels_O, False)

    # Histogram-feature KNN.
    hist_knn = classifiers.KNNClassifier(
        tr_images_O, tr_labels_O, lambda img: analysis.histogram(img), k=10
    )
    hist_knn.evaluate(test_images_O, test_labels_O, False)

    # HOG-based classifier supplied by hogNN().
    hog_knn = hogNN()
    hog_knn.evaluate(test_images_O, test_labels_O, False)

    # Ensemble of HOG + mean.
    # NOTE(review): the second argument of add() (0 / 1) presumably selects
    # which image set the member consumes -- confirm against EnsembleClassifier.
    ensemble = classifiers.EnsembleClassifier(confidence_mode="average")
    ensemble.add(hog_knn, 0)
    ensemble.add(mean_knn, 1)
    first_mix = test_images_O + test_images_S
    ensemble.evaluateMixedImageSets(first_mix, test_labels_O, False)

    # Same ensemble with a freshly built histogram classifier added.
    ensemble.add(hist(), 0)
    second_mix = test_images_O + test_images_S
    ensemble.evaluateMixedImageSets(second_mix, test_labels_O, False)
def hogNearestNeighbour():
    """Return a k=5 KNN classifier whose features are HOG descriptors.

    The classifier is built from ``tr_images_O`` / ``tr_labels_O``. Each image
    is first passed through ``resize`` to ``imagesize`` and then through
    ``hog`` with 8 orientations, 14x14-pixel cells, 1x1-cell blocks and
    "L2-Hys" block normalisation.
    """

    def hog_descriptor(image):
        # Resize first so every image is described at the same size.
        resized = resize(image, imagesize, mode="constant", anti_aliasing=True)
        return hog(
            resized,
            block_norm="L2-Hys",
            orientations=8,
            pixels_per_cell=(14, 14),
            cells_per_block=(1, 1),
            visualize=False,
            multichannel=True,
        )

    return classifiers.KNNClassifier(tr_images_O, tr_labels_O, hog_descriptor, k=5)
def readCommand( argv ): "Processes the command used to run from the command line." global ClassifierText from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['knn', 'perceptron', 'svm', 'lr', 'best', 'none'], default='none') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=TRAIN_SET_SIZE, type="int") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=50, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-n', '--neighbors', help=default("Number of clusters(for KMeans) or nearest neighbors(for KNN)"), default=5, type="int") parser.add_option('-v', '--visualize', help=default('Whether to visualize some results'), action='store_true') options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data if options.data == 'digits': print "classifier:\t\t" + options.classifier else: print "classifier:\t\t" + options.data + " classifier" print "training set size:\t" + str(options.training) featureExtractor = BasicFeatureExtractorDigit() legalLabels = range(10) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.classifier == "perceptron": classifier = classifiers.PerceptronClassifier(legalLabels, options.iterations) elif options.classifier == "svm": classifier = classifiers.SVMClassifier(legalLabels) elif options.classifier == "knn": classifier = classifiers.KNNClassifier(legalLabels, options.neighbors) elif options.classifier == 'lr': classifier = classifiers.LinearRegressionClassifier(legalLabels) elif options.classifier == 'best': classifier = classifiers.BestClassifier(legalLabels) else: print "Do not use any classifier:", options.classifier classifier = None args['classifier'] = classifier args['featureExtractor'] = featureExtractor return args, options
def q1(grades): print "\n===== Grading Q1 ===== " grades[1] = 0 print "1. checking python dataClassifier.py -d digits -c knn -t 5000 -s 1000 -f basic -n 5" try: legalLabels = range(10) classifier = classifiers.KNNClassifier(legalLabels, 5) accDigits = getAccuracy(digitData, classifier) # our solution: 91.0% accThres = [50, 70, 80, 87] score = [accDigits >= t for t in accThres].count(True) grades[1] = score checkSklearnPackage(grades, 1) except: display("An exception was raised:\n%s" % getExceptionTraceBack())
def hist():
    """Return a k=10 KNN classifier over ``analysis.histogram`` features.

    The classifier is built from ``tr_images_O`` and ``tr_labels_O``.
    """
    return classifiers.KNNClassifier(
        tr_images_O,
        tr_labels_O,
        lambda image: analysis.histogram(image),
        k=10,
    )
def mean(imagesize=imagesize):
    """Return a k=10 KNN classifier over ``analysis.mean`` features.

    The classifier is built from ``tr_images_O`` and ``tr_labels_O``.

    Args:
        imagesize: accepted for interface compatibility; the body does not
            read it.
    """
    return classifiers.KNNClassifier(
        tr_images_O,
        tr_labels_O,
        lambda image: analysis.mean(image),
        k=10,
    )
def readCommand( argv ): "Processes the command used to run from the command line." global ClassifierText from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['knn', 'perceptron', 'svm', 'lr', 'none'], default='none') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'text'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=TRAIN_SET_SIZE, type="int") parser.add_option('-f', '--features', help=default('Type of features'), default='basic', choices=['basic', 'pca', 'kmeans']) parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=50, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-n', '--neighbors', help=default("Number of clusters(for KMeans) or nearest neighbors(for KNN)"), default=5, type="int") parser.add_option('-k', '--kernel', help=default("Kernel type"), default='linear', choices=['linear', 'rbf']) parser.add_option('-C', '--C', help=default('Panelty C in SVM'), default=1.0, type='float') parser.add_option('-m', '--dimension', help=default('Dimension reduction'), default=32, type='int') parser.add_option('-v', '--visualize', help=default('Whether to visualize some results'), action='store_true') options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. 
print "Doing classification" print "--------------------" print "data:\t\t" + options.data if options.data == 'digits': print "classifier:\t\t" + options.classifier else: print "classifier:\t\t" + options.data + " classifier" print "using enhanced features:\t" + str(options.features) print "training set size:\t" + str(options.training) if(options.data=="digits"): if options.features == 'basic': featureExtractor = BasicFeatureExtractorDigit() elif options.features == 'pca': featureExtractor = PCAFeatureExtractorDigit(options.dimension) elif options.features == 'kmeans': featureExtractor = KMeansClusterDigit(options.neighbors, options.iterations) else: featureExtractor = BasicFeatureExtractorDigit() elif(options.data=="text"): from sentimentAnalysis import FeatureExtractorText, ClassifierText featureExtractor = FeatureExtractorText() else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if options.data=="digits": legalLabels = range(10) else: legalLabels = range(5) if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.classifier == "perceptron": classifier = classifiers.PerceptronClassifier(legalLabels, options.iterations) elif options.classifier == "svm": classifier = classifiers.SVMClassifier(legalLabels, options.iterations*100, options.C, options.kernel) elif options.classifier == "knn": classifier = classifiers.KNNClassifier(legalLabels, options.neighbors) elif options.classifier == 'lr': classifier = classifiers.LinearRegressionClassifier(legalLabels) elif options.data == 'text': classifier = ClassifierText(legalLabels) else: print "Do not use any classifier:", options.classifier classifier = None args['classifier'] = classifier args['featureExtractor'] = featureExtractor return args, options