Esempio n. 1
0
def testClassic():
    """Evaluate three stand-alone classifiers, then two ensembles built on them.

    Everything here runs for its side effects (the ``evaluate`` /
    ``evaluateMixedImageSets`` calls); nothing is returned.  The trailing
    ``False`` passed to every evaluation call is forwarded unchanged --
    NOTE(review): its meaning is defined by ``classifiers``, confirm there.
    """
    # KNN on analysis.mean features: trained on the "S" images with the "O" labels.
    mean_knn = classifiers.KNNClassifier(
        tr_images_S, tr_labels_O, lambda img: analysis.mean(img), k=10)
    mean_knn.evaluate(test_images_S, test_labels_O, False)

    # KNN on analysis.histogram features: trained and evaluated on the "O" images.
    histogram_knn = classifiers.KNNClassifier(
        tr_images_O, tr_labels_O, lambda img: analysis.histogram(img), k=10)
    histogram_knn.evaluate(test_images_O, test_labels_O, False)

    # Classifier produced by the hogNN() factory, evaluated on the "O" images.
    hog_knn = hogNN()
    hog_knn.evaluate(test_images_O, test_labels_O, False)

    # Ensemble of the HOG and mean classifiers.
    # NOTE(review): the second argument of add() presumably selects which image
    # set of the mixed input the member reads (0 -> "O", 1 -> "S") -- confirm.
    ensemble = classifiers.EnsembleClassifier(confidence_mode="average")
    ensemble.add(hog_knn, 0)
    ensemble.add(mean_knn, 1)
    ensemble.evaluateMixedImageSets(test_images_O + test_images_S, test_labels_O, False)

    # Add a freshly built histogram classifier as a third member and re-evaluate.
    ensemble.add(hist(), 0)
    ensemble.evaluateMixedImageSets(test_images_O + test_images_S, test_labels_O, False)
Esempio n. 2
0
def hogNearestNeighbour():
    """Return a KNN classifier (k=5) over HOG descriptors of the "O" training images.

    Returns:
        The ``classifiers.KNNClassifier`` built from ``tr_images_O`` /
        ``tr_labels_O`` with the HOG feature function defined below.
    """

    def hog_features(image):
        # Resize to the module-level ``imagesize`` before computing the descriptor.
        resized = resize(image, imagesize, mode="constant", anti_aliasing=True)
        return hog(
            resized,
            block_norm="L2-Hys",
            orientations=8,
            pixels_per_cell=(14, 14),
            cells_per_block=(1, 1),
            visualize=False,
            multichannel=True,
        )

    return classifiers.KNNClassifier(tr_images_O, tr_labels_O, hog_features, k=5)
Esempio n. 3
0
def readCommand( argv ):
    """Parse the command line and build the classifier and feature extractor.

    Args:
        argv: list of command-line tokens, passed straight to
            ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``: ``args`` is a dict with the keys
        ``'classifier'`` (the constructed classifier, or ``None`` when no
        classifier was selected) and ``'featureExtractor'``; ``options`` is
        the parsed optparse values object.

    Raises:
        Exception: if positional arguments are left over after parsing.

    Exits with status 2 when the training set size is not positive.
    """
    global ClassifierText  # no-op in this function: the name is never assigned here
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier',
                      help=default('The type of classifier'),
                      choices=['knn', 'perceptron', 'svm', 'lr', 'best', 'none'],
                      default='none')
    parser.add_option('-d', '--data',
                      help=default('Dataset to use'),
                      choices=['digits'],
                      default='digits')
    parser.add_option('-t', '--training',
                      help=default('The size of the training set'),
                      default=TRAIN_SET_SIZE, type="int")
    parser.add_option('-i', '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=50, type="int")
    parser.add_option('-s', '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-n', '--neighbors',
                      help=default("Number of clusters(for KMeans) or nearest neighbors(for KNN)"),
                      default=5, type="int")
    parser.add_option('-v', '--visualize',
                      help=default('Whether to visualize some results'),
                      action='store_true')

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception('Command line input not understood: ' + str(leftovers))

    # Echo the chosen configuration.  Single-argument parenthesised prints
    # produce the same output as the print statement on Python 2.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    if options.data == 'digits':
        classifier_label = options.classifier
    else:
        classifier_label = options.data + " classifier"
    print("classifier:\t\t" + classifier_label)
    print("training set size:\t" + str(options.training))

    featureExtractor = BasicFeatureExtractorDigit()

    legalLabels = range(10)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    # Factories are wrapped in lambdas so only the selected classifier is built.
    builders = {
        'perceptron': lambda: classifiers.PerceptronClassifier(legalLabels, options.iterations),
        'svm': lambda: classifiers.SVMClassifier(legalLabels),
        'knn': lambda: classifiers.KNNClassifier(legalLabels, options.neighbors),
        'lr': lambda: classifiers.LinearRegressionClassifier(legalLabels),
        'best': lambda: classifiers.BestClassifier(legalLabels),
    }
    build = builders.get(options.classifier)
    if build is None:
        print("Do not use any classifier:" + " " + str(options.classifier))
        classifier = None
    else:
        classifier = build()

    return {'classifier': classifier, 'featureExtractor': featureExtractor}, options
def q1(grades):
    """Grade question 1: KNN (k=5) accuracy on the digit data.

    One point is awarded for each accuracy threshold in ``[50, 70, 80, 87]``
    that the measured accuracy reaches, so the score ranges from 0 to 4.

    Args:
        grades: container indexed by question number.  Entry ``1`` is reset
            to 0 up front and overwritten with the earned score on success;
            it is also handed to ``checkSklearnPackage``.

    Any ordinary exception raised while grading is reported through
    ``display``.  ``KeyboardInterrupt`` and ``SystemExit`` are deliberately
    not swallowed, so the grader can still be interrupted.
    """
    print("\n===== Grading Q1 ===== ")

    # Default to zero so a failure below leaves a well-defined grade.
    grades[1] = 0

    print("1. checking python dataClassifier.py -d digits -c knn -t 5000 -s 1000 -f basic -n 5")
    try:
        legalLabels = range(10)
        classifier = classifiers.KNNClassifier(legalLabels, 5)
        accDigits = getAccuracy(digitData, classifier) # our solution: 91.0%
        accThres = [50, 70, 80, 87]
        # Count how many thresholds the accuracy meets or exceeds.
        grades[1] = sum(1 for threshold in accThres if accDigits >= threshold)
        # NOTE(review): presumably adjusts the grade when sklearn is used -- confirm.
        checkSklearnPackage(grades, 1)
    except Exception:
        display("An exception was raised:\n%s" % getExceptionTraceBack())
Esempio n. 5
0
def hist():
    """Return a KNN classifier (k=10) over ``analysis.histogram`` features.

    The classifier is built from the "O" training images and labels.
    """
    return classifiers.KNNClassifier(
        tr_images_O,
        tr_labels_O,
        lambda img: analysis.histogram(img),
        k=10,
    )
Esempio n. 6
0
def mean(imagesize = imagesize):
    """Return a KNN classifier (k=10) over ``analysis.mean`` features.

    The classifier is built from the "O" training images and labels.

    Args:
        imagesize: accepted for interface compatibility; the body does not
            use it.  Its default is the module-level ``imagesize`` captured
            when this function is defined.
    """
    return classifiers.KNNClassifier(
        tr_images_O,
        tr_labels_O,
        lambda img: analysis.mean(img),
        k=10,
    )
Esempio n. 7
0
def readCommand( argv ):
    """Parse the command line and build the classifier and feature extractor.

    Args:
        argv: list of command-line tokens, passed straight to
            ``OptionParser.parse_args``.

    Returns:
        A tuple ``(args, options)``: ``args`` is a dict with the keys
        ``'classifier'`` (the constructed classifier, or ``None`` when no
        classifier applies) and ``'featureExtractor'``; ``options`` is the
        parsed optparse values object.

    Raises:
        Exception: if positional arguments are left over after parsing.

    Exits with status 2 on an unknown dataset or a non-positive training
    set size.
    """
    # Required: the function-local import below must bind ClassifierText at
    # module level rather than as a local of this function.
    global ClassifierText
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--classifier', help=default('The type of classifier'),
                      choices=['knn', 'perceptron', 'svm', 'lr', 'none'], default='none')
    parser.add_option('-d', '--data', help=default('Dataset to use'),
                      choices=['digits', 'text'], default='digits')
    parser.add_option('-t', '--training', help=default('The size of the training set'),
                      default=TRAIN_SET_SIZE, type="int")
    parser.add_option('-f', '--features', help=default('Type of features'),
                      default='basic', choices=['basic', 'pca', 'kmeans'])
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"),
                      default=50, type="int")
    parser.add_option('-s', '--test', help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option('-n', '--neighbors',
                      help=default("Number of clusters(for KMeans) or nearest neighbors(for KNN)"),
                      default=5, type="int")
    parser.add_option('-k', '--kernel', help=default("Kernel type"),
                      default='linear', choices=['linear', 'rbf'])
    # Help text fixed: the original read "Panelty C in SVM".
    parser.add_option('-C', '--C', help=default('Penalty C in SVM'),
                      default=1.0, type='float')
    parser.add_option('-m', '--dimension', help=default('Dimension reduction'),
                      default=32, type='int')
    parser.add_option('-v', '--visualize', help=default('Whether to visualize some results'),
                      action='store_true')

    options, otherjunk = parser.parse_args(argv)
    if otherjunk:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    # Echo the chosen configuration.  Single-argument parenthesised prints
    # produce the same output as the print statement on Python 2.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    if options.data == 'digits':
        print("classifier:\t\t" + options.classifier)
    else:
        print("classifier:\t\t" + options.data + " classifier")
    print("using enhanced features:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # Pick the feature extractor for the selected dataset.
    if options.data == "digits":
        if options.features == 'pca':
            featureExtractor = PCAFeatureExtractorDigit(options.dimension)
        elif options.features == 'kmeans':
            featureExtractor = KMeansClusterDigit(options.neighbors, options.iterations)
        else:
            # 'basic', plus a defensive fallback for any other value.
            featureExtractor = BasicFeatureExtractorDigit()
    elif options.data == "text":
        from sentimentAnalysis import FeatureExtractorText, ClassifierText
        featureExtractor = FeatureExtractorText()
    else:
        print("Unknown dataset" + " " + str(options.data))
        print(USAGE_STRING)
        sys.exit(2)

    # Digits have ten labels; any other dataset uses five.
    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(5)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)" % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    # An explicitly chosen classifier wins; otherwise the text dataset falls
    # back to ClassifierText, and digits with no classifier yields None.
    if options.classifier == "perceptron":
        classifier = classifiers.PerceptronClassifier(legalLabels, options.iterations)
    elif options.classifier == "svm":
        classifier = classifiers.SVMClassifier(legalLabels, options.iterations*100, options.C, options.kernel)
    elif options.classifier == "knn":
        classifier = classifiers.KNNClassifier(legalLabels, options.neighbors)
    elif options.classifier == 'lr':
        classifier = classifiers.LinearRegressionClassifier(legalLabels)
    elif options.data == 'text':
        classifier = ClassifierText(legalLabels)
    else:
        print("Do not use any classifier:" + " " + str(options.classifier))
        classifier = None

    args = {
        'classifier': classifier,
        'featureExtractor': featureExtractor,
    }

    return args, options