def __init__(self, master):
        """Build the two-row GUI layout inside *master*.

        Top row: control panel, system output, the user-input image and a
        log list.  Bottom row: the input-weight and current-weight
        displays.  All widgets are attached to the module-level ``temp``
        object so other components can reach them; a perceptron classifier
        is attached last.
        """
        outer = Frame(master, width=700, height=600, bd=1, background="black")
        outer.pack()

        # --- top row --------------------------------------------------
        top_row = Frame(outer, width=700, height=600, bd=1, background="black")
        top_row.pack(side=TOP)

        temp.iControl = control.theControl(top_row, LEFT)   # control panel
        temp.iOutput = theOutput(top_row, LEFT)             # system output
        temp.usrInput = theDisplay(top_row, "User Input Image", LEFT, 235)
        temp.LOG_LIST = theLog(top_row, RIGHT)              # event log

        # --- bottom row -----------------------------------------------
        bottom_row = Frame(outer, width=700, height=600, bd=1, background="black")
        bottom_row.pack(side=BOTTOM)

        temp.inputWeight = theDisplay(bottom_row, "", LEFT, 470)
        temp.currentWeight = theDisplay(bottom_row, "", LEFT, 470)

        # Perceptron is the only classifier wired into this window.
        temp.iClassifier = perceptron.PerceptronClassifier()
# Beispiel #2 (scraped-snippet separator; "0" below is a vote count)
# 0
def runClassifier():

    # Set up variables according to the command line inputs
    featureFunction = basicFeatureExtractorDigit

    legalLabels = range(10)  # number of labels

    # Select classifier
    classifier = perceptron.PerceptronClassifier(legalLabels)

    # Load data
    numTraining = 100

    rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                            numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                             TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                              TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    trainingData = map(basicFeatureExtractorDigit, rawTrainingData)
    validationData = map(basicFeatureExtractorDigit, rawValidationData)
    testData = map(basicFeatureExtractorDigit, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    # print "Validating..."
    #  guesses = classifier.classify(validationData)
    #  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels))
    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)
# Beispiel #3 (scraped-snippet separator; "0" below is a vote count)
# 0
  def __init__(self, master):
    """Build the main perceptron-demo window inside *master*.

    Lays out a title, a row of three image displays (user input,
    input/weight overlap, current weight), then a second row with the
    control panel, weight-change display, system output and a log list.
    Widgets are attached to the module-level ``temp`` object; a
    perceptron classifier is attached last.
    """
    # NOTE(review): these globals are declared but never assigned in this
    # method; presumably other methods of the class rely on them -- confirm.
    global c1, c2, c3, c4, c5, c6, c7, iClassifier
    global TRAIN_NUM, POS_WEIGHT, NEG_WEIGHT, Tkinter
    
    frame = Frame(master, width=700, height=600, bd=1, background ="black")
    frame.pack()

    # Add a title
    temp.iTitle = title.theTitle(frame,TOP)

    # Add setting option (currently disabled)
  #  temp.iSetting = setting.theSetting(frame,BOTTOM)




    # First row: three side-by-side image displays.
    iFrame = Frame(frame, width=700, height=600, bd=1, background ="black")
    iFrame.pack(side=TOP)

    # User Input Image
    temp.usrInput = display.theDisplay(iFrame,"User Input Image",LEFT)
    
    # Input and weight Overlap
    temp.inputWeight = display.theDisplay(iFrame,"Input and Weight Overlap",LEFT)
    
    # Current Weight
    temp.currentWeight = display.theDisplay(iFrame,"Current weight",LEFT)


    # Second row: controls, weight-change display, output and log.
    iFrame = Frame(frame, width=700, height=600, bd=1, background ="black")
    iFrame.pack(side=TOP)
    
    # Control panel
    temp.iControl = control.theControl(iFrame,LEFT)

    # Weight change display
    temp.weightChange = display.theDisplay(iFrame,"Weight Change",LEFT)

    # System Output Frame
    temp.iOutput = output.theOutput(iFrame,LEFT)

    # Add a log list
    temp.LOG_LIST = log.theLog(iFrame,BOTTOM)
    
    # Select classifier
    temp.iClassifier = perceptron.PerceptronClassifier()
# Beispiel #4 (scraped-snippet separator; "0" below is a vote count)
# 0
def readCommand(argv):
    "Processes the command used to run from the command line."
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=['mostFrequent', 'perceptron', 'mlp', 'svm'],
                      default='mostFrequent')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=TRAINING_SET_SIZE,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print "Doing classification"
    print "--------------------"
    print "classifier:\t\t" + options.classifier
    print "training set size:\t" + str(options.training)

    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    featureFunction = basicFeatureExtractorDigit
    legalLabels = range(10)

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    if (options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (options.classifier == "mlp"):
        classifier = mlp.MLPClassifier(legalLabels, options.iterations)
    elif (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif (options.classifier == "svm"):
        classifier = svm.SVMClassifier(legalLabels)
    else:
        print "Unknown classifier:", options.classifier
        print USAGE_STRING

        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
# Beispiel #5 (scraped-snippet separator; "0" below is a vote count)
# 0
def readCommand( argv ):
  """Parse the command line and build the requested classifier.

  Supports digit and face datasets.  Returns ``(args, options)`` where
  ``args`` holds the 'classifier' and 'printImage' entries.  Exits with
  status 2 on invalid input.
  """
  from optparse import OptionParser  
  parser = OptionParser(USAGE_STRING)
  
  parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['linear_svm', 'nb', 'naiveBayes', 'perceptron'], default='linear_svm')
  parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='faces')
  parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int")
  parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=0.1)
  parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true")
  parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int")
  parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=50, type="int")

  options, otherjunk = parser.parse_args(argv)
  if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk))
  args = {}
  
  # Echo the chosen configuration.
  print "Doing classification"
  print "--------------------"
  print "data:\t\t" + options.data
  print "classifier:\t\t" + options.classifier
  print "training set size:\t" + str(options.training)
  print "testing set size:\t"+str(options.test)
  # Image printer matches the dataset's datum dimensions.
  if(options.data=="digits"):
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage

  elif(options.data=="faces"):
    printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
  else:
    print "Unknown dataset", options.data
    print USAGE_STRING
    sys.exit(2)
    
  # Digits have 10 classes; faces are binary (face / not-face).
  if(options.data=="digits"):
    legalLabels = range(10)
  else:
    legalLabels = range(2)
    
  if options.training <= 0:
    print "Training set size should be a positive integer (you provided: %d)" % options.training
    print USAGE_STRING
    sys.exit(2)
    
  if options.smoothing <= 0:
    print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing
    print USAGE_STRING
    sys.exit(2)

  # Instantiate the requested classifier ("nb" is an alias for naiveBayes).
  if(options.classifier == "linear_svm"):
    classifier = linear_svm.LinearClassifier(options.data)
  elif(options.classifier == "naiveBayes" or options.classifier == "nb"):
    classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    classifier.setSmoothing(options.smoothing)
    if (options.autotune):
        print "using automatic tuning for naivebayes"
        classifier.automaticTuning = True
    else:
        print "using smoothing parameter k=%f for naivebayes" %  options.smoothing
  elif(options.classifier == "perceptron"):
    classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations)
  else:
    print "Unknown classifier:", options.classifier
    print USAGE_STRING
    sys.exit(2)

  args['classifier'] = classifier
  args['printImage'] = printImage
  return args, options
def runClassifier(args, options, legalLabels):

    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if (options.data == "faces"):

        rawValidationData = samples.loadDataFile("facedata/facedatatrain",
                                                 numTest, FACE_DATUM_WIDTH,
                                                 FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile(
            "facedata/facedatatrainlabels", numTest)
        rawTestData = samples.loadDataFile("facedata/facedatatest", numTest,
                                           FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels",
                                            numTest)
    else:

        rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                                 numTest, DIGIT_DATUM_WIDTH,
                                                 DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,
                                           DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."

    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    total = numTraining

    f_out = open(
        './results/' + options.classifier + "_" + options.data + '.txt', 'w')

    # train and classify for portions of the training data, compare performance
    for i in range(1, 11):

        print "\n\nUsing", i * 10, "% of training data\n"

        multiplier = i / 10.0
        numTraining = int(total * multiplier)

        if (options.classifier == "naiveBayes"):
            classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        elif (options.classifier == "perceptron"):
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)

        if options.data == "faces":
            rawTrainingData = samples.loadDataFile("facedata/facedatatrain",
                                                   numTraining,
                                                   FACE_DATUM_WIDTH,
                                                   FACE_DATUM_HEIGHT)
            trainingLabels = samples.loadLabelsFile(
                "facedata/facedatatrainlabels", numTraining)
        else:
            rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                                   numTraining,
                                                   DIGIT_DATUM_WIDTH,
                                                   DIGIT_DATUM_HEIGHT)
            trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                    numTraining)

        trainingData = map(featureFunction, rawTrainingData)
        # Conduct training and testing

        start_time = time.time()

        print "Training..."
        classifier.train(trainingData, trainingLabels, validationData,
                         validationLabels)

        end_time = time.time()
        exec_time = end_time - start_time

        print "\n\nUsing " + str(numTraining) + " training images"
        print "Training took " + str(exec_time) + " seconds\n\n"

        print "Validating..."
        guesses = classifier.classify(validationData)
        correct = [
            guesses[i] == validationLabels[i]
            for i in range(len(validationLabels))
        ].count(True)
        print str(correct), ("correct out of " + str(len(validationLabels)) +
                             " (%.1f%%).") % (100.0 * correct /
                                              len(validationLabels))
        val_correct = correct
        print "Testing..."
        guesses = classifier.classify(testData)
        correct = [
            guesses[i] == testLabels[i] for i in range(len(testLabels))
        ].count(True)
        print str(correct), ("correct out of " + str(len(testLabels)) +
                             " (%.1f%%).") % (100.0 * correct /
                                              len(testLabels))
        test_correct = correct
        # analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

        f_out.write(
            str(numTraining) + " " + str(exec_time) + " " + str(numTest) +
            " " + str(val_correct) + " " + str(test_correct) + '\n')

    f_out.close()
# Beispiel #7 (scraped-snippet separator; "0" below is a vote count)
# 0
def runClassifier():
    """Train and test a perceptron on digit data, logging to a Tk canvas.

    Builds a 1024x768 Tk window with a canvas, loads one training image and
    TEST_SET_SIZE test images, trains the classifier, and reports progress
    as text drawn on the canvas.  Blocks at the end waiting for console
    input; returns when the input matches 'bye'.
    """
    global TK_ROOT, SP_CANVAS, LOG_X, LOG_Y
    TK_ROOT = Tk(className="Classifier Interface")  # Create window
    TK_ROOT.geometry("1024x768")
    TK_ROOT.grid_rowconfigure(0, weight=1)
    TK_ROOT.grid_columnconfigure(0, weight=1)
    SP_CANVAS = Canvas(TK_ROOT, xscrollcommand=None, scrollcommand=None)
    SP_CANVAS.grid(row=0, column=0, sticky='nesw')
    SP_CANVAS.create_rectangle(10, 10, 150, 500, fill="white")

    # Set up variables according to the command line inputs
    featureFunction = basicFeatureExtractorDigit

    legalLabels = range(10)  # number of labels

    # Select classifier
    classifier = perceptron.PerceptronClassifier(legalLabels)

    # Load data.  Only a single training image is used here.
    numTraining = 1

    rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT, 'train',
                                           SP_CANVAS)

    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                            numTraining)

    loadImage()
    rawTestData = samples.loadDataFile("digitdata/testingimages",
                                       TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                       DIGIT_DATUM_HEIGHT, 'test', SP_CANVAS)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features

    print rawTestData  # debug output
    trainingData = map(basicFeatureExtractorDigit, rawTrainingData)
    print "cp3"  # debug checkpoint
    testData = map(basicFeatureExtractorDigit, rawTestData)

    # Conduct training and testing; progress is drawn on the canvas,
    # LOG_Y tracks the vertical position of the next log line.
    SP_CANVAS.create_text(LOG_X,
                          LOG_Y,
                          text="Training...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
    classifier.train(trainingData, trainingLabels, SP_CANVAS)
    # print "Validating..."
    #  guesses = classifier.classify(validationData)
    #  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    SP_CANVAS.create_text(LOG_X,
                          LOG_Y,
                          text="Testing...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
    guesses = classifier.classify(testData, SP_CANVAS)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels))
    SP_CANVAS.create_text(LOG_X,
                          LOG_Y + 30,
                          text="Completed...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
    SP_CANVAS.create_rectangle(200, 300, 201, 301)

    # Keep the process alive until the user types something matching 'bye'.
    the_input = raw_input('TYPE HERE:>> ')
    if match('bye', the_input):
        return
# Beispiel #8 (scraped-snippet separator; "0" below is a vote count)
# 0
def readCommand(argv):
    """Process the command-line arguments.

    Parses *argv* with optparse, validates the settings, builds the
    requested classifier and feature extractor, and returns
    ``(args, options)`` where ``args`` maps 'classifier',
    'featureFunction' and 'printImage' to the configured objects.
    Exits with status 2 on any invalid combination of options.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option(
        '-r',
        '--run',
        help=default('automatically runs training and test cycle for 5 times'),
        default=False,
        action='store_true')
    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=['perceptron', 'naiveBayes', 'mira'],
                      default='naiveBayes')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The ratio of the training set to use'),
                      default=1.0,
                      type="float")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))

    # Pick the image printer and feature extractor for the dataset.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Digits are labelled 0-9; faces are binary (face / not-face).
    if options.data == "digits":
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                  (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier.
    if options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
    elif options.classifier == "naiveBayes":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            # Fixed: the original had a bare `print` followed by an orphaned
            # string literal (broken 2to3 conversion), so this message was
            # never actually printed.
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif options.classifier == "knn":
        # NOTE(review): "knn" is not in the -c choices list above, so this
        # branch is unreachable from the command line -- confirm intended.
        classifier = knn.KNN(legalLabels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
    def __init__(self, master):
        """Build the handwriting-recognition demo window inside *master*.

        Lays out (top to bottom / left to right): a title banner, a row of
        settings panels, a system-status log, a user-input drawing canvas,
        a button panel, and three small output canvases (guess, predicted
        correctness, statistics).  Canvas handles and the classifier are
        published through module-level globals for the rest of the app.
        """
        global c1, c2, c3, c4, c5, myList, legalLabels, iClassifier
        frame = Frame(master, width=700, height=600, bd=1, background="black")
        frame.pack()

        # Title banner canvas (text drawn near the end of this method).
        iframe5 = Frame(frame, bd=0, relief=FLAT, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
        c1 = Canvas(iframe5,
                    bg='white',
                    width=700,
                    height=50,
                    background="black")
        c1.pack()

        # Bottom strip holding the four settings panels.
        iframe5 = Frame(frame,
                        bd=0,
                        relief=RAISED,
                        background="black",
                        highlightcolor="red")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="bottom")

        iframe7 = Frame(iframe5,
                        bd=0,
                        relief=RAISED,
                        background="black",
                        highlightcolor="red")
        iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
        lbl = Label(iframe7,
                    text="Auto Training Settings ",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lbl.pack()
        c7 = Canvas(iframe7, width=70, height=50, background="black")
        c7.pack()

        iframe7 = Frame(iframe5,
                        bd=0,
                        relief=RAISED,
                        background="black",
                        highlightcolor="red")
        iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
        lbl = Label(iframe7,
                    text="Validation Settings",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lbl.pack()
        # NOTE(review): c7 is reassigned for each settings panel; only the
        # last assignment survives in the global -- confirm intended.
        c7 = Canvas(iframe7, width=70, height=50, background="black")
        c7.pack()

        iframe7 = Frame(iframe5,
                        bd=0,
                        relief=RAISED,
                        background="black",
                        highlightcolor="red")
        iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="left")
        lbl = Label(iframe7,
                    text="User Training Settings ",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lbl.pack()
        # NOTE(review): c4 assigned here is later overwritten by the
        # "Guess Digit" canvas below -- confirm intended.
        c4 = Canvas(iframe7, width=70, height=50, background="black")
        c4.pack()

        iframe7 = Frame(iframe5,
                        bd=0,
                        relief=RAISED,
                        background="black",
                        highlightcolor="red")
        iframe7.pack(expand=1, fill=X, pady=10, padx=5, side="right")
        lbl = Label(iframe7,
                    text="Perceptron Settings",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lbl.pack()
        c7 = Canvas(iframe7, width=70, height=50, background="black")
        c7.pack()

        # Status Frame
        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="left")
        lbl = Label(iframe5,
                    text="System Status",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lbl.pack()
        #   c2 = Canvas(iframe5, bg='white', width=175, height=400,background="black")
        #c2.pack()

        # Add a log list
        myList = theList(iframe5)

        # User Input Frame
        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="left")
        lb2 = Label(iframe5,
                    text="User Input",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lb2.pack(side="top")
        c3 = Canvas(iframe5, width=235, height=300, background="black")
        c3.pack()

        # Button panel on the right.
        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="right")
        myButton(iframe5)
        # System Output Frame

        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
        lb2 = Label(iframe5,
                    text="Guess Digit",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lb2.pack(side="top")
        c4 = Canvas(iframe5,
                    bg='white',
                    width=130,
                    height=60,
                    background="black")
        c4.pack()

        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
        lb2 = Label(iframe5,
                    text="Predicted Correct",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lb2.pack(side="top")
        c5 = Canvas(iframe5,
                    bg='white',
                    width=130,
                    height=60,
                    background="black")
        c5.pack()

        iframe5 = Frame(frame, bd=0, relief=RAISED, background="black")
        iframe5.pack(expand=1, fill=X, pady=10, padx=5, side="top")
        lb2 = Label(iframe5,
                    text="User Input\nCorrectness Statistics",
                    fg="#3cecff",
                    bg="black",
                    font=("times", 14, "bold"))
        lb2.pack(side="top")
        c6 = Canvas(iframe5,
                    bg='white',
                    width=130,
                    height=60,
                    background="black")
        c6.pack()

        # NOTE(review): this 'Correctness' text is drawn again identically
        # further below, and at y=280 on a 60-pixel-high canvas it falls
        # outside the visible area -- confirm intended.
        c5.create_text(80,
                       280,
                       text='Correctness',
                       fill="#3cecff",
                       justify=CENTER,
                       font=('times', 14, 'bold'))

        c1.create_text(330,
                       25,
                       text='Perceptron Handwriting Recognition',
                       fill="#3cecff",
                       justify=CENTER,
                       font=('Times', 20, 'bold'))
        # c3.create_text(130, 20, text='User Input', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))
        #  c3.create_text(130, 320, text='Predicted Correctness', fill="#3cecff", justify=CENTER, font=('times', 14, 'bold'))

        c5.create_text(80,
                       280,
                       text='Correctness',
                       fill="#3cecff",
                       justify=CENTER,
                       font=('times', 14, 'bold'))
        c5.create_text(80,
                       296,
                       text='Statistics',
                       fill="#3cecff",
                       justify=CENTER,
                       font=('times', 14, 'bold'))
        c6.create_text(80,
                       20,
                       text='Statistics',
                       fill="#3cecff",
                       justify=CENTER,
                       font=('times', 14, 'bold'))

        # Select classifier (legalLabels is a module-level global here).
        iClassifier = perceptron.PerceptronClassifier(legalLabels)
# Beispiel #10 (scraped-snippet separator; "0" below is a vote count)
# 0
def readCommand(argv):
    """Parse command-line arguments and configure a classification run.

    Args:
        argv: list of command-line tokens (typically sys.argv[1:]).

    Returns:
        (args, options): ``args`` is a dict with keys 'classifier',
        'featureFunction' and 'printImage'; ``options`` is the parsed
        optparse options object.

    Raises:
        Exception: if unrecognized positional arguments are supplied.
        SystemExit: exits with status 2 on any invalid option value.
    """
    from optparse import OptionParser  # stdlib command-line option parser
    parser = OptionParser(USAGE_STRING)
    # Each option below has a short (-x) and a long (--xxx) spelling; both
    # set the same attribute on the parsed `options` object.  Boolean flags
    # declare action="store_true", so their mere presence on the command
    # line flips the False default to True.  -h prints the generated help.

    parser.add_option(
        '-c',
        '--classifier',
        help=default('The type of classifier'),
        choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron'],
        default='naiveBayes')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=15,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-n',
                      '--analysis',
                      help=default("Shows which data is wrongly predicted"),
                      default=False,
                      action="store_true")
    parser.add_option('-r',
                      '--random',
                      help=default("Trains the data set using random data and \
   calculates averages for percent accuracy and standard deviation"),
                      default=False,
                      action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}  # collects the objects the caller needs to run classification

    # Echo the chosen configuration before any heavy lifting starts.
    print("Doing classification")
    print("--------------------")
    print("Data:\t\t" + options.data)
    print("Classifier:\t\t" + options.classifier)
    print("Using enhanced features?:\t" + str(options.features))
    if not options.random:
        print("Training set size:\t" + str(options.training))

    if (options.data == "digits"):
        printImage = ImagePrinter(
            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
        ).printImage  # module-level digit image dimensions

        if (options.features):
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit

    elif (options.data == "faces"):
        printImage = ImagePrinter(
            FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT
        ).printImage  # module-level face image dimensions
        # -f selects the enhanced face feature extractor; otherwise basic.
        if (
                options.features
        ):
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    else:  # neither "digits" nor "faces" was requested
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    if (options.data == "digits"):
        legalLabels = range(10)  # digit classes 0-9
    else:
        legalLabels = range(2)  # binary decision: face or not face

    # Reject non-positive training-set sizes.
    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)
        print(USAGE_STRING)
        sys.exit(2)
    # Reject non-positive smoothing parameters.
    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)
    # Odds-ratio comparison requires two labels legal for the dataset.
    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                  (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    # Instantiate the requested classifier.
    # NOTE(review): 'mostFrequent' is accepted by the --classifier choices
    # above but has no branch here, so selecting it falls through to the
    # "Unknown classifier" exit — confirm whether that is intended.
    if (options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print("Using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("Using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)

    elif (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
        # --iterations caps the number of perceptron training passes.

    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)

        sys.exit(2)

    args[
        'classifier'] = classifier  # hand the built objects back to the caller
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand( argv ):
  """
  Processes the command used to run from the command line.
  """
  import getopt

  # Set default options
  options = {'classifier': 'mostfrequent', 
             'data': 'digits', 
             'enhancedFeatures': False,
             'train': 100,
             'odds': False,
             'class1': 1,
             'class2': 0,
             'smoothing': 1,
             'automaticTuning' : False,
             'maxIterations': 3}
             
  args = {} # This dictionary will hold the objects used by the main method
  
  # Read input from the command line
  commands = ['help', 
              'classifer=', 
              'data=',
              'train=', 
              'enhancedFeatures', 
              'odds',
              'class1=',
              'class2=',
              'smoothing=',
              'automaticTuning'
              'maxIterations=']
  try:
    opts = getopt.getopt( argv, "hc:d:t:fo1:2:k:ai:", commands )
  except getopt.GetoptError:
    print USAGE_STRING
    sys.exit( 2 )
    
  for option, value in opts[0]:
    if option in ['--help', '-h']:
      print USAGE_STRING
      sys.exit( 0 )
    if option in ['--classifier', '-c']:
      options['classifier'] = value
    if option in ['--data', '-d']:
      options['data'] = value
    if option in ['--train', '-t']:
      options['train'] = int(value)
    if option in ['--enhancedFeatures', '-f']:
      options['enhancedFeatures'] = True
    if option in ['--odds', '-o']:
      options['odds'] = True
    if option in ['--class1', '-1']:
      options['class1'] = int(value)
    if option in ['--class2', '-2']:
      options['class2'] = int(value)
    if option in ['--smoothing', '-k']:
      options['smoothing'] = float( value )
    if option in ['--automaticTuning', '-a']:
      options['automaticTuning'] = True
    if option in ['--maxIterations', '-i']:
      options['maxIterations'] = int(value)
    
  # Set up variables according to the command line input.
  print "Doing classification"
  print "--------------------"
  print "data:\t\t" + options['data']
  print "classifier:\t\t" + options['classifier']
  print "using enhanced features?:\t" + str(options['enhancedFeatures'])
  print "training set size:\t" + str(options['train'])
  if(options['data']=="digits"):
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    if (options['enhancedFeatures']):
      featureFunction = enhancedFeatureExtractorDigit
    else:
      featureFunction = basicFeatureExtractorDigit
  elif(options['data']=="faces"):
    printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
    if (options['enhancedFeatures']):
      featureFunction = enhancedFeatureExtractorFace
    else:
      featureFunction = basicFeatureExtractorFace      
  else:
    print "Unknown dataset", options['data']
    print USAGE_STRING
    sys.exit(2)
    
  if(options['data']=="digits"):
    legalLabels = range(10)
  else:
    legalLabels = range(2)
    
  if options['train'] <= 0:
    print "Training set size should be a positive integer (you provided: %d)" % options['train']
    print USAGE_STRING
    sys.exit(2)
    
  if options['smoothing'] <= 0:
    print "Please provide a positive number for smoothing (you provided: %f)" % options['smoothing']
    print USAGE_STRING
    sys.exit(2)
    
  if options['odds']:
    for className in ['class1','class2']:
      if options[className] not in legalLabels:
        print "Didn't provide a legal labels for the odds ratio for %s" % className
        print USAGE_STRING
        sys.exit(2)

  if(options['classifier'] == "mostfrequent"):
    classifier = mostFrequent.MostFrequentClassifier(legalLabels)
  elif(options['classifier'] == "naivebayes"):
    classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    classifier.setSmoothing(options['smoothing'])
    if (options['automaticTuning']):
        print "using automatic tuning for naivebayes"
        classifier.automaticTuning = True
    else:
        print "using smoothing parameter k=%f for naivebayes" %  options['smoothing']
  elif(options['classifier'] == "perceptron"):
    classifier = perceptron.PerceptronClassifier(legalLabels,options['maxIterations'])
  elif(options['classifier'] == "mira"):
    classifier = mira.MiraClassifier(legalLabels, options['maxIterations'])
    if (options['automaticTuning']):
        print "using automatic tuning for MIRA"
        classifier.automaticTuning = True
    else:
        print "using default C=0.001 for MIRA"
  else:
    print "Unknown classifier:", options['classifier']
    print USAGE_STRING
    sys.exit(2)

  args['classifier'] = classifier
  args['featureFunction'] = featureFunction
  args['printImage'] = printImage
  
  return args, options
        item["background"] = "blue"
        item["fg"] = "#FFF"
        for i in self.button:
            if self.button[i] != item:
                self.button[i]["background"] = "black"
                self.button[i]["fg"] = "#3cecff"


# Smoke test: builds the GUI widgets stand-alone when this file is run
# directly (not when imported as a module).
if __name__ == "__main__":
    root = Tk()
    frame = Frame(root)  # parent container for the widgets below
    temp.LOG_LIST = log.theLog(frame, TOP)
    iControl = theControl(root, TOP)  # control panel attached to the root window

    # NOTE(review): other call sites in this file pass legalLabels (and
    # sometimes an iteration count) to PerceptronClassifier; here it is
    # constructed with no arguments — confirm the constructor's defaults.
    temp.iClassifier = perceptron.PerceptronClassifier()

    temp.iTitle = title.theTitle(frame, TOP)

    # Add setting option
    #temp.iSetting = setting.theSetting(frame,TOP)

    # Add a log list
    # NOTE(review): temp.LOG_LIST was already assigned above; this second
    # assignment replaces the first widget — confirm which is intended.
    temp.LOG_LIST = log.theLog(frame, TOP)

    # User Input Image
    temp.iDisplay = display.theDisplay(frame, "hi", TOP)

    # System Output Frame
    temp.iOutput = output.theOutput(frame, TOP)
    # NOTE(review): no root.mainloop() call and no frame.pack() appear in
    # this block, so the window may never display — verify against the
    # full file.
def readCommand(argv):
    """Parse *argv* and assemble the configuration for a classification run.

    Registers all command-line options, echoes the chosen settings,
    validates them (exiting with status 2 on invalid input), and builds
    the requested classifier plus feature extractor and image printer.

    Returns:
        (args, options): ``args`` carries 'agentToClone', 'classifier',
        'featureFunction' and 'printImage'; ``options`` is the optparse
        options object.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)

    # Option registrations, one per line; boolean flags default to False
    # and are switched on by mere presence (action="store_true").
    parser.add_option("-c", "--classifier", help=default("The type of classifier"),
                      choices=["mostFrequent", "perceptron"], default="mostFrequent")
    parser.add_option("-d", "--data", help=default("Dataset to use"),
                      choices=["digits", "faces", "pacman"], default="digits")
    parser.add_option("-t", "--training", help=default("The size of the training set"),
                      default=100, type="int")
    parser.add_option("-f", "--features", help=default("Whether to use enhanced features"),
                      default=False, action="store_true")
    parser.add_option("-o", "--odds", help=default("Whether to compute odds ratios"),
                      default=False, action="store_true")
    parser.add_option("-1", "--label1", help=default("First label in an odds ratio comparison"),
                      default=0, type="int")
    parser.add_option("-2", "--label2", help=default("Second label in an odds ratio comparison"),
                      default=1, type="int")
    parser.add_option("-w", "--weights", help=default("Whether to print weights"),
                      default=False, action="store_true")
    parser.add_option("-k", "--smoothing", help=default("Smoothing parameter (ignored when using --autotune)"),
                      type="float", default=2.0)
    parser.add_option("-a", "--autotune", help=default("Whether to automatically tune hyperparameters"),
                      default=False, action="store_true")
    parser.add_option("-i", "--iterations", help=default("Maximum iterations to run training"),
                      default=3, type="int")
    parser.add_option("-s", "--test", help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE, type="int")
    parser.add_option("-g", "--agentToClone", help=default("Pacman agent to copy"),
                      default=None, type="str")

    options, leftovers = parser.parse_args(argv)
    if leftovers:
        raise Exception(
            "Command line input not understood: " + str(leftovers)
        )

    # Echo the chosen configuration before any heavy work starts.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # Pick the image printer and feature extractor for the dataset.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        # Digits always use the basic extractor here, regardless of -f.
        featureFunction = basicFeatureExtractorDigit
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        featureFunction = (enhancedFeatureExtractorFace if options.features
                           else basicFeatureExtractorFace)
    elif options.data == "pacman":
        printImage = None  # pacman states have no printable image
        featureFunction = (enhancedFeatureExtractorPacman if options.features
                           else basicFeatureExtractorPacman)
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Digits classify into 0-9; every other dataset uses pacman actions.
    legalLabels = (list(range(10)) if options.data == "digits"
                   else ["Stop", "West", "East", "North", "South"])

    # Validate numeric settings; each failure prints usage and exits.
    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training
        )
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing
        )
        print(USAGE_STRING)
        sys.exit(2)

    # Odds ratios need two labels legal for the chosen dataset.
    if options.odds and (options.label1 not in legalLabels
                         or options.label2 not in legalLabels):
        print(
            "Didn't provide a legal labels for the odds ratio: (%d,%d)"
            % (options.label1, options.label2)
        )
        print(USAGE_STRING)
        sys.exit(2)

    # Instantiate the requested classifier.
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "perceptron":
        # Pacman uses its own perceptron variant over (state, action) pairs.
        if options.data == "pacman":
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations
            )
        else:
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations
            )
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)

        sys.exit(2)

    args = {
        "agentToClone": options.agentToClone,
        "classifier": classifier,
        "featureFunction": featureFunction,
        "printImage": printImage,
    }

    return args, options
# ---- Beispiel #14 (snippet-aggregator separator; commented out so it no longer breaks Python syntax) ----
# 0
            print USAGE_STRING
            sys.exit(2)

if(options.classifier == "mostFrequent"):
    classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif(options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print "using automatic tuning for naivebayes"
            classifier.automaticTuning = True
        else:
            print "using smoothing parameter k=%f for naivebayes" %  options.smoothing
elif(options.classifier == "perceptron"):
    if options.data != 'pacman':
        classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels,options.iterations)
elif(options.classifier == "mira"):
    if options.data != 'pacman':
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if (options.autotune):
            print "using automatic tuning for MIRA"
            classifier.automaticTuning = True
    else:
        print "using default C=0.001 for MIRA"
elif(options.classifier == 'minicontest'):
    import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        print "Unknown classifier:", options.classifier
def readCommand( argv ):
    """Parse command-line arguments for a digit/face classification run.

    Python 2 variant supporting naiveBayes, perceptron and kNN.  Exits
    with status 2 (after printing USAGE_STRING) on invalid input.

    Returns (args, options): args maps 'classifier', 'featureFunction'
    and 'printImage' to the constructed objects; options is the parsed
    optparse options object.
    """
    from optparse import OptionParser    
    parser = OptionParser(USAGE_STRING)
    
    # One registration per option; -k here is the kNN neighbor count,
    # unlike the other readCommand variants where -k is smoothing.
    parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['naiveBayes', 'perceptron', 'kNN'], default='naiveBayes')
    parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='digits')
    parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int")
    parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true")
    parser.add_option('-k', '--neighbors', help=default("Numbers of neighbors in k-Nearest Neighbors"), type="int", default=3)
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int")
    parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}
    
    # Echo the chosen configuration before any heavy work starts.
    print "Doing classification"
    print "--------------------"
    print "data:\t\t" + options.data
    print "classifier:\t\t" + options.classifier
    print "training set size:\t" + str(options.training)
    # Only the basic feature extractors are offered in this variant.
    if(options.data=="digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorDigit
    elif(options.data=="faces"):
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorFace            
    else:
        print "Unknown dataset", options.data
        print USAGE_STRING
        sys.exit(2)
        
    # Digits have ten classes; faces are a binary decision.
    if(options.data=="digits"):
        legalLabels = range(10)
    else:
        legalLabels = range(2)
        
    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    if options.neighbors <= 0:
        print "Neighbors for kNN should be a positive integer (you provided: %d)" % options.neighbors
        print USAGE_STRING
        sys.exit(2)
        
    # Instantiate the requested classifier.
    if(options.classifier == "naiveBayes"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    elif(options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations)
    elif(options.classifier == "kNN"):
        classifier = kNN.kNNClassifier(legalLabels,options.neighbors)
    else:
        print "Unknown classifier:", options.classifier
        print USAGE_STRING
        
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage
    
    return args, options
def runClassifier():
    ########################################################################################################################################
    #Edited Code
    #Store info for each iteration
    nbDigits = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    nbFaces = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    perceptronDigits = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]
    perceptronFaces = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]
    trainingCounts = {
        0: 500,
        1: 1000,
        2: 1500,
        3: 2000,
        4: 2500,
        5: 3000,
        6: 3500,
        7: 4000,
        8: 4500,
        9: 5000,
        10: 500,
        11: 1000,
        12: 1500,
        13: 2000,
        14: 2500,
        15: 3000,
        16: 3500,
        17: 4000,
        18: 4500,
        19: 5000,
        20: 45,
        21: 90,
        22: 135,
        23: 180,
        24: 225,
        25: 270,
        26: 315,
        27: 360,
        28: 405,
        29: 450,
        30: 45,
        31: 90,
        32: 135,
        33: 180,
        34: 225,
        35: 270,
        36: 315,
        37: 360,
        38: 405,
        39: 450
    }
    #FaceData
    rawFaceTrainingData = samples.loadDataFile("facedata/facedatatrain", 450,
                                               FACE_DATUM_WIDTH,
                                               FACE_DATUM_HEIGHT)
    faceTrainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels",
                                                450)

    rawFaceValidationData = samples.loadDataFile("facedata/facedatatrain", 300,
                                                 FACE_DATUM_WIDTH,
                                                 FACE_DATUM_HEIGHT)
    faceValidationLabels = samples.loadLabelsFile(
        "facedata/facedatatrainlabels", 300)

    rawFaceTestData = samples.loadDataFile("facedata/facedatatest", 149,
                                           FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
    testFaceLabels = samples.loadLabelsFile("facedata/facedatatestlabels", 149)

    #DigitData
    rawDigitTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                                5000, DIGIT_DATUM_WIDTH,
                                                DIGIT_DATUM_HEIGHT)
    digitTrainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                 5000)

    rawDigitValidationData = samples.loadDataFile("digitdata/validationimages",
                                                  1000, DIGIT_DATUM_WIDTH,
                                                  DIGIT_DATUM_HEIGHT)
    digitValidationLabels = samples.loadLabelsFile(
        "digitdata/validationlabels", 1000)

    rawDigitTestData = samples.loadDataFile("digitdata/testimages", 1000,
                                            DIGIT_DATUM_WIDTH,
                                            DIGIT_DATUM_HEIGHT)
    testDigitLabels = samples.loadLabelsFile("digitdata/testlabels", 1000)

    #Automation of test for each classifier and data type
    for x in range(40):

        if x < 10:
            classifierName = "nb"
        elif x < 20:
            classifierName = "perceptron"
        elif x < 30:
            classifierName = "nb"
        else:
            classifierName = "perceptron"

        if x < 20:
            Data = "digits"
        else:
            Data = "faces"

        if (Data == "digits"):
            legalLabels = range(10)
            #featureFunction = enhancedFeatureExtractorDigit
            featureFunction = basicFeatureExtractorDigit
        else:
            legalLabels = range(2)
            #featureFunction = enhancedFeatureExtractorFace
            featureFunction = basicFeatureExtractorFace

        if (classifierName == "nb"):
            classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
            classifier.setSmoothing(2.0)

        elif (classifierName == "perceptron"):
            classifier = perceptron.PerceptronClassifier(legalLabels, 3)

        print("Doing classification")
        print("--------------------")
        print("data:\t\t" + Data)
        print("classifier:\t\t " + classifierName)
        print("using enhanced features")
        print("training set size:\t" + str(trainingCounts[x]))

        # Extract features
        print("Extracting features...")
        # Load data

        if Data == "digits":
            startTime = time.process_time()
            h = 0
            while h < 3:
                print("Iteration %d" % h)
                numTraining = trainingCounts[x]
                rawTrainingData = []
                rawTrainingLabels = []
                i = 0
                while i < numTraining:
                    k = list(range(0, 5000))
                    random.shuffle(k)
                    j = k.pop()
                    rawTrainingLabels.append(digitTrainingLabels[j])
                    rawTrainingData.append(rawDigitTrainingData[j])
                    i += 1

                trainingData = list(map(featureFunction, rawTrainingData))
                validationData = list(
                    map(featureFunction, rawDigitValidationData))
                testData = list(map(featureFunction, rawDigitTestData))

                print("Training...")
                classifier.train(trainingData, rawTrainingLabels,
                                 validationData, digitValidationLabels)
                print("Validating...")
                guesses = classifier.classify(validationData)
                correct = [
                    guesses[i] == digitValidationLabels[i]
                    for i in range(len(digitValidationLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(digitValidationLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(digitValidationLabels)))
                print("Testing...")
                guesses = classifier.classify(testData)
                correct = [
                    guesses[i] == testDigitLabels[i]
                    for i in range(len(testDigitLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(testDigitLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(testDigitLabels)))
                h += 1
                #Gather correct count for each iteration and use to compute standard deviation
                if classifierName == "nb":
                    if Data == "digits":
                        nbDigits[x % 10] += correct
                        nbDigits[(x % 10) +
                                 10] += time.process_time() - startTime
                    else:
                        nbFaces[x % 10] += correct
                        nbFaces[(x % 10) +
                                10] += time.process_time() - startTime
                else:
                    if Data == "digits":
                        perceptronDigits[x % 10] += correct
                        perceptronDigits[(x % 10) +
                                         10] += time.process_time() - startTime
                    else:
                        perceptronFaces[x % 10] += correct
                        perceptronFaces[(x % 10) +
                                        10] += time.process_time() - startTime
        else:
            h = 0
            while h < 3:
                print("Iteration %d" % h)
                numTraining = trainingCounts[x]
                rawTrainingData = []
                rawTrainingLabels = []
                i = 0
                while i < numTraining:
                    k = list(range(0, 450))
                    random.shuffle(k)
                    j = k.pop()
                    rawTrainingLabels.append(faceTrainingLabels[j])
                    rawTrainingData.append(rawFaceTrainingData[j])
                    i += 1
                trainingData = list(map(featureFunction, rawTrainingData))
                validationData = list(
                    map(featureFunction, rawFaceValidationData))
                testData = list(map(featureFunction, rawFaceTestData))

                print("Training...")
                classifier.train(trainingData, rawTrainingLabels,
                                 validationData, faceValidationLabels)
                print("Validating...")
                guesses = classifier.classify(validationData)
                correct = [
                    guesses[i] == faceValidationLabels[i]
                    for i in range(len(faceValidationLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(faceValidationLabels)) +
                       " (%.1f%%).") %
                      (100.0 * correct / len(faceValidationLabels)))
                print("Testing...")
                guesses = classifier.classify(testData)
                correct = [
                    guesses[i] == testFaceLabels[i]
                    for i in range(len(testFaceLabels))
                ].count(True)
                print(str(correct),
                      ("correct out of " + str(len(testFaceLabels)) +
                       " (%.1f%%).") % (100.0 * correct / len(testFaceLabels)))
                h += 1
                #Gather correct count for each iteration and use to compute standard deviation
                if classifierName == "nb":
                    if Data == "digits":
                        nbDigits[x % 10] += correct
                        nbDigits[(x % 10) +
                                 10] += time.process_time() - startTime
                    else:
                        nbFaces[x % 10] += correct
                        nbFaces[(x % 10) +
                                10] += time.process_time() - startTime
                else:
                    if Data == "digits":
                        perceptronDigits[x % 10] += correct
                        perceptronDigits[(x % 10) +
                                         10] += time.process_time() - startTime
                    else:
                        perceptronFaces[x % 10] += correct
                        perceptronFaces[(x % 10) +
                                        10] += time.process_time() - startTime

    #NAIVE BAYES DIGITS
    print(
        "Average Correct Guesses for Naive Bayes Digits Based on Percentage of TrainingData Used"
    )
    print(
        "10%% %d/1000, 20%% %d/1000, 30%% %d/1000, 40%% %d/1000, 50%% %d/1000, 60%% %d/1000, 70%% %d/1000, 80%% %d/1000, 90%% %d/1000, 100%% %d/1000"
        % (nbDigits[0] / 3, nbDigits[1] / 3, nbDigits[2] / 3, nbDigits[3] / 3,
           nbDigits[4] / 3, nbDigits[5] / 3, nbDigits[6] / 3, nbDigits[7] / 3,
           nbDigits[8] / 3, nbDigits[9] / 3))
    print(
        "Standard Deviation for Naive Bayes Digits Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = nbDigits[i] / 3
        stndDev[i] = nbDigits[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 1000
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Average Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d seconds, 20%% %d seconds, 30%% %d seconds, 40%% %d seconds, 50%% %d seconds, 60%% %d seconds, 70%% %d seconds, 80%% %d seconds, 90%% %d seconds, 100%% %d seconds"
        % (nbDigits[10] / 3, nbDigits[11] / 3, nbDigits[12] / 3, nbDigits[13] /
           3, nbDigits[14] / 3, nbDigits[15] / 3, nbDigits[16] / 3,
           nbDigits[17] / 3, nbDigits[18] / 3, nbDigits[19] / 3))

    #NAIVE BAYES FACES
    print(
        "Average Correct Guesses for Naive Bayes Faces Based on Percentage of TrainingData Used"
    )
    print(
        "10%% %d/149, 20%% %d/149, 30%% %d/149, 40%% %d/149, 50%% %d/149, 60%% %d/149, 70%% %d/149, 80%% %d/149, 90%% %d/149, 100%% %d/149"
        % (nbFaces[0] / 3, nbFaces[1] / 3, nbFaces[2] / 3, nbFaces[3] / 3,
           nbFaces[4] / 3, nbFaces[5] / 3, nbFaces[6] / 3, nbFaces[7] / 3,
           nbFaces[8] / 3, nbFaces[9] / 3))
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = nbFaces[i] / 3
        stndDev[i] = nbFaces[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 149
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "Standard Deviation for Naive Bayes Faces Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (nbFaces[10] / 3, nbFaces[11] / 3, nbFaces[12] / 3, nbFaces[13] / 3,
           nbFaces[14] / 3, nbFaces[15] / 3, nbFaces[16] / 3, nbFaces[17] / 3,
           nbFaces[18] / 3, nbFaces[19] / 3))

    #PERCEPTRON DIGITS
    print(
        "Average Correct Guesses for Perceptron Digits Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d/1000, 20%% %d/1000, 30%% %d/1000, 40%% %d/1000, 50%% %d/1000, 60%% %d/1000, 70%% %d/1000, 80%% %d/1000, 90%% %d/1000, 100%% %d/1000"
        % (perceptronDigits[0] / 3, perceptronDigits[1] / 3,
           perceptronDigits[2] / 3, perceptronDigits[3] / 3,
           perceptronDigits[4] / 3, perceptronDigits[5] / 3,
           perceptronDigits[6] / 3, perceptronDigits[7] / 3,
           perceptronDigits[8] / 3, perceptronDigits[9] / 3))
    print(
        "Standard Deviation for Perceptron Digits Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = perceptronDigits[i] / 3
        stndDev[i] = perceptronDigits[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 1000
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (perceptronDigits[10] / 3, perceptronDigits[11] / 3,
           perceptronDigits[12] / 3, perceptronDigits[13] / 3,
           perceptronDigits[14] / 3, perceptronDigits[15] / 3,
           perceptronDigits[16] / 3, perceptronDigits[17] / 3,
           perceptronDigits[18] / 3, perceptronDigits[19] / 3))

    #PERCEPTRON FACES
    print(
        "Average Correct Guesses for Perceptron Faces Based on Percentage of Training Data Used"
    )
    print(
        "10%% %d/149, 20%% %d/149, 30%% %d/149, 40%% %d/149, 50%% %d/149, 60%% %d/149, 70%% %d/149, 80%% %d/149, 90%% %d/149, 100%% %d/149"
        % (perceptronFaces[0] / 3, perceptronFaces[1] / 3, perceptronFaces[2] /
           3, perceptronFaces[3] / 3, perceptronFaces[4] / 3,
           perceptronFaces[5] / 3, perceptronFaces[6] / 3, perceptronFaces[7] /
           3, perceptronFaces[8] / 3, perceptronFaces[9] / 3))
    print(
        "Standard Deviation for Perceptron Faces Based on Percentage of Training Data Used"
    )
    stndDev = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    i = 0
    while i < 10:
        stndDev[i] = perceptronFaces[i] / 3
        stndDev[i] = perceptronFaces[i] - stndDev[i]
        stndDev[i] = math.pow(stndDev[i], 2)
        stndDev[i] = stndDev[i] / 149
        stndDev[i] = math.sqrt(stndDev[i])
        i += 1
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (stndDev[0], stndDev[1], stndDev[2], stndDev[3], stndDev[4],
           stndDev[5], stndDev[6], stndDev[7], stndDev[8], stndDev[9]))
    print(
        "Time to Complete Each Iteration Based on Percentage of Training Data Used In Seconds"
    )
    print(
        "10%% %d, 20%% %d, 30%% %d, 40%% %d, 50%% %d, 60%% %d, 70%% %d, 80%% %d, 90%% %d, 100%% %d"
        % (perceptronFaces[10] / 3, perceptronFaces[11] / 3,
           perceptronFaces[12] / 3, perceptronFaces[13] / 3,
           perceptronFaces[14] / 3, perceptronFaces[15] / 3,
           perceptronFaces[16] / 3, perceptronFaces[17] / 3,
           perceptronFaces[18] / 3, perceptronFaces[19] / 3))
def readCommand(argv):
    """Parse the command line and build the classifier configuration.

    Recognized options: classifier type (perceptron only), training-set
    size, enhanced features, smoothing, autotune, iteration cap, test-set
    size, and a validate flag.

    Returns:
        (args, options) where ``args`` maps 'classifier',
        'featureFunction' and 'printImage' to the configured objects.

    Raises:
        Exception: if unrecognized positional arguments are present.
        SystemExit: (exit code 2) on invalid option values.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=['perceptron'],
                      default='perceptron')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=1000,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-v',
        '--validate',
        help=default("Whether to validate when training (for graphs)"),
        default=False,
        action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration.  (Converted from legacy Python 2
    # print statements for consistency with the Python 3 code elsewhere
    # in this file.)
    print("Doing classification")
    print("--------------------")
    print("classifier:\t\t" + options.classifier)
    print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    if (options.features):
        featureFunction = enhancedFeatureExtractorDigit
    else:
        featureFunction = basicFeatureExtractorDigit

    # This variant handles digits only, hence the fixed 0-9 label set.
    legalLabels = range(10)

    if options.training <= 0:
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
Beispiel #18
0
def readCommand(argv):
    """Parse the command line and build the classifier configuration.

    Supports the digits and faces datasets and the mostFrequent, naive
    Bayes, perceptron and knn classifiers.

    Returns:
        (args, options) where ``args`` maps 'classifier',
        'featureFunction' and 'printImage' to the configured objects.

    Raises:
        Exception: if unrecognized positional arguments are present.
        SystemExit: (exit code 2) on invalid option values.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option(
        '-c',
        '--classifier',
        help=default('The type of classifier'),
        choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'knn'],
        default='mostFrequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=K_VALUE)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=MAX_ITERATIONS,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-q',
        '--index',
        help=default(
            "index of data whose predicted label and actual label you want to display"
        ),
        default=-1,
        type="int")
    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t", options.data)
    print("classifier:\t\t", options.classifier)
    print("training set size:\t" + str(options.training))
    if (options.data == "digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorDigit
        else:
            print('using basicFeatureExtractorDigit for digits')
            featureFunction = basicFeatureExtractorDigit
    elif (options.data == "faces"):
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorFace
        else:
            # BUG FIX: the message previously named basicFeatureExtractorDigit
            # while basicFeatureExtractorFace was being assigned.
            print('using basicFeatureExtractorFace for faces')
            featureFunction = basicFeatureExtractorFace
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Digits use labels 0-9; faces are a binary (face / not-face) task.
    if (options.data == "digits"):
        legalLabels = range(10)
    else:
        legalLabels = range(2)

    if options.training <= 0:
        # BUG FIX: the format string and value were passed to print() as two
        # arguments, printing a literal "%d" instead of the number.
        print("Training set size should be a positive integer (you provided: %d)"
              % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        # BUG FIX: same comma-instead-of-% mistake as above.
        print("Please provide a positive number for smoothing (you provided: %f)"
              % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                  (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if (options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            # BUG FIX: same comma-instead-of-% mistake as above.
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif (options.classifier == 'knn'):
        # knn has separate implementations for digits and faces; the
        # smoothing option doubles as its k-parameter here.
        if (options.data == "digits"):
            classifier = knn.KNNClassifier(legalLabels, options.smoothing)
        else:
            classifier = knn_faces.KNNClassifierFaces(legalLabels,
                                                      options.smoothing)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)

        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def read_command(argv):
    """Parse the command line and build the classifier configuration.

    Supports the digits, faces and pacman datasets and the most_frequent,
    naive Bayes, perceptron (plain / numpy / pacman), logistic and
    minicontest classifiers.

    Returns:
        (args, options) where ``args`` maps 'classifier',
        'feature_function', 'print_image' and 'agent_to_clone' to the
        configured objects.

    Raises:
        Exception: if unrecognized positional arguments are present.
        SystemExit: (exit code 2) on invalid option values.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'most_frequent', 'nb', 'naive_bayes', 'perceptron',
                          'perceptron_numpy', 'logistic', 'minicontest'
                      ],
                      default='most_frequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-n',
        '--num_weights',
        help=default(
            "Num Weights to Print (when --weights enabled), default: 100"),
        default=100,
        type="int")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-g',
                      '--agent_to_clone',
                      help=default("Pacman agent to copy"),
                      default=None,
                      type="str")
    parser.add_option(
        '-l',
        '--learning_rates',
        help=default(
            "Learning rates to use for gradient descent, can be a comma separated list or single value"
        ),
        default=[0.2],
        type="str",
        action='callback',
        callback=learning_rate_callback)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if not options.classifier == 'minicontest':
        print("using enhanced features?:\t" + str(options.features))
    else:
        print("using minicontest feature extractor")
    print("training set size:\t" + str(options.training))
    if (options.data == "digits"):
        print_image = ImagePrinter(DIGIT_DATUM_WIDTH,
                                   DIGIT_DATUM_HEIGHT).print_image
        if (options.features):
            feature_function = enhanced_feature_extractor_digit
        else:
            feature_function = basic_feature_extractor_digit
        # minicontest always uses its own extractor, overriding --features.
        if (options.classifier == 'minicontest'):
            feature_function = contest_feature_extractor_digit
    elif (options.data == "faces"):
        print_image = ImagePrinter(FACE_DATUM_WIDTH,
                                   FACE_DATUM_HEIGHT).print_image
        if (options.features):
            feature_function = enhanced_feature_extractor_face
        else:
            feature_function = basic_feature_extractor_face
    elif (options.data == "pacman"):
        # Pacman states have no fixed-size image representation to print.
        print_image = None
        if (options.features):
            feature_function = enhanced_feature_extractor_pacman
        else:
            feature_function = basic_feature_extractor_pacman
    else:
        print("Unknown dataset", options.data)
        print(USAGE_STRING)
        sys.exit(2)

    # Digits use labels 0-9; faces and pacman both use move labels here.
    if (options.data == "digits"):
        legal_labels = list(range(10))
    else:
        legal_labels = ['Stop', 'West', 'East', 'North', 'South']

    if options.training <= 0:
        print(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)
        print(USAGE_STRING)
        sys.exit(2)

    if options.smoothing <= 0:
        print(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)
        print(USAGE_STRING)
        sys.exit(2)

    if options.odds:
        if options.label1 not in legal_labels or options.label2 not in legal_labels:
            print("Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                  (options.label1, options.label2))
            print(USAGE_STRING)
            sys.exit(2)

    if (options.classifier == "most_frequent"):
        classifier = most_frequent.MostFrequentClassifier(legal_labels)
    elif (options.classifier == "naive_bayes" or options.classifier == "nb"):
        classifier = naive_bayes.NaiveBayesClassifier(legal_labels)
        classifier.set_smoothing(options.smoothing)
        if (options.autotune):
            print("using automatic tuning for naivebayes")
            classifier.automatic_tuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif (options.classifier == "perceptron"):
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legal_labels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legal_labels, options.iterations)
    elif (options.classifier == "perceptron_numpy"):
        if options.data != 'pacman':
            classifier = perceptron_numpy.OptimizedPerceptronClassifier(
                legal_labels, options.iterations)
        else:
            # BUG FIX: previously this branch fell through with `classifier`
            # unbound, raising UnboundLocalError at the args assignment below.
            print("perceptron_numpy does not support the pacman dataset")
            print(USAGE_STRING)
            sys.exit(2)
    elif (options.classifier == "logistic"):
        if options.data != 'pacman':
            classifier = logistic.SoftmaxClassifier(legal_labels,
                                                    options.iterations)
            classifier.learning_rates = options.learning_rates
        else:
            # BUG FIX: same unbound-`classifier` fall-through as above.
            print("logistic does not support the pacman dataset")
            print(USAGE_STRING)
            sys.exit(2)

    elif (options.classifier == 'minicontest'):
        import minicontest
        classifier = minicontest.contest_classifier(legal_labels)
    else:
        print("Unknown classifier:", options.classifier)
        print(USAGE_STRING)

        sys.exit(2)

    args['agent_to_clone'] = options.agent_to_clone

    args['classifier'] = classifier
    args['feature_function'] = feature_function
    args['print_image'] = print_image

    return args, options
Beispiel #20
0
def runClassifier():
    """Train a perceptron on the digit data and classify the user's drawing.

    Operates on module-level Tk state (SP_CANVAS, LOG_X, LOG_Y): progress
    messages are drawn directly onto the scratch-pad canvas, and LOG_Y is
    advanced after each message so the log scrolls downward.

    NOTE(review): loadImage() presumably prepares the user's drawn input
    for the 'usr' classification pass below — confirm against its
    definition elsewhere in this file.
    """
    global TK_ROOT, SP_CANVAS, LOG_X, LOG_Y

    # Set up variables according to the command line inputs
    featureFunction = basicFeatureExtractorDigit

    legalLabels = range(10)  # number of labels

    # Select classifier
    classifier = perceptron.PerceptronClassifier(legalLabels)

    # Load data.  This GUI demo trains on a single example.
    numTraining = 1

    loadImage()

    rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT, 'train',
                                           SP_CANVAS)

    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                            numTraining)

    rawTestData = samples.loadDataFile("digitdata/testingimages",
                                       TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                       DIGIT_DATUM_HEIGHT, 'test', SP_CANVAS)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features.  (Leftover debug prints of the raw test data and a
    # "cp3" checkpoint were removed; list() keeps the mapped data reusable
    # under Python 3, where map() is lazy.)
    trainingData = list(map(basicFeatureExtractorDigit, rawTrainingData))
    testData = list(map(basicFeatureExtractorDigit, rawTestData))

    # Conduct auto training
    SP_CANVAS.create_text(LOG_X,
                          LOG_Y,
                          text="Auto Training...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
    classifier.train(trainingData, trainingLabels, SP_CANVAS)

    # User Input Testing
    SP_CANVAS.create_text(LOG_X,
                          LOG_Y,
                          text="Recognizing...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
    guesses = classifier.classify(testData, SP_CANVAS, "usr")

    # Completion Notify
    SP_CANVAS.create_text(LOG_X,
                          LOG_Y + 30,
                          text="Completed...",
                          anchor=NW,
                          font=tkFont.Font(size=-14))
    LOG_Y += 15
Beispiel #21
0
def runClassifier():
    """
  Harness code for running different classifiers on the face or digit data.
  
  This is the main function for classification, and is designed
  to be invoked from the command line (outside the Python interpreter).
  
  Usage:
    > python dataClassifier.py 
    OR
    > python dataClassifier.py <data> <classifierName>
    OR
    > python dataClassifier.py <data> <classifierName> <featureFunction>
    OR
    > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples>
    OR
    > python dataClassifier.py <data> <classifierName> <featureFunction> <numTrainingExamples> <odds class1 class2>
    
  For example:
    > python dataClassifier.py digits naivebayes basic 1000
    
  would run the naive Bayes classifier on 1000 training examples using the
  basicFeatureExtractor function, and then test the classifier on the test data.
  """
    print("Doing classification")
    print("--------------------")
    # Assign default values for arguments if they are not provided.
    if (len(sys.argv) == 1):
        print("No data specified; using digits.")
        sys.argv.append("digits")
    if (len(sys.argv) == 2):
        print("No classifier specified; using default.")
        sys.argv.append("mostfrequent")
    if (len(sys.argv) == 3):
        print("No feature extraction function specified; using default.")
        sys.argv.append("basic")
    if (len(sys.argv) == 4):
        print("No training set size specified; using default.")
        sys.argv.append("100")
    if (len(sys.argv) == 5):
        print("Not doing odds ratio computation.")
        sys.argv.append("noodds")

    # Set up variables according to the command line input.
    print("data:\t\t" + sys.argv[1])
    print("classifier:\t\t" + sys.argv[2])
    print("feature extractor:\t" + sys.argv[3])
    print("training set size:\t" + sys.argv[4])
    # `and` replaces the original bitwise `&` for boolean conjunction.
    if ((sys.argv[1] == "digits") and (sys.argv[3] == "basic")):
        featureFunction = basicFeatureExtractorDigit
    elif ((sys.argv[1] == "faces") and (sys.argv[3] == "basic")):
        featureFunction = basicFeatureExtractorFace
    elif ((sys.argv[1] == "digits") and (sys.argv[3] == "enhanced")):
        featureFunction = enhancedFeatureExtractorDigit
    elif ((sys.argv[1] == "faces") and (sys.argv[3] == "enhanced")):
        featureFunction = enhancedFeatureExtractorFace
    else:
        # BUG FIX: the error previously echoed sys.argv[2] (the classifier)
        # while complaining about the feature function, which is argv[3].
        print("Unknown feature function:", sys.argv[3])
        return

    if (sys.argv[1] == "digits"):  # if digits detect
        legalLabels = range(10)
    else:  # if face detect
        legalLabels = range(2)

    if (sys.argv[2] == "mostfrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (sys.argv[2] == "naivebayes"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    elif (sys.argv[2] == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels)
    else:
        print("Unknown classifier:", sys.argv[2])
        return

    # Load data
    numTraining = int(sys.argv[4])

    # NOTE(review): validation data is drawn from the training files for
    # faces (facedatatrain) but from dedicated validation files for digits
    # — presumably intentional, given the available data files; confirm.
    if (sys.argv[1] == "faces"):
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain",
                                               numTraining, FACE_DATUM_WIDTH,
                                               FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels",
                                                numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain",
                                                 TEST_SET_SIZE,
                                                 FACE_DATUM_WIDTH,
                                                 FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile(
            "facedata/facedatatrainlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("facedata/facedatatest",
                                           TEST_SET_SIZE, FACE_DATUM_WIDTH,
                                           FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels",
                                            TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                               numTraining, DIGIT_DATUM_WIDTH,
                                               DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                                 TEST_SET_SIZE,
                                                 DIGIT_DATUM_WIDTH,
                                                 DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("digitdata/testimages",
                                           TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels",
                                            TEST_SET_SIZE)

    # Extract features.  list() keeps the mapped data reusable under
    # Python 3, where map() is lazy.
    print("Extracting features...")
    trainingData = list(map(featureFunction, rawTrainingData))
    validationData = list(map(featureFunction, rawValidationData))
    testData = list(map(featureFunction, rawTestData))

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print("Validating...")
    guesses = classifier.classify(validationData)
    correct = [
        guesses[i] == validationLabels[i] for i in range(len(validationLabels))
    ].count(True)
    print(str(correct), ("correct out of " + str(len(validationLabels)) +
                         " (%.1f%%).") % (100.0 * correct /
                                          len(validationLabels)))
    print("Testing...")
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print(str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels)))
    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)

    # do odds ratio computation if specified at command line
    # NOTE(review): printImage is not defined in this function — presumably
    # a module-level helper; confirm it exists at this scope.
    if ((sys.argv[5] == "odds") and (len(sys.argv) == 8)):
        features_class1, features_class2, features_odds = classifier.findHighOddsFeatures(
            int(sys.argv[6]), int(sys.argv[7]))
        if (sys.argv[1] == "faces"):
            printImage(features_class1, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
            printImage(features_class2, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
            printImage(features_odds, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        else:
            printImage(features_class1, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
            printImage(features_class2, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
            printImage(features_odds, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
Beispiel #22
0
def readCommand(argv):
    "Processes the command used to run from the command line."
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=['perceptron'],
                      default='perceptron')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'pacman'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-g',
                      '--agentToClone',
                      help=default("Pacman agent to copy"),
                      default=None,
                      type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print "Doing classification"
    print "--------------------"
    print "data:\t\t" + options.data
    print "classifier:\t\t" + options.classifier
    print "training set size:\t" + str(options.training)
    if (options.data == "digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        featureFunction = basicFeatureExtractorDigit
        if (options.classifier == 'minicontest'):
            featureFunction = contestFeatureExtractorDigit
    elif (options.data == "pacman"):
        printImage = None
        featureFunction = basicFeatureExtractorPacman
    else:
        print "Unknown dataset", options.data
        print USAGE_STRING
        sys.exit(2)

    if (options.data == "digits"):
        legalLabels = range(10)
    else:
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    if (options.classifier == "perceptron"):
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations)
    else:
        print "Unknown classifier:", options.classifier
        print USAGE_STRING

        sys.exit(2)

    args['agentToClone'] = options.agentToClone

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand( argv ):
  """Parse command-line options for the digits/faces classifier run.

  Returns (args, options): args maps 'classifier', 'featureFunction' and
  'printImage' to the configured objects; options is the raw optparse
  result.  Exits with status 2 on invalid input.

  NOTE(review): this file defines readCommand more than once; a later
  definition shadows this one at import time.
  """
  from optparse import OptionParser
  parser = OptionParser(USAGE_STRING)

  parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron','mira'], default='perceptron')
  parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='digits')
  parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int")
  parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true")
  parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true")
  parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int")
  parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int")
  parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true")
  parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0)
  parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true")
  parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int")
  parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int")
  # NOTE(review): the two flags below combine action="store_true" with
  # default=True, so passing them can never change the stored value --
  # they are effectively always on.  Confirm whether default=False was
  # intended (as written, --random also permanently suppresses the
  # "Training set size" line printed below).
  parser.add_option('-n', '--analysis', help=default("Shows which data is wrongly predicted"), default=True, action="store_true")
  parser.add_option('-r', '--random', help=default("Trains the data set using random data and calculates averages for percent accuracy and standard deviation"), default=True, action="store_true")

  options, otherjunk = parser.parse_args(argv)
  if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk))
  args = {}

  # Set up variables according to the command line input.
  print ("Doing classification")
  print ("--------------------")
  print ("Data:\t\t" + options.data)
  print ("Classifier:\t\t" + options.classifier)
  print ("Using enhanced features?:\t" + str(options.features))
  if not options.random:
      print ("Training set size:\t" + str(options.training))
  if(options.data=="digits"):
    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
    if (options.features):
      featureFunction = enhancedFeatureExtractorDigit
    else:
      featureFunction = basicFeatureExtractorDigit
  elif(options.data=="faces"):
    printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
    if (options.features):
      featureFunction = enhancedFeatureExtractorFace
    else:
      featureFunction = basicFeatureExtractorFace
  else:
    print ("Unknown dataset", options.data)
    print (USAGE_STRING)
    sys.exit(2)

  # Digits are labelled 0-9; faces are binary (face / non-face).
  if(options.data=="digits"):
    legalLabels = range(10)
  else:
    legalLabels = range(2)

  if options.training <= 0:
    print ("Training set size should be a positive integer (you provided: %d)" % options.training)
    print (USAGE_STRING)
    sys.exit(2)

  if options.smoothing <= 0:
    print ("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)
    print (USAGE_STRING)
    sys.exit(2)

  if options.odds:
    if options.label1 not in legalLabels or options.label2 not in legalLabels:
      print ("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2))
      print (USAGE_STRING)
      sys.exit(2)

  # NOTE(review): 'mostFrequent' is accepted by --classifier above but has
  # no branch here, so selecting it falls through to "Unknown classifier"
  # and exits with status 2.
  if(options.classifier == "naiveBayes" or options.classifier == "nb"):
    classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    classifier.setSmoothing(options.smoothing)
    if (options.autotune):
        print ("Using automatic tuning for naivebayes")
        classifier.automaticTuning = True
    else:
        print ("Using smoothing parameter k=%f for naivebayes" %  options.smoothing)
  elif(options.classifier == "perceptron"):
    classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations)
  elif(options.classifier == "mira"):
    classifier = mira.MiraClassifier(legalLabels, options.iterations)
    if (options.autotune):
        print ("Using automatic tuning for MIRA")
        classifier.automaticTuning = True
    else:
        print ("Using default C=0.001 for MIRA")
  else:
    print ("Unknown classifier:", options.classifier)
    print (USAGE_STRING)

    sys.exit(2)

  args['classifier'] = classifier
  args['featureFunction'] = featureFunction
  args['printImage'] = printImage

  return args, options
def readCommand(argv):
    "Processes the command used to run from the command line."
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=['perceptron', 'bagging', 'boosting'],
                      default='bagging')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=1000,
                      type="int")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-v',
        '--validate',
        help=default("Whether to validate when training (for graphs)"),
        default=False,
        action="store_true")
    parser.add_option(
        '-r',
        '--ratio',
        help=default(
            'The ratio of dataset to be used to train 1 weak classifier'),
        default=0.1,
        type=float)
    parser.add_option(
        '-n',
        '--num_classifiers',
        help=default(
            'The number of weak classifier to be trained on each subset of dataset'
        ),
        default=10,
        type=int)
    parser.add_option(
        '-b',
        '--boosting_iteration',
        help=default('Maximum iterations to run adaboost algorithm'),
        default=2,
        type=int)

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print "Doing classification"
    print "--------------------"
    print "classifier:\t\t" + options.classifier
    if options.classifier == "bagging":
        print "num of weak classifier:%d" % options.num_classifiers
    if options.classifier == "boosting":
        print "num of boosting iterations:%d" % options.boosting_iteration
    print "training set size:\t" + str(options.training)

    printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage

    featureFunction = basicFeatureExtractorDigit

    legalLabels = [-1, 1]

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    if options.smoothing <= 0:
        print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing
        print USAGE_STRING
        sys.exit(2)

    if (options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif (options.classifier == "bagging"):
        classifier = bagging.BaggingClassifier(legalLabels, options.iterations,
                                               perceptron.PerceptronClassifier,
                                               options.ratio,
                                               options.num_classifiers)
    elif (options.classifier == "boosting"):
        classifier = boosting.AdaBoostClassifier(
            legalLabels, options.iterations, perceptron.PerceptronClassifier,
            options.boosting_iteration)
    else:
        print "Unknown classifier:", options.classifier
        print USAGE_STRING
        sys.exit(2)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand(argv):
    "Processes the command used to run from the command line."
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes', 'perceptron',
                          'mira', 'minicontest'
                      ],
                      default='mostFrequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-g',
                      '--agentToClone',
                      help=default("Pacman agent to copy"),
                      default=None,
                      type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print "Doing classification"
    print "--------------------"
    print "data:\t\t" + options.data
    print "classifier:\t\t" + options.classifier
    if not options.classifier == 'minicontest':
        print "using enhanced features?:\t" + str(options.features)
    else:
        print "using minicontest feature extractor"
    print "training set size:\t" + str(options.training)
    if (options.data == "digits"):
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        if (options.classifier == 'minicontest'):
            featureFunction = contestFeatureExtractorDigit
    elif (options.data == "faces"):
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if (options.features):
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
    elif (options.data == "pacman"):
        printImage = None
        if (options.features):
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
    else:
        print "Unknown dataset", options.data
        print USAGE_STRING
        sys.exit(2)

    if (options.data == "digits"):
        legalLabels = range(10)
    else:
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    if options.smoothing <= 0:
        print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing
        print USAGE_STRING
        sys.exit(2)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % (
                options.label1, options.label2)
            print USAGE_STRING
            sys.exit(2)

    if (options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif (options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if (options.autotune):
            print "using automatic tuning for naivebayes"
            classifier.automaticTuning = True
        else:
            print "using smoothing parameter k=%f for naivebayes" % options.smoothing
    elif (options.classifier == "perceptron"):
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations)
    elif (options.classifier == "mira"):
        if options.data != 'pacman':
            classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if (options.autotune):
            print "using automatic tuning for MIRA"
            classifier.automaticTuning = True
        else:
            print "using default C=0.001 for MIRA"
    elif (options.classifier == 'minicontest'):
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        print "Unknown classifier:", options.classifier
        print USAGE_STRING

        sys.exit(2)

    args['agentToClone'] = options.agentToClone

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def selfRunClassifier():
    print "Doing classification"
    print "--------------------"

    data_percent = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    # print "=================Digits===================="
    #
    #
    # # NaiveBayes part
    # print "Training by using NaiveBayes Algorithm"
    # featureFunction = enhancedFeatureExtractorDigit
    # legalLabels = range(10)
    # classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    # lst_avg_time = []
    # lst_avg_acc = []
    # lst_std_acc = []
    # for percent in data_percent:
    #     print "training set size:\t" + str(percent)+"%"
    #     # print "setSmoothing: k value is ", classifier.k
    #     lst_time = []
    #     lst_acc = []
    #     for i in range(5):
    #         start_time = timeit.default_timer()
    #         rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent)
    #         rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    #         rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    #         # print "Extracting features..."
    #         trainingData = map(featureFunction, rawTrainingData)
    #         validationData = map(featureFunction, rawValidationData)
    #         testData = map(featureFunction, rawTestData)
    #         classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    #         elapse = timeit.default_timer() - start_time
    #         lst_time.append(elapse)
    #         guesses = classifier.classify(testData)
    #         correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    #         lst_acc.append(float(correct) / len(testLabels))
    #     print '--------------------------------------------------------------'
    #     lst_avg_time.append(np.mean(lst_time))
    #     lst_avg_acc.append(np.mean(lst_acc))
    #     lst_std_acc.append(np.std(lst_acc))
    # analysis(lst_avg_time, lst_avg_acc, lst_std_acc)
    # # Percentron algorithm
    # print "Training by using Percentron Algorithm"
    # lst_avg_time = []
    # lst_avg_acc = []
    # lst_std_acc = []
    # for percent in data_percent:
    #     print "training set size:\t" + str(percent)+"%"
    #     # print "setSmoothing: k value is ", classifier.k
    #     lst_time = []
    #     lst_acc = []
    #     for i in range(5):
    #         featureFunction = enhancedFeatureExtractorDigit
    #         legalLabels = range(10)
    #         classifier = perceptron.PerceptronClassifier(legalLabels, 3)
    #         start_time = timeit.default_timer()
    #         rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent)
    #         rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    #         rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    #         # print "Extracting features..."
    #         trainingData = map(featureFunction, rawTrainingData)
    #         validationData = map(featureFunction, rawValidationData)
    #         testData = map(featureFunction, rawTestData)
    #         classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    #         elapse = timeit.default_timer() - start_time
    #         lst_time.append(elapse)
    #         guesses = classifier.classify(testData)
    #         correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    #         lst_acc.append(float(correct) / len(testLabels))
    #     print '--------------------------------------------------------------'
    #     lst_avg_time.append(np.mean(lst_time))
    #     lst_avg_acc.append(np.mean(lst_acc))
    #     lst_std_acc.append(np.std(lst_acc))
    # analysis(lst_avg_time, lst_avg_acc, lst_std_acc)
    #
    #
    #
    #
    #
    #
    # # K nearest neighbour algorithm
    # print "Training by using KNN Algorithm"
    # print "Only use 10% test set"
    # # featureFunction = enhancedFeatureExtractorDigit
    # # legalLabels = range(10)
    # # classifier = Knear.KnearestNeighbourClassifier(legalLabels)
    # lst_avg_time = []
    # lst_avg_acc = []
    # lst_std_acc = []
    # for percent in data_percent:
    #     print "training set size:\t" + str(percent)+"%"
    #     lst_time = []
    #     lst_acc = []
    #     for i in range(5):
    #         start_time = timeit.default_timer()
    #         featureFunction = enhancedFeatureExtractorDigit
    #         legalLabels = range(10)
    #         classifier = Knear.KnearestNeighbourClassifier(legalLabels)
    #         rawTrainingData = samples.loadDataFile("digitdata/trainingimages", percent,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", percent)
    #         rawValidationData = samples.loadDataFile("digitdata/validationimages", 10,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 10)
    #         rawTestData = samples.loadDataFile("digitdata/testimages", 10,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    #         testLabels = samples.loadLabelsFile("digitdata/testlabels", 10)
    #         # print "Extracting features..."
    #         trainingData = map(featureFunction, rawTrainingData)
    #         validationData = map(featureFunction, rawValidationData)
    #         testData = map(featureFunction, rawTestData)
    #
    #         classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    #         elapse = timeit.default_timer() - start_time
    #         # print elapse
    #         lst_time.append(elapse)
    #         guesses = classifier.classify(testData)
    #         correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    #         lst_acc.append(float(correct) / len(testLabels))
    #     print '--------------------------------------------------------------'
    #     lst_avg_time.append(np.mean(lst_time))
    #     lst_avg_acc.append(np.mean(lst_acc))
    #     lst_std_acc.append(np.std(lst_acc))
    # analysis(lst_avg_time, lst_avg_acc, lst_std_acc)

    print ""
    print "=================Faces===================="
    # # NaiveBayes Algorithm
    # print "Training by using NaiveBayes Algorithm"
    # featureFunction = enhancedFeatureExtractorDigit
    # legalLabels = range(2)
    # classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
    # lst_avg_time = []
    # lst_avg_acc = []
    # lst_std_acc = []
    # for percent in data_percent:
    #     print "training set size:\t" + str(percent)+"%"
    #     lst_time = []
    #     lst_acc = []
    #     for i in range(5):
    #         start_time = timeit.default_timer()
    #         rawTrainingData = samples.loadDataFile("facedata/facedatatrain", percent,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    #         trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", percent)
    #         rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    #         validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
    #         rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    #         testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    #         trainingData = map(featureFunction, rawTrainingData)
    #         validationData = map(featureFunction, rawValidationData)
    #         testData = map(featureFunction, rawTestData)
    #         classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    #         elapse = timeit.default_timer() - start_time
    #         lst_time.append(elapse)
    #         guesses = classifier.classify(testData)
    #         correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    #         lst_acc.append(float(correct) / len(testLabels))
    #     print '--------------------------------------------------------------'
    #     lst_avg_time.append(np.mean(lst_time))
    #     lst_avg_acc.append(np.mean(lst_acc))
    #     lst_std_acc.append(np.std(lst_acc))
    # analysis(lst_avg_time, lst_avg_acc, lst_std_acc)

    # Perceptron Algorithm
    print "Training by using Perceptron Algorithm"
    featureFunction = enhancedFeatureExtractorDigit
    legalLabels = range(2)
    classifier = perceptron.PerceptronClassifier(legalLabels, 3)
    lst_avg_time = []
    lst_avg_acc = []
    lst_std_acc = []
    for percent in data_percent:
        print "training set size:\t" + str(percent) + "%"
        lst_time = []
        lst_acc = []
        for i in range(5):
            start_time = timeit.default_timer()
            rawTrainingData = samples.loadDataFile("facedata/facedatatrain",
                                                   percent, FACE_DATUM_WIDTH,
                                                   FACE_DATUM_HEIGHT)
            trainingLabels = samples.loadLabelsFile(
                "facedata/facedatatrainlabels", percent)
            rawValidationData = samples.loadDataFile("facedata/facedatatrain",
                                                     TEST_SET_SIZE,
                                                     FACE_DATUM_WIDTH,
                                                     FACE_DATUM_HEIGHT)
            validationLabels = samples.loadLabelsFile(
                "facedata/facedatatrainlabels", TEST_SET_SIZE)
            rawTestData = samples.loadDataFile("facedata/facedatatest",
                                               TEST_SET_SIZE, FACE_DATUM_WIDTH,
                                               FACE_DATUM_HEIGHT)
            testLabels = samples.loadLabelsFile("facedata/facedatatestlabels",
                                                TEST_SET_SIZE)
            trainingData = map(featureFunction, rawTrainingData)
            validationData = map(featureFunction, rawValidationData)
            testData = map(featureFunction, rawTestData)
            classifier.train(trainingData, trainingLabels, validationData,
                             validationLabels)
            elapse = timeit.default_timer() - start_time
            lst_time.append(elapse)
            guesses = classifier.classify(testData)
            correct = [
                guesses[i] == testLabels[i] for i in range(len(testLabels))
            ].count(True)
            lst_acc.append(float(correct) / len(testLabels))
        print '--------------------------------------------------------------'
        lst_avg_time.append(np.mean(lst_time))
        lst_avg_acc.append(np.mean(lst_acc))
        lst_std_acc.append(np.std(lst_acc))
    analysis(lst_avg_time, lst_avg_acc, lst_std_acc)