def readCommand(argv):
    """Parse the command-line arguments and assemble the classification setup.

    Args:
        argv: list of argument strings (program name excluded); passed
            straight to ``OptionParser.parse_args``.

    Returns:
        A pair ``(args, options)``.  ``args`` is a dict holding the keys
        ``'agentToClone'``, ``'classifier'``, ``'featureFunction'`` and
        ``'printImage'``; ``options`` is the parsed optparse values object.

    Raises:
        Exception: if positional arguments remain after option parsing.
        SystemExit: with status 2 when an option value fails validation.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    def usageError(message):
        # Shared exit path: report the problem, show usage, stop with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes', 'perceptron',
                          'mira', 'minicontest'
                      ],
                      default='mostFrequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces', 'pacman'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option('-g',
                      '--agentToClone',
                      help=default("Pacman agent to copy"),
                      default=None,
                      type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration before doing any work.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if options.classifier == 'minicontest':
        print("using minicontest feature extractor")
    else:
        print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # Per-dataset choices: image printer, feature extractor and label set.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.classifier == 'minicontest':
            # The minicontest extractor takes precedence over -f for digits.
            featureFunction = contestFeatureExtractorDigit
        elif options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        legalLabels = range(10)
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
        # Previously faces fell through to the pacman action labels.
        # NOTE(review): assumes face labels are 0/1 -- confirm against the
        # face label files.
        legalLabels = range(2)
    elif options.data == "pacman":
        printImage = None
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']
    else:
        usageError("Unknown dataset %s" % options.data)

    if options.training <= 0:
        usageError(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)

    if options.smoothing <= 0:
        usageError(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            usageError(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                (options.label1, options.label2))

    # Build the requested classifier.
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif options.classifier == "perceptron":
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(
                legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(
                legalLabels, options.iterations)
    elif options.classifier == "mira":
        if options.data == 'pacman':
            # No MIRA classifier is constructed for pacman data; without this
            # guard `classifier` stayed unbound and the code below crashed.
            usageError(
                "The mira classifier is not available for the pacman dataset")
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        usageError("Unknown classifier: %s" % options.classifier)

    args['agentToClone'] = options.agentToClone

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
def readCommand( argv ):
    """Turn the command-line arguments into a ready-to-run configuration.

    Args:
        argv: list of argument strings (program name excluded); passed
            straight to ``OptionParser.parse_args``.

    Returns:
        ``(args, options)`` where ``args`` is a dict with the keys
        ``'agentToClone'``, ``'classifier'``, ``'featureFunction'`` and
        ``'printImage'``, and ``options`` is the parsed optparse values object.

    Raises:
        Exception: if unparsed positional arguments are left over.
        SystemExit: with status 2 when an option value is rejected.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    def bail(message):
        # Print the complaint plus the usage text, then exit with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'mira', 'minicontest'], default='mostFrequent')
    parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces', 'pacman'], default='digits')
    parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int")
    parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true")
    parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true")
    parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int")
    parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int")
    parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true")
    parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0)
    parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true")
    parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int")
    parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int")
    parser.add_option('-g', '--agentToClone', help=default("Pacman agent to copy"), default=None, type="str")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Set up variables according to the command line input.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if options.classifier == 'minicontest':
        print("using minicontest feature extractor")
    else:
        print("using enhanced features?:\t" + str(options.features))
    print("training set size:\t" + str(options.training))

    # Dataset-specific printer, feature extractor and label set.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage
        if options.classifier == 'minicontest':
            # The minicontest extractor wins over -f for digits.
            featureFunction = contestFeatureExtractorDigit
        elif options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        legalLabels = range(10)
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
        # Faces used to inherit the pacman action labels by accident.
        # NOTE(review): assumes face labels are 0/1 -- confirm against the
        # face label files.
        legalLabels = range(2)
    elif options.data == "pacman":
        printImage = None
        if options.features:
            featureFunction = enhancedFeatureExtractorPacman
        else:
            featureFunction = basicFeatureExtractorPacman
        legalLabels = ['Stop', 'West', 'East', 'North', 'South']
    else:
        bail("Unknown dataset %s" % options.data)

    if options.training <= 0:
        bail("Training set size should be a positive integer (you provided: %d)" % options.training)

    if options.smoothing <= 0:
        bail("Please provide a positive number for smoothing (you provided: %f)" % options.smoothing)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            bail("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2))

    # Instantiate the classifier named on the command line.
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" % options.smoothing)
    elif options.classifier == "perceptron":
        if options.data != 'pacman':
            classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations)
        else:
            classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels, options.iterations)
    elif options.classifier == "mira":
        if options.data == 'pacman':
            # Nothing builds a MIRA classifier for pacman data; previously
            # `classifier` was left unbound and the next use raised.
            bail("The mira classifier is not available for the pacman dataset")
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        bail("Unknown classifier: %s" % options.classifier)

    args['agentToClone'] = options.agentToClone

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options
# Example #3
# 0
MINICONTEST_PATH = "minicontest_output.pickle"


if __name__ == '__main__':
    # Stand-alone minicontest run on the digit data: load, extract features,
    # train, report validation accuracy, then classify the test images.
    print("Loading training data")
    rawTrainingData = samples.loadDataFile(
        "digitdata/trainingimages", 5000,
        DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000)
    rawValidationData = samples.loadDataFile(
        "digitdata/validationimages", 100,
        DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 100)
    rawTestData = samples.loadDataFile(
        "digitdata/testimages", TEST_SIZE,
        DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)

    # The contest setup is fixed: contest feature extractor, labels 0-9.
    featureFunction = contestFeatureExtractorDigit
    legalLabels = range(10)
    classifier = minicontest.contestClassifier(legalLabels)

    print("Extracting features...")
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    print("Training...")
    classifier.train(
        trainingData, trainingLabels, validationData, validationLabels)

    print("Validating...")
    guesses = classifier.classify(validationData)
    # One boolean per validation item: did the guess match its label?
    matches = [
        guesses[i] == validationLabels[i]
        for i in range(len(validationLabels))
    ]
    correct = matches.count(True)
    summaryTemplate = (
        "correct out of " + str(len(validationLabels)) + " (%.1f%%).")
    accuracyPercent = 100.0 * correct / len(validationLabels)
    print("%s %s" % (str(correct), summaryTemplate % accuracyPercent))

    print("Testing...")
    guesses = classifier.classify(testData)
# Example #4
# 0
    print "Loading training data"
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000,
                                           DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", 100,
                                             DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                              100)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SIZE,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)

    featureFunction = contestFeatureExtractorDigit
    legalLabels = range(10)
    classifier = minicontest.contestClassifier(legalLabels)

    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [
        guesses[i] == validationLabels[i] for i in range(len(validationLabels))
    ].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) +
# Example #5
# 0
def readCommand(argv):
    """Parse the command-line arguments and assemble the classification setup.

    Args:
        argv: list of argument strings (program name excluded); passed
            straight to ``OptionParser.parse_args``.

    Returns:
        A pair ``(args, options)``.  ``args`` is a dict with the keys
        ``'classifier'``, ``'featureFunction'`` and ``'printImage'``;
        ``options`` is the parsed optparse values object.

    Raises:
        Exception: if positional arguments remain after option parsing.
        SystemExit: with status 2 when an option value fails validation.
    """
    from optparse import OptionParser
    parser = OptionParser(USAGE_STRING)

    def usageError(message):
        # Shared exit path: report the problem, show usage, stop with status 2.
        print(message)
        print(USAGE_STRING)
        sys.exit(2)

    parser.add_option('-c',
                      '--classifier',
                      help=default('The type of classifier'),
                      choices=[
                          'mostFrequent', 'nb', 'naiveBayes',
                          'nearestNeighbors', 'perceptron', 'mira',
                          'minicontest'
                      ],
                      default='mostFrequent')
    parser.add_option('-d',
                      '--data',
                      help=default('Dataset to use'),
                      choices=['digits', 'faces'],
                      default='digits')
    parser.add_option('-t',
                      '--training',
                      help=default('The size of the training set'),
                      default=100,
                      type="int")
    parser.add_option('-f',
                      '--features',
                      help=default('Whether to use enhanced features'),
                      default=False,
                      action="store_true")
    parser.add_option('-o',
                      '--odds',
                      help=default('Whether to compute odds ratios'),
                      default=False,
                      action="store_true")
    parser.add_option('-1',
                      '--label1',
                      help=default("First label in an odds ratio comparison"),
                      default=0,
                      type="int")
    parser.add_option('-2',
                      '--label2',
                      help=default("Second label in an odds ratio comparison"),
                      default=1,
                      type="int")
    parser.add_option('-w',
                      '--weights',
                      help=default('Whether to print weights'),
                      default=False,
                      action="store_true")
    parser.add_option(
        '-k',
        '--smoothing',
        help=default("Smoothing parameter (ignored when using --autotune)"),
        type="float",
        default=2.0)
    parser.add_option(
        '-a',
        '--autotune',
        help=default("Whether to automatically tune hyperparameters"),
        default=False,
        action="store_true")
    parser.add_option('-i',
                      '--iterations',
                      help=default("Maximum iterations to run training"),
                      default=3,
                      type="int")
    parser.add_option('-s',
                      '--test',
                      help=default("Amount of test data to use"),
                      default=TEST_SET_SIZE,
                      type="int")
    parser.add_option(
        '-n',
        '--k_number_of_neighbors',
        help=default("Number of neighbors to search (For nearestNeighbors)"),
        default=3,
        type="int")
    parser.add_option(
        '-x',
        '--specialMode',
        help=default(
            "Trains in increments of random 10% of data and displays results"),
        default=False,
        action="store_true")
    parser.add_option('-y',
                      '--analysis',
                      help=default("Shows which data is wrongly predicted"),
                      default=False,
                      action="store_true")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))
    args = {}

    # Echo the chosen configuration before doing any work.
    print("Doing classification")
    print("--------------------")
    print("data:\t\t" + options.data)
    print("classifier:\t\t" + options.classifier)
    if options.classifier == 'minicontest':
        print("using minicontest feature extractor")
    else:
        print("using enhanced features?:\t" + str(options.features))
    # Report the training size exactly once, and only outside special mode
    # (it used to be printed unconditionally and then a second time here).
    if not options.specialMode:
        print("training set size:\t" + str(options.training))

    # Per-dataset choices: image printer, feature extractor and label set.
    if options.data == "digits":
        printImage = ImagePrinter(DIGIT_DATUM_WIDTH,
                                  DIGIT_DATUM_HEIGHT).printImage
        if options.classifier == 'minicontest':
            # The minicontest extractor takes precedence over -f for digits.
            featureFunction = contestFeatureExtractorDigit
        elif options.features:
            featureFunction = enhancedFeatureExtractorDigit
        else:
            featureFunction = basicFeatureExtractorDigit
        legalLabels = range(10)
    elif options.data == "faces":
        printImage = ImagePrinter(FACE_DATUM_WIDTH,
                                  FACE_DATUM_HEIGHT).printImage
        if options.features:
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = basicFeatureExtractorFace
        legalLabels = range(2)
    else:
        usageError("Unknown dataset %s" % options.data)

    if options.training <= 0:
        usageError(
            "Training set size should be a positive integer (you provided: %d)"
            % options.training)

    if options.smoothing <= 0:
        usageError(
            "Please provide a positive number for smoothing (you provided: %f)"
            % options.smoothing)

    if options.odds:
        if options.label1 not in legalLabels or options.label2 not in legalLabels:
            usageError(
                "Didn't provide a legal labels for the odds ratio: (%d,%d)" %
                (options.label1, options.label2))

    if options.k_number_of_neighbors <= 0:
        # The option is parsed as an int, so format it with %d (was %f).
        usageError(
            "Please provide a positive number for neighbors (you provided: %d)"
            % options.k_number_of_neighbors)

    # Build the requested classifier.
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels)
    elif options.classifier == "nearestNeighbors":
        # NOTE(review): k_number_of_neighbors is validated above but not
        # passed to the classifier here -- presumably the caller reads it
        # from `options`; confirm.
        classifier = nearestNeighbors.NNClassifier(legalLabels)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels)
        classifier.setSmoothing(options.smoothing)
        if options.autotune:
            print("using automatic tuning for naivebayes")
            classifier.automaticTuning = True
        else:
            print("using smoothing parameter k=%f for naivebayes" %
                  options.smoothing)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels,
                                                     options.iterations)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, options.iterations)
        if options.autotune:
            print("using automatic tuning for MIRA")
            classifier.automaticTuning = True
        else:
            print("using default C=0.001 for MIRA")
    elif options.classifier == 'minicontest':
        import minicontest
        classifier = minicontest.contestClassifier(legalLabels)
    else:
        usageError("Unknown classifier: %s" % options.classifier)

    args['classifier'] = classifier
    args['featureFunction'] = featureFunction
    args['printImage'] = printImage

    return args, options