def readCommand(argv): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=[ 'mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'mira', 'minicontest' ], default='mostFrequent') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces', 'pacman'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true") parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true") parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int") parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option( '-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option( '-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-g', '--agentToClone', help=default("Pacman agent to copy"), default=None, type="str") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier if not options.classifier == 'minicontest': print "using enhanced features?:\t" + str(options.features) else: print "using minicontest feature extractor" print "training set size:\t" + str(options.training) if (options.data == "digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit if (options.classifier == 'minicontest'): featureFunction = contestFeatureExtractorDigit elif (options.data == "faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorFace else: featureFunction = basicFeatureExtractorFace elif (options.data == "pacman"): printImage = None if (options.features): featureFunction = enhancedFeatureExtractorPacman else: featureFunction = basicFeatureExtractorPacman else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if (options.data == "digits"): legalLabels = range(10) else: legalLabels = ['Stop', 'West', 'East', 'North', 'South'] if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if options.odds: if options.label1 not in legalLabels or options.label2 not in legalLabels: print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % ( options.label1, options.label2) print USAGE_STRING sys.exit(2) if (options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif (options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif (options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier( legalLabels, options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman( legalLabels, options.iterations) elif (options.classifier == "mira"): if options.data != 'pacman': classifier = mira.MiraClassifier(legalLabels, options.iterations) if (options.autotune): print "using automatic tuning for MIRA" classifier.automaticTuning = True else: print "using default C=0.001 for MIRA" elif (options.classifier == 'minicontest'): import minicontest classifier = minicontest.contestClassifier(legalLabels) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['agentToClone'] = options.agentToClone args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
def readCommand( argv ): "Processes the command used to run from the command line." from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=['mostFrequent', 'nb', 'naiveBayes', 'perceptron', 'mira', 'minicontest'], default='mostFrequent') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces', 'pacman'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true") parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true") parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int") parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option('-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option('-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option('-g', '--agentToClone', help=default("Pacman agent to copy"), default=None, type="str") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. print "Doing classification" print "--------------------" print "data:\t\t" + options.data print "classifier:\t\t" + options.classifier if not options.classifier == 'minicontest': print "using enhanced features?:\t" + str(options.features) else: print "using minicontest feature extractor" print "training set size:\t" + str(options.training) if(options.data=="digits"): printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit if (options.classifier == 'minicontest'): featureFunction = contestFeatureExtractorDigit elif(options.data=="faces"): printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage if (options.features): featureFunction = enhancedFeatureExtractorFace else: featureFunction = basicFeatureExtractorFace elif(options.data=="pacman"): printImage = None if (options.features): featureFunction = enhancedFeatureExtractorPacman else: featureFunction = basicFeatureExtractorPacman else: print "Unknown dataset", options.data print USAGE_STRING sys.exit(2) if(options.data=="digits"): legalLabels = range(10) else: legalLabels = ['Stop', 'West', 'East', 'North', 'South'] if options.training <= 0: print "Training set size should be a positive integer (you provided: %d)" % options.training print USAGE_STRING sys.exit(2) if options.smoothing <= 0: print "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing print USAGE_STRING sys.exit(2) if options.odds: if options.label1 not in legalLabels or options.label2 not in legalLabels: print "Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2) print USAGE_STRING sys.exit(2) if(options.classifier == "mostFrequent"): classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif(options.classifier == "naiveBayes" or options.classifier == "nb"): classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if (options.autotune): print "using automatic tuning for naivebayes" classifier.automaticTuning = True else: print "using smoothing parameter k=%f for naivebayes" % options.smoothing elif(options.classifier == "perceptron"): if options.data != 'pacman': classifier = perceptron.PerceptronClassifier(legalLabels,options.iterations) else: classifier = perceptron_pacman.PerceptronClassifierPacman(legalLabels,options.iterations) elif(options.classifier == "mira"): if options.data != 'pacman': classifier = mira.MiraClassifier(legalLabels, options.iterations) if (options.autotune): print "using automatic tuning for MIRA" classifier.automaticTuning = True else: print "using default C=0.001 for MIRA" elif(options.classifier == 'minicontest'): import minicontest classifier = minicontest.contestClassifier(legalLabels) else: print "Unknown classifier:", options.classifier print USAGE_STRING sys.exit(2) args['agentToClone'] = options.agentToClone args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options
MINICONTEST_PATH = "minicontest_output.pickle" if __name__ == '__main__': print "Loading training data" rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000) rawValidationData = samples.loadDataFile("digitdata/validationimages", 100,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 100) rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT) featureFunction = contestFeatureExtractorDigit legalLabels = range(10) classifier = minicontest.contestClassifier(legalLabels) print "Extracting features..." trainingData = map(featureFunction, rawTrainingData) validationData = map(featureFunction, rawValidationData) testData = map(featureFunction, rawTestData) print "Training..." classifier.train(trainingData, trainingLabels, validationData, validationLabels) print "Validating..." guesses = classifier.classify(validationData) correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True) print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels)) print "Testing..." guesses = classifier.classify(testData)
print "Loading training data" rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000) rawValidationData = samples.loadDataFile("digitdata/validationimages", 100, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 100) rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) featureFunction = contestFeatureExtractorDigit legalLabels = range(10) classifier = minicontest.contestClassifier(legalLabels) print "Extracting features..." trainingData = map(featureFunction, rawTrainingData) validationData = map(featureFunction, rawValidationData) testData = map(featureFunction, rawTestData) print "Training..." classifier.train(trainingData, trainingLabels, validationData, validationLabels) print "Validating..." guesses = classifier.classify(validationData) correct = [ guesses[i] == validationLabels[i] for i in range(len(validationLabels)) ].count(True) print str(correct), ("correct out of " + str(len(validationLabels)) +
def readCommand(argv): """Processes the command used to run from the command line.""" from optparse import OptionParser parser = OptionParser(USAGE_STRING) parser.add_option('-c', '--classifier', help=default('The type of classifier'), choices=[ 'mostFrequent', 'nb', 'naiveBayes', 'nearestNeighbors', 'perceptron', 'mira', 'minicontest' ], default='mostFrequent') parser.add_option('-d', '--data', help=default('Dataset to use'), choices=['digits', 'faces'], default='digits') parser.add_option('-t', '--training', help=default('The size of the training set'), default=100, type="int") parser.add_option('-f', '--features', help=default('Whether to use enhanced features'), default=False, action="store_true") parser.add_option('-o', '--odds', help=default('Whether to compute odds ratios'), default=False, action="store_true") parser.add_option('-1', '--label1', help=default("First label in an odds ratio comparison"), default=0, type="int") parser.add_option('-2', '--label2', help=default("Second label in an odds ratio comparison"), default=1, type="int") parser.add_option('-w', '--weights', help=default('Whether to print weights'), default=False, action="store_true") parser.add_option( '-k', '--smoothing', help=default("Smoothing parameter (ignored when using --autotune)"), type="float", default=2.0) parser.add_option( '-a', '--autotune', help=default("Whether to automatically tune hyperparameters"), default=False, action="store_true") parser.add_option('-i', '--iterations', help=default("Maximum iterations to run training"), default=3, type="int") parser.add_option('-s', '--test', help=default("Amount of test data to use"), default=TEST_SET_SIZE, type="int") parser.add_option( '-n', '--k_number_of_neighbors', help=default("Number of neighbors to search (For nearestNeighbors)"), default=3, type="int") parser.add_option( '-x', '--specialMode', help=default( "Trains in increments of random 10% of data and displays results"), default=False, action="store_true") parser.add_option('-y', '--analysis', help=default("Shows which data is wrongly predicted"), default=False, action="store_true") options, otherjunk = parser.parse_args(argv) if len(otherjunk) != 0: raise Exception('Command line input not understood: ' + str(otherjunk)) args = {} # Set up variables according to the command line input. print("Doing classification") print("--------------------") print("data:\t\t" + options.data) print("classifier:\t\t" + options.classifier) if not options.classifier == 'minicontest': print("using enhanced features?:\t" + str(options.features)) else: print("using minicontest feature extractor") print("training set size:\t" + str(options.training)) if not options.specialMode: print("training set size:\t" + str(options.training)) if options.data == "digits": printImage = ImagePrinter(DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT).printImage if options.features: featureFunction = enhancedFeatureExtractorDigit else: featureFunction = basicFeatureExtractorDigit if options.classifier == 'minicontest': featureFunction = contestFeatureExtractorDigit elif options.data == "faces": printImage = ImagePrinter(FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT).printImage if options.features: featureFunction = enhancedFeatureExtractorFace else: featureFunction = basicFeatureExtractorFace else: print("Unknown dataset", options.data) print(USAGE_STRING) sys.exit(2) if options.data == "digits": legalLabels = range(10) else: legalLabels = range(2) if options.training <= 0: print( "Training set size should be a positive integer (you provided: %d)" % options.training) print(USAGE_STRING) sys.exit(2) if options.smoothing <= 0: print( "Please provide a positive number for smoothing (you provided: %f)" % options.smoothing) print(USAGE_STRING) sys.exit(2) if options.odds: if options.label1 not in legalLabels or options.label2 not in legalLabels: print("Didn't provide a legal labels for the odds ratio: (%d,%d)" % (options.label1, options.label2)) print(USAGE_STRING) sys.exit(2) if options.k_number_of_neighbors <= 0: print( "Please provide a positive number for neighbors (you provided: %f)" % options.k_number_of_neighbors) print(USAGE_STRING) sys.exit(2) if options.classifier == "mostFrequent": classifier = mostFrequent.MostFrequentClassifier(legalLabels) elif options.classifier == "nearestNeighbors": classifier = nearestNeighbors.NNClassifier(legalLabels) elif options.classifier == "naiveBayes" or options.classifier == "nb": classifier = naiveBayes.NaiveBayesClassifier(legalLabels) classifier.setSmoothing(options.smoothing) if options.autotune: print("using automatic tuning for naivebayes") classifier.automaticTuning = True else: print("using smoothing parameter k=%f for naivebayes" % options.smoothing) elif options.classifier == "perceptron": classifier = perceptron.PerceptronClassifier(legalLabels, options.iterations) elif options.classifier == "mira": classifier = mira.MiraClassifier(legalLabels, options.iterations) if options.autotune: print("using automatic tuning for MIRA") classifier.automaticTuning = True else: print("using default C=0.001 for MIRA") elif options.classifier == 'minicontest': import minicontest classifier = minicontest.contestClassifier(legalLabels) else: print("Unknown classifier:", options.classifier) print(USAGE_STRING) sys.exit(2) args['classifier'] = classifier args['featureFunction'] = featureFunction args['printImage'] = printImage return args, options