def readContestData(trainingSize=100, testSize=100):
    """Load the contest Pacman splits from the 'pacmandata' directory.

    trainingSize is passed to the loader for the training pickle; testSize
    is passed for both the validation and the test pickle.

    Returns a 9-tuple:
        (trainingData, trainingLabels, validationData, validationLabels,
         rawTrainingData, rawValidationData, testData, testLabels,
         rawTestData)
    where trainingData, validationData and testData are always empty lists;
    only the raw data and the labels come from samples.loadPacmanData.
    """
    rootdata = 'pacmandata'

    # (file suffix, requested size) for each split, in load order.
    splits = (
        ('/contest_training.pkl', trainingSize),
        ('/contest_validation.pkl', testSize),
        ('/contest_test.pkl', testSize),
    )
    loaded = [samples.loadPacmanData(rootdata + suffix, size)
              for suffix, size in splits]
    (rawTrainingData, trainingLabels), \
        (rawValidationData, validationLabels), \
        (rawTestData, testLabels) = loaded

    # The feature slots are returned as three distinct, empty lists.
    return ([], trainingLabels,
            [], validationLabels,
            rawTrainingData, rawValidationData,
            [], testLabels, rawTestData)
def readSuicideData(trainingSize=100, testSize=100):
    """Load the suicide Pacman splits from the "pacmandata" directory.

    The training pickle is loaded with trainingSize; the validation and
    test pickles are both loaded with testSize.

    Returns a 9-tuple:
        (trainingData, trainingLabels, validationData, validationLabels,
         rawTrainingData, rawValidationData, testData, testLabels,
         rawTestData)
    The three non-raw data entries are always empty lists.
    """
    def loadSplit(fileName, count):
        # All three pickles live under the same root directory.
        return samples.loadPacmanData("pacmandata" + fileName, count)

    rawTrainingData, trainingLabels = loadSplit("/suicide_training.pkl", trainingSize)
    rawValidationData, validationLabels = loadSplit("/suicide_validation.pkl", testSize)
    rawTestData, testLabels = loadSplit("/suicide_test.pkl", testSize)

    trainingData, validationData, testData = [], [], []
    return (
        trainingData,
        trainingLabels,
        validationData,
        validationLabels,
        rawTrainingData,
        rawValidationData,
        testData,
        testLabels,
        rawTestData,
    )
def runClassifier(args, options):
    """Load a dataset, train the classifier and print its accuracy.

    Args:
        args: dict providing 'featureFunction', 'classifier' and
            'printImage'. For Pacman data the optional keys 'agentToClone',
            'trainingData' and 'validationData' select the saved games.
        options: parsed command-line options. Reads data, training, test,
            classifier, odds, weights, label1 and label2.

    Prints the number and percentage of correct guesses on the validation
    and test splits, calls analysis() on the test guesses, and, when
    requested, shows the odds-ratio or weight features through printImage.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    def reportAccuracy(guesses, labels):
        # Print "<correct> correct out of <total> (<pct>%)."
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # An empty split would otherwise raise ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Paths registered for the agent take priority, then the paths given
        # in args, then the ContestAgent defaults.
        trainingData = (trainingData or args.get('trainingData', False)
                        or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0])
        validationData = (validationData or args.get('validationData', False)
                          or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1])
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile(
            "digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile(
            "digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile(
            "digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features (comprehensions yield real lists on Python 2 and 3)
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print("Validating...")
    guesses = classifier.classify(validationData)
    reportAccuracy(guesses, validationLabels)

    print("Testing...")
    guesses = classifier.classify(testData)
    reportAccuracy(guesses, testLabels)
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    # Logical `and` replaces the bitwise `&`, which misbehaves for flags
    # that are not strict booleans.
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        # Only the naive Bayes heading is reachable here; the former
        # weight-difference heading sat in a dead else branch.
        print("=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2))
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for label in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(label)
            print("=== Features with high weight for label %d ===" % label)
            printImage(features_weights)
def runClassifier(args, options):
    """Run one train / validate / test cycle and print the results.

    Args:
        args: dict with the required keys 'featureFunction', 'classifier'
            and 'printImage'; 'agentToClone', 'trainingData' and
            'validationData' are optional and only read for Pacman data.
        options: option object; the attributes data, training, test,
            classifier, odds, weights, label1 and label2 are read.

    Output goes to stdout and through printImage; nothing is returned.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Fallback chain: agent-specific path, then the path from args,
        # then the ContestAgent default.
        trainingData = trainingData or args.get(
            'trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get(
            'validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(
            trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(
            validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,
                                               DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,
                                                 DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    # Validation first, then test; `guesses` ends up holding the test
    # predictions, which analysis() needs below.
    evaluationPasses = (
        ("Validating...", validationData, validationLabels),
        ("Testing...", testData, testLabels),
    )
    for banner, data, labels in evaluationPasses:
        print(banner)
        guesses = classifier.classify(data)
        total = len(labels)
        correct = sum(1 for guess, label in zip(guesses, labels) if guess == label)
        # Guard the percentage so an empty split reports 0.0% rather than
        # raising ZeroDivisionError.
        percent = 100.0 * correct / total if total else 0.0
        print("%s correct out of %s (%.1f%%)." % (correct, total, percent))

    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    usesNaiveBayes = options.classifier == "naiveBayes" or options.classifier == "nb"
    # `and` instead of bitwise `&`: the flags are truth values, not bit masks.
    if options.odds and usesNaiveBayes:
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        # The alternative weight-difference heading was unreachable under
        # this condition and has been dropped.
        string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (
            label1, label2)
        print(string3)
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for label in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(label)
            print("=== Features with high weight for label %d ===" % label)
            printImage(features_weights)
def runClassifier(args, options):
    """Train the classifier and print validation and test accuracy.

    Args:
        args: dict with the required keys 'featureFunction' and
            'classifier'. For Pacman data, 'agentToClone', 'trainingData'
            and 'validationData' are optional. A 'printImage' entry is
            accepted but not used by this function.
        options: option object; reads the attributes data, training and
            test.

    Results are printed to stdout; nothing is returned.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Agent-specific paths win, then paths from args, then the
        # ContestAgent defaults.
        trainingData = trainingData or args.get(
            'trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get(
            'validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(
            trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(
            validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,
                                               DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,
                                                 DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print("Extracting features...")
    trainingData = [featureFunction(datum) for datum in rawTrainingData]
    validationData = [featureFunction(datum) for datum in rawValidationData]
    testData = [featureFunction(datum) for datum in rawTestData]

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print("Validating...")
    guesses = classifier.classify(validationData)
    numValidation = len(validationLabels)
    correct = sum(1 for guess, label in zip(guesses, validationLabels) if guess == label)
    # Report 0.0% for an empty split rather than dividing by zero.
    percent = 100.0 * correct / numValidation if numValidation else 0.0
    print("%s correct out of %s (%.1f%%)." % (correct, numValidation, percent))

    print("Testing...")
    guesses = classifier.classify(testData)
    numTested = len(testLabels)
    correct = sum(1 for guess, label in zip(guesses, testLabels) if guess == label)
    percent = 100.0 * correct / numTested if numTested else 0.0
    print("%s correct out of %s (%.1f%%)." % (correct, numTested, percent))
def runClassifier(args, options):
    """Load a dataset, train the classifier and print its test accuracy.

    The loaded data is handed to the classifier as-is; no feature function
    is applied in this function.

    Args:
        args: dict with the required key 'classifier'. For Pacman data,
            'agentToClone', 'trainingData' and 'validationData' are
            optional.
        options: option object; reads data, training and validation. For
            the "digits" dataset, training and validation are set on this
            object to 2000 and 1000 respectively when they are None.

    Raises:
        ValueError: if options.data is neither "pacman" nor "digits".
    """
    classifier = args['classifier']

    # Load data
    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(
            agentToClone, (None, None, None))
        # Agent-specific paths win, then paths from args, then the
        # ContestAgent defaults.
        trainingData = trainingData or args.get(
            'trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get(
            'validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        # From here on the *Data names hold loaded data, not paths.
        trainingData, trainingLabels = samples.loadPacmanData(
            trainingData, options.training)
        validationData, validationLabels = samples.loadPacmanData(
            validationData, options.validation)
        # NOTE(review): None is passed as the size for the test split --
        # presumably "load everything"; confirm against loadPacmanData.
        testData, testLabels = samples.loadPacmanData(testData, None)
    elif options.data == "digits":
        # Defaults are written back onto `options`, so they remain visible
        # to the caller after this function returns.
        if options.training is None:
            options.training = 2000
        if options.validation is None:
            options.validation = 1000
        numTest = 1000
        trainingData = samples.loadDataFile("digitdata/trainingimages", options.training,
                                            DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                options.training)
        validationData = samples.loadDataFile("digitdata/validationimages",
                                              options.validation,
                                              DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  options.validation)
        testData = samples.loadDataFile("digitdata/testimages", numTest,
                                        DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)
    else:
        raise ValueError('unrecognized dataset %r' % options.data)

    # Conduct training and testing
    print("Training...")
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print("Testing...")
    guesses = classifier.classify(testData)
    numTested = len(testLabels)
    correct = sum(1 for guess, label in zip(guesses, testLabels) if guess == label)
    # Report 0.0% for an empty test split rather than dividing by zero.
    percent = 100.0 * correct / numTested if numTested else 0.0
    print("%s correct out of %s (%.1f%%)." % (correct, numTested, percent))