def readDigitData(trainingSize=100, testSize=100):
    rootdata = "digitdata/"
    # loading digits data
    rawTrainingData = samples.loadDataFile(rootdata + "trainingimages", trainingSize,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels", trainingSize)
    rawValidationData = samples.loadDataFile(rootdata + "validationimages", TEST_SET_SIZE,
                                             DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile(rootdata + "testimages", testSize,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile(rootdata + "testlabels", testSize)

    try:
        print "Extracting features..."
        featureFunction = dataClassifier.basicFeatureExtractorDigit
        trainingData = map(featureFunction, rawTrainingData)
        validationData = map(featureFunction, rawValidationData)
        testData = map(featureFunction, rawTestData)
    except:
        display("An exception was raised while extracting basic features: \n %s" % getExceptionTraceBack())

    return (trainingData, trainingLabels, validationData, validationLabels,
            rawTrainingData, rawValidationData, testData, testLabels, rawTestData)
def getNumpyData(numTraining, numTest):
    featureFunction = numpyFeatureExtractorDigit
    rawTrainingData = samples.loadDataFile("data/digitdata/trainingimages", numTraining,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("data/digitdata/validationimages", numTest,
                                             DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("data/digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("data/digitdata/testimages", numTest,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = np.array(map(featureFunction, rawTrainingData))
    validationData = np.array(map(featureFunction, rawValidationData))
    testData = np.array(map(featureFunction, rawTestData))
    trainingLabels = np.array(trainingLabels)
    validationLabels = np.array(validationLabels)
    testLabels = np.array(testLabels)

    return (trainingData, validationData, testData,
            trainingLabels, validationLabels, testLabels)
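# Usage sketch (hypothetical driver): assumes this project's samples module,
# numpyFeatureExtractorDigit, np (numpy), and the data/digitdata files are
# all importable/available.
#
#     trainX, valX, testX, trainY, valY, testY = getNumpyData(1000, 100)
#     print trainX.shape, trainY.shape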
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']

    # Load data
    dataset = options.dataset
    numTraining = options.training
    numTest = options.test
    if dataset == 'd1':
        rawTrainingData = samples.loadDataFile("data/D1/training_data", numTraining)
        trainingLabels = samples.loadLabelsFile("data/D1/training_labels", numTraining)
        rawTestData = samples.loadDataFile("data/D1/test_data", numTest)
        testLabels = samples.loadLabelsFile("data/D1/test_labels", numTest)
    else:
        rawTrainingData = samples.loadDataFile("data/D2/training_data", numTraining)
        trainingLabels = samples.loadLabelsFile("data/D2/training_labels", numTraining)
        rawTestData = samples.loadDataFile("data/D2/test_data", numTest)
        testLabels = samples.loadLabelsFile("data/D2/test_labels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, testData, testLabels, options.validate)
    guesses = classifier.classify(trainingData)
    correct = [guesses[i] == trainingLabels[i] for i in range(len(trainingLabels))].count(True)
    if options.classifier == "1vr":
        f = open("perceptron1vr_train.csv", "a")
        f.write(str(len(trainingData)) + "," + str(100 * correct / (1.0 * len(trainingData))) + '\n')
        f.close()

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    if options.classifier == "1vr":
        f = open("perceptron1vr_test.csv", "a")
        f.write(str(len(trainingData)) + "," + str(100 * correct / (1.0 * len(testData))) + '\n')
        f.close()
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    if options.data == "pacman":
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(agentToClone, (None, None, None))
        trainingData = trainingData or args.get('trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get('validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    rawTrainingData = samples.loadDataFile("data/digitdata/trainingimages", numTraining,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("data/digitdata/validationimages", numTest,
                                             DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("data/digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("data/digitdata/testimages", numTest,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print "Validating..."
    validation_guesses = classifier.classify(validationData)
    correct = [validation_guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    test_guesses = classifier.classify(testData)
    correct = [test_guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, test_guesses, trainingData, trainingLabels, testLabels, testData, rawTestData, printImage)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print("=== Features with high weight for label %d ===" % l)
            printImage(features_weights)
def runClassifier(dataset, numTraining):
    if dataset == 'faces':
        legalLabels = range(2)
        featureFunction = basicFeatureExtractorFace
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    elif dataset == 'digits':
        legalLabels = range(10)
        featureFunction = basicFeatureExtractorDigit
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    else:
        raise Exception("Unknown dataset: %s" % dataset)

    # load NN classifier
    classifier = neuralNetwork_submission.NeuralNetworkClassifier(legalLabels, "NeuralNetwork", 123)

    # converting data to np.array
    trainingData = basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData)).astype(np.float32)
    testData = basicFeatureDataToNumpyArray(map(featureFunction, rawTestData)).astype(np.float32)

    print("Training...")
    classifier.train(trainingData, trainingLabels)
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print(
        "Performance on the test set:", str(correct),
        ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels)),
        "[Dataset: " + dataset + ", Number of training samples: " + str(numTraining) + "]")
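# Example invocation (assumes the Berkeley-style digitdata/facedata files
# and the neuralNetwork_submission module are available):
#
#     runClassifier('digits', 1000)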
def rLoadDataFile(inputDataType, numTraining, numTest):
    dataCollection = {}
    dataCollection['rawTrainingData'] = []
    dataCollection['trainingLabels'] = []

    if inputDataType == "faces":
        maxTraining = FACE_MAX_TRAINING - 1
        rawTrainingDataFile = samples.readlines("data/facedata/facedatatrain")
        trainingLabelsFile = samples.readlines("data/facedata/facedatatrainlabels")
        for i in range(numTraining):
            rand = random.randint(1, maxTraining)
            dataCollection['rawTrainingData'].append(
                load1Data(rawTrainingDataFile, rand, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT))
            dataCollection['trainingLabels'].append(load1Label(trainingLabelsFile, rand))
        dataCollection['rawValidationData'] = samples.loadDataFile(
            "data/facedata/facedatatrain", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        dataCollection['validationLabels'] = samples.loadLabelsFile(
            "data/facedata/facedatatrainlabels", numTest)
        dataCollection['rawTestData'] = samples.loadDataFile(
            "data/facedata/facedatatest", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        dataCollection['testLabels'] = samples.loadLabelsFile(
            "data/facedata/facedatatestlabels", numTest)
    elif inputDataType == "digits":
        maxTraining = DIGIT_MAX_TRAINING - 1
        rawTrainingDataFile = samples.readlines("data/digitdata/trainingimages")
        trainingLabelsFile = samples.readlines("data/digitdata/traininglabels")
        for i in range(numTraining):
            rand = random.randint(1, maxTraining)
            dataCollection['rawTrainingData'].append(
                load1Data(rawTrainingDataFile, rand, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT))
            dataCollection['trainingLabels'].append(load1Label(trainingLabelsFile, rand))
        dataCollection['rawValidationData'] = samples.loadDataFile(
            "data/digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        dataCollection['validationLabels'] = samples.loadLabelsFile(
            "data/digitdata/validationlabels", numTest)
        dataCollection['rawTestData'] = samples.loadDataFile(
            "data/digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        dataCollection['testLabels'] = samples.loadLabelsFile(
            "data/digitdata/testlabels", numTest)

    return dataCollection
def runClassifier():
    # Set up variables according to the command line inputs
    featureFunction = basicFeatureExtractorDigit
    legalLabels = range(10)  # number of labels

    # Select classifier
    classifier = perceptron.PerceptronClassifier(legalLabels)

    # Load data
    numTraining = 100
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    # print "Validating..."
    # guesses = classifier.classify(validationData)
    # correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)
def runClassifier(args, options):
    classifier = args['classifier']

    # Load data
    trainingData = samples.loadDataFile("data/%s/training_data.csv" % options.data)
    trainingLabels = samples.loadLabelsFile("data/%s/training_labels.csv" % options.data)
    testData = samples.loadDataFile("data/%s/test_data.csv" % options.data)
    testLabels = samples.loadLabelsFile("data/%s/test_labels.csv" % options.data)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, args)
    print "Testing..."
    guesses = classifier.classify(testData)
    analysis(classifier, guesses, testLabels, testData)
def get_neuron_test_data():
    test_data = samples.loadDataFile("digitdata/testimages", 1000, 28, 28)
    test_labels = np.array(samples.loadLabelsFile("digitdata/testlabels", 1000))
    test_labels = test_labels == 3
    featurized_test_data = np.array(map(dcu.simple_image_featurization, test_data))
    return test_data, featurized_test_data, test_labels
def recog(self):
    global TRAIN_NUM, iClassifier, DIGIT_DATUM_HEIGHT
    global DIGIT_DATUM_WIDTH, myList, c4Temp
    self.b3["background"] = "blue"
    self.b3["fg"] = "#FFF"
    for i in self.s:
        if i != self.b3:
            i["background"] = "black"
            i["fg"] = "#3cecff"
    rawTestData = samples.loadDataFile("digitdata/testingimages", 1,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, myList)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE, myList)
    testData = map(basicFeatureExtractorDigit, rawTestData)
    guess = iClassifier.classify(testData, myList)
    if c4Temp != 0:
        c4.delete(c4Temp)
    c4Temp = c4.create_text(65, 30, text=guess[0], fill="#3cecff",
                            justify=CENTER, font=('times', 24, 'bold'))
def usrTrain(self):
    global TRAIN_NUM, iClassifier, DIGIT_DATUM_HEIGHT
    global root, DIGIT_DATUM_WIDTH, myList
    self.b4["background"] = "blue"
    self.b4["fg"] = "#FFF"
    for i in self.s:
        if i != self.b4:
            i["background"] = "black"
            i["fg"] = "#3cecff"
    d = MyDialog(root)
    root.wait_window(d.top)

    # Load training images
    rawTrainingData = samples.loadDataFile("digitdata/testingimages", TRAIN_NUM,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, myList)
    myList.insertItem("Training data imported.")
    trainingData = map(basicFeatureExtractorDigit, rawTrainingData)

    # Load training Labels
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", TRAIN_NUM, myList)
    myList.insertItem("Training labels imported.")

    # Training
    iClassifier.train(trainingData, trainingLabels, myList)
    myList.insertItem("Training Completed.")
    myList.insertItem("==================")
def demo_faces(weights):
    n_images = 301
    start = time.time()
    data_path = 'facedata/facedatavalidation'
    labels_path = 'facedata/facedatavalidationlabels'
    images = samples.loadDataFile(data_path, n_images, 60, 70)
    labels = samples.loadLabelsFile(labels_path, n_images)
    featureslist = perceptron.compute_features2(images)

    results = []
    for image in range(len(images)):
        total = 0
        for weight in range(len(weights)):
            total += weights[weight] * featureslist[image][weight]
        results.append((total, labels[image]))

    correctcount = 0
    for t in results:
        if t[0] >= float(0) and t[1] == 1:
            correctcount += 1
        elif t[0] < float(0) and t[1] == 0:
            correctcount += 1
    return (float(correctcount) * 100 / float(len(labels)))
def valid(self):
    global iClassifier
    global pCorrect, c5, c5Temp
    self.modBG(self.button["Validate Training"])
    rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                             temp.TEST_SET_SIZE,
                                             temp.DIGIT_DATUM_WIDTH,
                                             temp.DIGIT_DATUM_HEIGHT,
                                             temp.LOG_LIST)
    temp.LOG_LIST.insertItem("Testing data imported.")
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                              temp.TEST_SET_SIZE, temp.LOG_LIST)
    temp.LOG_LIST.insertItem("Testing labels imported.")
    validationData = map(samples.basicFeatureExtractorDigit, rawValidationData)
    guesses = temp.iClassifier.classify(validationData, temp.LOG_LIST)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    temp.LOG_LIST.insertItem(str(correct) + (" correct out of " + str(len(validationLabels))))
    temp.LOG_LIST.insertItem("=========================")
    temp.iOutput.display(1, str(100.0 * correct / len(validationLabels)) + "%")
def get_neuron_training_data():
    training_data = samples.loadDataFile("digitdata/trainingimages", num_train_examples, 28, 28)
    training_labels = np.array(samples.loadLabelsFile("digitdata/traininglabels", num_train_examples))
    training_labels = training_labels == 3
    featurized_training_data = np.array(map(dcu.simple_image_featurization, training_data))
    return training_data, featurized_training_data, training_labels
def autoTrain(self):
    global iClassifier, DIGIT_DATUM_HEIGHT, DIGIT_DATUM_WIDTH, myList
    self.b0["background"] = "blue"
    self.b0["fg"] = "#FFF"
    for i in self.s:
        if i != self.b0:
            i["background"] = "black"
            i["fg"] = "#3cecff"

    # Load training images
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", variable.TRAIN_NUM,
                                           DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT, myList)
    myList.insertItem("Training data imported.")
    trainingData = map(basicFeatureExtractorDigit, rawTrainingData)

    # Load training Labels
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", variable.TRAIN_NUM, myList)
    myList.insertItem("Training labels imported.")

    # Training
    variable.iClassifier.train(trainingData, trainingLabels, myList)
    myList.insertItem("Training Completed.")
    myList.insertItem("==================")
def callData():
    n = 1000
    m = 1000
    items = loadDataFile("data/digitdata/trainingimages", n, 28, 28)
    trainingData = FlatInput(n, items)
    labels = loadLabelsFile("data/digitdata/traininglabels", n)
    val_items = loadDataFile("data/digitdata/testimages", m, 28, 28)
    validationData = FlatInput(m, val_items)
    val_labels = loadLabelsFile("data/digitdata/testlabels", m)
    data = SVMClassifier([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    data.train(trainingData, labels, validationData, val_labels)
def validate_weights(digit, final_weights):
    if digit >= 0:
        images = samples.loadDataFile('digitdata/validationimages', 1000, 28, 28)
        labels = samples.loadLabelsFile('digitdata/validationlabels', 1000)
        features_list = compute_features2(images)
    else:
        images = samples.loadDataFile('facedata/facedatavalidation', 301, 60, 70)
        labels = samples.loadLabelsFile('facedata/facedatavalidationlabels', 301)
        features_list = compute_features2(images)

    accuracylist = []
    for i in range(len(images)):
        fsum = 0
        # for each already computed feature in the current image
        for j in range(len(features_list[i])):
            fsum += (final_weights[j] * features_list[i][j])

        # If working with digits
        if digit >= 0:
            # print(str(labels[i]) + ' --- ' + 'fsum: ' + str(fsum))
            if fsum < float(0) and labels[i] != digit:
                accuracylist.append(True)
            elif fsum >= float(0) and labels[i] == digit:
                accuracylist.append(True)
            else:
                accuracylist.append(False)
        # If working with faces
        else:
            if fsum < float(0) and labels[i] == 0:
                accuracylist.append(True)
            elif fsum >= float(0) and labels[i] == 1:
                accuracylist.append(True)
            else:
                accuracylist.append(False)

    accuracy_count = 0
    for i in range(len(accuracylist)):
        if accuracylist[i]:
            accuracy_count += 1
    # float arithmetic so the percentage is not truncated by integer division
    accuracy = 100.0 * accuracy_count / len(accuracylist)
    return accuracy
def callData():
    n = 100
    hidden_neurons = 25
    # items = loadDataFile("data/digitdata/trainingimages", n, 28, 28)
    # for item in items:
    #     items = util.Counter()
    items = loadDataFile("data/digitdata/trainingimages", n, 28, 28)
    flat_item = FlatInput(n, items)
    trainingData = {}
    for i in range(len(flat_item)):
        trainingData[i] = util.Counter()
        for j in range(len(flat_item[i])):
            trainingData[i][j] = flat_item[i][j]
    labels = loadLabelsFile("data/digitdata/traininglabels", n)

    val_items = loadDataFile("data/digitdata/validationimages", n, 28, 28)
    flat_val = FlatInput(n, val_items)
    validationData = {}
    for i in range(len(flat_val)):
        validationData[i] = util.Counter()
        for j in range(len(flat_val[i])):
            validationData[i][j] = flat_val[i][j]
    val_labels = loadLabelsFile("data/digitdata/validationlabels", n)

    data = MLPClassifier([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 50, hidden_neurons, .5)

    weights_op = {}
    for w in range(0, 10):
        weights_op[w] = util.Counter()
        for i in range(0, 784):
            weights_op[w][i] = random.random()
    weights_L2 = {}
    for w in range(0, hidden_neurons):
        weights_L2[w] = util.Counter()
        for i in range(0, 784):
            weights_L2[w][i] = random.random()
    data.setWeights(weights_L2, weights_op)
    data.train(trainingData, labels, validationData, val_labels)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels, options.validate)

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    if options.classifier == "perceptron":
        f = open("perceptron_valid.csv", "a")
        f.write(str(len(trainingData)) + "," + str(100 * correct / (1.0 * len(validationData))) + '\n')
        f.close()

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
    if options.classifier == "perceptron":
        f = open("perceptron_test.csv", "a")
        f.write(str(len(trainingData)) + "," + str(100 * correct / (1.0 * len(testData))) + '\n')
        f.close()
def run(config, codeRoot=''):
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])
    args = {}
    args['metric'] = get_metric(config['metric'])
    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data

        # Load data
        trainingData = samples.loadDataFile(os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Load test
        testData = samples.loadDataFile(os.path.join(codeRoot, "data/%s/test_data.csv" % data))
        testLabels = samples.loadLabelsFile(os.path.join(codeRoot, "data/%s/test_labels.csv" % data))

        # Conduct training and testing
        args['maxdepth'] = int(config['maxdepth'][i])
        classifier.train(trainingData, trainingLabels, args)
        guesses = classifier.classify(testData)
        min_accuracy_required = float(config['accuracy'][i])
        if is_good_classifier(guesses, testLabels, min_accuracy_required):
            print '\t\tOK'
            points += pointsPerCase
        else:
            points += (pointsPerCase / 2.)
            print '\t\tMost frequent classifier is better'
            # record the first failing dataset so the all-or-nothing check below can trigger
            if not first_error:
                first_error = 'Dataset %s did not reach the required accuracy' % data

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''
    return (points, test_error)
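# Hypothetical usage sketch for run(): the key names are taken from the
# config reads above, but every value here is an illustrative guess.
#
#     config = {'header': 'Decision tree tests', 'pointsPerCase': '2',
#               'metric': 'entropy', 'dataset': ['D1', 'D2'],
#               'maxdepth': ['3', '5'], 'accuracy': ['0.7', '0.8'],
#               'totalQuestion': '0'}
#     points, test_error = run(config, codeRoot='.')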
def get_perceptron_test_data():
    test_data = samples.loadDataFile("digitdata/testimages.txt", 1000, 28, 28)
    test_labels = map(str, samples.loadLabelsFile("digitdata/testlabels.txt", 1000))
    featurized_test_data = np.array(map(dcu.simple_image_featurization, test_data))
    return test_data, featurized_test_data, test_labels
def testing(num): trainData = np.load("traindigitbasic.npy") trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = np.load("testdigitbasic.npy") testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = np.load("validationdigitbasic.npy") validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) neural = NeuralNetworkClassifier(28 * 28, 50, 10, num, 3.5) neural.train(trainData[:, 0:num], trainLabels, 100) print "Test Data" guess = neural.classify(testData) samples.verify(neural, guess, testLabels) print "===================================" print "Validation Data" guess = neural.classify(validData) samples.verify(neural, guess, validLabels)
def testing(num): trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28) trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28) testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28) validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) nb = NaiveBayesClassifier(1,0) nb.train(trainData, trainLabels) print "===================================" print "Test Data" guess = nb.classify(testData) samples.verify(nb,guess,testLabels) print "===================================" print "Validation Data" guess=nb.classify(validData) samples.verify(nb,guess,validLabels)
def testing(num): trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28) trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28) testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28) validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) perceptron=PerceptronClassifier(trainData, trainLabels,0) perceptron.train(trainData, trainLabels,10) print "===================================" print "Test Data" guess=perceptron.classify(testData) samples.verify(perceptron, guess, testLabels) print "===================================" print "Validation Data" guess=perceptron.classify(validData) samples.verify(perceptron,guess,validLabels)
def testing(num): trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70) trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num) testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70) testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151) validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70) validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301) perceptron=PerceptronClassifier(trainData, trainLabels,0) perceptron.train(trainData, trainLabels,10) print "===================================" print "Test Data" guess=perceptron.classify(testData) samples.verify(perceptron, guess, testLabels) print "===================================" print "Validation Data" guess=perceptron.classify(validData) samples.verify(perceptron,guess,validLabels)
def testing(num): trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28) trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28) testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28) validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) perceptron=PerceptronClassifier(trainData, trainLabels,0) perceptron.train(trainData, trainLabels,10) print "***********************************" print "*************Test Data*************" guess=perceptron.classify(testData) samples.verify(perceptron, guess, testLabels) print "***********************************" print "************Valid Data*************" guess=perceptron.classify(validData) samples.verify(perceptron,guess,validLabels)
def testing(num): trainData = np.load("traindigitbasic.npy") trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = np.load("testdigitbasic.npy") testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = np.load("validationdigitbasic.npy") validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) neural = NeuralNetworkClassifier(28 * 28, 50, 10, num, 3.5) neural.train(trainData[:, 0:num], trainLabels, 100) print "*************Test Data*************" guess = neural.classify(testData) samples.verify(neural, guess, testLabels) print "***********************************" print "************Valid Data*************" guess = neural.classify(validData) samples.verify(neural, guess, validLabels)
def testing(num): trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28) trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num) testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28) testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000) validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28) validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000) nb = NaiveBayesClassifier(1,0) nb.train(trainData, trainLabels) print "***********************************" print "*************Test Data*************" guess = nb.classify(testData) samples.verify(nb,guess,testLabels) print "***********************************" print "************Valid Data*************" guess=nb.classify(validData) samples.verify(nb,guess,validLabels)
def sample_faces(sample_percentage, trainingpath, labelspath, n_images):
    faces = samples.loadDataFile(trainingpath, 451, 60, 70)
    labels = samples.loadLabelsFile(labelspath, 451)
    joinedlists = list(zip(faces, labels))
    random.shuffle(joinedlists)
    faces, labels = zip(*joinedlists)
    n_faces = int(float(sample_percentage) / float(100) * 451)
    return faces[:n_faces], labels[:n_faces]
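# Usage sketch (hypothetical call): draw a random 10% sample of the 451
# training faces, assuming the Berkeley-style facedata files are on disk.
#
#     faces, labels = sample_faces(10, 'facedata/facedatatrain',
#                                  'facedata/facedatatrainlabels', 451)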
def testing(num): trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70) trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num) testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70) testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151) validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70) validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301) nb = NaiveBayesClassifier(1, 0) nb.train(trainData, trainLabels) print "===================================" print "Test Data" guess = nb.classify(testData) samples.verify(nb, guess, testLabels) print "===================================" print "Validation Data" guess = nb.classify(validData) samples.verify(nb, guess, validLabels)
def readDigitData(trainingSize=2000, testSize=1000):
    rootdata = 'digitdata/'
    # loading digits data; the validation/test size is fixed at 1000
    # regardless of the testSize argument
    testSize = 1000
    trainingData = samples.loadDataFile(rootdata + 'trainingimages', trainingSize,
                                        DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels", trainingSize)
    validationData = samples.loadDataFile(rootdata + "validationimages", testSize,
                                          DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels", testSize)
    testData = samples.loadDataFile(rootdata + "testimages", testSize,
                                    DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile(rootdata + "testlabels", testSize)
    return (trainingData, trainingLabels, validationData, validationLabels,
            testData, testLabels)
def testing(num): trainData = np.load("trainfacebasic.npy") trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num) testData = np.load("testfacebasic.npy") testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151) validData = np.load("validationfacebasic.npy") validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301) loop=True while loop: neural = NeuralNetworkClassifier(60 * 70, 500, 1, num, 0.03) neural.train(trainData[:,0:num], trainLabels, 100) print "Test Data" guess = neural.classify(testData) loop=samples.verify(neural, guess, testLabels) if loop: continue print "===================================" print "Validation Data" guess = neural.classify(validData) samples.verify(neural, guess, validLabels)
def get_perceptron_training_data():
    training_data = samples.loadDataFile("digitdata/trainingimages.txt", num_train_examples, 28, 28)
    training_labels = map(str, samples.loadLabelsFile("digitdata/traininglabels.txt", num_train_examples))
    featurized_training_data = np.array(map(dcu.simple_image_featurization, training_data))
    return training_data, featurized_training_data, training_labels
def testing(num): trainData = np.load("trainfaceadvanced.npy") trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num) testData = np.load("testfaceadvanced.npy") testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151) validData = np.load("validationfaceadvanced.npy") validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301) loop=True while loop: neural = NeuralNetworkClassifier(60 * (70+1), 500, 1, num, 0.03) neural.train(trainData[:,0:num], trainLabels, 100) print "Test Data" guess = neural.classify(testData) loop=samples.verify(neural, guess, testLabels) if loop: continue print "===================================" print "Validation Data" guess = neural.classify(validData) samples.verify(neural, guess, validLabels)
def runClassifier(args, options):
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        print "loading face data set"
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels")
        rawValidationData = samples.loadDataFile("facedata/facedatavalidation", FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatavalidationlabels")
        rawTestData = samples.loadDataFile("facedata/facedatatest", FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels")
        rawTrainingData, trainingLabels = randomSample(rawTrainingData, trainingLabels, numTraining)
        rawTestData, testLabels = randomSample(rawTestData, testLabels, numTest)
    else:
        print "loading digit data set"
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels")
        rawValidationData = samples.loadDataFile("digitdata/validationimages", DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels")
        rawTestData = samples.loadDataFile("digitdata/testimages", DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels")
        rawTrainingData, trainingLabels = randomSample(rawTrainingData, trainingLabels, numTraining)
        rawTestData, testLabels = randomSample(rawTestData, testLabels, numTest)

    print "Extracting features..."
    if options.classifier == "linear_svm":
        if options.data == "faces":
            featureFunction = HogFeatureFaceImg
        else:
            featureFunction = HogFeatureImgDigit
        trainingData = map(featureFunction, rawTrainingData)
        trainingData = np.array(trainingData).transpose()
        validationData = map(featureFunction, rawValidationData)
        validationData = np.array(validationData).transpose()
        testData = map(featureFunction, rawTestData)
        testData = np.array(testData).transpose()
    else:
        if options.data == "faces":
            featureFunction = enhancedFeatureExtractorFace
        else:
            featureFunction = enhancedFeatureExtractorDigit
        trainingData = map(featureFunction, rawTrainingData)
        validationData = map(featureFunction, rawValidationData)
        testData = map(featureFunction, rawTestData)

    print "Training..."
    start = timeit.default_timer()
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    stop = timeit.default_timer()
    print stop - start, " s"

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
        rawTestData = samples.loadDataFile("facedata/facedatatest", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    if options.odds and (options.classifier == "naiveBayes" or options.classifier == "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier == "naiveBayes" or options.classifier == "nb":
            string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)
        print string3
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print ("=== Features with high weight for label %d ===" % l)
            printImage(features_weights)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    if options.odds and options.classifier != "mostFrequent":
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier == "naiveBayes" or options.classifier == "nb":
            string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)
        print string3
        printImage(features_odds)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
def runClassifier(args, options):
    classifier = args['classifier']
    classifierArgs = args['classifierArgs']

    # import statements here because sys.path may be altered to point
    # to student code
    import featureExtractors
    import featureExtractorsBasic
    import mostFrequent
    import decisionTree
    import decisionStump
    import naiveBayes
    import perceptron
    import mira
    import diffDecisionTree

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data.endswith('.arff'):
        data, labels = samples.loadARFFDataFile("data/arffdata/" + options.data, numTraining + numTest)
        rawTrainingData, rawTestData = data[:numTraining], data[numTraining:numTraining + numTest]
        trainingLabels, testLabels = labels[:numTraining], labels[numTraining:numTraining + numTest]
        legalLabels = set(trainingLabels)
    elif options.data == "spam":
        rawTrainingData = samples.loadSpamData("data/spamdata/trainingdata", numTraining)
        trainingLabels = samples.loadLabelsFile("data/spamdata/traininglabels.txt", numTraining)
        rawTestData = samples.loadSpamData("data/spamdata/testdata", numTest)
        testLabels = samples.loadLabelsFile("data/spamdata/testlabels.txt", numTest)
        legalLabels = ['1', '0']
    elif options.data == "digits":
        DIGIT_DATUM_WIDTH = 28
        DIGIT_DATUM_HEIGHT = 28
        rawTrainingData = samples.loadDigitsDataFile("data/digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels", numTraining)
        rawTestData = samples.loadDigitsDataFile("data/digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)
        legalLabels = set(trainingLabels)
    else:
        print "Unknown dataset", options.data
        print USAGE_STRING
        sys.exit(2)

    # Load classifier
    if options.classifier == "mostFrequent":
        classifier = mostFrequent.MostFrequentClassifier(legalLabels, **classifierArgs)
    elif options.classifier == "dt" or options.classifier == "decisionTree":
        classifier = decisionTree.DecisionTreeClassifer(legalLabels, **classifierArgs)
    elif options.classifier == "diffTree":
        classifier = diffDecisionTree.DiffDecisionTreeClassifer(legalLabels, **classifierArgs)
    elif options.classifier == "stump":
        classifier = decisionStump.DecisionStumpClassifer(legalLabels, **classifierArgs)
    elif options.classifier == "naiveBayes" or options.classifier == "nb":
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels, **classifierArgs)
    elif options.classifier == "perceptron":
        classifier = perceptron.PerceptronClassifier(legalLabels, **classifierArgs)
    elif options.classifier == "mira":
        classifier = mira.MiraClassifier(legalLabels, **classifierArgs)

    # Load feature extractors
    if options.data.endswith('.arff'):
        if options.classifier in ['nb', 'perceptron', 'mira']:
            make_binary = True
        else:
            make_binary = False
        extractor = featureExtractorsBasic.IdentityFeatureExtractor(make_binary)
    elif options.data == 'spam':
        if options.features:
            extractor = featureExtractors.EnhancedEmailFeatureExtractor()
        else:
            extractor = featureExtractors.EmailFeatureExtractor()
    else:
        assert options.data == "digits"
        if options.features:
            extractor = featureExtractors.EnhancedDigitFeatureExtractor()
        else:
            extractor = featureExtractors.DigitFeatureExtractor()

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    featureFunction = extractor.extractFeatures

    # Preprocess data
    print "Preprocessing data..."
    map(extractor.preProcess, rawTrainingData)
    extractor.finalizeFeatures()
    assert len(rawTrainingData) == len(trainingLabels)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    # validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)
    assert len(rawTrainingData) == len(trainingLabels)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels)
    guesses = classifier.classify(trainingData)
    correct = [guesses[i] == trainingLabels[i] for i in range(len(trainingLabels))].count(True)
    print str(correct), ("correct out of " + str(len(trainingLabels)) + " (%.1f%%) on training data.") % (100.0 * correct / len(trainingLabels))

    if len(testData) > 0:
        print "Testing..."
        guesses = classifier.classify(testData)
        correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
        print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%) on test data.") % (100.0 * correct / len(testLabels))
    else:
        print "The test data set is empty."

    # have classifier print out some helpful information
    classifier.printDiagnostics()
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options['train']
    if options['data'] == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)

    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    util.pause()
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    if options['odds'] and options['classifier'] != "mostfrequent":
        class1, class2 = options['class1'], options['class2']
        features_class1, features_class2, features_odds = classifier.findHighOddsFeatures(class1, class2)
        if options['classifier'] == "naivebayes":
            string1 = "=== Features with max P(F_i = 1 | class = %d) ===" % class1
            string2 = "=== Features with max P(F_i = 1 | class = %d) ===" % class2
            string3 = "=== Features with highest odd ratio of class %d over class %d ===" % (class1, class2)
        else:
            string1 = "=== Features with largest weight for class %d ===" % class1
            string2 = "=== Features with largest weight for class %d ===" % class2
            string3 = "=== Features for which weight(class %d)-weight(class %d) is biggest ===" % (class1, class2)
        print string1
        printImage(features_class1)
        print string2
        printImage(features_class2)
        print string3
        printImage(features_odds)
def writeLabeledData(prefix, labeled_data):
    datums, labels = zip(*labeled_data)
    with open(prefix + "images", 'w') as f:
        for datum in datums:
            f.write(str(datum) + "\n")
    with open(prefix + "labels", 'w') as f:
        for label in labels:
            f.write(str(label) + "\n")

rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000)
rawValidationData = samples.loadDataFile("digitdata/validationimages", 1000, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 1000)
rawTestData = samples.loadDataFile("digitdata/testimages", 1000, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
testLabels = samples.loadLabelsFile("digitdata/testlabels", 1000)

all_data = rawTrainingData + rawValidationData + rawTestData
all_labels = trainingLabels + validationLabels + testLabels
labeled_data = zip(all_data, all_labels)
perm = np.random.permutation(len(labeled_data))
permuted_data = []
for i in perm:
    # assumed continuation of the truncated original: gather the labeled
    # pairs in permuted order
    permuted_data.append(labeled_data[i])
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
        rawTestData = samples.loadDataFile("facedata/facedatatest", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Start training..."
    start = time.time()
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    end = time.time() - start
    print "Training time: " + str(end)

    print "Start validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print "Validation result: ", str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Start testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print "Testing result: ", str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    #analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # Do odds-ratio computation if specified at the command line
    if options.odds and options.classifier == NB:
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier == NB:
            string3 = "=== Features with highest odds ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d) - weight(label %d) is biggest ===" % (label1, label2)
        print string3
        printImage(features_odds)

    if options.weights and options.classifier == PT:
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print "=== Features with high weight for label %d ===" % l
            printImage(features_weights)
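# The timing above is two time.time() calls bracketing train(). A reusable
# sketch of the same pattern; the `timed` wrapper is ours, not project code.
import time

def timed(label, fn, *fnargs):
    start = time.time()
    result = fn(*fnargs)
    print label + " time: %.2fs" % (time.time() - start)
    return result

# e.g. timed("Training", classifier.train, trainingData, trainingLabels,
#            validationData, validationLabels)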
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "pacman":
        # Each data path falls back through three sources: the per-agent map,
        # the explicit argument, and finally the ContestAgent defaults.
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(agentToClone, (None, None, None))
        trainingData = trainingData or args.get('trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get('validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # Do odds-ratio computation if specified at the command line
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier in ("naiveBayes", "nb"):
            string3 = "=== Features with highest odds ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d) - weight(label %d) is biggest ===" % (label1, label2)
        print string3
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print "=== Features with high weight for label %d ===" % l
            printImage(features_weights)
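# The Pacman branch above leans on `or` returning its first truthy operand,
# so each data path falls through its sources in priority order. A toy
# illustration of the idiom (the names and path below are hypothetical):
fromAgentMap = None                 # e.g. the per-agent map lookup missed
fromArgs = ''                       # e.g. args.get('trainingData', False) was empty
defaultPath = 'saved_games/contest_training.pkl'
trainingPath = fromAgentMap or fromArgs or defaultPath
print trainingPath                  # -> 'saved_games/contest_training.pkl'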
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
        rawTestData = samples.loadDataFile("facedata/facedatatest", numTest, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)

    # Extract features.
    # rawTrainingData is a list of Datum; featureFunction maps each Datum to a
    # util.Counter, so trainingData below is a list of Counter.
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # Do odds-ratio computation if specified at the command line
    if options.odds and options.classifier in ("naiveBayes", "nb"):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1, label2)
        if options.classifier in ("naiveBayes", "nb"):
            string3 = "=== Features with highest odds ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d) - weight(label %d) is biggest ===" % (label1, label2)
        print string3
        printImage(features_odds)

    if options.weights and options.classifier == "perceptron":
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print "=== Features with high weight for label %d ===" % l
            printImage(features_weights)
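# The type notes above say featureFunction maps a Datum to a util.Counter of
# features. A minimal sketch of such an extractor, assuming the datum exposes
# getPixel(x, y) and that util.Counter behaves as in this codebase; the
# function name is ours, not the project's.
import util

def binaryPixelFeatures(datum, width, height):
    features = util.Counter()
    for x in range(width):
        for y in range(height):
            # 1 if the pixel is inked, 0 otherwise
            features[(x, y)] = 1 if datum.getPixel(x, y) > 0 else 0
    return features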
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training

    # Extract features
    print "Extracting features..."
    if options.data == "faces":
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatavalidation", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatavalidationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
        rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    if options.classifier == "GDA" or options.classifier == "LR":
        # GDA and LR train on a low-dimensional PCA projection of the basic
        # features; cache the projection so repeated runs skip the decomposition.
        import os.path
        cacheFile = options.data + '_' + str(numTraining) + '_pca.np'
        if os.path.isfile(cacheFile):
            f = open(cacheFile, 'rb')
            principleComponents, trainingData, validationData, testData = cPickle.load(f)
            f.close()
        else:
            dimension = 13
            if options.data == "faces":
                principleComponents = getPrincipleComponents(map(featureFunction, samples.loadDataFile("facedata/facedatatrain", 451, FACE_DATUM_WIDTH, FACE_DATUM_HEIGHT)), dimension)
            else:
                principleComponents = getPrincipleComponents(map(featureFunction, samples.loadDataFile("digitdata/trainingimages", 5000, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)), dimension)
            trainingData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData)), principleComponents)
            validationData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawValidationData)), principleComponents)
            testData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTestData)), principleComponents)
            f = open(cacheFile, 'wb')
            cPickle.dump((principleComponents, trainingData, validationData, testData), f)
            f.close()
    elif options.classifier == "GPC":
        trainingData = basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData))
        validationData = basicFeatureDataToNumpyArray(map(featureFunction, rawValidationData))
        testData = basicFeatureDataToNumpyArray(map(featureFunction, rawTestData))
    else:
        trainingData = map(featureFunction, rawTrainingData)
        validationData = map(featureFunction, rawValidationData)
        testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
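# Sketch of the cache-or-compute pattern used for the PCA projection above:
# key the pickle file on dataset and training size so reruns skip the
# expensive decomposition. `computeFn` stands in for the projection work;
# this generalized helper is ours, not the project's.
import os.path
import cPickle

def cachedCompute(cachePath, computeFn):
    if os.path.isfile(cachePath):
        f = open(cachePath, 'rb')
        result = cPickle.load(f)
        f.close()
    else:
        result = computeFn()
        f = open(cachePath, 'wb')
        cPickle.dump(result, f)
        f.close()
    return result

# e.g. components = cachedCompute("digits_100_pca.np",
#                                 lambda: getPrincipleComponents(data, 13))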