def readDigitData(trainingSize=100, testSize=100):
    rootdata = "digitdata/"
    # loading digits data
    rawTrainingData = samples.loadDataFile(
        rootdata + "trainingimages", trainingSize, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    )
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels", trainingSize)
    rawValidationData = samples.loadDataFile(
        rootdata + "validationimages", TEST_SET_SIZE, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    )
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", testSize, DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", testSize)
    try:
        print "Extracting features..."
        featureFunction = dataClassifier.basicFeatureExtractorDigit
        trainingData = map(featureFunction, rawTrainingData)
        validationData = map(featureFunction, rawValidationData)
        testData = map(featureFunction, rawTestData)
    except:
        display("An exception was raised while extracting basic features: \n %s" % getExceptionTraceBack())
    return (
        trainingData,
        trainingLabels,
        validationData,
        validationLabels,
        rawTrainingData,
        rawValidationData,
        testData,
        testLabels,
        rawTestData,
    )
def getNumpyData(numTraining, numTest):
    featureFunction = numpyFeatureExtractorDigit

    rawTrainingData = samples.loadDataFile("data/digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels",
                                            numTraining)
    rawValidationData = samples.loadDataFile("data/digitdata/validationimages",
                                             numTest, DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(
        "data/digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("data/digitdata/testimages", numTest,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = np.array(map(featureFunction, rawTrainingData))
    validationData = np.array(map(featureFunction, rawValidationData))
    testData = np.array(map(featureFunction, rawTestData))
    validationLabels = np.array(validationLabels)
    testLabels = np.array(testLabels)
    trainingLabels = np.array(trainingLabels)

    return (trainingData, validationData, testData, trainingLabels,
            validationLabels, testLabels)
def runClassifier(args, options):
    """Train and evaluate a classifier on dataset 'd1' or 'd2'.

    Args:
        args: dict with 'featureFunction' (datum -> features) and
            'classifier' (object exposing train/classify).
        options: parsed command-line options; uses .dataset, .training,
            .test, .validate and .classifier.

    Side effects: prints progress and accuracy; for the "1vr" classifier
    appends "<trainSize>,<percent>" rows to perceptron1vr_train.csv and
    perceptron1vr_test.csv.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']

    # Load data
    dataset = options.dataset
    numTraining = options.training
    numTest = options.test

    # 'd1' selects data/D1; any other value falls through to data/D2.
    if dataset == 'd1':
        rawTrainingData = samples.loadDataFile("data/D1/training_data",
                                               numTraining)
        trainingLabels = samples.loadLabelsFile("data/D1/training_labels",
                                                numTraining)
        rawTestData = samples.loadDataFile("data/D1/test_data", numTest)
        testLabels = samples.loadLabelsFile("data/D1/test_labels", numTest)

    else:
        rawTrainingData = samples.loadDataFile("data/D2/training_data",
                                               numTraining)
        trainingLabels = samples.loadLabelsFile("data/D2/training_labels",
                                                numTraining)
        rawTestData = samples.loadDataFile("data/D2/test_data", numTest)
        testLabels = samples.loadLabelsFile("data/D2/test_labels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, testData, testLabels,
                     options.validate)

    # Accuracy is measured on the TRAINING set here (not a held-out set).
    guesses = classifier.classify(trainingData)
    correct = [
        guesses[i] == trainingLabels[i] for i in range(len(trainingLabels))
    ].count(True)

    # Append training-set accuracy as a CSV row for the 1-vs-rest perceptron.
    if (options.classifier == "1vr"):
        f = open("perceptron1vr_train.csv", "a")
        f.write(
            str(len(trainingData)) + "," +
            str(100 * correct / (1.0 * (len(trainingData)))) + '\n')
        f.close()

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels))

    # Append test-set accuracy for the 1-vs-rest perceptron.
    if (options.classifier == "1vr"):
        f = open("perceptron1vr_test.csv", "a")
        f.write(
            str(len(trainingData)) + "," + str(100 * correct /
                                               (1.0 * (len(testData)))) + '\n')
        f.close()
def readDigitData(trainingSize=100, testSize=100):
    rootdata = 'digitdata/'
    # loading digits data
    rawTrainingData = samples.loadDataFile(rootdata + 'trainingimages',
                                           trainingSize, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels",
                                            trainingSize)
    rawValidationData = samples.loadDataFile(rootdata + "validationimages",
                                             TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels",
                                              TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", testSize,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", testSize)
    try:
        print "Extracting features..."
        featureFunction = dataClassifier.basicFeatureExtractorDigit
        trainingData = map(featureFunction, rawTrainingData)
        validationData = map(featureFunction, rawValidationData)
        testData = map(featureFunction, rawTestData)
    except:
        display(
            "An exception was raised while extracting basic features: \n %s" %
            getExceptionTraceBack())
    return (trainingData, trainingLabels, validationData, validationLabels,
            rawTrainingData, rawValidationData, testData, testLabels,
            rawTestData)
# Example #5
def runClassifier(args, options):
    """Load pacman or digit data and extract features for classification.

    Args:
        args: dict with 'featureFunction', 'classifier', 'printImage' and,
            for pacman data, optional 'agentToClone' / 'trainingData' /
            'validationData' keys.
        options: parsed options; uses .data, .training and .test.

    NOTE(review): this snippet appears truncated — it ends right after
    feature extraction with no training/testing step.
    """
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']
    
    # Load data
    numTraining = options.training
    numTest = options.test
    
    if(options.data=="pacman"):
        # Resolve saved-game paths: per-agent mapping first, then explicit
        # args overrides, finally the ContestAgent defaults.
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(agentToClone, (None, None, None))
        trainingData = trainingData or args.get('trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get('validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)
    
    
    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)
# Example #6
def runClassifier(args, options):

    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data
    numTraining = options.training
    numTest = options.test

    rawTrainingData = samples.loadDataFile("data/digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels",
                                            numTraining)
    rawValidationData = samples.loadDataFile("data/digitdata/validationimages",
                                             numTest, DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(
        "data/digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("data/digitdata/testimages", numTest,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    print "Validating..."
    validation_guesses = classifier.classify(validationData)
    correct = [
        validation_guesses[i] == validationLabels[i]
        for i in range(len(validationLabels))
    ].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) +
                         " (%.1f%%).") % (100.0 * correct /
                                          len(validationLabels))
    print "Testing..."
    test_guesses = classifier.classify(testData)
    correct = [
        test_guesses[i] == testLabels[i] for i in range(len(testLabels))
    ].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, test_guesses, trainingData, trainingLabels,
             testLabels, testData, rawTestData, printImage)

    if ((options.weights) & (options.classifier == "perceptron")):
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print("=== Features with high weight for label %d ===" % l)
            printImage(features_weights)
# Example #7
def runClassifier(dataset, numTraining):
    """Train and test the neural-network classifier on faces or digits.

    Args:
        dataset: 'faces' or 'digits'; any other value raises.
        numTraining: number of training examples to load (test size comes
            from the module-level TEST_SET_SIZE).

    Prints test-set accuracy.

    Raises:
        Exception: if dataset is not 'faces' or 'digits'.
    """
    if dataset == 'faces':
        legalLabels = range(2)
        featureFunction = basicFeatureExtractorFace

        rawTrainingData = samples.loadDataFile("facedata/facedatatrain",
                                               numTraining, FACE_DATUM_WIDTH,
                                               FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels",
                                                numTraining)
        rawTestData = samples.loadDataFile("facedata/facedatatest",
                                           TEST_SET_SIZE, FACE_DATUM_WIDTH,
                                           FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels",
                                            TEST_SET_SIZE)

    elif dataset == 'digits':
        legalLabels = range(10)
        featureFunction = basicFeatureExtractorDigit

        rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                               numTraining, DIGIT_DATUM_WIDTH,
                                               DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                numTraining)
        rawTestData = samples.loadDataFile("digitdata/testimages",
                                           TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels",
                                            TEST_SET_SIZE)

    else:
        # Same exception type as before, now with a diagnostic message
        # (a bare `raise Exception` gave no hint what went wrong).
        raise Exception(
            "Unknown dataset: %r (expected 'faces' or 'digits')" % (dataset,))

    # load NN classifier
    classifier = neuralNetwork_submission.NeuralNetworkClassifier(
        legalLabels, "NeuralNetwork", 123)

    # converting data to np.array
    trainingData = basicFeatureDataToNumpyArray(
        map(featureFunction, rawTrainingData)).astype(np.float32)
    testData = basicFeatureDataToNumpyArray(map(
        featureFunction, rawTestData)).astype(np.float32)

    print("Training...")
    classifier.train(trainingData, trainingLabels)

    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print(
        "Performance on the test set:", str(correct),
        ("correct out of " + str(len(testLabels)) + " (%.1f%%).") %
        (100.0 * correct / len(testLabels)), "[Dataset: " + dataset +
        ", Number of training samples: " + str(numTraining) + "]")
# Example #8
def rLoadDataFile(inputDataType, numTraining, numTest):
    """Load a randomly sampled training set plus validation/test sets.

    Training examples are drawn one at a time with random indices (with
    replacement) via load1Data/load1Label; validation and test data are
    loaded sequentially with samples.loadDataFile/loadLabelsFile.

    Args:
        inputDataType: "faces" or "digits".
        numTraining: number of random training examples to draw.
        numTest: number of validation and of test examples.

    Returns:
        dict with keys 'rawTrainingData', 'trainingLabels',
        'rawValidationData', 'validationLabels', 'rawTestData', 'testLabels'.
        NOTE(review): for any other inputDataType the dict is returned with
        only the two (empty) training entries — confirm callers never pass
        another value.
    """
    dataCollection = {}
    dataCollection['rawTrainingData'] = []
    dataCollection['trainingLabels'] = []
    if (inputDataType == "faces"):
        # Random indices run over [1, FACE_MAX_TRAINING - 1].
        maxTraining = FACE_MAX_TRAINING - 1
        rawTrainingDataFile = samples.readlines("data/facedata/facedatatrain")
        trainingLabelsFile = samples.readlines(
            "data/facedata/facedatatrainlabels")
        for i in range(numTraining):
            rand = random.randint(1, maxTraining)
            dataCollection['rawTrainingData'].append(
                load1Data(rawTrainingDataFile, rand, FACE_DATUM_WIDTH,
                          FACE_DATUM_HEIGHT))
            dataCollection['trainingLabels'].append(
                load1Label(trainingLabelsFile, rand))

        # NOTE(review): validation reuses the face *training* files —
        # presumably intentional (no separate face validation set); confirm.
        dataCollection['rawValidationData'] = samples.loadDataFile(
            "data/facedata/facedatatrain", numTest, FACE_DATUM_WIDTH,
            FACE_DATUM_HEIGHT)
        dataCollection['validationLabels'] = samples.loadLabelsFile(
            "data/facedata/facedatatrainlabels", numTest)
        dataCollection['rawTestData'] = samples.loadDataFile(
            "data/facedata/facedatatest", numTest, FACE_DATUM_WIDTH,
            FACE_DATUM_HEIGHT)
        dataCollection['testLabels'] = samples.loadLabelsFile(
            "data/facedata/facedatatestlabels", numTest)

    elif (inputDataType == "digits"):
        maxTraining = DIGIT_MAX_TRAINING - 1
        rawTrainingDataFile = samples.readlines(
            "data/digitdata/trainingimages")
        trainingLabelsFile = samples.readlines("data/digitdata/traininglabels")
        for i in range(numTraining):

            rand = random.randint(1, maxTraining)
            dataCollection['rawTrainingData'].append(
                load1Data(rawTrainingDataFile, rand, DIGIT_DATUM_WIDTH,
                          DIGIT_DATUM_HEIGHT))
            dataCollection['trainingLabels'].append(
                load1Label(trainingLabelsFile, rand))

        dataCollection['rawValidationData'] = samples.loadDataFile(
            "data/digitdata/validationimages", numTest, DIGIT_DATUM_WIDTH,
            DIGIT_DATUM_HEIGHT)
        dataCollection['validationLabels'] = samples.loadLabelsFile(
            "data/digitdata/validationlabels", numTest)
        dataCollection['rawTestData'] = samples.loadDataFile(
            "data/digitdata/testimages", numTest, DIGIT_DATUM_WIDTH,
            DIGIT_DATUM_HEIGHT)
        dataCollection['testLabels'] = samples.loadLabelsFile(
            "data/digitdata/testlabels", numTest)

    return dataCollection
# Example #9
def runClassifier():
    """Train a perceptron on 100 digit examples and report test accuracy.

    Uses fixed settings (basic digit features, 10 labels, 100 training
    examples, TEST_SET_SIZE validation/test examples), prints the test
    accuracy, pauses, then runs analysis().
    """
    # Set up variables according to the command line inputs
    featureFunction = basicFeatureExtractorDigit

    legalLabels = range(10)  # number of labels

    # Select classifier
    classifier = perceptron.PerceptronClassifier(legalLabels)

    # Load data
    numTraining = 100

    rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                           numTraining, DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                            numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                             TEST_SET_SIZE, DIGIT_DATUM_WIDTH,
                                             DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                              TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,
                                       DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

    # Extract features (featureFunction above is the same extractor).
    trainingData = map(basicFeatureExtractorDigit, rawTrainingData)
    validationData = map(basicFeatureExtractorDigit, rawValidationData)
    testData = map(basicFeatureExtractorDigit, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData,
                     validationLabels)
    # print "Validating..."
    #  guesses = classifier.classify(validationData)
    #  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    # print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))

    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i]
               for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) +
                         " (%.1f%%).") % (100.0 * correct / len(testLabels))
    util.pause()
    analysis(classifier, guesses, testLabels, rawTestData)
# Example #10
def runClassifier( args , options ) :
	classifier = args['classifier']

	# Load data
	trainingData = samples.loadDataFile( "data/%s/training_data.csv" % options.data )
	trainingLabels = samples.loadLabelsFile( "data/%s/training_labels.csv" % options.data )

	testData = samples.loadDataFile( "data/%s/test_data.csv" % options.data )
	testLabels = samples.loadLabelsFile( "data/%s/test_labels.csv" % options.data )

	# Conduct training and testing
	print "Training..."
	classifier.train( trainingData , trainingLabels , args )
	print "Testing..."
	guesses = classifier.classify( testData )
	analysis( classifier , guesses , testLabels , testData )
# Example #11
def get_neuron_test_data():
    """Load 1000 digit test images with binary "is it a 3?" labels.

    Returns:
        (raw_images, featurized_images, labels) where featurized_images is
        a numpy array of simple image featurizations and labels is a boolean
        numpy array marking the digit 3.
    """
    raw_images = samples.loadDataFile("digitdata/testimages", 1000, 28, 28)
    labels = np.array(samples.loadLabelsFile("digitdata/testlabels", 1000)) == 3
    featurized = np.array(map(dcu.simple_image_featurization, raw_images))
    return raw_images, featurized, labels
    def recog(self):
        """GUI handler: classify one image and show the guess on the canvas.

        Highlights the "recognize" button (b3), loads a single datum from
        "digitdata/testingimages" (NOTE(review): presumably the user-drawn
        image written by the canvas — confirm), classifies it with the
        global iClassifier, and draws the guessed digit on canvas c4.
        """
        global TRAIN_NUM, iClassifier, DIGIT_DATUM_HEIGHT
        global DIGIT_DATUM_WIDTH, myList, iClassifier, c4Temp

        # Visually select this button and deselect the others.
        self.b3["background"] = "blue"
        self.b3["fg"] = "#FFF"
        for i in self.s:
            if i != self.b3:
                i["background"] = "black"
                i["fg"] = "#3cecff"
        # Load exactly one datum and its labels (myList is the log widget).
        rawTestData = samples.loadDataFile("digitdata/testingimages", 1,
                                           DIGIT_DATUM_WIDTH,
                                           DIGIT_DATUM_HEIGHT, myList)
        testLabels = samples.loadLabelsFile("digitdata/testlabels",
                                            TEST_SET_SIZE, myList)
        testData = map(basicFeatureExtractorDigit, rawTestData)
        guess = iClassifier.classify(testData, myList)
        # Replace any previously drawn guess before drawing the new one.
        if c4Temp != 0:
            c4.delete(c4Temp)
        c4Temp = c4.create_text(65,
                                30,
                                text=guess[0],
                                fill="#3cecff",
                                justify=CENTER,
                                font=('times', 24, 'bold'))
    def usrTrain(self):
        """GUI handler: prompt via a dialog, then train the classifier.

        Highlights the "user train" button (b4), opens a modal MyDialog to
        collect settings, loads TRAIN_NUM data/labels, and trains the global
        iClassifier, logging progress to the myList widget.
        """
        global TRAIN_NUM, iClassifier, DIGIT_DATUM_HEIGHT
        global root, DIGIT_DATUM_WIDTH, myList

        # Visually select this button and deselect the others.
        self.b4["background"] = "blue"
        self.b4["fg"] = "#FFF"
        for i in self.s:
            if i != self.b4:
                i["background"] = "black"
                i["fg"] = "#3cecff"
        # Modal dialog; wait until it is closed before continuing.
        d = MyDialog(root)
        root.wait_window(d.top)

        # Load training images
        # NOTE(review): path is "testingimages" although this is described
        # as training data — confirm whether this file holds user input.
        rawTrainingData = samples.loadDataFile("digitdata/testingimages",
                                               TRAIN_NUM, DIGIT_DATUM_WIDTH,
                                               DIGIT_DATUM_HEIGHT, myList)
        myList.insertItem("Training data imported.")

        trainingData = map(basicFeatureExtractorDigit, rawTrainingData)
        # Load training Labels
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                TRAIN_NUM, myList)
        myList.insertItem("Training labels imported.")
        # Training
        iClassifier.train(trainingData, trainingLabels, myList)
        myList.insertItem("Training Completed.")
        myList.insertItem("==================")
# Example #14
def demo_faces(weights):
    """Score a perceptron weight vector on the face validation set.

    Args:
        weights: sequence of per-feature weights.

    Returns:
        Accuracy (float percentage) over 301 validation images, where a
        weighted feature sum >= 0 predicts label 1 (face) and < 0 predicts
        label 0.
    """
    n_images = 301
    start = time.time()  # NOTE(review): 'start' is never used afterwards

    data_path = 'facedata/facedatavalidation'
    labels_path = 'facedata/facedatavalidationlabels'

    images = samples.loadDataFile(data_path, n_images, 60, 70)
    labels = samples.loadLabelsFile(labels_path, n_images)
    featureslist = perceptron.compute_features2(images)

    results = []
    for image in range(len(images)):
        # Dot product of the weight vector with this image's features.
        # (The original accumulated into a local named 'sum', shadowing
        # the builtin.)
        score = sum(w * f for w, f in zip(weights, featureslist[image]))
        results.append((score, labels[image]))

    # Count correct predictions.
    correctcount = 0
    for score, label in results:
        if score >= 0.0 and label == 1:
            correctcount += 1
        elif score < 0.0 and label == 0:
            correctcount += 1

    return (float(correctcount) * 100 / float(len(labels)))
    def valid(self):
        """GUI handler: evaluate the trained classifier on validation data.

        Loads TEST_SET_SIZE validation images/labels (sizes and constants
        come from the `temp` module), classifies them with temp.iClassifier,
        logs the correct count, and displays the accuracy percentage on
        output widget slot 1.
        """
        global iClassifier
        global pCorrect, c5, c5Temp
        # Visually select the "Validate Training" button.
        self.modBG(self.button["Validate Training"])

        rawValidationData = samples.loadDataFile("digitdata/validationimages",
                                                 temp.TEST_SET_SIZE,
                                                 temp.DIGIT_DATUM_WIDTH,
                                                 temp.DIGIT_DATUM_HEIGHT,
                                                 temp.LOG_LIST)
        temp.LOG_LIST.insertItem("Testing data imported.")
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels",
                                                  temp.TEST_SET_SIZE,
                                                  temp.LOG_LIST)
        temp.LOG_LIST.insertItem("Testing labels imported.")
        validationData = map(samples.basicFeatureExtractorDigit,
                             rawValidationData)

        guesses = temp.iClassifier.classify(validationData, temp.LOG_LIST)
        # Count matching guesses against the true validation labels.
        correct = [
            guesses[i] == validationLabels[i]
            for i in range(len(validationLabels))
        ].count(True)
        temp.LOG_LIST.insertItem(
            str(correct) + (" correct out of " + str(len(validationLabels))))
        temp.LOG_LIST.insertItem("=========================")
        temp.iOutput.display(
            1,
            str(100.0 * correct / len(validationLabels)) + "%")
# Example #16
def get_neuron_training_data():
    """Load digit training images with binary "is it a 3?" labels.

    The number of examples comes from the module-level num_train_examples.

    Returns:
        (raw_images, featurized_images, labels) where featurized_images is
        a numpy array of simple image featurizations and labels is a boolean
        numpy array marking the digit 3.
    """
    raw_images = samples.loadDataFile("digitdata/trainingimages", num_train_examples, 28, 28)
    labels = np.array(samples.loadLabelsFile("digitdata/traininglabels", num_train_examples)) == 3
    featurized = np.array(map(dcu.simple_image_featurization, raw_images))
    return raw_images, featurized, labels
    def autoTrain(self):
        """GUI handler: train the classifier on the standard training set.

        Highlights the "auto train" button (b0), loads variable.TRAIN_NUM
        training images/labels, and trains variable.iClassifier, logging
        progress to the myList widget.
        """
        global iClassifier, DIGIT_DATUM_HEIGHT, DIGIT_DATUM_WIDTH, myList
        # Visually select this button and deselect the others.
        self.b0["background"] = "blue"
        self.b0["fg"] = "#FFF"
        for i in self.s:
            if i != self.b0:
                i["background"] = "black"
                i["fg"] = "#3cecff"

        # Load training images
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages",
                                               variable.TRAIN_NUM,
                                               DIGIT_DATUM_WIDTH,
                                               DIGIT_DATUM_HEIGHT, myList)
        myList.insertItem("Training data imported.")

        trainingData = map(basicFeatureExtractorDigit, rawTrainingData)
        # Load training Labels
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels",
                                                variable.TRAIN_NUM, myList)
        myList.insertItem("Training labels imported.")
        # Training
        # NOTE(review): the global iClassifier is declared above but the
        # training call uses variable.iClassifier — confirm which is live.
        variable.iClassifier.train(trainingData, trainingLabels, myList)
        myList.insertItem("Training Completed.")
        myList.insertItem("==================")
# Example #18
def callData():
    """Train an SVM digit classifier on 1000 training / 1000 test examples.

    Loads flattened training data plus test data used as the validation
    set, then trains an SVMClassifier over labels 0-9.
    """
    n_train = 1000
    n_test = 1000

    trainingData = FlatInput(
        n_train, loadDataFile("data/digitdata/trainingimages", n_train, 28, 28))
    labels = loadLabelsFile("data/digitdata/traininglabels", n_train)

    validationData = FlatInput(
        n_test, loadDataFile("data/digitdata/testimages", n_test, 28, 28))
    val_labels = loadLabelsFile("data/digitdata/testlabels", n_test)

    classifier = SVMClassifier(list(range(10)))
    classifier.train(trainingData, labels, validationData, val_labels)
# Example #19
def validate_weights(digit, final_weights):
    """Measure perceptron accuracy on a validation set.

    Args:
        digit: when >= 0, evaluate one-vs-rest for this digit on the digit
            validation set; when < 0, evaluate face/not-face on the face
            validation set.
        final_weights: per-feature weight vector.

    Returns:
        Accuracy as a percentage (integer division, matching the original).
    """
    # Load the appropriate validation split; features are computed the
    # same way for either branch.
    if digit >= 0:
        images = samples.loadDataFile('digitdata/validationimages', 1000, 28,
                                      28)
        labels = samples.loadLabelsFile('digitdata/validationlabels', 1000)
    else:
        images = samples.loadDataFile('facedata/facedatavalidation', 301, 60,
                                      70)
        labels = samples.loadLabelsFile('facedata/facedatavalidationlabels',
                                        301)
    features_list = compute_features2(images)

    accuracylist = []
    for idx in range(len(images)):
        # Weighted sum of this image's precomputed features.
        fsum = 0
        for j in range(len(features_list[idx])):
            fsum += final_weights[j] * features_list[idx][j]

        if digit >= 0:
            # One-vs-rest: a non-negative score predicts "this digit";
            # correct iff the prediction agrees with the label.
            accuracylist.append((fsum >= float(0)) == (labels[idx] == digit))
        else:
            # Faces: non-negative score predicts label 1, negative label 0.
            if fsum < float(0) and labels[idx] == 0:
                accuracylist.append(True)
            elif fsum >= float(0) and labels[idx] == 1:
                accuracylist.append(True)
            else:
                accuracylist.append(False)

    accuracy_count = accuracylist.count(True)
    accuracy = accuracy_count * 100 / len(accuracylist)
    return accuracy
# Example #20
def callData():
    """Train an MLP digit classifier with randomly initialized weights.

    Loads 100 training and 100 validation digit examples, flattens them
    into util.Counter feature dicts, builds an MLPClassifier with 25 hidden
    neurons, seeds both weight layers with random values over the 784
    (28x28) inputs, and trains.
    """
    n = 100
    hidden_neurons = 25
    #items = loadDataFile("data/digitdata/trainingimages", n,28,28)
    #for item in items:
    #items = util.Counter()

    items = loadDataFile("data/digitdata/trainingimages", n, 28, 28)
    flat_item = FlatInput(n, items)

    # Convert each flattened image into a util.Counter keyed by pixel index.
    trainingData = {}
    for i in range(len(flat_item)):
        trainingData[i] = util.Counter()
        for j in range(len(flat_item[i])):
            trainingData[i][j] = flat_item[i][j]

    labels = loadLabelsFile("data/digitdata/traininglabels", n)

    val_items = loadDataFile("data/digitdata/validationimages", n, 28, 28)
    flat_val = FlatInput(n, val_items)

    # Same Counter conversion for the validation split.
    validationData = {}
    for i in range(len(flat_val)):
        validationData[i] = util.Counter()
        for j in range(len(flat_val[i])):
            validationData[i][j] = flat_val[i][j]

    val_labels = loadLabelsFile("data/digitdata/validationlabels", n)
    data = MLPClassifier([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 50, hidden_neurons,
                         .5)

    # Random initial output-layer weights: one Counter of 784 inputs per label.
    weights_op = {}
    for w in range(0, 10):
        weights_op[w] = util.Counter()
        for i in range(0, 784):
            weights_op[w][i] = random.random()

    # Random initial hidden-layer weights: one Counter per hidden neuron.
    weights_L2 = {}
    for w in range(0, hidden_neurons):
        weights_L2[w] = util.Counter()
        for i in range(0, 784):
            weights_L2[w][i] = random.random()

    data.setWeights(weights_L2, weights_op)

    data.train(trainingData, labels, validationData, val_labels)
def runClassifier(args, options):
	featureFunction = args['featureFunction']
	classifier = args['classifier']
	printImage = args['printImage']
	
	# Load data
	numTraining = options.training
	numTest = options.test

	rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
	trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
	rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
	validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
	rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
	testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)


	# Extract features
	print "Extracting features..."
	trainingData = map(featureFunction, rawTrainingData)
	validationData = map(featureFunction, rawValidationData)
	testData = map(featureFunction, rawTestData)

	# Conduct training and testing
	print "Training..."
	classifier.train(trainingData, trainingLabels, validationData, validationLabels, options.validate)
	print "Validating..."
	guesses = classifier.classify(validationData)
	correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
	print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
	
	if(options.classifier == "perceptron"):
		f = open("perceptron_valid.csv","a")
		f.write(str(len(trainingData))+","+str(100*correct/(1.0*(len(validationData))))+'\n')
		f.close()
	
	print "Testing..."
	guesses = classifier.classify(testData)
	correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
	print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
	analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
	
	if(options.classifier == "perceptron"):
		f = open("perceptron_test.csv","a")
		f.write(str(len(trainingData))+","+str(100*correct/(1.0*(len(testData))))+'\n')
		f.close()
# Example #22
def run(config, codeRoot=''):
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    args = {}
    args['metric'] = get_metric(config['metric'])

    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data
        # Load data
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Load test
        testData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/test_data.csv" % data))
        testLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/test_labels.csv" % data))

        # Conduct training and testing
        args['maxdepth'] = int(config['maxdepth'][i])
        classifier.train(trainingData, trainingLabels, args)
        guesses = classifier.classify(testData)
        min_accuracy_required = float(config['accuracy'][i])
        if is_good_classifier(guesses, testLabels, min_accuracy_required):
            print '\t\tOK'
            points += pointsPerCase
        else:
            points += (pointsPerCase / 2.)
            print '\t\tMost frequent classifier is better'

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
def get_perceptron_test_data():
    """Load the 1000-image 28x28 digit test set.

    Returns (raw_images, featurized_array, string_labels).
    """
    raw_images = samples.loadDataFile("digitdata/testimages.txt", 1000, 28, 28)
    raw_labels = samples.loadLabelsFile("digitdata/testlabels.txt", 1000)
    string_labels = map(str, raw_labels)

    feature_rows = [dcu.simple_image_featurization(img) for img in raw_images]
    featurized = np.array(feature_rows)
    return raw_images, featurized, string_labels
def testing(num):
    trainData = np.load("traindigitbasic.npy")
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = np.load("testdigitbasic.npy")
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = np.load("validationdigitbasic.npy")
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    neural = NeuralNetworkClassifier(28 * 28, 50, 10, num, 3.5)
    neural.train(trainData[:, 0:num], trainLabels, 100)
    print "Test Data"
    guess = neural.classify(testData)
    samples.verify(neural, guess, testLabels)
    print "==================================="
    print "Validation Data"
    guess = neural.classify(validData)
    samples.verify(neural, guess, validLabels)
# Example #25
# 0
def testing(num):
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    nb = NaiveBayesClassifier(1,0)
    nb.train(trainData, trainLabels)
    print "==================================="
    print "Test Data"
    guess = nb.classify(testData)
    samples.verify(nb,guess,testLabels)
    print "==================================="
    print "Validation Data"
    guess=nb.classify(validData)
    samples.verify(nb,guess,validLabels)
def testing(num):
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    perceptron=PerceptronClassifier(trainData, trainLabels,0)
    perceptron.train(trainData, trainLabels,10)
    print "==================================="
    print "Test Data"
    guess=perceptron.classify(testData)
    samples.verify(perceptron, guess, testLabels)
    print "==================================="
    print "Validation Data"
    guess=perceptron.classify(validData)
    samples.verify(perceptron,guess,validLabels)
def testing(num):
    trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70)
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70)
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70)
    validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301)

    perceptron=PerceptronClassifier(trainData, trainLabels,0)
    perceptron.train(trainData, trainLabels,10)
    print "==================================="
    print "Test Data"
    guess=perceptron.classify(testData)
    samples.verify(perceptron, guess, testLabels)
    print "==================================="
    print "Validation Data"
    guess=perceptron.classify(validData)
    samples.verify(perceptron,guess,validLabels)
def testing(num):
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    perceptron=PerceptronClassifier(trainData, trainLabels,0)
    perceptron.train(trainData, trainLabels,10)
    print "***********************************"
    print "*************Test Data*************"
    guess=perceptron.classify(testData)
    samples.verify(perceptron, guess, testLabels)
    print "***********************************"
    print "************Valid Data*************"
    guess=perceptron.classify(validData)
    samples.verify(perceptron,guess,validLabels)
# Example #29
# 0
def testing(num):
    trainData = np.load("traindigitbasic.npy")
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = np.load("testdigitbasic.npy")
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = np.load("validationdigitbasic.npy")
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels",
                                         1000)

    neural = NeuralNetworkClassifier(28 * 28, 50, 10, num, 3.5)
    neural.train(trainData[:, 0:num], trainLabels, 100)
    print "*************Test Data*************"
    guess = neural.classify(testData)
    samples.verify(neural, guess, testLabels)
    print "***********************************"
    print "************Valid Data*************"
    guess = neural.classify(validData)
    samples.verify(neural, guess, validLabels)
# Example #30
# 0
def testing(num):
    trainData = samples.loadImagesFile("data/digitdata/trainingimages", num, 28, 28)
    trainLabels = samples.loadLabelsFile("data/digitdata/traininglabels", num)
    testData = samples.loadImagesFile("data/digitdata/testimages", 1000, 28, 28)
    testLabels = samples.loadLabelsFile("data/digitdata/testlabels", 1000)
    validData = samples.loadImagesFile("data/digitdata/validationimages", 1000, 28, 28)
    validLabels = samples.loadLabelsFile("data/digitdata/validationlabels", 1000)

    nb = NaiveBayesClassifier(1,0)
    nb.train(trainData, trainLabels)
    print "***********************************"
    print "*************Test Data*************"
    guess = nb.classify(testData)
    samples.verify(nb,guess,testLabels)
    print "***********************************"
    print "************Valid Data*************"
    guess=nb.classify(validData)
    samples.verify(nb,guess,validLabels)
# Example #31
# 0
def sample_faces(sample_percentage, trainingpath, labelspath, n_images):
    """Return a random sample_percentage% sample of the face data.

    Loads n_images 60x70 face images from trainingpath and their labels
    from labelspath, shuffles images and labels in unison, and returns
    the first sample_percentage percent as a (faces, labels) pair of
    tuples.

    BUG FIX: the n_images parameter was previously ignored in favor of a
    hard-coded 451 (both for loading and for computing the sample size);
    it is now honored.
    """
    faces = samples.loadDataFile(trainingpath, n_images, 60, 70)
    labels = samples.loadLabelsFile(labelspath, n_images)
    # Shuffle faces and labels together so pairs stay aligned.
    joinedlists = list(zip(faces, labels))
    random.shuffle(joinedlists)
    faces, labels = zip(*joinedlists)
    n_faces = int(float(sample_percentage) / float(100) * n_images)

    return faces[:n_faces], labels[:n_faces]
# Example #32
# 0
def testing(num):
    trainData = samples.loadImagesFile("data/facedata/facedatatrain", num, 60, 70)
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    testData = samples.loadImagesFile("data/facedata/facedatatest", 150, 60, 70)
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = samples.loadImagesFile("data/facedata/facedatavalidation", 301, 60, 70)
    validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301)

    nb = NaiveBayesClassifier(1, 0)
    nb.train(trainData, trainLabels)
    print "==================================="
    print "Test Data"
    guess = nb.classify(testData)
    samples.verify(nb, guess, testLabels)
    print "==================================="
    print "Validation Data"
    guess = nb.classify(validData)
    samples.verify(nb, guess, validLabels)
# Example #33
# 0
def readDigitData(trainingSize=2000, testSize=1000):
    """Load digit training, validation and test data with their labels.

    trainingSize: number of training examples to load.
    testSize: number of validation/test examples to load.

    Returns (trainingData, trainingLabels, validationData,
    validationLabels, testData, testLabels).

    BUG FIX: testSize was unconditionally overwritten with 1000 inside
    the body, silently ignoring the caller's argument; the override has
    been removed so the parameter is honored.
    """
    rootdata = 'digitdata/'
    # loading digits data
    trainingData = samples.loadDataFile(rootdata + 'trainingimages',
                                        trainingSize, DIGIT_DATUM_WIDTH,
                                        DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile(rootdata + "traininglabels",
                                            trainingSize)
    validationData = samples.loadDataFile(rootdata + "validationimages",
                                          testSize, DIGIT_DATUM_WIDTH,
                                          DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile(rootdata + "validationlabels",
                                              testSize)
    testData = samples.loadDataFile("digitdata/testimages", testSize,
                                    DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", testSize)
    return (trainingData, trainingLabels, validationData, validationLabels,
            testData, testLabels)
def testing(num):
    trainData = np.load("trainfacebasic.npy")
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    testData = np.load("testfacebasic.npy")
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = np.load("validationfacebasic.npy")
    validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301)
    loop=True
    while loop:
        neural = NeuralNetworkClassifier(60 * 70, 500, 1, num, 0.03)
        neural.train(trainData[:,0:num], trainLabels, 100)
        print "Test Data"
        guess = neural.classify(testData)
        loop=samples.verify(neural, guess, testLabels)
        if loop:
            continue
        print "==================================="
        print "Validation Data"
        guess = neural.classify(validData)
        samples.verify(neural, guess, validLabels)
def get_perceptron_training_data():
    """Load num_train_examples 28x28 digit training images.

    Returns (raw_images, featurized_array, string_labels).
    """
    raw_images = samples.loadDataFile("digitdata/trainingimages.txt",
                                      num_train_examples, 28, 28)
    raw_labels = samples.loadLabelsFile("digitdata/traininglabels.txt",
                                        num_train_examples)
    string_labels = [str(label) for label in raw_labels]

    featurized = np.array([dcu.simple_image_featurization(img)
                           for img in raw_images])
    return raw_images, featurized, string_labels
# Example #36
# 0
def testing(num):
    trainData = np.load("trainfaceadvanced.npy")
    trainLabels = samples.loadLabelsFile("data/facedata/facedatatrainlabels", num)
    testData = np.load("testfaceadvanced.npy")
    testLabels = samples.loadLabelsFile("data/facedata/facedatatestlabels", 151)
    validData = np.load("validationfaceadvanced.npy")
    validLabels = samples.loadLabelsFile("data/facedata/facedatavalidationlabels", 301)

    loop=True
    while loop:
        neural = NeuralNetworkClassifier(60 * (70+1), 500, 1, num, 0.03)
        neural.train(trainData[:,0:num], trainLabels, 100)
        print "Test Data"
        guess = neural.classify(testData)
        loop=samples.verify(neural, guess, testLabels)
        if loop:
            continue
        print "==================================="
        print "Validation Data"
        guess = neural.classify(validData)
        samples.verify(neural, guess, validLabels)
def runClassifier(args, options):
  """Train and evaluate a classifier on the faces or digits data set.

  args: dict with 'classifier' (already constructed) and 'printImage'
        (callback passed through to analysis()).
  options: parsed command-line options; this function reads .data,
           .classifier, .training and .test.

  Loads raw images and labels, randomly subsamples training and test
  sets to the requested sizes, extracts HOG features for the linear SVM
  (enhanced features otherwise), trains with timing, then prints
  validation and test accuracy and runs analysis().
  """
  classifier = args['classifier']
  printImage = args['printImage']
  # Load data
  numTraining = options.training
  numTest = options.test
  if(options.data=="faces"):
    print "loading face data set"
    # NOTE(review): unlike other loaders in this file, these calls pass no
    # count argument -- presumably this loadDataFile variant reads the
    # whole file; confirm against samples.loadDataFile's signature.
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain",FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels")
    rawValidationData = samples.loadDataFile("facedata/facedatavalidation",FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatavalidationlabels")
    rawTestData = samples.loadDataFile("facedata/facedatatest", FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels")
    # Down-sample training and test sets to the requested sizes; the
    # validation set is used in full.
    rawTrainingData,trainingLabels=randomSample(rawTrainingData,trainingLabels,numTraining)
    rawTestData,testLabels=randomSample(rawTestData,testLabels,numTest)
  else:
    print "loading digit data set"
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages",DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels")
    rawValidationData = samples.loadDataFile("digitdata/validationimages",DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels")
    rawTestData = samples.loadDataFile("digitdata/testimages",DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels")
    rawTrainingData, trainingLabels = randomSample(rawTrainingData, trainingLabels, numTraining)
    rawTestData, testLabels = randomSample(rawTestData, testLabels, numTest)
  print "Extracting features..."
  if (options.classifier == "linear_svm"):
        # HOG features; the transpose puts one example per column --
        # presumably the SVM implementation expects column-major sample
        # matrices; confirm against its train/classify signatures.
        if (options.data == "faces"):
            featureFunction = HogFeatureFaceImg
        else:
            featureFunction=HogFeatureImgDigit
        trainingData = map(featureFunction, rawTrainingData)
        trainingData=np.array(trainingData).transpose()
        validationData=map(featureFunction, rawValidationData)
        validationData = np.array(validationData).transpose()
        testData=map(featureFunction, rawTestData)
        testData = np.array(testData).transpose()
  else:
      if (options.data == "faces"):
          featureFunction = enhancedFeatureExtractorFace
      else:
          featureFunction = enhancedFeatureExtractorDigit
      trainingData = map(featureFunction, rawTrainingData)
      validationData = map(featureFunction, rawValidationData)
      testData = map(featureFunction, rawTestData)
  print "Training..."
  # Time the training phase only.
  start = timeit.default_timer()
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  stop = timeit.default_timer()
  print  stop - start, " s"
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
# Example #38
# 0
def runClassifier(args, options):
  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']
  
  # Load data  
  numTraining = options.training
  numTest = options.test

  if(options.data=="faces"):
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
    rawTestData = samples.loadDataFile("facedata/facedatatest", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)
  
  # Extract features
  print "Extracting features..."
  trainingData = map(featureFunction, rawTrainingData)
  validationData = map(featureFunction, rawValidationData)
  testData = map(featureFunction, rawTestData)
  
  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
  
  # do odds ratio computation if specified at command line
  if((options.odds) & (options.classifier == "naiveBayes" or (options.classifier == "nb")) ):
    label1, label2 = options.label1, options.label2
    features_odds = classifier.findHighOddsFeatures(label1,label2)
    if(options.classifier == "naiveBayes" or options.classifier == "nb"):
      string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
    else:
      string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)    
      
    print string3
    printImage(features_odds)

  if((options.weights) & (options.classifier == "perceptron")):
    for l in classifier.legalLabels:
      features_weights = classifier.findHighWeightFeatures(l)
      print ("=== Features with high weight for label %d ==="%l)
      printImage(features_weights)
# Example #39
# 0
def runClassifier(args, options):

  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']
      
  # Load data  
  numTraining = options.training

  if(options.data=="faces"):
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    
  
  # Extract features
  print "Extracting features..."
  trainingData = map(featureFunction, rawTrainingData)
  validationData = map(featureFunction, rawValidationData)
  testData = map(featureFunction, rawTestData)
  
  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
  
  # do odds ratio computation if specified at command line
  if((options.odds) & (options.classifier != "mostFrequent")):
    label1, label2 = options.label1, options.label2
    features_odds = classifier.findHighOddsFeatures(label1,label2)
    if(options.classifier == "naiveBayes" or options.classifier == "nb"):
      string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
    else:
      string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)    
      
    print string3
    printImage(features_odds)
# Example #40
# 0
def runClassifier(args, options):

  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']
      
  # Load data  
  numTraining = options.training

  if(options.data=="faces"):
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    
  
  # Extract features
  print "Extracting features..."
  trainingData = map(featureFunction, rawTrainingData)
  validationData = map(featureFunction, rawValidationData)
  testData = map(featureFunction, rawTestData)
  
  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
def runClassifier(args, options):
    """Build the classifier and feature extractor selected on the command
    line, load the chosen data set, train, and report training and test
    accuracy.

    args: dict with 'classifier' (placeholder, replaced below) and
          'classifierArgs' (keyword arguments for the classifier ctor).
    options: command-line options; reads .data, .training, .test,
             .classifier and .features.

    Exits with status 2 on an unknown data set or a non-positive training
    set size.
    """
    classifier = args['classifier']
    classifierArgs = args['classifierArgs']

    # import statements here because sys.path may be altered to point
    # to student code
    import featureExtractors
    import featureExtractorsBasic
    import mostFrequent
    import decisionTree
    import decisionStump
    import naiveBayes
    import perceptron
    import mira
    import diffDecisionTree

    # Load data: each branch yields raw training/test data, labels, and
    # the set of legal labels for the classifier.
    numTraining = options.training
    numTest = options.test
    if(options.data.endswith('.arff')):
        # ARFF: one file holds everything; slice it into train then test.
        data, labels = samples.loadARFFDataFile("data/arffdata/"+options.data, numTraining+numTest)
        rawTrainingData, rawTestData = data[:numTraining], data[numTraining:numTraining+numTest]
        trainingLabels, testLabels = labels[:numTraining], labels[numTraining:numTraining+numTest]
        legalLabels = set(trainingLabels)
    elif(options.data=="spam"):
        rawTrainingData = samples.loadSpamData("data/spamdata/trainingdata", numTraining)
        trainingLabels = samples.loadLabelsFile("data/spamdata/traininglabels.txt", numTraining)
        rawTestData = samples.loadSpamData("data/spamdata/testdata", numTest)
        testLabels = samples.loadLabelsFile("data/spamdata/testlabels.txt", numTest)
        legalLabels = ['1', '0']
    elif(options.data=="digits"):
        DIGIT_DATUM_WIDTH=28
        DIGIT_DATUM_HEIGHT=28
        rawTrainingData = samples.loadDigitsDataFile("data/digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("data/digitdata/traininglabels", numTraining)
        rawTestData = samples.loadDigitsDataFile("data/digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("data/digitdata/testlabels", numTest)
        legalLabels = set(trainingLabels)
    else:
        print "Unknown dataset", options.data
        print USAGE_STRING
        sys.exit(2)

    # Load classifier (replaces the placeholder from args)
    if(options.classifier == "mostFrequent"):
        classifier = mostFrequent.MostFrequentClassifier(legalLabels, **classifierArgs)
    elif(options.classifier == "dt" or options.classifier == "decisionTree"):
        classifier = decisionTree.DecisionTreeClassifer(legalLabels, **classifierArgs)
    elif(options.classifier == "diffTree"):
        classifier = diffDecisionTree.DiffDecisionTreeClassifer(legalLabels, **classifierArgs)
    elif(options.classifier == "stump"):
        classifier = decisionStump.DecisionStumpClassifer(legalLabels, **classifierArgs)
    elif(options.classifier == "naiveBayes" or options.classifier == "nb"):
        classifier = naiveBayes.NaiveBayesClassifier(legalLabels, **classifierArgs)
    elif(options.classifier == "perceptron"):
        classifier = perceptron.PerceptronClassifier(legalLabels,**classifierArgs)
    elif(options.classifier == "mira"):
        classifier = mira.MiraClassifier(legalLabels,**classifierArgs)

    # Load feature extractors
    if (options.data.endswith('.arff')):
        # NOTE(review): binary features for nb/perceptron/mira only --
        # presumably those classifiers require binarized input; confirm.
        if options.classifier in ['nb', 'perceptron', 'mira']:
            make_binary = True
        else:
            make_binary = False
        extractor = featureExtractorsBasic.IdentityFeatureExtractor(make_binary)
    elif (options.data=='spam'):
        if options.features:
            extractor = featureExtractors.EnhancedEmailFeatureExtractor()
        else:
            extractor = featureExtractors.EmailFeatureExtractor()
    else:
        assert options.data=="digits"
        if options.features:
            extractor = featureExtractors.EnhancedDigitFeatureExtractor()
        else:
            extractor = featureExtractors.DigitFeatureExtractor()

    if options.training <= 0:
        print "Training set size should be a positive integer (you provided: %d)" % options.training
        print USAGE_STRING
        sys.exit(2)

    featureFunction = extractor.extractFeatures

    # Preprocess data: this map() call is run purely for preProcess's side
    # effects (Python 2 map is eager); the result list is discarded.
    print "Preprocessing data..."
    map(extractor.preProcess, rawTrainingData)
    extractor.finalizeFeatures()

    assert len(rawTrainingData) == len(trainingLabels)

    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    # validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    assert len(rawTrainingData) == len(trainingLabels)

    # Conduct training and testing; training accuracy is reported first.
    print "Training..."
    classifier.train(trainingData, trainingLabels)
    guesses = classifier.classify(trainingData)
    correct = [guesses[i] == trainingLabels[i] for i in range(len(trainingLabels))].count(True)
    print str(correct), ("correct out of " + str(len(trainingLabels)) + " (%.1f%%) on training data.") % (100.0 * correct / len(trainingLabels))


    if len(testData) > 0:
        print "Testing..."
        guesses = classifier.classify(testData)
        correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
        print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%) on test data.") % (100.0 * correct / len(testLabels))
    else:
        print "The test data set is empty."

    # have classifier print out some helpful information
    classifier.printDiagnostics()
# Example #42
# 0
def runClassifier(args, options):

  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']
      
  # Load data  
  numTraining = options['train']

  if(options['data']=="faces"):
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatatrain", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)
    
  
  # Extract features
  print "Extracting features..."
  trainingData = map(featureFunction, rawTrainingData)
  validationData = map(featureFunction, rawValidationData)
  testData = map(featureFunction, rawTestData)
  
  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  util.pause()
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
  
  # do odds ratio computation if specified at command line
  if((options['odds']) & (options['classifier'] != "mostfrequent")):
    class1, class2 = options['class1'], options['class2']
    features_class1,features_class2,features_odds = classifier.findHighOddsFeatures(class1,class2)
    if(options['classifier'] == "naivebayes"):
      string1 = "=== Features with max P(F_i = 1 | class = %d) ===" % class1
      string2 = "=== Features with max P(F_i = 1 | class = %d) ===" % class2
      string3 = "=== Features with highest odd ratio of class %d over class %d ===" % (class1, class2)
    else:
      string1 = "=== Features with largest weight for class %d ===" % class1
      string2 = "=== Features with largest weight for class %d ===" % class2
      string3 = "=== Features with for which weight(class %d)-weight(class %d) is biggest ===" % (class1, class2)    
      
    print string1
    printImage(features_class1)
    print string2
    printImage(features_class2)
    print string3
    printImage(features_odds)
# Example #43
# 0
def writeLabeledData(prefix, labeled_data):
    """Write labeled data to two parallel line-oriented files.

    prefix: path prefix; the function writes "<prefix>images" (one datum's
        str() per line) and "<prefix>labels" (one label's str() per line),
        with matching line order.
    labeled_data: non-empty iterable of (datum, label) pairs; unpacking
        the transposed zip() would fail on an empty sequence.
    """
    datums, labels = zip(*labeled_data)

    # "with" already closes the files on exit; the original's explicit
    # f.close() calls inside each block were redundant and were removed.
    with open(prefix + "images", 'w') as f:
        for datum in datums:
            f.write(str(datum) + "\n")

    with open(prefix + "labels", 'w') as f:
        for label in labels:
            f.write(str(label) + "\n")

# Build one shuffled pool out of the full digit corpus.
# Load the standard digit splits at their full sizes
# (5000 train / 1000 validation / 1000 test).
rawTrainingData = samples.loadDataFile("digitdata/trainingimages", 5000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", 5000)
rawValidationData = samples.loadDataFile("digitdata/validationimages", 1000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
validationLabels = samples.loadLabelsFile("digitdata/validationlabels", 1000)
rawTestData = samples.loadDataFile("digitdata/testimages", 1000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
testLabels = samples.loadLabelsFile("digitdata/testlabels", 1000)


# Concatenate all three splits; labels stay aligned with their images.
all_data = rawTrainingData + rawValidationData + rawTestData
all_labels = trainingLabels + validationLabels + testLabels

# Pair each datum with its label, then draw a random permutation of the
# pool's indices.  NOTE(review): len(labeled_data) relies on zip()
# returning a list (Python 2 behaviour).
labeled_data = zip(all_data, all_labels)

perm = np.random.permutation(len(labeled_data))

permuted_data = []
for i in perm:
# Example #44
# 0
def runClassifier(args, options):

    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']

    # Load data    
    numTraining = options.training
    numTest = options.test

    if(options.data=="faces"):
        rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
        rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
        rawTestData = samples.loadDataFile("facedata/facedatatest", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)


    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Start training..."
    start = time.time()
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    end = time.time() - start
    print "Traning time: " + str(end)
    print "Start validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print "Validation result: ", str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    print "Start testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print "Testing result: ", str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    #analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    if((options.odds) & (options.classifier == NB) ):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1,label2)
        if(options.classifier == NB):
            string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)        

        print string3
        printImage(features_odds)

    if((options.weights) & (options.classifier == PT)):
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print ("=== Features with high weight for label %d ==="%l)
            printImage(features_weights)
def runClassifier(args, options):
    featureFunction = args['featureFunction']
    classifier = args['classifier']
    printImage = args['printImage']
    
    # Load data
    numTraining = options.training
    numTest = options.test

    if(options.data=="pacman"):
        agentToClone = args.get('agentToClone', None)
        trainingData, validationData, testData = MAP_AGENT_TO_PATH_OF_SAVED_GAMES.get(agentToClone, (None, None, None))
        trainingData = trainingData or args.get('trainingData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][0]
        validationData = validationData or args.get('validationData', False) or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][1]
        testData = testData or MAP_AGENT_TO_PATH_OF_SAVED_GAMES['ContestAgent'][2]
        rawTrainingData, trainingLabels = samples.loadPacmanData(trainingData, numTraining)
        rawValidationData, validationLabels = samples.loadPacmanData(validationData, numTest)
        rawTestData, testLabels = samples.loadPacmanData(testData, numTest)
    else:
        rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
        rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
        rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
        testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)


    # Extract features
    print "Extracting features..."
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

    # Conduct training and testing
    print "Training..."
    classifier.train(trainingData, trainingLabels, validationData, validationLabels)
    print "Validating..."
    guesses = classifier.classify(validationData)
    correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
    print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
    print "Testing..."
    guesses = classifier.classify(testData)
    correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
    print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
    analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)

    # do odds ratio computation if specified at command line
    if((options.odds) & (options.classifier == "naiveBayes" or (options.classifier == "nb")) ):
        label1, label2 = options.label1, options.label2
        features_odds = classifier.findHighOddsFeatures(label1,label2)
        if(options.classifier == "naiveBayes" or options.classifier == "nb"):
            string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
        else:
            string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)

        print string3
        printImage(features_odds)

    if((options.weights) & (options.classifier == "perceptron")):
        for l in classifier.legalLabels:
            features_weights = classifier.findHighWeightFeatures(l)
            print ("=== Features with high weight for label %d ==="%l)
            printImage(features_weights)
# Example #46
# 0
def runClassifier(args, options):
  #print 'args: ', args
  #print 'options', options
  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']
      
  # Load data  
  numTraining = options.training
  numTest = options.test

  if(options.data=="faces"):
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatatrain", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("facedata/facedatatrainlabels", numTest)
    rawTestData = samples.loadDataFile("facedata/facedatatest", numTest,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("facedata/facedatatestlabels", numTest)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", numTest)
    rawTestData = samples.loadDataFile("digitdata/testimages", numTest,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", numTest)
    
  
  # Extract features
  #print "Extracting features..."
  #print '#######type of rawTrainingData is', rawTrainingData.__class__ # list of Datum
  #print '#######type of rawTrainingData[0] is', rawTrainingData[0].__class__ # Datum
  trainingData = map(featureFunction, rawTrainingData)
  #print '#######type of trainingData is', trainingData.__class__ # list of Counter
  #print '#######type of trainingData[0] is', trainingData[0].__class__ # Counter
  validationData = map(featureFunction, rawValidationData)
  testData = map(featureFunction, rawTestData)
  
  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)
  
  # do odds ratio computation if specified at command line
  if((options.odds) & (options.classifier == "naiveBayes" or (options.classifier == "nb")) ):
    label1, label2 = options.label1, options.label2
    features_odds = classifier.findHighOddsFeatures(label1,label2)
    if(options.classifier == "naiveBayes" or options.classifier == "nb"):
      string3 = "=== Features with highest odd ratio of label %d over label %d ===" % (label1, label2)
    else:
      string3 = "=== Features for which weight(label %d)-weight(label %d) is biggest ===" % (label1, label2)    
      
    print string3
    printImage(features_odds)

  if((options.weights) & (options.classifier == "perceptron")):
    for l in classifier.legalLabels:
      features_weights = classifier.findHighWeightFeatures(l)
      print ("=== Features with high weight for label %d ==="%l)
      printImage(features_weights)
# Example #47
# 0
def runClassifier(args, options):
  """Train and evaluate a classifier on face or digit data, optionally
  projecting features through a PCA basis (GDA/LR) or converting them to
  numpy arrays (GPC) before training."""

  featureFunction = args['featureFunction']
  classifier = args['classifier']
  printImage = args['printImage']

  # Load data
  numTraining = options.training

  # Extract features
  print "Extracting features..."
  if options.data=="faces":
    rawTrainingData = samples.loadDataFile("facedata/facedatatrain", numTraining,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    trainingLabels  = samples.loadLabelsFile("facedata/facedatatrainlabels", numTraining)
    rawValidationData = samples.loadDataFile("facedata/facedatavalidation", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    validationLabels  = samples.loadLabelsFile("facedata/facedatavalidationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("facedata/facedatatest", TEST_SET_SIZE,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)
    testLabels  = samples.loadLabelsFile("facedata/facedatatestlabels", TEST_SET_SIZE)
  else:
    rawTrainingData = samples.loadDataFile("digitdata/trainingimages", numTraining,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    trainingLabels = samples.loadLabelsFile("digitdata/traininglabels", numTraining)
    rawValidationData = samples.loadDataFile("digitdata/validationimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    validationLabels = samples.loadLabelsFile("digitdata/validationlabels", TEST_SET_SIZE)
    rawTestData = samples.loadDataFile("digitdata/testimages", TEST_SET_SIZE,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)
    testLabels = samples.loadLabelsFile("digitdata/testlabels", TEST_SET_SIZE)

  if options.classifier == "GDA" or options.classifier == "LR":
    import os.path
    # Cache the PCA basis and the projected data on disk, keyed by data
    # set and training size, so repeated runs skip the recomputation.
    if os.path.isfile(options.data + '_' + str(numTraining) + '_pca.np'):
      f = open(options.data + '_' + str(numTraining) + '_pca.np', 'rb')
      principleComponents, trainingData, validationData, testData = cPickle.load(f) 
      f.close()
    else:
      if options.data == "faces":
        # The 13-dim PCA basis is fit on the full 451-image face
        # training set, independent of numTraining.
        dimension = 13
        principleComponents = getPrincipleComponents(map(featureFunction, samples.loadDataFile("facedata/facedatatrain",451,FACE_DATUM_WIDTH,FACE_DATUM_HEIGHT)), dimension)
        trainingData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData)), principleComponents)
        validationData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawValidationData)), principleComponents)
        testData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTestData)), principleComponents)
      else:
        # For digits the basis is fit on 5000 training images.
        dimension = 13
        principleComponents = getPrincipleComponents(map(featureFunction, samples.loadDataFile("digitdata/trainingimages",5000,DIGIT_DATUM_WIDTH,DIGIT_DATUM_HEIGHT)), dimension)
        trainingData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData)), principleComponents)
        validationData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawValidationData)), principleComponents)
        testData = np.dot(basicFeatureDataToNumpyArray(map(featureFunction, rawTestData)), principleComponents)
      f = open(options.data + '_' + str(numTraining) + '_pca.np', 'wb')
      cPickle.dump((principleComponents, trainingData, validationData, testData), f)
      f.close()
  elif options.classifier == "GPC":
    # GPC consumes dense numpy arrays rather than per-datum feature maps.
    trainingData = basicFeatureDataToNumpyArray(map(featureFunction, rawTrainingData))
    validationData = basicFeatureDataToNumpyArray(map(featureFunction, rawValidationData))
    testData = basicFeatureDataToNumpyArray(map(featureFunction, rawTestData))
  else:
    trainingData = map(featureFunction, rawTrainingData)
    validationData = map(featureFunction, rawValidationData)
    testData = map(featureFunction, rawTestData)

  # Conduct training and testing
  print "Training..."
  classifier.train(trainingData, trainingLabels, validationData, validationLabels)
  print "Validating..."
  guesses = classifier.classify(validationData)
  correct = [guesses[i] == validationLabels[i] for i in range(len(validationLabels))].count(True)
  print str(correct), ("correct out of " + str(len(validationLabels)) + " (%.1f%%).") % (100.0 * correct / len(validationLabels))
  print "Testing..."
  guesses = classifier.classify(testData)
  correct = [guesses[i] == testLabels[i] for i in range(len(testLabels))].count(True)
  print str(correct), ("correct out of " + str(len(testLabels)) + " (%.1f%%).") % (100.0 * correct / len(testLabels))
  analysis(classifier, guesses, testLabels, testData, rawTestData, printImage)