#!/usr/bin/python

import re
import fileMaker

def wordExtractor(line):
    """Return a bag-of-words feature dict for one line of text.

    The line is lowercased, every character other than a-z and space is
    replaced by a space, and the result is split on whitespace.  Each
    distinct word becomes a feature whose value is the number of times
    the word occurs in the line.

    Returns an empty dict when the line contains no a-z characters.
    """
    # first lowercase it
    line = line.lower()

    # next, remove all non [a-z ] characters; [^a-z ] is a regular
    # expression for everything except a-z and space.
    line = re.sub("[^a-z ]", " ", line)

    # split out all words, since word == feature
    words = line.split()

    # compute the features; dict.get works on Python 2 and 3 alike,
    # whereas dict.has_key no longer exists on Python 3.
    feats = {}
    for word in words:
        feats[word] = feats.get(word, 0) + 1

    return feats


# When run as a script, hand the word extractor to fileMaker.mainExtractor
# (defined in the project-local fileMaker module, not shown in this file).
if __name__=="__main__":
    fileMaker.mainExtractor(wordExtractor)
#!/usr/bin/python

import re
import fileMaker

def pixelExtractor(line):
    """Turn a whitespace-separated line of pixel values into features.

    Every token is parsed as a float and divided by 255, which maps
    values in [0,255] onto [0,1].  The i-th value (counting from 0) is
    stored under the key "px<i>".

    Per the original author's note, the layout is row-major: pixels
    0..27 are the first image line, 28..55 the second, and so on.
    """
    feats = {}
    for index, token in enumerate(line.split()):
        feats["px" + str(index)] = float(token) / 255
    return feats


if __name__=="__main__":
    fileMaker.mainExtractor(pixelExtractor)
# Example #3
0
#!/usr/bin/python

import re
import fileMaker


def wordExtractor(line):
    """Count word occurrences in a single line of text.

    Processing steps: lowercase the line, replace every character that
    is not a-z or a space with a space, then split on whitespace.  The
    returned dict maps each word to how many times it appeared; it is
    empty when no a-z characters are present.
    """
    # first lowercase it
    line = line.lower()

    # next, remove all non [a-z ] characters; [^a-z ] is a regular
    # expression for everything except a-z and space.
    line = re.sub("[^a-z ]", " ", line)

    # split out all words, since word == feature
    words = line.split()

    # compute the features; the membership test uses `in` because
    # dict.has_key was removed in Python 3.
    feats = {}
    for word in words:
        if word in feats:
            feats[word] += 1
        else:
            feats[word] = 1

    return feats


# When run as a script, hand the word extractor to fileMaker.mainExtractor
# (defined in the project-local fileMaker module, not shown in this file).
if __name__ == "__main__":
    fileMaker.mainExtractor(wordExtractor)
#!/usr/bin/python

import re
import fileMaker


def pixelExtractor(line):
    """Build a {"px<i>": value} feature dict from a line of pixel values.

    The line is split on whitespace; each token is converted to float
    and divided by 255 so that inputs in [0,255] land in [0,1].  Keys
    are numbered from 0 in the order the values appear.

    Per the original author's note, the layout is row-major: pixels
    0..27 are the first image line, 28..55 the second, and so on.
    """
    tokens = line.split()
    return {
        "px" + str(position): float(raw) / 255
        for position, raw in enumerate(tokens)
    }


# When run as a script, hand the pixel extractor to fileMaker.mainExtractor
# (defined in the project-local fileMaker module, not shown in this file).
if __name__ == "__main__":
    fileMaker.mainExtractor(pixelExtractor)