Exemple #1
0
# Open the database handle first; both modes below operate on it.
db = Db()

# The script takes exactly one argument: the run mode.
if len(sys.argv) != 2:
	raise Exception("Usage: run.py [init|process]")

# Reject unknown modes up front, then handle "init"; "process" simply
# falls through to the rest of the script.
if sys.argv[1] not in ("init", "process"):
	raise Exception("Invalid mode!")
if sys.argv[1] == "init":
	db.initCounts()
	# NOTE(review): exits without calling db.commit() -- presumably
	# initCounts() persists its own changes; confirm against Db.
	sys.exit()

# Tokenise stdin and tally word, character and character-pair frequencies.
# A token counts as a word only if it consists solely of lowercase a-z.
re_valid = re.compile(r"^[a-z]+$")
# Tokens are separated by runs of whitespace or the listed punctuation.
re_split = re.compile(r"[\s.,:;/()\"&-]+")
# Keys are single characters, two-character pairs, or "@"; unseen keys
# start at 0.
charcounts = collections.defaultdict(int)

for text_line in sys.stdin:
	for token in re_split.split(text_line):
		# "@" stands in for "start of word" as the left half of the
		# first pair of each word.
		previous = "@"
		if not re_valid.match(token):
			continue
		db.incrementWordCount(token)
		for letter in token:
			charcounts[letter] += 1
			charcounts[previous + letter] += 1
			previous = letter
		# The bare "@" key is bumped once per accepted word.
		charcounts["@"] += 1

# Hand every accumulated character / character-pair total to the database.
# .items() is used instead of .iteritems() because the latter was removed in
# Python 3; .items() behaves the same here on both Python 2 and 3.
for key, count in charcounts.items():
	db.setCharCount(key, count)

# Important: commit explicitly -- presumably none of the writes above are
# persisted until this call (confirm against Db).
db.commit()