# Command-line entry point.
#   run.py init     -> calls db.initCounts() and exits
#   run.py process  -> reads text from stdin, records a count for every
#                      accepted word, and stores character unigram/bigram
#                      totals through db.setCharCount()
db = Db()

# Exactly one argument (the mode) is expected after the script name.
if len(sys.argv) != 2:
    raise Exception("Usage: run.py [init|process]")

if sys.argv[1] == "init":
    db.initCounts()
    sys.exit()
elif sys.argv[1] != "process":
    raise Exception("Invalid mode!")

# split and validate words
# A word is accepted only if it consists solely of lowercase ASCII letters.
re_valid = re.compile(r"^[a-z]+$")
# Words are separated by runs of whitespace and the listed punctuation.
re_split = re.compile(r"[\s.,:;/()\"&-]+")

# Keys are single characters (unigrams) or two-character strings (bigrams);
# "@" stands for the position before the first letter of a word.
charcounts = collections.defaultdict(int)

for line in sys.stdin:
    for word in re_split.split(line):
        # Every word starts from the "@" marker so that its first letter
        # is counted as the bigram "@" + letter.
        last_char = "@"
        if re_valid.match(word):
            db.incrementWordCount(word)
            for char in word:
                charcounts[char] += 1
                charcounts[last_char + char] += 1
                last_char = char
            # NOTE(review): the original layout of this script was lost, so
            # the nesting of this statement is reconstructed. It is placed
            # once per accepted word so the "@" total equals the number of
            # "@x" bigrams recorded above -- confirm against the original.
            charcounts["@"] += 1

# .items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
for k, v in charcounts.items():
    db.setCharCount(k, v)

# important:
db.commit()