def __init__(self):
    """Open the known-words store and register the word filters.

    Sets three attributes:

    * ``known_words`` -- whatever ``wordb.open('./words.db')`` returns.
    * ``filters`` -- the filter callables, in the order listed below.
    * ``filter_names`` -- a dict mapping each of those callables to its
      name as a string.
    """
    self.known_words = wordb.open('./words.db')

    # Ordered (callable, label) pairs. Note that ``is_stop_word`` is
    # referenced as a bare name, not through ``self`` like the others.
    named_filters = [
        (self.is_single_character, "is_single_character"),
        (self.is_not_chinese_word, "is_not_chinese_word"),
        (self.is_number, "is_number"),
        (self.is_AA, "is_AA"),
        (is_stop_word, "is_stop_word"),
        (self.is_known_word, "is_known_word"),
    ]

    self.filters = [check for check, _ in named_filters]
    self.filter_names = dict(named_filters)
def __init__(self):
    """Set up the known-words store plus the filter list and its labels.

    ``known_words`` is the result of ``wordb.open('./words.db')``.
    ``filters`` holds the filter callables in a fixed order, and
    ``filter_names`` maps every one of those callables to the string
    form of its name.
    """
    self.known_words = wordb.open('./words.db')

    # All entries are looked up on ``self`` except ``is_stop_word``,
    # which is taken from the enclosing scope as a bare name.
    self.filters = [
        self.is_single_character,
        self.is_not_chinese_word,
        self.is_number,
        self.is_AA,
        is_stop_word,
        self.is_known_word,
    ]

    # Labels are positionally aligned with ``self.filters`` above.
    labels = (
        "is_single_character",
        "is_not_chinese_word",
        "is_number",
        "is_AA",
        "is_stop_word",
        "is_known_word",
    )
    self.filter_names = dict(zip(self.filters, labels))
def dump_to_db(filename):
    """Return a callback that records words in the store opened at *filename*.

    The store is opened once, via ``wordb.open(filename)``, when this
    function is called. The returned callable takes ``(word, pinyins)``
    and sets ``store[word] = 1``; its ``pinyins`` argument is accepted
    but not used.
    """
    import wordb

    store = wordb.open(filename)

    def dump_func(word, pinyins):
        # Only the word itself is recorded; the value is always 1.
        store[word] = 1

    return dump_func
def __init__(self, output_file, get_word_freq=None):
    """Initialise the instance's output store, filters and counters.

    Parameters:
        output_file: passed straight to ``wordb.open``; the result is
            kept as ``self.new_words``.
        get_word_freq: stored unchanged as ``self.get_word_freq``
            (defaults to ``None``).

    Also creates a ``Filters()`` instance as ``self.filters`` and sets
    both ``n_killed`` and ``n_added`` to zero.
    """
    self.get_word_freq = get_word_freq

    # Keep this order: the output store is opened before Filters() runs.
    self.new_words = wordb.open(output_file)
    self.filters = Filters()

    # Both counters start at zero.
    self.n_killed = self.n_added = 0