Beispiel #1
0
    def __init__(self):
        """Open the word store and register the filters with their names.

        ``self.known_words`` holds whatever ``wordb.open('./words.db')``
        returns.  ``self.filters`` lists the filter callables in the order
        given below, and ``self.filter_names`` maps each of those callables
        to its name string.
        """
        self.known_words = wordb.open('./words.db')

        # One table of (name, callable) pairs feeds both the ordered list
        # and the name lookup, so the two cannot drift apart.
        named_filters = (
            ("is_single_character", self.is_single_character),
            ("is_not_chinese_word", self.is_not_chinese_word),
            ("is_number", self.is_number),
            ("is_AA", self.is_AA),
            ("is_stop_word", is_stop_word),
            ("is_known_word", self.is_known_word),
        )

        self.filters = [check for _, check in named_filters]
        self.filter_names = {check: label for label, check in named_filters}
Beispiel #2
0
    def __init__(self):
        """Set up the word store, the filter list and the filter-name lookup.

        The result of ``wordb.open('./words.db')`` is stored on
        ``self.known_words``.  ``self.filters`` is the ordered list of filter
        callables; ``self.filter_names`` maps every callable in that list to
        the string naming it.
        """
        self.known_words = wordb.open('./words.db')

        checks = [
            self.is_single_character,
            self.is_not_chinese_word,
            self.is_number,
            self.is_AA,
            is_stop_word,
            self.is_known_word,
        ]
        # Labels are listed in the same order as ``checks`` above.
        labels = (
            "is_single_character",
            "is_not_chinese_word",
            "is_number",
            "is_AA",
            "is_stop_word",
            "is_known_word",
        )

        self.filters = checks
        # Pair each filter with its label positionally.
        self.filter_names = dict(zip(checks, labels))
Beispiel #3
0
def dump_to_db(filename):
    """Return a callback that records words in the store opened from *filename*.

    The store is opened once through ``wordb.open(filename)``; every call of
    the returned function writes into that same store object.
    """
    import wordb

    store = wordb.open(filename)

    def dump_func(word, pinyins):
        """Set ``store[word]`` to 1; *pinyins* is accepted but not used."""
        store[word] = 1

    return dump_func
Beispiel #4
0
 def __init__(self, output_file, get_word_freq = None):
     """Store the frequency callback, open the output store, reset counters.

     output_file   -- passed unchanged to ``wordb.open``; the result is kept
                      on ``self.new_words``.
     get_word_freq -- optional value stored as-is on ``self.get_word_freq``
                      (defaults to ``None``).
     """
     self.get_word_freq = get_word_freq

     opened_store = wordb.open(output_file)
     self.new_words = opened_store

     self.filters = Filters()

     # Both counters start at zero.
     self.n_killed = self.n_added = 0