Beispiel #1
0
    def unknowWordsSetZero(self, file_name='test.txt'):
        '''
        Add bigrams found in a test file but missing from self.Bigram.

        The bigram distribution of ``file_name`` is obtained from
        ``calcuBigramWordDistri``. Every (preceding word, word) pair for
        which ``self.inGram`` returns a false value is written into
        ``self.Bigram`` with the value 0.0, and a summary line reporting how
        many pairs were added is printed.

        :param file_name: path of the test file
        :return: None
        '''
        test_bigrams = calcuBigramWordDistri(file_name)
        added = 0
        for prev_word in test_bigrams:
            followers = test_bigrams[prev_word]
            for cur_word in followers:
                if self.inGram(prev_word, cur_word):
                    # Pair is already known; nothing to register.
                    continue
                # Unseen pair: give it an explicit zero entry.
                self.Bigram[prev_word][cur_word] = 0.
                added += 1

        summary = 'unknow words count number: {0}, set unknow word value = {1}'
        print(summary.format(added, 0.0))
Beispiel #2
0
 def __init__(self):
     """Initialize the instance and prepare its bigram data.

     Steps, in order:
       1. run ``DictionarySmooth.__init__`` on this instance;
       2. store the result of ``calcuBigramWordDistri()`` in ``self.Bigram``;
       3. call ``self.unknowWordsSetZero()`` with its default arguments;
       4. store the result of ``self.wittenBellSmoothing()`` in ``self.alpha``.
     """
     # Explicit base initializer call (original note: inherits from a
     # Dictionary subclass).
     DictionarySmooth.__init__(self)
     # Called with no arguments, so the helper's own defaults apply.
     self.Bigram = calcuBigramWordDistri()
     # Runs after self.Bigram is assigned and before smoothing.
     self.unknowWordsSetZero()
     # NOTE(review): presumably Witten-Bell smoothing weights -- confirm
     # against wittenBellSmoothing's definition.
     self.alpha = self.wittenBellSmoothing()
Beispiel #3
0
 def __init__(self):
     """Initialize the instance and load its bigram data.

     Runs ``Dictionary.__init__`` on this instance, then stores the result
     of ``calcuBigramWordDistri()`` in ``self.Bigram``.
     """
     # Explicit base initializer call (original note: inherits from the
     # Dictionary class hierarchy).
     Dictionary.__init__(self)
     # Called with no arguments, so the helper's own defaults apply.
     self.Bigram = calcuBigramWordDistri()