def unknowWordsSetZero(self, file_name='test.txt'):
    """Add bigrams found in a test file but missing from the model, with value 0.

    The test file is turned into a nested ``front_word -> word`` mapping by
    ``calcuBigramWordDistri``. Every ``(front_word, word)`` pair for which
    ``self.inGram`` returns a falsy result is stored in ``self.Bigram`` with
    the value ``0.`` and counted. A one-line summary is printed at the end.

    :param file_name: path of the test file, passed to ``calcuBigramWordDistri``.
    :return: None. ``self.Bigram`` is modified in place.
    """
    test_word_dict = calcuBigramWordDistri(file_name)
    unknow_word_cnt = 0
    for front_word in test_word_dict:
        for word in test_word_dict[front_word]:
            if self.inGram(front_word, word):
                continue
            # The front word itself may be absent from the model. Create its
            # inner table on demand instead of failing with KeyError. A
            # defaultdict-style table never raises here, so its own factory
            # keeps deciding the inner container type.
            try:
                followers = self.Bigram[front_word]
            except KeyError:
                followers = self.Bigram[front_word] = {}
            followers[word] = 0.
            unknow_word_cnt += 1
    print('unknow words count number: {0}, set unknow word value = {1}'.format(
        unknow_word_cnt, 0.0))
def __init__(self):
    """Build the bigram table, zero-fill unseen test bigrams, and store alpha.

    Steps, in order: run the ``DictionarySmooth`` initialiser on this
    instance, load the bigram table with ``calcuBigramWordDistri`` (called
    with no arguments), call ``unknowWordsSetZero`` with its default file
    name, and keep the result of ``wittenBellSmoothing`` in ``self.alpha``.
    """
    # Initialise the state defined by the DictionarySmooth parent class.
    DictionarySmooth.__init__(self)

    bigram_table = calcuBigramWordDistri()
    self.Bigram = bigram_table

    # Must run after self.Bigram exists: it writes entries into that table.
    self.unknowWordsSetZero()

    # NOTE(review): presumably the Witten-Bell back-off weights; confirm
    # against wittenBellSmoothing's definition.
    smoothing_result = self.wittenBellSmoothing()
    self.alpha = smoothing_result
def __init__(self):
    """Run the ``Dictionary`` initialiser and load the bigram table.

    The table returned by ``calcuBigramWordDistri`` (called with no
    arguments) is stored in ``self.Bigram``.
    """
    # Initialise the state defined by the Dictionary parent class.
    Dictionary.__init__(self)

    bigram_table = calcuBigramWordDistri()
    self.Bigram = bigram_table