def compute_pmi(self, words_dict):
    """Store in ``self.pmi`` the smallest log-ratio over the splits of the word.

    ``gen_bigram(self.text)`` supplies ``(left, right)`` pairs.  For each pair
    the score is
    ``log(self.freq / words_dict[left].freq / words_dict[right].freq)``
    and the minimum score is assigned to ``self.pmi``.  When ``gen_bigram``
    yields nothing, ``self.pmi`` is left untouched.

    Args:
        words_dict: Indexable by each ``left`` / ``right`` part; every value
            must expose a ``freq`` attribute.  A missing part raises whatever
            ``words_dict[...]`` raises (``KeyError`` for a plain dict).
    """
    # Compute every way of splitting the word into two parts.
    sub_part = gen_bigram(self.text)
    if len(sub_part) > 0:
        # The pair is unpacked in the ``for`` clause instead of the former
        # ``lambda (left, right): ...`` signature: tuple parameters were
        # removed from the language (PEP 3113), while this form is valid on
        # both Python 2 and Python 3.
        # NOTE(review): on Python 2, ``/`` truncates for integer operands --
        # confirm that the ``freq`` values are floats.
        self.pmi = min(
            math.log(self.freq / words_dict[left].freq / words_dict[right].freq)
            for left, right in sub_part
        )
def compute_pmi(self, words_dict):
    """Assign to ``self.pmi`` the minimum log-ratio across the word's splits.

    Each item returned by ``gen_bigram(self.text)`` is indexed at ``[0]`` and
    ``[1]`` to obtain the two parts of a split.  The score of a split is
    ``log(self.freq / words_dict[part0].freq / words_dict[part1].freq)``.
    If there are no splits the method returns without touching ``self.pmi``.

    Args:
        words_dict: Indexable by each part; values expose ``freq``.
    """
    splits = gen_bigram(self.text)
    if len(splits) == 0:
        # Nothing to score; leave self.pmi as it was.
        return

    # One score per candidate split, in the order gen_bigram produced them.
    scores = [
        math.log(
            self.freq / words_dict[pair[0]].freq / words_dict[pair[1]].freq
        )
        for pair in splits
    ]
    self.pmi = min(scores)
def compute_pmi(self, words_dict):
    """Set ``self.pmi`` to the lowest log-ratio over all splits of the word.

    Per the original author's notes, ``words_dict`` is the ``word_cad``
    mapping (key: word, value: word_info).  Illustrated on a three-character
    word "ABC", the quantity computed is the log of
    ``min{p(ABC) / (p(AB) * p(C)), p(ABC) / (p(A) * p(BC))}``,
    with ``freq`` attributes standing in for ``p``.

    Args:
        words_dict: Indexable by each part of a split; values expose ``freq``.
    """

    def log_ratio(parts):
        # Divide by the two part frequencies one after the other, which is
        # the same as dividing by their product.
        ratio = self.freq / words_dict[parts[0]].freq
        ratio = ratio / words_dict[parts[1]].freq
        return math.log(ratio)

    # Every way of cutting the word into two parts.
    pieces = gen_bigram(self.text)
    if len(pieces) == 0:
        # No split available: self.pmi is not modified.
        return
    self.pmi = min(log_ratio(parts) for parts in pieces)
def compute_pmi(self, words_dict):
    """Set ``self.pmi`` to the minimum adjusted log-ratio over the word's splits.

    For each item from ``gen_bigram(self.text)``, the frequency of each of
    its two parts (``words_dict[item[0]].freq`` and
    ``words_dict[item[1]].freq``) is reduced by ``self.freq`` and clamped to
    at least 0.00001.  The split's score is
    ``log(self.freq / (adjusted_0 * adjusted_1))``.
    With no splits, ``self.pmi`` is left unchanged.

    Args:
        words_dict: Indexable by each part of a split; values expose ``freq``.
    """
    # Every way of combining two parts into the word.
    candidates = gen_bigram(self.text)
    if len(candidates) == 0:
        return

    # Lower bound for each adjusted frequency; it keeps both factors of the
    # denominator strictly positive.
    floor = 0.00001

    scores = []
    for pair in candidates:
        first_rest = max(words_dict[pair[0]].freq - self.freq, floor)
        second_rest = max(words_dict[pair[1]].freq - self.freq, floor)
        scores.append(math.log(self.freq / (first_rest * second_rest)))
    self.pmi = min(scores)