def coverage_values(self, sens_vector, essay_vector):
    """Return one coverage score per sentence vector.

    Each score is ``self.dist.sim`` between a sentence vector and
    ``essay_vector``, computed with the metric selected by ``self.disttype``.
    The scores are then handed to ``tools.normarlize``; its return value is
    ignored, so it is presumably an in-place normalisation (verify against
    ``tools``).

    Args:
        sens_vector: iterable of sentence vectors.
        essay_vector: vector of the whole document, compared to every sentence.

    Returns:
        list with one score per entry of ``sens_vector``, in the same order.
    """
    scores = [
        self.dist.sim(sentence_vec, essay_vector, self.disttype)
        for sentence_vec in sens_vector
    ]
    tools.normarlize(scores)
    return scores
def clueswords_values(self, sens_words):
    """Score each sentence by clue-word presence blended with relative length.

    For every sentence two features are computed:

    * a clue flag: 1 if any of its words is in ``self.cluewords``, else 0;
    * a length share: the sentence's word count divided by the total word
      count of all sentences, afterwards passed through ``tools.normarlize``
      (return value ignored, so presumably an in-place normalisation --
      verify against ``tools``).

    The final score is
    ``self.clue_weight * flag + (1 - self.clue_weight) * length_share``.

    Args:
        sens_words: sequence of sentences, each a sized collection of words.

    Returns:
        list with one score per sentence, in input order. Empty input gives
        an empty list.
    """
    # Only the total count is needed, so there is no need to materialise a
    # flattened list of every word.
    total_words = sum(len(sen_w) for sen_w in sens_words)

    clue_flags = []
    sen_len_list = []
    for sen_w in sens_words:
        # When every sentence is empty the total is 0; use a share of 0.0
        # instead of raising ZeroDivisionError.
        sen_len_list.append(len(sen_w) / total_words if total_words else 0.0)
        # any() stops at the first clue word, like the original break.
        clue_flags.append(1 if any(w in self.cluewords for w in sen_w) else 0)

    tools.normarlize(sen_len_list)

    return [
        self.clue_weight * flag + (1 - self.clue_weight) * length_share
        for flag, length_share in zip(clue_flags, sen_len_list)
    ]
if __name__ == "__main__":
    # Manual smoke test: vectorise one news article and print, for every
    # sentence, its coverage score against the whole-document vector.
    path = Dir.res + "/cleandata_small/news/trainning_2788.txt"
    lines = ftools.read_lines(path)
    text = '。'.join(lines)

    asv = Auto_Simple_Vec()
    sens, sens_words, sens_tags = asv.preprocess(text)
    print("se_words lgth", len(sens_words))

    sen_vec, essay_vec = asv.vectorize(sens_words, sens_tags)

    # Spot-check a few pairwise sentence similarities.
    print(sens[0], sens[1])
    print(asv.dist.sim(sen_vec[0], sen_vec[1]))
    print(asv.dist.sim(sen_vec[0], sen_vec[-1]))
    print(asv.dist.sim(sen_vec[2], sen_vec[3]))

    # Sentence-to-document scores using the Distance.EUD metric.
    coverage_list = [
        asv.dist.sim(vec, essay_vec, Distance.EUD) for vec in sen_vec
    ]
    tools.normarlize(coverage_list)

    for idx, score in enumerate(coverage_list):
        print(sens[idx], ",", score)