def novel_node_similarity_calculation(self, novel_node_x, novel_node_y):
    """
    Compute the similarity between two novels from their chapter lists.

    Every titled chapter of ``novel_node_x`` is compared with every titled
    chapter of ``novel_node_y`` through
    ``self.novel_chapter_similarity_calculation``. Pairs scoring at least
    0.8 become candidate edges, and ``BipartiteGraph`` computes a maximum
    matching over those edges.

    :param novel_node_x: object exposing ``chapter_list``; every chapter in
        it exposes ``chapter_title``.
    :param novel_node_y: same shape as ``novel_node_x``.
    :return: tuple ``(similarity, match_list)``. ``similarity`` is the number
        of matched chapters divided by the length of the shorter chapter
        list (untitled chapters still count towards that length).
        ``match_list`` is the second value returned by
        ``BipartiteGraph.bipartite_graph_max_match``. If the shorter list
        has two chapters or fewer the result is ``(0.0, [])``.
    """
    chapters_x = novel_node_x.chapter_list
    chapters_y = novel_node_y.chapter_list

    min_length = min(len(chapters_x), len(chapters_y))
    # Novels whose shorter side has at most two chapters are never matched.
    if min_length <= 2:
        return 0.0, []

    # Untitled chapters never take part in the matching. Filter the y side
    # once instead of re-checking it for every chapter of x. A falsy title
    # (empty string or None) counts as untitled, so a None title is skipped
    # rather than crashing on len().
    titled_y = [
        (index_y, chapter_y)
        for index_y, chapter_y in enumerate(chapters_y)
        if chapter_y.chapter_title
    ]

    # Adjacency lists: index in x -> indices in y that are similar enough.
    # Only rows with at least one hit get a key, exactly as before.
    similarity_matrix = defaultdict(list)
    for index_x, chapter_x in enumerate(chapters_x):
        if not chapter_x.chapter_title:
            continue
        for index_y, chapter_y in titled_y:
            chapter_similarity = self.novel_chapter_similarity_calculation(
                chapter_x, chapter_y
            )
            if chapter_similarity >= 0.8:
                similarity_matrix[index_x].append(index_y)

    match = BipartiteGraph()
    match_number, match_list = match.bipartite_graph_max_match(
        len(chapters_x), len(chapters_y), similarity_matrix
    )

    # "* 1.0" forces float division on interpreters where "/" truncates ints.
    similarity = match_number * 1.0 / min_length
    return similarity, match_list
def novel_node_similarity_calculation(self, novel_node_x, novel_node_y):
    """
    Compute the similarity between two novels from their chapter lists.

    The two chapter lists are ordered by length. Each chapter of the longer
    list whose title is not the empty string is compared with every chapter
    of the shorter list through ``self.novel_chapter_similarity_calculation``.
    Pairs scoring at least 0.8 become candidate edges, and ``BipartiteGraph``
    computes a maximum matching over those edges.

    :param novel_node_x: object exposing ``chapter_list``; every chapter in
        it exposes ``chapter_title``.
    :param novel_node_y: same shape as ``novel_node_x``.
    :return: tuple ``(similarity, match_list)``. ``similarity`` is the number
        of matched chapters divided by the length of the shorter chapter
        list. ``match_list`` is the second value returned by
        ``BipartiteGraph.bipartite_graph_max_match``. If the shorter list
        has two chapters or fewer the result is ``(0.0, [])``.

    NOTE(review): the indices handed to the matcher refer to positions in
    the longer and the shorter list, not necessarily to x and y. Presumably
    ``match_list`` uses the same orientation; confirm against callers.
    """
    chapters_x = novel_node_x.chapter_list
    chapters_y = novel_node_y.chapter_list

    # On a tie, x is treated as the longer list.
    if len(chapters_y) > len(chapters_x):
        longer, shorter = chapters_y, chapters_x
    else:
        longer, shorter = chapters_x, chapters_y

    shorter_size = len(shorter)
    if shorter_size <= 2:
        return 0.0, []

    # Adjacency lists: index in the longer list -> indices in the shorter
    # list that are similar enough. Rows without any hit get no key.
    candidates = defaultdict(list)
    for long_index, long_chapter in enumerate(longer):
        if long_chapter.chapter_title != '':
            hits = [
                short_index
                for short_index, short_chapter in enumerate(shorter)
                if self.novel_chapter_similarity_calculation(
                    long_chapter, short_chapter
                ) >= 0.8
            ]
            if hits:
                candidates[long_index] = hits

    matcher = BipartiteGraph()
    matched, pairs = matcher.bipartite_graph_max_match(
        len(longer), shorter_size, candidates
    )

    # "* 1.0" forces float division on interpreters where "/" truncates ints.
    return matched * 1.0 / shorter_size, pairs