def novel_node_similarity_calculation(self, novel_node_x, novel_node_y):
        """Compute the similarity between two novels from their chapter lists.

        Every chapter of ``novel_node_x`` is compared with every chapter of
        ``novel_node_y`` via ``self.novel_chapter_similarity_calculation``;
        chapters whose ``chapter_title`` has length 0 are left out on both
        sides. Pairs scoring at least 0.8 become candidate edges, and
        ``BipartiteGraph.bipartite_graph_max_match`` is asked for a matching
        over those edges.

        Args:
            novel_node_x: object exposing a ``chapter_list`` sequence.
            novel_node_y: object exposing a ``chapter_list`` sequence.

        Returns:
            A ``(similarity, match_list)`` tuple. ``similarity`` is the match
            count divided by the length of the shorter chapter list;
            ``match_list`` is passed through unchanged from
            ``bipartite_graph_max_match`` (its exact layout is defined there).
            When the shorter list has two chapters or fewer, ``(0.0, [])`` is
            returned without comparing anything.
        """
        chapters_x = novel_node_x.chapter_list
        chapters_y = novel_node_y.chapter_list
        shorter_length = min(len(chapters_x), len(chapters_y))
        if shorter_length <= 2:
            return 0.0, []

        # Adjacency of the bipartite graph: index in chapters_x -> indexes in
        # chapters_y. Kept as a defaultdict(list) because that is the type the
        # matcher receives in the original code.
        candidate_edges = defaultdict(list)
        for row, left_chapter in enumerate(chapters_x):
            if len(left_chapter.chapter_title) != 0:
                similar_columns = [
                    column
                    for column, right_chapter in enumerate(chapters_y)
                    if len(right_chapter.chapter_title) != 0
                    and self.novel_chapter_similarity_calculation(left_chapter, right_chapter) >= 0.8
                ]
                # Only rows with at least one candidate get a key.
                if similar_columns:
                    candidate_edges[row] = similar_columns

        matched_count, matched_pairs = BipartiteGraph().bipartite_graph_max_match(
            len(chapters_x),
            len(chapters_y),
            candidate_edges,
        )
        # "* 1.0" keeps the division floating-point under Python 2 as well.
        return matched_count * 1.0 / shorter_length, matched_pairs
예제 #2
0
    def novel_node_similarity_calculation(self, novel_node_x, novel_node_y):
        """Compute the similarity between two novels from their chapter lists.

        The longer of the two chapter lists is used as the left side of the
        comparison and the shorter one as the right side (on a tie,
        ``novel_node_x`` is the left side). Left-side chapters whose
        ``chapter_title`` equals ``''`` are skipped; right-side chapters are
        not filtered. Pairs that ``self.novel_chapter_similarity_calculation``
        scores at 0.8 or higher become candidate edges handed to
        ``BipartiteGraph.bipartite_graph_max_match``.

        Args:
            novel_node_x: object exposing a ``chapter_list`` sequence.
            novel_node_y: object exposing a ``chapter_list`` sequence.

        Returns:
            A ``(similarity, match_list)`` tuple. ``similarity`` is the match
            count divided by the length of the shorter chapter list;
            ``match_list`` is passed through unchanged from
            ``bipartite_graph_max_match`` (its exact layout is defined there).
            When the shorter list has two chapters or fewer, ``(0.0, [])`` is
            returned without comparing anything.
        """
        chapters_x = novel_node_x.chapter_list
        chapters_y = novel_node_y.chapter_list
        if len(chapters_y) > len(chapters_x):
            longer, shorter = chapters_y, chapters_x
        else:
            longer, shorter = chapters_x, chapters_y

        if len(shorter) <= 2:
            return 0.0, []

        # Adjacency of the bipartite graph: index in the longer list ->
        # indexes in the shorter list. Kept as a defaultdict(list) because
        # that is the type the matcher receives in the original code.
        candidate_edges = defaultdict(list)
        for long_index, long_chapter in enumerate(longer):
            if long_chapter.chapter_title != '':
                similar_indexes = [
                    short_index
                    for short_index, short_chapter in enumerate(shorter)
                    if self.novel_chapter_similarity_calculation(long_chapter, short_chapter) >= 0.8
                ]
                # Only rows with at least one candidate get a key.
                if similar_indexes:
                    candidate_edges[long_index] = similar_indexes

        matcher = BipartiteGraph()
        matched_count, matched_pairs = matcher.bipartite_graph_max_match(
            len(longer), len(shorter), candidate_edges
        )
        # "* 1.0" keeps the division floating-point under Python 2 as well.
        return matched_count * 1.0 / len(shorter), matched_pairs