def run(self):
        """Recompute the similarity edges for every gid selected for processing.

        For each gid from ``self.process_gid_collection()``:

        1. ``delete_novelclusteredgeinfo(gid)`` is called on a fresh
           ``ClusterDBModule`` before anything else, so a gid whose cluster
           node is missing or that has no related gids is left without the
           edge info that call removes.
        2. The gid's cluster node is turned into a virtual node and compared
           with the virtual node of every related gid.
        3. Every pair whose similarity is at least 0.7 becomes a
           ``ClusterEdgeInfo`` and the collected list is passed to
           ``self.cluster_edge_update``.

        Book and pen names are GBK-encoded for logging only; characters that
        GBK cannot represent are dropped so that logging can never abort the
        run.

        Returns:
            bool: always ``True`` once all gids have been visited.
        """
        self.logger.info('novel cluster edge module start')

        similarity = NovelSimilarityModule()
        # Minimum similarity for two clusters to be linked by an edge.
        similarity_threshold = 0.7

        process_gid_list = self.process_gid_collection()
        for index, gid in enumerate(process_gid_list):

            # NOTE(review): a new ClusterDBModule is built for every gid; kept
            # as-is because the constructor's side effects are not visible here.
            cluster_db = ClusterDBModule()
            cluster_db.delete_novelclusteredgeinfo(gid)

            cluster_node = self.cluster_node_collection(gid)
            if not cluster_node:
                continue
            virtual_node = similarity.virtual_novel_node_generate(cluster_node)
            related_gid_list = self.related_gid_collection(cluster_node)
            if not related_gid_list:
                continue

            # 'ignore' keeps an un-encodable character from raising
            # UnicodeEncodeError in the middle of the batch.
            book_name = cluster_node.book_name.encode('GBK', 'ignore')
            pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
            self.logger.info('index: {0}/{1}'.format(index, len(process_gid_list)))
            self.logger.info('novel_info: {0}@{1}@{2}, '
                             'chapter_number: {3}, related_gid_number: {4}'.format(
                gid, book_name, pen_name,
                len(virtual_node.chapter_list), len(related_gid_list)
            ))

            related_edge_list = []
            for related_gid in related_gid_list:
                related_cluster_node = self.cluster_node_collection(related_gid)
                if not related_cluster_node:
                    continue
                related_virtual_node = similarity.virtual_novel_node_generate(related_cluster_node)
                cluster_similarity = similarity.novel_cluster_similarity_calculation(
                    virtual_node, related_virtual_node)
                if cluster_similarity < similarity_threshold:
                    continue

                related_edge_list.append(
                    ClusterEdgeInfo(cluster_node.gid, related_cluster_node.gid, cluster_similarity))

                related_book_name = related_cluster_node.book_name.encode('GBK', 'ignore')
                related_pen_name = related_cluster_node.pen_name.encode('GBK', 'ignore')
                self.logger.info('novel_info: {0}@{1}@{2}, '
                                 'chapter_number: {3}, similarity: {4}'.format(
                    related_gid, related_book_name, related_pen_name,
                    len(related_virtual_node.chapter_list), cluster_similarity
                ))
            # Called even when no edge passed the threshold (empty list).
            self.cluster_edge_update(gid, related_edge_list)

        self.logger.info('novel cluster edge module end')
        return True
Ejemplo n.º 2
0
def show_cluster_node(gid):
    """Print a cluster node's book name, pen name and chapters.

    The cluster node for ``gid`` is fetched through ``ClusterEdgeModule`` and
    converted to a virtual node with ``NovelSimilarityModule``. Two lines are
    printed: the gid with book/pen name, then every chapter as
    ``title: rank``. Text is GBK-encoded for printing, dropping characters
    GBK cannot represent.

    Args:
        gid: identifier passed to ``cluster_node_collection``.

    Returns:
        The generated virtual novel node, or ``None`` when no cluster node is
        found for ``gid``.
    """
    cluster_edge = ClusterEdgeModule()
    cluster_similarity = NovelSimilarityModule()

    cluster_node = cluster_edge.cluster_node_collection(gid)
    # Other callers of cluster_node_collection treat a falsy result as
    # "no node"; bail out instead of failing on the attribute access below.
    if not cluster_node:
        return None

    virtual_novel_node = cluster_similarity.virtual_novel_node_generate(cluster_node)
    book_name = cluster_node.book_name.encode('GBK', 'ignore')
    pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
    print('gid: {0}, book_name: {1}, pen_name: {2}'.format(gid, book_name, pen_name))
    print(', '.join(
        '%s: %d' % (chapter.chapter_title.encode('GBK', 'ignore'), chapter.rank)
        for chapter in virtual_novel_node.chapter_list
    ))
    return virtual_novel_node
Ejemplo n.º 3
0
    def run_test(self):
        """Run over the evaluation data and print each novel's chapter titles.

        Reads one integer gid per line from ``./data/rid.txt`` (blank lines
        are skipped). For every gid that has a cluster node, prints the gid
        with its book and pen name, then the comma-separated chapter titles
        of the generated virtual node, then an empty line. Text is
        GBK-encoded for printing, dropping characters GBK cannot represent.

        Raises:
            IOError/OSError: if ``./data/rid.txt`` cannot be opened.
            ValueError: if a non-blank line is not an integer.
        """
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open('./data/rid.txt', 'r') as gid_file:
            # int() tolerates surrounding whitespace; only truly blank lines
            # (e.g. a trailing newline at end of file) need to be filtered out.
            gid_list = [int(line) for line in gid_file if line.strip()]
        similarity = NovelSimilarityModule()

        for gid in gid_list:
            cluster_node = self.cluster_node_collection(gid)
            if not cluster_node:
                continue
            print('gid: {0}, book_name: {1}, pen_name: {2}'.format(
                gid,
                cluster_node.book_name.encode('GBK', 'ignore'),
                cluster_node.pen_name.encode('GBK', 'ignore')
            ))
            novel_node = similarity.virtual_novel_node_generate(cluster_node)
            print(', '.join(
                '%s' % chapter.chapter_title.encode('GBK', 'ignore')
                for chapter in novel_node.chapter_list
            ))
            print('')