Exemple #1
0
def show_cluster_node(gid):
    """Print a summary of the cluster node for ``gid`` and return its virtual novel node.

    The cluster node is fetched with ``ClusterEdgeModule.cluster_node_collection``
    and turned into a virtual novel node with
    ``NovelSimilarityModule.virtual_novel_node_generate``. The book name, pen
    name and every chapter title are GBK-encoded (unencodable characters are
    dropped) before being printed.

    Args:
        gid: identifier passed straight to ``cluster_node_collection``.

    Returns:
        The object returned by ``virtual_novel_node_generate``, or ``False``
        when ``cluster_node_collection`` returns ``False``.
    """
    # NOTE(review): this instance is never used below; it is kept in case the
    # constructor has side effects -- confirm and remove if it does not.
    cluster_db = ClusterDBModule()

    cluster_edge = ClusterEdgeModule()
    cluster_similarity = NovelSimilarityModule()

    cluster_node = cluster_edge.cluster_node_collection(gid)
    if cluster_node is False:
        # check_chapter_title_clean guards the same call this way; without the
        # guard the attribute accesses below fail on the False sentinel.
        return False

    virtual_novel_node = cluster_similarity.virtual_novel_node_generate(cluster_node)
    book_name = cluster_node.book_name.encode('GBK', 'ignore')
    pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
    print('gid: {0}, book_name: {1}, pen_name: {2}'.format(gid, book_name, pen_name))

    # One "<title>: <rank>" entry per chapter of the virtual novel node.
    chapter_summary = ', '.join(
        '%s: %d' % (chapter.chapter_title.encode('GBK', 'ignore'), chapter.rank)
        for chapter in virtual_novel_node.chapter_list
    )
    print(chapter_summary)
    return virtual_novel_node
    def run(self, update_edge=False):
        """Generate novel nodes for each known site, then optionally run the edge module.

        Walks the site ids from ``self.start_site_id`` to ``self.end_site_id``
        (inclusive). Ids that are not in ``self.site_dict`` are skipped; for
        every other id ``self.novel_node_generate`` is called with the value
        stored in ``self.proc_time_dict`` under ``'dir_fmt_info<site_id>'``.
        Afterwards ``self.novel_gid_list`` is replaced by a list with
        duplicates removed.

        Args:
            update_edge: only the literal ``True`` triggers
                ``ClusterEdgeModule().run(self.novel_gid_list)``, and only
                when the de-duplicated list is non-empty.

        Returns:
            Always ``True``.
        """
        self.logger.info('novel cluster node module start')

        # range() and the `in` operator replace the Python-2-only xrange() and
        # dict.has_key(); both forms behave the same on Python 2 and 3.
        for site_id in range(self.start_site_id, self.end_site_id + 1):
            if site_id not in self.site_dict:
                continue
            update_time = self.proc_time_dict['dir_fmt_info{0}'.format(site_id)]
            self.novel_node_generate(site_id, update_time)

        # Drop duplicate gids; the result is always a real list.
        self.novel_gid_list = list(dict.fromkeys(self.novel_gid_list))
        # `is True` is kept deliberately: truthy non-bool values do not
        # trigger the edge update, exactly as before.
        if update_edge is True and self.novel_gid_list:
            cluster_edge_module = ClusterEdgeModule()
            cluster_edge_module.run(self.novel_gid_list)

        self.logger.info('novel cluster node module end')
        return True
def check_chapter_title_clean(gid):
    """Inspect chapter-title cleaning and the virtual directory result for one book.

    Fetches the cluster node for ``gid``, prints its book name and pen name,
    then for every novel node resets each chapter title to its raw title,
    runs ``NovelCleanModule.novel_chapter_clean`` on the node and prints the
    node's ``dir_url`` followed by the chapter titles and the raw chapter
    titles (GBK-encoded, unencodable characters dropped).

    Args:
        gid: identifier passed straight to ``cluster_node_collection``.

    Returns:
        ``False`` when ``cluster_node_collection`` returns ``False``;
        otherwise ``None``.
    """
    cluster_edge = ClusterEdgeModule()
    # NOTE(review): never used below; kept in case the constructor has side
    # effects -- confirm and remove if it does not.
    cluster_similarity = NovelSimilarityModule()

    novel_clean = NovelCleanModule()

    cluster_node = cluster_edge.cluster_node_collection(gid)
    if cluster_node is False:
        return False

    # Encode before formatting, as show_cluster_node does: putting non-ASCII
    # unicode into a byte-string template raises UnicodeEncodeError on Python 2.
    book_name = cluster_node.book_name.encode('GBK', 'ignore')
    pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
    print('book_name: {0}, pen_name: {1}'.format(book_name, pen_name))

    for novel_node in cluster_node.novel_node_list:
        # Start from the raw titles so the clean step runs on unprocessed input.
        for chapter in novel_node.chapter_list:
            chapter.chapter_title = chapter.raw_chapter_title
        novel_clean.novel_chapter_clean(novel_node)

        print(novel_node.dir_url)
        # Titles as left by novel_chapter_clean, then the raw ones, for comparison.
        print(', '.join('%s' % chapter.chapter_title.encode('GBK', 'ignore') for chapter in novel_node.chapter_list))
        print(', '.join('%s' % chapter.raw_chapter_title.encode('GBK', 'ignore') for chapter in novel_node.chapter_list))
Exemple #4
0
def cluster_edge_module():
    """Create a ``ClusterEdgeModule`` and call its ``run()`` with no arguments.

    The result of ``run()`` is discarded; this function returns ``None``.
    """
    ClusterEdgeModule().run()