def show_cluster_node(gid):
    """
    Print a summary of the cluster node for ``gid`` and its virtual novel node.

    The cluster node is fetched with ``ClusterEdgeModule.cluster_node_collection``
    and fed to ``NovelSimilarityModule.virtual_novel_node_generate``. Two lines
    are printed: the gid with the book and pen name, then every chapter of the
    virtual novel node as ``title: rank``.

    :param gid: identifier passed straight to ``cluster_node_collection``.
    :return: the generated virtual novel node, or ``False`` when
        ``cluster_node_collection`` returned ``False``.
    """
    cluster_edge = ClusterEdgeModule()
    cluster_similarity = NovelSimilarityModule()

    cluster_node = cluster_edge.cluster_node_collection(gid)
    # Same guard as check_chapter_title_clean: a False result means there is
    # nothing to display, so bail out instead of failing on attribute access.
    if cluster_node is False:
        return False

    virtual_novel_node = cluster_similarity.virtual_novel_node_generate(cluster_node)

    # Names are encoded to GBK, silently dropping characters GBK cannot
    # represent (presumably to suit a GBK console -- confirm).
    book_name = cluster_node.book_name.encode('GBK', 'ignore')
    pen_name = cluster_node.pen_name.encode('GBK', 'ignore')
    print('gid: {0}, book_name: {1}, pen_name: {2}'.format(gid, book_name, pen_name))

    print(', '.join(
        '%s: %d' % (chapter.chapter_title.encode('GBK', 'ignore'), chapter.rank)
        for chapter in virtual_novel_node.chapter_list
    ))
    return virtual_novel_node
def run(self, update_edge=False):
    """
    Generate novel nodes for every known site in the configured id range.

    Site ids from ``self.start_site_id`` through ``self.end_site_id``
    (inclusive) are visited in order. Ids that are not keys of
    ``self.site_dict`` are skipped; for the rest,
    ``self.novel_node_generate(site_id, update_time)`` is called with
    ``update_time`` read from ``self.proc_time_dict`` under the key
    ``'dir_fmt_info<site_id>'``. Afterwards ``self.novel_gid_list`` is
    replaced by a de-duplicated list of its entries.

    :param update_edge: only when this is exactly ``True`` and the
        de-duplicated gid list is non-empty, a ``ClusterEdgeModule`` is
        created and run over that list.
    :return: always ``True``.

    A site id present in ``site_dict`` but without a matching entry in
    ``proc_time_dict`` propagates the lookup error (``KeyError`` for a plain
    dict).
    """
    self.logger.info('novel cluster node module start')

    # range/in instead of xrange/has_key: both spellings work on Python 2
    # and Python 3, whereas the originals exist on Python 2 only.
    for site_id in range(self.start_site_id, self.end_site_id + 1):
        if site_id not in self.site_dict:
            continue
        update_time = self.proc_time_dict['dir_fmt_info{0}'.format(site_id)]
        self.novel_node_generate(site_id, update_time)

    # De-duplicate via dict keys; list() keeps the attribute a real list on
    # Python 3, where dict.keys() would otherwise be a view.
    self.novel_gid_list = list(dict.fromkeys(self.novel_gid_list))

    if update_edge is True and self.novel_gid_list:
        edge_module = ClusterEdgeModule()
        edge_module.run(self.novel_gid_list)

    self.logger.info('novel cluster node module end')
    return True
def check_chapter_title_clean(gid):
    """
    Inspect chapter-title cleaning (and the virtual directory effect) for one book.

    For every novel node of the cluster node belonging to ``gid``, each
    chapter title is reset to its raw title, the node is passed through
    ``NovelCleanModule.novel_chapter_clean``, and then the directory URL,
    the resulting chapter titles and the raw chapter titles are printed.

    :param gid: identifier passed straight to ``cluster_node_collection``.
    :return: ``False`` when ``cluster_node_collection`` returned ``False``;
        otherwise nothing is returned explicitly.
    """
    edge_module = ClusterEdgeModule()
    # Constructed as in the original code although the instance is not used
    # below; kept in case construction has side effects.
    similarity_module = NovelSimilarityModule()
    cleaner = NovelCleanModule()

    node = edge_module.cluster_node_collection(gid)
    if node is False:
        return False

    header = 'book_name: {0}, pen_name: {1}'.format(node.book_name, node.pen_name)
    print(header)

    for novel in node.novel_node_list:
        # Start from the raw titles so the cleaner works on unprocessed input.
        for item in novel.chapter_list:
            item.chapter_title = item.raw_chapter_title
        cleaner.novel_chapter_clean(novel)

        print(novel.dir_url)

        # chapter_list is re-read after cleaning on purpose: the cleaner may
        # have modified or replaced it.
        cleaned_titles = ['%s' % item.chapter_title.encode('GBK', 'ignore')
                          for item in novel.chapter_list]
        print(', '.join(cleaned_titles))

        raw_titles = ['%s' % item.raw_chapter_title.encode('GBK', 'ignore')
                      for item in novel.chapter_list]
        print(', '.join(raw_titles))
def cluster_edge_module():
    """
    Create a ``ClusterEdgeModule`` and invoke its ``run()`` with no arguments.

    Nothing is returned; whatever ``run()`` returns is discarded.
    """
    ClusterEdgeModule().run()