def evaluate(rid, align_id):
    """
    Gather the stored chapter text, the newly selected chapter text and the
    cleaned chapter text for one chapter, together with pipeline counters.

    The chapter is identified in pageDB by the id ``'<rid>|<align_id>'``.

    :param rid: book id; converted with ``int()`` before use.
    :param align_id: chapter alignment id, used verbatim in the page id.
    :return: ``False`` when
        * the page is missing, its ``novel_chapter_type`` is not ``0`` or it
          has no ``blocks`` entry,
        * no candidate chapter is generated, or
        * ``selected_chapter.there_impurity`` is falsy
          (presumably "nothing was cleaned from the selection" -- confirm
          against ``selected_chapter_content_filter``).
        Otherwise a 7-tuple:
        ``(store_chapter_content, raw_chapter_content, pure_chapter_content,
        site_id, candidate_chapter_num, cluster_chapter_num,
        rank_chapter_num)``.
    """
    module = ChapterOptimizeModule()
    rid = int(rid)
    cid = '{0}|{1}'.format(rid, align_id)

    # Read the chapter page from pageDB using the chapter's cid.
    silk_server = SilkServer()
    chapter_page = silk_server.get(src='http://test.com', pageid=cid)
    if not chapter_page \
            or 'novel_chapter_type' not in chapter_page \
            or chapter_page['novel_chapter_type'] != 0 \
            or 'blocks' not in chapter_page:
        module.logger.info('cid:{0} not exists in pageDB'.format(cid))
        return False

    # Text currently stored for this chapter. If several NOVELCONTENT blocks
    # exist, the last one wins; if none exists, the empty string is kept.
    store_chapter_content = u''
    for block in chapter_page['blocks']:
        if 'type' in block and block['type'] == 'NOVELCONTENT':
            raw_chapter_content = block['data_value']
            store_chapter_content = ChapterHtmlFilter().chapter_html_filter(raw_chapter_content)

    module.logger.info('rid: {0}, align_id: {1}'.format(rid, align_id))

    # Candidate pipeline: collect -> generate -> filter -> rank -> clean.
    total_candidate_chapter_list = module.candidate_chapter_collecion(rid, align_id)
    module.logger.info('total_candidate_chapter_length: {0}'.format(len(total_candidate_chapter_list)))

    candidate_chapter_list = module.candidate_chapter_generate(rid, align_id, total_candidate_chapter_list)
    candidate_chapter_num = len(candidate_chapter_list)
    if candidate_chapter_num == 0:
        module.logger.info('candidate_chapter_list is empty')
        return False

    candidate_chapter_list = module.candidate_chapter_filter(candidate_chapter_list)
    cluster_chapter_num = len(candidate_chapter_list)

    selected_index, candidate_chapter_list = module.candidate_chapter_rank(candidate_chapter_list)
    rank_chapter_num = len(candidate_chapter_list)

    selected_chapter = module.selected_chapter_content_filter(selected_index, candidate_chapter_list)
    if not selected_chapter.there_impurity:
        return False

    # From here on these are the selected chapter's texts, not the stored
    # block's raw value read in the loop above.
    raw_chapter_content = selected_chapter.raw_chapter_content
    pure_chapter_content = selected_chapter.pure_chapter_content
    return store_chapter_content, raw_chapter_content, pure_chapter_content, selected_chapter.site_id, \
        candidate_chapter_num, cluster_chapter_num, rank_chapter_num
def chapter_module():
    """
    Build a ChapterOptimizeModule and invoke its ``run()`` method.

    The result of ``run()`` is discarded; this function returns ``None``.
    """
    ChapterOptimizeModule().run()