def evaluate(rid, align_id):
    """
    Gather the stored, the newly selected and the purified text of a chapter.

    The chapter is addressed by the cid ``'<rid>|<align_id>'``. The stored
    text is read from the chapter's page record (its ``NOVELCONTENT`` block,
    passed through ``ChapterHtmlFilter``); the selected and purified texts
    come from running the ``ChapterOptimizeModule`` steps in order:
    collect -> generate -> filter -> rank -> content filter.

    :param rid: identifier that is coerced with ``int()`` before use.
    :param align_id: identifier used verbatim as the second half of the cid.
    :return: ``False`` when the page record is missing or unusable, when no
        candidate chapters are generated, or when the selected chapter's
        ``there_impurity`` attribute is falsy. Otherwise a 7-tuple:
        ``(store_chapter_content, raw_chapter_content, pure_chapter_content,
        site_id, candidate_chapter_num, cluster_chapter_num,
        rank_chapter_num)``, where the three counts are the candidate list
        lengths after the generate, filter and rank steps respectively.
    """
    optimizer = ChapterOptimizeModule()

    rid = int(rid)
    cid = '{0}|{1}'.format(rid, align_id)

    # Read the chapter's page record from pageDB using its cid.
    chapter_page = SilkServer().get(src='http://test.com', pageid=cid)
    page_is_usable = (
        chapter_page
        and 'novel_chapter_type' in chapter_page
        and chapter_page['novel_chapter_type'] == 0
        and 'blocks' in chapter_page
    )
    if not page_is_usable:
        optimizer.logger.info('cid:{0} not exists in pageDB'.format(cid))
        return False

    # If several NOVELCONTENT blocks exist, the last one wins.
    store_chapter_content = u''
    for block in chapter_page['blocks']:
        if 'type' not in block or block['type'] != 'NOVELCONTENT':
            continue
        store_chapter_content = ChapterHtmlFilter().chapter_html_filter(block['data_value'])

    optimizer.logger.info('rid: {0}, align_id: {1}'.format(rid, align_id))

    all_candidates = optimizer.candidate_chapter_collecion(rid, align_id)
    optimizer.logger.info('total_candidate_chapter_length: {0}'.format(len(all_candidates)))

    candidates = optimizer.candidate_chapter_generate(rid, align_id, all_candidates)
    candidate_chapter_num = len(candidates)
    if candidate_chapter_num == 0:
        optimizer.logger.info('candidate_chapter_list is empty')
        return False

    candidates = optimizer.candidate_chapter_filter(candidates)
    cluster_chapter_num = len(candidates)

    selected_index, candidates = optimizer.candidate_chapter_rank(candidates)
    rank_chapter_num = len(candidates)

    chosen = optimizer.selected_chapter_content_filter(selected_index, candidates)
    if not chosen.there_impurity:
        # No result tuple is produced for a chapter without this flag set.
        return False

    return (
        store_chapter_content,
        chosen.raw_chapter_content,
        chosen.pure_chapter_content,
        chosen.site_id,
        candidate_chapter_num,
        cluster_chapter_num,
        rank_chapter_num,
    )
# Example no. 2
# 0
def chapter_module():
    """
    Create a ChapterOptimizeModule and invoke its ``run()`` method.
    """
    ChapterOptimizeModule().run()