예제 #1
0
def extract_highlights_process(pdf_dir_path, opath):
    """Dispatch ``extract_pdf_highlights`` via ``util.multi_thread_process_files``.

    ``pdf_dir_path`` is handed to the helper together with ``'pdf'``
    (presumably a file-extension filter -- confirm against the helper) and
    the module-level ``thread_num_highlights`` (presumably the worker
    count). ``opath`` is forwarded to the worker through ``args``.
    """
    worker_args = [opath]
    util.multi_thread_process_files(
        pdf_dir_path, 'pdf', thread_num_highlights, extract_pdf_highlights,
        proc_desc='extracted highlights', args=worker_args)
예제 #2
0
def score_exp(score_files_path, out_file, threshold, manual_ann=None):
    """Run ``ah.score_paper_threshold`` over the ``*_scores.json`` files.

    The work is dispatched through ``utils.multi_thread_process_files``
    with ``1`` as its third argument (presumably a single worker thread --
    confirm against the helper); ``pp_score_exp`` is supplied as the
    callback. A fresh list, ``out_file``, a ``HighLighter`` instance,
    ``threshold`` and ``manual_ann`` are forwarded to the worker in that
    order.
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    results = []
    highlighter = ah.HighLighter.get_instance()
    worker_args = [results, out_file, highlighter, threshold, manual_ann]
    utils.multi_thread_process_files(
        score_files_path, '', 1, ah.score_paper_threshold,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=pp_score_exp)
예제 #3
0
def sapienta_process(pdf_dir_path, opath):
    """Dispatch ``sapienta_annotate`` via ``util.multi_thread_process_files``.

    ``'pdf'`` is passed as the helper's second argument (presumably a
    file-extension filter -- confirm) and the module-level
    ``thread_num_sapienta`` as its third (presumably the worker count).
    ``opath`` is forwarded to the worker through ``args``.
    """
    worker_args = [opath]
    util.multi_thread_process_files(
        pdf_dir_path, 'pdf', thread_num_sapienta, sapienta_annotate,
        proc_desc='annotated by Sapienta', args=worker_args)
예제 #4
0
def clean_ann_files(ann_files_path):
    """Apply ``remove_ann_sentences`` to the ``*_ann.json`` files.

    Dispatches through ``utils.multi_thread_process_files`` with ``10`` as
    the third argument (presumably the worker count -- confirm against the
    helper). Only names ending in ``_ann.json`` pass the filter.
    """
    def _is_ann_file(fn):
        return fn.endswith('_ann.json')

    utils.multi_thread_process_files(
        ann_files_path, '', 10, remove_ann_sentences,
        file_filter_func=_is_ann_file)
예제 #5
0
def do_highlighting(score_path):
    """Run ``ah.score_paper_threshold`` on ``*_scores.json`` with threshold 0.4.

    The worker receives a fresh list, the path
    ``score_path + '/highlight-results.json'``, a ``HighLighter`` instance,
    the threshold and ``None`` (in that order). ``pp_highlight`` is passed
    as the callback to ``utils.multi_thread_process_files``; ``3`` is the
    helper's third argument (presumably the worker count -- confirm).
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    cutoff = .4
    results = []
    highlighter = ah.HighLighter.get_instance()
    results_file = score_path + '/highlight-results.json'
    worker_args = [results, results_file, highlighter, cutoff, None]
    utils.multi_thread_process_files(
        score_path, '', 3, ah.score_paper_threshold,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=pp_highlight)
예제 #6
0
def corpus_simple_stat(ann_files_path):
    """Run ``paper_stat`` on the ``*_ann.json`` files, then ``pp_paper_stat``.

    A fresh list is the only extra worker argument (presumably a shared
    result accumulator -- confirm against ``paper_stat``). ``10`` is the
    third argument to ``utils.multi_thread_process_files`` (presumably the
    worker count).
    """
    def _is_ann_file(fn):
        return fn.endswith('_ann.json')

    collected = []
    utils.multi_thread_process_files(
        ann_files_path, '', 10, paper_stat,
        args=[collected],
        file_filter_func=_is_ann_file,
        callback_func=pp_paper_stat)
예제 #7
0
def compute_sp_ne_stat(score_files_path):
    """Run ``get_sp_ne_associations`` on ``*_scores.json``, then ``pp_sp_ne_asso``.

    A fresh list is the only extra worker argument (presumably a shared
    result accumulator -- confirm against the worker). ``10`` is the third
    argument to ``utils.multi_thread_process_files`` (presumably the worker
    count).
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    collected = []
    utils.multi_thread_process_files(
        score_files_path, '', 10, get_sp_ne_associations,
        args=[collected],
        file_filter_func=_is_score_file,
        callback_func=pp_sp_ne_asso)
예제 #8
0
def compute_overall_ncbo_stat(ann_files_path):
    """Run ``get_ncbo_stats`` on the ``*_ann.json`` files, then ``pp_ncbo_stat``.

    A fresh list is the only extra worker argument (presumably a shared
    result accumulator -- confirm against the worker). ``10`` is the third
    argument to ``utils.multi_thread_process_files`` (presumably the worker
    count).
    """
    def _is_ann_file(fn):
        return fn.endswith('_ann.json')

    collected = []
    utils.multi_thread_process_files(
        ann_files_path, '', 10, get_ncbo_stats,
        args=[collected],
        file_filter_func=_is_ann_file,
        callback_func=pp_ncbo_stat)
예제 #9
0
def analyse_language_pattern_stats(score_files_path, out_file):
    """Run ``get_language_pattern_stats`` on the ``*_scores.json`` files.

    The worker receives a fresh list, ``out_file`` and a ``HighLighter``
    instance (in that order); ``pp_pattern_stats`` is supplied as the
    callback. ``10`` is the third argument to
    ``utils.multi_thread_process_files`` (presumably the worker count --
    confirm against the helper).
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    collected = []
    highlighter = ah.HighLighter.get_instance()
    worker_args = [collected, out_file, highlighter]
    utils.multi_thread_process_files(
        score_files_path, '', 10, get_language_pattern_stats,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=pp_pattern_stats)
예제 #10
0
def visualise_highlights_3D(annotation_files_path, out_file):
    """Run ``get3DCords`` on the ``*_scores.json`` files, then ``pp_3D``.

    The worker receives a fresh list, ``out_file`` and a ``HighLighter``
    instance (in that order). ``10`` is the third argument to
    ``utils.multi_thread_process_files`` (presumably the worker count --
    confirm against the helper).
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    collected = []
    highlighter = HighLighter.get_instance()
    worker_args = [collected, out_file, highlighter]
    utils.multi_thread_process_files(
        annotation_files_path, '', 10, get3DCords,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=pp_3D)
예제 #11
0
def extract_geometrics(annotation_files_path, gm_feature_output_file):
    """Run ``geometric_analysis`` on the ``*_ann.json`` files.

    The worker receives a fresh list, ``gm_feature_output_file`` and a
    ``HighLighter`` instance (in that order);
    ``post_process_geometric_analysis`` is supplied as the callback. ``10``
    is the third argument to ``utils.multi_thread_process_files``
    (presumably the worker count -- confirm against the helper).
    """
    def _is_ann_file(fn):
        return fn.endswith('_ann.json')

    collected = []
    highlighter = ah.HighLighter.get_instance()
    worker_args = [collected, gm_feature_output_file, highlighter]
    utils.multi_thread_process_files(
        annotation_files_path, '', 10, geometric_analysis,
        args=worker_args,
        file_filter_func=_is_ann_file,
        callback_func=post_process_geometric_analysis)
예제 #12
0
def lp_dist_cal(score_files_path, out_file):
    """Run ``paper_language_pattern_dist`` on the ``*_scores.json`` files.

    The worker receives a fresh list, a ``HighLighter`` instance and
    ``out_file`` (in that order -- note the highlighter precedes the output
    path here); ``lp_dist_cb`` is supplied as the callback. ``10`` is the
    third argument to ``utils.multi_thread_process_files`` (presumably the
    worker count -- confirm against the helper).
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    collected = []
    highlighter = ah.HighLighter.get_instance()
    worker_args = [collected, highlighter, out_file]
    utils.multi_thread_process_files(
        score_files_path, '', 10, paper_language_pattern_dist,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=lp_dist_cb)
예제 #13
0
def summarise_all_papers(ann_path, summ_path, callback=None):
    """Run ``summ`` on the ``*_ann.json`` files under ``ann_path``.

    ``HighLighter.get_instance()`` is called six times and the results are
    handed to ``utils.multi_thread_process_files`` as ``thread_wise_objs``
    (presumably one object per worker thread -- confirm against the
    helper; whether ``get_instance`` yields distinct objects is not
    visible here). ``summ_path`` is forwarded to the worker and
    ``callback`` is passed through as ``callback_func``.
    """
    def _is_ann_file(f):
        return f.endswith('_ann.json')

    worker_count = 6
    highlighters = [HighLighter.get_instance() for _ in range(worker_count)]
    utils.multi_thread_process_files(
        ann_path, '', worker_count, summ,
        args=[summ_path],
        thread_wise_objs=highlighters,
        file_filter_func=_is_ann_file,
        callback_func=callback)
예제 #14
0
def do_highlighting(score_path):
    """Mark the job as highlighting, then score the ``*_scores.json`` files.

    Steps, in order: resolve the job path and id from ``score_path``, call
    ``update_score_path_summ``, report ``status_code.HIGHLIGHTING`` through
    ``update_job_progress``, and finally dispatch
    ``ah.score_paper_threshold`` (threshold 0.4) via
    ``utils.multi_thread_process_files`` with ``finish_highlighting`` as
    the callback. The worker receives a fresh list, ``score_path``, a
    ``HighLighter`` instance, the threshold and ``None``.
    """
    def _is_score_file(fn):
        return fn.endswith('_scores.json')

    job_path, job_id = get_job_path_id_from_score_path(score_path)
    update_score_path_summ(score_path)
    update_job_progress(
        job_path, job_id, status_code.HIGHLIGHTING, 'highlighting...')

    cutoff = .4
    results = []
    highlighter = ah.HighLighter.get_instance()
    worker_args = [results, score_path, highlighter, cutoff, None]
    utils.multi_thread_process_files(
        score_path, '', 3, ah.score_paper_threshold,
        args=worker_args,
        file_filter_func=_is_score_file,
        callback_func=finish_highlighting)
예제 #15
0
def main():
    """Dispatch ``ann_article`` for ``./local_exp/42-extra-papers/``.

    Calls ``util.multi_thread_process_files`` with ``'xml'`` as its second
    argument (presumably a file-extension filter -- confirm against the
    helper) and ``30`` as its third (presumably the worker count).
    """
    corpus_dir = './local_exp/42-extra-papers/'
    worker_count = 30
    util.multi_thread_process_files(
        corpus_dir, 'xml', worker_count, ann_article)
예제 #16
0
def append_abstract_label_for_all(xml_path):
    """Dispatch ``append_abstract_label`` via ``util.multi_thread_process_files``.

    ``'xml'`` is passed as the helper's second argument (presumably a
    file-extension filter -- confirm against the helper) and ``10`` as its
    third (presumably the worker count).
    """
    worker_count = 10
    util.multi_thread_process_files(
        xml_path, 'xml', worker_count, append_abstract_label)