def extract_highlights_process(pdf_dir_path, opath):
    """Extract highlights from every PDF under *pdf_dir_path*.

    Fans the work out across ``thread_num_highlights`` worker threads via the
    project's multi-threaded file processor; each PDF is handled by
    ``extract_pdf_highlights`` with *opath* forwarded as an extra argument.
    """
    util.multi_thread_process_files(
        pdf_dir_path,
        'pdf',
        thread_num_highlights,
        extract_pdf_highlights,
        proc_desc='extracted highlights',
        args=[opath])
def score_exp(score_files_path, out_file, threshold, manual_ann=None):
    """Run a scoring experiment over all ``*_scores.json`` files.

    Each matching file is scored by ``ah.score_paper_threshold`` (single
    thread) using a shared ``HighLighter`` instance; results accumulate in a
    shared container and ``pp_score_exp`` runs as the completion callback.
    """
    collected = []
    highlighter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        score_files_path,
        '',
        1,
        ah.score_paper_threshold,
        args=[collected, out_file, highlighter, threshold, manual_ann],
        file_filter_func=lambda fn: fn.endswith('_scores.json'),
        callback_func=pp_score_exp)
def sapienta_process(pdf_dir_path, opath):
    """Annotate every PDF under *pdf_dir_path* with Sapienta.

    Uses ``thread_num_sapienta`` worker threads; each PDF is passed to
    ``sapienta_annotate`` together with the output path *opath*.
    """
    util.multi_thread_process_files(
        pdf_dir_path,
        'pdf',
        thread_num_sapienta,
        sapienta_annotate,
        proc_desc='annotated by Sapienta',
        args=[opath])
def clean_ann_files(ann_files_path):
    """Strip sentences from all ``*_ann.json`` annotation files in place.

    Runs ``remove_ann_sentences`` over each matching file using 10 worker
    threads.
    """
    is_ann_file = lambda fn: fn.endswith('_ann.json')
    utils.multi_thread_process_files(
        ann_files_path,
        '',
        10,
        remove_ann_sentences,
        file_filter_func=is_ann_file)
def do_highlighting(score_path, threshold=0.4):
    """Highlight all ``*_scores.json`` files under *score_path*.

    Scores each file with ``ah.score_paper_threshold`` across 3 worker
    threads, collecting results into a shared container and writing the
    combined output to ``<score_path>/highlight-results.json``;
    ``pp_highlight`` runs as the completion callback.

    :param score_path: directory containing ``*_scores.json`` files.
    :param threshold: score cut-off for highlighting (default 0.4, the
        previously hard-coded value — kept as the default for backward
        compatibility).

    NOTE(review): another ``do_highlighting`` definition appears later in this
    file and, being defined afterwards, will shadow this one at import time —
    confirm which of the two is intended to survive.
    """
    ret_container = []
    hter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        score_path,
        '',
        3,
        ah.score_paper_threshold,
        args=[ret_container, score_path + '/highlight-results.json',
              hter, threshold, None],
        file_filter_func=lambda fn: fn.endswith('_scores.json'),
        callback_func=pp_highlight)
def corpus_simple_stat(ann_files_path):
    """Compute per-paper statistics for every ``*_ann.json`` file.

    Runs ``paper_stat`` over each matching file with 10 worker threads,
    accumulating results into a shared list; ``pp_paper_stat`` is invoked
    when processing finishes.
    """
    stats = []
    utils.multi_thread_process_files(
        ann_files_path,
        '',
        10,
        paper_stat,
        args=[stats],
        file_filter_func=lambda name: name.endswith('_ann.json'),
        callback_func=pp_paper_stat)
def compute_sp_ne_stat(score_files_path):
    """Gather sentence-pattern / named-entity associations from score files.

    Runs ``get_sp_ne_associations`` over every ``*_scores.json`` file with
    10 worker threads; results accumulate in a shared list and
    ``pp_sp_ne_asso`` runs on completion.
    """
    associations = []
    utils.multi_thread_process_files(
        score_files_path,
        '',
        10,
        get_sp_ne_associations,
        args=[associations],
        file_filter_func=lambda name: name.endswith('_scores.json'),
        callback_func=pp_sp_ne_asso)
def compute_overall_ncbo_stat(ann_files_path):
    """Collect NCBO annotation statistics across all ``*_ann.json`` files.

    Runs ``get_ncbo_stats`` per file with 10 worker threads, accumulating
    into a shared list; ``pp_ncbo_stat`` runs as the completion callback.
    """
    ncbo_results = []
    utils.multi_thread_process_files(
        ann_files_path,
        '',
        10,
        get_ncbo_stats,
        args=[ncbo_results],
        file_filter_func=lambda name: name.endswith('_ann.json'),
        callback_func=pp_ncbo_stat)
def analyse_language_pattern_stats(score_files_path, out_file):
    """Analyse language-pattern statistics over all ``*_scores.json`` files.

    Each file is processed by ``get_language_pattern_stats`` (10 worker
    threads) with a shared ``HighLighter`` instance and *out_file* as the
    destination; ``pp_pattern_stats`` runs when processing completes.
    """
    results = []
    highlighter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        score_files_path,
        '',
        10,
        get_language_pattern_stats,
        args=[results, out_file, highlighter],
        file_filter_func=lambda name: name.endswith('_scores.json'),
        callback_func=pp_pattern_stats)
def visualise_highlights_3D(annotation_files_path, out_file):
    """Produce 3-D coordinates for visualising highlights.

    Runs ``get3DCords`` over each matching file (10 worker threads) with a
    shared ``HighLighter`` and *out_file* forwarded; ``pp_3D`` runs on
    completion. NOTE(review): despite the parameter name, the filter selects
    ``*_scores.json`` files, not ``*_ann.json`` — confirm this is intended.
    """
    coords = []
    highlighter = HighLighter.get_instance()
    utils.multi_thread_process_files(
        annotation_files_path,
        '',
        10,
        get3DCords,
        args=[coords, out_file, highlighter],
        file_filter_func=lambda name: name.endswith('_scores.json'),
        callback_func=pp_3D)
def extract_geometrics(annotation_files_path, gm_feature_output_file):
    """Run geometric analysis on every ``*_ann.json`` annotation file.

    Each file is processed by ``geometric_analysis`` (10 worker threads)
    with a shared ``HighLighter`` and the feature output file forwarded;
    ``post_process_geometric_analysis`` runs as the completion callback.
    """
    features = []
    highlighter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        annotation_files_path,
        '',
        10,
        geometric_analysis,
        args=[features, gm_feature_output_file, highlighter],
        file_filter_func=lambda name: name.endswith('_ann.json'),
        callback_func=post_process_geometric_analysis)
def lp_dist_cal(score_files_path, out_file):
    """Compute language-pattern distributions per paper.

    Runs ``paper_language_pattern_dist`` over every ``*_scores.json`` file
    (10 worker threads) with a shared ``HighLighter`` and *out_file*
    forwarded; ``lp_dist_cb`` runs when all files are done.
    """
    distributions = []
    highlighter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        score_files_path,
        '',
        10,
        paper_language_pattern_dist,
        args=[distributions, highlighter, out_file],
        file_filter_func=lambda name: name.endswith('_scores.json'),
        callback_func=lp_dist_cb)
def summarise_all_papers(ann_path, summ_path, callback=None):
    """Summarise every ``*_ann.json`` paper under *ann_path*.

    Creates one ``HighLighter`` instance per worker thread (6 threads) so
    each thread owns its own object, then runs ``summ`` over each matching
    file with *summ_path* forwarded; *callback* (if given) runs on
    completion.
    """
    num_workers = 6
    # One HighLighter per thread: passed via thread_wise_objs so workers
    # don't share a single instance.
    per_thread_hters = [HighLighter.get_instance() for _ in range(num_workers)]
    utils.multi_thread_process_files(
        ann_path,
        '',
        num_workers,
        summ,
        args=[summ_path],
        thread_wise_objs=per_thread_hters,
        file_filter_func=lambda f: f.endswith('_ann.json'),
        callback_func=callback)
def do_highlighting(score_path, threshold=0.4):
    """Highlight scored papers for a job, updating job progress as it runs.

    Resolves the job path/id from *score_path*, refreshes the score-path
    summary, marks the job as HIGHLIGHTING, then scores every
    ``*_scores.json`` file with ``ah.score_paper_threshold`` across 3 worker
    threads; ``finish_highlighting`` runs as the completion callback.

    :param score_path: directory containing the job's ``*_scores.json`` files.
    :param threshold: score cut-off for highlighting (default 0.4, the
        previously hard-coded value — kept as the default for backward
        compatibility).

    NOTE(review): an earlier ``do_highlighting`` definition exists in this
    file; this later definition shadows it at import time — confirm which of
    the two is intended to survive.
    """
    job_path, job_id = get_job_path_id_from_score_path(score_path)
    update_score_path_summ(score_path)
    update_job_progress(job_path, job_id, status_code.HIGHLIGHTING,
                        'highlighting...')
    ret_container = []
    hter = ah.HighLighter.get_instance()
    utils.multi_thread_process_files(
        score_path,
        '',
        3,
        ah.score_paper_threshold,
        args=[ret_container, score_path, hter, threshold, None],
        file_filter_func=lambda fn: fn.endswith('_scores.json'),
        callback_func=finish_highlighting)
def main(path='./local_exp/42-extra-papers/', num_threads=30):
    """Annotate every XML article under *path* with ``ann_article``.

    :param path: directory of ``.xml`` article files (default is the
        previously hard-coded experiment directory, kept for backward
        compatibility).
    :param num_threads: worker-thread count for the fan-out (default 30,
        the previously hard-coded value).
    """
    util.multi_thread_process_files(path, 'xml', num_threads, ann_article)
def append_abstract_label_for_all(xml_path):
    """Append abstract labels to every XML file under *xml_path*.

    Runs ``append_abstract_label`` over each ``.xml`` file using 10 worker
    threads.
    """
    worker_count = 10
    util.multi_thread_process_files(
        xml_path,
        'xml',
        worker_count,
        append_abstract_label)