def safe_disambiguation_iteration():
    """Run one tortoise pass guarded by consistency checks.

    If check_author_paper_associations() reports a problem, rabbit([]) is
    run, then repair_author_paper_associations(), then rabbit([]) again.
    The check must then pass before tortoise() is started, and
    duplicated_tortoise_results_exist() must return a true value afterwards.

    Raises:
        AssertionError: if either check returns a false value.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )

    if not check_author_paper_associations():
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    # Explicit raises instead of `assert` statements: asserts are removed
    # when Python runs with -O, which would silently disable both checks.
    # AssertionError is kept so existing callers see the same exception type.
    if not check_author_paper_associations():
        raise AssertionError(
            'check_author_paper_associations() returned a false value')

    tortoise()

    # NOTE(review): the helper's name suggests a true value means duplicates
    # exist, yet a true value is what is required here -- confirm against
    # its definition.
    if not duplicated_tortoise_results_exist():
        raise AssertionError(
            'duplicated_tortoise_results_exist() returned a false value')
def run_tortoise(from_scratch, last_names_thresholds=None, single_threaded=False):
    """Prepare the tortoise cache and start the matching kind of tortoise run.

    Dispatch, in order of precedence:

    * last_names_thresholds is non-empty and single_threaded is true:
      tortoise_last_name() is called once per (last name, threshold) item,
      with wedge_threshold=threshold and from_mark=from_scratch.
    * last_names_thresholds is non-empty: one ``(args, kwargs)`` pair is
      built per item and the whole list is handed to tortoise_last_names().
      ``args`` is ``(last_name, threshold)`` when the threshold is truthy,
      otherwise ``(last_name, )``; ``kwargs`` is
      ``{'from_mark': from_scratch}`` when from_scratch is truthy,
      otherwise ``{'pure': from_scratch}``.
    * from_scratch is truthy: tortoise_from_scratch() is called.
    * otherwise: tortoise() is run on the papers returned by
      get_modified_papers_since() for the most recent 'tortoise' user log
      entry (an empty list when there is none), and a new log entry is
      inserted with the time taken before the run started.

    Args:
        from_scratch: flag selecting a from-scratch run; also forwarded to
            the per-last-name functions as described above.
        last_names_thresholds: optional mapping of last name to threshold
            (only ``.items()`` is used).
        single_threaded: when true, last names are processed one by one.
    """
    _prepare_tortoise_cache()
    from invenio.bibauthorid_tortoise import (
        tortoise,
        tortoise_from_scratch,
        tortoise_last_name,
        tortoise_last_names,
    )

    if last_names_thresholds:
        if single_threaded:
            for surname, wedge in last_names_thresholds.items():
                tortoise_last_name(surname, wedge_threshold=wedge,
                                   from_mark=from_scratch)
            return

        jobs = []
        for surname, wedge in last_names_thresholds.items():
            # A fresh kwargs dict per job, exactly one key in each.
            options = ({'from_mark': from_scratch} if from_scratch
                       else {'pure': from_scratch})
            positional = (surname, wedge) if wedge else (surname, )
            jobs.append((positional, options))
        tortoise_last_names(jobs)
        return

    if from_scratch:
        tortoise_from_scratch()
        return

    # Incremental run: take the timestamp first so the new log entry marks
    # the moment before the run began.
    started_at = get_db_time()
    log_user = 'tortoise'
    previous = get_user_logs(userinfo=log_user, only_most_recent=True)
    # previous[0][2] is presumably the timestamp column of the log row --
    # TODO confirm against get_user_logs.
    changed = get_modified_papers_since(previous[0][2]) if previous else []
    tortoise(changed)
    insert_user_log(log_user, '-1', '', '', '', timestamp=started_at)
def safe_disambiguation_iteration():
    """Run one tortoise pass guarded by consistency checks.

    If check_personid_papers() reports a problem, rabbit([]) is run, then
    repair_personid(), then rabbit([]) again. The check must then pass
    before tortoise() is started, and check_results() must return a true
    value afterwards.

    Raises:
        AssertionError: if either check returns a false value.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )

    if not check_personid_papers():
        rabbit([])
        repair_personid()
        rabbit([])

    # Explicit raises instead of `assert` statements: asserts are removed
    # when Python runs with -O, which would silently disable both checks.
    # AssertionError is kept so existing callers see the same exception type.
    if not check_personid_papers():
        raise AssertionError('check_personid_papers() returned a false value')

    tortoise()

    if not check_results():
        raise AssertionError('check_results() returned a false value')
def run_tortoise(from_scratch):
    """Start a full or an incremental tortoise run.

    When from_scratch is truthy, tortoise_from_scratch() is called and
    nothing else happens. Otherwise tortoise() is run on the record ids
    returned by get_recently_modified_record_ids() for the most recent
    'tortoise' user log entry (an empty list when no entry exists), and a
    new log entry is inserted with the time taken before the run started.

    Args:
        from_scratch: truthy to request a from-scratch run.
    """
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    # Take the timestamp first so the new log entry marks the moment
    # before the run began.
    started_at = get_sql_time()
    log_user = 'tortoise'
    previous = get_user_log(userinfo=log_user, only_most_recent=True)

    changed = []
    if previous:
        # previous[0][2] is presumably the timestamp column of the log
        # row -- TODO confirm against get_user_log.
        changed = get_recently_modified_record_ids(previous[0][2])

    tortoise(changed)
    insert_user_log(log_user, '-1', '', '', '', timestamp=started_at)
def test_accuracy():
    """Run a pure tortoise pass on consistent data and return matched_claims().

    If check_author_paper_associations() reports a problem, rabbit([]) is
    run, then repair_author_paper_associations(), then rabbit([]) again.
    The check is then asserted, tortoise(pure=True) is run, and
    duplicated_tortoise_results_exist() is asserted.

    Returns:
        Whatever matched_claims() returns.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_author_paper_associations,
        duplicated_tortoise_results_exist,
        repair_author_paper_associations,
    )
    from invenio.bibauthorid_merge import matched_claims

    associations_ok = check_author_paper_associations()
    if not associations_ok:
        # Repair is bracketed by two rabbit([]) runs.
        rabbit([])
        repair_author_paper_associations()
        rabbit([])

    # The check is re-run here whether or not a repair took place.
    assert check_author_paper_associations()

    tortoise(pure=True)
    assert duplicated_tortoise_results_exist()

    return matched_claims()
def run_tortoise(from_scratch):
    """Start a full or an incremental tortoise run.

    When from_scratch is truthy, tortoise_from_scratch() is called and
    nothing else happens. Otherwise tortoise() is run on the papers
    returned by get_modified_papers_since() for the most recent 'tortoise'
    user log entry (an empty list when no entry exists), and a new log
    entry is inserted with the time taken before the run started.

    Args:
        from_scratch: truthy to request a from-scratch run.
    """
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    # Take the timestamp first so the new log entry marks the moment
    # before the run began.
    started_at = get_db_time()
    log_user = 'tortoise'
    previous = get_user_logs(userinfo=log_user, only_most_recent=True)
    # previous[0][2] is presumably the timestamp column of the log row --
    # TODO confirm against get_user_logs.
    changed = get_modified_papers_since(previous[0][2]) if previous else []
    tortoise(changed)
    insert_user_log(log_user, '-1', '', '', '', timestamp=started_at)
def test_accuracy():
    """Run a pure tortoise pass on consistent data and return matched_claims().

    If check_personid_papers() reports a problem, rabbit([]) is run, then
    repair_personid(), then rabbit([]) again. The check is then asserted,
    tortoise(pure=True) is run, and check_results() is asserted.

    Returns:
        Whatever matched_claims() returns.
    """
    from invenio.bibauthorid_tortoise import tortoise
    from invenio.bibauthorid_rabbit import rabbit
    from invenio.bibauthorid_personid_maintenance import (
        check_personid_papers,
        check_results,
        repair_personid,
    )
    from invenio.bibauthorid_merge import matched_claims

    papers_ok = check_personid_papers()
    if not papers_ok:
        # Repair is bracketed by two rabbit([]) runs.
        rabbit([])
        repair_personid()
        rabbit([])

    # The check is re-run here whether or not a repair took place.
    assert check_personid_papers()

    tortoise(pure=True)
    assert check_results()

    return matched_claims()