def _get_personids_to_update_extids(papers=None):
    '''
    Collect the personids whose external ids should be recalculated.

    The candidate records are the ones reported as recently modified with
    respect to the most recent 'PID_UPDATE' log entry of the 'daemon' user
    or, when no such entry exists, all valid bibrecs. A non-empty papers
    argument restricts the candidates to the records it contains.

    @param papers: papers to restrict the candidates to
    @type papers: set or None

    @return: personids attached to the candidate records, or None when
        there is no candidate record at all
    @rtype: set or None
    '''
    latest_entry = get_user_log(userinfo='daemon', action='PID_UPDATE',
                                only_most_recent=True)

    if not latest_entry:
        candidate_recs = get_all_valid_bibrecs()
    else:
        # Field 2 of the newest log entry is what gets handed over as the
        # reference point for "recently modified".
        candidate_recs = get_recently_modified_record_ids(latest_entry[0][2])

    if papers:
        candidate_recs &= set(papers)

    if not candidate_recs:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        claimed_rows = get_claimed_papers_from_papers(candidate_recs)
        candidate_recs = [row[0] for row in claimed_rows]

    pids = set()
    for rec in candidate_recs:
        pids.update(get_personids_from_bibrec(rec))

    return pids
def _get_personids_to_update_extids(papers=None):
    '''
    Gather the personids for which the external ids must be recomputed.

    Starts from the records reported as recently modified relative to the
    latest 'PID_UPDATE' entry logged by the 'daemon' user (or from all valid
    bibrecs if there is no such entry), optionally narrows them down to the
    given papers, and returns the personids found on those records.

    @param papers: papers to narrow the records down to
    @type papers: set or None

    @return: personids, or None if no record is left to look at
    @rtype: set or None
    '''
    log_rows = get_user_log(userinfo='daemon',
                            action='PID_UPDATE',
                            only_most_recent=True)

    if log_rows:
        # The value at index 2 of the first row is passed on as the date.
        since = log_rows[0][2]
        recs = get_recently_modified_record_ids(since)
    else:
        recs = get_all_valid_bibrecs()

    if papers:
        recs &= set(papers)

    if not recs:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        recs = [claimed[0] for claimed in get_claimed_papers_from_papers(recs)]

    result = set()
    for rec in recs:
        result = result | set(get_personids_from_bibrec(rec))
    return result
def run_tortoise(from_scratch):
    '''
    Run tortoise, either from scratch or on the records modified since its
    last logged run.

    When from_scratch is false, the records reported as modified since the
    most recent 'tortoise' user log entry are passed to tortoise (an empty
    list if there is no such entry), and a new log entry is inserted
    afterwards with the time at which this run started.

    @param from_scratch: if true, call tortoise_from_scratch and nothing else
    @type from_scratch: bool
    '''
    from invenio.bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    # Taken before any other work so that the entry logged at the end
    # carries the start time of this run.
    run_started = get_sql_time()
    log_user = 'tortoise'

    previous_run = get_user_log(userinfo=log_user, only_most_recent=True)
    touched = (get_recently_modified_record_ids(previous_run[0][2])
               if previous_run else [])

    tortoise(touched)
    insert_user_log(log_user, '-1', '', '', '', timestamp=run_started)
def run_rabbit(paperslist, all_records=False):
    '''
    Call rabbit_with_log on the appropriate set of papers.

    - A non-empty paperslist is passed on as is, with partial=True.
    - Otherwise, if all_records is true, rabbit_with_log is called with None
      as the list of papers.
    - Otherwise the most recent 'PID_UPDATE' log entry of the 'daemon' user
      is looked up: without one rabbit_with_log is called with None, with
      one it is called on the recently modified records only (and not at
      all if there are none).

    @param paperslist: papers to run on; may be empty or None
    @param all_records: request a run with None as the list of papers when
        no paperslist is given
    @type all_records: bool
    '''
    if paperslist:
        rabbit_with_log(paperslist, True,
                        'bibauthorid_daemon, personid_fast_assign_papers on '
                        + str(paperslist),
                        partial=True)
        return

    if all_records:
        rabbit_with_log(None, True,
                        'bibauthorid_daemon, update_personid on all papers')
        return

    last_log = get_user_log(userinfo='daemon', action='PID_UPDATE',
                            only_most_recent=True)

    if len(last_log) < 1:
        # No previous run has been logged.
        rabbit_with_log(None, True,
                        'bibauthorid_daemon, update_personid on all papers')
        return

    # Select only the most recent papers.
    recently_modified = get_recently_modified_record_ids(date=last_log[0][2])

    if not recently_modified:
        bibtask.write_message("update_personID_table_from_paper: "
                              "All person entities up to date.",
                              stream=sys.stdout, verbose=0)
        return

    bibtask.write_message("update_personID_table_from_paper: Running on: "
                          + str(recently_modified),
                          stream=sys.stdout, verbose=0)
    rabbit_with_log(recently_modified, True,
                    'bibauthorid_daemon, run_personid_fast_assign_papers on '
                    + str([paperslist, all_records, recently_modified]))
def run_rabbit(paperslist, all_records=False):
    '''
    Dispatch a rabbit_with_log call depending on the arguments.

    With a non-empty paperslist, rabbit_with_log gets exactly those papers
    (partial=True). Without one, it gets None as the list of papers when
    all_records is true or when the 'daemon' user has no 'PID_UPDATE' log
    entry; otherwise it gets the recently modified records, and is skipped
    altogether when there are none.

    @param paperslist: papers to run on; may be empty or None
    @param all_records: if true and paperslist is empty, call
        rabbit_with_log with None as the list of papers
    @type all_records: bool
    '''
    everything_msg = 'bibauthorid_daemon, update_personid on all papers'

    if paperslist:
        partial_msg = ('bibauthorid_daemon, personid_fast_assign_papers on '
                       + str(paperslist))
        rabbit_with_log(paperslist, True, partial_msg, partial=True)
    elif all_records:
        rabbit_with_log(None, True, everything_msg)
    else:
        entries = get_user_log(userinfo='daemon',
                               action='PID_UPDATE',
                               only_most_recent=True)
        if len(entries) >= 1:
            # Select only the most recent papers.
            modified = get_recently_modified_record_ids(date=entries[0][2])
            if modified:
                bibtask.write_message(
                    "update_personID_table_from_paper: Running on: "
                    + str(modified),
                    stream=sys.stdout,
                    verbose=0)
                recent_msg = (
                    'bibauthorid_daemon, run_personid_fast_assign_papers on '
                    + str([paperslist, all_records, modified]))
                rabbit_with_log(modified, True, recent_msg)
            else:
                bibtask.write_message(
                    "update_personID_table_from_paper: "
                    "All person entities up to date.",
                    stream=sys.stdout,
                    verbose=0)
        else:
            rabbit_with_log(None, True, everything_msg)