def _run_update_authornames_tables_from_paper(record_ids=None, all_records=False):
    '''
    Refresh the authornames cache tables from papers.

    When record_ids is given or all_records is set, record_ids is handed
    unchanged to update_authornames_tables_from_paper() and no log entry is
    written.

    Otherwise the papers to process come from get_papers_recently_modified().
    If a 'UATFP' daemon log entry exists, the third field of its most recent
    row is passed as the date; if none exists, the helper is called without
    a date. A new 'UATFP' log entry is written before the update is run.

    @note: This should be run as often as possible to keep authornames and
        authornames_bibrefs cache tables up to date.

    @param record_ids: records to process, passed through as is
    @type record_ids: presumably a collection of record ids, or None
    @param all_records: skip the log-based selection of papers
    @type all_records: bool
    '''
    if all_records or record_ids:
        # NOTE(review): this message is emitted for an explicit record_ids
        # selection as well, not only when all_records is set.
        bibtask.write_message(
            "update_authornames_tables_from_paper: Running on all papers ",
            stream=sys.stdout, verbose=0)
        update_authornames_tables_from_paper(record_ids)
        return

    last_log = get_user_log(userinfo='daemon', action='UATFP',
                            only_most_recent=True)
    has_previous_run = len(last_log) >= 1

    if has_previous_run:
        # Select only the papers modified since the date of the last log row.
        recently_modified, min_date = get_papers_recently_modified(
            date=last_log[0][2])
    else:
        # No earlier log entry: the helper is called without a date.
        # Probably better to write the log on the first authornames population.
        # @todo: authornames population writes the log
        recently_modified, min_date = get_papers_recently_modified()

    # The log entry is written before the update itself is attempted.
    insert_user_log(
        'daemon', '-1', 'UATFP', 'bibsched', 'status',
        comment='bibauthorid_daemon, update_authornames_tables_from_paper',
        timestamp=min_date[0][0])

    # The "nothing to do" shortcut only applies when a previous run exists;
    # without one the update is called even on an empty selection.
    if has_previous_run and not recently_modified:
        bibtask.write_message(
            "update_authornames_tables_from_paper: All names up to date.",
            stream=sys.stdout, verbose=0)
        return

    paper_count = len(recently_modified)
    bibtask.write_message(
        "update_authornames_tables_from_paper: Running on %s papers " % paper_count,
        stream=sys.stdout, verbose=0)
    update_authornames_tables_from_paper(recently_modified)
def _get_personids_to_update_extids(papers=None):
    '''
    Collect the personids whose external ids should be recalculated.

    The candidate records are those returned by
    get_recently_modified_record_ids() for the date found in the most recent
    'PID_UPDATE' daemon log entry, or get_all_valid_bibrecs() when there is
    no such entry. If papers is given, the candidates are intersected with
    it. When bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS is set,
    only the records returned by get_claimed_papers_from_papers() are kept.

    @param papers: papers
    @type papers: set or None
    @return: personids attached to the remaining records, or None when no
        candidate record is left before the claimed-papers filter
    @rtype: set or None
    '''
    last_log = get_user_log(userinfo='daemon', action='PID_UPDATE',
                            only_most_recent=True)
    # The third field of the log row is used as the "last run" date.
    modified_bibrecs = (get_recently_modified_record_ids(last_log[0][2])
                        if last_log
                        else get_all_valid_bibrecs())

    if papers:
        # In-place intersection, as in the original; assumes the helpers
        # return a set-like object -- TODO confirm.
        modified_bibrecs &= set(papers)

    if not modified_bibrecs:
        return None

    if bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS:
        claimed_rows = get_claimed_papers_from_papers(modified_bibrecs)
        # The first column of each returned row is taken as the record.
        modified_bibrecs = [row[0] for row in claimed_rows]

    personids = set()
    for bibrec in modified_bibrecs:
        personids.update(get_personids_from_bibrec(bibrec))
    return personids
def run_rabbit(paperslist, all_records=False):
    """
    Dispatch a rabbit_with_log() run for the requested selection of papers.

    - paperslist non-empty: rabbit_with_log() runs on it with partial=True.
    - paperslist empty and all_records set, or no 'PID_UPDATE' daemon log
      entry found: rabbit_with_log() is called with None as paper list.
    - otherwise: it runs on the records returned by
      get_recently_modified_record_ids() for the date of the most recent
      'PID_UPDATE' log entry; if there are none, only a message is written.

    @param paperslist: explicit papers to process (may be empty or None)
    @param all_records: request a run that is not limited to recent changes
    @type all_records: bool
    """
    if paperslist:
        rabbit_with_log(
            paperslist,
            True,
            "bibauthorid_daemon, personid_fast_assign_papers on " + str(paperslist),
            partial=True,
        )
        return

    full_run_comment = "bibauthorid_daemon, update_personid on all papers"
    if all_records:
        rabbit_with_log(None, True, full_run_comment)
        return

    last_log = get_user_log(userinfo="daemon", action="PID_UPDATE", only_most_recent=True)
    if len(last_log) < 1:
        # No previous run is logged, so the run is not restricted either.
        rabbit_with_log(None, True, full_run_comment)
        return

    # Select only the records modified since the date of the last log row.
    recently_modified = get_recently_modified_record_ids(date=last_log[0][2])
    if not recently_modified:
        bibtask.write_message(
            "update_personID_table_from_paper: All person entities up to date.",
            stream=sys.stdout,
            verbose=0,
        )
        return

    bibtask.write_message(
        "update_personID_table_from_paper: Running on: " + str(recently_modified),
        stream=sys.stdout,
        verbose=0,
    )
    run_comment = "bibauthorid_daemon, run_personid_fast_assign_papers on " + str(
        [paperslist, all_records, recently_modified]
    )
    rabbit_with_log(recently_modified, True, run_comment)
def run_tortoise(from_scratch):
    '''
    Run tortoise, either from scratch or on recently modified records.

    With from_scratch set, tortoise_from_scratch() is called and nothing is
    logged. Otherwise tortoise() receives the records returned by
    get_recently_modified_record_ids() for the date of the most recent
    'tortoise' user log entry (an empty list when no entry exists), and a new
    'tortoise' log entry is written afterwards.

    @param from_scratch: select the from-scratch run
    @type from_scratch: bool
    '''
    # Imported at call time, as in the original.
    from bibauthorid_tortoise import tortoise, tortoise_from_scratch

    if from_scratch:
        tortoise_from_scratch()
        return

    log_user = 'tortoise'
    # Captured before the run; it becomes the timestamp of the new log entry.
    run_started = get_sql_time()

    previous_run = get_user_log(userinfo=log_user, only_most_recent=True)
    changed_records = (get_recently_modified_record_ids(previous_run[0][2])
                       if previous_run
                       else [])

    tortoise(changed_records)
    insert_user_log(log_user, '-1', '', '', '', timestamp=run_started)
def run_rabbit(paperslist, all_records=False):
    '''
    Dispatch a rabbit_with_log() run for the requested selection of papers.

    - paperslist non-empty: rabbit_with_log() runs on it with partial=True.
    - paperslist empty and all_records set, or no 'PID_UPDATE' daemon log
      entry found: rabbit_with_log() is called with None as paper list.
    - otherwise: it runs on the records returned by
      get_recently_modified_record_ids() for the date of the most recent
      'PID_UPDATE' log entry; if there are none, only a message is written.

    @param paperslist: explicit papers to process (may be empty or None)
    @param all_records: request a run that is not limited to recent changes
    @type all_records: bool
    '''
    if paperslist:
        partial_comment = ('bibauthorid_daemon, personid_fast_assign_papers on '
                           + str(paperslist))
        rabbit_with_log(paperslist, True, partial_comment, partial=True)
        return

    all_papers_comment = 'bibauthorid_daemon, update_personid on all papers'
    if all_records:
        rabbit_with_log(None, True, all_papers_comment)
        return

    last_log = get_user_log(userinfo='daemon', action='PID_UPDATE',
                            only_most_recent=True)
    if len(last_log) < 1:
        # No previous run is logged, so the run is not restricted either.
        rabbit_with_log(None, True, all_papers_comment)
        return

    # Select only the records modified since the date of the last log row.
    recently_modified = get_recently_modified_record_ids(date=last_log[0][2])
    if recently_modified:
        bibtask.write_message(
            "update_personID_table_from_paper: Running on: " + str(recently_modified),
            stream=sys.stdout, verbose=0)
        rabbit_with_log(
            recently_modified, True,
            'bibauthorid_daemon, run_personid_fast_assign_papers on '
            + str([paperslist, all_records, recently_modified]))
    else:
        bibtask.write_message(
            "update_personID_table_from_paper: All person entities up to date.",
            stream=sys.stdout, verbose=0)
def _run_personid_gc(paperslist, all_records=False):
    '''
    Run the personid update for the requested selection of papers, writing a
    'PGC' daemon log entry before the work starts.

    - paperslist non-empty: update_authornames_tables_from_paper() and then
      update_personID_table_from_paper() are run on it.
    - paperslist empty and all_records set, or no 'PGC' log entry found:
      update_personID_table_from_paper() is called without arguments.
    - otherwise: the papers returned by get_papers_recently_modified() for
      the date of the most recent 'PGC' log entry are handed to
      personid_fast_assign_papers(); if there are none, only a message is
      written.

    @param paperslist: explicit papers to process (may be empty or None)
    @param all_records: request a run that is not limited to recent changes
    @type all_records: bool
    '''
    if paperslist:
        insert_user_log(
            'daemon', '-1', 'PGC', 'bibsched', 'status',
            comment='bibauthorid_daemon, personid_gc (update_personid_from_papers) on '
                    + str(paperslist))
        update_authornames_tables_from_paper(paperslist)
        update_personID_table_from_paper(paperslist)
        return

    if not all_records:
        last_log = get_user_log(userinfo='daemon', action='PGC',
                                only_most_recent=True)
        if len(last_log) >= 1:
            # Select only the papers modified since the date of the last
            # log row.
            recently_modified, min_date = get_papers_recently_modified(
                date=last_log[0][2])
            incremental_comment = (
                'bibauthorid_daemon, update_personid_from_papers on '
                + str([paperslist, all_records, recently_modified]))
            insert_user_log('daemon', '-1', 'PGC', 'bibsched', 'status',
                            comment=incremental_comment,
                            timestamp=min_date[0][0])

            if recently_modified:
                bibtask.write_message(
                    "update_personID_table_from_paper: Running on: "
                    + str(recently_modified),
                    stream=sys.stdout, verbose=0)
                personid_fast_assign_papers(recently_modified)
            else:
                bibtask.write_message(
                    "update_personID_table_from_paper: All person entities up to date.",
                    stream=sys.stdout, verbose=0)
            return

    # Reached when all_records is set or when no previous 'PGC' log exists.
    # NOTE(review): unlike the explicit-papers path, the authornames tables
    # are not refreshed here (that call was commented out in the original).
    insert_user_log(
        'daemon', '-1', 'PGC', 'bibsched', 'status',
        comment='bibauthorid_daemon, personid_gc (update_personid_from_papers) on all papers')
    update_personID_table_from_paper()
def _run_update_personID_table_from_paper(record_ids=None, all_records=False):
    '''
    Run update_personID_table_from_paper() on the requested papers.
    This is removing no-longer existing papers from the personid table.

    When record_ids is given or all_records is set, record_ids is handed
    unchanged to update_personID_table_from_paper() and no log entry is
    written.

    Otherwise the papers to process come from get_papers_recently_modified().
    If a 'UPITFP' daemon log entry exists, the third field of its most recent
    row is passed as the date; if none exists, the helper is called without
    a date. A new 'UPITFP' log entry is written before the update is run.

    @note: Update recommended monthly.
    @warning: quite resource intensive.

    @param record_ids: records to process, passed through as is
    @type record_ids: presumably a collection of record ids, or None
    @param all_records: skip the log-based selection of papers
    @type all_records: bool
    '''
    if record_ids or all_records:
        update_personID_table_from_paper(record_ids)
        return

    last_log = get_user_log(userinfo='daemon', action='UPITFP',
                            only_most_recent=True)
    has_previous_run = len(last_log) >= 1

    if has_previous_run:
        # Select only the papers modified since the date of the last log row.
        recently_modified, min_date = get_papers_recently_modified(
            date=last_log[0][2])
    else:
        # Should not process all papers, hence authornames population writes
        # the appropriate log. In case the log is missing, process everything.
        recently_modified, min_date = get_papers_recently_modified()

    # The log entry is written before the update itself is attempted.
    insert_user_log(
        'daemon', '-1', 'UPITFP', 'bibsched', 'status',
        comment='bibauthorid_daemon, update_personID_table_from_paper',
        timestamp=min_date[0][0])

    # The "nothing to do" shortcut only applies when a previous run exists;
    # without one the update is called even on an empty selection.
    if has_previous_run and not recently_modified:
        bibtask.write_message(
            "update_personID_table_from_paper: All person entities up to date.",
            stream=sys.stdout, verbose=0)
        return

    bibtask.write_message(
        "update_personID_table_from_paper: Running on: " + str(recently_modified),
        stream=sys.stdout, verbose=0)
    update_personID_table_from_paper(recently_modified)