def task_submit_check_options(): """Last checks and updating on the options...""" if not (task_has_option('all') or task_has_option('collection') or task_has_option('field') or task_has_option('pattern') or task_has_option('matching') or task_has_option('recids')): task_set_option('last', 1) return True
def task_run_core():
    """Run the task by fetching arguments from the BibSched task queue.

    This is what BibSched will be invoking via daemon call.
    """
    fmts = task_get_option('format', 'HB,RECJSON')
    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        recids = intbitset()
        if task_has_option("all"):
            recids += all_records()

        if task_has_option("last"):
            recids += outdated_caches(fmt, last_updated)

        if task_has_option('ignore_without'):
            without_fmt = intbitset()
        else:
            without_fmt = missing_caches(fmt)
            recids += without_fmt

        cli_recids = split_cli_ids_arg(task_get_option('recids', ''))
        recids += cli_recids

        query_params = {'collection': task_get_option('collection', ''),
                        'field': task_get_option('field', ''),
                        'pattern': task_get_option('pattern', ''),
                        'matching': task_get_option('matching', '')}
        recids += query_records(query_params)

        bibreformat_task(fmt,
                         recids,
                         without_fmt,
                         not task_has_option('noprocess'))

    return True

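# The --recids argument above is parsed by split_cli_ids_arg(), an Invenio
# bibtask helper. Below is a minimal, self-contained sketch of the kind of
# parsing such a helper performs on input like "1,5-7,10"; the function is
# purely illustrative (a hypothetical stand-in), not the actual Invenio
# implementation.
def parse_recids_arg(arg):
    """Parse "1,5-7,10" into a set of record IDs: set([1, 5, 6, 7, 10])."""
    recids = set()
    for chunk in arg.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        if '-' in chunk:
            low, high = chunk.split('-', 1)
            recids.update(range(int(low), int(high) + 1))
        else:
            recids.add(int(chunk))
    return recids
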
def task_submit_check_options():
    """Check that options are valid."""
    if task_has_option('wjob'):
        jobnames = task_get_option('wjob')
        if jobnames:
            jobnames = jobnames.split(',')
            for jobname in jobnames:
                res = run_sql("SELECT COUNT(*) FROM expJOB WHERE jobname=%s",
                              (jobname,))
                if res and res[0][0]:
                    # okay, jobname exists
                    pass
                else:
                    write_message("Sorry, job name %s is not known. Exiting." % jobname)
                    return False
    return True

def update_rule_last_run(rule_name):
    """
    Set the last time a rule was run to now.

    This function should be called after a rule has been run.
    """
    if task_has_option('record_ids') or task_get_option('no_upload', False) \
            or task_get_option('no_tickets', False):
        return  # We don't want to update the database in this case

    updated = run_sql("UPDATE bibcheck_rules SET last_run=%s WHERE name=%s;",
                      (task_get_task_param('task_starting_time'), rule_name,))
    if not updated:  # rule not in the database, insert it
        run_sql("INSERT INTO bibcheck_rules(name, last_run) VALUES (%s, %s)",
                (rule_name, task_get_task_param('task_starting_time')))

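# A hedged sketch of a single-statement alternative to the UPDATE-then-INSERT
# pair in update_rule_last_run() above. It assumes a MySQL backend and a
# UNIQUE or PRIMARY KEY constraint on bibcheck_rules.name (both assumptions,
# not verified against the actual schema), and it deliberately omits the
# early-return guard on the record_ids/no_upload/no_tickets options.
def update_rule_last_run_upsert(rule_name):
    run_sql("INSERT INTO bibcheck_rules (name, last_run) VALUES (%s, %s) "
            "ON DUPLICATE KEY UPDATE last_run=VALUES(last_run)",
            (rule_name, task_get_task_param('task_starting_time')))
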
def task_submit_check_options():
    """
    NOTE: Depending on the parameters, either "BibSched mode" or plain
          straightforward execution mode is entered.
    """
    if task_has_option("create_event_with_id"):
        print(webstat.create_customevent(
            task_get_option("create_event_with_id"),
            task_get_option("event_name", None),
            task_get_option("column_headers", [])))
        sys.exit(0)

    elif task_has_option("destroy_event_with_id"):
        print(webstat.destroy_customevent(
            task_get_option("destroy_event_with_id")))
        sys.exit(0)

    elif task_has_option("list_events"):
        events = webstat._get_customevents()
        if len(events) == 0:
            print("There are no custom events available.")
        else:
            print("Available custom events are:\n")
            print('\n'.join([x[0] + ": " +
                             ((x[1] is None) and "No descriptive name"
                              or str(x[1]))
                             for x in events]))
        sys.exit(0)

    elif task_has_option("cache_events"):
        events = task_get_option("cache_events")
        write_message(str(events), verbose=9)

        if events[0] == 'ALL':
            keyevents_to_cache = webstat.KEYEVENT_REPOSITORY.keys()
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] == 'KEYEVENTS':
            keyevents_to_cache = webstat.KEYEVENT_REPOSITORY.keys()
            customevents_to_cache = []

        elif events[0] == 'CUSTOMEVENTS':
            keyevents_to_cache = []
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] != '':
            keyevents_to_cache = [x for x in webstat.KEYEVENT_REPOSITORY.keys()
                                  if x in events]
            customevents_to_cache = [x[0] for x in webstat._get_customevents()
                                     if x in events]

        # Control so that we have valid event names
        if len(keyevents_to_cache + customevents_to_cache) == 0:
            # Oops, no events. Abort and display help.
            return False
        else:
            task_set_option("keyevents", keyevents_to_cache)
            task_set_option("customevents", customevents_to_cache)

        return True

    elif task_has_option("dump_config"):
        print("""\
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
apache_box = True
uptime_box = True

[webstat_custom_event_1]
name = baskets
param1 = action
param2 = basket
param3 = user

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/?"
detailed-record-url = "/%s/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
basket-url = "/yourbaskets/"
add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
""" % CFG_SITE_RECORD)
        sys.exit(0)

    elif task_has_option("load_config"):
        from ConfigParser import ConfigParser
        conf = ConfigParser()
        conf.read(CFG_WEBSTAT_CONFIG_PATH)
        for section in conf.sections():
            if section[:21] == "webstat_custom_event_":
                cols = []
                name = ""
                for option, value in conf.items(section):
                    if option == "name":
                        name = value
                    if option[:5] == "param":
                        # add the column name in its position
                        index = int(option[-1]) - 1
                        while len(cols) <= index:
                            cols.append("")
                        cols[index] = value
                if name:
                    res = run_sql("SELECT COUNT(id) FROM staEVENT WHERE id = %s",
                                  (name, ))
                    if res[0][0] == 0:
                        # name does not exist, create customevent
                        webstat.create_customevent(name, name, cols)
                    else:
                        # name already exists, update customevent
                        webstat.modify_customevent(name, cols=cols)
        sys.exit(0)

    else:
        # False means that the --help should be displayed
        return False

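# Minimal, self-contained sketch of the "paramN -> ordered column list"
# mapping performed in the load_config branch above: options named param1,
# param2, param3 place their values at positions 0, 1, 2 of the column list.
# The helper name below is hypothetical, for illustration only.
def columns_from_options(options):
    """columns_from_options([('param2', 'basket'), ('param1', 'action')])
    returns ['action', 'basket']."""
    cols = []
    for option, value in options:
        if option[:5] == "param":
            index = int(option[-1]) - 1
            while len(cols) <= index:
                cols.append("")
            cols[index] = value
    return cols
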
def bibreformat_task(fmt, recids, without_fmt, process):
    """BibReformat main task.

    @param fmt: output format to use
    @param process: whether to actually reformat the selected records
                    (False when --noprocess is given)
    @param recids: a list of record IDs to reformat
    @param without_fmt: set of record IDs that have no existing cache
                        for this format
    @return: None
    """
    write_message("Processing format %s" % fmt)

    t1 = os.times()[4]

    start_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    latest_bibrank_run = get_bibrankmethod_lastupdate('citation')

    def related_records(recids, recids_processed):
        if fmt == "HDREF" and recids:
            # HDREF represents the references tab
            # the tab needs to be recomputed not only when the record changes
            # but also when one of the citations changes
            sql = """SELECT id, modification_date FROM bibrec
                     WHERE id in (%s)""" % ','.join(str(r) for r in recids)

            def check_date(mod_date):
                return mod_date.strftime(
                    "%Y-%m-%d %H:%M:%S") < latest_bibrank_run

            rel_recids = intbitset([recid for recid, mod_date in run_sql(sql)
                                    if check_date(mod_date)])
            for r in rel_recids:
                recids |= intbitset(get_cited_by(r))

        # To not process recids twice
        recids -= recids_processed
        # Adds to the set of processed recids
        recids_processed += recids

        return recids

    def recid_chunker(recids):
        recids_processed = intbitset()
        chunk = intbitset()

        for recid in recids:
            if len(chunk) == 5000:
                for r in related_records(chunk, recids_processed):
                    yield r
                recids_processed += chunk
                chunk = intbitset()

            if recid not in recids_processed:
                chunk.add(recid)

        if chunk:
            for r in related_records(chunk, recids_processed):
                yield r

    recIDs = list(recid_chunker(recids))

    ### list of corresponding record IDs was retrieved
    ### now format the selected records

    if without_fmt:
        write_message("Records to be processed: %d" % len(recIDs))
        write_message("Out of it records without existing cache: %d" %
                      len(without_fmt))
    else:
        write_message("Records to be processed: %d" % len(recIDs))

    ### Initialize main loop

    total_rec = 0   # Total number of records
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call

    ### Iterate over all records prepared in lists I (option)
    if process:
        total_rec_1, tbibformat_1, tbibupload_1 = iterate_over_new(recIDs, fmt)
        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

    ### Store last run time
    if task_has_option("last"):
        write_message("storing run date to %s" % start_date)
        store_last_updated(fmt, start_date)

    ### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)

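# Self-contained sketch of the chunk-and-expand pattern used by
# recid_chunker() above: buffer IDs into fixed-size chunks, let an expand()
# callback (standing in for related_records) grow each chunk, and track a
# processed set so no ID is yielded twice. Plain sets are used instead of
# intbitset to keep the illustration dependency-free; names are hypothetical.
def chunked_expand(ids, expand, chunk_size=5000):
    processed = set()
    chunk = set()
    for current in ids:
        if len(chunk) == chunk_size:
            expanded = expand(chunk) - processed
            for item in expanded:
                yield item
            processed |= chunk | expanded
            chunk = set()
        if current not in processed:
            chunk.add(current)
    if chunk:
        for item in expand(chunk) - processed:
            yield item

# Example usage: expand each chunk with "neighbouring" IDs.
# list(chunked_expand([1, 2, 3],
#                     lambda c: c | set(x + 1 for x in c),
#                     chunk_size=2))
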
def bibreformat_task(fmt, sql, sql_queries, cds_query, process_format,
                     process, recids):
    """
    BibReformat main task

    @param fmt: output format to use
    @param sql: dictionary with pre-created sql queries for various cases
                (for selecting records). Some of these queries will be picked
                depending on the case
    @param sql_queries: a list of sql queries to be executed to select records
                        to reformat.
    @param cds_query: a search query to be executed to select records to
                      reformat
    @param process_format: whether to also process records without an
                           existing cache (the '--without' parameter)
    @param process: whether to actually reformat the selected records
                    (False when --noprocess is given)
    @param recids: a list of record IDs to reformat
    @return: None
    """
    write_message("Processing format %s" % fmt)

    t1 = os.times()[4]

    start_date = datetime.now()

    ### Query the database
    ###
    task_update_progress('Fetching records to process')

    if process_format:  # '-without' parameter
        write_message("Querying database for records without cache...")
        without_format = without_fmt(sql)

    recIDs = intbitset(recids)

    if cds_query['field'] != "" or \
       cds_query['collection'] != "" or \
       cds_query['pattern'] != "":
        write_message("Querying database (CDS query)...")

        if cds_query['collection'] == "":
            # use search_pattern() whenever possible, as it can search
            # even in private collections
            res = search_pattern(p=cds_query['pattern'],
                                 f=cds_query['field'],
                                 m=cds_query['matching'])
        else:
            # use perform_request_search when '-c' argument has been
            # defined, as it is not supported by search_pattern()
            res = intbitset(perform_request_search(req=None, of='id',
                                                   c=cds_query['collection'],
                                                   p=cds_query['pattern'],
                                                   f=cds_query['field']))
        recIDs |= res

    for sql_query in sql_queries:
        write_message("Querying database (%s) ..." % sql_query, verbose=2)
        recIDs |= intbitset(run_sql(sql_query))

    if fmt == "HDREF" and recIDs:
        # HDREF represents the references tab
        # the tab needs to be recomputed not only when the record changes
        # but also when one of the citations changes
        latest_bibrank_run = get_bibrankmethod_lastupdate('citation')
        start_date = latest_bibrank_run
        sql = """SELECT id, modification_date FROM bibrec
                 WHERE id in (%s)""" % ','.join(str(r) for r in recIDs)

        def check_date(mod_date):
            return mod_date < latest_bibrank_run

        recIDs = intbitset([recid for recid, mod_date in run_sql(sql)
                            if check_date(mod_date)])
        for r in recIDs:
            recIDs |= intbitset(get_cited_by(r))

    ### list of corresponding record IDs was retrieved
    ### now format the selected records

    if process_format:
        write_message("Records to be processed: %d" % (len(recIDs) +
                                                       len(without_format)))
        write_message("Out of it records without existing cache: %d" %
                      len(without_format))
    else:
        write_message("Records to be processed: %d" % (len(recIDs)))

    ### Initialize main loop

    total_rec = 0   # Total number of records
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call

    ### Iterate over all records prepared in lists I (option)
    if process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
                                             # when migration from php to
                                             # python bibformat is done
            (total_rec_1,
             tbibformat_1,
             tbibupload_1) = iterate_over_old(recIDs, fmt)
        else:
            (total_rec_1,
             tbibformat_1,
             tbibupload_1) = iterate_over_new(recIDs, fmt)

        total_rec += total_rec_1
        tbibformat += tbibformat_1
        tbibupload += tbibupload_1

    ### Iterate over all records prepared in list II (no_format)
    if process_format and process:
        if CFG_BIBFORMAT_USE_OLD_BIBFORMAT:  # FIXME: remove this
                                             # when migration from php to
                                             # python bibformat is done
            (total_rec_2,
             tbibformat_2,
             tbibupload_2) = iterate_over_old(without_format, fmt)
        else:
            (total_rec_2,
             tbibformat_2,
             tbibupload_2) = iterate_over_new(without_format, fmt)

        total_rec += total_rec_2
        tbibformat += tbibformat_2
        tbibupload += tbibupload_2

    ### Store last run time
    if task_has_option("last"):
        write_message("storing run date to %s" % start_date)
        store_last_updated(fmt, start_date)

    ### Final statistics

    t2 = os.times()[4]

    elapsed = t2 - t1
    message = "total records processed: %d" % total_rec
    write_message(message)

    message = "total processing time: %2f sec" % elapsed
    write_message(message)

    message = "Time spent on external call (os.system):"
    write_message(message)

    message = " bibformat: %2f sec" % tbibformat
    write_message(message)

    message = " bibupload: %2f sec" % tbibupload
    write_message(message)

def task_run_core():
    """Runs the task by fetching arguments from the BibSched task queue.
    This is what BibSched will be invoking via daemon call."""

    ## initialize parameters
    if task_get_option('format'):
        fmts = task_get_option('format')
    else:
        fmts = 'HB'  # default value if no format option given
    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        sql = {
            "all": """SELECT br.id FROM bibrec AS br, bibfmt AS bf
                      WHERE bf.id_bibrec = br.id AND bf.format = '%s'""" % fmt,
            "last": """SELECT br.id FROM bibrec AS br
                       INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id
                       WHERE br.modification_date >= '%(last_updated)s'
                       AND bf.format='%(format)s'
                       AND bf.last_updated < br.modification_date"""
                    % {'format': fmt,
                       'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S')},
            "missing": """SELECT br.id FROM bibrec as br
                          LEFT JOIN bibfmt as bf ON bf.id_bibrec = br.id AND bf.format ='%s'
                          WHERE bf.id_bibrec IS NULL
                          AND br.id BETWEEN %%s AND %%s
                       """ % fmt,
        }
        sql_queries = []
        cds_query = {}
        if task_has_option("all"):
            sql_queries.append(sql['all'])
        if task_has_option("last"):
            sql_queries.append(sql['last'])
        if task_has_option("collection"):
            cds_query['collection'] = task_get_option('collection')
        else:
            cds_query['collection'] = ""

        if task_has_option("field"):
            cds_query['field'] = task_get_option('field')
        else:
            cds_query['field'] = ""

        if task_has_option("pattern"):
            cds_query['pattern'] = task_get_option('pattern')
        else:
            cds_query['pattern'] = ""

        if task_has_option("matching"):
            cds_query['matching'] = task_get_option('matching')
        else:
            cds_query['matching'] = ""

        if task_has_option("recids"):
            recids = list(split_cli_ids_arg(task_get_option('recids')))
        else:
            recids = []

        ### sql commands to be executed during the script run
        ###
        bibreformat_task(fmt, sql, sql_queries, cds_query,
                         task_has_option('without'),
                         not task_has_option('noprocess'),
                         recids)

    return True

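# The queries in the sql dict above interpolate fmt and last_updated directly
# into the SQL text. Below is a hedged sketch of how the same selections could
# be written with bound parameters, using run_sql placeholders as done
# elsewhere in this code. The helper names are hypothetical illustrations of
# the design choice, not the code the task actually uses.
def select_cached_recids(fmt):
    """Record IDs that already have a cache in the given format."""
    return run_sql("SELECT br.id FROM bibrec AS br, bibfmt AS bf "
                   "WHERE bf.id_bibrec = br.id AND bf.format = %s", (fmt,))

def select_outdated_recids(fmt, last_updated):
    """Record IDs whose cache in the given format is older than the record."""
    return run_sql("SELECT br.id FROM bibrec AS br "
                   "INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id "
                   "WHERE br.modification_date >= %s "
                   "AND bf.format = %s "
                   "AND bf.last_updated < br.modification_date",
                   (last_updated.strftime('%Y-%m-%d %H:%M:%S'), fmt))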