def process_updates(rank_method_code):
    """
    Entry point executed when the task starts.

    The configuration for the rank method is read first. When the "quick"
    task option equals "no", the work is handed over to rebuild_tables()
    and its result is returned. Otherwise the weights loaded with fromDB()
    are updated by calling process_one() on every record returned by
    fetch_concerned_records(), then written back with intoDB() and
    store_weights_cache().

    Returns the value of rebuild_tables() on the rebuild path, True
    otherwise.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    parsed_config = read_configuration(rank_method_code)
    algorithm_name = parsed_config.get(rank_method_code, "algorithm")
    threshold = parsed_config.get(rank_method_code, "friends_threshold")
    config = {
        'algorithm': algorithm_name,
        'friends_threshold': threshold,
    }

    # Anything other than an explicit "no" means an incremental (quick) run.
    if task_get_option("quick") == "no":
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    requested_ids = task_get_option("id")
    recids, end_date = fetch_concerned_records(rank_method_code,
                                               requested_ids)
    citations_fun = get_citations_fun(config['algorithm'])
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    for index, recid in enumerate(recids):
        # Give the scheduler a chance to pause or stop us between records.
        task_sleep_now_if_required(can_stop_too=True)

        # Progress is reported 1-based.
        msg = "Extracting for %s (%d/%d)" % (recid, index + 1, total)
        task_update_progress(msg)
        write_message(msg)

        process_one(recid, tags, citations_fun, weights)

    # Store the updated weights together with the date returned by
    # fetch_concerned_records().
    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True
def fill_self_cites_tables(rank_method_code, config):
    """
    Populate the self-cites tables from scratch.

    Intended for an installation where the self-cites daemon has never run:
    every id selected from bibrec is processed. This is an optimization for
    empty tables, and the result is hoped to match what
    compute_and_store_self_citations gives.

    config must provide the 'algorithm' key; when its value is 'friends'
    the intermediary tables are filled first with
    update_self_cites_tables(). The collected weights are then stored with
    intoDB() (stamped with the time this function started) and
    store_weights_cache().
    """
    # Taken before any work so the stored date is the start of the run.
    begin_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    algorithm = config['algorithm']
    tags = get_authors_tags()
    selfcites_dic = {}
    all_ids = intbitset(run_sql('SELECT id FROM bibrec ORDER BY id'))
    citations_fun = get_citations_fun(algorithm)
    write_message('using %s' % citations_fun.__name__)

    total = len(all_ids)

    def report_every_thousand(template, position):
        """Report progress and offer to sleep on every 1000th record."""
        if position % 1000 != 0:
            return
        msg = template % (position, total)
        task_update_progress(msg)
        write_message(msg)
        # NOTE(review): the sleep check is assumed to belong to the
        # every-1000 branch -- confirm against the original layout.
        task_sleep_now_if_required()

    # The intermediary tables are only needed by the friends algorithm
    # (or assimilated).
    if algorithm == 'friends':
        for position, recid in enumerate(all_ids):
            report_every_thousand('intermediate %d/%d', position)
            update_self_cites_tables(recid, config, tags)

    # Fill the self-cites table itself.
    for position, recid in enumerate(all_ids):
        report_every_thousand('final %d/%d', position)
        compute_and_store_self_citations(recid,
                                         tags,
                                         citations_fun,
                                         selfcites_dic)

    intoDB(selfcites_dic, begin_date, rank_method_code)
    store_weights_cache(selfcites_dic)