def fill():
    """
    Build and return the dictionary of cached citation data.

    Weights are read from redis (keys 'citations_weights' and
    'selfcites_weights') and deserialized with deserialize_via_marshal();
    when a key holds no value, the data is loaded with fromDB() instead
    ('citation' and 'selfcites' respectively).

    Returns a dict with the following entries:
      'citations_weights' -- the citation weights mapping as loaded
                             (presumably recid -> number of citations;
                             confirm against fromDB)
      'citations_keys'    -- intbitset built from the keys of that mapping
      'citations_counts'  -- list of (key, value) pairs of that mapping,
                             sorted by value, highest first
      'selfcites_weights' -- for every key, the citation value minus the
                             self-cites value (0 when there is none)
      'selfcites_counts'  -- list of (key, adjusted value) pairs, sorted by
                             adjusted value, highest first
    """
    # Imported inside the function, as in the original layout of this code.
    from invenio.bibrank_tag_based_indexer import fromDB

    redis = get_redis()

    cached_blob = redis.get('citations_weights')
    weights = (deserialize_via_marshal(cached_blob) if cached_blob
               else fromDB('citation'))

    # for cited:M->N queries, it is interesting to cache also
    # some preprocessed citationdict:
    citations_keys = intbitset(weights.keys())

    # Citation counts, highest first
    citations_counts = sorted(weights.iteritems(),
                              key=itemgetter(1), reverse=True)

    # Self-cites
    cached_blob = redis.get('selfcites_weights')
    selfcites = (deserialize_via_marshal(cached_blob) if cached_blob
                 else fromDB('selfcites'))

    # Walk the records in citation-count order so that, the sort being
    # stable, records with equal adjusted counts keep that relative order.
    selfcites_weights = {}
    selfcites_counts = []
    for recid, cites in citations_counts:
        adjusted = cites - selfcites.get(recid, 0)
        selfcites_weights[recid] = adjusted
        selfcites_counts.append((recid, adjusted))
    selfcites_counts.sort(key=itemgetter(1), reverse=True)

    return {
        'citations_weights': weights,
        'citations_keys': citations_keys,
        'citations_counts': citations_counts,
        'selfcites_weights': selfcites_weights,
        'selfcites_counts': selfcites_counts,
    }
def process_updates(rank_method_code):
    """
    Entry point executed first when the task is started.

    The rank method configuration is read and its "algorithm" and
    "friends_threshold" settings are collected. When the "quick" task
    option equals "no" (presumably what --rebuild sets; confirm against
    the task's option parsing), the work is handed over to
    rebuild_tables() and its result is returned.

    Otherwise every record returned by fetch_concerned_records() is run
    through process_one(), after which the weights are written with
    intoDB() and store_weights_cache().

    @param rank_method_code: code of the rank method to run; also used as
        the section name when reading the configuration
    @return: the result of rebuild_tables() on the rebuild path,
        True otherwise
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    parsed_config = read_configuration(rank_method_code)
    algorithm = parsed_config.get(rank_method_code, "algorithm")
    friends_threshold = parsed_config.get(rank_method_code,
                                          "friends_threshold")
    config = {
        'algorithm': algorithm,
        'friends_threshold': friends_threshold,
    }

    # Full rebuild requested: nothing else to do here.
    if task_get_option("quick") == "no":
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    requested_ids = task_get_option("id")
    recids, end_date = fetch_concerned_records(rank_method_code,
                                               requested_ids)
    citations_fun = get_citations_fun(algorithm)
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    for position, recid in enumerate(recids, 1):
        # Gives the scheduler a chance to pause or stop the task.
        task_sleep_now_if_required(can_stop_too=True)

        progress = "Extracting for %s (%d/%d)" % (recid, position, total)
        task_update_progress(progress)
        write_message(progress)

        # weights is passed along for every record and persisted below.
        process_one(recid, tags, citations_fun, weights)

    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True