def fill():
    """Assemble the citation dictionaries that are kept in the cache.

    Citation weights and self-cite values are each taken from ``cache``
    when a serialized copy is stored there, and loaded with ``fromDB``
    otherwise.

    Returns a dict with these keys:

    * ``citations_weights``: the weights mapping as loaded.
    * ``citations_keys``: ``intbitset`` built from the mapping's keys.
    * ``citations_counts``: the mapping's ``(recid, weight)`` items,
      sorted by weight in descending order.
    * ``selfcites_weights``: for every recid in ``citations_counts``,
      its weight minus the self-cite value for that recid (0 when the
      self-cite data has no entry for it).
    * ``selfcites_counts``: the ``selfcites_weights`` pairs, sorted by
      value in descending order.
    """
    # Imported at call time, as in the original code.
    from invenio.legacy.bibrank.tag_based_indexer import fromDB

    packed = cache.get('citations_weights')
    weights = deserialize_via_marshal(packed) if packed else fromDB('citation')

    # for cited:M->N queries, it is interesting to cache also
    # some preprocessed citationdict:
    citation_keys = intbitset(weights.keys())

    # Citation counts, largest first.
    ranked = sorted(iteritems(weights), key=itemgetter(1), reverse=True)

    # Self-cites
    packed = cache.get('selfcites_weights')
    selfcites = deserialize_via_marshal(packed) if packed else fromDB('selfcites')

    # Walk the already-ranked pairs so that the stable sort below breaks
    # ties in the same order as ``citations_counts``.
    adjusted = [(recid, total - selfcites.get(recid, 0))
                for recid, total in ranked]

    return {
        'citations_weights': weights,
        'citations_keys': citation_keys,
        'citations_counts': ranked,
        'selfcites_weights': dict(adjusted),
        'selfcites_counts': sorted(adjusted, key=itemgetter(1), reverse=True),
    }
Beispiel #2
0
        def fill():
            """Build the citation dictionaries from redis or the database.

            Each data set is read from redis when a serialized value is
            stored under its key, and loaded with ``fromDB`` otherwise.

            The returned dict holds ``citations_weights`` (the loaded
            mapping), ``citations_keys`` (an ``intbitset`` of its keys),
            ``citations_counts`` (its items sorted by weight, descending),
            ``selfcites_weights`` (each weight minus the self-cite value
            for that recid, 0 if absent) and ``selfcites_counts`` (those
            adjusted pairs sorted by value, descending).
            """
            # Imported at call time, as in the original code.
            from invenio.legacy.bibrank.tag_based_indexer import fromDB
            redis = get_redis()

            def load(redis_key, rank_method):
                # A falsy redis value means nothing usable is stored there.
                blob = redis.get(redis_key)
                if blob:
                    return deserialize_via_marshal(blob)
                return fromDB(rank_method)

            result = {}

            weights = load('citations_weights', 'citation')
            result['citations_weights'] = weights
            # for cited:M->N queries, it is interesting to cache also
            # some preprocessed citationdict:
            result['citations_keys'] = intbitset(weights.keys())

            # Citation counts
            by_count = sorted(iteritems(weights), key=itemgetter(1),
                              reverse=True)
            result['citations_counts'] = by_count

            # Self-cites
            selfcites = load('selfcites_weights', 'selfcites')
            without_self = {recid: cites - selfcites.get(recid, 0)
                            for recid, cites in by_count}
            result['selfcites_weights'] = without_self

            # Iterate in ``by_count`` order so ties keep that ordering
            # after the stable sort.
            pairs = [(recid, without_self.get(recid, cites))
                     for recid, cites in by_count]
            pairs.sort(key=itemgetter(1), reverse=True)
            result['selfcites_counts'] = pairs

            return result
Beispiel #3
0
def process_updates(rank_method_code):
    """
    Entry point executed when the task starts.

    Reads the "algorithm" and "friends_threshold" settings for
    rank_method_code. When the "quick" task option equals "no", the work
    is handed to rebuild_tables() and its result is returned. Otherwise
    the records from fetch_concerned_records() are each passed to
    process_one(), the weights are stored with intoDB() and
    store_weights_cache(), and True is returned.

    NOTE(review): earlier documentation spoke of a --rebuild option; the
    code only checks the "quick" task option -- confirm how they relate.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    settings = read_configuration(rank_method_code)
    algorithm = settings.get(rank_method_code, "algorithm")
    friends_threshold = settings.get(rank_method_code, "friends_threshold")
    config = {
        'algorithm': algorithm,
        'friends_threshold': friends_threshold,
    }

    # Only an explicit "no" selects the rebuild path.
    if task_get_option("quick") == "no":
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    id_option = task_get_option("id")
    recids, end_date = fetch_concerned_records(rank_method_code, id_option)
    citations_fun = get_citations_fun(algorithm)
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    for position, recid in enumerate(recids, 1):
        task_sleep_now_if_required(can_stop_too=True)
        progress = "Extracting for %s (%d/%d)" % (recid, position, total)
        task_update_progress(progress)
        write_message(progress)

        process_one(recid, tags, citations_fun, weights)

    # Store the weights in the database, then in the cache.
    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True
Beispiel #4
0
def process_updates(rank_method_code):
    """
    Run the rank method named by rank_method_code.

    The method's configuration supplies "algorithm" and
    "friends_threshold". If the "quick" task option is "no", this
    returns whatever rebuild_tables() returns. In every other case it
    calls process_one() for each record reported by
    fetch_concerned_records(), saves the resulting weights through
    intoDB() and store_weights_cache(), and returns True.

    NOTE(review): the previous docstring described a --rebuild option,
    but the branch below tests the "quick" option -- verify the mapping.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)

    selfcites_config = read_configuration(rank_method_code)
    config = {}
    for option in ("algorithm", "friends_threshold"):
        config[option] = selfcites_config.get(rank_method_code, option)

    rebuild_requested = task_get_option("quick") == "no"
    if rebuild_requested:
        return rebuild_tables(rank_method_code, config)

    tags = get_authors_tags()
    recids, end_date = fetch_concerned_records(
        rank_method_code,
        task_get_option("id"),
    )
    citations_fun = get_citations_fun(config['algorithm'])
    weights = fromDB(rank_method_code)

    write_message("recids %s" % str(recids))

    total = len(recids)
    done = 0
    for recid in recids:
        done += 1
        task_sleep_now_if_required(can_stop_too=True)
        status = "Extracting for %s (%d/%d)" % (recid, done, total)
        task_update_progress(status)
        write_message(status)

        process_one(recid, tags, citations_fun, weights)

    # Save only after every record has been processed.
    intoDB(weights, end_date, rank_method_code)
    store_weights_cache(weights)

    write_message("Complete")
    return True