def __init__(self):
    def cache_filler():
        alldicts = {}
        try:
            res = run_sql(
                "SELECT object_name,object_value FROM rnkCITATIONDATA")
        except OperationalError:
            # database problems, return empty cache
            return {}
        for row in res:
            object_name = row[0]
            object_value = row[1]
            try:
                object_value_dict = deserialize_via_marshal(object_value)
            except:
                object_value_dict = {}
            alldicts[object_name] = object_value_dict
            if object_name == 'citationdict':
                # for cited:M->N queries, it is interesting to cache also
                # some preprocessed citationdict:
                alldicts['citationdict_keys'] = object_value_dict.keys()
                alldicts['citationdict_keys_intbitset'] = intbitset(
                    object_value_dict.keys())
        return alldicts

    def timestamp_verifier():
        res = run_sql(
            """SELECT DATE_FORMAT(last_updated, '%Y-%m-%d %H:%i:%s')
               FROM rnkMETHOD WHERE name='citation'""")
        if res:
            return res[0][0]
        else:
            return '0000-00-00 00:00:00'

    DataCacher.__init__(self, cache_filler, timestamp_verifier)

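# Why the "citationdict_keys*" entries above are worth precomputing: cited:M->N
# style queries can then be answered by set intersection on record ids instead
# of probing the citation dict key by key. A minimal, self-contained sketch of
# that use; the names `cache` and `candidate_recids` are illustrative only and
# are not part of the original module.
from intbitset import intbitset

cache = {'citationdict': {10: [1, 2], 11: [3]},
         'citationdict_keys_intbitset': intbitset([10, 11])}

candidate_recids = intbitset([5, 10, 42])
cited_candidates = candidate_recids & cache['citationdict_keys_intbitset']
# cited_candidates == intbitset([10]); only these records need a real lookup
# in cache['citationdict'].
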
def __init__(self): def cache_filler(): alldicts = {} try: res = run_sql("SELECT object_name,object_value FROM rnkCITATIONDATA") except OperationalError: # database problems, return empty cache return {} for row in res: object_name = row[0] object_value = row[1] try: object_value_dict = deserialize_via_marshal(object_value) except: object_value_dict = {} alldicts[object_name] = object_value_dict if object_name == 'citationdict': # for cited:M->N queries, it is interesting to cache also # some preprocessed citationdict: alldicts['citationdict_keys'] = object_value_dict.keys() alldicts['citationdict_keys_intbitset'] = intbitset(object_value_dict.keys()) return alldicts def timestamp_verifier(): res = run_sql("""SELECT DATE_FORMAT(last_updated, '%Y-%m-%d %H:%i:%s') FROM rnkMETHOD WHERE name='citation'""") if res: return res[0][0] else: return '0000-00-00 00:00:00' DataCacher.__init__(self, cache_filler, timestamp_verifier)
def __init__(self): def cache_filler(): alldicts = {} try: res = run_sql("""SELECT object_name,object_value FROM rnkCITATIONDATA""") except OperationalError: # database problems, return empty cache return {} for row in res: object_name = row[0] object_value = row[1] try: object_value_dict = marshal.loads(decompress(object_value)) except: object_value_dict = {} alldicts[object_name] = object_value_dict if object_name == "citationdict": # for cited:M->N queries, it is interesting to cache also # some preprocessed citationdict: alldicts["citationdict_keys"] = object_value_dict.keys() alldicts["citationdict_keys_intbitset"] = intbitset(object_value_dict.keys()) return alldicts def timestamp_verifier(): return get_table_update_time("rnkCITATIONDATA") DataCacher.__init__(self, cache_filler, timestamp_verifier)
def __init__(self):
    @gcfix
    def fill():
        alldicts = {}
        from invenio.bibrank_tag_based_indexer import fromDB
        redis = get_redis()
        serialized_weights = redis.get('citations_weights')
        if serialized_weights:
            weights = deserialize_via_marshal(serialized_weights)
        else:
            weights = fromDB('citation')

        alldicts['citations_weights'] = weights
        # for cited:M->N queries, it is interesting to cache also
        # some preprocessed citationdict:
        alldicts['citations_keys'] = intbitset(weights.keys())

        # Citation counts
        alldicts['citations_counts'] = [t for t in weights.iteritems()]
        alldicts['citations_counts'].sort(key=itemgetter(1), reverse=True)

        # Self-cites
        serialized_weights = redis.get('selfcites_weights')
        if serialized_weights:
            selfcites = deserialize_via_marshal(serialized_weights)
        else:
            selfcites = fromDB('selfcites')
        selfcites_weights = {}
        for recid, counts in alldicts['citations_counts']:
            selfcites_weights[recid] = counts - selfcites.get(recid, 0)
        alldicts['selfcites_weights'] = selfcites_weights
        alldicts['selfcites_counts'] = [
            (recid, selfcites_weights.get(recid, cites))
            for recid, cites in alldicts['citations_counts']
        ]
        alldicts['selfcites_counts'].sort(key=itemgetter(1), reverse=True)

        return alldicts

    def cache_filler():
        self.cache = None  # misfire from pylint: disable=W0201
        # this is really defined in DataCacher
        return fill()

    from invenio.bibrank_tag_based_indexer import get_lastupdated

    def timestamp_verifier():
        citation_lastupdate = get_lastupdated('citation')
        if citation_lastupdate:
            return citation_lastupdate.strftime("%Y-%m-%d %H:%M:%S")
        else:
            return "0000-00-00 00:00:00"

    DataCacher.__init__(self, cache_filler, timestamp_verifier)

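# The redis keys consulted above ('citations_weights', 'selfcites_weights') are
# an optional fast path in front of fromDB(); when absent, the code falls back
# to the database. A hedged sketch of how a producer could populate that fast
# path, assuming a serialize_via_marshal counterpart to the deserializer used
# above; the helper name, storage format, and get_redis() return type are all
# assumptions, not confirmed APIs.
import marshal
from zlib import compress

def serialize_via_marshal(obj):
    # Assumed mirror of deserialize_via_marshal: marshal, then zlib-compress.
    return compress(marshal.dumps(obj))

def publish_weights(redis_client, weights, key='citations_weights'):
    # `redis_client` is any client exposing the standard set() call,
    # e.g. a redis.StrictRedis instance; get_redis() above presumably
    # returns something equivalent.
    redis_client.set(key, serialize_via_marshal(weights))
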
def __init__(self):
    def fill():
        alldicts = {}
        from invenio.bibrank_tag_based_indexer import fromDB
        redis = get_redis()
        serialized_weights = redis.get('citations_weights')
        if serialized_weights:
            weights = deserialize_via_marshal(serialized_weights)
        else:
            weights = fromDB('citation')

        alldicts['citations_weights'] = weights
        # for cited:M->N queries, it is interesting to cache also
        # some preprocessed citationdict:
        alldicts['citations_keys'] = intbitset(weights.keys())

        # Citation counts
        alldicts['citations_counts'] = [t for t in weights.iteritems()]
        alldicts['citations_counts'].sort(key=itemgetter(1), reverse=True)

        # Self-cites
        serialized_weights = redis.get('selfcites_weights')
        if serialized_weights:
            selfcites = deserialize_via_marshal(serialized_weights)
        else:
            selfcites = fromDB('selfcites')
        selfcites_weights = {}
        for recid, counts in alldicts['citations_counts']:
            selfcites_weights[recid] = counts - selfcites.get(recid, 0)
        alldicts['selfcites_weights'] = selfcites_weights
        alldicts['selfcites_counts'] = [(recid, selfcites_weights.get(recid, cites))
                                        for recid, cites in alldicts['citations_counts']]
        alldicts['selfcites_counts'].sort(key=itemgetter(1), reverse=True)

        return alldicts

    def cache_filler():
        self.cache = None  # misfire from pylint: disable=W0201
        # this is really defined in DataCacher
        return fill()

    from invenio.bibrank_tag_based_indexer import get_lastupdated

    def timestamp_verifier():
        citation_lastupdate = get_lastupdated('citation')
        if citation_lastupdate:
            return citation_lastupdate.strftime("%Y-%m-%d %H:%M:%S")
        else:
            return "0000-00-00 00:00:00"

    DataCacher.__init__(self, cache_filler, timestamp_verifier)

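# The self-citation bookkeeping above derives, per record, the citation count
# left after subtracting self-citations, plus a list sorted the same way as
# citations_counts. A tiny worked example of that arithmetic with made-up
# numbers (plain dicts, no Invenio imports needed):
from operator import itemgetter

weights = {1: 10, 2: 7, 3: 2}     # total citations per recid
selfcites = {1: 3, 3: 2}          # self-citations per recid (recid 2 has none)

citations_counts = sorted(weights.items(), key=itemgetter(1), reverse=True)
selfcites_weights = {recid: counts - selfcites.get(recid, 0)
                     for recid, counts in citations_counts}
selfcites_counts = sorted(selfcites_weights.items(), key=itemgetter(1), reverse=True)

assert selfcites_weights == {1: 7, 2: 7, 3: 0}
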
def __init__(self):
    def initial_fill():
        alldicts = {}
        from invenio.bibrank_tag_based_indexer import fromDB
        weights = fromDB('citation')

        alldicts['citations_weights'] = weights
        # for cited:M->N queries, it is interesting to cache also
        # some preprocessed citationdict:
        alldicts['citations_keys'] = intbitset(weights.keys())

        # Citation counts
        alldicts['citations_counts'] = [t for t in weights.iteritems()]
        alldicts['citations_counts'].sort(key=itemgetter(1), reverse=True)

        # Self-cites
        selfcites = fromDB('selfcites')
        selfcites_weights = {}
        for recid, counts in alldicts['citations_counts']:
            selfcites_weights[recid] = counts - selfcites.get(recid, 0)
        alldicts['selfcites_weights'] = selfcites_weights
        alldicts['selfcites_counts'] = [(recid, selfcites_weights.get(recid, cites))
                                        for recid, cites in alldicts['citations_counts']]
        alldicts['selfcites_counts'].sort(key=itemgetter(1), reverse=True)

        return alldicts

    def incremental_fill():
        self.cache = None
        return initial_fill()

    def cache_filler():
        if self.cache:
            cache = incremental_fill()
        else:
            cache = initial_fill()
        return cache

    from invenio.bibrank_tag_based_indexer import get_lastupdated

    def timestamp_verifier():
        citation_lastupdate = get_lastupdated('citation')
        if citation_lastupdate:
            return citation_lastupdate.strftime("%Y-%m-%d %H:%M:%S")
        else:
            return "0000-00-00 00:00:00"

    DataCacher.__init__(self, cache_filler, timestamp_verifier)

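# Every version above only supplies two callables to DataCacher: cache_filler
# builds the whole dict of caches, and timestamp_verifier returns a string that
# changes when the underlying citation data changes. A self-contained sketch of
# that contract follows; it is a generic reimplementation for illustration, not
# Invenio's actual DataCacher class.
class MiniDataCacher(object):
    def __init__(self, cache_filler, timestamp_verifier):
        self.cache_filler = cache_filler
        self.timestamp_verifier = timestamp_verifier
        self.cache = self.cache_filler()
        self.timestamp = self.timestamp_verifier()

    def recreate_cache_if_needed(self):
        # Refill only when the verifier reports a different timestamp string.
        current = self.timestamp_verifier()
        if current != self.timestamp:
            self.cache = self.cache_filler()
            self.timestamp = current

# Usage with trivial fillers standing in for the SQL/redis-backed ones above:
source = {'stamp': '2014-01-01 00:00:00', 'data': {1: 2}}
cacher = MiniDataCacher(lambda: dict(source['data']), lambda: source['stamp'])
source['data'][3] = 4
source['stamp'] = '2014-01-02 00:00:00'
cacher.recreate_cache_if_needed()
assert cacher.cache == {1: 2, 3: 4}
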