def process_one(recid, tags, citations_fun, selfcites_dic=None):
    """Self-cites core func, executed on each recid

    Runs compute_and_store_self_citations() for `recid` and then for every
    record id returned by get_refers_to(recid).

    Args:
     - recid: id of the record to process first
     - tags: forwarded unchanged to compute_and_store_self_citations()
     - citations_fun: forwarded unchanged to
                      compute_and_store_self_citations()
     - selfcites_dic: optional dict forwarded to
                      compute_and_store_self_citations(), which requires
                      it; when omitted a throw-away dict is used so that
                      three-argument callers keep working
    """
    if selfcites_dic is None:
        # Explicit None test: an empty dict supplied by the caller must be
        # kept, since the callee writes into it.
        selfcites_dic = {}

    # First update this record then all its references
    compute_and_store_self_citations(recid, tags, citations_fun,
                                     selfcites_dic)
    # get_refers_to() is deliberately called only after the record itself
    # has been processed, as in the original statement order.
    for recordid in get_refers_to(recid):
        compute_and_store_self_citations(recordid, tags, citations_fun,
                                         selfcites_dic)
def process_one(recid, tags, citations_fun, selfcites_dic):
    """Self-cites core func, executed on each recid

    The record itself is handled first; afterwards every record id
    returned by get_refers_to(recid) gets the same treatment.

    Args:
     - recid: id of the record to start from
     - tags: passed through to compute_and_store_self_citations()
     - citations_fun: passed through to compute_and_store_self_citations()
     - selfcites_dic: passed through to compute_and_store_self_citations()
    """
    def _refresh(target_recid):
        # Single place where the shared arguments are bound.
        compute_and_store_self_citations(target_recid, tags, citations_fun,
                                         selfcites_dic)

    # The record comes first, its references (per get_refers_to) second.
    _refresh(recid)
    for linked_recid in get_refers_to(recid):
        _refresh(linked_recid)
def compute_and_store_self_citations(recid, tags, citations_fun, selfcites_dic,
                                     verbose=False):
    """Compute and store self-cites in a table

    The newest `modification_date` among `recid` and the records returned
    by get_refers_to(recid) is looked up in `bibrec`. If `rnkSELFCITES`
    already holds a non-zero count for `recid` that was updated at or after
    that date, nothing is recomputed. Otherwise citations_fun(recid, tags)
    is called, its result is handed to replace_cites(), the count is
    recorded in `selfcites_dic` and the `rnkSELFCITES` row is replaced.

    Args:
     - recid: record id, must be truthy
     - tags: used when bibauthorid is deactivated see get_author_tags()
            in bibrank_selfcites_indexer
     - citations_fun: called as citations_fun(recid, tags); its result
                      must support len()
     - selfcites_dic: dict updated in place with
                      selfcites_dic[recid] = len(cites) when a fresh
                      computation happens
     - verbose: when true, progress is reported through write_message()

    Returns None.
    """
    assert recid

    if verbose:
        write_message("* processing %s" % recid)

    references = get_refers_to(recid)
    # Freeze the ids into a tuple so that the placeholder list and the bound
    # parameters are derived from the very same sequence.
    recids_to_check = tuple(set([recid]) | set(references))
    placeholders = ','.join(['%s'] * len(recids_to_check))
    rec_row = run_sql("SELECT MAX(`modification_date`) FROM `bibrec`"
                      " WHERE `id` IN (%s)" % placeholders, recids_to_check)

    # An aggregate without GROUP BY yields one row even when no id matches
    # (MAX() is then NULL), so the value itself has to be tested; checking
    # only for a missing row would never detect an unknown record.
    try:
        rec_timestamp = rec_row[0][0]
    except IndexError:
        rec_timestamp = None
    if rec_timestamp is None:
        write_message("record not found")
        return

    cached_citations_row = run_sql("SELECT `count` FROM `rnkSELFCITES`"
                                   " WHERE `last_updated` >= %s"
                                   " AND `id_bibrec` = %s",
                                   (rec_timestamp, recid))
    # NOTE(review): a cached count of 0 is falsy, so such records are
    # recomputed on every run -- kept as is, confirm this is intended.
    if cached_citations_row and cached_citations_row[0][0]:
        if verbose:
            write_message("%s found (cached)" % cached_citations_row[0][0])
        return

    cites = citations_fun(recid, tags)
    selfcites_dic[recid] = len(cites)
    replace_cites(recid, cites)
    sql = ("REPLACE INTO rnkSELFCITES (`id_bibrec`, `count`, `references`,"
           " `last_updated`) VALUES (%s, %s, %s, NOW())")
    references_string = ','.join(str(r) for r in references)
    run_sql(sql, (recid, len(cites), references_string))
    if verbose:
        write_message("%s found" % len(cites))
def compute_and_store_self_citations(recid, tags, citations_fun, selfcites_dic,
                                     verbose=False):
    """Compute and store self-cites in a table

    The newest `modification_date` among `recid` and the records returned
    by get_refers_to(recid) is looked up in `bibrec`. If `rnkSELFCITES`
    already holds a non-zero count for `recid` that was updated at or after
    that date, nothing is recomputed. Otherwise citations_fun(recid, tags)
    is called, its result is handed to replace_cites(), the count is
    recorded in `selfcites_dic` and the `rnkSELFCITES` row is replaced.

    Args:
     - recid: record id, must be truthy
     - tags: used when bibauthorid is deactivated see get_author_tags()
            in bibrank_selfcites_indexer
     - citations_fun: called as citations_fun(recid, tags); its result
                      must support len()
     - selfcites_dic: dict updated in place with
                      selfcites_dic[recid] = len(cites) when a fresh
                      computation happens
     - verbose: when true, progress is reported through write_message()

    Returns None.
    """
    assert recid

    if verbose:
        write_message("* processing %s" % recid)

    references = get_refers_to(recid)
    # Freeze the ids into a tuple so that the placeholder list and the bound
    # parameters are derived from the very same sequence.
    recids_to_check = tuple(set([recid]) | set(references))
    placeholders = ','.join(['%s'] * len(recids_to_check))
    rec_row = run_sql("SELECT MAX(`modification_date`) FROM `bibrec`"
                      " WHERE `id` IN (%s)" % placeholders, recids_to_check)

    # An aggregate without GROUP BY yields one row even when no id matches
    # (MAX() is then NULL), so the value itself has to be tested; checking
    # only for a missing row would never detect an unknown record.
    try:
        rec_timestamp = rec_row[0][0]
    except IndexError:
        rec_timestamp = None
    if rec_timestamp is None:
        write_message("record not found")
        return

    cached_citations_row = run_sql("SELECT `count` FROM `rnkSELFCITES`"
                                   " WHERE `last_updated` >= %s"
                                   " AND `id_bibrec` = %s",
                                   (rec_timestamp, recid))
    # NOTE(review): a cached count of 0 is falsy, so such records are
    # recomputed on every run -- kept as is, confirm this is intended.
    if cached_citations_row and cached_citations_row[0][0]:
        if verbose:
            write_message("%s found (cached)" % cached_citations_row[0][0])
        return

    cites = citations_fun(recid, tags)
    selfcites_dic[recid] = len(cites)
    replace_cites(recid, cites)
    sql = ("REPLACE INTO rnkSELFCITES (`id_bibrec`, `count`, `references`,"
           " `last_updated`) VALUES (%s, %s, %s, NOW())")
    references_string = ','.join(str(r) for r in references)
    run_sql(sql, (recid, len(cites), references_string))
    if verbose:
        write_message("%s found" % len(cites))