def test_database_tuples_to_single_list(self):
    """bibrank downloads indexer - database tuples to list."""
    # Imported locally so that the module is only loaded when the test runs.
    from invenio.legacy.bibrank import (
        downloads_indexer as bibrank_downloads_indexer)

    # One-column rows, in the tuple-of-tuples shape passed to the helper.
    db_rows = ((1,), (2,), (3,))
    flattened = bibrank_downloads_indexer.database_tuples_to_single_list(
        db_rows)
    self.assertEqual([1, 2, 3], flattened)
def calculate_reading_similarity_list(recid, type="pageviews"):
    """Calculate reading similarity data to use in reading similarity
    boxes (``people who downloaded/viewed this file/page have also
    downloaded/viewed'').

    Return list of (recid1, score1), (recid2, score2), ... for all
    recidN that were consulted by the same client hosts that have also
    consulted RECID.  The score is the number of distinct client hosts
    shared with RECID; at most 10 rows are returned, highest score
    first.

    The reading similarity TYPE can be either `pageviews' or
    `downloads', depending whether we want to obtain page view
    similarity or download similarity.  Any value other than
    `downloads' falls back to page views.

    An empty list is returned when CFG_CERN_SITE is set or when no
    client host is recorded for RECID.
    """
    # NOTE: the parameter name `type' shadows the builtin, but it is part
    # of the public signature and is therefore kept.
    if CFG_CERN_SITE:
        return []  # CERN hack 2009-11-23 to ease the load

    # The table name is picked from two fixed literals, never from user
    # input, so concatenating it into the statement is safe.
    if type == "downloads":
        tablename = "rnkDOWNLOADS"
    else:  # default
        tablename = "rnkPAGEVIEWS"

    # firstly compute the set of client hosts who consulted recid:
    host_rows = run_sql("SELECT DISTINCT(client_host)"
                        " FROM " + tablename +
                        " WHERE id_bibrec=%s "
                        " AND client_host IS NOT NULL",
                        (recid,))

    # Truthiness instead of `!= ()': an empty list result must not reach
    # the second query, where it would yield the invalid SQL `IN ()'.
    if not host_rows:
        return []

    # secondly look up all recids that were consulted by these client hosts,
    # and order them by the number of different client hosts reading them.
    # The hosts are bound as query parameters (one placeholder each)
    # instead of splicing str(list) into the statement and stripping
    # "L", "[" and "]" from it.
    client_hosts = database_tuples_to_single_list(host_rows)
    placeholders = ",".join(["%s"] * len(client_hosts))
    return run_sql("SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c"
                   " FROM " + tablename +
                   " WHERE client_host IN (" + placeholders + ")"
                   " AND id_bibrec != %s"
                   " GROUP BY id_bibrec ORDER BY c DESC LIMIT 10",
                   tuple(client_hosts) + (recid,))
def calculate_reading_similarity_list(recid, type="pageviews"):
    """Calculate reading similarity data to use in reading similarity
    boxes (``people who downloaded/viewed this file/page have also
    downloaded/viewed'').

    Return list of (recid1, score1), (recid2, score2), ... for all
    recidN that were consulted by the same client hosts that have also
    consulted RECID, ordered by the number of distinct shared client
    hosts (descending) and limited to 10 entries.

    The reading similarity TYPE can be either `pageviews' or
    `downloads', depending whether we want to obtain page view
    similarity or download similarity; anything else is treated as
    `pageviews'.

    Returns [] when CFG_CERN_SITE is set, or when RECID has no recorded
    client host.
    """
    # NOTE: `type' shadows the builtin; the name is kept because callers
    # may pass it as a keyword argument.
    if CFG_CERN_SITE:
        return []  # CERN hack 2009-11-23 to ease the load

    # Only two fixed table names are possible, so building the statement
    # by concatenation does not expose it to caller-supplied text.
    tablename = "rnkDOWNLOADS" if type == "downloads" else "rnkPAGEVIEWS"

    # firstly compute the set of client hosts who consulted recid:
    client_host_rows = run_sql("SELECT DISTINCT(client_host)"
                               " FROM " + tablename +
                               " WHERE id_bibrec=%s "
                               " AND client_host IS NOT NULL",
                               (recid,))

    res = []
    # Use truthiness rather than comparing with `()': an empty list would
    # otherwise pass the check and generate `IN ()', which is a SQL error.
    if client_host_rows:
        client_hosts = database_tuples_to_single_list(client_host_rows)
        # One bound placeholder per host.  This replaces the former
        # str(list) splice, which had to strip "L", "[" and "]" from the
        # repr and would mangle any value containing those characters.
        in_clause = ",".join("%s" for _host in client_hosts)
        # secondly look up all recids that were consulted by these client
        # hosts, and order them by the number of different client hosts
        # reading them:
        res = run_sql("SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c"
                      " FROM " + tablename +
                      " WHERE client_host IN (" + in_clause + ")"
                      " AND id_bibrec != %s"
                      " GROUP BY id_bibrec ORDER BY c DESC LIMIT 10",
                      tuple(client_hosts) + (recid,))
    return res
def create_download_history_graph_and_box(id_bibrec, ln=CFG_SITE_LANG):
    """Create graph with download history for record ID_BIBREC (into a
       temporary file) and return HTML box referring to that image.
       Called by Detailed record pages.

       Two independent boxes may be produced, each guarded by its own
       configuration flag:
        - CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: the download history graph;
          when the flag equals 2 the image file under CFG_WEBDIR is
          inlined into the HTML, otherwise an <img> tag pointing to
          CFG_SITE_URL is emitted.
        - CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION: the
          client host distribution graph; 1 emits an <img> tag, 2 inlines
          the image file.

       Notes:
        if id_bibdoc=0 : it is an outside-stored document and it has no
            id_bibdoc --> only one line
        if len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC draw one line per
            id_bibdoc
        if len(id_bibdocs) > CFG_ID_BIBDOC_ID_BIBREC draw only one line
            which holds simultaneously the downloads for all id_bibdoc
        Each time this function is called, remove_old_img("download") is
        invoked to purge stale images (older than 10 minutes, per the
        original note).

       @param id_bibrec: record ID whose download statistics are drawn
       @param ln: language code passed to gettext_set_language() for the
           box caption
       @return: HTML fragment (possibly the empty string)
    """
    _ = gettext_set_language(ln)

    out = ""

    # Prepare downloads history graph:
    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
        html_content = ""
        # remove images older than 10 minutes
        remove_old_img("download")
        # download count graph
        id_bibdocs = intbitset(run_sql(
            "select distinct id_bibdoc from rnkDOWNLOADS where id_bibrec=%s",
            (id_bibrec, )))

        id_existing_bibdocs = intbitset(run_sql(
            "SELECT id_bibdoc FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'",
            (id_bibrec, )))

        ## FIXME: when bibdocs are deleted we lose the stats. What shall we do with them?
        id_bibdocs &= id_existing_bibdocs

        history_analysis_results = ()
        if not id_bibdocs:
            pass
        elif len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC and 0 not in id_bibdocs:
            # few enough documents: one line per id_bibdoc
            history_analysis_results = draw_downloads_statistics(
                id_bibrec, list(id_bibdocs))
        else:
            # too many documents (or id_bibdoc 0 present): one line in total
            history_analysis_results = draw_downloads_statistics(id_bibrec, [])

        if history_analysis_results and history_analysis_results[0]:
            graph_file = history_analysis_results[0]
            # keep only the file name part of the returned path
            graph_path = graph_file[graph_file.rfind('/') + 1:]
            if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2:
                graph_file_history = CFG_WEBDIR + "/img/" + graph_path
                # `with' guarantees the handle is closed after reading
                with open(graph_file_history) as graph_handle:
                    html_content += """<tr><td valign=center align=center>%s</td>""" % graph_handle.read()
            else:  # gnuplot
                graph_file_history = CFG_SITE_URL + "/img/" + graph_path
                html_content += """<tr><td valign=center align=center><img src='%s'/></td>""" % graph_file_history
            # second element: temporary file to delete once the graph exists
            file_to_close_history = history_analysis_results[1]
            if file_to_close_history and os.path.exists(file_to_close_history):
                os.unlink(file_to_close_history)

        if html_content:
            out += """<table border="0" cellspacing="1" cellpadding="1">"""
            out += html_content + "</table>"

    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION:
        # do we show also user IP repartition?
        html_content = ""
        remove_old_img("download")
        # Users analysis graph
        # id_bibrec is passed as a bound parameter, like the queries above,
        # instead of being interpolated into the SQL text.
        ips = database_tuples_to_single_list(run_sql(
            "select client_host from rnkDOWNLOADS where id_bibrec=%s;",
            (id_bibrec, )))
        if ips:
            users_analysis_results = create_users_analysis_graph(id_bibrec, ips)
            if users_analysis_results[0]:
                file_to_close_users = users_analysis_results[1]
                if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1:
                    html_content += """<tr><td valign=center align=center><img src='%s/img/%s' align="center" alt=""></td>""" % (
                        CFG_SITE_URL, users_analysis_results[0])
                elif CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2:
                    users_graph_file = CFG_WEBDIR + "/img/" + users_analysis_results[0]
                    # `with' guarantees the handle is closed after reading
                    with open(users_graph_file) as users_graph_handle:
                        html_content += """<tr><td valign=center align=center>%s</td>""" % users_graph_handle.read()
                if file_to_close_users and os.path.exists(file_to_close_users):
                    os.unlink(file_to_close_users)
        if html_content:
            out += """<br/><br/><table><tr><td class="blocknote"> %s</td></tr><tr><td> <table border="0" cellspacing="1" cellpadding="1">""" % _(
                "Download user distribution:")
            out += html_content
            out += "</table></td></tr></table>"

    # return html code used by get_file or search_engine
    return out
def create_download_history_graph_and_box(id_bibrec, ln=CFG_SITE_LANG):
    """Create graph with download history for record ID_BIBREC (into a
       temporary file) and return HTML box referring to that image.
       Called by Detailed record pages.

       The result is built from up to two parts:
        - when CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS is set, the download
          history graph (value 2: the image file found under CFG_WEBDIR
          is inlined; any other true value: an <img> tag pointing to
          CFG_SITE_URL);
        - when CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION is
          set, the client host distribution graph (value 1: <img> tag;
          value 2: inlined image file).

       Notes:
        if id_bibdoc=0 : it is an outside-stored document and it has no
            id_bibdoc --> only one line
        if len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC draw one line per
            id_bibdoc
        if len(id_bibdocs) > CFG_ID_BIBDOC_ID_BIBREC draw only one line
            which holds simultaneously the downloads for all id_bibdoc
        Each time this function is called, remove_old_img("download") is
        invoked to purge stale images (older than 10 minutes, per the
        original note).

       @param id_bibrec: record ID whose download statistics are drawn
       @param ln: language code passed to gettext_set_language() for the
           box caption
       @return: HTML fragment (possibly the empty string)
    """
    _ = gettext_set_language(ln)

    out = ""

    # Prepare downloads history graph:
    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
        html_content = ""
        # remove images older than 10 minutes
        remove_old_img("download")
        # download count graph
        id_bibdocs = intbitset(run_sql(
            """select distinct id_bibdoc from "rnkDOWNLOADS" where id_bibrec=%s""",
            (id_bibrec, )))

        id_existing_bibdocs = intbitset(run_sql(
            "SELECT id_bibdoc FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'",
            (id_bibrec, )))

        ## FIXME: when bibdocs are deleted we lose the stats. What shall we do with them?
        id_bibdocs &= id_existing_bibdocs

        history_analysis_results = ()
        if id_bibdocs:
            if len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC and 0 not in id_bibdocs:
                # few enough documents: one line per id_bibdoc
                docs_to_plot = list(id_bibdocs)
            else:
                # too many documents (or id_bibdoc 0 present): single line
                docs_to_plot = []
            history_analysis_results = draw_downloads_statistics(
                id_bibrec, docs_to_plot)

        if history_analysis_results and history_analysis_results[0]:
            graph_file = history_analysis_results[0]
            # keep only the file name part of the returned path
            graph_path = graph_file[graph_file.rfind('/') + 1:]
            if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2:
                graph_file_history = CFG_WEBDIR + "/img/" + graph_path
                # read through a context manager so the handle is closed
                with open(graph_file_history) as graph_handle:
                    html_content += """<tr><td valign=center align=center>%s</td>""" % graph_handle.read()
            else:  # gnuplot
                graph_file_history = CFG_SITE_URL + "/img/" + graph_path
                html_content += """<tr><td valign=center align=center><img src='%s'/></td>""" % graph_file_history
            # second element: temporary file to delete once the graph exists
            file_to_close_history = history_analysis_results[1]
            if file_to_close_history and os.path.exists(file_to_close_history):
                os.unlink(file_to_close_history)

        if html_content:
            out += """<table border="0" cellspacing="1" cellpadding="1">"""
            out += html_content + "</table>"

    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION:
        # do we show also user IP repartition?
        html_content = ""
        remove_old_img("download")
        # Users analysis graph
        # Bind id_bibrec as a query parameter, consistently with the two
        # queries above, rather than formatting it into the SQL string.
        ips = database_tuples_to_single_list(run_sql(
            """select client_host from "rnkDOWNLOADS" where id_bibrec=%s;""",
            (id_bibrec, )))
        if ips:
            users_analysis_results = create_users_analysis_graph(id_bibrec, ips)
            if users_analysis_results[0]:
                file_to_close_users = users_analysis_results[1]
                if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1:
                    html_content += """<tr><td valign=center align=center><img src='%s/img/%s' align="center" alt=""></td>""" % (
                        CFG_SITE_URL, users_analysis_results[0])
                elif CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2:
                    users_graph_file = CFG_WEBDIR + "/img/" + users_analysis_results[0]
                    # read through a context manager so the handle is closed
                    with open(users_graph_file) as users_graph_handle:
                        html_content += """<tr><td valign=center align=center>%s</td>""" % users_graph_handle.read()
                if file_to_close_users and os.path.exists(file_to_close_users):
                    os.unlink(file_to_close_users)
        if html_content:
            out += """<br/><br/><table><tr><td class="blocknote"> %s</td></tr><tr><td> <table border="0" cellspacing="1" cellpadding="1">""" % _(
                "Download user distribution:")
            out += html_content
            out += "</table></td></tr></table>"

    # return html code used by get_file or search_engine
    return out
def test_database_tuples_to_single_list(self):
    """bibrank downloads indexer - database tuples to list"""
    # Single-column rows collapse into a flat list, order preserved.
    flatten = bibrank_downloads_indexer.database_tuples_to_single_list
    actual = flatten(((1,), (2,), (3,)))
    self.assertEqual([1, 2, 3], actual)
def test_database_tuples_to_single_list(self):
    """bibrank downloads indexer - database tuples to list"""
    # Input mimics a result set of one-element row tuples.
    rows = ((1,), (2,), (3,))
    expected = [1, 2, 3]
    result = bibrank_downloads_indexer.database_tuples_to_single_list(rows)
    self.assertEqual(expected, result)