Esempio n. 1
0
 def test_database_tuples_to_single_list(self):
     """bibrank downloads indexer - database tuples to list.

     Feeds three one-element tuples to database_tuples_to_single_list
     and expects the flat list of their values back.
     """
     from invenio.legacy.bibrank import downloads_indexer

     # One-column rows, each value wrapped in its own 1-tuple.
     rows = ((1,), (2,), (3,))
     flattened = downloads_indexer.database_tuples_to_single_list(rows)
     self.assertEqual([1, 2, 3], flattened)
Esempio n. 2
0
def calculate_reading_similarity_list(recid, type="pageviews"):
    """Calculate reading similarity data to use in reading similarity
       boxes (``people who downloaded/viewed this file/page have also
       downloaded/viewed'').

       Two queries are run against the table selected by TYPE: first
       the distinct non-NULL client hosts that consulted RECID, then
       the other records consulted by those hosts, ranked by the number
       of distinct hosts that read them.

       Parameters:
         recid - ID of the record to find similar records for.
         type  - `downloads' selects the rnkDOWNLOADS table; any other
                 value (default `pageviews') selects rnkPAGEVIEWS.

       Returns the run_sql result of the ranking query, i.e. at most
       ten (recid, number_of_hosts) rows in descending order of host
       count.  Returns an empty list when CFG_CERN_SITE is set or when
       no client host consulted RECID.
    """
    if CFG_CERN_SITE:
        return []  # CERN hack 2009-11-23 to ease the load
    if type == "downloads":
        tablename = "rnkDOWNLOADS"
    else:  # default
        tablename = "rnkPAGEVIEWS"
    # tablename is always one of the two literals above, never
    # caller-supplied text, so concatenating it into the SQL is safe.

    # firstly compute the set of client hosts who consulted recid:
    host_rows = run_sql("SELECT DISTINCT(client_host)"
                        "  FROM " + tablename +
                        " WHERE id_bibrec=%s "
                        "   AND client_host IS NOT NULL",
                        (recid,))
    # Truthiness covers both an empty tuple and an empty list; an empty
    # host set must never reach the IN (...) clause below.
    if not host_rows:
        return []
    client_hosts = database_tuples_to_single_list(host_rows)
    if not client_hosts:
        return []

    # secondly look up all recids that were consulted by these client hosts,
    # and order them by the number of different client hosts reading them.
    # The hosts are passed as bound parameters (one %s per host) instead
    # of being pasted into the SQL text, so their values cannot alter
    # the statement.
    placeholders = ", ".join(["%s"] * len(client_hosts))
    return run_sql("SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c"
                   "  FROM " + tablename +
                   " WHERE client_host IN (" + placeholders + ")"
                   "   AND id_bibrec != %s"
                   " GROUP BY id_bibrec ORDER BY c DESC LIMIT 10",
                   tuple(client_hosts) + (recid,))
Esempio n. 3
0
def calculate_reading_similarity_list(recid, type="pageviews"):
    """Calculate reading similarity data to use in reading similarity
       boxes (``people who downloaded/viewed this file/page have also
       downloaded/viewed'').

       Two queries are run against the table selected by TYPE: first
       the distinct non-NULL client hosts that consulted RECID, then
       the other records consulted by those hosts, ranked by the number
       of distinct hosts that read them.

       Parameters:
         recid - ID of the record to find similar records for.
         type  - `downloads' selects the rnkDOWNLOADS table; any other
                 value (default `pageviews') selects rnkPAGEVIEWS.

       Returns the run_sql result of the ranking query, i.e. at most
       ten (recid, number_of_hosts) rows in descending order of host
       count.  Returns an empty list when CFG_CERN_SITE is set or when
       no client host consulted RECID.
    """
    if CFG_CERN_SITE:
        return []  # CERN hack 2009-11-23 to ease the load
    if type == "downloads":
        tablename = "rnkDOWNLOADS"
    else:  # default
        tablename = "rnkPAGEVIEWS"
    # tablename is always one of the two literals above, never
    # caller-supplied text, so concatenating it into the SQL is safe.

    # firstly compute the set of client hosts who consulted recid:
    host_rows = run_sql("SELECT DISTINCT(client_host)"
                        "  FROM " + tablename +
                        " WHERE id_bibrec=%s "
                        "   AND client_host IS NOT NULL",
                        (recid,))
    # Truthiness covers both an empty tuple and an empty list; an empty
    # host set must never reach the IN (...) clause below.
    if not host_rows:
        return []
    client_hosts = database_tuples_to_single_list(host_rows)
    if not client_hosts:
        return []

    # secondly look up all recids that were consulted by these client hosts,
    # and order them by the number of different client hosts reading them.
    # The hosts are passed as bound parameters (one %s per host) instead
    # of being pasted into the SQL text, so their values cannot alter
    # the statement.
    placeholders = ", ".join(["%s"] * len(client_hosts))
    return run_sql("SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c"
                   "  FROM " + tablename +
                   " WHERE client_host IN (" + placeholders + ")"
                   "   AND id_bibrec != %s"
                   " GROUP BY id_bibrec ORDER BY c DESC LIMIT 10",
                   tuple(client_hosts) + (recid,))
Esempio n. 4
0
def create_download_history_graph_and_box(id_bibrec, ln=CFG_SITE_LANG):
    """Create graph(s) with the download history for record ID_BIBREC
       and return the HTML box referring to them.
       Called by Detailed record pages.

       Parameters:
         id_bibrec - ID of the record whose downloads are plotted.
         ln        - language passed to gettext_set_language for the
                     translated box title.

       Returns an HTML string; it is empty when both graph settings are
       off or when no graph could be produced.

       Notes:
        if id_bibdoc=0 : it is an outside-stored document and it has no id_bibdoc --> only one line
        if len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC draw one line per id_bibdoc
        if len(id_bibdocs) > CFG_ID_BIBDOC_ID_BIBREC draw only one line which holds simultaneously the downloads for all id_bibdoc
        Each time this function is called, all the images older than 10 minutes are deleted.
    """
    _ = gettext_set_language(ln)

    out = ""

    # Prepare downloads history graph:
    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
        html_content = ""
        # remove images older than 10 minutes
        remove_old_img("download")
        # download count graph
        id_bibdocs = intbitset(
            run_sql(
                "select distinct id_bibdoc from rnkDOWNLOADS where id_bibrec=%s",
                (id_bibrec, )))

        id_existing_bibdocs = intbitset(
            run_sql(
                "SELECT id_bibdoc FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'",
                (id_bibrec, )))

        ## FIXME: when bibdocs are deleted we lose the stats. What shall we do with them?
        # Keep only the documents that still exist for this record.
        id_bibdocs &= id_existing_bibdocs

        history_analysis_results = ()
        if not id_bibdocs:
            pass
        elif len(
                id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC and 0 not in id_bibdocs:
            # Few enough documents, none stored outside: pass them all.
            history_analysis_results = draw_downloads_statistics(
                id_bibrec, list(id_bibdocs))
        else:
            # Too many documents, or id_bibdoc 0 present: pass no list.
            history_analysis_results = draw_downloads_statistics(id_bibrec, [])
        if history_analysis_results and history_analysis_results[0]:
            # Only the file name after the last '/' is used below.
            graph_path = history_analysis_results[0][
                history_analysis_results[0].rfind('/') + 1:]
            if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2:
                # Setting 2: inline the contents of the generated file.
                graph_file_history = CFG_WEBDIR + "/img/" + graph_path
                # 'with' closes the file handle even if read() fails.
                with open(graph_file_history) as graph_file:
                    graph_markup = graph_file.read()
                html_content += """<tr><td valign=center align=center>%s</td>""" % graph_markup
            else:  # gnuplot
                graph_file_history = CFG_SITE_URL + "/img/" + graph_path
                html_content += """<tr><td valign=center align=center><img src='%s'/></td>""" % graph_file_history
            # Second element names a file to delete once the graph is used.
            file_to_close_history = history_analysis_results[1]
            if file_to_close_history:
                if os.path.exists(file_to_close_history):
                    os.unlink(file_to_close_history)
        if html_content != "":
            out += """<table border="0" cellspacing="1" cellpadding="1">"""
            out += html_content + "</table>"

    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION:
        # do we show also user IP repartition?
        html_content = ""
        remove_old_img("download")
        # Users analysis graph
        # id_bibrec is passed as a bound parameter rather than being
        # interpolated into the SQL text.
        ips = database_tuples_to_single_list(
            run_sql(
                "select client_host from rnkDOWNLOADS where id_bibrec=%s;",
                (id_bibrec, )))
        if ips:
            users_analysis_results = create_users_analysis_graph(
                id_bibrec, ips)
            if users_analysis_results[0]:
                file_to_close_users = users_analysis_results[1]
                if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1:
                    # Setting 1: reference the image by URL.
                    html_content += """<tr><td valign=center align=center><img src='%s/img/%s' align="center" alt=""></td>""" % (
                        CFG_SITE_URL, users_analysis_results[0])
                elif CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2:
                    # Setting 2: inline the contents of the generated file.
                    with open(CFG_WEBDIR + "/img/" +
                              users_analysis_results[0]) as graph_file:
                        graph_markup = graph_file.read()
                    html_content += """<tr><td valign=center align=center>%s</td>""" % graph_markup
                if file_to_close_users:
                    if os.path.exists(file_to_close_users):
                        os.unlink(file_to_close_users)
        if html_content != "":
            out += """<br/><br/><table><tr><td class="blocknote">
                      %s</td></tr><tr><td>
                      <table border="0" cellspacing="1" cellpadding="1">""" % _(
                "Download user distribution:")
            out += html_content
            out += "</table></td></tr></table>"

    # return html code used by get_file or search_engine
    return out
Esempio n. 5
0
def create_download_history_graph_and_box(id_bibrec, ln=CFG_SITE_LANG):
    """Create graph(s) with the download history for record ID_BIBREC
       and return the HTML box referring to them.
       Called by Detailed record pages.

       Parameters:
         id_bibrec - ID of the record whose downloads are plotted.
         ln        - language passed to gettext_set_language for the
                     translated box title.

       Returns an HTML string; it is empty when both graph settings are
       off or when no graph could be produced.

       Notes:
        if id_bibdoc=0 : it is an outside-stored document and it has no id_bibdoc --> only one line
        if len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC draw one line per id_bibdoc
        if len(id_bibdocs) > CFG_ID_BIBDOC_ID_BIBREC draw only one line which holds simultaneously the downloads for all id_bibdoc
        Each time this function is called, all the images older than 10 minutes are deleted.
    """
    _ = gettext_set_language(ln)

    out = ""

    # Prepare downloads history graph:
    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS:
        html_content = ""
        # remove images older than 10 minutes
        remove_old_img("download")
        # download count graph
        id_bibdocs = intbitset(run_sql("""select distinct id_bibdoc from "rnkDOWNLOADS" where id_bibrec=%s""", (id_bibrec, )))

        id_existing_bibdocs = intbitset(run_sql("SELECT id_bibdoc FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (id_bibrec, )))

        ## FIXME: when bibdocs are deleted we lose the stats. What shall we do with them?
        # Keep only the documents that still exist for this record.
        id_bibdocs &= id_existing_bibdocs

        history_analysis_results = ()
        if not id_bibdocs:
            pass
        elif len(id_bibdocs) <= CFG_ID_BIBDOC_ID_BIBREC and 0 not in id_bibdocs:
            # Few enough documents, none stored outside: pass them all.
            history_analysis_results = draw_downloads_statistics(id_bibrec, list(id_bibdocs))
        else:
            # Too many documents, or id_bibdoc 0 present: pass no list.
            history_analysis_results = draw_downloads_statistics(id_bibrec, [])
        if history_analysis_results and history_analysis_results[0]:
            # Only the file name after the last '/' is used below.
            graph_path = history_analysis_results[0][history_analysis_results[0].rfind('/')+1:]
            if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS == 2:
                # Setting 2: inline the contents of the generated file.
                graph_file_history = CFG_WEBDIR + "/img/" + graph_path
                # 'with' closes the file handle even if read() fails.
                with open(graph_file_history) as graph_file:
                    graph_markup = graph_file.read()
                html_content += """<tr><td valign=center align=center>%s</td>""" % graph_markup
            else:  # gnuplot
                graph_file_history = CFG_SITE_URL + "/img/" + graph_path
                html_content += """<tr><td valign=center align=center><img src='%s'/></td>""" % graph_file_history
            # Second element names a file to delete once the graph is used.
            file_to_close_history = history_analysis_results[1]
            if file_to_close_history:
                if os.path.exists(file_to_close_history):
                    os.unlink(file_to_close_history)
        if html_content != "":
            out += """<table border="0" cellspacing="1" cellpadding="1">"""
            out += html_content + "</table>"

    if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION:
        # do we show also user IP repartition?
        html_content = ""
        remove_old_img("download")
        # Users analysis graph
        # id_bibrec is passed as a bound parameter rather than being
        # interpolated into the SQL text.
        ips = database_tuples_to_single_list(run_sql("""select client_host from "rnkDOWNLOADS" where id_bibrec=%s;""", (id_bibrec, )))
        if ips:
            users_analysis_results = create_users_analysis_graph(id_bibrec, ips)
            if users_analysis_results[0]:
                file_to_close_users = users_analysis_results[1]
                if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 1:
                    # Setting 1: reference the image by URL.
                    html_content += """<tr><td valign=center align=center><img src='%s/img/%s' align="center" alt=""></td>""" % (CFG_SITE_URL, users_analysis_results[0])
                elif CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION == 2:
                    # Setting 2: inline the contents of the generated file.
                    with open(CFG_WEBDIR + "/img/"  + users_analysis_results[0]) as graph_file:
                        graph_markup = graph_file.read()
                    html_content += """<tr><td valign=center align=center>%s</td>""" % graph_markup
                if file_to_close_users:
                    if os.path.exists(file_to_close_users):
                        os.unlink(file_to_close_users)
        if html_content != "":
            out += """<br/><br/><table><tr><td class="blocknote">
                      %s</td></tr><tr><td>
                      <table border="0" cellspacing="1" cellpadding="1">""" % _("Download user distribution:")
            out += html_content
            out += "</table></td></tr></table>"

    # return html code used by get_file or search_engine
    return out
Esempio n. 6
0
 def test_database_tuples_to_single_list(self):
     """bibrank downloads indexer - database tuples to list"""
     # This method was defined twice in a row with the same name and the
     # same assertion; the later definition silently replaced the
     # earlier one, so a single definition is kept.
     # Input: three one-element tuples; expected: their values as one
     # flat list.
     self.assertEqual(
         [1, 2, 3],
         bibrank_downloads_indexer.database_tuples_to_single_list(
             ((1, ), (2, ), (3, ))))