Ejemplo n.º 1
0
def perform_deleterank(rnkID, ln=CFG_SITE_LANG, confirm=0):
    """Build the admin box used to delete a rank method.

    With ``confirm`` equal to 0 (or "0") a confirmation form is shown.  With
    ``confirm`` equal to 1 (or "1") the rank method is deleted through
    delete_rnk() and, when that reports success, its configuration file is
    removed from disk as well.

    rnkID - id of the rank method to delete

    ln - interface language (not used inside this function)

    confirm - 0 to ask for confirmation, 1 to perform the deletion

    Returns whatever addadminbox() returns for the subtitle and body built
    here.
    """
    subtitle = ''
    output  = """
    <span class="warning">
    <dl>
     <dt><strong>WARNING:</strong></dt>
     <dd><strong>When deleting a rank method, you also deletes all data related to the rank method, like translations, which collections
     it was attached to and the data necessary to rank the searchresults. Any scheduled tasks using the deleted rank method will also stop working.
     <br /><br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section regarding deleting a rank method.</strong></dd>
    </dl>
    </span>
    """ % CFG_SITE_URL

    if rnkID:
        if confirm in ["0", 0]:
            rnkNAME = get_def_name(rnkID, "rnkMETHOD")[0][1]
            subtitle = 'Step 1 - Confirm deletion'
            text = """Delete rank method '%s'.""" % (rnkNAME)
            output += createhiddenform(action="deleterank",
                                       text=text,
                                       button="Confirm",
                                       rnkID=rnkID,
                                       confirm=1)
        elif confirm in ["1", 1]:
            try:
                # The name itself is not used in this branch; the lookup is
                # kept so that a failure here (presumably an unknown rnkID --
                # TODO confirm) still ends up in the outer handler.
                get_def_name(rnkID, "rnkMETHOD")[0][1]
                rnkcode = get_rnk_code(rnkID)[0][0]
                table = ""
                cfg_path = None
                try:
                    cfg_path = configuration.get(rnkcode + ".cfg")
                    config = ConfigParser.ConfigParser()
                    # Close the file as soon as it has been parsed.
                    with open(cfg_path, 'r') as cfg_file:
                        config.readfp(cfg_file)
                    table = config.get(
                        config.get('rank_method', "function"), "table")
                except Exception:
                    # Best effort: without a readable configuration (or
                    # without a "table" option) delete_rnk() is simply
                    # called with an empty table name.
                    pass
                result = delete_rnk(rnkID, table)
                subtitle = "Step 2 - Result"
                if result:
                    text = """<b><span class="info">Rank method deleted</span></b>"""
                    # What to show to the admin when the registry did not
                    # give us a path.
                    shown_path = cfg_path or (rnkcode + ".cfg")
                    try:
                        os.remove(cfg_path)
                        text += """<br /><b><span class="info">Configuration file deleted: '%s'.</span></b>""" % (
                            shown_path, )
                    except StandardError:
                        # The message takes exactly one argument; a mismatch
                        # here would raise inside the handler and wrongly
                        # report the whole deletion as failed.
                        text += """<br /><b><span class="info">Sorry, could not delete configuration file: '%s'.</span><br />Please delete the file manually.</b>""" % (
                            shown_path, )
                else:
                    text = """<b><span class="info">Sorry, could not delete rank method</span></b>"""
            except StandardError:
                text = """<b><span class="info">Sorry, could not delete rank method, most likely already deleted</span></b>"""
            # The result page replaces the warning text entirely.
            output = text

    body = [output]

    return addadminbox(subtitle + """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#dr">?</a>]</small>""" % CFG_SITE_URL, body)
Ejemplo n.º 2
0
def task_run_core():
    """Run every requested rank method.

    The methods to run are taken from the "run" task option; when that
    option is empty it is filled with all names found in the rnkMETHOD
    table.  For each method its configuration file is parsed, and the
    function named by the ``function`` option of the ``[rank_method]``
    section is looked up in this module's globals and called with the
    method name.

    Returns True once all methods have been processed.  When a configuration
    file cannot be read, a message is written to stderr and the exception is
    re-raised.
    """
    if not task_get_option("run"):
        task_set_option("run", [name[0] for name in run_sql("SELECT name from rnkMETHOD")])

    for key in task_get_option("run"):
        task_sleep_now_if_required(can_stop_too=True)
        write_message("")
        filename = configuration.get(key + '.cfg', '')
        write_message("Getting configuration from file: %s" % filename,
            verbose=9)
        config = ConfigParser.ConfigParser()
        try:
            # Use a context manager so the handle is closed after parsing.
            with open(filename) as cfg_file:
                config.readfp(cfg_file)
        except StandardError:
            write_message("Cannot find configuration file: %s. "
                "The rankmethod may also not be registered using "
                "the BibRank Admin Interface." % filename, sys.stderr)
            raise

        # Use the "function" option to find the callable implementing the
        # rank method among this module's global names.
        cfg_function = config.get("rank_method", "function")
        func_object = globals().get(cfg_function)
        if func_object:
            func_object(key)
        else:
            write_message("Cannot run method '%s', no function to call"
                % key)

    return True
Ejemplo n.º 3
0
def command_line():
    """Parse the command line and run bibrankgkb() with the chosen config.

    Recognised options: -i/--input (configuration file; the value is first
    looked up in the configuration registry and used as-is when not found),
    -o/--output, -v/--verbose (integer), -h/--help and -V/--version.  The
    parsed values are stored in the module-level ``opts_dict``.

    Any StandardError raised while handling options, reading the
    configuration or running bibrankgkb() is written to stderr and the
    process exits with status 1.
    """
    global opts_dict
    long_flags = ["input=", "output=", "help", "version", "verbose="]
    short_flags = "i:o:hVv:"
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.GetoptError as err:
        write_message(err, sys.stderr)
        # NOTE(review): usage() is expected to terminate the process;
        # otherwise ``args`` below would be unbound -- confirm.
        usage(1)
    if args:
        usage(1)
    opts_dict = {
        "input": configuration.get('bibrankgkb.cfg', ''),
        "output": "",
        "verbose": 1
    }
    try:
        for flag, value in opts:
            if flag in ("-h", "--help"):
                usage(1)
            elif flag in ("-V", "--version"):
                print(__revision__)
                sys.exit(1)
            elif flag in ("--input", "-i"):
                opts_dict["input"] = configuration.get(value, value)
            elif flag in ("--output", "-o"):
                opts_dict["output"] = value
            elif flag in ("--verbose", "-v"):
                opts_dict["verbose"] = int(value)
            else:
                usage(1)

        startCreate = time.time()
        config_file = opts_dict["input"]
        config = ConfigParser.ConfigParser()
        # Close the configuration file as soon as it has been parsed.
        with open(config_file) as cfg_file:
            config.readfp(cfg_file)
        bibrankgkb(config)
        if opts_dict["verbose"] >= 9:
            showtime((time.time() - startCreate))
    except StandardError as e:
        write_message(e, sys.stderr)
        sys.exit(1)
    return
Ejemplo n.º 4
0
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    For every name in the rnkMETHOD table the matching configuration file
    is parsed and an entry is stored in the module-level METHODS dictionary,
    keyed by the rank method code.  The entry holds the ranking function
    name, the output prefix/postfix, the translated names and, depending on
    the options present, word-table, stemming, stopword, find_similar and
    combine_method settings.

    Raises Exception when the configuration has no section named after the
    ranking function.  A missing or unreadable file is not reported here;
    the first option lookup then fails with a ConfigParser error.
    """

    bibrank_meths = run_sql("SELECT name from rnkMETHOD")

    for (rank_method_code,) in bibrank_meths:
        filepath = configuration.get(rank_method_code + '.cfg', '')
        config = ConfigParser.ConfigParser()
        try:
            # Close the handle once the file has been parsed.
            with open(filepath) as cfg_file:
                config.readfp(cfg_file)
        except IOError:
            # Deliberately ignored: the lookup of [rank_method] right below
            # raises for an empty parser.
            pass

        cfg_function = config.get("rank_method", "function")
        if not config.has_section(cfg_function):
            # The original message referenced an undefined name here, which
            # masked this error with a NameError.
            raise Exception("Error in configuration config_file: %s" % (filepath, ))

        # ``method`` is the very dictionary stored in METHODS, so everything
        # added below is visible through METHODS[rank_method_code] at once.
        method = METHODS[rank_method_code] = {}
        method["function"] = cfg_function
        method["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
        method["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
        method["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"

        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            method[ln] = value

        if config.has_option(cfg_function, "table"):
            method["rnkWORD_table"] = config.get(cfg_function, "table")
            # The counted table is the configured name with its last
            # character replaced by "R".
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(method["rnkWORD_table"][:-1])
            method["col_size"] = run_sql(query)[0][0]

        if config.has_option(cfg_function, "stemming"):
            stemmer = config.get(cfg_function, "stemming")
            # An empty value means "no stemming": leave the key unset.
            if stemmer:
                method["stemmer"] = stemmer

        if config.has_option(cfg_function, "stopword"):
            method["stopwords"] = config.get(cfg_function, "stopword")

        if config.has_section("find_similar"):
            method["max_word_occurence"] = float(config.get("find_similar", "max_word_occurence"))
            method["min_word_occurence"] = float(config.get("find_similar", "min_word_occurence"))
            method["min_word_length"] = int(config.get("find_similar", "min_word_length"))
            method["min_nr_words_docs"] = int(config.get("find_similar", "min_nr_words_docs"))
            method["max_nr_words_upper"] = int(config.get("find_similar", "max_nr_words_upper"))
            method["max_nr_words_lower"] = int(config.get("find_similar", "max_nr_words_lower"))
            method["default_min_relevance"] = int(config.get("find_similar", "default_min_relevance"))

        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)

        if config.has_section("combine_method"):
            # Options are named method1, method2, ...; stop at the first gap.
            i = 1
            method["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                method["combine_method"].append(config.get("combine_method", "method%s" % i).split(","))
                i += 1
Ejemplo n.º 5
0
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    Each name found in the rnkMETHOD table gets an entry in the module-level
    METHODS dictionary, built from the method's configuration file: ranking
    function name, output prefix/postfix, translated names and the optional
    word-table, stemming, stopword, find_similar and combine_method
    settings.

    Raises Exception when the configuration lacks a section named after the
    ranking function.  An unreadable file is silently skipped at read time,
    after which the first option lookup fails with a ConfigParser error.
    """

    bibrank_meths = run_sql("SELECT name from rnkMETHOD")

    for (rank_method_code,) in bibrank_meths:
        filepath = configuration.get(rank_method_code + '.cfg', '')
        config = ConfigParser.ConfigParser()
        try:
            # The context manager closes the file right after parsing.
            with open(filepath) as cfg_file:
                config.readfp(cfg_file)
        except IOError:
            # Deliberately ignored: config.get() below raises for an empty
            # parser.
            pass

        cfg_function = config.get("rank_method", "function")
        if not config.has_section(cfg_function):
            # Report the file that was actually read; the former message
            # used an undefined variable and failed with NameError instead.
            raise Exception("Error in configuration config_file: %s" % (filepath, ))

        # Same object as METHODS[rank_method_code]; filled in step by step.
        method = METHODS[rank_method_code] = {}
        method["function"] = cfg_function
        method["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
        method["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
        method["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"

        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            method[ln] = value

        if config.has_option(cfg_function, "table"):
            method["rnkWORD_table"] = config.get(cfg_function, "table")
            # Count rows of the table whose name is the configured one with
            # the final character swapped for "R".
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(method["rnkWORD_table"][:-1])
            method["col_size"] = run_sql(query)[0][0]

        if config.has_option(cfg_function, "stemming"):
            stemmer = config.get(cfg_function, "stemming")
            # Only a non-empty value enables stemming.
            if stemmer:
                method["stemmer"] = stemmer

        if config.has_option(cfg_function, "stopword"):
            method["stopwords"] = config.get(cfg_function, "stopword")

        if config.has_section("find_similar"):
            method["max_word_occurence"] = float(config.get("find_similar", "max_word_occurence"))
            method["min_word_occurence"] = float(config.get("find_similar", "min_word_occurence"))
            method["min_word_length"] = int(config.get("find_similar", "min_word_length"))
            method["min_nr_words_docs"] = int(config.get("find_similar", "min_nr_words_docs"))
            method["max_nr_words_upper"] = int(config.get("find_similar", "max_nr_words_upper"))
            method["max_nr_words_lower"] = int(config.get("find_similar", "max_nr_words_lower"))
            method["default_min_relevance"] = int(config.get("find_similar", "default_min_relevance"))

        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)

        if config.has_section("combine_method"):
            # Collect method1, method2, ... until the first missing index.
            i = 1
            method["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                method["combine_method"].append(config.get("combine_method", "method%s" % i).split(","))
                i += 1
Ejemplo n.º 6
0
def read_configuration(rank_method_code):
    """Parse and return the configuration of the given rank method.

    The file path is looked up in the configuration registry under
    ``<rank_method_code>.cfg``.  When the file cannot be read a message is
    written to stderr and the original exception is re-raised.
    """
    filename = configuration.get(rank_method_code + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # Close the file handle as soon as parsing is done.
        with open(filename) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        write_message("Cannot find configuration file: %s" % filename, sys.stderr)
        raise
    return config
Ejemplo n.º 7
0
def load_config_file(key):
    """Parse and return the configuration file registered as ``<key>.cfg``.

    Raises Exception('Unable to load config file ...') when the file cannot
    be opened or read.
    """
    filename = configuration.get(key + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # The context manager guarantees the handle is closed after parsing.
        with open(filename) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        raise Exception('Unable to load config file %s' % filename)
    return config
Ejemplo n.º 8
0
def load_config_file(key):
    """Return a ConfigParser loaded from the file registered as ``<key>.cfg``.

    Any StandardError while opening or reading the file is turned into
    Exception('Unable to load config file ...').
    """
    filename = configuration.get(key + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # Do not leave the file open once it has been parsed.
        with open(filename) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        raise Exception('Unable to load config file %s' % filename)
    return config
Ejemplo n.º 9
0
def single_tag_rank(config):
    """Connect the given tag with the data from the kb file given.

    The section named by the ``function`` option of ``[rank_method]`` must
    provide ``kb_src`` (knowledge base name, resolved to a path through the
    configuration registry), ``tag`` (the tag whose values are looked up)
    and ``check_mandatory_tags`` (", "-separated tags a record must also
    have; may be empty).  Knowledge base lines have the form
    ``value---score``; lines starting with "#" are skipped.

    Returns a dictionary mapping record id to score: the highest knowledge
    base score among the record's tag values, or 0 when none of its values
    appears in the knowledge base.  Only records in the global
    ``options["validset"]`` and ``options["recid_range"]`` are considered.
    """
    write_message("Loading knowledgebase file", verbose=9)
    function = config.get("rank_method", "function")

    write_message("Reading knowledgebase file: %s" % \
                   config.get(function, "kb_src"))

    kb_src = config.get(function, "kb_src").strip()
    # Find path from configuration registry by knowledge base name.
    kb_src_clean = configuration.get(kb_src)

    kb_data = {}
    with open(kb_src_clean, 'r') as kb_file:
        for line in kb_file:
            if line[0:1] == "#":
                continue
            parts = line.strip().split("---")
            if len(parts) < 2:
                # Blank or malformed line: nothing to map, and indexing the
                # missing score would raise IndexError.
                continue
            kb_data[parts[0].strip()] = parts[1]
    write_message("Number of lines read from knowledgebase file: %s" % len(kb_data))

    tag = config.get(function, "tag")
    tags = config.get(function, "check_mandatory_tags").split(", ")
    if tags == ['']:
        tags = []

    records = []
    for (recids, recide) in options["recid_range"]:
        task_sleep_now_if_required(can_stop_too=True)
        write_message("......Processing records #%s-%s" % (recids, recide))
        recs = run_sql("SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (tag[0:2], tag[0:2]), (tag, recids, recide))
        if tags:
            valid = intbitset(trailing_bits=1)
            valid.discard(0)
            for key in tags:
                newset = intbitset()
                # Each mandatory tag is looked up in the table pair selected
                # by its own first two characters, the same way the ranked
                # tag selects its tables above.
                newset += [recid[0] for recid in (run_sql("SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s" % (key[0:2], key[0:2]), (key, recids, recide)))]
                valid.intersection_update(newset)
            recs = [rec for rec in recs if rec[0] in valid]
        records.extend(recs)
        write_message("Number of records found with the necessary tags: %s" % len(records))

    validset = options["validset"]
    rnkset = {}
    # Record ids whose current score comes from the knowledge base (as
    # opposed to the 0 given to records with unknown values).
    scored = set()
    for key, value in records:
        if key not in validset:
            continue
        if value in kb_data:
            score = float(kb_data[value])
            # Keep the highest score when a record carries several values,
            # so the result does not depend on row order.
            if key not in scored or score > rnkset[key]:
                rnkset[key] = score
                scored.add(key)
        elif key not in rnkset:
            # An unknown value must not wipe out a score found earlier.
            rnkset[key] = 0

    write_message("Number of records available in rank method: %s" % len(rnkset))
    return rnkset
Ejemplo n.º 10
0
def command_line():
    """Handle the command line and run bibrankgkb() on the selected config.

    Options: -i/--input (configuration file, resolved through the
    configuration registry with the raw value as fallback), -o/--output,
    -v/--verbose (integer), -h/--help and -V/--version.  The resulting
    settings are stored in the module-level ``opts_dict``.

    A StandardError raised while processing options, reading the
    configuration or running bibrankgkb() is written to stderr and the
    process exits with status 1.
    """
    global opts_dict
    long_flags = ["input=", "output=", "help", "version", "verbose="]
    short_flags = "i:o:hVv:"
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.GetoptError as err:
        write_message(err, sys.stderr)
        # NOTE(review): relies on usage() ending the process; if it returned,
        # ``args`` would be unbound on the next line -- confirm.
        usage(1)
    if args:
        usage(1)
    opts_dict = {"input": configuration.get('bibrankgkb.cfg', ''), "output":"", "verbose":1}
    try:
        for flag, value in opts:
            if flag in ("-h", "--help"):
                usage(1)
            elif flag in ("-V", "--version"):
                print(__revision__)
                sys.exit(1)
            elif flag in ("--input", "-i"):
                opts_dict["input"] = configuration.get(value, value)
            elif flag in ("--output", "-o"):
                opts_dict["output"] = value
            elif flag in ("--verbose", "-v"):
                opts_dict["verbose"] = int(value)
            else:
                usage(1)

        startCreate = time.time()
        config_file = opts_dict["input"]
        config = ConfigParser.ConfigParser()
        # Parse inside a context manager so the file is always closed.
        with open(config_file) as cfg_file:
            config.readfp(cfg_file)
        bibrankgkb(config)
        if opts_dict["verbose"] >= 9:
            showtime((time.time() - startCreate))
    except StandardError as e:
        write_message(e, sys.stderr)
        sys.exit(1)
    return
Ejemplo n.º 11
0
def load_config(method):
    """Parse and return the configuration file registered as ``<method>.cfg``.

    When the file cannot be read a message is written to stderr and the
    original exception is re-raised.
    """
    filename = configuration.get(method + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # Close the file right after it has been parsed.
        with open(filename) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        write_message("Cannot find configuration file: %s" % filename,
                      sys.stderr)
        raise
    return config
Ejemplo n.º 12
0
def read_configuration(rank_method_code):
    """Load the config file from disk and parse it.

    The path is taken from the configuration registry under
    ``<rank_method_code>.cfg``.  If the file cannot be read, a message is
    written to stderr and the original exception is re-raised.
    """
    filename = configuration.get(rank_method_code + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # The context manager closes the handle even if parsing fails.
        with open(filename) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        write_message("Cannot find configuration file: %s" % filename,
                      sys.stderr)
        raise
    return config
Ejemplo n.º 13
0
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

from invenio.modules.formatter.api import get_tag_from_name
from invenio.ext.logging import register_exception
from invenio.modules.ranker.registry import configuration


# Location of 'wrd.cfg' as returned by the ranker configuration registry.
# The second argument is presumably a dict.get-style default, i.e. the empty
# string is used when the file is not registered -- TODO confirm.
CFG_BIBRANK_WRD_CFG_PATH = configuration.get('wrd.cfg', '')


def alert_admin(name):
    """Report a missing MARC tag definition through register_exception().

    A ValueError naming ``name`` is raised and caught on the spot so that
    register_exception(alert_admin=True) is called from inside an active
    exception handler.  Its return value is handed back to the caller.
    """
    try:
        message = 'No marc tag for %s defined' % name
        raise ValueError(message)
    except Exception:
        outcome = register_exception(alert_admin=True)
    return outcome


# abstract:
marc_tag_abstract = get_tag_from_name('abstract')
# Use the tag returned for 'abstract', or '520__a' when the lookup is empty.
CFG_MARC_ABSTRACT = marc_tag_abstract if marc_tag_abstract else '520__a'
Ejemplo n.º 14
0
def citerank(rank_method_code):
    """Compute a ranking based on the citation graph and store it.

    The configuration registered as ``<rank_method_code>.cfg`` is read, and
    the section named by the ``function`` option of ``[rank_method]``
    drives the computation:

    - ``file_with_citations``: optional file with the citation data; on any
      failure the citations are fetched from the database instead.
    - ``citerank_method``: "citation_time", "pagerank_classic" or
      "pagerank_time".
    - ``time_decay``: used by "citation_time" and "pagerank_time".
    - ``conv_threshold``, ``check_point``, ``damping_factor``: required by
      both pagerank variants.
    - ``use_external_citations``, ``ext_citation_file``,
      ``ext_reference_tag``, ``ext_alpha``, ``ext_beta``: external citation
      handling for "pagerank_classic".
    - ``output_ranks_to_filename``, ``output_rank_limit``: optional dump of
      the first ranks to a file.

    The resulting weights are normalized and written with into_db().
    Returns None; it also returns early when numpy is unavailable or no
    citation data is found.  Raises the original error when the
    configuration file cannot be read, and Exception for missing or invalid
    mandatory parameters.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)
    if not import_numpy:
        write_message('The numpy package could not be imported. \
This package is compulsory for running the citerank methods.')
        return
    # Resolve the path outside the try block so the error message below can
    # always refer to it.
    file_ = configuration.get(rank_method_code + '.cfg', '')
    config = ConfigParser.ConfigParser()
    try:
        # Close the file as soon as it has been parsed.
        with open(file_) as cfg_file:
            config.readfp(cfg_file)
    except StandardError:
        write_message("Cannot find configuration file: %s" % file_, sys.stderr)
        # Re-raise the caught error (still a StandardError) instead of a
        # fresh, message-less one, so the cause is not lost.
        raise
    # the file for citations needs to have the following format:
    # each line needs to be x[tab]y, where x cites y; x,y are recids
    function = config.get("rank_method", "function")
    try:
        file_for_citations = config.get(function, "file_with_citations")
        cit, dict_of_ids = get_citations_from_file(file_for_citations)
    except (ConfigParser.NoOptionError, StandardError) as err:
        # The hint names the option that is actually read above.
        write_message("If you want to read the citation data from file set up \
the file_with_citations parameter in the config file [%s]" %err, verbose=2)
        cit, dict_of_ids = get_citations_from_db()
    len_ = len(dict_of_ids)
    write_message("Number of nodes(papers) to rank : %s" % str(len_), verbose=3)
    if len_ == 0:
        write_message("No citation data found, nothing to be done.")
        return
    try:
        method = config.get(function, "citerank_method")
    except ConfigParser.NoOptionError as err:
        write_message("Exception: %s " %err, sys.stderr)
        raise Exception
    write_message("Running %s method." % method, verbose=2)
    dates = get_dates(function, config, dict_of_ids)
    if method == "citation_time":
        try:
            time_decay = float(config.get(function, "time_decay"))
        except (ConfigParser.NoOptionError, ValueError) as err:
            write_message("Exception: %s" % err, sys.stderr)
            raise Exception
        date_coef = calculate_time_weights(len_, time_decay, dates)
        #cit = remove_loops(cit, dates, dict_of_ids)
        dict_of_ranks = \
            run_citation_rank_time(cit, dict_of_ids, date_coef, dates)
    else:
        # Both pagerank variants below need these three parameters.
        try:
            conv_threshold = float(config.get(function, "conv_threshold"))
            check_point = int(config.get(function, "check_point"))
            damping_factor = float(config.get(function, "damping_factor"))
            write_message("Parameters: d = %s, conv_threshold = %s, \
check_point = %s" %(str(damping_factor), \
str(conv_threshold), str(check_point)), verbose=5)
        except (ConfigParser.NoOptionError, StandardError) as err:
            write_message("Exception: %s" % err, sys.stderr)
            raise Exception
        if method == "pagerank_classic":
            ref = construct_ref_array(cit, dict_of_ids, len_)
            use_ext_cit = ""
            try:
                use_ext_cit = config.get(function, "use_external_citations")
                write_message("Pagerank will use external citations: %s" \
                   %str(use_ext_cit), verbose=5)
            except (ConfigParser.NoOptionError, StandardError) as err:
                write_message("%s" % err, verbose=2)
            if use_ext_cit == "yes":
                try:
                    ext_citation_file = config.get(function, "ext_citation_file")
                    ext_links = get_external_links_from_file(ext_citation_file,
                                                             ref, dict_of_ids)
                except (ConfigParser.NoOptionError, StandardError):
                    # No usable file: fall back to the database, which needs
                    # a valid reference tag.
                    write_message("If you want to read the external citation \
data from file set up the ext_citation_file parameter in the config. file", \
verbose=3)
                    try:
                        reference_tag = config.get(function, "ext_reference_tag")
                        # Only a sanity check: the first three characters
                        # must form an integer.
                        dummy = int(reference_tag[0:3])
                    except (ConfigParser.NoOptionError, StandardError):
                        write_message("You need to set up correctly the \
reference_tag in the cfg file", sys.stderr)
                        raise Exception
                    ext_links = get_external_links_from_db(ref, \
                            dict_of_ids, reference_tag)
                    avg = avg_ext_links_with_0(ext_links)
                    if avg < 1:
                        write_message("This method can't be ran. There is not \
enough information about the external citation. Hint: check the reference tag", \
sys.stderr)
                        raise Exception
                    # NOTE(review): the return value is discarded; presumably
                    # called for logging only -- confirm.
                    avg_ext_links_without_0(ext_links)
                try:
                    alpha = float(config.get(function, "ext_alpha"))
                    beta = float(config.get(function, "ext_beta"))
                except (ConfigParser.NoOptionError, StandardError) as err:
                    write_message("Exception: %s" % err, sys.stderr)
                    raise Exception
                dict_of_ranks = run_pagerank_ext(cit, dict_of_ids, ref, \
                ext_links, conv_threshold, check_point, alpha, beta, dates)
            else:
                dict_of_ranks = run_pagerank(cit, dict_of_ids, len_, ref, \
                    damping_factor, conv_threshold, check_point, dates)
        elif method == "pagerank_time":
            try:
                time_decay = float(config.get(function, "time_decay"))
                write_message("Parameter: time_decay = %s" \
                              %str(time_decay), verbose=5)
            except (ConfigParser.NoOptionError, StandardError) as err:
                write_message("Exception: %s" % err, sys.stderr)
                raise Exception
            date_coef = calculate_time_weights(len_, time_decay, dates)
            cit = remove_loops(cit, dates, dict_of_ids)
            ref = construct_ref_array(cit, dict_of_ids, len_)
            dict_of_ranks = run_pagerank_time(cit, dict_of_ids, len_, ref, \
             damping_factor, conv_threshold, check_point, date_coef, dates)
        else:
            # The option read above is called citerank_method.
            write_message("Error: Unknown ranking method. \
Please check the citerank_method parameter in the config. file.", sys.stderr)
            raise Exception
    try:
        filename_ranks = config.get(function, "output_ranks_to_filename")
        max_ranks = config.get(function, "output_rank_limit")
        # A non-numeric limit, or one above the node count, means "all".
        if not max_ranks.isdigit():
            max_ranks = len_
        else:
            max_ranks = int(max_ranks)
            if max_ranks > len_:
                max_ranks = len_
        ranks = sort_weights(dict_of_ranks)
        write_message("Ranks: %s" % str(ranks), verbose=9)
        write_first_ranks_to_file(ranks, dict_of_ranks, \
                max_ranks, filename_ranks)
    except (ConfigParser.NoOptionError, StandardError):
        # Dumping ranks to a file is optional; failures only produce a hint.
        write_message("If you want the ranks to be printed in a file you have \
to set output_ranks_to_filename and output_rank_limit \
parameters in the configuration file", verbose=3)
    normalize_weights(dict_of_ranks)
    into_db(dict_of_ranks, rank_method_code)
Ejemplo n.º 15
0
def single_tag_rank(config):
    """Connect the given tag with the data from the kb file given.

    Options are read from the section named by the ``function`` option of
    ``[rank_method]``: ``kb_src`` (knowledge base name, resolved to a path
    through the configuration registry), ``tag`` (the tag whose values are
    looked up) and ``check_mandatory_tags`` (", "-separated tags a record
    must also carry; may be empty).  Knowledge base lines look like
    ``value---score``; lines starting with "#" are skipped.

    Returns a dictionary mapping record id to score: the highest knowledge
    base score among the record's tag values, or 0 when none of them is in
    the knowledge base.  Records outside the global ``options["validset"]``
    and ``options["recid_range"]`` are left out.
    """
    write_message("Loading knowledgebase file", verbose=9)
    function = config.get("rank_method", "function")

    write_message("Reading knowledgebase file: %s" % \
                   config.get(function, "kb_src"))

    kb_src = config.get(function, "kb_src").strip()
    # Find path from configuration registry by knowledge base name.
    kb_src_clean = configuration.get(kb_src)

    kb_data = {}
    with open(kb_src_clean, 'r') as kb_file:
        for line in kb_file:
            if line[0:1] == "#":
                continue
            parts = line.strip().split("---")
            if len(parts) < 2:
                # Skip blank or malformed lines instead of failing with
                # IndexError on the missing score.
                continue
            kb_data[parts[0].strip()] = parts[1]
    write_message("Number of lines read from knowledgebase file: %s" %
                  len(kb_data))

    tag = config.get(function, "tag")
    tags = config.get(function, "check_mandatory_tags").split(", ")
    if tags == ['']:
        tags = []

    records = []
    for (recids, recide) in options["recid_range"]:
        task_sleep_now_if_required(can_stop_too=True)
        write_message("......Processing records #%s-%s" % (recids, recide))
        recs = run_sql(
            "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s"
            % (tag[0:2], tag[0:2]), (tag, recids, recide))
        if tags:
            valid = intbitset(trailing_bits=1)
            valid.discard(0)
            for key in tags:
                newset = intbitset()
                # Query the table pair selected by the mandatory tag's own
                # first two characters, mirroring how the ranked tag picks
                # its tables above.
                newset += [
                    recid[0] for recid in (run_sql(
                        "SELECT id_bibrec FROM bib%sx, bibrec_bib%sx WHERE id_bibxxx=id AND tag=%%s AND id_bibxxx=id and id_bibrec >=%%s and id_bibrec<=%%s"
                        % (key[0:2], key[0:2]), (key, recids, recide)))
                ]
                valid.intersection_update(newset)
            recs = [rec for rec in recs if rec[0] in valid]
        records.extend(recs)
        write_message("Number of records found with the necessary tags: %s" %
                      len(records))

    validset = options["validset"]
    rnkset = {}
    # Record ids whose current score was taken from the knowledge base.
    scored = set()
    for key, value in records:
        if key not in validset:
            continue
        if value in kb_data:
            score = float(kb_data[value])
            # Several values per record: keep the best score regardless of
            # the order in which rows were returned.
            if key not in scored or score > rnkset[key]:
                rnkset[key] = score
                scored.add(key)
        elif key not in rnkset:
            # Unknown value: default to 0 without overwriting a real score.
            rnkset[key] = 0

    write_message("Number of records available in rank method: %s" %
                  len(rnkset))
    return rnkset
Ejemplo n.º 16
0
def perform_modifyrank(rnkID, rnkcode='', ln=CFG_SITE_LANG, template='', cfgfile='', confirm=0):
    """Build the admin page used to modify a rank method.

    The page holds a form for editing the BibRank code and the content of
    the method's configuration file, followed by a box for viewing the
    available configuration templates.  When confirm is 1 the submitted
    changes are applied: the method is renamed through modify_rnk() (its
    configuration file is copied to the new name and the old one removed),
    and cfgfile is written to the configuration file.

    rnkID - id of the rank method
    rnkcode - new BibRank code; when empty the current code is displayed
    ln - language code (not used by this function)
    template - name of the template whose content should be displayed
    cfgfile - new content for the configuration file
    confirm - 1 (or "1") to apply the submitted changes

    Returns the HTML of the two admin boxes, or a plain message when rnkID
    is empty or get_rnk_code() finds nothing for it.
    """

    if not rnkID:
        return "No ranking method selected."
    if not get_rnk_code(rnkID):
        return "Ranking method %s does not seem to exist." % str(rnkID)

    subtitle = 'Step 1 - Please modify the wanted values below'
    # Use the code itself ([0][0]), as the rest of this function does,
    # instead of the whole result row.
    oldcode = rnkcode or get_rnk_code(rnkID)[0][0]

    output  = """
    <dl>
     <dd>When changing the BibRank code of a rank method, you must also change any scheduled tasks using the old value.
     <br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section about modifying a rank method's  BibRank code.</dd>
    </dl>
    """ % CFG_SITE_URL

    text = """
     <span class="adminlabel">BibRank code</span>
     <input class="admin_wvar" type="text" name="rnkcode" value="%s" />
     <br />
    """ % (oldcode)

    try:
        text += """<span class="adminlabel">Cfg file</span>"""
        if cfgfile:
            textarea = cfgfile
        else:
            # No content submitted: show what is currently on disk.
            with open(configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', '')) as cfg_in:
                textarea = cfg_in.read()
        text += """<textarea class="admin_wvar" name="cfgfile" rows="15" cols="70">""" + textarea + """</textarea>"""
    except StandardError:
        text += """<b><span class="info">Cannot load file, either it does not exist, or not enough rights to read it: '%s.cfg'<br />Please create the file in the path given.</span></b>""" % (configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', ''), )

    output += createhiddenform(action="modifyrank",
                               text=text,
                               rnkID=rnkID,
                               button="Modify",
                               confirm=1)

    if rnkcode and confirm in ["1", 1] and get_rnk_code(rnkID)[0][0] != rnkcode:
        # The code changed: rename the method, then move its cfg file along.
        oldcode = get_rnk_code(rnkID)[0][0]
        result = modify_rnk(rnkID, rnkcode)
        subtitle = "Step 3 - Result"
        if result:
            text = """<b><span class="info">Rank method modified.</span></b>"""
            old_path = configuration.get(oldcode + '.cfg', '')
            try:
                with open(old_path, 'r') as cfg_in:
                    content = cfg_in.read()
                with open(configuration.get(rnkcode + '.cfg', ''), 'w') as cfg_out:
                    cfg_out.write(content)
                os.remove(old_path)
            except StandardError:
                text = """<b><span class="info">Sorry, could not change name of cfg file, must be done manually: '%s.cfg'</span></b>""" % (old_path, )
        else:
            text = """<b><span class="info">Sorry, could not modify rank method.</span></b>"""
        output += text

    if cfgfile and confirm in ["1", 1]:
        try:
            cfg_path = configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', '')
            with open(cfg_path, 'w') as cfg_out:
                cfg_out.write(cfgfile)
            # The success message used to have two placeholders for a single
            # value; the resulting TypeError was caught below and turned every
            # successful write into a failure report.
            text = """<b><span class="info"><br />Configuration file modified: '%s'</span></b>""" % (cfg_path, )
        except StandardError:
            text = """<b><span class="info"><br />Sorry, could not modify configuration file, please check for rights to do so: '%s.cfg'<br />Please modify the file manually.</span></b>""" % (configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', ''), )
        output += text

    finoutput = addadminbox(subtitle + """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#mr">?</a>]</small>""" % CFG_SITE_URL, [output])
    output = ""

    text = """
    <span class="adminlabel">Select</span>
    <select name="template" class="admin_w200">
    <option value="">- select template -</option>
    """
    for templ in get_templates():
        selected = 'selected="selected"' if template == templ else ''
        # The label drops the first 9 and the last 4 characters of the name.
        text += """<option value="%s" %s>%s</option>""" % (templ, selected, templ[9:-4])
    text += """</select><br />"""

    output += createhiddenform(action="modifyrank",
                               text=text,
                               rnkID=rnkID,
                               button="Show template",
                               confirm=0)

    try:
        if template:
            with open(configuration.get(template, ''), 'r') as template_in:
                textarea = template_in.read()
            text = """<span class="adminlabel">Content:</span>"""
            text += """<textarea class="admin_wvar" readonly="true" rows="15" cols="70">""" + textarea + """</textarea>"""
            output += text
    except StandardError:
        output += """Cannot load file, either it does not exist, or not enough rights to read it: '%s'""" % (configuration.get(template, ''), )

    finoutput += addadminbox("View templates", [output])
    return finoutput
Ejemplo n.º 17
0
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

from invenio.modules.formatter.api import get_tag_from_name
from invenio.ext.logging import register_exception
from invenio.modules.ranker.registry import configuration

# Value stored under 'wrd.cfg' in the ranker configuration registry, with ''
# as the fallback (presumably the path of the word-similarity configuration
# file -- TODO confirm against the registry).
CFG_BIBRANK_WRD_CFG_PATH = configuration.get('wrd.cfg', '')


def alert_admin(name):
    """Register a 'no marc tag defined' error for ``name``.

    A ValueError is raised and caught on the spot so that
    register_exception() has an active exception to record; it is called
    with alert_admin=True and its result is returned.
    """
    try:
        message = 'No marc tag for %s defined' % name
        raise ValueError(message)
    except Exception:
        outcome = register_exception(alert_admin=True)
    return outcome


# abstract: take the tag returned for the name 'abstract', falling back to
# '520__a' when the lookup yields a false value.
marc_tag_abstract = get_tag_from_name('abstract')
CFG_MARC_ABSTRACT = marc_tag_abstract or '520__a'
Ejemplo n.º 18
0
def perform_addrankarea(rnkcode='', ln=CFG_SITE_LANG, template='', confirm=-1):
    """Build the admin page used to add a new rank method.

    rnkcode - BibRank code of the new method
    ln - language code, passed on to the first form
    template - name of the configuration template to copy, '' for none
    confirm - -1 shows the bare form, 0 shows the confirmation step and
              1 creates the method through add_rnk() and writes its
              configuration file (a copy of the template, or an empty file)

    Returns the HTML of the admin box.
    """

    subtitle = 'Step 1 - Create new rank method'
    output = """
    <dl>
     <dt>BibRank code:</dt>
     <dd>A unique code that identifies a rank method, is used when running the bibrank daemon and used to name the configuration file for the method.
     <br />The template files includes the necessary parameters for the chosen rank method, and only needs to be edited with the correct tags and paths.
     <br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section about adding a rank method</dd>
    </dl>
    """ % CFG_SITE_URL
    text = """
    <span class="adminlabel">BibRank code</span>
    <input class="admin_wvar" type="text" name="rnkcode" value="%s" />
    """ % (rnkcode)

    text += """<br />
    <span class="adminlabel">Cfg template</span>
    <select name="template" class="admin_w200">
    <option value="">No template</option>
    """

    for templ in get_templates():
        selected = 'selected="selected"' if template == templ else ''
        # The label drops the first 9 and the last 4 characters of the name.
        text += """<option value="%s" %s>%s</option>""" % (
            templ, selected, templ[9:-4])
    text += """</select>"""

    output += createhiddenform(action="addrankarea",
                               text=text,
                               button="Add rank method",
                               ln=ln,
                               confirm=1)

    if rnkcode:
        if confirm in ["0", 0]:
            subtitle = 'Step 2 - Confirm addition of rank method'
            text = """<b>Add rank method with BibRank code: '%s'.</b>""" % (
                rnkcode)
            if template:
                text += """<br /><b>Using configuration template: '%s'.</b>""" % (
                    template)
            else:
                text += """<br /><b>Create empty configuration file.</b>"""
            output += createhiddenform(action="addrankarea",
                                       text=text,
                                       rnkcode=rnkcode,
                                       button="Confirm",
                                       template=template,
                                       confirm=1)

        elif confirm in ["1", 1]:
            # add_rnk() answers with a (status, id) pair; status 1 is success.
            rnkID = add_rnk(rnkcode)
            subtitle = "Step 3 - Result"
            if rnkID[0] == 1:
                rnkID = rnkID[1]
                text = """<b><span class="info">Added new rank method with BibRank code '%s'</span></b>""" % rnkcode
                try:
                    template_data = ''
                    if template:
                        with open(configuration.get(template, ''), 'r') as template_in:
                            template_data = template_in.read()
                    cfg_path = configuration.get(
                        get_rnk_code(rnkID)[0][0] + '.cfg', '')
                    # "with" closes the handle even when the write fails.
                    with open(cfg_path, 'w') as cfg_out:
                        cfg_out.write(template_data)
                    if template:
                        text += """<b><span class="info"><br />Configuration file created using '%s' as template.</span></b>""" % template
                    else:
                        text += """<b><span class="info"><br />Empty configuration file created.</span></b>"""
                except StandardError:
                    text += """<b><span class="info"><br />Sorry, could not create configuration file: '%s.cfg', either because it already exists, or not enough rights to create file. <br />Please create the file in the path given.</span></b>""" % (
                        configuration.get(
                            get_rnk_code(rnkID)[0][0] + '.cfg', ''), )
            else:
                text = """<b><span class="info">Sorry, could not add rank method, rank method with the same BibRank code probably exists.</span></b>"""
            output += text
    elif confirm not in [-1, "-1"]:
        # A form was submitted without a BibRank code.
        output += """<b><span class="info">Sorry, could not add rank method, not enough data submitted.</span></b>"""

    body = [output]

    return addadminbox(
        subtitle +
        """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#ar">?</a>]</small>"""
        % CFG_SITE_URL, body)
Ejemplo n.º 19
0
def perform_deleterank(rnkID, ln=CFG_SITE_LANG, confirm=0):
    """Build the admin page used to delete a rank method.

    rnkID - id of the rank method to delete
    ln - language code (not used by this function)
    confirm - 0 (or "0") shows the confirmation form; 1 (or "1") deletes
              the method through delete_rnk() and then removes its
              configuration file

    Returns the HTML of the admin box.
    """
    subtitle = ''
    output = """
    <span class="warning">
    <dl>
     <dt><strong>WARNING:</strong></dt>
     <dd><strong>When deleting a rank method, you also deletes all data related to the rank method, like translations, which collections
     it was attached to and the data necessary to rank the searchresults. Any scheduled tasks using the deleted rank method will also stop working.
     <br /><br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section regarding deleting a rank method.</strong></dd>
    </dl>
    </span>
    """ % CFG_SITE_URL

    if rnkID:
        if confirm in ["0", 0]:
            rnkNAME = get_def_name(rnkID, "rnkMETHOD")[0][1]
            subtitle = 'Step 1 - Confirm deletion'
            text = """Delete rank method '%s'.""" % (rnkNAME)
            output += createhiddenform(action="deleterank",
                                       text=text,
                                       button="Confirm",
                                       rnkID=rnkID,
                                       confirm=1)
        elif confirm in ["1", 1]:
            try:
                # The name is not used below; the lookup is kept because a
                # failing index here lands in the "already deleted" handler.
                get_def_name(rnkID, "rnkMETHOD")[0][1]
                rnkcode = get_rnk_code(rnkID)[0][0]
                table = ""
                try:
                    config = ConfigParser.ConfigParser()
                    with open(configuration.get(rnkcode + ".cfg"), 'r') as cfg_in:
                        config.readfp(cfg_in)
                    table = config.get(config.get('rank_method', "function"),
                                       "table")
                except Exception:
                    # Best effort: when the table name cannot be read from
                    # the configuration, delete_rnk() is called with ''.
                    pass
                result = delete_rnk(rnkID, table)
                subtitle = "Step 2 - Result"
                if result:
                    text = """<b><span class="info">Rank method deleted</span></b>"""
                    cfg_path = configuration.get(rnkcode + ".cfg")
                    try:
                        os.remove(cfg_path)
                        text += """<br /><b><span class="info">Configuration file deleted: '%s.cfg'.</span></b>""" % (
                            cfg_path, )
                    except StandardError:
                        # This message used to carry two placeholders for one
                        # value (plus a stray </span>); the TypeError escaped
                        # to the outer handler and reported the deletion of
                        # the method itself as failed.
                        text += """<br /><b><span class="info">Sorry, could not delete configuration file: '%s'.<br />Please delete the file manually.</span></b>""" % (
                            cfg_path, )
                else:
                    text = """<b><span class="info">Sorry, could not delete rank method</span></b>"""
            except StandardError:
                text = """<b><span class="info">Sorry, could not delete rank method, most likely already deleted</span></b>"""
            # The result replaces the warning text shown on the first step.
            output = text

    body = [output]

    return addadminbox(
        subtitle +
        """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#dr">?</a>]</small>"""
        % CFG_SITE_URL, body)
Ejemplo n.º 20
0
def perform_showrankdetails(rnkID, ln=CFG_SITE_LANG):
    """Return the HTML overview of the rank method given by rnkID.

    The page is a sequence of admin boxes: overview (code and last update),
    statistics placeholder, attached collections, translations and the
    content of the configuration file.

    rnkID - id of the rank method
    ln - language code, used in the [Modify] links and passed to
         get_rnk_col()

    Returns the HTML, or a plain message when rnkID is empty or
    get_rnk_code() finds nothing for it.
    """

    if not rnkID:
        return "No ranking method selected."
    code_rows = get_rnk_code(rnkID)
    if not code_rows:
        return "Ranking method %s does not seem to exist." % str(rnkID)
    # Looked up once instead of once per box.
    rnkcode = code_rows[0][0]

    subtitle = """Overview <a href="%s/admin/bibrank/bibrankadmin.py/modifyrank?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (
        CFG_SITE_URL, rnkID, ln)
    text = """
    BibRank code: %s<br />
    Last updated by BibRank:
    """ % (rnkcode)
    last_updated = get_rnk(rnkID)[0][2]
    if last_updated:
        text += "%s<br />" % last_updated
    else:
        text += "Not yet run.<br />"
    output = addadminbox(subtitle, [text])

    # Statistics are a placeholder; the former try/except around this
    # constant assignment could never reach its handler.
    subtitle = """Rank method statistics"""
    text = "Not yet implemented"
    output += addadminbox(subtitle, [text])

    subtitle = """Attached to collections <a href="%s/admin/bibrank/bibrankadmin.py/modifycollection?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (
        CFG_SITE_URL, rnkID, ln)
    text = ""
    col = get_rnk_col(rnkID, ln)
    for _, col_name in col:
        text += "%s<br />" % col_name
    if not col:
        text += "No collections"
    output += addadminbox(subtitle, [text])

    subtitle = """Translations <a href="%s/admin/bibrank/bibrankadmin.py/modifytranslations?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (
        CFG_SITE_URL, rnkID, ln)
    prev_lang = ''
    trans = get_translations(rnkID)
    types = dict((row[0], row[1]) for row in get_rnk_nametypes())
    text = ""
    languages = dict(get_languages())
    if trans:
        for lang, name_type, name in trans:
            if lang and lang in languages and name_type and name:
                # Print the language heading once per run of equal languages.
                if prev_lang != lang:
                    prev_lang = lang
                    text += """%s: <br />""" % (languages[lang])
                if name_type in types:
                    text += """<span style="margin-left: 10px">'%s'</span><span class="note">(%s)</span><br />""" % (
                        name, types[name_type])
    else:
        text = """No translations exists"""
    output += addadminbox(subtitle, [text])

    subtitle = """Configuration file: '%s/bibrank/%s.cfg' <a href="%s/admin/bibrank/bibrankadmin.py/modifyrank?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (
        CFG_ETCDIR, rnkcode, CFG_SITE_URL, rnkID, ln)
    try:
        # "with" closes the handle, which used to be left open.
        with open(configuration.get(rnkcode + ".cfg", '')) as cfg_in:
            text = """<pre>""" + cfg_in.read() + """</pre>"""
    except StandardError:
        text = """Cannot load file, either it does not exist, or not enough rights to read it."""
    output += addadminbox(subtitle, [text])

    return output
Ejemplo n.º 21
0
def bibrank_engine(run):
    """Run the ranking task for the rank method code given in run.

    Reads the method's configuration file, derives the record id ranges to
    process from the task options (collection, id, modified, last_updated,
    or all records) and dispatches on the "cmd" task option: del, add,
    print-missing, stat, check or repair.

    Returns 1 on success.  A StandardError raised along the way is logged,
    passed to register_exception() and re-raised as a bare StandardError;
    the function never returns 0.
    """
    startCreate = time.time()
    try:
        options["run"] = [run]
        for rank_method_code in options["run"]:
            task_sleep_now_if_required(can_stop_too=True)
            cfg_name = getName(rank_method_code)
            write_message("Running rank method: %s." % cfg_name)

            config_file = configuration.get(rank_method_code + '.cfg', '')
            config = ConfigParser.ConfigParser()
            try:
                # "with" closes the handle, which used to be left open.
                with open(config_file) as config_handle:
                    config.readfp(config_handle)
            except StandardError:
                write_message("Cannot find configurationfile: %s" % config_file, sys.stderr)
                raise StandardError

            # The configured function name selects the module-level
            # "<function>_exec" / "<function>_repair_exec" callables.
            function_base = config.get("rank_method", "function")
            cfg_function = function_base + "_exec"
            cfg_repair_function = function_base + "_repair_exec"
            options["validset"] = get_valid_range(rank_method_code)

            if task_get_option("collection"):
                l_of_colls = task_get_option("collection").split(", ")
                recIDs = perform_request_search(c=l_of_colls)
                # One single-record range per matching record.
                options["recid_range"] = [[recID, recID] for recID in recIDs]
            elif task_get_option("id"):
                options["recid_range"] = task_get_option("id")
            elif task_get_option("modified"):
                options["recid_range"] = add_recIDs_by_date(rank_method_code, task_get_option("modified"))
            elif task_get_option("last_updated"):
                options["recid_range"] = add_recIDs_by_date(rank_method_code)
            else:
                write_message("No records specified, updating all", verbose=2)
                min_id = run_sql("SELECT min(id) from bibrec")[0][0]
                max_id = run_sql("SELECT max(id) from bibrec")[0][0]
                options["recid_range"] = [[min_id, max_id]]

            if task_get_option("quick") == "no":
                write_message("Recalculate parameter not used, parameter ignored.", verbose=9)

            command = task_get_option("cmd")
            if command == "del":
                del_recids(rank_method_code, options["recid_range"])
            elif command in ("add", "print-missing", "repair"):
                is_repair = command == "repair"
                function_name = cfg_repair_function if is_repair else cfg_function
                func_object = globals().get(function_name)
                if func_object is None:
                    # Report the missing function by name instead of failing
                    # with "'NoneType' object is not callable".
                    write_message("Rank function %s is not defined, cannot process %s" % (function_name, rank_method_code), sys.stderr)
                    raise StandardError
                if is_repair:
                    func_object()
                else:
                    func_object(rank_method_code, cfg_name, config)
            elif command == "stat":
                rank_method_code_statistics(rank_method_code)
            elif command == "check":
                check_method(rank_method_code)
            else:
                write_message("Invalid command found processing %s" % rank_method_code, sys.stderr)
                raise StandardError
    except StandardError as e:
        write_message("\nException caught: %s" % e, sys.stderr)
        write_message(traceback.format_exc()[:-1])
        register_exception()
        raise StandardError

    if task_get_option("verbose"):
        showtime((time.time() - startCreate))
    return 1
Ejemplo n.º 22
0
def perform_addrankarea(rnkcode='', ln=CFG_SITE_LANG, template='', confirm=-1):
    """Build the admin page used to add a new rank method.

    rnkcode - BibRank code of the new method
    ln - language code, passed on to the first form
    template - name of the configuration template to copy, '' for none
    confirm - -1 shows the bare form, 0 shows the confirmation step and
              1 creates the method through add_rnk() and writes its
              configuration file (a copy of the template, or an empty file)

    Returns the HTML of the admin box.
    """

    subtitle = 'Step 1 - Create new rank method'
    output  = """
    <dl>
     <dt>BibRank code:</dt>
     <dd>A unique code that identifies a rank method, is used when running the bibrank daemon and used to name the configuration file for the method.
     <br />The template files includes the necessary parameters for the chosen rank method, and only needs to be edited with the correct tags and paths.
     <br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section about adding a rank method</dd>
    </dl>
    """ % CFG_SITE_URL
    text = """
    <span class="adminlabel">BibRank code</span>
    <input class="admin_wvar" type="text" name="rnkcode" value="%s" />
    """ % (rnkcode)

    text += """<br />
    <span class="adminlabel">Cfg template</span>
    <select name="template" class="admin_w200">
    <option value="">No template</option>
    """

    for templ in get_templates():
        selected = 'selected="selected"' if template == templ else ''
        # The label drops the first 9 and the last 4 characters of the name.
        text += """<option value="%s" %s>%s</option>""" % (templ, selected, templ[9:-4])
    text += """</select>"""

    output += createhiddenform(action="addrankarea",
                               text=text,
                               button="Add rank method",
                               ln=ln,
                               confirm=1)

    if rnkcode:
        if confirm in ["0", 0]:
            subtitle = 'Step 2 - Confirm addition of rank method'
            text = """<b>Add rank method with BibRank code: '%s'.</b>""" % (rnkcode)
            if template:
                text += """<br /><b>Using configuration template: '%s'.</b>""" % (template)
            else:
                text += """<br /><b>Create empty configuration file.</b>"""
            output += createhiddenform(action="addrankarea",
                                       text=text,
                                       rnkcode=rnkcode,
                                       button="Confirm",
                                       template=template,
                                       confirm=1)

        elif confirm in ["1", 1]:
            # add_rnk() answers with a (status, id) pair; status 1 is success.
            rnkID = add_rnk(rnkcode)
            subtitle = "Step 3 - Result"
            if rnkID[0] == 1:
                rnkID = rnkID[1]
                text = """<b><span class="info">Added new rank method with BibRank code '%s'</span></b>""" % rnkcode
                try:
                    template_data = ''
                    if template:
                        with open(configuration.get(template, ''), 'r') as template_in:
                            template_data = template_in.read()
                    cfg_path = configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', '')
                    # "with" closes the handle even when the write fails.
                    with open(cfg_path, 'w') as cfg_out:
                        cfg_out.write(template_data)
                    if template:
                        text += """<b><span class="info"><br />Configuration file created using '%s' as template.</span></b>""" % template
                    else:
                        text += """<b><span class="info"><br />Empty configuration file created.</span></b>"""
                except StandardError:
                    text += """<b><span class="info"><br />Sorry, could not create configuration file: '%s.cfg', either because it already exists, or not enough rights to create file. <br />Please create the file in the path given.</span></b>""" % (configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', ''), )
            else:
                text = """<b><span class="info">Sorry, could not add rank method, rank method with the same BibRank code probably exists.</span></b>"""
            output += text
    elif confirm not in [-1, "-1"]:
        # A form was submitted without a BibRank code.
        output += """<b><span class="info">Sorry, could not add rank method, not enough data submitted.</span></b>"""

    body = [output]

    return addadminbox(subtitle + """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#ar">?</a>]</small>""" % CFG_SITE_URL, body)
Ejemplo n.º 23
0
def perform_modifyrank(rnkID,
                       rnkcode='',
                       ln=CFG_SITE_LANG,
                       template='',
                       cfgfile='',
                       confirm=0):
    """Build the admin page used to modify a rank method.

    The page holds a form for editing the BibRank code and the content of
    the method's configuration file, followed by a box for viewing the
    available configuration templates.  When confirm is 1 the submitted
    changes are applied: the method is renamed through modify_rnk() (its
    configuration file is copied to the new name and the old one removed),
    and cfgfile is written to the configuration file.

    rnkID - id of the rank method
    rnkcode - new BibRank code; when empty the current code is displayed
    ln - language code (not used by this function)
    template - name of the template whose content should be displayed
    cfgfile - new content for the configuration file
    confirm - 1 (or "1") to apply the submitted changes

    Returns the HTML of the two admin boxes, or a plain message when rnkID
    is empty or get_rnk_code() finds nothing for it.
    """

    if not rnkID:
        return "No ranking method selected."
    if not get_rnk_code(rnkID):
        return "Ranking method %s does not seem to exist." % str(rnkID)

    subtitle = 'Step 1 - Please modify the wanted values below'
    # Use the code itself ([0][0]), as the rest of this function does,
    # instead of the whole result row.
    oldcode = rnkcode or get_rnk_code(rnkID)[0][0]

    output = """
    <dl>
     <dd>When changing the BibRank code of a rank method, you must also change any scheduled tasks using the old value.
     <br />For more information, please go to the <a title="See guide" href="%s/help/admin/bibrank-admin-guide">BibRank guide</a> and read the section about modifying a rank method's  BibRank code.</dd>
    </dl>
    """ % CFG_SITE_URL

    text = """
     <span class="adminlabel">BibRank code</span>
     <input class="admin_wvar" type="text" name="rnkcode" value="%s" />
     <br />
    """ % (oldcode)

    try:
        text += """<span class="adminlabel">Cfg file</span>"""
        if cfgfile:
            textarea = cfgfile
        else:
            # No content submitted: show what is currently on disk.
            with open(
                    configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg',
                                      '')) as cfg_in:
                textarea = cfg_in.read()
        text += """<textarea class="admin_wvar" name="cfgfile" rows="15" cols="70">""" + \
            textarea + """</textarea>"""
    except StandardError:
        text += """<b><span class="info">Cannot load file, either it does not exist, or not enough rights to read it: '%s.cfg'<br />Please create the file in the path given.</span></b>""" % (
            configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', ''), )

    output += createhiddenform(action="modifyrank",
                               text=text,
                               rnkID=rnkID,
                               button="Modify",
                               confirm=1)

    if rnkcode and confirm in ["1", 1
                               ] and get_rnk_code(rnkID)[0][0] != rnkcode:
        # The code changed: rename the method, then move its cfg file along.
        oldcode = get_rnk_code(rnkID)[0][0]
        result = modify_rnk(rnkID, rnkcode)
        subtitle = "Step 3 - Result"
        if result:
            text = """<b><span class="info">Rank method modified.</span></b>"""
            old_path = configuration.get(oldcode + '.cfg', '')
            try:
                with open(old_path, 'r') as cfg_in:
                    content = cfg_in.read()
                with open(configuration.get(rnkcode + '.cfg', ''),
                          'w') as cfg_out:
                    cfg_out.write(content)
                os.remove(old_path)
            except StandardError:
                text = """<b><span class="info">Sorry, could not change name of cfg file, must be done manually: '%s.cfg'</span></b>""" % (
                    old_path, )
        else:
            text = """<b><span class="info">Sorry, could not modify rank method.</span></b>"""
        output += text

    if cfgfile and confirm in ["1", 1]:
        try:
            cfg_path = configuration.get(
                get_rnk_code(rnkID)[0][0] + '.cfg', '')
            with open(cfg_path, 'w') as cfg_out:
                cfg_out.write(cfgfile)
            # The success message used to have two placeholders for a single
            # value; the resulting TypeError was caught below and turned every
            # successful write into a failure report.
            text = """<b><span class="info"><br />Configuration file modified: '%s'</span></b>""" % (
                cfg_path, )
        except StandardError:
            text = """<b><span class="info"><br />Sorry, could not modify configuration file, please check for rights to do so: '%s.cfg'<br />Please modify the file manually.</span></b>""" % (
                configuration.get(get_rnk_code(rnkID)[0][0] + '.cfg', ''), )
        output += text

    finoutput = addadminbox(
        subtitle +
        """&nbsp;&nbsp;&nbsp;<small>[<a title="See guide" href="%s/help/admin/bibrank-admin-guide#mr">?</a>]</small>"""
        % CFG_SITE_URL, [output])
    output = ""

    text = """
    <span class="adminlabel">Select</span>
    <select name="template" class="admin_w200">
    <option value="">- select template -</option>
    """
    for templ in get_templates():
        selected = 'selected="selected"' if template == templ else ''
        # The label drops the first 9 and the last 4 characters of the name.
        text += """<option value="%s" %s>%s</option>""" % (
            templ, selected, templ[9:-4])
    text += """</select><br />"""

    output += createhiddenform(action="modifyrank",
                               text=text,
                               rnkID=rnkID,
                               button="Show template",
                               confirm=0)

    try:
        if template:
            with open(configuration.get(template, ''), 'r') as template_in:
                textarea = template_in.read()
            text = """<span class="adminlabel">Content:</span>"""
            text += """<textarea class="admin_wvar" readonly="true" rows="15" cols="70">""" + \
                textarea + """</textarea>"""
            output += text
    except StandardError:
        output += """Cannot load file, either it does not exist, or not enough rights to read it: '%s'""" % (
            configuration.get(template, ''), )

    finoutput += addadminbox("View templates", [output])
    return finoutput
Ejemplo n.º 24
0
def perform_showrankdetails(rnkID, ln=CFG_SITE_LANG):
    """Return the HTML overview of the rank method given by rnkID.

    The page is a sequence of admin boxes: overview (code and last update),
    statistics placeholder, attached collections, translations and the
    content of the configuration file.

    rnkID - id of the rank method
    ln - language code, used in the [Modify] links and passed to
         get_rnk_col()

    Returns the HTML, or a plain message when rnkID is empty or
    get_rnk_code() finds nothing for it.
    """

    if not rnkID:
        return "No ranking method selected."
    code_rows = get_rnk_code(rnkID)
    if not code_rows:
        return "Ranking method %s does not seem to exist." % str(rnkID)
    # Looked up once instead of once per box.
    rnkcode = code_rows[0][0]

    subtitle = """Overview <a href="%s/admin/bibrank/bibrankadmin.py/modifyrank?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (CFG_SITE_URL, rnkID, ln)
    text  = """
    BibRank code: %s<br />
    Last updated by BibRank:
    """ % (rnkcode)
    last_updated = get_rnk(rnkID)[0][2]
    if last_updated:
        text += "%s<br />" % last_updated
    else:
        text += "Not yet run.<br />"
    output = addadminbox(subtitle, [text])

    # Statistics are a placeholder; the former try/except around this
    # constant assignment could never reach its handler.
    subtitle = """Rank method statistics"""
    text = "Not yet implemented"
    output += addadminbox(subtitle, [text])

    subtitle = """Attached to collections <a href="%s/admin/bibrank/bibrankadmin.py/modifycollection?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (CFG_SITE_URL, rnkID, ln)
    text = ""
    col = get_rnk_col(rnkID, ln)
    for _, col_name in col:
        text += "%s<br />" % col_name
    if not col:
        text += "No collections"
    output += addadminbox(subtitle, [text])

    subtitle = """Translations <a href="%s/admin/bibrank/bibrankadmin.py/modifytranslations?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (CFG_SITE_URL, rnkID, ln)
    prev_lang = ''
    trans = get_translations(rnkID)
    types = dict((row[0], row[1]) for row in get_rnk_nametypes())
    text = ""
    languages = dict(get_languages())
    if trans:
        for lang, name_type, name in trans:
            if lang and lang in languages and name_type and name:
                # Print the language heading once per run of equal languages.
                if prev_lang != lang:
                    prev_lang = lang
                    text += """%s: <br />""" % (languages[lang])
                if name_type in types:
                    text += """<span style="margin-left: 10px">'%s'</span><span class="note">(%s)</span><br />""" % (name, types[name_type])
    else:
        text = """No translations exists"""
    output += addadminbox(subtitle, [text])

    subtitle = """Configuration file: '%s/bibrank/%s.cfg' <a href="%s/admin/bibrank/bibrankadmin.py/modifyrank?rnkID=%s&amp;ln=%s">[Modify]</a>""" % (CFG_ETCDIR, rnkcode, CFG_SITE_URL, rnkID, ln)
    try:
        # "with" closes the handle, which used to be left open.
        with open(configuration.get(rnkcode + ".cfg", '')) as cfg_in:
            text = """<pre>""" + cfg_in.read() + """</pre>"""
    except StandardError:
        text = """Cannot load file, either it does not exist, or not enough rights to read it."""
    output += addadminbox(subtitle, [text])

    return output
Ejemplo n.º 25
0
def _citerank_abort(message, exc_class=Exception):
    """Log `message` to stderr and raise `exc_class` carrying the same text."""
    write_message(message, sys.stderr)
    raise exc_class(message)


def citerank(rank_method_code):
    """Run a citation-graph based ranking method and store its result.

    The configuration file registered under ``<rank_method_code>.cfg`` is
    parsed; its ``[rank_method]`` section names (option ``function``) the
    section holding all other options read here:

    * ``file_with_citations``: optional file with citation pairs; when it
      cannot be used the citations are fetched with
      ``get_citations_from_db``.
    * ``citerank_method``: ``citation_time``, ``pagerank_classic`` or
      ``pagerank_time``.
    * ``time_decay``: float, read for ``citation_time`` and
      ``pagerank_time``.
    * ``conv_threshold``, ``check_point``, ``damping_factor``: read for
      every method other than ``citation_time``.
    * ``use_external_citations``, ``ext_citation_file``,
      ``ext_reference_tag``, ``ext_alpha``, ``ext_beta``: read for
      ``pagerank_classic``; the last four only when
      ``use_external_citations`` is ``yes``.
    * ``output_ranks_to_filename``, ``output_rank_limit``: optional dump
      of the first ranks to a file.

    The computed ranks are finally passed to ``normalize_weights`` and
    ``into_db``.

    :param rank_method_code: code of the rank method; used to look up the
        configuration file and handed to ``into_db``.
    :return: None. Returns early, after logging, when numpy could not be
        imported or when no citation data is found.
    :raise StandardError: the configuration file cannot be located or read.
    :raise Exception: a required option is missing or malformed, the
        external citation data is insufficient, or ``citerank_method`` is
        not one of the supported values.
    """
    write_message("Running rank method: %s" % rank_method_code, verbose=0)
    if not import_numpy:
        write_message('The numpy package could not be imported. '
                      'This package is compulsory for running the '
                      'citerank methods.')
        return

    # Bound before the try block so the error handler can always report it.
    file_ = ''
    config = ConfigParser.ConfigParser()
    try:
        file_ = configuration.get(rank_method_code + '.cfg', '')
        # The context manager closes the handle once parsing is done.
        with open(file_) as config_file:
            config.readfp(config_file)
    except StandardError:
        # Fall back to the looked-up name when no path was resolved, so the
        # message never ends with an empty file name.
        _citerank_abort("Cannot find configuration file: %s"
                        % (file_ or "%s.cfg" % rank_method_code),
                        StandardError)

    # the file for citations needs to have the following format:
    # each line needs to be x[tab]y, where x cites y; x,y are recids
    function = config.get("rank_method", "function")
    try:
        file_for_citations = config.get(function, "file_with_citations")
        cit, dict_of_ids = get_citations_from_file(file_for_citations)
    except (ConfigParser.NoOptionError, StandardError) as err:
        write_message("If you want to read the citation data from file set "
                      "up the file_with_citations parameter in the config "
                      "file [%s]" % err,
                      verbose=2)
        cit, dict_of_ids = get_citations_from_db()

    len_ = len(dict_of_ids)
    write_message("Number of nodes(papers) to rank : %s" % str(len_),
                  verbose=3)
    if len_ == 0:
        write_message("No citation data found, nothing to be done.")
        return

    try:
        method = config.get(function, "citerank_method")
    except ConfigParser.NoOptionError as err:
        _citerank_abort("Exception: %s " % err)
    write_message("Running %s method." % method, verbose=2)
    dates = get_dates(function, config, dict_of_ids)

    if method == "citation_time":
        try:
            time_decay = float(config.get(function, "time_decay"))
        except (ConfigParser.NoOptionError, ValueError) as err:
            _citerank_abort("Exception: %s" % err)
        date_coef = calculate_time_weights(len_, time_decay, dates)
        # Unlike pagerank_time below, citation loops are not removed here.
        dict_of_ranks = run_citation_rank_time(cit, dict_of_ids,
                                               date_coef, dates)
    else:
        # These parameters are read for every remaining method, before the
        # method name itself is checked.
        try:
            conv_threshold = float(config.get(function, "conv_threshold"))
            check_point = int(config.get(function, "check_point"))
            damping_factor = float(config.get(function, "damping_factor"))
            write_message("Parameters: d = %s, conv_threshold = %s, "
                          "check_point = %s"
                          % (str(damping_factor), str(conv_threshold),
                             str(check_point)),
                          verbose=5)
        except (ConfigParser.NoOptionError, StandardError) as err:
            _citerank_abort("Exception: %s" % err)

        if method == "pagerank_classic":
            ref = construct_ref_array(cit, dict_of_ids, len_)
            use_ext_cit = ""
            try:
                use_ext_cit = config.get(function, "use_external_citations")
                write_message("Pagerank will use external citations: %s"
                              % str(use_ext_cit), verbose=5)
            except (ConfigParser.NoOptionError, StandardError) as err:
                write_message("%s" % err, verbose=2)

            if use_ext_cit == "yes":
                try:
                    ext_citation_file = config.get(function,
                                                   "ext_citation_file")
                    ext_links = get_external_links_from_file(
                        ext_citation_file, ref, dict_of_ids)
                except (ConfigParser.NoOptionError, StandardError):
                    write_message("If you want to read the external citation "
                                  "data from file set up the "
                                  "ext_citation_file parameter in the "
                                  "config. file",
                                  verbose=3)
                    # No usable file: fall back to the database, which needs
                    # a reference tag whose first three characters parse as
                    # an integer.
                    try:
                        reference_tag = config.get(function,
                                                   "ext_reference_tag")
                        int(reference_tag[0:3])
                    except (ConfigParser.NoOptionError, StandardError):
                        _citerank_abort("You need to set up correctly the "
                                        "ext_reference_tag in the cfg file")
                    ext_links = get_external_links_from_db(
                        ref, dict_of_ids, reference_tag)
                    avg = avg_ext_links_with_0(ext_links)
                    if avg < 1:
                        _citerank_abort(
                            "This method can't be ran. There is not "
                            "enough information about the external "
                            "citation. Hint: check the reference tag")
                    avg_ext_links_without_0(ext_links)
                try:
                    alpha = float(config.get(function, "ext_alpha"))
                    beta = float(config.get(function, "ext_beta"))
                except (ConfigParser.NoOptionError, StandardError) as err:
                    _citerank_abort("Exception: %s" % err)
                dict_of_ranks = run_pagerank_ext(
                    cit, dict_of_ids, ref, ext_links, conv_threshold,
                    check_point, alpha, beta, dates)
            else:
                dict_of_ranks = run_pagerank(
                    cit, dict_of_ids, len_, ref, damping_factor,
                    conv_threshold, check_point, dates)
        elif method == "pagerank_time":
            try:
                time_decay = float(config.get(function, "time_decay"))
                write_message("Parameter: time_decay = %s"
                              % str(time_decay), verbose=5)
            except (ConfigParser.NoOptionError, StandardError) as err:
                _citerank_abort("Exception: %s" % err)
            date_coef = calculate_time_weights(len_, time_decay, dates)
            cit = remove_loops(cit, dates, dict_of_ids)
            ref = construct_ref_array(cit, dict_of_ids, len_)
            dict_of_ranks = run_pagerank_time(
                cit, dict_of_ids, len_, ref, damping_factor,
                conv_threshold, check_point, date_coef, dates)
        else:
            _citerank_abort("Error: Unknown ranking method. "
                            "Please check the citerank_method parameter "
                            "in the config. file.")

    # Optional, best-effort dump of the top ranks: any failure here is only
    # logged and never prevents the ranks from being stored.
    try:
        filename_ranks = config.get(function, "output_ranks_to_filename")
        max_ranks = config.get(function, "output_rank_limit")
        if max_ranks.isdigit():
            max_ranks = min(int(max_ranks), len_)
        else:
            # A non-numeric limit means "write every rank".
            max_ranks = len_
        ranks = sort_weights(dict_of_ranks)
        write_message("Ranks: %s" % str(ranks), verbose=9)
        write_first_ranks_to_file(ranks, dict_of_ranks,
                                  max_ranks, filename_ranks)
    except (ConfigParser.NoOptionError, StandardError) as err:
        write_message("If you want the ranks to be printed in a file you "
                      "have to set output_ranks_to_filename and "
                      "output_rank_limit parameters in the configuration "
                      "file [%s]" % err,
                      verbose=3)

    normalize_weights(dict_of_ranks)
    into_db(dict_of_ranks, rank_method_code)