def dict2db(table_name, dict_data, mode):
    """
    Load the dict values into the database
    Three modes of operation:
    i - insert
    r - replace
    c - correct
    """
    #Escape all the content in dict data to avoid " and '
    for data in dict_data:
        dict_data[data] = re.escape(dict_data[data])

    if mode == 'i': #Insert mode
        query_fields = " , " .join(dict_data.keys())
        query_values = "' , '" .join(dict_data.values())
        query = "INSERT IGNORE INTO %s(%s) VALUES ('%s')" % (wash_table_column_name(table_name),
                                                            query_fields,
                                                            query_values)
    elif mode == 'c': #Correct mode
        if '_' in table_name:
            query = "SELECT * FROM %s" % table_name#FIXIT Trick to execute something instead of giving error
        else:
            tbl_id = get_primary_keys(table_name)[0]
            del dict_data[tbl_id]
            query_update = " , " .join(["%s=\'%s\'" % (field, dict_data[field]) for field in dict_data])
            query = "UPDATE %s SET %s" % (wash_table_column_name(table_name),
                                         query_update)
    else: #Retry in the default mode
        return dict2db(table_name, dict_data, LOAD_DEFAULT_MODE)
    try:
        run_sql(query)
    except:
        print "VALUES: %s ALREADY EXIST IN TABLE %s. SKIPPING" % (query_values, table_name)
        pass
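For illustration, a hedged call sketch (the table and column names are placeholders, and LOAD_DEFAULT_MODE is assumed to be defined alongside this function as one of the modes above):

# Hypothetical usage: insert one row whose keys match the target table's columns.
row = {'name': 'Preprints', 'dbquery': 'collection:PREPRINT'}
dict2db('collection', row, 'i')  # builds and runs an INSERT IGNORE INTO collection(...) statement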
Example #2
def get_customevent_trend(args):
    """
    Returns trend data for a custom event over a given
    timestamp range.

    @param args['id']: The event id
    @type args['id']: str

    @param args['t_start']: Date and time of start point
    @type args['t_start']: str

    @param args['t_end']: Date and time of end point
    @type args['t_end']: str

    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str

    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str

    @param args['cols']: Columns and the content to filter on; if this list
                         does not exist or is empty, all columns are included
    @type args['cols']: [ [ str, str, str ], ]
    """
    # Get a MySQL friendly date
    lower = _to_datetime(args['t_start'], args['t_format']).isoformat()
    upper = _to_datetime(args['t_end'], args['t_format']).isoformat()
    tbl_name = get_customevent_table(args['id'])
    col_names = get_customevent_args(args['id'])

    sql_query = [
        "SELECT creation_time FROM %s WHERE creation_time > '%s'" %
        (tbl_name, lower)
    ]
    sql_query.append("AND creation_time < '%s'" % upper)
    sql_param = []
    for col_bool, col_title, col_content in args['cols']:
        if not col_title in col_names:
            continue
        if col_content:
            if col_bool == "and" or col_bool == "":
                sql_query.append("AND %s" % wash_table_column_name(col_title))
            elif col_bool == "or":
                sql_query.append("OR %s" % wash_table_column_name(col_title))
            elif col_bool == "and_not":
                sql_query.append("AND NOT %s" %
                                 wash_table_column_name(col_title))
            else:
                continue
            sql_query.append(" LIKE %s")
            sql_param.append("%" + col_content + "%")
    sql_query.append("ORDER BY creation_time DESC")
    sql = ' '.join(sql_query)

    dates = [x[0] for x in run_sql(sql, tuple(sql_param))]
    return _get_trend_from_actions(dates, 0, args['t_start'], args['t_end'],
                                   args['granularity'], args['t_format'])
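Note that each entry of args['cols'] is unpacked into three values by the loop above (boolean operator, column title, column content). A hedged sketch of an args dict (the event id, column name, date format and granularity values are placeholders, not taken from the code above):

# Hypothetical argument dict for get_customevent_trend.
args = {'id': 'baskets',
        't_start': '01/01/2010',
        't_end': '01/02/2010',
        't_format': '%d/%m/%Y',
        'granularity': 'DAY',
        'cols': [['and', 'action', 'display']]}
trend = get_customevent_trend(args)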
Example #4
def get_field(recid, field):
    """
    Gets list of field 'field' for the record with 'recid' system number.
    """

    digit = field[0:2]

    bibbx = "bib%sx" % digit
    bibx = "bibrec_bib%sx" % digit
    query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" % (
        wash_table_column_name(bibbx), wash_table_column_name(bibx))

    return [row[0] for row in run_sql(query, (recid, field))]
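For example, since the first two characters of the tag select the bibXXx pair of tables, a call with a MARC title tag would query bib24x/bibrec_bib24x (the recid below is a placeholder):

# Hypothetical call: fetch the title field (tag 245__a) of record 10.
titles = get_field(10, '245__a')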
Example #5
def modify_translations(ID, langs, sel_type, trans, table, id_column=None):
    """add or modify translations in tables given by table
    frmID - the id of the format from the format table
    sel_type - the name type
    langs - the languages
    trans - the translations, in same order as in langs
    table - the table
    id_column - name of the column with identifier. If None, expect column to be named 'id_%s' % table
    """

    name = "name"
    if table[-1:].isupper():
        name = "NAME"

    id_column = id_column or 'id_%s' % table
    if id_column:
        id_column = wash_table_column_name(id_column)
    try:
        for nr in range(0,len(langs)):
            res = run_sql("SELECT value FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column),
                          (ID, sel_type, langs[nr][0]))
            if res:
                if trans[nr]:
                    res = run_sql("UPDATE %s%s SET value=%%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column),
                                  (trans[nr], ID, sel_type, langs[nr][0]))
                else:
                    res = run_sql("DELETE FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column),
                                  (ID, sel_type, langs[nr][0]))
            else:
                if trans[nr]:
                    res = run_sql("INSERT INTO %s%s (%s, type, ln, value) VALUES (%%s,%%s,%%s,%%s)" % (table, name, id_column),
                                  (ID, sel_type, langs[nr][0], trans[nr]))
        return (1, "")
    except StandardError, e:
        return (0, e)
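A hedged usage sketch, assuming a base table 'format' whose translation table is 'formatname' (following the "%s%s" % (table, name) convention above) with identifier column 'id_format':

# Hypothetical call: set French and English names of type 'ln' for row 5.
modify_translations(5,
                    [['fr', 'French'], ['en', 'English']],  # langs; only langs[nr][0] is used
                    'ln',
                    ['Bref', 'Brief'],                       # trans, same order as langs
                    'format')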
Example #6
def dump_collection(collection, config, force_ids, print_to_screen=False):
    """
    Dump the current collection
    Note: there is a special notation, ori(origin) - rel(relation) - fin(final)
    For example in the relation field-field_tag-tag:
    ori(origin): field table
    rel(relation): field_tag
    fin(final): tag
    """
    tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-")
    query = "SELECT * FROM %s" % (wash_table_column_name(tbl_ori))
    lst_ori = query2list(query, tbl_ori)
    tbl_ori_id = get_primary_keys(tbl_ori)[0]
    for index_ori, result_ori in enumerate(lst_ori):
        dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id)
        query = "SELECT * FROM %s WHERE %s=%s" % (
            wash_table_column_name(tbl_rel),
            dict_rels[tbl_ori + "." + tbl_ori_id], result_ori[tbl_ori_id])
        if collection['tables'][tbl_ori].startswith('extend'):
            add_special_field(collection, tbl_ori, result_ori)
        lst_rel = query2list(query, tbl_rel)
        for result_rel in lst_rel:
            tbl_fin_id = get_primary_keys(tbl_fin)[0]
            tbl_rel_id = dict_rels[tbl_fin + "." +
                                   tbl_fin_id].split(".")[1].strip()
            query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(
                tbl_fin), tbl_fin_id, result_rel[tbl_rel_id])
            lst_fin = query2list(query, tbl_fin)
            for index_fin, result_fin in enumerate(lst_fin):
                result_ori[tbl_fin + "." + create_section_id(
                    index_fin, with_date=False)] = result_fin

        section_name = tbl_ori + "." + create_section_id(index_ori)
        if force_ids == False:  #Remove the ids from the dict
            results = delete_ids(result_ori,
                                 collection['relations'].split("-"))
            config[section_name] = results
        else:
            config[section_name] = result_ori

        if print_to_screen == True:
            output = StringIO.StringIO()
            config.write(
                output)  #Write to the output string instead of the file
            print output.getvalue()
        else:
            config.write()
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method."""

    bibrank_meths = run_sql("SELECT name from rnkMETHOD")

    for (rank_method_code,) in bibrank_meths:
        filepath = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
        config = ConfigParser.ConfigParser()
        try:
            config.readfp(open(filepath))
        except IOError:
            pass

        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            METHODS[rank_method_code] = {}
            METHODS[rank_method_code]["function"] = cfg_function
            METHODS[rank_method_code]["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
            METHODS[rank_method_code]["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
            METHODS[rank_method_code]["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception("Error in configuration file: %s" % (CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"))

        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            METHODS[rank_method_code][ln] = value

        if config.has_option(cfg_function, "table"):
            METHODS[rank_method_code]["rnkWORD_table"] = config.get(cfg_function, "table")
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(METHODS[rank_method_code]["rnkWORD_table"][:-1])
            METHODS[rank_method_code]["col_size"] = run_sql(query)[0][0]

        if config.has_option(cfg_function, "stemming") and config.get(cfg_function, "stemming"):
            try:
                METHODS[rank_method_code]["stemmer"] = config.get(cfg_function, "stemming")
            except KeyError:
                pass

        if config.has_option(cfg_function, "stopword"):
            METHODS[rank_method_code]["stopwords"] = config.get(cfg_function, "stopword")

        if config.has_section("find_similar"):
            METHODS[rank_method_code]["max_word_occurence"] = float(config.get("find_similar", "max_word_occurence"))
            METHODS[rank_method_code]["min_word_occurence"] = float(config.get("find_similar", "min_word_occurence"))
            METHODS[rank_method_code]["min_word_length"] = int(config.get("find_similar", "min_word_length"))
            METHODS[rank_method_code]["min_nr_words_docs"] = int(config.get("find_similar", "min_nr_words_docs"))
            METHODS[rank_method_code]["max_nr_words_upper"] = int(config.get("find_similar", "max_nr_words_upper"))
            METHODS[rank_method_code]["max_nr_words_lower"] = int(config.get("find_similar", "max_nr_words_lower"))
            METHODS[rank_method_code]["default_min_relevance"] = int(config.get("find_similar", "default_min_relevance"))

        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)

        if config.has_section("combine_method"):
            i = 1
            METHODS[rank_method_code]["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                METHODS[rank_method_code]["combine_method"].append(config.get("combine_method", "method%s" % i).split(","))
                i += 1
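The options read above imply a bibrank .cfg layout roughly like the sketch below (section and option names are the ones the code queries; the values are placeholders, not taken from a real Invenio configuration):

[rank_method]
function = word_similarity

[word_similarity]
relevance_number_output_prologue = (
relevance_number_output_epilogue = )
table = rnkWORD01F
stemming = en
stopword = stopwords.kb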
def test_wash_table_column_name(self):
    """dbquery - wash table column name"""
    testcase_error = "foo ; bar"
    testcase_ok = "foo_bar"
    self.assertRaises(Exception, dbquery.wash_table_column_name,
                      testcase_error)
    self.assertEqual(testcase_ok,
                     dbquery.wash_table_column_name(testcase_ok))
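The test only pins down the contract: names made of word characters pass through unchanged, anything else raises. A minimal sketch of a function with that behaviour (an assumption for illustration, not the actual Invenio implementation):

import re

def wash_table_column_name_sketch(name):
    """Return name unchanged if it only contains word characters, else raise."""
    if re.search(r'[^\w]', name):
        raise Exception('Wrong table or column name: "%s"' % name)
    return name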
def get_primary_keys(table_name):
    """
    Get the primary keys from the table with the DESC mysql function
    """
    lst_keys = []
    query = "DESC %s" % wash_table_column_name(table_name)
    results = run_sql(query)
    for field in results:
        if field[3] == 'PRI':
            lst_keys.append(field[0])
    return lst_keys
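Since MySQL's DESC output carries the key flag in its fourth column (checked as field[3] above), a table whose only primary key is 'id' would yield ['id'] (the table name below is a placeholder):

keys = get_primary_keys('collection')  # e.g. ['id']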
Example #11
def check_tables():
    """
    Check all DB tables.  Useful to run from time to time when the
    site is idle, say once a month during a weekend night.

    FIXME: should produce useful output about outcome.
    """
    res = run_sql("SHOW TABLES")
    for row in res:
        table_name = row[0]
        write_message("checking table %s" % table_name)
        run_sql("CHECK TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql
Example #13
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method."""

    global methods
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    methods = {}
    global voutput
    voutput = ""

    for (rank_method_code, ) in bibrank_meths:
        try:
            file = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
            config = ConfigParser.ConfigParser()
            config.readfp(open(file))
        except StandardError, e:
            pass

        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            methods[rank_method_code] = {}
            methods[rank_method_code]["function"] = cfg_function
            methods[rank_method_code]["prefix"] = config.get(
                cfg_function, "relevance_number_output_prologue")
            methods[rank_method_code]["postfix"] = config.get(
                cfg_function, "relevance_number_output_epilogue")
            methods[rank_method_code][
                "chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception(
                "Error in configuration file: %s" %
                (CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"))

        i8n_names = run_sql(
            """SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""",
            (rank_method_code, ))
        for (ln, value) in i8n_names:
            methods[rank_method_code][ln] = value

        if config.has_option(cfg_function, "table"):
            methods[rank_method_code]["rnkWORD_table"] = config.get(
                cfg_function, "table")
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(
                methods[rank_method_code]["rnkWORD_table"][:-1])
            methods[rank_method_code]["col_size"] = run_sql(query)[0][0]

        if config.has_option(cfg_function, "stemming") and config.get(
                cfg_function, "stemming"):
            try:
                methods[rank_method_code]["stemmer"] = config.get(
                    cfg_function, "stemming")
            except Exception, e:
                pass
Example #14
def optimise_tables():
    """
    Optimise all DB tables to defragment them in order to increase DB
    performance.  Useful to run from time to time when the site is
    idle, say once a month during a weekend night.

    FIXME: should produce useful output about outcome.
    """
    res = run_sql("SHOW TABLES")
    for row in res:
        table_name = row[0]
        write_message("optimising table %s" % table_name)
        run_sql("OPTIMIZE TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql
Example #16
def get_name(ID, ln, rtype, table, id_column=None):
    """Returns the value from the table name based on arguments
    ID - id
    ln - a language supported by Invenio
    rtype - the type of value wanted, like 'ln', 'sn'
    table - tablename
    id_column - name of the column with identifier. If None, expect column to be named 'id_%s' % table
    """

    name = "name"
    if table[-1:].isupper():
        name = "NAME"

    if id_column:
        id_column = wash_table_column_name(id_column)

    try:
        res = run_sql(
            "SELECT value FROM %s%s WHERE type='%s' and ln='%s' and %s=%s" %
            (table, name, rtype, ln,
             (id_column or 'id_%s' % wash_table_column_name(table)), ID))
        return res
    except StandardError, e:
        return ()
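A hedged call sketch, assuming a base table 'collection' with a 'collectionname' translation table and the default 'id_collection' identifier column:

# Hypothetical call: fetch the French name (type 'ln') of row 2.
res = get_name(2, 'fr', 'ln', 'collection')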
Example #17
def optimise_tables():
    """
    Optimise all DB tables to defragment them in order to increase DB
    performance.  Useful to run from time to time when the site is
    idle, say once a month during a weekend night.

    FIXME: should produce useful output about outcome.
    """
    res = run_sql("SHOW TABLES")
    for row in res:
        table_name = row[0]
        if table_name == 'bibfmt':
            # inspire production: requires ~30G of temp space and 4 hours
            continue
        elif table_name == 'aidPERSONIDPAPERS':
            write_message("optimising table %s" % table_name)
            run_sql(
                "OPTIMIZE LOCAL TABLE %s" %
                wash_table_column_name(table_name))  # kwalitee: disable=sql
        else:
            write_message("optimising table %s" % table_name)
            run_sql(
                "OPTIMIZE TABLE %s" %
                wash_table_column_name(table_name))  # kwalitee: disable=sql
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of enries of a
    tag from the record in another index...useful for authority files"""

    records = []

    if config.has_section("index_term_count"):
        index = config.get("index_term_count", "index_table_name")
        tag = config.get("index_term_count", "index_term_value_from_tag")
        # check against possible SQL injection:
        dummy = get_table_update_time(index)
        tag = wash_table_column_name(tag)
    else:
        raise Exception("Config file " + config +
                        " does not have index_term_count section")
        return ()

    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2]) # we checked that tag is safe
    records = list(run_sql(query, (tag, )))
    write_message("Number of records found with the necessary tags: %s" %
                  len(records))

    rnkset = {}
    for key, value in records:
        hits = 0
        if len(value):
            query = "SELECT hitlist from %s where term = %%s" % index  # we checked that index is a table
            row = run_sql(query, (value, ))
            if row and row[0] and row[0][0]:
                #has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except:
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s" %
                  len(rnkset))
    return rnkset
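The section read at the top of this function would look roughly like the sketch below (option names as queried above; the index table and tag values are placeholders):

[index_term_count]
index_table_name = idxWORD01F
index_term_value_from_tag = 100__a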
Example #24
def get_customevent_dump(args):
    """
    Similar to a get_event_trend implementation, but NO refining aka frequency
    handling is carried out whatsoever. This is just a dump. A dump!

    @param args['id']: The event id
    @type args['id']: str

    @param args['t_start']: Date and time of start point
    @type args['t_start']: str

    @param args['t_end']: Date and time of end point
    @type args['t_end']: str

    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str

    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str

    @param args['cols']: Columns and the content to filter on; if this list
                         does not exist or is empty, all columns are included
    @type args['cols']: [ [ str, str, str ], ]
    """
    # Get a MySQL friendly date
    lower = _to_datetime(args['t_start'], args['t_format']).isoformat()
    upper = _to_datetime(args['t_end'], args['t_format']).isoformat()

    # Get customevents
    # events_list = [(creation_time, event, [arg1, arg2, ...]), ...]
    event_list = []
    event_cols = {}
    for id, i in [(args['ids'][i], str(i)) for i in range(len(args['ids']))]:
        # Get all the event arguments and creation times
        tbl_name = get_customevent_table(id)
        col_names = get_customevent_args(id)
        sql_query = [
            "SELECT * FROM %s WHERE creation_time > '%s'" % (tbl_name, lower)
        ]  # Note: SELECT * technique is okay here
        sql_query.append("AND creation_time < '%s'" % upper)
        sql_param = []
        for col_bool, col_title, col_content in args['cols' + i]:
            if not col_title in col_names: continue
            if col_content:
                if col_bool == "and" or col_bool == "":
                    sql_query.append("AND %s" %
                                     wash_table_column_name(col_title))
                elif col_bool == "or":
                    sql_query.append("OR %s" %
                                     wash_table_column_name(col_title))
                elif col_bool == "and_not":
                    sql_query.append("AND NOT %s" %
                                     wash_table_column_name(col_title))
                else:
                    continue
                sql_query.append(" LIKE %s")
                sql_param.append("%" + col_content + "%")
        sql_query.append("ORDER BY creation_time DESC")
        sql = ' '.join(sql_query)
        res = run_sql(sql, tuple(sql_param))

        for row in res:
            event_list.append((row[1], id, row[2:]))
        # Get the event col names
        try:
            event_cols[id] = cPickle.loads(
                run_sql("SELECT cols FROM staEVENT WHERE id = %s",
                        (id, ))[0][0])
        except TypeError:
            event_cols[id] = ["Unnamed"]
    event_list.sort()

    output = []
    for row in event_list:
        temp = [row[1], row[0].strftime('%Y-%m-%d %H:%M:%S')]

        arguments = [
            "%s: %s" % (event_cols[row[1]][i], row[2][i])
            for i in range(len(row[2]))
        ]

        temp.extend(arguments)
        output.append(tuple(temp))

    return output
Example #27
def create_customevent(id=None, name=None, cols=[]):
    """
    Creates a new custom event by setting up the necessary MySQL tables.

    @param id: Proposed human-readable id of the new event.
    @type id: str

    @param name: Optionally, a descriptive name.
    @type name: str

    @param cols: Optionally, the name of the additional columns.
    @type cols: [str]

    @return: A status message
    @type: str
    """
    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept id and name with standard characters
    if not re.search("[^\w]", str(id) + str(name)) is None:
        return "Please note that both event id and event name needs to be written without any non-standard characters."

    # Make sure the chosen id is not already taken
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id,))) != 0:
        return "Event id [%s] already exists! Aborted." % id

    # Check if the cols are valid titles
    for argument in cols:
        if (argument == "creation_time") or (argument == "id"):
            return "Invalid column title: %s! Aborted." % argument

    # Insert a new row into the events table describing the new event
    sql_param = [id]
    if name is not None:
        sql_name = "%s"
        sql_param.append(name)
    else:
        sql_name = "NULL"
    if len(cols) != 0:
        sql_cols = "%s"
        sql_param.append(cPickle.dumps(cols))
    else:
        sql_cols = "NULL"
    run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name + ", " + sql_cols + ")",
            tuple(sql_param))

    tbl_name = get_customevent_table(id)

    # Create a table for the new event
    sql_query = ["CREATE TABLE %s (" % tbl_name]
    sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
    sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
    for argument in cols:
        arg = wash_table_column_name(argument)
        sql_query.append("%s MEDIUMTEXT NULL," % arg)
        sql_query.append("INDEX %s (%s(50))," % (arg, arg))
    sql_query.append("PRIMARY KEY (id))")
    sql_str = ' '.join(sql_query)
    run_sql(sql_str)

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully created.\n" +
            "Please use event id [%s] when registering an event.") % (tbl_name, id)