def dict2db(table_name, dict_data, mode): """ Load the dict values into the database Three modes of operation: i - insert r - replace c - correct """ #Escape all the content in dict data to avoid " and ' for data in dict_data: dict_data[data] = re.escape(dict_data[data]) if mode == 'i': #Insert mode query_fields = " , " .join(dict_data.keys()) query_values = "' , '" .join(dict_data.values()) query = "INSERT IGNORE INTO %s(%s) VALUES ('%s')" % (wash_table_column_name(table_name), query_fields, query_values) elif mode == 'c': #Correct mode if '_' in table_name: query = "SELECT * FROM %s" % table_name#FIXIT Trick to execute something instead of giving error else: tbl_id = get_primary_keys(table_name)[0] del dict_data[tbl_id] query_update = " , " .join(["%s=\'%s\'" % (field, dict_data[field]) for field in dict_data]) query = "UPDATE %s SET %s" % (wash_table_column_name(table_name), query_update) else: #Try in the default mode dict2db(table_name, dict_data, LOAD_DEFAULT_MODE) try: run_sql(query) except: print "VALUES: %s ALREADY EXIST IN TABLE %s. SKIPPING" % (query_values, table_name) pass
def get_customevent_trend(args):
    """
    Return trend data for a custom event over a given timestamp range.

    @param args['id']: The event id
    @type args['id']: str
    @param args['t_start']: Date and time of start point
    @type args['t_start']: str
    @param args['t_end']: Date and time of end point
    @type args['t_end']: str
    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str
    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str
    @param args['cols']: Columns and the content that will be included;
        missing or empty entries mean all columns are included
    @type args['cols']: [ [ str, str ], ]
    """
    # Build MySQL-friendly ISO dates for the range boundaries.
    lower = _to_datetime(args['t_start'], args['t_format']).isoformat()
    upper = _to_datetime(args['t_end'], args['t_format']).isoformat()
    tbl_name = get_customevent_table(args['id'])
    col_names = get_customevent_args(args['id'])
    # Map each boolean selector onto its SQL connective; an empty selector
    # behaves like "and", anything unknown skips the column filter.
    connectives = {"and": "AND", "": "AND", "or": "OR", "and_not": "AND NOT"}
    pieces = ["SELECT creation_time FROM %s WHERE creation_time > '%s'" %
              (tbl_name, lower),
              "AND creation_time < '%s'" % upper]
    params = []
    for col_bool, col_title, col_content in args['cols']:
        if col_title not in col_names or not col_content:
            continue
        connective = connectives.get(col_bool)
        if connective is None:
            continue
        pieces.append("%s %s" % (connective,
                                 wash_table_column_name(col_title)))
        pieces.append(" LIKE %s")
        params.append("%" + col_content + "%")
    pieces.append("ORDER BY creation_time DESC")
    dates = [row[0] for row in run_sql(' '.join(pieces), tuple(params))]
    return _get_trend_from_actions(dates, 0, args['t_start'], args['t_end'],
                                   args['granularity'], args['t_format'])
def get_customevent_trend(args): """ Returns trend data for a custom event over a give timestamp range. @param args['id']: The event id @type args['id']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str @param args['cols']: Columns and it's content that will be include if don't exist or it's empty it will include all cols @type args['cols']: [ [ str, str ], ] """ # Get a MySQL friendly date lower = _to_datetime(args["t_start"], args["t_format"]).isoformat() upper = _to_datetime(args["t_end"], args["t_format"]).isoformat() tbl_name = get_customevent_table(args["id"]) col_names = get_customevent_args(args["id"]) sql_query = ["SELECT creation_time FROM %s WHERE creation_time > '%s'" % (tbl_name, lower)] sql_query.append("AND creation_time < '%s'" % upper) sql_param = [] for col_bool, col_title, col_content in args["cols"]: if not col_title in col_names: continue if col_content: if col_bool == "and" or col_bool == "": sql_query.append("AND %s" % wash_table_column_name(col_title)) elif col_bool == "or": sql_query.append("OR %s" % wash_table_column_name(col_title)) elif col_bool == "and_not": sql_query.append("AND NOT %s" % wash_table_column_name(col_title)) else: continue sql_query.append(" LIKE %s") sql_param.append("%" + col_content + "%") sql_query.append("ORDER BY creation_time DESC") sql = " ".join(sql_query) dates = [x[0] for x in run_sql(sql, tuple(sql_param))] return _get_trend_from_actions(dates, 0, args["t_start"], args["t_end"], args["granularity"], args["t_format"])
def get_field(recid, field):
    """
    Return the list of values of `field` for the record whose system
    number is `recid`.
    """
    # The first two digits of the tag select the bibXXx table pair.
    prefix = field[0:2]
    value_table = wash_table_column_name("bib%sx" % prefix)
    link_table = wash_table_column_name("bibrec_bib%sx" % prefix)
    query = ("SELECT bx.value FROM %s AS bx, %s AS bibx "
             "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx "
             "AND bx.tag=%%s" % (value_table, link_table))
    return [row[0] for row in run_sql(query, (recid, field))]
def modify_translations(ID, langs, sel_type, trans, table, id_column=None):
    """add or modify translations in tables given by table
    frmID - the id of the format from the format table
    sel_type - the name type
    langs - the languages
    trans - the translations, in same order as in langs
    table - the table
    id_column - name of the column with identifier. If None, expect column to be named 'id_%s' % table
    """
    # Upper-case table names use an upper-case NAME suffix (fooNAME vs fooname).
    name = "name"
    if table[-1:].isupper():
        name = "NAME"
    id_column = id_column or 'id_%s' % table
    if id_column:
        # id_column is interpolated into SQL below, so it must be washed.
        id_column = wash_table_column_name(id_column)
    try:
        for nr in range(0,len(langs)):
            # Is there already a translation for this (id, type, language)?
            res = run_sql("SELECT value FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s"
                          % (table, name, id_column),
                          (ID, sel_type, langs[nr][0]))
            if res:
                if trans[nr]:
                    # Existing row and a non-empty translation: update it.
                    res = run_sql("UPDATE %s%s SET value=%%s WHERE %s=%%s AND type=%%s AND ln=%%s"
                                  % (table, name, id_column),
                                  (trans[nr], ID, sel_type, langs[nr][0]))
                else:
                    # Existing row but an empty translation: remove it.
                    res = run_sql("DELETE FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s"
                                  % (table, name, id_column),
                                  (ID, sel_type, langs[nr][0]))
            else:
                if trans[nr]:
                    # No row yet and a non-empty translation: insert it.
                    res = run_sql("INSERT INTO %s%s (%s, type, ln, value) VALUES (%%s,%%s,%%s,%%s)"
                                  % (table, name, id_column),
                                  (ID, sel_type, langs[nr][0], trans[nr]))
        return (1, "")
    except StandardError, e:
        # Report the failure to the caller instead of raising.
        return (0, e)
def dump_collection(collection, config, force_ids, print_to_screen=False):
    """
    Dump the current collection

    Note: there are a special notation, ori(origin) - rel(relation) - fin(final)
    For example in the relation field-field_tag-tag:
    ori(origin): field table
    rel(relation): field_tag
    fin(final): tag

    @param collection: dict describing the collection; must provide
        'relations' ("ori-rel-fin" table names) and 'tables'
    @param config: config object the dump is written into -- presumably a
        ConfigObj-style mapping with a write() method; TODO confirm
    @param force_ids: if False, primary-key ids are stripped from the dump
    @param print_to_screen: if True, print the dump instead of writing it
    """
    tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-")
    query = "SELECT * FROM %s" % (wash_table_column_name(tbl_ori))
    lst_ori = query2list(query, tbl_ori)
    # First column of the primary key identifies each origin row.
    tbl_ori_id = get_primary_keys(tbl_ori)[0]
    for index_ori, result_ori in enumerate(lst_ori):
        dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id)
        # Rows of the relation table that point at this origin row.
        query = "SELECT * FROM %s WHERE %s=%s" % (
            wash_table_column_name(tbl_rel),
            dict_rels[tbl_ori + "." + tbl_ori_id],
            result_ori[tbl_ori_id])
        if collection['tables'][tbl_ori].startswith('extend'):
            add_special_field(collection, tbl_ori, result_ori)
        lst_rel = query2list(query, tbl_rel)
        for result_rel in lst_rel:
            tbl_fin_id = get_primary_keys(tbl_fin)[0]
            # The relation column that references the final table's key.
            tbl_rel_id = dict_rels[tbl_fin + "." + tbl_fin_id].split(".")[1].strip()
            query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(
                tbl_fin), tbl_fin_id, result_rel[tbl_rel_id])
            lst_fin = query2list(query, tbl_fin)
            for index_fin, result_fin in enumerate(lst_fin):
                # Attach each final row to the origin row under a
                # "<fin_table>.<section id>" key.
                result_ori[tbl_fin + "." + create_section_id(
                    index_fin, with_date=False)] = result_fin
        section_name = tbl_ori + "." + create_section_id(index_ori)
        if force_ids == False:
            #Remove the ids from the dict
            results = delete_ids(result_ori, collection['relations'].split("-"))
            config[section_name] = results
        else:
            config[section_name] = result_ori
    if print_to_screen == True:
        output = StringIO.StringIO()
        config.write(
            output)  #Write to the output string instead of the file
        print output.getvalue()
    else:
        # NOTE(review): write() with no argument assumes a ConfigObj-like
        # object bound to a file -- confirm against the caller.
        config.write()
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    For every row of rnkMETHOD the matching .cfg file is parsed and the
    METHODS module-level dict is populated with the scoring function,
    output prologue/epilogue, separator regexp, i18n names and optional
    word-table / stemmer / find_similar / combine_method settings.
    """
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    for (rank_method_code,) in bibrank_meths:
        filepath = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
        config = ConfigParser.ConfigParser()
        try:
            config.readfp(open(filepath))
        except IOError:
            # Best effort: an unreadable file leaves the parser empty and
            # the has_section() check below raises the real error.
            pass
        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            METHODS[rank_method_code] = {}
            METHODS[rank_method_code]["function"] = cfg_function
            METHODS[rank_method_code]["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
            METHODS[rank_method_code]["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
            METHODS[rank_method_code]["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception("Error in configuration file: %s" % (CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"))
        # Localized display names of the method, one per language.
        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            METHODS[rank_method_code][ln] = value
        if config.has_option(cfg_function, "table"):
            METHODS[rank_method_code]["rnkWORD_table"] = config.get(cfg_function, "table")
            # Drop the trailing letter and count rows of the companion R table.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(METHODS[rank_method_code]["rnkWORD_table"][:-1])
            METHODS[rank_method_code]["col_size"] = run_sql(query)[0][0]
        if config.has_option(cfg_function, "stemming") and config.get(cfg_function, "stemming"):
            try:
                METHODS[rank_method_code]["stemmer"] = config.get(cfg_function, "stemming")
            except KeyError:
                pass
        if config.has_option(cfg_function, "stopword"):
            METHODS[rank_method_code]["stopwords"] = config.get(cfg_function, "stopword")
        if config.has_section("find_similar"):
            # Thresholds used by the "find similar" ranking.
            METHODS[rank_method_code]["max_word_occurence"] = float(config.get("find_similar", "max_word_occurence"))
            METHODS[rank_method_code]["min_word_occurence"] = float(config.get("find_similar", "min_word_occurence"))
            METHODS[rank_method_code]["min_word_length"] = int(config.get("find_similar", "min_word_length"))
            METHODS[rank_method_code]["min_nr_words_docs"] = int(config.get("find_similar", "min_nr_words_docs"))
            METHODS[rank_method_code]["max_nr_words_upper"] = int(config.get("find_similar", "max_nr_words_upper"))
            METHODS[rank_method_code]["max_nr_words_lower"] = int(config.get("find_similar", "max_nr_words_lower"))
            METHODS[rank_method_code]["default_min_relevance"] = int(config.get("find_similar", "default_min_relevance"))
        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)
        if config.has_section("combine_method"):
            # Collect method1, method2, ... until the sequence ends.
            i = 1
            METHODS[rank_method_code]["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                METHODS[rank_method_code]["combine_method"].append(config.get("combine_method", "method%s" % i).split(","))
                i += 1
def test_wash_table_column_name(self):
    """dbquery - wash table column name"""
    # A name with SQL metacharacters must be rejected, a plain
    # identifier must pass through unchanged.
    bad_name = "foo ; bar"
    good_name = "foo_bar"
    self.assertRaises(Exception,
                      dbquery.wash_table_column_name, bad_name)
    self.assertEqual(good_name, dbquery.wash_table_column_name(good_name))
def dump_collection(collection, config, force_ids, print_to_screen=False): """ Dump the current collection Note: there are a special notation, ori(origin) - rel(relation) - fin(final) For example in the relation field-field_tag-tag: ori(origin): field table rel(relation): field_tag fin(final): tag """ tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-") query = "SELECT * FROM %s" % (wash_table_column_name(tbl_ori)) lst_ori = query2list(query, tbl_ori) tbl_ori_id = get_primary_keys(tbl_ori)[0] for index_ori, result_ori in enumerate(lst_ori): dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id) query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(tbl_rel), dict_rels[tbl_ori+"."+tbl_ori_id], result_ori[tbl_ori_id]) if collection['tables'][tbl_ori].startswith('extend'): add_special_field(collection, tbl_ori, result_ori) lst_rel = query2list(query, tbl_rel) for result_rel in lst_rel: tbl_fin_id = get_primary_keys(tbl_fin)[0] tbl_rel_id = dict_rels[tbl_fin+"."+tbl_fin_id].split(".")[1].strip() query = "SELECT * FROM %s WHERE %s=%s" % (wash_table_column_name(tbl_fin), tbl_fin_id, result_rel[tbl_rel_id]) lst_fin = query2list(query, tbl_fin) for index_fin, result_fin in enumerate(lst_fin): result_ori[tbl_fin+"."+create_section_id(index_fin, with_date=False)] = result_fin section_name = tbl_ori + "." + create_section_id(index_ori) if force_ids == False:#Remove the ids from the dict results = delete_ids(result_ori, collection['relations'].split("-")) config[section_name] = results else: config[section_name] = result_ori if print_to_screen == True: output = StringIO.StringIO() config.write(output)#Write to the output string instead of the file print output.getvalue() else: config.write()
def get_primary_keys(table_name):
    """
    Return the primary-key column names of table_name, discovered via
    the DESC MySQL statement.
    """
    description = run_sql("DESC %s" % wash_table_column_name(table_name))
    # Column 3 of the DESC output is the 'Key' field; 'PRI' marks a
    # primary-key column, column 0 is the column name.
    return [column[0] for column in description if column[3] == 'PRI']
def check_tables():
    """
    Check all DB tables.  Useful to run from time to time when the site
    is idle, say once a month during a weekend night.
    FIXME: should produce useful output about outcome.
    """
    # SHOW TABLES yields one-column rows holding the table name.
    for (table_name,) in run_sql("SHOW TABLES"):
        write_message("checking table %s" % table_name)
        run_sql("CHECK TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    Resets and repopulates the module-level ``methods`` dict (and clears
    ``voutput``).  For every row of rnkMETHOD the matching .cfg file is
    parsed and the scoring function, output prologue/epilogue, separator
    regexp, i18n names and optional word-table/stemmer data are cached.
    """
    global methods
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    methods = {}
    global voutput
    voutput = ""
    for (rank_method_code, ) in bibrank_meths:
        # BUGFIX: the old code named this variable 'file', shadowing the
        # builtin, and bound an unused 'e' in both except clauses.
        filepath = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
        config = ConfigParser.ConfigParser()
        try:
            config.readfp(open(filepath))
        except StandardError:
            # Best effort: an unreadable file leaves the parser empty;
            # the config.get() below then raises the real error.
            pass
        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            methods[rank_method_code] = {}
            methods[rank_method_code]["function"] = cfg_function
            methods[rank_method_code]["prefix"] = config.get(
                cfg_function, "relevance_number_output_prologue")
            methods[rank_method_code]["postfix"] = config.get(
                cfg_function, "relevance_number_output_epilogue")
            methods[rank_method_code][
                "chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception(
                "Error in configuration file: %s" % filepath)
        # Localized display names of the method, one per language.
        i8n_names = run_sql(
            """SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""",
            (rank_method_code, ))
        for (ln, value) in i8n_names:
            methods[rank_method_code][ln] = value
        if config.has_option(cfg_function, "table"):
            methods[rank_method_code]["rnkWORD_table"] = config.get(
                cfg_function, "table")
            # Drop the trailing letter and count rows of the companion
            # *R table.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(
                methods[rank_method_code]["rnkWORD_table"][:-1])
            methods[rank_method_code]["col_size"] = run_sql(query)[0][0]
        if config.has_option(cfg_function, "stemming") and config.get(
                cfg_function, "stemming"):
            try:
                methods[rank_method_code]["stemmer"] = config.get(
                    cfg_function, "stemming")
            except Exception:
                pass
def optimise_tables():
    """
    Optimise all DB tables to defragment them in order to increase DB
    performance.  Useful to run from time to time when the site is idle,
    say once a month during a weekend night.
    FIXME: should produce useful output about outcome.
    """
    # SHOW TABLES yields one-column rows holding the table name.
    for (table_name,) in run_sql("SHOW TABLES"):
        write_message("optimising table %s" % table_name)
        run_sql("OPTIMIZE TABLE %s" % wash_table_column_name(table_name)) # kwalitee: disable=sql
def get_name(ID, ln, rtype, table, id_column=None):
    """Returns the value from the table name based on arguments
    ID - id
    ln - a language supported by Invenio
    rtype - the type of value wanted, like 'ln', 'sn'
    table - tablename
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table
    @return: the run_sql result rows, or () on error
    """
    name = "name"
    if table[-1:].isupper():
        name = "NAME"
    if id_column:
        id_column = wash_table_column_name(id_column)
    try:
        # SECURITY FIX: bind the value arguments (rtype, ln, ID) as query
        # parameters instead of interpolating them into the SQL string.
        # Identifiers (table, name, id_column) are still interpolated but
        # are washed.
        res = run_sql(
            "SELECT value FROM %s%s WHERE type=%%s and ln=%%s and %s=%%s"
            % (table, name,
               (id_column or 'id_%s' % wash_table_column_name(table))),
            (rtype, ln, ID))
        return res
    except StandardError:
        # Best effort: any DB failure yields an empty result.
        return ()
def optimise_tables():
    """
    Optimise all DB tables to defragment them in order to increase DB
    performance.  Useful to run from time to time when the site is idle,
    say once a month during a weekend night.
    FIXME: should produce useful output about outcome.
    """
    for row in run_sql("SHOW TABLES"):
        table_name = row[0]
        if table_name == 'bibfmt':
            # inspire production: requires ~30G of temp space and 4 hours
            continue
        # aidPERSONIDPAPERS is optimised locally only (not replicated).
        if table_name == 'aidPERSONIDPAPERS':
            statement = "OPTIMIZE LOCAL TABLE %s"
        else:
            statement = "OPTIMIZE TABLE %s"
        write_message("optimising table %s" % table_name)
        run_sql(statement % wash_table_column_name(table_name)) # kwalitee: disable=sql
def modify_translations(ID, langs, sel_type, trans, table, id_column=None): """add or modify translations in tables given by table frmID - the id of the format from the format table sel_type - the name type langs - the languages trans - the translations, in same order as in langs table - the table id_column - name of the column with identifier. If None, expect column to be named 'id_%s' % table """ name = "name" if table[-1:].isupper(): name = "NAME" id_column = id_column or 'id_%s' % table if id_column: id_column = wash_table_column_name(id_column) try: for nr in range(0, len(langs)): res = run_sql( "SELECT value FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column), (ID, sel_type, langs[nr][0])) if res: if trans[nr]: res = run_sql( "UPDATE %s%s SET value=%%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column), (trans[nr], ID, sel_type, langs[nr][0])) else: res = run_sql( "DELETE FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s" % (table, name, id_column), (ID, sel_type, langs[nr][0])) else: if trans[nr]: res = run_sql( "INSERT INTO %s%s (%s, type, ln, value) VALUES (%%s,%%s,%%s,%%s)" % (table, name, id_column), (ID, sel_type, langs[nr][0], trans[nr])) return (1, "") except StandardError, e: return (0, e)
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of entries of
    a tag from the record in another index... useful for authority files.

    @param config: ConfigParser with an "index_term_count" section
        providing index_table_name and index_term_value_from_tag
    @return: dict mapping record id -> number of hits in the index
    @raise Exception: if the config lacks the index_term_count section
    """
    if not config.has_section("index_term_count"):
        # BUGFIX: the old code had an unreachable `return ()` after this
        # raise and concatenated the ConfigParser object to a str, which
        # itself raised TypeError instead of the intended message.
        raise Exception("Config file %s does not have index_term_count section" % config)
    index = config.get("index_term_count", "index_table_name")
    tag = config.get("index_term_count", "index_term_value_from_tag")
    # check against possible SQL injection:
    dummy = get_table_update_time(index)
    tag = wash_table_column_name(tag)
    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2])  # we checked that tag is safe
    records = list(run_sql(query, (tag, )))
    write_message("Number of records found with the necessary tags: %s"
                  % len(records))
    rnkset = {}
    for key, value in records:
        hits = 0
        if len(value):
            query = "SELECT hitlist from %s where term = %%s" % index  # we checked that index is a table
            row = run_sql(query, (value, ))
            if row and row[0] and row[0][0]:
                # has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except Exception:
                    # BUGFIX: was a bare except (also caught SystemExit).
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s"
                  % len(rnkset))
    return rnkset
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    Resets and repopulates the module-level ``methods`` dict (and clears
    ``voutput``) from each rank method's .cfg file: scoring function,
    output prologue/epilogue, separator regexp, i18n names and optional
    word-table/stemmer data.
    """
    global methods
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    methods = {}
    global voutput
    voutput = ""
    for (rank_method_code,) in bibrank_meths:
        # BUGFIX: the old code named this variable 'file', shadowing the
        # builtin, and bound an unused 'e' in both except clauses.
        filepath = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
        config = ConfigParser.ConfigParser()
        try:
            config.readfp(open(filepath))
        except StandardError:
            # Best effort: an unreadable file leaves the parser empty;
            # the config.get() below then raises the real error.
            pass
        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            methods[rank_method_code] = {}
            methods[rank_method_code]["function"] = cfg_function
            methods[rank_method_code]["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
            methods[rank_method_code]["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
            methods[rank_method_code]["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception("Error in configuration file: %s" % filepath)
        # Localized display names of the method, one per language.
        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            methods[rank_method_code][ln] = value
        if config.has_option(cfg_function, "table"):
            methods[rank_method_code]["rnkWORD_table"] = config.get(cfg_function, "table")
            # Drop the trailing letter and count rows of the companion
            # *R table.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(methods[rank_method_code]["rnkWORD_table"][:-1])
            methods[rank_method_code]["col_size"] = run_sql(query)[0][0]
        if config.has_option(cfg_function, "stemming") and config.get(cfg_function, "stemming"):
            try:
                methods[rank_method_code]["stemmer"] = config.get(cfg_function, "stemming")
            except Exception:
                pass
def get_name(ID, ln, rtype, table, id_column=None):
    """Returns the value from the table name based on arguments
    ID - id
    ln - a language supported by Invenio
    rtype - the type of value wanted, like 'ln', 'sn'
    table - tablename
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table
    @return: the run_sql result rows, or () on error
    """
    name = "name"
    if table[-1:].isupper():
        name = "NAME"
    if id_column:
        id_column = wash_table_column_name(id_column)
    try:
        # SECURITY FIX: bind the value arguments (rtype, ln, ID) as query
        # parameters instead of interpolating them into the SQL string.
        # Identifiers (table, name, id_column) are still interpolated but
        # are washed.
        res = run_sql(
            "SELECT value FROM %s%s WHERE type=%%s and ln=%%s and %s=%%s"
            % (table, name,
               (id_column or 'id_%s' % wash_table_column_name(table))),
            (rtype, ln, ID))
        return res
    except StandardError:
        # Best effort: any DB failure yields an empty result.
        return ()
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of entries of
    a tag from the record in another index... useful for authority files.

    @param config: ConfigParser with an "index_term_count" section
        providing index_table_name and index_term_value_from_tag
    @return: dict mapping record id -> number of hits in the index
    @raise Exception: if the config lacks the index_term_count section
    """
    if not config.has_section("index_term_count"):
        # BUGFIX: the old code had an unreachable `return()` after this
        # raise and concatenated the ConfigParser object to a str, which
        # itself raised TypeError instead of the intended message.
        raise Exception("Config file %s does not have index_term_count section" % config)
    index = config.get("index_term_count", "index_table_name")
    tag = config.get("index_term_count", "index_term_value_from_tag")
    # check against possible SQL injection:
    dummy = get_table_update_time(index)
    tag = wash_table_column_name(tag)
    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2])  # we checked that tag is safe
    records = list(run_sql(query, (tag,)))
    write_message("Number of records found with the necessary tags: %s"
                  % len(records))
    rnkset = {}
    for key, value in records:
        hits = 0
        if len(value):
            query = "SELECT hitlist from %s where term = %%s" % index  # we checked that index is a table
            row = run_sql(query, (value,))
            if row and row[0] and row[0][0]:
                # has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except Exception:
                    # BUGFIX: was a bare except (also caught SystemExit).
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s"
                  % len(rnkset))
    return rnkset
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    For every row of rnkMETHOD the matching .cfg file is parsed and the
    METHODS module-level dict is populated with the scoring function,
    output prologue/epilogue, separator regexp, i18n names and optional
    word-table / stemmer / find_similar / combine_method settings.
    """
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    for (rank_method_code, ) in bibrank_meths:
        filepath = CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"
        config = ConfigParser.ConfigParser()
        try:
            config.readfp(open(filepath))
        except IOError:
            # Best effort: an unreadable file leaves the parser empty and
            # the has_section() check below raises the real error.
            pass
        cfg_function = config.get("rank_method", "function")
        if config.has_section(cfg_function):
            METHODS[rank_method_code] = {}
            METHODS[rank_method_code]["function"] = cfg_function
            METHODS[rank_method_code]["prefix"] = config.get(
                cfg_function, "relevance_number_output_prologue")
            METHODS[rank_method_code]["postfix"] = config.get(
                cfg_function, "relevance_number_output_epilogue")
            METHODS[rank_method_code][
                "chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"
        else:
            raise Exception(
                "Error in configuration file: %s" %
                (CFG_ETCDIR + "/bibrank/" + rank_method_code + ".cfg"))
        # Localized display names of the method, one per language.
        i8n_names = run_sql(
            """SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""",
            (rank_method_code, ))
        for (ln, value) in i8n_names:
            METHODS[rank_method_code][ln] = value
        if config.has_option(cfg_function, "table"):
            METHODS[rank_method_code]["rnkWORD_table"] = config.get(
                cfg_function, "table")
            # Drop the trailing letter and count rows of the companion
            # *R table.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(
                METHODS[rank_method_code]["rnkWORD_table"][:-1])
            METHODS[rank_method_code]["col_size"] = run_sql(query)[0][0]
        if config.has_option(cfg_function, "stemming") and config.get(
                cfg_function, "stemming"):
            try:
                METHODS[rank_method_code]["stemmer"] = config.get(
                    cfg_function, "stemming")
            except KeyError:
                pass
        if config.has_option(cfg_function, "stopword"):
            METHODS[rank_method_code]["stopwords"] = config.get(
                cfg_function, "stopword")
        if config.has_section("find_similar"):
            # Thresholds used by the "find similar" ranking.
            METHODS[rank_method_code]["max_word_occurence"] = float(
                config.get("find_similar", "max_word_occurence"))
            METHODS[rank_method_code]["min_word_occurence"] = float(
                config.get("find_similar", "min_word_occurence"))
            METHODS[rank_method_code]["min_word_length"] = int(
                config.get("find_similar", "min_word_length"))
            METHODS[rank_method_code]["min_nr_words_docs"] = int(
                config.get("find_similar", "min_nr_words_docs"))
            METHODS[rank_method_code]["max_nr_words_upper"] = int(
                config.get("find_similar", "max_nr_words_upper"))
            METHODS[rank_method_code]["max_nr_words_lower"] = int(
                config.get("find_similar", "max_nr_words_lower"))
            METHODS[rank_method_code]["default_min_relevance"] = int(
                config.get("find_similar", "default_min_relevance"))
        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)
        if config.has_section("combine_method"):
            # Collect method1, method2, ... until the sequence ends.
            i = 1
            METHODS[rank_method_code]["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                METHODS[rank_method_code]["combine_method"].append(
                    config.get("combine_method", "method%s" % i).split(","))
                i += 1
def get_customevent_dump(args): """ Similar to a get_event_trend implemention, but NO refining aka frequency handling is carried out what so ever. This is just a dump. A dump! @param args['id']: The event id @type args['id']: str @param args['t_start']: Date and time of start point @type args['t_start']: str @param args['t_end']: Date and time of end point @type args['t_end']: str @param args['granularity']: Granularity of date and time @type args['granularity']: str @param args['t_format']: Date and time formatting string @type args['t_format']: str @param args['cols']: Columns and it's content that will be include if don't exist or it's empty it will include all cols @type args['cols']: [ [ str, str ], ] """ # Get a MySQL friendly date lower = _to_datetime(args['t_start'], args['t_format']).isoformat() upper = _to_datetime(args['t_end'], args['t_format']).isoformat() # Get customevents # events_list = [(creation_time, event, [arg1, arg2, ...]), ...] event_list = [] event_cols = {} for id, i in [(args['ids'][i], str(i)) for i in range(len(args['ids']))]: # Get all the event arguments and creation times tbl_name = get_customevent_table(id) col_names = get_customevent_args(id) sql_query = [ "SELECT * FROM %s WHERE creation_time > '%s'" % (tbl_name, lower) ] # Note: SELECT * technique is okay here sql_query.append("AND creation_time < '%s'" % upper) sql_param = [] for col_bool, col_title, col_content in args['cols' + i]: if not col_title in col_names: continue if col_content: if col_bool == "and" or col_bool == "": sql_query.append("AND %s" % wash_table_column_name(col_title)) elif col_bool == "or": sql_query.append("OR %s" % wash_table_column_name(col_title)) elif col_bool == "and_not": sql_query.append("AND NOT %s" % wash_table_column_name(col_title)) else: continue sql_query.append(" LIKE %s") sql_param.append("%" + col_content + "%") sql_query.append("ORDER BY creation_time DESC") sql = ' '.join(sql_query) res = run_sql(sql, tuple(sql_param)) for row in res: 
event_list.append((row[1], id, row[2:])) # Get the event col names try: event_cols[id] = cPickle.loads( run_sql("SELECT cols FROM staEVENT WHERE id = %s", (id, ))[0][0]) except TypeError: event_cols[id] = ["Unnamed"] event_list.sort() output = [] for row in event_list: temp = [row[1], row[0].strftime('%Y-%m-%d %H:%M:%S')] arguments = [ "%s: %s" % (event_cols[row[1]][i], row[2][i]) for i in range(len(row[2])) ] temp.extend(arguments) output.append(tuple(temp)) return output
def get_customevent_dump(args):
    """ Similar to a get_event_trend implemention, but NO refining aka
    frequency handling is carried out what so ever. This is just a dump.
    A dump!

    @param args['id']: The event id
    @type args['id']: str
    @param args['t_start']: Date and time of start point
    @type args['t_start']: str
    @param args['t_end']: Date and time of end point
    @type args['t_end']: str
    @param args['granularity']: Granularity of date and time
    @type args['granularity']: str
    @param args['t_format']: Date and time formatting string
    @type args['t_format']: str
    @param args['cols']: Columns and the content to include; missing or
        empty entries mean all columns are included
    @type args['cols']: [ [ str, str ], ]
    """
    # Build MySQL-friendly ISO dates for the range boundaries.
    lower = _to_datetime(args["t_start"], args["t_format"]).isoformat()
    upper = _to_datetime(args["t_end"], args["t_format"]).isoformat()
    # event_list = [(creation_time, event, [arg1, arg2, ...]), ...]
    event_list = []
    event_cols = {}
    # Map each boolean selector onto its SQL connective; an empty selector
    # behaves like "and", anything unknown skips the column filter.
    connectives = {"and": "AND", "": "AND", "or": "OR", "and_not": "AND NOT"}
    for idx, event_id in enumerate(args["ids"]):
        suffix = str(idx)
        # Fetch every event argument and creation time in range.
        tbl_name = get_customevent_table(event_id)
        col_names = get_customevent_args(event_id)
        pieces = [
            "SELECT * FROM %s WHERE creation_time > '%s'" % (tbl_name, lower)
        ]  # Note: SELECT * technique is okay here
        pieces.append("AND creation_time < '%s'" % upper)
        params = []
        for col_bool, col_title, col_content in args["cols" + suffix]:
            if col_title not in col_names or not col_content:
                continue
            connective = connectives.get(col_bool)
            if connective is None:
                continue
            pieces.append("%s %s" % (connective,
                                     wash_table_column_name(col_title)))
            pieces.append(" LIKE %s")
            params.append("%" + col_content + "%")
        pieces.append("ORDER BY creation_time DESC")
        for row in run_sql(" ".join(pieces), tuple(params)):
            event_list.append((row[1], event_id, row[2:]))
        # Resolve the human-readable column names for this event.
        try:
            event_cols[event_id] = cPickle.loads(run_sql(
                "SELECT cols FROM staEVENT WHERE id = %s",
                (event_id,))[0][0])
        except TypeError:
            event_cols[event_id] = ["Unnamed"]
    event_list.sort()
    output = []
    for row in event_list:
        entry = [row[1], row[0].strftime("%Y-%m-%d %H:%M:%S")]
        entry.extend(["%s: %s" % (event_cols[row[1]][i], row[2][i])
                      for i in range(len(row[2]))])
        output.append(tuple(entry))
    return output
def create_customevent(id=None, name=None, cols=None):
    """
    Creates a new custom event by setting up the necessary MySQL tables.

    @param id: Proposed human-readable id of the new event.
    @type id: str

    @param name: Optionally, a descriptive name.
    @type name: str

    @param cols: Optionally, the name of the additional columns.
    @type cols: [str]

    @return: A status message
    @type: str
    """
    # Avoid the mutable-default-argument pitfall: None means "no extra cols"
    if cols is None:
        cols = []

    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept id and name with standard characters (raw string so \w
    # is passed to the regex engine untouched)
    if not re.search(r"[^\w]", str(id) + str(name)) is None:
        return "Please note that both event id and event name needs to be written without any non-standard characters."

    # Make sure the chosen id is not already taken
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id, ))) != 0:
        return "Event id [%s] already exists! Aborted." % id

    # Check if the cols are valid titles ("creation_time" and "id" are
    # reserved by the event table schema created below)
    for argument in cols:
        if (argument == "creation_time") or (argument == "id"):
            return "Invalid column title: %s! Aborted." % argument

    # Insert a new row into the events table describing the new event;
    # name and cols are stored as NULL when not supplied
    sql_param = [id]
    if name is not None:
        sql_name = "%s"
        sql_param.append(name)
    else:
        sql_name = "NULL"
    if len(cols) != 0:
        sql_cols = "%s"
        sql_param.append(cPickle.dumps(cols))
    else:
        sql_cols = "NULL"
    run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name +
            ", " + sql_cols + ")", tuple(sql_param))

    tbl_name = get_customevent_table(id)

    # Create a table for the new event: auto-increment id, a creation
    # timestamp, and one indexed MEDIUMTEXT column per extra argument
    sql_query = ["CREATE TABLE %s (" % tbl_name]
    sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
    sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
    for argument in cols:
        arg = wash_table_column_name(argument)
        sql_query.append("%s MEDIUMTEXT NULL," % arg)
        sql_query.append("INDEX %s (%s(50))," % (arg, arg))
    sql_query.append("PRIMARY KEY (id))")
    sql_str = ' '.join(sql_query)
    run_sql(sql_str)

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully created.\n" +
            "Please use event id [%s] when registering an event.") % (tbl_name, id)
def get_field(recid, field): """ Gets list of field 'field' for the record with 'recid' system number. """ digit = field[0:2] bibbx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s" % (wash_table_column_name(bibbx), wash_table_column_name(bibx)) return [row[0] for row in run_sql(query, (recid, field))]
# NOTE(review): this is a verbatim duplicate of an earlier
# create_customevent definition in this file; being defined later, it is
# the one that takes effect. One of the two copies should be removed.
def create_customevent(id=None, name=None, cols=None):
    """
    Creates a new custom event by setting up the necessary MySQL tables.

    @param id: Proposed human-readable id of the new event.
    @type id: str

    @param name: Optionally, a descriptive name.
    @type name: str

    @param cols: Optionally, the name of the additional columns.
    @type cols: [str]

    @return: A status message
    @type: str
    """
    # Avoid the mutable-default-argument pitfall: None means "no extra cols"
    if cols is None:
        cols = []

    if id is None:
        return "Please specify a human-readable ID for the event."

    # Only accept id and name with standard characters (raw string so \w
    # is passed to the regex engine untouched)
    if not re.search(r"[^\w]", str(id) + str(name)) is None:
        return "Please note that both event id and event name needs to be written without any non-standard characters."

    # Make sure the chosen id is not already taken
    if len(run_sql("SELECT NULL FROM staEVENT WHERE id = %s", (id, ))) != 0:
        return "Event id [%s] already exists! Aborted." % id

    # Check if the cols are valid titles ("creation_time" and "id" are
    # reserved by the event table schema created below)
    for argument in cols:
        if (argument == "creation_time") or (argument == "id"):
            return "Invalid column title: %s! Aborted." % argument

    # Insert a new row into the events table describing the new event;
    # name and cols are stored as NULL when not supplied
    sql_param = [id]
    if name is not None:
        sql_name = "%s"
        sql_param.append(name)
    else:
        sql_name = "NULL"
    if len(cols) != 0:
        sql_cols = "%s"
        sql_param.append(cPickle.dumps(cols))
    else:
        sql_cols = "NULL"
    run_sql("INSERT INTO staEVENT (id, name, cols) VALUES (%s, " + sql_name +
            ", " + sql_cols + ")", tuple(sql_param))

    tbl_name = get_customevent_table(id)

    # Create a table for the new event: auto-increment id, a creation
    # timestamp, and one indexed MEDIUMTEXT column per extra argument
    sql_query = ["CREATE TABLE %s (" % tbl_name]
    sql_query.append("id MEDIUMINT unsigned NOT NULL auto_increment,")
    sql_query.append("creation_time TIMESTAMP DEFAULT NOW(),")
    for argument in cols:
        arg = wash_table_column_name(argument)
        sql_query.append("%s MEDIUMTEXT NULL," % arg)
        sql_query.append("INDEX %s (%s(50))," % (arg, arg))
    sql_query.append("PRIMARY KEY (id))")
    sql_str = ' '.join(sql_query)
    run_sql(sql_str)

    # We're done! Print notice containing the name of the event.
    return ("Event table [%s] successfully created.\n" +
            "Please use event id [%s] when registering an event.") % (tbl_name, id)