def dict2db(table_name, dict_data, mode):
    """
    Load the dict values into the database

    Modes of operation:
    i - insert: INSERT IGNORE a row made of all the items of dict_data
    c - correct: UPDATE the table with the items of dict_data, leaving
        out the first primary key column
    Any other value, including the 'r' (replace) mode that has no code
    of its own in this function, is handled in LOAD_DEFAULT_MODE.

    table_name - name of the table to write to
    dict_data - column name -> value mapping; it is left untouched
    mode - mode letter, see above

    The values are handed to run_sql() as query parameters instead of
    being pasted into the statement, so quotes inside them need no
    manual escaping. A statement that fails is reported on stdout and
    skipped. Nothing is returned.

    Raises ValueError if neither mode nor LOAD_DEFAULT_MODE is 'i' or 'c',
    and KeyError if dict_data lacks the primary key in correct mode.
    """
    known_modes = ('i', 'c')
    if mode not in known_modes:
        if LOAD_DEFAULT_MODE not in known_modes:
            # Recursing with an unsupported default could never terminate.
            raise ValueError("unsupported load mode %r (default mode %r)"
                             % (mode, LOAD_DEFAULT_MODE))
        # The nested call does all the work; returning here avoids running
        # a statement that was never built for the unknown mode.
        return dict2db(table_name, dict_data, LOAD_DEFAULT_MODE)

    table = wash_table_column_name(table_name)
    row = dict(dict_data)  # work on a copy so the caller's dict is not changed

    if mode == 'c' and '_' in table_name:
        # FIXIT (inherited from the original code): tables with '_' in their
        # name are not corrected; a plain SELECT is executed instead.
        query = "SELECT * FROM %s" % table
        params = ()
    else:
        if mode == 'c':
            # The primary key column is never part of the SET clause.
            del row[get_primary_keys(table_name)[0]]
        if not row:
            # Nothing to write; an empty column list is not a useful statement.
            return
        fields = list(row)
        params = tuple(row[field] for field in fields)
        # NOTE(review): the column names come straight from the dict keys and
        # are not washed, exactly as before - confirm the keys are trusted.
        if mode == 'i':
            query = "INSERT IGNORE INTO %s(%s) VALUES (%s)" % (
                table, " , ".join(fields), " , ".join(["%s"] * len(fields)))
        else:
            # NOTE(review): as in the original there is no WHERE clause, so
            # every row of the table is updated - confirm this is intended.
            query = "UPDATE %s SET %s" % (
                table, " , ".join(["%s=%%s" % field for field in fields]))

    try:
        if params:
            run_sql(query, params)
        else:
            run_sql(query)
    except Exception:
        # Best effort: report the values that could not be stored and go on.
        shown_values = "' , '".join(["%s" % (value,) for value in params])
        print("VALUES: %s ALREADY EXIST IN TABLE %s. SKIPPING" % (shown_values, table_name))
def get_field(recid, field):
    """
    Return the list of values of tag 'field' for the record with
    system number 'recid'.

    The first two characters of 'field' select the bibNNx value table and
    the bibrec_bibNNx table that links its rows to records.
    """
    prefix = field[0:2]
    value_table = wash_table_column_name("bib%sx" % prefix)
    link_table = wash_table_column_name("bibrec_bib%sx" % prefix)
    query = (
        "SELECT bx.value FROM %s AS bx, %s AS bibx "
        "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s"
    ) % (value_table, link_table)
    values = []
    for row in run_sql(query, (recid, field)):
        values.append(row[0])
    return values
def get_field(recid, field):
    """
    Return the list of values of tag 'field' for the record with
    system number 'recid'.

    The first two characters of 'field' select the bibNNx value table and
    the bibrec_bibNNx table that links its rows to records.
    """
    prefix = field[0:2]
    value_table = wash_table_column_name("bib%sx" % prefix)
    link_table = wash_table_column_name("bibrec_bib%sx" % prefix)
    query = (
        "SELECT bx.value FROM %s AS bx, %s AS bibx "
        "WHERE bibx.id_bibrec=%%s AND bx.id=bibx.id_bibxxx AND bx.tag=%%s"
    ) % (value_table, link_table)
    values = []
    for row in run_sql(query, (recid, field)):
        values.append(row[0])
    return values
def modify_translations(ID, langs, sel_type, trans, table, id_column=None):
    """Add, update or delete translations in the name table of 'table'.

    ID - the id the translations are attached to
    sel_type - the name type
    langs - the languages; the first element of each entry is the code used
    trans - the translations, in same order as in langs
    table - the table; its name table is table + 'name' (or 'NAME' when the
            table name ends with an uppercase letter)
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table

    For each language: a non-empty translation is updated when a value is
    already stored and inserted otherwise; an empty translation removes a
    stored value.

    Returns (1, "") on success and (0, exception) when a StandardError
    is raised on the way."""
    suffix = "NAME" if table[-1:].isupper() else "name"
    # The fallback column name is never empty, so washing is unconditional.
    id_column = wash_table_column_name(id_column or 'id_%s' % table)
    names = (table, suffix, id_column)

    select_tpl = 'SELECT value FROM "%s%s" WHERE "%s"=%%s AND type=%%s AND ln=%%s'
    update_tpl = 'UPDATE "%s%s" SET value=%%s WHERE "%s"=%%s AND type=%%s AND ln=%%s'
    delete_tpl = 'DELETE FROM "%s%s" WHERE "%s"=%%s AND type=%%s AND ln=%%s'
    insert_tpl = 'INSERT INTO "%s%s" ("%s", type, ln, value) VALUES (%%s,%%s,%%s,%%s)'

    try:
        for nr in range(len(langs)):
            lang_code = langs[nr][0]
            stored = run_sql(select_tpl % names, (ID, sel_type, lang_code))
            translation = trans[nr]
            if translation:
                if stored:
                    run_sql(update_tpl % names,
                            (translation, ID, sel_type, lang_code))
                else:
                    run_sql(insert_tpl % names,
                            (ID, sel_type, lang_code, translation))
            elif stored:
                run_sql(delete_tpl % names, (ID, sel_type, lang_code))
        return (1, "")
    except StandardError as e:
        return (0, e)
def dump_collection(collection, config, force_ids, print_to_screen=False):
    """
    Dump the current collection into 'config' and write it out.

    Notation used for the three tables named in collection['relations']:
    ori(origin) - rel(relation) - fin(final)
    For example in the relation field-field_tag-tag:
    ori(origin): field table
    rel(relation): field_tag
    fin(final): tag

    Every origin row becomes one section of 'config'; the final rows
    reached through the relation table are stored inside that row under
    keys prefixed with the final table name. When force_ids is False the
    row goes through delete_ids() before being stored. The result is
    printed when print_to_screen is True, otherwise config.write() is
    called without arguments.
    """
    tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-")
    origin_rows = query2list(
        "SELECT * FROM %s" % (wash_table_column_name(tbl_ori)), tbl_ori)
    tbl_ori_id = get_primary_keys(tbl_ori)[0]

    for index_ori, result_ori in enumerate(origin_rows):
        dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id)
        # Built before add_special_field() gets a chance to touch the row.
        rel_query = "SELECT * FROM %s WHERE %s=%s" % (
            wash_table_column_name(tbl_rel),
            dict_rels[tbl_ori + "." + tbl_ori_id],
            result_ori[tbl_ori_id])
        if collection['tables'][tbl_ori].startswith('extend'):
            add_special_field(collection, tbl_ori, result_ori)

        for result_rel in query2list(rel_query, tbl_rel):
            tbl_fin_id = get_primary_keys(tbl_fin)[0]
            # The relation entry has the form "<table>.<column>"; keep the column.
            rel_column = dict_rels[tbl_fin + "." + tbl_fin_id].split(".")[1].strip()
            fin_query = "SELECT * FROM %s WHERE %s=%s" % (
                wash_table_column_name(tbl_fin), tbl_fin_id, result_rel[rel_column])
            for index_fin, result_fin in enumerate(query2list(fin_query, tbl_fin)):
                fin_key = tbl_fin + "." + create_section_id(index_fin, with_date=False)
                result_ori[fin_key] = result_fin

        section_name = tbl_ori + "." + create_section_id(index_ori)
        section_data = result_ori
        if force_ids == False:  # comparison kept as is; remove the ids from the dict
            section_data = delete_ids(result_ori, collection['relations'].split("-"))
        config[section_name] = section_data

    if print_to_screen == True:  # comparison kept as is
        # Write to an in-memory string instead of the file, then show it.
        screen_buffer = StringIO.StringIO()
        config.write(screen_buffer)
        print(screen_buffer.getvalue())
    else:
        config.write()
def create_rnkmethod_cache():
    """Create cache with vital information for each rank method.

    For every name found in rnkMETHOD the matching '<name>.cfg' path is
    looked up in 'configuration', parsed, and the settings are stored in
    METHODS[name]: function, prefix, postfix, separator pattern, the
    translated names, and - when present in the file - the word table
    with its size, stemming, stopwords, the 'find_similar' limits and the
    'combine_method' list.

    Raises Exception when the file has no section named after the
    configured function. A file that cannot be opened is not reported
    here; the first config.get() on the empty parser raises instead.
    """
    bibrank_meths = run_sql("SELECT name from rnkMETHOD")
    for (rank_method_code,) in bibrank_meths:
        filepath = configuration.get(rank_method_code + '.cfg', '')
        config = ConfigParser.ConfigParser()
        try:
            # 'with' makes sure the file handle is closed again.
            with open(filepath) as cfg_file:
                config.readfp(cfg_file)
        except IOError:
            pass

        cfg_function = config.get("rank_method", "function")
        if not config.has_section(cfg_function):
            raise Exception("Error in configuration config_file: %s" % (rank_method_code + ".cfg", ))

        method = METHODS[rank_method_code] = {}
        method["function"] = cfg_function
        method["prefix"] = config.get(cfg_function, "relevance_number_output_prologue")
        method["postfix"] = config.get(cfg_function, "relevance_number_output_epilogue")
        method["chars_alphanumericseparators"] = r"[1234567890\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]"

        i8n_names = run_sql("""SELECT ln,value from rnkMETHODNAME,rnkMETHOD where id_rnkMETHOD=rnkMETHOD.id and rnkMETHOD.name=%s""", (rank_method_code,))
        for (ln, value) in i8n_names:
            method[ln] = value

        if config.has_option(cfg_function, "table"):
            method["rnkWORD_table"] = config.get(cfg_function, "table")
            # Count the rows of the table named like the word table with its
            # last character replaced by 'R'.
            query = "SELECT count(*) FROM %sR" % wash_table_column_name(method["rnkWORD_table"][:-1])
            method["col_size"] = run_sql(query)[0][0]

        if config.has_option(cfg_function, "stemming"):
            stemming = config.get(cfg_function, "stemming")
            if stemming:
                method["stemmer"] = stemming

        if config.has_option(cfg_function, "stopword"):
            method["stopwords"] = config.get(cfg_function, "stopword")

        if config.has_section("find_similar"):
            for option in ("max_word_occurence", "min_word_occurence"):
                method[option] = float(config.get("find_similar", option))
            for option in ("min_word_length", "min_nr_words_docs",
                           "max_nr_words_upper", "max_nr_words_lower",
                           "default_min_relevance"):
                method[option] = int(config.get("find_similar", option))

        if cfg_function in ('word_similarity_solr', 'word_similarity_xapian'):
            create_external_ranking_settings(rank_method_code, config)

        if config.has_section("combine_method"):
            # Options are numbered method1, method2, ... without gaps.
            i = 1
            method["combine_method"] = []
            while config.has_option("combine_method", "method%s" % i):
                method["combine_method"].append(config.get("combine_method", "method%s" % i).split(","))
                i += 1
def dump_collection(collection, config, force_ids, print_to_screen=False):
    """
    Dump the current collection into 'config' and write it out.

    Notation used for the three tables named in collection['relations']:
    ori(origin) - rel(relation) - fin(final)
    For example in the relation field-field_tag-tag:
    ori(origin): field table
    rel(relation): field_tag
    fin(final): tag

    Every origin row becomes one section of 'config'; the final rows
    reached through the relation table are stored inside that row under
    keys prefixed with the final table name. When force_ids is False the
    row goes through delete_ids() before being stored. The result is
    printed when print_to_screen is True, otherwise config.write() is
    called without arguments.
    """
    tbl_ori, tbl_rel, tbl_fin = collection['relations'].split("-")
    origin_rows = query2list(
        "SELECT * FROM %s" % (wash_table_column_name(tbl_ori)), tbl_ori)
    tbl_ori_id = get_primary_keys(tbl_ori)[0]

    for index_ori, result_ori in enumerate(origin_rows):
        dict_rels = get_relationship(collection, tbl_ori, tbl_ori_id)
        # Built before add_special_field() gets a chance to touch the row.
        rel_query = "SELECT * FROM %s WHERE %s=%s" % (
            wash_table_column_name(tbl_rel),
            dict_rels[tbl_ori + "." + tbl_ori_id],
            result_ori[tbl_ori_id])
        if collection['tables'][tbl_ori].startswith('extend'):
            add_special_field(collection, tbl_ori, result_ori)

        for result_rel in query2list(rel_query, tbl_rel):
            tbl_fin_id = get_primary_keys(tbl_fin)[0]
            # The relation entry has the form "<table>.<column>"; keep the column.
            rel_column = dict_rels[tbl_fin + "." + tbl_fin_id].split(".")[1].strip()
            fin_query = "SELECT * FROM %s WHERE %s=%s" % (
                wash_table_column_name(tbl_fin), tbl_fin_id, result_rel[rel_column])
            for index_fin, result_fin in enumerate(query2list(fin_query, tbl_fin)):
                fin_key = tbl_fin + "." + create_section_id(index_fin, with_date=False)
                result_ori[fin_key] = result_fin

        section_name = tbl_ori + "." + create_section_id(index_ori)
        section_data = result_ori
        if force_ids == False:  # comparison kept as is; remove the ids from the dict
            section_data = delete_ids(result_ori, collection['relations'].split("-"))
        config[section_name] = section_data

    if print_to_screen == True:  # comparison kept as is
        # Write to an in-memory string instead of the file, then show it.
        screen_buffer = StringIO.StringIO()
        config.write(screen_buffer)
        print(screen_buffer.getvalue())
    else:
        config.write()
def get_primary_keys(table_name):
    """
    Return the names of the primary key columns of 'table_name'.

    The table is described with the DESC mysql statement; the columns whose
    fourth description field is 'PRI' are kept, in the order DESC lists them.
    """
    description = run_sql("DESC %s" % wash_table_column_name(table_name))
    return [column[0] for column in description if column[3] == 'PRI']
def check_tables():
    """
    Run CHECK TABLE on every table listed by SHOW TABLES.

    Useful to run from time to time when the site is idle, say once a
    month during a weekend night.

    FIXME: should produce useful output about outcome.
    """
    for listed in run_sql("SHOW TABLES"):
        current = listed[0]
        write_message("checking table %s" % current)
        statement = "CHECK TABLE %s" % wash_table_column_name(current)
        run_sql(statement) # kwalitee: disable=sql
def optimise_tables():
    """
    Run OPTIMIZE TABLE on every table listed by SHOW TABLES, to
    defragment them in order to increase DB performance.

    Useful to run from time to time when the site is idle, say once a
    month during a weekend night.

    FIXME: should produce useful output about outcome.
    """
    for listed in run_sql("SHOW TABLES"):
        current = listed[0]
        write_message("optimising table %s" % current)
        statement = "OPTIMIZE TABLE %s" % wash_table_column_name(current)
        run_sql(statement) # kwalitee: disable=sql
def get_name(ID, ln, rtype, table, id_column=None):
    """Returns the value from the table name based on arguments

    ID - id
    ln - a language supported by Invenio
    rtype - the type of value wanted, like 'ln', 'sn'
    table - tablename; the name table queried is table + 'name' (or
            'NAME' when the table name ends with an uppercase letter)
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table

    Returns the result of run_sql() for the query, or an empty tuple when
    a StandardError is raised while building or running it.

    ID, ln and rtype are passed to run_sql() as query parameters rather
    than being formatted into the SQL text."""
    name = "NAME" if table[-1:].isupper() else "name"
    if id_column:
        id_column = wash_table_column_name(id_column)
    try:
        # The table name is part of the SQL text, so it is washed in every
        # case, not only when it is needed for the default id column.
        table = wash_table_column_name(table)
        query = 'SELECT value FROM "%s%s" WHERE type=%%s and ln=%%s and %s=%%s' % (
            table, name, id_column or 'id_%s' % table)
        return run_sql(query, (rtype, ln, ID))
    except StandardError:
        return ()
def dict2db(table_name, dict_data, mode):
    """
    Load the dict values into the database

    Modes of operation:
    i - insert: INSERT IGNORE a row made of all the items of dict_data
    c - correct: UPDATE the table with the items of dict_data, leaving
        out the first primary key column
    Any other value, including the 'r' (replace) mode that has no code
    of its own in this function, is handled in LOAD_DEFAULT_MODE.

    table_name - name of the table to write to
    dict_data - column name -> value mapping; it is left untouched
    mode - mode letter, see above

    The values are handed to run_sql() as query parameters instead of
    being pasted into the statement, so quotes inside them need no
    manual escaping. A statement that fails is reported on stdout and
    skipped. Nothing is returned.

    Raises ValueError if neither mode nor LOAD_DEFAULT_MODE is 'i' or 'c',
    and KeyError if dict_data lacks the primary key in correct mode.
    """
    known_modes = ('i', 'c')
    if mode not in known_modes:
        if LOAD_DEFAULT_MODE not in known_modes:
            # Recursing with an unsupported default could never terminate.
            raise ValueError("unsupported load mode %r (default mode %r)"
                             % (mode, LOAD_DEFAULT_MODE))
        # The nested call does all the work; returning here avoids running
        # a statement that was never built for the unknown mode.
        return dict2db(table_name, dict_data, LOAD_DEFAULT_MODE)

    table = wash_table_column_name(table_name)
    row = dict(dict_data)  # work on a copy so the caller's dict is not changed

    if mode == 'c' and '_' in table_name:
        # FIXIT (inherited from the original code): tables with '_' in their
        # name are not corrected; a plain SELECT is executed instead.
        query = "SELECT * FROM %s" % table
        params = ()
    else:
        if mode == 'c':
            # The primary key column is never part of the SET clause.
            del row[get_primary_keys(table_name)[0]]
        if not row:
            # Nothing to write; an empty column list is not a useful statement.
            return
        fields = list(row)
        params = tuple(row[field] for field in fields)
        # NOTE(review): the column names come straight from the dict keys and
        # are not washed, exactly as before - confirm the keys are trusted.
        if mode == 'i':
            query = "INSERT IGNORE INTO %s(%s) VALUES (%s)" % (
                table, " , ".join(fields), " , ".join(["%s"] * len(fields)))
        else:
            # NOTE(review): as in the original there is no WHERE clause, so
            # every row of the table is updated - confirm this is intended.
            query = "UPDATE %s SET %s" % (
                table, " , ".join(["%s=%%s" % field for field in fields]))

    try:
        if params:
            run_sql(query, params)
        else:
            run_sql(query)
    except Exception:
        # Best effort: report the values that could not be stored and go on.
        shown_values = "' , '".join(["%s" % (value,) for value in params])
        print("VALUES: %s ALREADY EXIST IN TABLE %s. SKIPPING" % (shown_values, table_name))
def get_name(ID, ln, rtype, table, id_column=None):
    """Returns the value from the table name based on arguments

    ID - id
    ln - a language supported by Invenio
    rtype - the type of value wanted, like 'ln', 'sn'
    table - tablename; the name table queried is table + 'name' (or
            'NAME' when the table name ends with an uppercase letter)
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table

    Returns the result of run_sql() for the query, or an empty tuple when
    a StandardError is raised while building or running it.

    ID, ln and rtype are passed to run_sql() as query parameters rather
    than being formatted into the SQL text."""
    name = "NAME" if table[-1:].isupper() else "name"
    if id_column:
        id_column = wash_table_column_name(id_column)
    try:
        # The table name is part of the SQL text, so it is washed in every
        # case, not only when it is needed for the default id column.
        table = wash_table_column_name(table)
        query = "SELECT value FROM %s%s WHERE type=%%s and ln=%%s and %s=%%s" % (
            table, name, id_column or 'id_%s' % table)
        return run_sql(query, (rtype, ln, ID))
    except StandardError:
        return ()
def modify_translations(ID, langs, sel_type, trans, table, id_column=None):
    """Add, update or delete translations in the name table of 'table'.

    ID - the id the translations are attached to
    sel_type - the name type
    langs - the languages; the first element of each entry is the code used
    trans - the translations, in same order as in langs
    table - the table; its name table is table + 'name' (or 'NAME' when the
            table name ends with an uppercase letter)
    id_column - name of the column with identifier. If None, expect column
                to be named 'id_%s' % table

    For each language: a non-empty translation is updated when a value is
    already stored and inserted otherwise; an empty translation removes a
    stored value.

    Returns (1, "") on success and (0, exception) when a StandardError
    is raised on the way."""
    suffix = "NAME" if table[-1:].isupper() else "name"
    # The fallback column name is never empty, so washing is unconditional.
    id_column = wash_table_column_name(id_column or 'id_%s' % table)
    names = (table, suffix, id_column)

    select_tpl = "SELECT value FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s"
    update_tpl = "UPDATE %s%s SET value=%%s WHERE %s=%%s AND type=%%s AND ln=%%s"
    delete_tpl = "DELETE FROM %s%s WHERE %s=%%s AND type=%%s AND ln=%%s"
    insert_tpl = "INSERT INTO %s%s (%s, type, ln, value) VALUES (%%s,%%s,%%s,%%s)"

    try:
        for nr in range(len(langs)):
            lang_code = langs[nr][0]
            stored = run_sql(select_tpl % names, (ID, sel_type, lang_code))
            translation = trans[nr]
            if translation:
                if stored:
                    run_sql(update_tpl % names,
                            (translation, ID, sel_type, lang_code))
                else:
                    run_sql(insert_tpl % names,
                            (ID, sel_type, lang_code, translation))
            elif stored:
                run_sql(delete_tpl % names, (ID, sel_type, lang_code))
        return (1, "")
    except StandardError as e:
        return (0, e)
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of entries of a
    tag from the record in another index...useful for authority files

    config - parsed configuration; its 'index_term_count' section must
             provide 'index_table_name' and 'index_term_value_from_tag'

    Returns a dict mapping each record id found for the tag to the size of
    the hitlist stored in the index table for the record's value. The
    weight is 0 for an empty value, a missing term or a hitlist that
    cannot be decoded.

    Raises Exception when the 'index_term_count' section is missing."""
    if not config.has_section("index_term_count"):
        # %r instead of string concatenation: config is an object, not a str.
        raise Exception("Config file %r does not have index_term_count section" % (config,))

    index = config.get("index_term_count", "index_table_name")
    tag = config.get("index_term_count", "index_term_value_from_tag")
    # check against possible SQL injection: the call is made for its
    # validation effect only, its result is not needed
    get_table_update_time(index)
    tag = wash_table_column_name(tag)

    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2]) # we checked that tag is safe
    records = list(run_sql(query, (tag,)))
    write_message("Number of records found with the necessary tags: %s" % len(records))

    # The statement is the same for every record, so build it once.
    hitlist_query = "SELECT hitlist from %s where term = %%s" % index # we checked that index is a table
    rnkset = {}
    for key, value in records:
        hits = 0
        if value:
            row = run_sql(hitlist_query, (value,))
            if row and row[0] and row[0][0]:
                # has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except Exception:
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s" % len(rnkset))
    return rnkset
def calculate_index_term_count(config):
    """Calculate the weight of a record set based on number of entries of a
    tag from the record in another index...useful for authority files

    config - parsed configuration; its 'index_term_count' section must
             provide 'index_table_name' and 'index_term_value_from_tag'

    Returns a dict mapping each record id found for the tag to the size of
    the hitlist stored in the index table for the record's value. The
    weight is 0 for an empty value, a missing term or a hitlist that
    cannot be decoded.

    Raises Exception when the 'index_term_count' section is missing."""
    if not config.has_section("index_term_count"):
        # %r instead of string concatenation: config is an object, not a str.
        raise Exception("Config file %r does not have index_term_count section" % (config,))

    index = config.get("index_term_count", "index_table_name")
    tag = config.get("index_term_count", "index_term_value_from_tag")
    # check against possible SQL injection: the call is made for its
    # validation effect only, its result is not needed
    get_table_update_time(index)
    tag = wash_table_column_name(tag)

    task_sleep_now_if_required(can_stop_too=True)
    write_message("......Processing all records")
    query = "SELECT id_bibrec, value FROM bib%sx, bibrec_bib%sx WHERE tag=%%s AND id_bibxxx=id" % \
            (tag[0:2], tag[0:2]) # we checked that tag is safe
    records = list(run_sql(query, (tag,)))
    write_message("Number of records found with the necessary tags: %s" % len(records))

    # The statement is the same for every record, so build it once.
    hitlist_query = "SELECT hitlist from %s where term = %%s" % index # we checked that index is a table
    rnkset = {}
    for key, value in records:
        hits = 0
        if value:
            row = run_sql(hitlist_query, (value,))
            if row and row[0] and row[0][0]:
                # has to be prepared for corrupted data!
                try:
                    hits = len(intbitset(row[0][0]))
                except Exception:
                    hits = 0
        rnkset[key] = hits
    write_message("Number of records available in rank method: %s" % len(rnkset))
    return rnkset