def __init__(self):
    """Prepare the Sphinx working directories and a DB handle.

    Side effects: creates the temp folder and the three Sphinx
    var-subfolders (run/log/data) on disk if they are missing.
    """
    # Path to the Sphinx `indexer` binary; filled in later (None until then).
    self.index_binary = None
    # Generated helper files, keyed by short name -> absolute path.
    self.files = dict()
    # Database access object used for bulk loading.
    self.aodp = DbHandler()
    # Create the temporary folder if it does not exist yet
    if not os.path.exists(Folders.temp):
        os.makedirs(Folders.temp)
    # Create the 3 folders Sphinx needs (run, log, data)
    if not os.path.exists(SphinxConfig.var_dir + '/run'):
        os.makedirs(SphinxConfig.var_dir + '/run')
    if not os.path.exists(SphinxConfig.var_dir + '/log'):
        os.makedirs(SphinxConfig.var_dir + '/log')
    if not os.path.exists(SphinxConfig.var_dir + '/data'):
        os.makedirs(SphinxConfig.var_dir + '/data')
def __init__(self, source="http"):
    """Initialize the updater state.

    :param source: "http" to fetch updates over the network, or a path
        to a local .rar archive / directory with unpacked XMLs
        (presumably — TODO confirm against Updater.__init_update_entries).
    """
    self.db_handler = DbHandler()
    self.source = source
    # Generator of update descriptors; chosen later from `source`.
    self.updalist_generator = None
    # Generator of table entries for one update; chosen later from `source`.
    self.tablelist_generator = None
    # NOTE(review): assigned None here and never populated in this class —
    # a module-level `allowed_tables` is used elsewhere instead; verify.
    self.allowed_tables = None
class Updater: # Source: "http", directory (as a full path to unpacked xmls) def __init__(self, source="http"): self.db_handler = DbHandler() self.source = source self.updalist_generator = None self.tablelist_generator = None self.allowed_tables = None def __get_entries_from_folder(self, path_to_xmls): for (dirpath, dirnames, filenames) in walk(path_to_xmls): for filename in filenames: if filename.endswith(".XML"): xmltable = AoXmlTableEntry.from_dir(filename, dirpath.replace("\\", "/") + "/") if xmltable.table_name in allowed_tables: yield xmltable break @classmethod def get_current_fias_version(cls): db = None try: db = DBImpl(psycopg2, DatabaseConfig) rows = db.get_rows('SELECT version FROM "CONFIG" WHERE id=0', True) assert len(rows) > 0, "Cannot get a version" return rows[0]['version'] except: return 0 finally: if db: db.close() @classmethod def __set__update_version(cls, updver=0): db = DBImpl(psycopg2, DatabaseConfig) try: assert isinstance(updver, int), "Update version must be of int type." db.execute('UPDATE "CONFIG" SET version={} WHERE id=0'.format(updver)) finally: db.close() # Получает верию ФИАС с клавиатуры (если мы берем базу из папки или локального архива и не можем определить, # что это за версия @staticmethod def __get_update_version_from_console(): mode = None while not mode: try: mode = int(raw_input('Enter FIAS update version (3 digit):')) except ValueError: print "Not a valid fias version, try again." 
return mode def __get_updates_from_folder(self, foldername): fias_db_version = self.__get_update_version_from_console() yield dict(intver=fias_db_version, textver="Version {}".format(fias_db_version), delta_url=foldername, complete_url=foldername) @staticmethod def __get_updates_from_rar(url): aorar = AoRar() if url.startswith("http://") or url.startswith("https://"): aorar.download(url) if url.endswith(".rar") and path.isfile(url): aorar.local(url) for table_entry in aorar.get_table_entries(allowed_tables): yield table_entry def __init_update_entries(self, updates_generator): if self.source == "http": assert updates_generator, "No generator" self.tablelist_generator = self.__get_updates_from_rar self.updalist_generator = updates_generator return if self.source.endswith(".rar"): self.tablelist_generator = self.__get_updates_from_rar self.updalist_generator = self.__get_updates_from_folder(self.source) return if path.isdir(self.source): self.tablelist_generator = self.__get_entries_from_folder self.updalist_generator = self.__get_updates_from_folder(self.source) assert self.tablelist_generator, "No valid source." 
def process_single_entry(self, operation_type, table_xmlentry, chunck_size=50000): aoparser = AoDataParser(table_xmlentry, chunck_size) aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y)) def create(self, updates_generator): self.__init_update_entries(updates_generator) self.db_handler.create_structure() for update_entry in self.updalist_generator: logging.info("Processing DB #%d", update_entry['intver']) for table_entry in self.tablelist_generator(update_entry['complete_url']): if table_entry.operation_type == AoXmlTableEntry.OperationType.update: table_entry.operation_type = AoXmlTableEntry.OperationType.create self.process_single_entry(table_entry.operation_type, table_entry) Updater.__set__update_version(update_entry['intver']) else: logging.info("No updates more.") self.db_handler.create_indexes(db_shemas.keys()) logging.info("Create success") def update(self, updates_generator): self.__init_update_entries(updates_generator) # Drop all indexes if updates needed indexes_dropped = False for update_entry in self.updalist_generator: if not indexes_dropped: self.db_handler.drop_indexes(allowed_tables) indexes_dropped = True logging.info("Processing update #%d", update_entry['intver']) for table_entry in self.tablelist_generator(update_entry['delta_url']): self.process_single_entry(table_entry.operation_type, table_entry) Updater.__set__update_version(update_entry['intver']) else: logging.info("No updates more.") # Re-create all indexes (if dropped) if indexes_dropped: self.db_handler.create_indexes(allowed_tables) logging.info("Update success")
class SphinxHelper:
    """Generates Sphinx configuration files and a suggestion dictionary,
    then runs the indexer over them."""

    def __init__(self):
        # Path to the Sphinx `indexer` binary; set by configure_indexer().
        self.index_binary = None
        # Generated helper files, keyed by short name -> absolute path.
        self.files = dict()
        self.aodp = DbHandler()
        # Create the temporary folder if it does not exist yet
        if not os.path.exists(Folders.temp):
            os.makedirs(Folders.temp)
        # Create the 3 folders Sphinx needs (was three copy-pasted ifs)
        for subdir in ('/run', '/log', '/data'):
            if not os.path.exists(SphinxConfig.var_dir + subdir):
                os.makedirs(SphinxConfig.var_dir + subdir)

    def configure_indexer(self, indexer_binary, config_filename):
        """Produce all config/dict files, run the indexer, clean up.

        :param indexer_binary: path to the Sphinx `indexer` executable.
        :param config_filename: destination for the main sphinx.conf.
        """
        logging.info("Start configuring Sphinx...")
        self.index_binary = indexer_binary
        # Create ADDROBJ config
        self.files['addrobj.conf'] = self.__create_ao_index_config()
        # Produce dict file
        self.files['dict.txt'] = self.__create_suggestion_dict()
        # Put dict into db
        self.files['dict.csv'] = self.__dbexport_sugg_dict()
        # Create SUGGEST config
        self.files['suggest.conf'] = self.__create_sugg_index_config()
        # Create main config (sphinx.conf)
        out_fname = self.__create_main_config(config_filename)
        # Index both configs
        run_index_cmd = "{} -c {} --all --rotate".format(self.index_binary, out_fname)
        logging.info("Indexing main (%s)...", out_fname)
        os.system(run_index_cmd)
        logging.info("All indexes were created.")
        # Remove temp files; best-effort, so only OSError is ignored
        # (was a bare `except:` which also hid the None path produced by
        # the missing return in __dbexport_sugg_dict, now fixed).
        for fname, fpath in self.files.iteritems():
            try:
                os.remove(fpath)
            except OSError:
                logging.warning("Cannot delete %s. Not accessible.", fpath)
        logging.info("Temporary files removed.")
        logging.info("Successfully configured. Please restart searchd.")

    def __create_sugg_index_config(self):
        """Render the suggestion index config into the temp folder and
        return its absolute path."""
        fname = os.path.abspath(Folders.temp + "/suggest.conf")
        logging.info("Creating config %s", fname)
        conf_data = template('aore/templates/sphinx/idx_suggest.conf',
                             db_host=DatabaseConfig.host,
                             db_user=DatabaseConfig.user,
                             db_password=DatabaseConfig.password,
                             db_name=DatabaseConfig.database,
                             db_port=DatabaseConfig.port,
                             index_name=SphinxConfig.index_sugg,
                             sphinx_var_path=SphinxConfig.var_dir)
        # `with` guarantees the handle is closed even if write() fails
        with open(fname, "w") as f:
            f.write(conf_data)
        logging.info("Done.")
        return fname

    def __dbexport_sugg_dict(self):
        """Convert the indexer-produced dict.txt ("keyword freq" lines)
        into a TSV of (keyword, trigrams, freq), bulk-load it into the
        AOTRIG table, and return the TSV path.

        Bug fixed: the original returned None (no return statement), so
        self.files['dict.csv'] was None and cleanup raised TypeError.
        """
        logging.info("Place suggestion dict to DB %s...", self.files['dict.txt'])
        dict_dat_fname = os.path.abspath(Folders.temp + "/suggdict.csv")
        csv_counter = 0
        with open(self.files['dict.txt'], "r") as dict_file, open(dict_dat_fname, "w") as exit_file:
            # Iterating the file object is equivalent to the original
            # readline-until-empty loop.
            for line in dict_file:
                if line == '':
                    break
                csv_counter += 1
                splitting_seq = line.split(' ')
                keyword = splitting_seq[0]
                freq = splitting_seq[1].rstrip('\n')
                assert keyword and freq, "Cannot process {}".format(self.files['dict.txt'])
                exit_file.write("\t".join([keyword, trigram(keyword), freq]) + "\n")
        # (redundant explicit close() calls removed — `with` already closed both)
        self.aodp.bulk_csv(AoXmlTableEntry.OperationType.update, "AOTRIG", csv_counter, dict_dat_fname)
        logging.info("Done.")
        return dict_dat_fname

    def __create_ao_index_config(self):
        """Render the ADDROBJ index config into the temp folder and
        return its absolute path."""
        fname = os.path.abspath(Folders.temp + "/addrobj.conf")
        logging.info("Creating config %s", fname)
        conf_data = template('aore/templates/sphinx/idx_addrobj.conf',
                             db_host=DatabaseConfig.host,
                             db_user=DatabaseConfig.user,
                             db_password=DatabaseConfig.password,
                             db_name=DatabaseConfig.database,
                             db_port=DatabaseConfig.port,
                             sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"),
                             index_name=SphinxConfig.index_addjobj,
                             sphinx_var_path=SphinxConfig.var_dir,
                             min_length_to_star=SphinxConfig.min_length_to_star)
        with open(fname, "w") as f:
            f.write(conf_data)
        logging.info("Done.")
        return fname

    def __create_suggestion_dict(self):
        """Run the indexer in --buildstops mode to produce the keyword
        frequency dictionary; return the dict file path."""
        fname = os.path.abspath(Folders.temp + "/suggdict.txt")
        logging.info("Make suggestion dict (%s)...", fname)
        run_builddict_cmd = "{} {} -c {} --buildstops {} 200000 --buildfreqs".format(
            self.index_binary, SphinxConfig.index_addjobj, self.files['addrobj.conf'], fname)
        os.system(run_builddict_cmd)
        logging.info("Done.")
        return fname

    def __create_main_config(self, config_fname):
        """Concatenate the per-index *.conf files plus the rendered main
        template into `config_fname`; return its absolute path."""
        out_filename = os.path.abspath(config_fname)
        logging.info("Creating main config %s...", out_filename)
        conf_data = template('aore/templates/sphinx/sphinx.conf',
                             sphinx_listen=SphinxConfig.listen.replace("unix://", ''),
                             sphinx_var_path=SphinxConfig.var_dir)
        with open(out_filename, "w") as f:
            for fname, fpath in self.files.iteritems():
                if ".conf" in fname:
                    with open(fpath, "r") as conff:
                        for line in conff:
                            f.write(line)
                    f.write('\n')
            f.write(conf_data)
        logging.info("Done.")
        return out_filename