コード例 #1
0
ファイル: sphinx.py プロジェクト: jar3b/py-phias
    def __init__(self):
        """Set up the Sphinx helper: DB handler, temp folder and Sphinx var dirs."""
        # Path to the Sphinx indexer binary; assigned later by configure_indexer().
        self.index_binary = None
        # Mapping of generated temp file names -> absolute paths.
        self.files = dict()
        self.aodp = DbHandler()

        # Create the temporary folder if it does not exist yet.
        if not os.path.exists(Folders.temp):
            os.makedirs(Folders.temp)

        # Create the three folders Sphinx needs (run, log, data),
        # skipping any that already exist. (Was three copy-pasted if-blocks.)
        for subdir in ('run', 'log', 'data'):
            dirname = SphinxConfig.var_dir + '/' + subdir
            if not os.path.exists(dirname):
                os.makedirs(dirname)
コード例 #2
0
ファイル: updater.py プロジェクト: jar3b/py-phias
 def __init__(self, source="http"):
     """Initialize the updater with a DB handler and the given update source."""
     # Source may be "http", a path to a .rar archive, or a folder of XMLs.
     self.source = source
     self.db_handler = DbHandler()
     # These are filled in later, when the update entries are initialized.
     self.updalist_generator = None
     self.tablelist_generator = None
     self.allowed_tables = None
コード例 #3
0
ファイル: updater.py プロジェクト: jar3b/py-phias
class Updater:
    # Source: "http", directory (as a full path to unpacked xmls)
    def __init__(self, source="http"):
        self.db_handler = DbHandler()
        self.source = source
        self.updalist_generator = None
        self.tablelist_generator = None
        self.allowed_tables = None

    def __get_entries_from_folder(self, path_to_xmls):
        for (dirpath, dirnames, filenames) in walk(path_to_xmls):
            for filename in filenames:
                if filename.endswith(".XML"):
                    xmltable = AoXmlTableEntry.from_dir(filename, dirpath.replace("\\", "/") + "/")
                    if xmltable.table_name in allowed_tables:
                        yield xmltable
            break

    @classmethod
    def get_current_fias_version(cls):
        db = None
        try:
            db = DBImpl(psycopg2, DatabaseConfig)
            rows = db.get_rows('SELECT version FROM "CONFIG" WHERE id=0', True)
            assert len(rows) > 0, "Cannot get a version"
            return rows[0]['version']
        except:
            return 0
        finally:
            if db:
                db.close()

    @classmethod
    def __set__update_version(cls, updver=0):
        db = DBImpl(psycopg2, DatabaseConfig)
        try:
            assert isinstance(updver, int), "Update version must be of int type."
            db.execute('UPDATE "CONFIG" SET version={} WHERE id=0'.format(updver))
        finally:
            db.close()

    # Получает верию ФИАС с клавиатуры (если мы берем базу из папки или  локального архива и не можем определить,
    # что это за версия
    @staticmethod
    def __get_update_version_from_console():
        mode = None
        while not mode:
            try:
                mode = int(raw_input('Enter FIAS update version (3 digit):'))
            except ValueError:
                print "Not a valid fias version, try again."

        return mode

    def __get_updates_from_folder(self, foldername):
        fias_db_version = self.__get_update_version_from_console()
        yield dict(intver=fias_db_version,
                   textver="Version {}".format(fias_db_version),
                   delta_url=foldername,
                   complete_url=foldername)

    @staticmethod
    def __get_updates_from_rar(url):
        aorar = AoRar()

        if url.startswith("http://") or url.startswith("https://"):
            aorar.download(url)
        if url.endswith(".rar") and path.isfile(url):
            aorar.local(url)

        for table_entry in aorar.get_table_entries(allowed_tables):
            yield table_entry

    def __init_update_entries(self, updates_generator):
        if self.source == "http":
            assert updates_generator, "No generator"
            self.tablelist_generator = self.__get_updates_from_rar
            self.updalist_generator = updates_generator
            return
        if self.source.endswith(".rar"):
            self.tablelist_generator = self.__get_updates_from_rar
            self.updalist_generator = self.__get_updates_from_folder(self.source)
            return
        if path.isdir(self.source):
            self.tablelist_generator = self.__get_entries_from_folder
            self.updalist_generator = self.__get_updates_from_folder(self.source)

        assert self.tablelist_generator, "No valid source."

    def process_single_entry(self, operation_type, table_xmlentry, chunck_size=50000):
        aoparser = AoDataParser(table_xmlentry, chunck_size)
        aoparser.parse(lambda x, y: self.db_handler.bulk_csv(operation_type, table_xmlentry.table_name, x, y))

    def create(self, updates_generator):
        self.__init_update_entries(updates_generator)
        self.db_handler.create_structure()

        for update_entry in self.updalist_generator:
            logging.info("Processing DB #%d", update_entry['intver'])
            for table_entry in self.tablelist_generator(update_entry['complete_url']):
                if table_entry.operation_type == AoXmlTableEntry.OperationType.update:
                    table_entry.operation_type = AoXmlTableEntry.OperationType.create
                self.process_single_entry(table_entry.operation_type, table_entry)
            Updater.__set__update_version(update_entry['intver'])
        else:
            logging.info("No updates more.")

        self.db_handler.create_indexes(db_shemas.keys())

        logging.info("Create success")

    def update(self, updates_generator):
        self.__init_update_entries(updates_generator)

        # Drop all indexes if updates needed
        indexes_dropped = False

        for update_entry in self.updalist_generator:
            if not indexes_dropped:
                self.db_handler.drop_indexes(allowed_tables)
                indexes_dropped = True
            logging.info("Processing update #%d", update_entry['intver'])
            for table_entry in self.tablelist_generator(update_entry['delta_url']):
                self.process_single_entry(table_entry.operation_type, table_entry)
            Updater.__set__update_version(update_entry['intver'])
        else:
            logging.info("No updates more.")

        # Re-create all indexes (if dropped)
        if indexes_dropped:
            self.db_handler.create_indexes(allowed_tables)

        logging.info("Update success")
コード例 #4
0
ファイル: sphinx.py プロジェクト: jar3b/py-phias
class SphinxHelper:
    """Generates Sphinx configs and dictionaries, then runs the indexer."""

    def __init__(self):
        # Path to the Sphinx indexer binary; assigned in configure_indexer().
        self.index_binary = None
        # Mapping of generated temp file names -> absolute paths.
        self.files = dict()
        self.aodp = DbHandler()

        # Create the temporary folder if it does not exist yet.
        if not os.path.exists(Folders.temp):
            os.makedirs(Folders.temp)

        # Create the three folders Sphinx needs (run, log, data),
        # skipping any that already exist. (Was three copy-pasted if-blocks.)
        for subdir in ('run', 'log', 'data'):
            dirname = SphinxConfig.var_dir + '/' + subdir
            if not os.path.exists(dirname):
                os.makedirs(dirname)

    def configure_indexer(self, indexer_binary, config_filename):
        """Generate all Sphinx configs, run the indexer and clean up temp files."""
        logging.info("Start configuring Sphinx...")
        self.index_binary = indexer_binary

        # Create ADDROBJ config
        self.files['addrobj.conf'] = self.__create_ao_index_config()

        # Produce dict file
        self.files['dict.txt'] = self.__create_suggestion_dict()

        # Put dict into db
        self.files['dict.csv'] = self.__dbexport_sugg_dict()

        # Create SUGGEST config
        self.files['suggest.conf'] = self.__create_sugg_index_config()

        # Create main config (sphinx.conf)
        out_fname = self.__create_main_config(config_filename)

        # Index both configs with a single indexer run.
        run_index_cmd = "{} -c {} --all --rotate".format(self.index_binary, out_fname)
        logging.info("Indexing main (%s)...", out_fname)
        os.system(run_index_cmd)
        logging.info("All indexes were created.")

        # Remove the temporary files (best effort; warn on failure).
        for fname, fpath in self.files.iteritems():
            try:
                os.remove(fpath)
            except Exception:
                logging.warning("Cannot delete %s. Not accessible.", fpath)
        logging.info("Temporary files removed.")
        logging.info("Successfully configured. Please restart searchd.")

    def __create_sugg_index_config(self):
        """Render the suggestion index config into the temp folder; return its path."""
        fname = os.path.abspath(Folders.temp + "/suggest.conf")
        logging.info("Creating config %s", fname)

        conf_data = template('aore/templates/sphinx/idx_suggest.conf', db_host=DatabaseConfig.host,
                             db_user=DatabaseConfig.user,
                             db_password=DatabaseConfig.password,
                             db_name=DatabaseConfig.database, db_port=DatabaseConfig.port,
                             index_name=SphinxConfig.index_sugg,
                             sphinx_var_path=SphinxConfig.var_dir)

        # Context manager guarantees the handle is closed (was open/write/close).
        with open(fname, "w") as f:
            f.write(conf_data)

        logging.info("Done.")

        return fname

    def __dbexport_sugg_dict(self):
        """Convert the frequency dict to CSV, bulk-load it into AOTRIG, return the CSV path."""
        logging.info("Place suggestion dict to DB %s...", self.files['dict.txt'])
        dict_dat_fname = os.path.abspath(Folders.temp + "/suggdict.csv")

        csv_counter = 0
        with open(self.files['dict.txt'], "r") as dict_file, open(dict_dat_fname, "w") as exit_file:
            # Each input line is "<keyword> <frequency>".
            for line in dict_file:
                if line == '':
                    break
                csv_counter += 1
                splitting_seq = line.split(' ')
                keyword = splitting_seq[0]
                freq = splitting_seq[1].rstrip('\n')
                assert keyword and freq, "Cannot process {}".format(self.files['dict.txt'])

                # Output row: keyword, its trigram expansion, frequency.
                nodes = [keyword, trigram(keyword), freq]
                exit_file.write("\t".join(nodes) + "\n")
        # (The redundant close() calls after the with-block were removed;
        # the context manager already closed both files.)

        self.aodp.bulk_csv(AoXmlTableEntry.OperationType.update, "AOTRIG", csv_counter, dict_dat_fname)
        logging.info("Done.")

        # Bug fix: return the CSV path so cleanup in configure_indexer() can
        # delete it. The original returned None, which was stored under
        # self.files['dict.csv'] and made os.remove() fail with a warning.
        return dict_dat_fname

    def __create_ao_index_config(self):
        """Render the ADDROBJ index config into the temp folder; return its path."""
        fname = os.path.abspath(Folders.temp + "/addrobj.conf")
        logging.info("Creating config %s", fname)

        conf_data = template('aore/templates/sphinx/idx_addrobj.conf', db_host=DatabaseConfig.host,
                             db_user=DatabaseConfig.user,
                             db_password=DatabaseConfig.password,
                             db_name=DatabaseConfig.database, db_port=DatabaseConfig.port,
                             sql_query=template('aore/templates/postgre/sphinx_query.sql').replace("\n", " \\\n"),
                             index_name=SphinxConfig.index_addjobj,
                             sphinx_var_path=SphinxConfig.var_dir,
                             min_length_to_star=SphinxConfig.min_length_to_star)

        with open(fname, "w") as f:
            f.write(conf_data)

        logging.info("Done.")

        return fname

    def __create_suggestion_dict(self):
        """Run the indexer in --buildstops mode to produce a frequency dictionary."""
        fname = os.path.abspath(Folders.temp + "/suggdict.txt")
        logging.info("Make suggestion dict (%s)...", fname)

        run_builddict_cmd = "{} {} -c {} --buildstops {} 200000 --buildfreqs".format(self.index_binary,
                                                                                     SphinxConfig.index_addjobj,
                                                                                     self.files['addrobj.conf'], fname)
        os.system(run_builddict_cmd)
        logging.info("Done.")

        return fname

    def __create_main_config(self, config_fname):
        """Concatenate the generated .conf files plus the main template into one config."""
        out_filename = os.path.abspath(config_fname)
        logging.info("Creating main config %s...", out_filename)

        conf_data = template('aore/templates/sphinx/sphinx.conf',
                             sphinx_listen=SphinxConfig.listen.replace("unix://", ''),
                             sphinx_var_path=SphinxConfig.var_dir)

        with open(out_filename, "w") as f:
            # Copy every generated *.conf file first, then append the template.
            for fname, fpath in self.files.iteritems():
                if ".conf" in fname:
                    with open(fpath, "r") as conff:
                        for line in conff:
                            f.write(line)
                        f.write('\n')
            f.write(conf_data)

        logging.info("Done.")

        return out_filename