def finalize_import(self, config):
    """ Do any required postprocessing to make the tokenizer data ready
        for use.
    """
    with connect(self.dsn) as conn:
        sqlp = SQLPreprocessor(conn, config)
        sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
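Every example on this page follows the same basic pattern: open a database connection, build an SQLPreprocessor from the connection and the loaded Nominatim configuration, then render and execute a Jinja2-templated SQL file from the configured SQL library directory. A minimal sketch of that pattern is below; the DSN, the file name and the import paths are assumptions based on the Nominatim source layout, and `config` stands for a loaded configuration object as used throughout these examples.

from nominatim.db.connection import connect
from nominatim.db.sql_preprocessor import SQLPreprocessor

# Sketch only: 'example.sql' is a placeholder for any file below the
# configured SQL library directory; keyword arguments passed to
# run_sql_file() become variables in the Jinja2 template.
with connect('dbname=nominatim') as conn:
    sqlp = SQLPreprocessor(conn, config)
    sqlp.run_sql_file(conn, 'example.sql', drop=False)
    conn.commit()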
Example 2
def create_functions(conn, config, enable_diff_updates=True, enable_debug=False):
    """ (Re)create the PL/pgSQL functions.
    """
    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'functions.sql',
                     disable_diff_updates=not enable_diff_updates,
                     debug=enable_debug)
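Keyword arguments passed to run_sql_file() surface as variables inside the Jinja2 template, so functions.sql can branch on disable_diff_updates and debug. A hypothetical call site, assuming an open connection and a loaded config as in the other examples:

# Recreate the PL/pgSQL functions with debug output enabled and without
# the diff-update variants (dsn and config are assumed to exist).
with connect(dsn) as conn:
    create_functions(conn, config, enable_diff_updates=False, enable_debug=True)
    conn.commit()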
Example 3
def _init_db_tables(self, config):
    """ Set up the word table and fill it with pre-computed word
        frequencies.
    """
    with connect(self.dsn) as conn:
        sqlp = SQLPreprocessor(conn, config)
        sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer_tables.sql')
        conn.commit()

def update_sql_functions(self, config):
    """ Reimport the SQL functions for this tokenizer.
    """
    with connect(self.dsn) as conn:
        max_word_freq = get_property(conn, DBCFG_MAXWORDFREQ)
        sqlp = SQLPreprocessor(conn, config)
        sqlp.run_sql_file(conn,
                          'tokenizer/legacy_icu_tokenizer.sql',
                          max_word_freq=max_word_freq)
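get_property() reads back a value that was stored in the database at import time under the DBCFG_MAXWORDFREQ key; run_sql_file() then forwards it to the template as max_word_freq. A sketch of the assumed round trip through nominatim.db.properties (the key constant and value are taken from the surrounding examples; the set_property/get_property signatures are assumptions):

from nominatim.db import properties

# Store the word-frequency cutoff once, read it back later when the SQL
# functions are (re)created.
with connect(dsn) as conn:
    properties.set_property(conn, DBCFG_MAXWORDFREQ, '50000')
    assert properties.get_property(conn, DBCFG_MAXWORDFREQ) == '50000'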
Example 5
def create_tables(conn, config, reverse_only=False):
    """ Create the set of basic tables.
        When `reverse_only` is True, then the main table for searching will
        be skipped and only reverse search is possible.
    """
    sql = SQLPreprocessor(conn, config)
    sql.env.globals['db']['reverse_only'] = reverse_only

    sql.run_sql_file(conn, 'tables.sql')
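Adding reverse_only to sql.env.globals makes the flag available to the template as db.reverse_only, so tables.sql can presumably skip the forward-search tables when only reverse geocoding is wanted. A hypothetical call site:

# Create only the tables needed for reverse geocoding; dsn and config are
# assumed to come from the surrounding import code.
with connect(dsn) as conn:
    create_tables(conn, config, reverse_only=True)
    conn.commit()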
Example 6
    def _init_db_tables(self, config):
        """ Set up the word table and fill it with pre-computed word
            frequencies.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
            conn.commit()

        LOG.warning("Precomputing word tokens")
        db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
Example 7
def update_sql_functions(self, config):
    """ Reimport the SQL functions for this tokenizer.
    """
    with connect(self.dsn) as conn:
        max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
        modulepath = config.DATABASE_MODULE_PATH or \
                     str((config.project_dir / 'module').resolve())
        sqlp = SQLPreprocessor(conn, config)
        sqlp.run_sql_file(conn,
                          'tokenizer/legacy_tokenizer.sql',
                          max_word_freq=max_word_freq,
                          modulepath=modulepath)
Example 8
def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
    table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
    cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
    cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
                    sql=tmp_path, data=SRC_DIR / 'data')

    return SQLPreprocessor(temp_db_conn, cfg)
Example 9
def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
    table_factory('country_name', 'partition INT', (0, 1, 2))
    cfg = Configuration(None, SRC_DIR.resolve() / 'settings')
    cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php',
                    sql=tmp_path, data=SRC_DIR / 'data')

    return SQLPreprocessor(temp_db_conn, cfg)
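Both fixture variants point the SQL library directory at tmp_path, so a test can drop a template file there and run it through the returned preprocessor. A minimal sketch of such a test; the file name and function body are made up:

def test_run_simple_file(sql_preprocessor, temp_db_conn, temp_db_cursor, tmp_path):
    # Write a trivial template into the directory configured as the SQL libdir.
    (tmp_path / 'dummy.sql').write_text(
        "CREATE FUNCTION test() RETURNS INTEGER AS $$ SELECT 42 $$ LANGUAGE SQL")

    sql_preprocessor.run_sql_file(temp_db_conn, 'dummy.sql')

    assert temp_db_cursor.scalar('SELECT test()') == 42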
Example 10
def create_search_indices(conn, config, drop=False):
    """ Create tables that have explicit partitioning.
    """

    # If a previous index creation failed and left invalid indices behind,
    # drop them first so that they can be recreated.
    with conn.cursor() as cur:
        cur.execute("""SELECT relname FROM pg_class, pg_index
                       WHERE pg_index.indisvalid = false
                             AND pg_index.indexrelid = pg_class.oid""")
        bad_indices = [row[0] for row in list(cur)]
        for idx in bad_indices:
            LOG.info("Drop invalid index %s.", idx)
            cur.execute('DROP INDEX "{}"'.format(idx))
    conn.commit()

    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'indices.sql', drop=drop)
def test_load_file_simple(sql_preprocessor_cfg, sql_factory, temp_db_conn,
                          temp_db_cursor, monkeypatch, expr, ret):
    monkeypatch.setenv('NOMINATIM_TABLESPACE_SEARCH_DATA', 'dsearch')
    monkeypatch.setenv('NOMINATIM_TABLESPACE_ADDRESS_INDEX', 'iaddress')
    monkeypatch.setenv('NOMINATIM_TABLESPACE_AUX_DATA', 'daux')
    sqlfile = sql_factory("RETURN {};".format(expr))

    SQLPreprocessor(temp_db_conn,
                    sql_preprocessor_cfg).run_sql_file(temp_db_conn, sqlfile)

    assert temp_db_cursor.scalar('SELECT test()') == ret
Example 12
def sql_functions(temp_db_conn, def_config, src_dir):
    orig_sql = def_config.lib_dir.sql
    def_config.lib_dir.sql = src_dir / 'lib-sql'
    sqlproc = SQLPreprocessor(temp_db_conn, def_config)
    sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
    sqlproc.run_sql_file(temp_db_conn, 'tokenizer/icu_tokenizer.sql')
    def_config.lib_dir.sql = orig_sql
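As written, sql_functions restores def_config.lib_dir.sql only if both run_sql_file() calls succeed. A variant of the same fixture that restores the original path even when loading fails; this is a sketch, and the pytest decorator is assumed from the fixture context of the surrounding examples:

import pytest

@pytest.fixture
def sql_functions(temp_db_conn, def_config, src_dir):
    orig_sql = def_config.lib_dir.sql
    def_config.lib_dir.sql = src_dir / 'lib-sql'
    try:
        sqlproc = SQLPreprocessor(temp_db_conn, def_config)
        sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
        sqlproc.run_sql_file(temp_db_conn, 'tokenizer/icu_tokenizer.sql')
    finally:
        # Restore the original SQL library directory no matter what happened.
        def_config.lib_dir.sql = orig_sql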
    def _init_db_tables(self, config):
        """ Set up the word table and fill it with pre-computed word
            frequencies.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
            conn.commit()

            LOG.warning("Precomputing word tokens")

            # get partial words and their frequencies
            words = Counter()
            with self.name_analyzer() as analyzer:
                with conn.cursor(name="words") as cur:
                    cur.execute(
                        "SELECT svals(name) as v, count(*) FROM place GROUP BY v"
                    )

                    for name, cnt in cur:
                        term = analyzer.make_standard_word(name)
                        if term:
                            for word in term.split():
                                words[word] += cnt

            # copy them back into the word table
            copystr = io.StringIO(''.join(
                ('{}\t{}\n'.format(*args) for args in words.items())))

            with conn.cursor() as cur:
                copystr.seek(0)
                cur.copy_from(copystr,
                              'word',
                              columns=['word_token', 'search_name_count'])
                cur.execute("""UPDATE word SET word_id = nextval('seq_word')
                               WHERE word_id is null""")

            conn.commit()
Example 14
def add_tiger_data(data_dir, config, threads):
    """ Import tiger data from directory or tar file `data dir`.
    """
    dsn = config.get_libpq_dsn()
    sql_files, tar = handle_tarfile_or_directory(data_dir)

    if not sql_files:
        return

    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config)
        sql.run_sql_file(conn, 'tiger_import_start.sql')

    # Read each SQL file and hand its statements to a pool of
    # <threads - 1> worker connections.
    sel = selectors.DefaultSelector()
    place_threads = max(1, threads - 1)

    # Creates a pool of database connections
    for _ in range(place_threads):
        conn = DBConnection(dsn)
        conn.connect()
        sel.register(conn, selectors.EVENT_WRITE, conn)

    for sql_file in sql_files:
        if not tar:
            file = open(sql_file)
        else:
            file = tar.extractfile(sql_file)

        handle_threaded_sql_statements(sel, file)

    # Unregister the pool of database connections
    handle_unregister_connection_pool(sel, place_threads)

    if tar:
        tar.close()
    print('\n')
    LOG.warning("Creating indexes on Tiger data")
    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config)
        sql.run_sql_file(conn, 'tiger_import_finish.sql')
Example 15
def add_tiger_data(data_dir, config, threads, tokenizer):
    """ Import tiger data from directory or tar file `data dir`.
    """
    dsn = config.get_libpq_dsn()
    files, tar = handle_tarfile_or_directory(data_dir)

    if not files:
        return

    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config)
        sql.run_sql_file(conn, 'tiger_import_start.sql')

    # Read each file and hand its statements to a pool of
    # <threads - 1> worker connections.
    place_threads = max(1, threads - 1)

    with WorkerPool(dsn, place_threads, ignore_sql_errors=True) as pool:
        with tokenizer.name_analyzer() as analyzer:
            for fname in files:
                if not tar:
                    fd = open(fname)
                else:
                    fd = io.TextIOWrapper(tar.extractfile(fname))

                handle_threaded_sql_statements(pool, fd, analyzer)

                fd.close()

    if tar:
        tar.close()
    print('\n')
    LOG.warning("Creating indexes on Tiger data")
    with connect(dsn) as conn:
        sql = SQLPreprocessor(conn, config)
        sql.run_sql_file(conn, 'tiger_import_finish.sql')
Example 16
def sql_preprocessor(sql_preprocessor_cfg, temp_db_conn):
    return SQLPreprocessor(temp_db_conn, sql_preprocessor_cfg)
Example 17
def sql_preprocessor(temp_db_conn, tmp_path, def_config, monkeypatch,
                     table_factory):
    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.')
    table_factory('country_name', 'partition INT', (0, 1, 2))
    return SQLPreprocessor(temp_db_conn, def_config, tmp_path)
Example 18
def update_sql_functions(self, config):
    """ Reimport the SQL functions for this tokenizer.
    """
    with connect(self.dsn) as conn:
        sqlp = SQLPreprocessor(conn, config)
        sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer.sql')
Example 19
def create_partition_tables(conn, config):
    """ Create tables that have explicit partitioning.
    """
    sql = SQLPreprocessor(conn, config)
    sql.run_sql_file(conn, 'partition-tables.src.sql')
Example 20
def create_table_triggers(conn, config):
    """ Create the triggers for the tables. The trigger functions must already
        have been imported with refresh.create_functions().
    """
    sql = SQLPreprocessor(conn, config)
    sql.run_sql_file(conn, 'table-triggers.sql')
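The docstring spells out the ordering requirement: the trigger functions must exist before the triggers that reference them are created. A sketch of that ordering; the refresh module name is taken from the docstring, and dsn/config are assumed as in the earlier examples:

# Functions first, then the triggers that call them.
with connect(dsn) as conn:
    refresh.create_functions(conn, config)
    create_table_triggers(conn, config)
    conn.commit()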