def test_load_no_tokenizer_dir(test_config):
    factory.create_tokenizer(test_config)

    test_config.project_dir = test_config.project_dir / 'foo'

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
def test_load_missing_property(temp_db_cursor, test_config):
    factory.create_tokenizer(test_config)

    temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
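
The factory tests in this listing rely on fixtures that the snippets do not show: test_config, tokenizer_mock and the DummyTokenizer class. Below is a minimal sketch of what they could look like; the DummyTokenizer shape is inferred from the assertions in the tests, while the _import_tokenizer hook and the module contract (a create(dsn, data_dir) entry point) are assumptions about the factory's internals.

from types import SimpleNamespace

import pytest


class DummyTokenizer:
    """ Stand-in tokenizer that records which factory entry point
        initialised it.
    """
    def __init__(self, dsn, data_dir):
        self.dsn = dsn
        self.data_dir = data_dir
        self.init_state = None

    def init_new_db(self, *args, **kwargs):
        # reached via factory.create_tokenizer()
        self.init_state = "new"

    def init_from_project(self, *args, **kwargs):
        # reached via factory.get_tokenizer_for_db()
        self.init_state = "loaded"


def _create_dummy(dsn, data_dir):
    """ Assumed module-level entry point that the factory calls. """
    return DummyTokenizer(dsn, data_dir)


@pytest.fixture
def tokenizer_mock(monkeypatch):
    """ Route every tokenizer lookup to the dummy above. The patched
        _import_tokenizer hook is an assumption, not the library's
        documented API.
    """
    monkeypatch.setattr('nominatim.tokenizer.factory._import_tokenizer',
                        lambda name: SimpleNamespace(create=_create_dummy))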
    def setup_api_db(self):
        """ Setup a test against the API test database.
        """
        self.write_nominatim_config(self.api_test_db)

        if not self.api_db_done:
            self.api_db_done = True

            if not self._reuse_or_drop_db(self.api_test_db):
                # Note: '__file__' is a literal string here, so the path
                # resolves relative to the current working directory
                # rather than relative to this source file.
                testdata = Path('__file__') / '..' / '..' / 'testdb'
                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())

                try:
                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
                    self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
                    self.run_nominatim('freeze')

                    if self.tokenizer != 'icu':
                        phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
                    else:
                        csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
                        self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
                except:
                    # Clean up: drop the half-imported database before re-raising.
                    self.db_drop_database(self.api_test_db)
                    raise

        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
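
In the BDD harness, setup_api_db() is typically triggered from behave's environment hooks rather than called directly. A hypothetical sketch follows; the tag names and the context attribute layout are assumptions:

def before_scenario(context, scenario):
    # Pick the database fixture based on the scenario's tags.
    if 'DB' in context.tags:
        context.nominatim.setup_db(context)
    elif 'APIDB' in context.tags:
        context.nominatim.setup_api_db()
    elif 'UNKNOWNDB' in context.tags:
        context.nominatim.setup_unknown_db()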
def test_load_tokenizer(test_config):
    factory.create_tokenizer(test_config)

    tokenizer = factory.get_tokenizer_for_db(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "loaded"
Example #5
    def setup_api_db(self):
        """ Setup a test against the API test database.
        """
        self.write_nominatim_config(self.api_test_db)

        if not self.api_db_done:
            self.api_db_done = True

            if not self._reuse_or_drop_db(self.api_test_db):
                testdata = Path('__file__') / '..' / '..' / 'testdb'
                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())

                try:
                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
                    if self.tokenizer != 'legacy_icu':
                        self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
                    self.run_nominatim('freeze')

                    if self.tokenizer != 'legacy_icu':
                        phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
                    else:
                        # XXX Temporarily use the wiki while there is no CSV
                        # import available.
                        self.test_env['NOMINATIM_LANGUAGES'] = 'en'
                        self.run_nominatim('special-phrases', '--import-from-wiki')
                        del self.test_env['NOMINATIM_LANGUAGES']
                except:
                    self.db_drop_database(self.api_test_db)
                    raise

        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
def test_load_tokenizer(temp_db_conn, test_config, tokenizer_mock,
                        property_table):
    factory.create_tokenizer(test_config)

    tokenizer = factory.get_tokenizer_for_db(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "loaded"
Example #7
    def setup_unknown_db(self):
        """ Setup a test against a non-existing database.
        """
        # The tokenizer needs an existing database to function.
        # So start with the usual database
        class _Context:
            db = None

        context = _Context()
        self.setup_db(context)
        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)

        # Then drop the DB again
        self.teardown_db(context, force_drop=True)
Example #8
def import_and_index_data_from_place_table(context):
    """ Import data previously set up in the place table.
    """
    nctx = context.nominatim

    tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
    nctx.copy_from_place(context.db)

    # XXX use tool function as soon as it is ported
    with context.db.cursor() as cur:
        with (nctx.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
            cur.execute(fd.read())
        cur.execute("""
            INSERT INTO location_postcode
             (place_id, indexed_status, country_code, postcode, geometry)
            SELECT nextval('seq_place'), 1, country_code,
                   upper(trim (both ' ' from address->'postcode')) as pc,
                   ST_Centroid(ST_Collect(ST_Centroid(geometry)))
              FROM placex
             WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
                   AND geometry IS NOT null
             GROUP BY country_code, pc""")

    # Call directly as the refresh function does not include postcodes.
    indexer.LOG.setLevel(logging.ERROR)
    indexer.Indexer(nctx.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)

    check_database_integrity(context)
def test_setup_tokenizer_dir_exists(test_config):
    (test_config.project_dir / 'tokenizer').mkdir()

    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"
def test_setup_dummy_tokenizer(temp_db_conn, test_config):
    tokenizer = factory.create_tokenizer(test_config)

    assert isinstance(tokenizer, DummyTokenizer)
    assert tokenizer.init_state == "new"
    assert (test_config.project_dir / 'tokenizer').is_dir()

    assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
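
The final assertion reads the tokenizer name back from the nominatim_properties table (the same table truncated in test_load_missing_property above). A hedged re-implementation of what get_property amounts to; the actual helper in nominatim.db.properties may differ:

def get_property(conn, name):
    """ Look up a value in the nominatim_properties key/value table,
        returning None when the property is not set (a sketch, not the
        library's actual code).
    """
    with conn.cursor() as cur:
        cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                    (name, ))
        row = cur.fetchone()

    return row[0] if row is not None else None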
Example #11
def install_legacy_tokenizer(conn, config, **_):
    """ Setup legacy tokenizer.

        If no other tokenizer has been configured yet, then create the
        configuration for the backwards-compatible legacy tokenizer
    """
    if properties.get_property(conn, 'tokenizer') is None:
        with conn.cursor() as cur:
            for table in ('placex', 'location_property_osmline'):
                has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
                                           WHERE table_name = %s
                                           and column_name = 'token_info'""",
                                        (table, ))
                if has_column == 0:
                    cur.execute('ALTER TABLE {} ADD COLUMN token_info JSONB'.format(table))
        tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
                                                       module_name='legacy')

        tokenizer.migrate_database(config)
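
A hedged sketch of driving this migration step directly; dsn and config are assumed to be a libpq DSN string and a loaded Nominatim configuration, and connect is the nominatim.db.connection helper used elsewhere in the codebase:

from nominatim.db.connection import connect


def run_legacy_tokenizer_migration(dsn, config):
    """ Hypothetical driver: apply install_legacy_tokenizer() to an
        existing database and persist the result.
    """
    with connect(dsn) as conn:
        install_legacy_tokenizer(conn, config)
        conn.commit()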
def test_setup_bad_tokenizer_name(def_config, tmp_path, monkeypatch):
    def_config.project_dir = tmp_path
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(def_config)
def test_setup_tokenizer_dir_failure(test_config):
    (test_config.project_dir / 'tokenizer').write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
Example #14
def test_tokenizer(tokenizer_mock, project_env):
    return factory.create_tokenizer(project_env)
Example #15
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
    def_config.project_dir = tmp_path
    return factory.create_tokenizer(def_config)
def test_setup_bad_tokenizer_name(test_config, monkeypatch):
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)