Ejemplo n.º 1
0
def test_process_place_postcode(analyzer, create_postcode_id, word_table,
                                pcode):
    """ Process a place that has nothing but a postcode in its address
        and check that the word table then reports exactly that postcode.
    """
    place = PlaceInfo({'address': {'postcode': pcode}})
    analyzer.process_place(place)

    assert word_table.get_postcodes() == {pcode}
Ejemplo n.º 2
0
    def test_process_place_place(self):
        """ Process a place named 'Honu Lulu', then reference that name
            through the `place` address tag and check the search and match
            tokens that come back.
        """
        named_place = PlaceInfo({'name': {'name': 'Honu Lulu'}})
        self.analyzer.process_place(named_place)
        info = self.process_address(place='Honu Lulu')

        # The returned values are evaluated into Python objects so that
        # they can be compared with the expected token sets.
        search_tokens = eval(info['place_search'])
        assert search_tokens == self.name_token_set('#Honu Lulu', 'Honu', 'Lulu')

        match_tokens = eval(info['place_match'])
        assert match_tokens == self.name_token_set('#Honu Lulu')
Ejemplo n.º 3
0
    def test_process_place_housenumbers_lists(analyzer):
        """ A semicolon-separated conscription number list with irregular
            spacing must come back as the three house numbers 1, 2 and 3.
        """
        place = PlaceInfo({'address': {'conscriptionnumber': '1; 2;3'}})
        info = analyzer.process_place(place)

        # Order of the returned numbers is not checked.
        found = set(info['hnr'].split(';'))
        assert found == {'1', '2', '3'}
Ejemplo n.º 4
0
def run_sanitizer_on(**kwargs):
    """ Run the 'strip-brace-terms' sanitizer step over a place whose
        names are given by the keyword arguments.

        Returns the sorted list of (name, kind, suffix) tuples of the
        names produced by the sanitizer.
    """
    place = PlaceInfo({'name': kwargs})
    steps = [{'step': 'strip-brace-terms'}]
    names, _ = PlaceSanitizer(steps).process_names(place)

    return sorted((entry.name, entry.kind, entry.suffix) for entry in names)
Ejemplo n.º 5
0
    def index_places(self, worker, places):
        """ Hand the given places to the worker in a single statement.

            For every place the parameter list receives its 'place_id',
            its 'address' and the result of analysing the place with
            `self.analyzer`, in that order. The SQL comes from
            `self._index_sql()`, called with the number of places.
        """
        params = []
        for row in places:
            params.append(row['place_id'])
            params.append(row['address'])
            params.append(PlaceInfo(row).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
Ejemplo n.º 6
0
    def test_process_place_street(self):
        """ Process a place named 'Grand Road', then reference that name
            through the `street` address tag and check the street tokens.
        """
        named_place = PlaceInfo({'name': {'name': 'Grand Road'}})
        self.analyzer.process_place(named_place)
        info = self.process_address(street='Grand Road')

        # The returned value is evaluated into a Python object so that it
        # can be compared with the expected token set.
        street_tokens = eval(info['street'])
        assert street_tokens == self.name_token_set('#Grand Road')
Ejemplo n.º 7
0
def handle_threaded_sql_statements(pool, fd, analyzer):
    """ Read semicolon-separated rows from `fd` and hand one
        `tiger_line_import()` statement per row to the worker pool.

        Each row must provide the columns 'street', 'postcode',
        'geometry', 'from', 'to' and 'interpolation'. Rows that raise a
        ValueError while their arguments are being built are skipped.
        A dot is printed after every 1000 statements handed to the pool.
    """
    sql = "SELECT tiger_line_import(%s, %s, %s, %s, %s, %s)"
    # Statements handed out since the last progress dot.
    pending = 0

    for row in csv.DictReader(fd, delimiter=';'):
        try:
            address = {'street': row['street'], 'postcode': row['postcode']}
            geometry = 'SRID=4326;' + row['geometry']
            first = int(row['from'])
            last = int(row['to'])
            interpolation = row['interpolation']
            token_info = PlaceInfo({'address': address}).analyze(analyzer)
            postcode = analyzer.normalize_postcode(row['postcode'])
        except ValueError:
            continue

        args = (geometry, first, last, interpolation, token_info, postcode)
        # Use whichever database connection of the pool is free next.
        pool.next_free_worker().perform(sql, args=args)

        pending = (pending + 1) % 1000
        if pending == 0:
            print('.', end='', flush=True)
Ejemplo n.º 8
0
def test_no_name_list():
    """ Running the 'split-name-list' step over a place that has only a
        house number must yield no names and a single address entry.
    """
    place = PlaceInfo({'address': {'housenumber': '3'}})
    steps = [{'step': 'split-name-list'}]
    names, address = PlaceSanitizer(steps).process_names(place)

    assert not names
    assert len(address) == 1
Ejemplo n.º 9
0
    def index_places(self, worker, places):
        """ Hand the given places to the worker in a single statement.

            For every place the parameter list receives its 'place_id',
            'name', 'address' and 'linked_place_id' followed by the result
            of analysing the place with `self.analyzer`. The SQL comes
            from `self._index_sql()`, called with the number of places.
        """
        columns = ('place_id', 'name', 'address', 'linked_place_id')
        params = []
        for row in places:
            params.extend(row[column] for column in columns)
            params.append(PlaceInfo(row).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), params)
Ejemplo n.º 10
0
def test_no_names():
    """ Running the 'strip-brace-terms' step over a place that has only a
        house number must yield no names and a single address entry.
    """
    place = PlaceInfo({'address': {'housenumber': '3'}})
    steps = [{'step': 'strip-brace-terms'}]
    names, address = PlaceSanitizer(steps).process_names(place)

    assert not names
    assert len(address) == 1
Ejemplo n.º 11
0
    def test_process_place_housenumbers_simple(analyzer, hnr):
        """ A single house number must be returned unchanged in 'hnr' and
            the 'hnr_tokens' value must start with an opening brace.
        """
        place = PlaceInfo({'address': {'housenumber': hnr}})
        info = analyzer.process_place(place)

        assert info['hnr'] == hnr
        assert info['hnr_tokens'].startswith("{")
Ejemplo n.º 12
0
def test_process_place_names(analyzer, make_keywords):
    """ Processing a place with a name and a ref must report the name
        tokens '{1,2,3}'.
    """
    names = {'name': 'Soft bAr', 'ref': '34'}
    info = analyzer.process_place(PlaceInfo({'name': names}))

    assert info['names'] == '{1,2,3}'
Ejemplo n.º 13
0
def sanitize_with_delimiter(delimiter, name):
    """ Run the 'split-name-list' sanitizer step with the given delimiters
        over a place that has the single name `name`.

        Returns the sorted list of the resulting name strings.
    """
    place = PlaceInfo({'name': {'name': name}})
    step = {'step': 'split-name-list', 'delimiters': delimiter}
    names, _ = PlaceSanitizer([step]).process_names(place)

    return sorted(entry.name for entry in names)
Ejemplo n.º 14
0
def test_sanitizer_empty_list(rules):
    """ A sanitizer built from `rules` must hand back the one list-like
        name as a single PlaceName entry.
    """
    san = sanitizer.PlaceSanitizer(rules)
    place = PlaceInfo({'name': {'name:de:de': '1;2;3'}})

    names, _ = san.process_names(place)

    assert len(names) == 1
    for entry in names:
        assert isinstance(entry, sanitizer.PlaceName)
Ejemplo n.º 15
0
    def test_process_place_housenumbers_duplicates(analyzer):
        """ The same number given under two different house number tags
            plus one distinct number must come back as '134' and '99a'.
        """
        address = {
            'housenumber': '134',
            'conscriptionnumber': '134',
            'streetnumber': '99a',
        }
        info = analyzer.process_place(PlaceInfo({'address': address}))

        # Order of the returned numbers is not checked.
        found = set(info['hnr'].split(';'))
        assert found == {'134', '99a'}
Ejemplo n.º 16
0
    def run_sanitizer_on(country, **kwargs):
        """ Run the 'tag-analyzer-by-language' sanitizer step over a place
            in the given country.

            The keyword arguments supply the names; underscores in the
            keyword are replaced by colons to form the name key. Returns
            the sorted list of (name, kind, suffix, attr) tuples.
        """
        tagged_names = {}
        for key, value in kwargs.items():
            tagged_names[key.replace('_', ':')] = value

        place = PlaceInfo({'name': tagged_names, 'country_code': country})
        steps = [{'step': 'tag-analyzer-by-language'}]
        names, _ = PlaceSanitizer(steps).process_names(place)

        return sorted((entry.name, entry.kind, entry.suffix, entry.attr)
                      for entry in names)
Ejemplo n.º 17
0
    def test_country_name(self, word_table):
        """ Process an administrative boundary with country code 'no' and
            name 'Norge'; check the name terms of the result and the
            country entry in the word table.
        """
        country = {
            'name': {'name': 'Norge'},
            'country_code': 'no',
            'rank_address': 4,
            'class': 'boundary',
            'type': 'administrative',
        }

        info = self.analyzer.process_place(PlaceInfo(country))

        self.expect_name_terms(info, '#norge', 'norge')
        assert word_table.get_country() == {('no', ' norge')}
Ejemplo n.º 18
0
    def test_missing_country(self):
        """ Without a country code, the 'tag-analyzer-by-language' step
            must leave the single name untouched: no suffix and no
            'analyzer' attribute.
        """
        place = PlaceInfo({'name': {'name': 'something'}})
        step = {
            'step': 'tag-analyzer-by-language',
            'use-defaults': 'all',
            'mode': 'replace',
        }
        names, _ = PlaceSanitizer([step]).process_names(place)

        assert len(names) == 1

        only = names[0]
        assert only.name == 'something'
        assert only.suffix is None
        assert 'analyzer' not in only.attr
Ejemplo n.º 19
0
 def add_country_names(self, country_code, names):
     """ Add names for the given country to the search index.

         The names are first run through `self.sanitizer`; the first
         element of its result is passed to `_add_country_full_names()`.
     """
     # Describe the country as an administrative boundary so that any
     # name preprocessing for country names applies.
     country = {
         'name': names,
         'country_code': country_code,
         'rank_address': 4,
         'class': 'boundary',
         'type': 'administrative',
     }
     processed = self.sanitizer.process_names(PlaceInfo(country))
     self._add_country_full_names(country_code, processed[0])
Ejemplo n.º 20
0
    def test_process_place_multiple_street_tags(self):
        """ Process a place with a name and a ref, then reference both
            through two different street address tags and check that the
            street tokens cover both.
        """
        street_names = {'name': 'Grand Road', 'ref': '05989'}
        self.analyzer.process_place(PlaceInfo({'name': street_names}))

        # 'street:sym_ul' is not a valid keyword name, hence the dict.
        address = {'street': 'Grand Road', 'street:sym_ul': '05989'}
        info = self.process_address(**address)

        street_tokens = eval(info['street'])
        assert street_tokens == self.name_token_set('#Grand Road', '#05989')
Ejemplo n.º 21
0
    def test_process_place_address_terms(self):
        """ Process three named places, then an address that refers to
            them, and check that only city, suburb and state end up in
            the 'addr' part of the result.
        """
        for place_name in ('Zwickau', 'Haupstraße', 'Sachsen'):
            place = PlaceInfo({'name': {'name': place_name}})
            self.analyzer.process_place(place)

        address = {
            'country': 'de',
            'city': 'Zwickau',
            'state': 'Sachsen',
            'suburb': 'Zwickau',
            'street': 'Hauptstr',
            'full': 'right behind the church',
        }
        info = self.process_address(**address)

        city = self.name_token_set('ZWICKAU')
        state = self.name_token_set('SACHSEN')

        print(info)
        # Only the first element of each 'addr' value is evaluated and
        # compared.
        result = {key: eval(tokens[0])
                  for key, tokens in info['addr'].items()}

        assert result == {'city': city, 'suburb': city, 'state': state}
Ejemplo n.º 22
0
    def run_sanitizer_on(whitelist, **kwargs):
        """ Run the 'tag-analyzer-by-language' sanitizer step in 'replace'
            mode with the given whitelist.

            The keyword arguments supply the names; underscores in the
            keyword are replaced by colons to form the name key. Every
            resulting `attr` must be a dict that is either empty or holds
            exactly one non-empty 'analyzer' entry. Returns the sorted
            list of (name, analyzer) tuples, with '' for a missing
            analyzer.
        """
        tagged_names = {}
        for key, value in kwargs.items():
            tagged_names[key.replace('_', ':')] = value

        place = PlaceInfo({'name': tagged_names})
        step = {
            'step': 'tag-analyzer-by-language',
            'mode': 'replace',
            'whitelist': whitelist,
        }
        names, _ = PlaceSanitizer([step]).process_names(place)

        attrs = [entry.attr for entry in names]
        assert all(isinstance(attr, dict) for attr in attrs)
        assert all(len(attr) <= 1 for attr in attrs)
        assert all(not attr or attr.get('analyzer') for attr in attrs)

        return sorted((entry.name, entry.attr.get('analyzer', ''))
                      for entry in names)
Ejemplo n.º 23
0
    def run_sanitizer_replace(mode, country, **kwargs):
        """ Run the 'tag-analyzer-by-language' sanitizer step in 'replace'
            mode over a place in the given country, with `mode` as the
            'use-defaults' setting.

            The keyword arguments supply the names; underscores in the
            keyword are replaced by colons to form the name key. Every
            resulting `attr` must be a dict that is either empty or holds
            exactly one non-empty 'analyzer' entry. Returns the sorted
            list of (name, analyzer) tuples, with '' for a missing
            analyzer.
        """
        tagged_names = {}
        for key, value in kwargs.items():
            tagged_names[key.replace('_', ':')] = value

        place = PlaceInfo({'name': tagged_names, 'country_code': country})
        step = {
            'step': 'tag-analyzer-by-language',
            'use-defaults': mode,
            'mode': 'replace',
        }
        names, _ = PlaceSanitizer([step]).process_names(place)

        attrs = [entry.attr for entry in names]
        assert all(isinstance(attr, dict) for attr in attrs)
        assert all(len(attr) <= 1 for attr in attrs)
        assert all(not attr or attr.get('analyzer') for attr in attrs)

        return sorted((entry.name, entry.attr.get('analyzer', ''))
                      for entry in names)
Ejemplo n.º 24
0
def test_sanitizer_default():
    """ The 'split-name-list' step must split the name '1;2;3' into three
        PlaceName entries that keep kind 'name' and suffix 'de:de', and
        must return the street as a single PlaceName address entry.
    """
    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}])
    place = PlaceInfo({
        'name': {'name:de:de': '1;2;3'},
        'address': {'street': 'Bald'},
    })

    names, address = san.process_names(place)

    assert len(names) == 3
    for entry in names:
        assert isinstance(entry, sanitizer.PlaceName)
        assert entry.kind == 'name'
        assert entry.suffix == 'de:de'

    assert len(address) == 1
    for entry in address:
        assert isinstance(entry, sanitizer.PlaceName)
Ejemplo n.º 25
0
 def process_named_place(self, names):
     """ Run the analyzer over a place that consists only of the given
         names and return the analyzer's result.
     """
     place = PlaceInfo({'name': names})
     return self.analyzer.process_place(place)
Ejemplo n.º 26
0
 def process_address(self, **kwargs):
     """ Run the analyzer over a place whose address tags are given by
         the keyword arguments and return the analyzer's result.
     """
     place = PlaceInfo({'address': kwargs})
     return self.analyzer.process_place(place)