Example #1
0
def test_extract_manytomany_relations_3(factory, config):
    """Rare tokens that co-occur in a single document end up in one group."""
    config.COMMON_THRESHOLD = 2
    latlon = {'lat': 1, 'lon': 2}
    factory(name="Rue Maréchal de Lattre de Tassigny",
            city='Mont-Saint-Aignan', postcode='76130',
            housenumbers={'45': latlon, '3': latlon})
    factory(name="rue du port", city='Saint-Denis', postcode='76370',
            housenumbers={'45': latlon, '3': latlon})
    factory(name="rue à l'eau", city='Saint-Pierre-de-Rouergue')
    factory(name="rue de Saint-Jean", housenumbers={'45': latlon, '3': latlon})
    query = ('45 rue de lattre de tassign pleiade a 3 porte 76130 mont '
             'saint aignan')
    tokens = list(map(Token, query.split()))
    groups = _extract_manytomany_relations(tokens)
    expected = {Token('lattre'), Token('aignan'), Token('76130'),
                Token('mont')}
    assert groups == [expected]
Example #2
0
def test_extract_manytomany_relations(factory, config):
    """Tokens rare in the index but shared by one document get grouped."""
    config.COMMON_THRESHOLD = 2
    factory(name="rue de Paris", city='Fecamp')
    factory(name="rue de la porte")
    factory(name="rue de dieppe", housenumbers={'506': {'lat': 1, 'lon': 2}})
    tokens = list(map(Token, 'rue de paris porte 506 fecamp'.split()))
    groups = _extract_manytomany_relations(tokens)
    assert groups == [{Token('fecamp'), Token('paris')}]
Example #3
0
def do_fuzzyindex(self, word):
    """Compute fuzzy extensions of word that exist in index.
    FUZZYINDEX lilas"""
    # Normalize the raw input the same way the index pipeline does.
    normalized = list(preprocess_query(word))[0]
    fuzzy = Token(normalized)
    fuzzy.make_fuzzy()
    # Pair each fuzzy candidate with its cardinality in the index.
    candidates = [(neighbor, DB.zcard(dbkeys.token_key(neighbor)))
                  for neighbor in fuzzy.neighbors]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    for candidate, freq in candidates:
        # Candidates are sorted by frequency, so the first zero ends it.
        if not freq:
            break
        print(white(candidate), blue(freq))
Example #4
0
def do_fuzzyindex(self, word):
    """Compute fuzzy extensions of word that exist in index.
    FUZZYINDEX lilas"""
    # Run the word through the same normalization used at indexing time.
    token = Token(list(preprocess_query(word))[0])
    token.make_fuzzy()
    # Score every fuzzy neighbor by its frequency in the index,
    # most frequent first.
    scored = sorted(((n, DB.zcard(dbkeys.token_key(n)))
                     for n in token.neighbors),
                    key=lambda item: item[1], reverse=True)
    for neighbor, freq in scored:
        if freq == 0:
            # Everything after this point is absent from the index.
            break
        print(white(neighbor), blue(freq))
Example #5
0
def test_synonymize(input, output, config):
    # Make sure we control synonyms.
    config.SYNONYMS = {'bd': 'boulevard', '13e': 'treizieme',
                       '18e': 'dix huitieme'}
    result = list(synonymize([Token(input)]))
    assert result == output
Example #6
0
def test_extract_manytomany_relations_2(factory, config):
    """Two documents sharing rare tokens yield two distinct groups."""
    config.COMMON_THRESHOLD = 2
    factory(name="rue de falaise", city='dieppe', postcode='76370',
            housenumbers={'1': {'lat': 1, 'lon': 2}})
    factory(name="chemin du semaphore", city='dieppe', postcode='76370',
            housenumbers={'1': {'lat': 1, 'lon': 2}})
    factory(name="chemin de neuville")
    factory(name="chemin de la tour",
            housenumbers={'1': {'lat': 1, 'lon': 2}})
    query = ('1 chemin de la falaise le semaphore neuville les 76370 '
             'dieppe')
    groups = _extract_manytomany_relations([Token(s)
                                            for s in query.split()])
    assert len(groups) == 2
    assert {Token('dieppe'), Token('falaise'), Token('76370')} in groups
    assert {Token('dieppe'), Token('76370'), Token('semaphore')} in groups
def test_fold_ordinal(input, expected):
    # Ordinal forms fold onto their canonical token.
    result = fold_ordinal(Token(input))
    assert result == expected
def test_flag_housenumber(inputs, expected):
    tokens = list(flag_housenumber([Token(item) for item in inputs]))
    # The token values themselves are left untouched…
    assert tokens == inputs
    # …only the kind flag on the first token may change.
    assert (tokens[0].kind == 'housenumber') == expected
def test_glue_ordinal(inputs, expected):
    tokens = [Token(item) for item in inputs]
    glued = list(glue_ordinal(tokens))
    assert glued == expected
Example #10
0
def test_alphanumerize(input, output):
    result = alphanumerize(Token(input))
    assert result == output
Example #11
0
def test_normalize(input, output):
    normalized = _normalize(Token(input))
    assert normalized == output
Example #12
0
def test_synonymize(input, output, monkeypatch):
    # Make sure we control synonyms.
    controlled = {'bd': 'boulevard', '13e': 'treizieme'}
    monkeypatch.setattr('addok.helpers.text.SYNONYMS', controlled)
    assert _synonymize(Token(input)) == output
Example #13
0
def test_phonemicize(input, output):
    phonemes = phonemicize(Token(input))
    assert phonemes == output