def test_extract_manytomany_relations_3(factory, config):
    config.COMMON_THRESHOLD = 2
    latlon = {'lat': 1, 'lon': 2}
    factory(name="Rue Maréchal de Lattre de Tassigny", city='Mont-Saint-Aignan',
            postcode='76130', housenumbers={'45': latlon, '3': latlon})
    factory(name="rue du port", city='Saint-Denis', postcode='76370',
            housenumbers={'45': latlon, '3': latlon})
    factory(name="rue à l'eau", city='Saint-Pierre-de-Rouergue')
    factory(name="rue de Saint-Jean", housenumbers={'45': latlon, '3': latlon})
    tokens = [Token(s) for s in
              '45 rue de lattre de tassign pleiade a 3 porte 76130 mont '
              'saint aignan'.split()]
    groups = _extract_manytomany_relations(tokens)
    assert groups == [
        {Token('lattre'), Token('aignan'), Token('76130'), Token('mont')},
    ]

def test_extract_manytomany_relations(factory, config):
    config.COMMON_THRESHOLD = 2
    factory(name="rue de Paris", city='Fecamp')
    factory(name="rue de la porte")
    factory(name="rue de dieppe", housenumbers={'506': {'lat': 1, 'lon': 2}})
    tokens = [Token(s) for s in 'rue de paris porte 506 fecamp'.split()]
    groups = _extract_manytomany_relations(tokens)
    assert groups == [{Token('fecamp'), Token('paris')}]

def do_fuzzyindex(self, word):
    """Compute fuzzy extensions of word that exist in index.
    FUZZYINDEX lilas"""
    word = list(preprocess_query(word))[0]
    token = Token(word)
    token.make_fuzzy()
    neighbors = [(n, DB.zcard(dbkeys.token_key(n))) for n in token.neighbors]
    neighbors.sort(key=lambda n: n[1], reverse=True)
    for token, freq in neighbors:
        if freq == 0:
            break
        print(white(token), blue(freq))

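# The docstring above doubles as the command help in addok's debug shell,
# where this helper is exposed as FUZZYINDEX. A hypothetical session, with
# made-up frequencies (real output depends on the indexed data):
#
#   > FUZZYINDEX lilas
#   ilas 42
#   lila 7
#   llas 3
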
def test_synonymize(input, output, config):
    # Make sure we control synonyms.
    config.SYNONYMS = {
        'bd': 'boulevard',
        '13e': 'treizieme',
        '18e': 'dix huitieme',
    }
    assert list(synonymize([Token(input)])) == output

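# The (input, output) signature above, like the other single-assert tests in
# this excerpt, implies pytest parametrization; the decorators are not part
# of this excerpt. A minimal sketch of one such decorator, with values derived
# from the SYNONYMS mapping configured above (illustrative, not the project's
# actual test data):
#
# @pytest.mark.parametrize('input, output', [
#     ('bd', ['boulevard']),
#     ('13e', ['treizieme']),
# ])
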
def test_extract_manytomany_relations_2(factory, config):
    config.COMMON_THRESHOLD = 2
    factory(name="rue de falaise", city='dieppe', postcode='76370',
            housenumbers={'1': {'lat': 1, 'lon': 2}})
    factory(name="chemin du semaphore", city='dieppe', postcode='76370',
            housenumbers={'1': {'lat': 1, 'lon': 2}})
    factory(name="chemin de neuville")
    factory(name="chemin de la tour", housenumbers={'1': {'lat': 1, 'lon': 2}})
    tokens = [Token(s) for s in
              '1 chemin de la falaise le semaphore neuville les 76370 '
              'dieppe'.split()]
    groups = _extract_manytomany_relations(tokens)
    assert len(groups) == 2
    assert {Token('dieppe'), Token('falaise'), Token('76370')} in groups
    assert {Token('dieppe'), Token('76370'), Token('semaphore')} in groups

def test_fold_ordinal(input, expected):
    assert fold_ordinal(Token(input)) == expected

def test_flag_housenumber(inputs, expected):
    tokens = [Token(input_) for input_ in inputs]
    tokens = list(flag_housenumber(tokens))
    assert tokens == inputs
    assert (tokens[0].kind == 'housenumber') == expected

def test_glue_ordinal(inputs, expected):
    tokens = [Token(input_) for input_ in inputs]
    assert list(glue_ordinal(tokens)) == expected

def test_alphanumerize(input, output):
    assert alphanumerize(Token(input)) == output

def test_normalize(input, output):
    assert _normalize(Token(input)) == output

def test_synonymize(input, output, monkeypatch):
    # Make sure we control synonyms.
    SYNONYMS = {'bd': 'boulevard', '13e': 'treizieme'}
    monkeypatch.setattr('addok.helpers.text.SYNONYMS', SYNONYMS)
    assert _synonymize(Token(input)) == output

def test_phonemicize(input, output):
    assert phonemicize(Token(input)) == output