def test_should_give_priority_to_housenumber_if_match(housenumber):
    """Prefixing the street query with '11' must surface the housenumber."""
    housenumber.update(name='rue des Berges')

    # Street-only query: the top hit carries no housenumber.
    street_hit = search('rue des berges')[0]
    assert not street_hit.housenumber

    # Numbered query: the top hit is the housenumber itself.
    numbered_hit = search('11 rue des berges')[0]
    assert numbered_hit.housenumber == '11'
    assert numbered_hit.type == 'housenumber'
def test_housenumber_id_is_used_when_given(factory):
    """A housenumber hit exposes the id stored on the housenumber."""
    numbers = {'1': {'lat': '48.325', 'lon': '2.256', 'id': 'abc'}}
    factory(name="rue de Paris", type="street", id="123",
            housenumbers=numbers)

    # The street alone keeps its own id ...
    assert search("rue de paris")[0].id == '123'
    # ... while the numbered query returns the housenumber id.
    assert search("1 rue de paris")[0].id == 'abc'
def test_return_housenumber_if_number_included_in_bigger_one(factory):
    """Housenumber '8' is returned for a street whose name contains '1814'."""
    factory(name='rue 1814',
            housenumbers={'8': {'lat': '48.3254', 'lon': '2.256'}})

    without_number = search('rue 1814')
    assert not without_number[0].housenumber

    with_number = search('8 rue 1814')
    assert with_number[0].housenumber == '8'
def test_id_is_overwritten_when_given_in_housenumber_payload(config, factory):
    """With 'id' as a payload field, the housenumber id wins on a numbered hit."""
    config.HOUSENUMBERS_PAYLOAD_FIELDS = ['id']
    numbers = {'1': {'lat': '48.325', 'lon': '2.256', 'id': 'abc'}}
    factory(name="rue de Paris", type="street", id="123",
            housenumbers=numbers)

    street_hits = search("rue de paris")
    assert street_hits[0].id == '123'

    numbered_hits = search("1 rue de paris")
    assert numbered_hits[0].id == 'abc'
def test_unknown_key_in_housenumber_payload_does_not_fail(config, factory):
    """A payload field absent from the housenumber data leaves the id unchanged."""
    config.HOUSENUMBERS_PAYLOAD_FIELDS = ['xxxyyy']
    factory(name="rue de Paris", type="street", id="123", postcode="12345",
            housenumbers={'1': {'lat': '48.325', 'lon': '2.256'}})

    # Both the street and the numbered query report the street id.
    for query in ("rue de paris", "1 rue de paris"):
        hits = search(query)
        assert hits[0].id == '123'
def test_process_should_update_if_action_is_given(factory):
    """Processing a document flagged 'update' replaces its indexed name."""
    document = factory(name="rue de l'avoine")
    assert search("rue")

    # Flag the document for update and rename it before re-processing.
    document["_action"] = "update"
    document["name"] = "avenue de l'avoine"
    process(document)

    # The new name is searchable, the old one no longer is.
    assert search("avenue")
    assert not search("rue")
def test_should_match_name(street):
    """A street becomes searchable by its name once renamed to it."""
    # Nothing matches before the rename.
    assert not search('Conflans')

    street.update(name='Conflans')

    matches = search('Conflans')
    assert matches
    top = matches[0]
    assert top.name == 'Conflans'
    assert top.id == street['id']
def test_housenumbers_payload_fields_are_exported(config, factory):
    """A configured payload field is exposed on the housenumber result."""
    config.HOUSENUMBERS_PAYLOAD_FIELDS = ['key']
    numbers = {'1': {'lat': '48.32', 'lon': '2.25', 'key': 'abc'}}
    factory(name="rue de Paris", type="street", id="123",
            housenumbers=numbers)

    # On the street result the field is an empty string.
    street_hits = search("rue de paris")
    assert street_hits[0].key == ''

    # On the housenumber result it carries the payload value.
    numbered_hits = search("1 rue de paris")
    assert numbered_hits[0].key == 'abc'
def test_postcode_is_overwritten_when_in_housenumber_payload(config, factory):
    """A postcode stored in the housenumber payload replaces the street one."""
    config.HOUSENUMBERS_PAYLOAD_FIELDS = ['postcode']
    numbers = {'1': {'lat': '48.325', 'lon': '2.256', 'postcode': '54321'}}
    factory(name="rue de Paris", type="street", id="123", postcode="12345",
            housenumbers=numbers)

    assert search("rue de paris")[0].postcode == '12345'
    assert search("1 rue de paris")[0].postcode == '54321'
def test_should_give_priority_to_best_match3(street, factory):
    """With name and city swapped between two streets, word order decides."""
    street.update(name="rue de Lille", city="Douai")
    other = factory(name="rue de Douai", city="Lille")

    # Each query returns both streets; the expected one must come first.
    expectations = (
        ("rue de lille douai", street),
        ("rue de douai lille", other),
    )
    for query, expected_first in expectations:
        results = search(query)
        assert len(results) == 2
        assert results[0].id == expected_first['id']
def test_housenumber_are_not_computed_if_another_type_is_asked(factory):
    """The ``type`` filter decides whether a housenumber or street is returned."""
    factory(name="rue de Bamako", type="street",
            housenumbers={'11': {'lat': '48.3254', 'lon': '2.256'}})

    # (extra search kwargs, expected type of the single result)
    cases = (
        ({}, "housenumber"),
        ({'type': "housenumber"}, "housenumber"),
        ({'type': "street"}, "street"),
    )
    for extra, expected_type in cases:
        results = search("11 rue de bamako", **extra)
        assert len(results) == 1
        assert results[0].type == expected_type
def get(self):
    """Run a search from the request arguments and return it as GeoJSON.

    Reads ``q``, ``limit``, ``autocomplete``, ``lat`` and ``lon`` (falling
    back to ``lng`` then ``long``) from ``self.request.args``. An empty or
    missing ``q`` yields a 400 ``Response``.
    """
    args = self.request.args
    query = args.get('q', '')
    if not query:
        return Response('Missing query', status=400)

    # Missing or non-numeric limit falls back to 5.
    try:
        limit = int(args.get('limit'))
    except (ValueError, TypeError):
        limit = 5

    # Autocomplete is on unless the parameter parses to an int other than 1.
    try:
        autocomplete = int(args.get('autocomplete')) == 1
    except (ValueError, TypeError):
        autocomplete = True

    # Coordinates are all-or-nothing: any parse failure clears them all.
    try:
        lat = float(args.get('lat'))
        lon = float(args.get('lon', args.get('lng', args.get('long'))))
    except (ValueError, TypeError):
        lat = lon = center = None
    else:
        center = [lat, lon]

    filters = self.match_filters()
    results = search(query, limit=limit, autocomplete=autocomplete,
                     lat=lat, lon=lon, **filters)
    if not results:
        log_notfound(query)
    log_query(query, results)
    return self.to_geojson(results, query=query, filters=filters,
                           center=center, limit=limit)
def process_row(self, req, row, filters, columns):
    """Geocode one row in place.

    The query is the space-joined values of ``columns`` taken from ``row``.
    A non-autocomplete search limited to one result is run; on a hit the
    ``latitude``, ``longitude`` and ``result_*`` keys are written into
    ``row`` and ``self.add_extra_fields`` is called, on a miss the query is
    passed to ``log_notfound``.

    When the request names both a ``lat`` and a ``lon`` column and the row
    has values for them, they are passed to the search as ``lat``/``lon``.
    Values that cannot be converted to ``float`` are ignored so that one
    malformed cell does not abort the row.
    """
    # We don't want None in a join.
    q = ' '.join(row[k] or '' for k in columns)
    filters = self.match_row_filters(row, filters)
    lat_column = req.get_param('lat')
    lon_column = req.get_param('lon')
    if lon_column and lat_column:
        lat = row.get(lat_column)
        lon = row.get(lon_column)
        if lat and lon:
            # Convert both before storing either, so a bad value never
            # leaves a lone 'lat' without its 'lon' in the filters.
            try:
                lat, lon = float(lat), float(lon)
            except (ValueError, TypeError):
                pass
            else:
                filters['lat'] = lat
                filters['lon'] = lon
    results = search(q, autocomplete=False, limit=1, **filters)
    log_query(q, results)
    if results:
        result = results[0]
        row.update({
            'latitude': result.lat,
            'longitude': result.lon,
            'result_label': str(result),
            'result_score': round(result.score, 2),
            'result_type': result.type,
            'result_id': result.id,
            'result_housenumber': result.housenumber,
        })
        self.add_extra_fields(row, result)
    else:
        log_notfound(q)
def test_closer_result_should_be_first_for_same_score(factory):
    """Among three same-named streets, the one at the search center is first."""
    center = {'lat': 48.1, 'lon': 2.2}
    expected = factory(name='rue de paris', city='Cergy', **center)
    factory(name='rue de paris', city='Perpète', lat=-48.1, lon=-2.2)
    factory(name='rue de paris', city='Loin', lat=8.1, lon=42.2)

    # NOTE(review): the query text differs from the indexed names
    # ('rue de paris') -- confirm this is intended.
    results = search('rue de la monnaie', **center)

    assert len(results) == 3
    assert results[0].id == expected['id']
def test_search_can_be_filtered(factory):
    """Filtering on type='street' keeps the street and drops the city."""
    street = factory(name="rue de Paris", type="street")
    city = factory(name="Paris", type="city")

    found_ids = [hit.id for hit in search("paris", type="street")]

    assert street['id'] in found_ids
    assert city['id'] not in found_ids
def test_housenumber_type_can_be_filtered(factory):
    """type='housenumber' only returns streets that have housenumbers."""
    street_without_housenumber = factory(name="avenue de Paris",
                                         type="street")
    street_with_housenumber = factory(
        name="rue de Paris", type="street",
        housenumbers={'11': {'lat': '48.3254', 'lon': '2.256'}})

    found_ids = [hit.id for hit in search("paris", type="housenumber")]

    assert street_with_housenumber['id'] in found_ids
    assert street_without_housenumber['id'] not in found_ids
def test_allow_to_set_result_values(factory):
    """Attributes assigned on a result can be read back unchanged."""
    factory(name="porte des lilas", type="street", id="456")
    first = search("porte des lilas")[0]

    first.name = "blah"
    first.score = 22

    # Plugins may need that.
    assert first.name == "blah"
    assert first.score == 22
def test_autocomplete_should_give_priority_to_nearby(factory, monkeypatch):
    """The nearby, lower-importance document still makes the top three."""
    monkeypatch.setattr('addok.config.config.BUCKET_LIMIT', 3)
    monkeypatch.setattr('addok.core.Search.SMALL_BUCKET_LIMIT', 2)

    # One document at the search center, with the lowest importance ...
    expected = factory(name='Le Bourg', lat=48.1, lon=2.2, importance=0.09)
    # ... and three others elsewhere with a higher importance.
    for lat, lon in ((-48.1, -2.2), (8.1, 42.2), (10, 20)):
        factory(name='Le Bourg', lat=lat, lon=lon, importance=0.1)

    results = search('bou', lat=48.1, lon=2.2, limit=3)

    assert len(results) == 3
    found_ids = [hit.id for hit in results]
    assert expected['id'] in found_ids
def test_nearby_should_be_included_even_in_overflow(factory, monkeypatch):
    """With more documents than the bucket limit, the nearby one is kept."""
    monkeypatch.setattr('addok.config.BUCKET_LIMIT', 3)
    monkeypatch.setattr('addok.core.Search.SMALL_BUCKET_LIMIT', 2)

    # One document at the search center, with the lowest importance ...
    expected = factory(name='Le Bourg', lat=48.1, lon=2.2, importance=0.09)
    # ... and three others elsewhere with a higher importance.
    for lat, lon in ((-48.1, -2.2), (8.1, 42.2), (10, 20)):
        factory(name='Le Bourg', lat=lat, lon=lon, importance=0.1)

    results = search('bourg', lat=48.1, lon=2.2, limit=3, verbose=True)

    assert len(results) == 3
    found_ids = [hit.id for hit in results]
    assert expected['id'] in found_ids
def test_found_term_is_not_autocompleted_if_enough_results(factory, monkeypatch):
    """'mont' is not expanded to 'montagne' when exact matches fill the limit."""
    monkeypatch.setattr('addok.config.COMMON_THRESHOLD', 3)
    monkeypatch.setattr('addok.config.BUCKET_LIMIT', 3)

    montagne = factory(name="rue de la montagne", city="Vitry")
    for city in ("Vitry", "Paris", "Lille"):
        factory(name="rue du mont", city=city)

    found_ids = [hit.id
                 for hit in search('rue mont', limit=2, autocomplete=True)]

    assert len(found_ids) == 2
    assert montagne['id'] not in found_ids
def test_should_return_results_if_only_common_terms(factory, monkeypatch):
    """The three 'rue de la monnaie' streets are found, 'La monnaye' is not."""
    monkeypatch.setattr('addok.config.config.COMMON_THRESHOLD', 3)
    monkeypatch.setattr('addok.config.config.BUCKET_LIMIT', 3)

    matching = [factory(name="rue de la monnaie", city=city)
                for city in ("Vitry", "Paris", "Condom")]
    near_miss = factory(name="La monnaye", city="Saint-Loup-Cammas")

    found_ids = [hit.id for hit in search('rue de la monnaie')]

    for street in matching:
        assert street['id'] in found_ids
    assert near_miss['id'] not in found_ids
def test_config_make_labels_is_used_if_defined(config, factory):
    """A custom MAKE_LABELS callable changes which document ranks first."""

    def make_labels(result):
        # Give the lowercase-named document a label that cannot match.
        bad_label = result.name == "porte des lilas"
        return ['areallybadlabel'] if bad_label else [result.name]

    config.MAKE_LABELS = make_labels
    factory(name="porte des lilas", type="street", id="456", importance=1)
    factory(name="porte des Lilas", type="street", id="123")

    best, runner_up = search("porte des lilas")[:2]

    # The document keeping its real name as label comes first.
    assert best.id == "123"
    assert best.score > 0.9
    assert runner_up.score > 0.1
def on_get(self, req, resp, **kwargs):
    """Search for ``q`` and write the results to ``resp`` as GeoJSON.

    Query parameters read from ``req``:
      * ``q`` -- search text, required.
      * ``limit`` -- integer, 5 when absent or 0.
      * ``autocomplete`` -- boolean, ``True`` when absent.
      * coordinates, as parsed by ``self.parse_lon_lat``.

    Raises:
        falcon.HTTPBadRequest: when ``q`` is missing or empty.
    """
    query = req.get_param('q')
    if not query:
        raise falcon.HTTPBadRequest('Missing query', 'Missing query')
    limit = req.get_param_as_int('limit') or 5  # use config
    autocomplete = req.get_param_as_bool('autocomplete')
    if autocomplete is None:
        # Parameter not supplied: autocomplete is on by default.
        autocomplete = True
    lon, lat = self.parse_lon_lat(req)
    center = None
    # Compare with None so that a 0.0 coordinate still produces a center.
    # NOTE(review): assumes parse_lon_lat returns None for missing or
    # invalid values -- confirm.
    if lon is not None and lat is not None:
        center = (lon, lat)
    filters = self.match_filters(req)
    results = search(query, limit=limit, autocomplete=autocomplete,
                     lat=lat, lon=lon, **filters)
    if not results:
        log_notfound(query)
    log_query(query, results)
    self.to_geojson(req, resp, results, query=query, filters=filters,
                    center=center, limit=limit)
def test_does_not_fail_without_usable_tokens(street):
    """A punctuation-only query returns nothing instead of raising."""
    results = search('./.$*')
    assert not results
def test_fuzzy_should_work_with_inversion(city):
    """Two swapped letters ('andreys') still match 'Andrésy'."""
    city.update(name="Andrésy")
    results = search('andreys')
    assert results
def test_process_should_deindex_if_action_is_given(factory):
    """A 'delete' action sent to process_documents removes the document."""
    document = factory(name="Mélicocq")
    assert search("Mélicoq")

    # Only the action and the document id are sent.
    delete_order = {"_action": "delete", "_id": document["_id"]}
    process_documents(json.dumps(delete_order))

    assert not search("Mélicoq")
def test_should_give_priority_to_best_match(street, city):
    """The city named exactly 'Andrésy' outranks the street containing it."""
    street.update(name="rue d'Andrésy")
    city.update(name='Andrésy')

    best = search('andresy')[0]

    assert best.id == city['id']
def test_should_be_fuzzy(city):
    """Both misspellings of 'Andrésy' used here return results."""
    city.update(name="Andrésy")
    for misspelled in ('antresy', 'antresu'):
        assert search(misspelled)
def test_synonyms_should_be_replaced(street, config):
    """With 'bd' -> 'boulevard' configured, searching 'bd' finds the boulevard."""
    config.MIN_SCORE = 0
    config.SYNONYMS = {'bd': 'boulevard'}
    street.update(name='boulevard des Fleurs')

    results = search('bd')

    assert results
def test_should_compare_with_multiple_values(city, factory):
    """A city listing 'Vernou' among its names scores like one named 'Vernou'."""
    city.update(name=["Vernou-la-Celle-sur-Seine", "Vernou"])
    factory(name="Vernou", type="city")

    results = search("vernou")

    assert len(results) == 2
    first, second = results[0], results[1]
    assert first.score == second.score
def multiple_search(queries, **args):
    """Run ``search`` for every query and return the best result list.

    The best list is the one whose first result has the highest score.
    An empty or ``None`` result list, or a falsy top score, ranks as 0.
    On ties the list of the earliest query is returned.

    Args:
        queries: the query strings to try.
        **args: keyword arguments forwarded unchanged to ``search``.

    Returns:
        The winning result list, or ``[]`` when ``queries`` is empty.
    """
    if not queries:
        return []

    def top_score(results):
        # Explicit conditional instead of the `a and b or c` idiom.
        return (results[0].score or 0) if results else 0

    return max((search(query, **args) for query in queries), key=top_score)
def test_should_give_priority_to_best_match2(street, factory):
    """'rue andresy' ranks the street named after Andrésy before the other."""
    street.update(name="rue d'Andrésy", city="Conflans")
    factory(name="rue de Conflans", city="Andrésy")

    results = search("rue andresy")

    assert len(results) == 2
    best = results[0]
    assert best.id == street['id']
def test_process_should_index_by_default(factory):
    """A document created with skip_index=True is searchable after process()."""
    document = factory(skip_index=True, name="Melicocq")
    # Not searchable before processing.
    assert not search("Mélicocq")

    process(document)

    assert search("Melicocq")
def test_should_do_autocomplete_on_last_term(street):
    """The prefix 'wambre' is only completed when it is the last term."""
    street.update(name='rue de Wambrechies', city="Bondues")

    prefix_last = search('avenue wambre', autocomplete=True)
    assert prefix_last

    prefix_first = search('wambre avenue', autocomplete=True)
    assert not prefix_first
def test_fuzzy_should_match_with_removal(city):
    """A query with one extra letter ('andressy') still matches 'Andrésy'."""
    city.update(name="Andrésy")
    results = search('andressy')
    assert results
def test_found_term_is_autocompleted_if_missing_results(factory, config):
    """'rue mont' with autocomplete returns both 'mont' and 'montagne'."""
    config.COMMON_THRESHOLD = 3
    config.BUCKET_MAX = 3
    factory(name="rue de la montagne", city="Vitry")
    factory(name="rue du mont", city="Vitry")

    results = search('rue mont', autocomplete=True)

    assert len(results) == 2
def test_should_be_fuzzy_of_1_by_default(city):
    """'antresy' matches 'Andrésy' by default, 'antresu' does not."""
    city.update(name="Andrésy")

    close_typo = search('antresy')
    assert close_typo

    far_typo = search('antresu')
    assert not far_typo
def test_should_match_name_without_accent(street):
    """An unaccented query matches an accented name."""
    # Nothing matches before the rename.
    assert not search('andresy')

    street.update(name='Andrésy')

    results = search('andresy')
    assert results
def test_should_not_return_housenumber_if_number_is_also_in_name(housenumber):
    """An '11' that belongs to the street name is not taken as a housenumber."""
    housenumber.update(name='rue du 11 Novembre')

    # The only '11' in the query is part of the name.
    name_only = search('rue du 11 novembre')
    assert not name_only[0].housenumber

    # A leading '11' on top of the name is the housenumber.
    numbered = search('11 rue du 11 novembre')
    assert numbered[0].housenumber == '11'
def test_score_is_not_greater_than_one(factory):
    """An exact match on a document of importance 1 scores exactly 1."""
    factory(name='rue de paris', importance=1)

    results = search('rue de paris')

    assert len(results) == 1
    only_hit = results[0]
    assert only_hit.score == 1
def test_not_found_term_is_autocompleted(factory, config):
    """The trailing 'mon' is completed so that 'monnaie' is found."""
    config.COMMON_THRESHOLD = 3
    config.BUCKET_LIMIT = 3
    factory(name="rue de la monnaie", city="Vitry")

    results = search('rue de la mon')

    assert results
def test_document_without_name_should_not_be_indexed(factory):
    """Indexing a document whose name was removed leaves it unsearchable."""
    document = factory(skip_index=True, city="Montceau-les-Mines")

    # Drop the name, then index explicitly.
    del document['name']
    document.index()

    results = search('Montceau-les-Mines')
    assert not results
def test_process_should_index_by_default(factory):
    """A JSON document without an action is indexed by process_documents."""
    document = factory(skip_index=True, name="Melicocq")
    # Not searchable before processing.
    assert not search("Mélicocq")

    serialized = json.dumps(document.copy())
    process_documents(serialized)

    assert search("Melicocq")
def test_found_term_is_autocompleted_if_missing_results(factory, monkeypatch):
    """'rue mont' with autocomplete returns both 'mont' and 'montagne'."""
    for setting in ('COMMON_THRESHOLD', 'BUCKET_MAX'):
        monkeypatch.setattr('addok.config.config.' + setting, 3)
    factory(name="rue de la montagne", city="Vitry")
    factory(name="rue du mont", city="Vitry")

    results = search('rue mont', autocomplete=True)

    assert len(results) == 2
def test_importance_should_be_minored_if_geohash(factory, config):
    """The importance score is lower when the search is given a location.

    Without coordinates the first 'importance' score entry is 0.1; with
    coordinates it is 0.01.
    """
    import math

    factory(name="rue descartes", lon=2.2, lat=48.1, importance=1)

    results = search('rue descartes')
    assert math.isclose(results[0]._scores['importance'][0], 0.1)

    results = search('rue descartes', lon=2.2, lat=48.1)
    # Tolerant comparison: the previous literal 0.010000000000000002 was
    # 0.01 plus float rounding noise, which made the assertion brittle.
    assert math.isclose(results[0]._scores['importance'][0], 0.01)
def test_not_found_term_is_autocompleted(factory, monkeypatch):
    """The trailing 'mon' is completed so that 'monnaie' is found."""
    for setting in ('COMMON_THRESHOLD', 'BUCKET_MAX'):
        monkeypatch.setattr('addok.config.config.' + setting, 3)
    factory(name="rue de la monnaie", city="Vitry")

    results = search('rue de la mon')

    assert results
def test_should_match_name_case_insensitive(street):
    """A lowercase query matches a capitalised name."""
    # Nothing matches before the rename.
    assert not search('conflans')

    street.update(name='Conflans')

    results = search('conflans')
    assert results
def test_synonyms_should_be_replaced(street, monkeypatch):
    """With 'bd' -> 'boulevard' patched in, searching 'bd' finds the boulevard."""
    synonyms = {'bd': 'boulevard'}
    monkeypatch.setattr('addok.helpers.text.SYNONYMS', synonyms)
    street.update(name='boulevard des Fleurs')

    results = search('bd')

    assert results
def search2steps(config, query1, queries2, autocomplete, limit, **filters):
    """Search in two steps, using step-1 results to filter step-2 queries.

    Step 1 runs ``search2steps_step1`` on ``query1``. When ``queries2`` is
    empty, the first ``limit`` step-1 results are combined with a plain
    ``search`` on ``query1``. Otherwise, each step-1 result yields a
    ``(join_value, query_step_1)`` pair, kept when its threshold exceeds
    ``config.SEARCH_2_STEPS_STEP1_THRESHOLD``. For each unique pair, every
    entry of ``queries2`` is searched with ``query_step_1`` appended, under
    the pivot filter and ``config.SEARCH_2_STEPS_STEP2_TYPE``. Step-2
    results scoring above ``config.SEARCH_2_STEPS_STEP2_THRESHOLD`` are
    kept.

    In both cases the "full" results have their score multiplied by
    ``config.SEARCH_2_STEPS_STEP2_PENALITY_MULTIPLIER`` before being added.

    Returns:
        The collected results sorted by descending score, cut to ``limit``
        and de-duplicated by ``id``; or the first ``limit`` step-1 results
        when nothing was collected.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original layout.
    """
    # Fetch the join value
    # NOTE(review): `results` is bound here but never read in this function.
    join_value = threshold = results = None

    # Run step 1 query
    results1 = search2steps_step1(config, query1,
                                  config.SEARCH_2_STEPS_STEP1_LIMIT,
                                  **filters)

    if len(queries2) == 0:
        ret = results1[0:limit]
        results_full = search(query1, limit=limit, autocomplete=autocomplete,
                              **filters)
    else:
        ret = []
        if results1:
            params_steps_2 = []
            # Collect step 1 results
            for result in results1:
                query_step_1 = result.__getattr__(
                    config.SEARCH_2_STEPS_PIVOT_REWRITE)

                # A truthy pivot filter supplied by the caller takes
                # precedence over the value carried by the step-1 result.
                if config.SEARCH_2_STEPS_PIVOT_FILTER in filters and filters[
                        config.SEARCH_2_STEPS_PIVOT_FILTER]:
                    join_value = filters[config.SEARCH_2_STEPS_PIVOT_FILTER]
                    threshold = 1
                else:
                    join_value = result.__getattr__(
                        config.SEARCH_2_STEPS_PIVOT_FILTER)
                    threshold = result.score

                if join_value and threshold > config.SEARCH_2_STEPS_STEP1_THRESHOLD:
                    params_steps_2.append((join_value, query_step_1))

            # Make results uniq
            params_steps_2 = set(params_steps_2)

            # Run steps 2 queries
            for join_value, query_step_1 in params_steps_2:
                # Set step 2 query filter from step 1 result
                filters_step_2 = filters.copy()
                filters_step_2[config.SEARCH_2_STEPS_PIVOT_FILTER] = join_value
                filters_step_2['type'] = config.SEARCH_2_STEPS_STEP2_TYPE
                results_step_2 = multiple_search(
                    [q + ' ' + query_step_1 for q in queries2],
                    limit=limit,
                    autocomplete=autocomplete,
                    **filters_step_2)
                append = False
                if results_step_2:
                    for result_step_2 in results_step_2:
                        if result_step_2.score > config.SEARCH_2_STEPS_STEP2_THRESHOLD:
                            append = True
                            ret.append(result_step_2)
                if not append:
                    # No usable result from steps 2, use steps 1 result
                    # NOTE(review): `result` is whatever the collection loop
                    # above left bound (its last item), not necessarily the
                    # step-1 result that produced this join_value -- confirm
                    # this is intended.
                    # Lower the score
                    result.score *= config.SEARCH_2_STEPS_STEP2_PENALITY_MULTIPLIER
                    if result.score > config.SEARCH_2_STEPS_STEP2_THRESHOLD:
                        ret.append(result)

        results_full = multiple_search([q + ' ' + query1 for q in queries2],
                                       limit=limit,
                                       autocomplete=autocomplete,
                                       **filters)

    for result in results_full:
        # Lower the score
        result.score *= config.SEARCH_2_STEPS_STEP2_PENALITY_MULTIPLIER
        ret.append(result)

    if ret:
        # Sort and limit results for all queries
        ret = sorted(ret, key=lambda k: k.score, reverse=True)[0:limit]

        # Make result uniq
        ids = []
        uniq = []
        for e in ret:
            if e.id not in ids:
                uniq.append(e)
                ids.append(e.id)

        return uniq
    else:
        return results1[0:limit]
def test_should_keep_unchanged_name_as_default_label(factory):
    """The string form of a result is the document name, case preserved."""
    factory(name="Porte des Lilas")
    results = search("porte des lilas")
    # The comparison was previously a bare expression with no `assert`,
    # so the test could never fail.
    assert str(results[0]) == "Porte des Lilas"
def test_should_do_autocomplete(street):
    """The prefix 'wambre' is completed wherever it sits in the query."""
    street.update(name='rue de Wambrechies', city="Bondues")

    for query in ('avenue wambre', 'wambre avenue'):
        assert search(query, autocomplete=True)
def on_get(self, req, resp, **kwargs):
    """Search for ``q`` and render de-duplicated, language-ordered results.

    Query parameters read from ``req``:
      * ``q`` -- search text, required.
      * ``language`` -- preferred result language, ``'zh'`` when absent.
      * ``limit`` -- integer passed to ``search``, 20 when absent or 0.
      * ``autocomplete`` -- boolean, ``True`` when absent.
      * coordinates, as parsed by ``self.parse_lon_lat``.

    Results are sorted by: preferred language first, then position of the
    query inside the name, then name length. Results whose type starts with
    'R' are de-duplicated by id, all others by lowercased name. When no
    ``limit`` was supplied, at most 5 distinct results are rendered.

    Raises:
        falcon.HTTPBadRequest: when ``q`` is missing or empty.
        falcon.HTTPRequestEntityTooLarge: when ``search`` raises
            ``EntityTooLarge``.
    """
    query = req.get_param('q')
    language = req.get_param('language') or 'zh'
    if not query:
        raise falcon.HTTPBadRequest('Missing query', 'Missing query')
    # Read once; reused below to know whether the caller set a limit.
    requested_limit = req.get_param_as_int('limit')
    limit = requested_limit or 20  # use config
    autocomplete = req.get_param_as_bool('autocomplete')
    if autocomplete is None:
        # Default is True.
        # https://github.com/falconry/falcon/pull/493#discussion_r44376219
        autocomplete = True
    lon, lat = self.parse_lon_lat(req)
    center = None
    # Compare with None so that a 0.0 coordinate still produces a center.
    # NOTE(review): assumes parse_lon_lat returns None for missing or
    # invalid values -- confirm.
    if lon is not None and lat is not None:
        center = (lon, lat)
    filters = self.match_filters(req)

    timer = time.perf_counter()
    try:
        results = search(query, limit=limit, autocomplete=autocomplete,
                         lat=lat, lon=lon, **filters)
    except EntityTooLarge as e:
        raise falcon.HTTPRequestEntityTooLarge(str(e)) from e
    # Elapsed time in whole milliseconds.
    timer = int((time.perf_counter() - timer) * 1000)

    if not results:
        log_notfound(query)
    log_query(query, results)
    if config.SLOW_QUERIES and timer > config.SLOW_QUERIES:
        log_slow_query(query, results, timer)

    needle = query.lower()

    def sort_key(item):
        # Names that do not contain the query sort as if found at index 100.
        position = item.name.lower().find(needle)
        if position == -1:
            position = 100
        language_rank = 0 if item.lang == language else 1
        return (language_rank, position, len(item.name))

    results.sort(key=sort_key)

    filtered_results = {}
    for r in results:
        # Type-'R' results are de-duplicated by id, the others by name.
        key = r.id if r.type[0] == 'R' else r.name.lower()
        if key in filtered_results:
            continue
        filtered_results[key] = r
        # Without an explicit limit, stop after 5 distinct results.
        if not requested_limit and len(filtered_results) == 5:
            break

    self.render(req, resp, list(filtered_results.values()), query=query,
                filters=filters, center=center, limit=limit)