def test_special_characters_are_not_ignored(self):
    """A comma inside the text must prevent a match; plain spacing must not."""
    phrase = "Armii Krajowej"

    # The comma separating the two words is expected to break the match.
    found_with_comma, *_ = TextSearcher.find(
        phrase_to_find=phrase,
        text="na ulicy Armii, Krajowej",
    )
    self.assertFalse(found_with_comma)

    # Same words without the comma: the phrase is expected to be found.
    found_without_comma, *_ = TextSearcher.find(
        phrase_to_find=phrase,
        text="na ulicy Armii Krajowej",
    )
    self.assertTrue(found_without_comma)
def test_custom_comparators(self):
    """The phrase must be found in every listed text when the morphologic comparator is used."""
    morphologic_equals = MorphologicComparator().equals
    phrase = "Nowy Kleparz"
    texts_expected_to_match = (
        "na Nowym Kleparzu",
        "lokalizacja Nowy Kleparz",
        "niedaleko od Nowego Kleparza",
    )

    # Plain loop (no subTest): the first failing text aborts the test,
    # just as a sequence of separate assertions would.
    for candidate_text in texts_expected_to_match:
        found, *_ = TextSearcher.find(
            phrase_to_find=phrase,
            text=candidate_text,
            equality_comparator=morphologic_equals,
        )
        self.assertTrue(found)
def _contains_phrase(self, phrase, flat):
    """Return True when ``phrase`` is found in the flat's title or description.

    The title is searched first; the description is only read and searched
    when the title yields no match. Both searches use the ``equals`` method
    of one shared ``MorphologicComparator`` instance.
    """
    morphologic_equals = MorphologicComparator().equals

    # Attributes are fetched by name inside the loop so that
    # ``flat.description`` is not touched when the title already matches.
    for attribute in ("title", "description"):
        hit, _ = TextSearcher.find(
            phrase_to_find=phrase,
            text=getattr(flat, attribute),
            equality_comparator=morphologic_equals,
        )
        if hit:
            return True
    return False
def __call__(self, description: str):
    """Check the description for a walk-through-room phrase.

    Looks for 'przechodni pokój' / 'pokój przechodni' using the
    ``MorphologicComparator`` equality function.

    Returns:
        A one-element set: ``{True}`` if any phrase is found, else ``{False}``.
    """
    morphologic_equals = MorphologicComparator().equals

    def is_mentioned(phrase):
        # Only the first element of the search result (the match indicator)
        # matters here; the second is discarded.
        hit, _ = TextSearcher.find(
            phrase_to_find=phrase,
            text=description,
            equality_comparator=morphologic_equals,
        )
        return bool(hit)

    # any() short-circuits, so later phrases are not searched after a hit.
    return {
        any(
            is_mentioned(phrase)
            for phrase in ('przechodni pokój', 'pokój przechodni')
        )
    }
def __call__(self, description: str):
    """Check the description for a kitchenette / open-kitchen phrase.

    Looks for 'aneks kuchenny', 'aneks' and 'salon z kuchnią', in that order,
    using the ``MorphologicComparator`` equality function.

    Returns:
        A one-element set: ``{True}`` if any phrase is found, else ``{False}``.
    """
    kitchenette_phrases = ('aneks kuchenny', 'aneks', 'salon z kuchnią')
    morphologic_equals = MorphologicComparator().equals

    def search(phrase):
        # Return just the match indicator; any() applies the truth test.
        hit, _ = TextSearcher.find(
            phrase_to_find=phrase,
            text=description,
            equality_comparator=morphologic_equals,
        )
        return hit

    # map() is lazy, so the search stops at the first phrase that matches.
    return {any(map(search, kitchenette_phrases))}
def __call__(self, description: str):
    """Check the description for a phrase denoting a single-room flat.

    The phrases (e.g. 'kawalerka', 'studio', '1-pokojowe') are tried in
    order, using the ``MorphologicComparator`` equality function.

    Returns:
        A one-element set: ``{True}`` if any phrase is found, else ``{False}``.
    """
    single_room_phrases = (
        'kawalerka',
        'studio',
        'garsoniera',
        'jednopokojowe',
        '1-pokojowe',
        '1 pokojowe',
        'jedno pokojowe',
    )
    morphologic_equals = MorphologicComparator().equals

    mentioned = False
    for phrase in single_room_phrases:
        hit, _ = TextSearcher.find(
            phrase_to_find=phrase,
            text=description,
            equality_comparator=morphologic_equals,
        )
        if hit:
            # One match is enough; skip the remaining phrases.
            mentioned = True
            break
    return {mentioned}
def does_contain_person_first_name(self, text):
    """Tell whether ``text`` contains a person's first name.

    Two stages:

    1. Each phrase in ``self._contain_person_first_name_exceptions`` is
       searched for in ``text``; the value mapped to the first phrase found
       is returned as-is.
    2. Otherwise ``text`` is split on whitespace and every form returned by
       ``self._analyser.get_base_form(word)`` is checked for membership in
       ``self._first_names``.

    Args:
        text: The text to inspect.

    Returns:
        The mapped exception value if an exception phrase is found;
        otherwise ``True`` if any base form of any word is a known first
        name, else ``False``.
    """
    exceptions = self._contain_person_first_name_exceptions
    if exceptions:
        # The comparator does not depend on the phrase being checked, so it
        # is built once instead of once per exception. It is built only when
        # there is something to check, so an empty mapping costs nothing.
        comparator = MorphologicComparator().equals
        for contain_exception, ret_val in exceptions.items():
            does_contain, *_ = TextSearcher.find(
                phrase_to_find=contain_exception,
                text=text,
                equality_comparator=comparator,
            )
            if does_contain:
                return ret_val

    # No exception applied: fall back to word-by-word analysis.
    return any(
        inflection in self._first_names
        for word in text.split()
        for inflection in self._analyser.get_base_form(word)
    )
def _match_locations(self, all_locations, description): all_matched_locations = [] for location in all_locations: for location_name in [ location["official"], *location["colloquial"] ]: found, all_words = TextSearcher.find( phrase_to_find=location_name, text=description, equality_comparator=self._get_comparator(location_name)) if found: for match_slice_pos in found: match = AddressMatch( location=location['official'], match_slice_position=match_slice_pos, source=all_words) all_matched_locations.append(match) return all_matched_locations
def test_text_searcher_correctly_handles_incorrect_spacing(self):
    """The phrase must still be found when a hyphen is glued to it."""
    # A correctly written hyphen would have spaces on both sides; here it
    # directly follows the phrase's last word.
    badly_spaced_text = "Armii Krajowej-na skrzyżowaniu"

    phrase_found, *_ = TextSearcher.find(
        phrase_to_find="Armii Krajowej",
        text=badly_spaced_text,
    )

    self.assertTrue(phrase_found)