def test_special_characters_are_not_ignored(self):
        """A comma between the words of the phrase must prevent a match."""
        phrase = "Armii Krajowej"
        # Each case pairs a text with the assertion its search result must pass.
        cases = (
            ("na ulicy Armii, Krajowej", self.assertFalse),  # comma splits the phrase
            ("na ulicy Armii Krajowej", self.assertTrue),  # phrase appears verbatim
        )

        for text, check in cases:
            found, *_ = TextSearcher.find(phrase_to_find=phrase, text=text)
            check(found)
    def test_custom_comparators(self):
        """With a morphologic comparator the phrase is found in every sample text.

        The samples contain the phrase verbatim as well as in differently
        inflected forms.
        """
        morphologic_equals = MorphologicComparator().equals
        phrase = "Nowy Kleparz"
        texts = (
            "na Nowym Kleparzu",
            "lokalizacja Nowy Kleparz",
            "niedaleko od Nowego Kleparza",
        )

        for text in texts:
            found, *_ = TextSearcher.find(
                phrase_to_find=phrase,
                text=text,
                equality_comparator=morphologic_equals)
            self.assertTrue(found)
Example #3
0
    def _contains_phrase(self, phrase, flat):
        """Return True when ``phrase`` is found in the flat's title or description.

        The title is searched first; the description is only read and searched
        when the title yields no match. Both searches compare words with
        ``MorphologicComparator().equals``.
        """
        morphologic_equals = MorphologicComparator().equals

        def is_found_in(text):
            # Only the first element of the search result decides the outcome.
            found, _ = TextSearcher.find(
                phrase_to_find=phrase,
                text=text,
                equality_comparator=morphologic_equals)
            return bool(found)

        return is_found_in(flat.title) or is_found_in(flat.description)
    def __call__(self, description: str):
        """Report whether ``description`` mentions one of the searched phrases.

        Returns a one-element set: ``{True}`` as soon as either word order of
        the phrase is found, ``{False}`` when neither is.
        """
        morphologic_equals = MorphologicComparator().equals

        def is_mentioned(phrase):
            found, _ = TextSearcher.find(
                phrase_to_find=phrase,
                text=description,
                equality_comparator=morphologic_equals)
            return found

        # any() stops at the first phrase that is found.
        return {
            any(
                is_mentioned(phrase)
                for phrase in ('przechodni pokój', 'pokój przechodni'))
        }
Example #5
0
    def __call__(self, description: str):
        """Report whether ``description`` mentions one of the searched phrases.

        Returns a one-element set: ``{True}`` when any phrase is found,
        ``{False}`` otherwise. Phrases are tried in order and the search stops
        at the first hit.
        """
        morphologic_equals = MorphologicComparator().equals
        any_phrase_found = False

        for wanted in ('aneks kuchenny', 'aneks', 'salon z kuchnią'):
            hit, _ = TextSearcher.find(phrase_to_find=wanted,
                                       text=description,
                                       equality_comparator=morphologic_equals)
            if hit:
                any_phrase_found = True
                break

        return {any_phrase_found}
Example #6
0
    def __call__(self, description: str):
        """Report whether ``description`` mentions one of the searched phrases.

        Returns a one-element set: ``{True}`` when at least one phrase is
        found, ``{False}`` when none is.
        """
        morphologic_equals = MorphologicComparator().equals
        wanted_phrases = (
            'kawalerka', 'studio', 'garsoniera', 'jednopokojowe', '1-pokojowe',
            '1 pokojowe', 'jedno pokojowe',
        )

        def search(phrase):
            found, _ = TextSearcher.find(
                phrase_to_find=phrase,
                text=description,
                equality_comparator=morphologic_equals)
            return found

        # The generator is lazy, so no phrase after the first hit is searched.
        search_results = (search(phrase) for phrase in wanted_phrases)
        return {True} if any(search_results) else {False}
Example #7
0
    def does_contain_person_first_name(self, text):
        """Tell whether ``text`` contains a person's first name.

        Exception phrases take precedence: when a key of
        ``self._contain_person_first_name_exceptions`` is found in ``text``,
        the value mapped to that key is returned as-is. Otherwise every
        whitespace-separated word is checked, and the result is True when any
        base form reported by ``self._analyser.get_base_form`` belongs to
        ``self._first_names``.

        Args:
            text: The text to inspect.

        Returns:
            The value mapped to the first matching exception phrase, or
            True/False from the word-by-word check.
        """
        exceptions = self._contain_person_first_name_exceptions
        if exceptions:
            # The comparator does not depend on the phrase, so it is built once
            # for all exception phrases instead of once per iteration.
            comparator = MorphologicComparator().equals
            for contain_exception, ret_val in exceptions.items():
                does_contain, *_ = TextSearcher.find(
                    phrase_to_find=contain_exception,
                    text=text,
                    equality_comparator=comparator)
                if does_contain:
                    return ret_val

        # No exception applied: fall back to analysing each word.
        return any(
            inflection in self._first_names
            for word in text.split()
            for inflection in self._analyser.get_base_form(word))
    def _match_locations(self, all_locations, description):
        all_matched_locations = []

        for location in all_locations:
            for location_name in [
                    location["official"], *location["colloquial"]
            ]:

                found, all_words = TextSearcher.find(
                    phrase_to_find=location_name,
                    text=description,
                    equality_comparator=self._get_comparator(location_name))

                if found:
                    for match_slice_pos in found:
                        match = AddressMatch(
                            location=location['official'],
                            match_slice_position=match_slice_pos,
                            source=all_words)

                        all_matched_locations.append(match)

        return all_matched_locations
 def test_text_searcher_correctly_handles_incorrect_spacing(self):
     """The phrase is still found when a hyphen directly follows it."""
     phrase = "Armii Krajowej"
     # Deliberately malformed input: the hyphen should be surrounded by spaces.
     malformed_text = "Armii Krajowej-na skrzyżowaniu"

     found, *_ = TextSearcher.find(phrase_to_find=phrase,
                                   text=malformed_text)
     self.assertTrue(found)