def test_nearby_location_context(self):
        # Runs the nearby-location helper on two sentences that mention the
        # same two streets: one contains the introducer phrase, one does not.
        provider = MockedAddressProvider(streets=[
            {"official": "Rondo Grunwaldzkie", "colloquial": []},
            {"official": "Wawel", "colloquial": []},
        ])
        shared_kwargs = dict(
            introducers={'w sąsiedztwie'},
            conjunctions={'i'},
            address_provider=provider,
        )

        # Sentence containing the introducer 'w sąsiedztwie': expected True.
        with_introducer = (
            "Znakomita lokalizacja w sąsiedztwie Ronda Grunwaldzkiego i Wawelu")
        self._test_nearby_location_context_helper(
            sentence=with_introducer,
            subject_slice_beg_end=(4, 6),  # Ronda Grunwaldzkiego
            expected_result=True,
            **shared_kwargs)

        # Sentence without any introducer: expected False.
        without_introducer = "W Krakowie znajduje się Rondo Grunwaldzkie i Wawel"
        self._test_nearby_location_context_helper(
            sentence=without_introducer,
            subject_slice_beg_end=(4, 6),  # Rondo Grunwaldzkie
            expected_result=False,
            **shared_kwargs)
    def test_location_is_not_matched_if_it_is_not_flat_address(self):
        # With a NearbyLocationContext excluded, only "Karmelicka" is expected
        # among the streets and exactly one address overall.
        provider = MockedAddressProvider(
            streets=[
                {"official": "Szeroka", "colloquial": []},
                {"official": "Karmelicka", "colloquial": []},
            ],
            places=[
                {"official": "Ikea", "colloquial": []},
            ],
        )

        nearby_context = NearbyLocationContext(
            introducers={'w sąsiedztwie'},
            conjunctions={'i'},
            address_provider=provider)
        extractor = AddressExtractor(provider,
                                     excluded_contexts=[nearby_context])

        text = "Mieszkanie znajduje się na ulicy Karmelickiej. W sąsiedztwie ul. Szeroka i Ikea"
        found_address = extractor(text)

        street_names = [str(match.location) for match in found_address.street]
        self.assertIn("Karmelicka", street_names)
        self.assertEqual(1, len(found_address.all_addresses))
    def test_conjunction_with_address_having_prefix(self):
        # Checks the nearby-location helper for subjects on either side of the
        # conjunction "i", where one of the locations carries the "ul." prefix.
        # Every (sentence, subject slice) pair is expected to yield True.
        mocked_address_provider = MockedAddressProvider(
            streets=[{
                "official": "Szeroka",
                "colloquial": [],
            }],
            places=[{
                "official": "Ikea",
                "colloquial": [],
            }])

        test_cases = [
            ("W pobliżu Ikea i ul. Szeroka", (6, 7)),  # 'Szeroka'
            ("W pobliżu Ikea i ul. Szeroka", (4, 7)),  # 'ul. Szeroka'
            ("W pobliżu Ikea i ul. Szeroka", (2, 3)),  # 'Ikea'
            ("W pobliżu ul. Szeroka i Ikea", (4, 5)),  # 'Szeroka'
            ("W pobliżu ul. Szeroka i Ikea", (2, 5)),  # 'ul. Szeroka'
            ("W pobliżu ul. Szeroka i Ikea", (6, 7)),  # 'Ikea'
        ]

        for sentence, subject in test_cases:
            # subTest keeps the remaining cases running after a failure and
            # names the failing (sentence, subject) pair in the report.
            with self.subTest(sentence=sentence, subject=subject):
                self._test_nearby_location_context_helper(
                    sentence=sentence,
                    subject_slice_beg_end=subject,
                    expected_result=True,
                    introducers={"w pobliżu"},
                    conjunctions={"i"},
                    address_provider=mocked_address_provider)
    def test_address_extractor_correctly_recognize_location_type(self):
        # One street, one estate and one district are registered; each name
        # must be reported under its own category and nowhere else.
        provider = MockedAddressProvider(
            streets=[{"official": "Stanisława", "colloquial": []}],
            estates=[{"official": "Grzegorza", "colloquial": []}],
            districts=[{"official": "Piotra", "colloquial": []}],
        )
        extractor = AddressExtractor(provider)

        text = "blah blah Piotra blah Grzegorza blah Stanisława"
        found_address = extractor(text)

        def names_of(matches):
            return [str(match.location) for match in matches]

        self.assertEqual(len(found_address.street), 1)
        self.assertIn("Stanisława", names_of(found_address.street))

        self.assertEqual(len(found_address.estate), 1)
        self.assertIn("Grzegorza", names_of(found_address.estate))

        self.assertEqual(len(found_address.district), 1)
        self.assertIn("Piotra", names_of(found_address.district))
    def test_multiple_consecutive_contexts(self):
        # Two consecutive sentences, each opened by its own introducer; every
        # one of the four locations is expected to be reported as True.
        official_names = [
            "Rondo Grunwaldzkie",
            "Wawel",
            "Stare Miasto",
            "Nowa Huta",
        ]
        provider = MockedAddressProvider(streets=[
            {"official": name, "colloquial": []} for name in official_names
        ])

        text = "W pobliżu Rondo Grunwaldzkie i Wawel. Niedaleko Stare Miasto i Nowa Huta"
        subject_slices = [
            (2, 4),  # "Rondo Grunwaldzkie"
            (5, 6),  # "Wawel"
            (8, 10),  # "Stare Miasto"
            (11, 13),  # "Nowa Huta"
        ]

        for subject in subject_slices:
            with self.subTest(subject=subject):
                self._test_nearby_location_context_helper(
                    sentence=text,
                    subject_slice_beg_end=subject,
                    expected_result=True,
                    introducers={"W pobliżu", "Niedaleko"},
                    conjunctions={'i'},
                    address_provider=provider)
    def test_only_longest_location_from_overlapping_matches_is_returned(self):
        # Two scenarios in which the names of two registered locations share
        # a word; only the expected one may appear among the matches.

        def matched_names(provider, text):
            found_address = AddressExtractor(provider)(text)
            return [str(match.location) for match in found_address.all]

        # Scenario 1: two streets, "Zygmunta Starego" and "Stare Podgórze".
        with self.subTest():
            two_streets = MockedAddressProvider(streets=[
                {"official": "Zygmunta Starego", "colloquial": []},
                {"official": "Stare Podgórze", "colloquial": []},
            ])
            names = matched_names(
                two_streets,
                "\nDo wynajęcia 1-pokojowe funkcjonalne mieszkanie w spokojnej, dobrze skomunikowanej"
                " okolicy - Stare Podgórze przy ulicy Zamoyskiego, bardzo dobry dojazd do każdej części miasta."
            )

            self.assertIn("Stare Podgórze", names)
            self.assertNotIn("Zygmunta Starego", names)

        # Scenario 2: a street whose name is the tail of a place name.
        with self.subTest():
            street_and_place = MockedAddressProvider(
                streets=[{"official": "Bronowicka", "colloquial": []}],
                places=[{"official": "Galeria Bronowicka", "colloquial": []}],
            )
            names = matched_names(street_and_place, "Galeria Bronowicka")

            self.assertIn("Galeria Bronowicka", names)
            self.assertNotIn("Bronowicka", names)
    def test_address_extractor_performs_morphological_comparison(self):
        # The inflected form "Stanisławowi" must be matched to the registered
        # street "Stanisława".
        provider = MockedAddressProvider(
            streets=[{"official": "Stanisława", "colloquial": []}])

        found_address = AddressExtractor(provider)("Stanisławowi")

        street_names = [str(match.location) for match in found_address.street]
        self.assertIn("Stanisława", street_names)
    def test_case_matters(self):
        # The lower-case adjective "śliczne" must not be matched to the
        # registered street "Śliczna"; no location at all is expected.
        provider = MockedAddressProvider(
            streets=[{"official": "Śliczna", "colloquial": []}])
        extractor = AddressExtractor(provider)

        text = "Oferuję do wynajęcia śliczne mieszkanie 4-pokojowe"
        found_address = extractor(text)

        self.assertEqual(0, len(found_address.all))
    def test_address_extractor_correctly_compares_names(self):
        # "Tadeusza Kościuszki" must be found by its last word alone and by
        # the full name, but not by its first word alone.
        streets = [{
            "official": "Tadeusza Kościuszki",
            "colloquial": [],
        }]

        def street_names_found_in(text):
            # A fresh provider and extractor are built for every query.
            extractor = AddressExtractor(MockedAddressProvider(streets=streets))
            found_address = extractor(text)
            return [str(match.location) for match in found_address.street]

        self.assertIn("Tadeusza Kościuszki",
                      street_names_found_in("Kościuszki"))
        self.assertIn("Tadeusza Kościuszki",
                      street_names_found_in("Tadeusza Kościuszki"))
        self.assertNotIn("Tadeusza Kościuszki",
                         street_names_found_in("Tadeusza"))
    def test_extract_address_with_unit_number(self):
        # "Zamoyskiego 15" must be reported as the official street name with
        # the unit number appended: "Jana Zamoyskiego 15".
        provider = MockedAddressProvider(
            streets=[{"official": "Jana Zamoyskiego", "colloquial": []}])
        extractor = AddressExtractor(provider)

        found_address = extractor("Zamoyskiego 15")

        street_names = [str(match.location) for match in found_address.street]
        self.assertIn("Jana Zamoyskiego 15", street_names)
    def test_case_does_not_matter_phrase_in_text_is_all_upper_case(self):
        # The all-upper-case, inflected "ŚLICZNEJ" must still be matched to
        # the registered street "Śliczna".
        provider = MockedAddressProvider(
            streets=[{"official": "Śliczna", "colloquial": []}])
        extractor = AddressExtractor(provider)

        found_address = extractor("mieszkanie przy ulicy ŚLICZNEJ")

        street_names = [str(match.location) for match in found_address.street]
        self.assertIn("Śliczna", street_names)
    def test_duplications_are_merged(self):
        # The district "Nowa Huta" is mentioned twice (in two grammatical
        # forms); exactly one match is expected overall.
        provider = MockedAddressProvider(
            districts=[{"official": "Nowa Huta", "colloquial": []}])
        extractor = AddressExtractor(provider)

        text = 'Dzielnica Nowa Huta. Mieszkanie się na Nowej Hucie'
        found_address = extractor(text)

        self.assertEqual(1, len(found_address.all))
    def test_osiedle_street_is_not_matched_to_osiedle_location_prefix(self):
        # A street literally named "Osiedle" is registered; the common noun
        # "osiedle" in "Duże osiedle." must not be reported as that street.
        mocked_address_provider = MockedAddressProvider(streets=[
            {
                "official": "Osiedle",
                "colloquial": [],
            },
        ], )

        extractor = AddressExtractor(mocked_address_provider)

        found_address = extractor("Duże osiedle.")

        # Compare by string form, as the sibling tests do, so the check does
        # not depend on how location objects implement equality against str.
        self.assertNotIn("Osiedle",
                         [str(match.location) for match in found_address.all])
    def test_address_extractor_returns_official_name_if_colloquial_name_matched(
            self):
        # The text contains only the colloquial name "Kozłówek"; the reported
        # estate must carry the official name "Osiedle Na Kozłówce".
        mocked_address_provider = MockedAddressProvider(
            estates=[{
                "official": "Osiedle Na Kozłówce",
                "colloquial": ["Kozłówek"],
            }])

        extractor = AddressExtractor(mocked_address_provider)

        found_address = extractor("Kozłówek")

        # Compare by string form, as the sibling tests do, so the check does
        # not depend on how location objects implement equality against str.
        self.assertIn("Osiedle Na Kozłówce",
                      [str(match.location) for match in found_address.estate])
    def test_actual_all_uppercase_bug(self):
        # The all-upper-case text 'CZYŻYNY' must yield exactly one match,
        # reported as "Czyżyny".
        provider = MockedAddressProvider(
            streets=[{"official": "Czyżyny", "colloquial": []}])
        extractor = AddressExtractor(provider)

        found_address = extractor('CZYŻYNY')

        matched_names = [str(match.location) for match in found_address.all]
        self.assertIn("Czyżyny", matched_names)
        self.assertEqual(1, len(found_address.all))
    def test_zl_is_not_matched_to_zlota_street(self):
        # With PriceContext excluded, the currency tokens "zł" / "ZŁ" in a
        # price description must not be reported as the street "Złota".
        mocked_address_provider = MockedAddressProvider(streets=[
            {
                "official": "Złota",
                "colloquial": [],
            },
        ], )

        extractor = AddressExtractor(mocked_address_provider,
                                     excluded_contexts=[PriceContext()])

        found_address = extractor(
            'czynsz najmu : 1600 zł + 553 ZŁ czynsz administracyjny + media .')

        # Compare by string form, as the sibling tests do, so the check does
        # not depend on how location objects implement equality against str.
        self.assertNotIn("Złota",
                         [str(match.location) for match in found_address.all])
    def test_Krakow_city_is_not_recognized_as_Kraka_street(self):
        # Neither "Kraków" nor its inflected form "Krakowie" may be matched
        # to the registered street "Kraka".
        provider = MockedAddressProvider(
            streets=[{"official": "Kraka", "colloquial": []}])
        extractor = AddressExtractor(provider)

        for text in ("miasto Kraków", "w Krakowie"):
            found_address = extractor(text)
            self.assertEqual(0, len(found_address.all))
    def test_introducers_are_case_insensitive(self):
        # The introducer is configured in lower case ("w pobliżu") while the
        # sentence starts with a capital "W"; the helper must still report
        # True for the subject.
        mocked_address_provider = MockedAddressProvider(streets=[
            {
                "official": "Wawel",
                "colloquial": [],
            },
        ])

        self._test_nearby_location_context_helper(
            sentence="W pobliżu Wawel.",
            subject_slice_beg_end=(2, 3),  # "Wawel"
            expected_result=True,
            introducers={"w pobliżu"},
            # `{}` would be an empty dict; every other call passes a set, so
            # an explicit empty set keeps the argument type consistent.
            conjunctions=set(),
            address_provider=mocked_address_provider)
    def test_word_is_not_interpreted_as_location_if_it_is_first_word_of_a_sentence(
            self):
        # With FirstWordOfSentenceContext excluded, "Piękna" opening a
        # sentence must yield no match, while "Piękna 13" in the middle of a
        # sentence must yield at least one.
        provider = MockedAddressProvider(
            streets=[{"official": "Piękna", "colloquial": []}])
        extractor = AddressExtractor(
            provider, excluded_contexts=[FirstWordOfSentenceContext()])

        at_sentence_start = extractor("Jakieś zdanie. Piękna okolica.")
        self.assertEqual(0, len(at_sentence_start.all))

        inside_sentence = extractor("Jakieś zdanie. Lokalizacja - Piękna 13")
        self.assertNotEqual(0, len(inside_sentence.all))
    def test_street_duplications_are_merged(self):
        # Two mentions of the same street (one with a unit number) must be
        # merged into the single match "Mogilska 66"; a lone mention must
        # yield the single match "Mogilska".
        provider = MockedAddressProvider(
            streets=[{"official": "Mogilska", "colloquial": []}])
        extractor = AddressExtractor(provider)

        def names_of(found):
            return [str(match.location) for match in found.all]

        found_address = extractor(
            'Mieszkanie przy ulicy Mogilskiej. Adres Mogilska 66')
        self.assertIn("Mogilska 66", names_of(found_address))
        self.assertEqual(1, len(found_address.all))

        found_address = extractor('Mieszkanie przy ulicy Mogilskiej')
        self.assertIn("Mogilska", names_of(found_address))
        self.assertEqual(1, len(found_address.all))
    def test_conjunction_with_not_address_location(self):
        # Covers the pattern:
        #   nearby_location_introducer + non_address_location + conjunction + address
        # where the non-address location is a registered place.
        provider = MockedAddressProvider(
            streets=[{"official": "Wawel", "colloquial": []}],
            places=[
                {"official": "Ikea", "colloquial": []},
                {"official": "Galeria Bronowicka", "colloquial": []},
            ],
        )
        shared_kwargs = dict(
            expected_result=True,
            introducers={"w pobliżu"},
            conjunctions={"i"},
            address_provider=provider,
        )

        # Single-word place before the conjunction.
        with self.subTest():
            self._test_nearby_location_context_helper(
                sentence="W pobliżu Ikea i Wawel",
                subject_slice_beg_end=(4, 5),  # "Wawel"
                **shared_kwargs)

        # Two-word place before the conjunction.
        with self.subTest():
            self._test_nearby_location_context_helper(
                sentence="W pobliżu Galeria Bronowicka i Bronowice",
                subject_slice_beg_end=(5, 6),  # "Bronowice"
                **shared_kwargs)
 def test_sentence_with_newline_character_in_context(self):
     mocked_address_provider = MockedAddressProvider(streets=[{
         "official":
         "Kobierzyńska",
         "colloquial": [],
     }, {
         "official":
         "prof.Michała Bobrzyńskiego",
         "colloquial": [],
     }], )
     for sentence in [
             "Blisko przystanek autobusowy \nprzy ul. Bobrzyńskiego  lub ul. Kobierzyńskiej.",
             "Blisko przystanek autobusowy przy ul. Bobrzyńskiego  lub\nul. Kobierzyńskiej.",
             "Blisko przystanek autobusowy przy ul. Bobrzyńskiego  lub ul.\nKobierzyńskiej.",
     ]:
         self._test_nearby_location_context_helper(
             sentence=sentence,
             subject_slice_beg_end=(11, 12),  # "Kobierzyńskiej"
             expected_result=True,
             introducers={"przystanek autobusowy przy"},
             conjunctions={"lub"},
             address_provider=mocked_address_provider)
     """ TODO: