Esempio n. 1
0
    def test_num_of_links(self, soup, expected):
        """Check that the number of links found in ``soup`` equals ``expected``.

        Both arguments are supplied by the caller (presumably a parametrized
        fixture; the decorator is not visible in this snippet).
        """
        feature = DocumentObjectFeature(
            soup=soup,
            lp_url="https://felp_dummy_internal.com",
        )

        assert feature.get_num_of_links() == expected
Esempio n. 2
0
    def test_get_text_size_links_ratio(self, soup, expected):
        """Check the text-size/links ratio of ``soup`` against ``expected``.

        The extracted ratio is rounded to two decimal places before the
        comparison, so ``expected`` must be given at that precision.
        """
        feature = DocumentObjectFeature(
            soup=soup,
            lp_url="https://felp_dummy_internal.com",
        )

        rounded_ratio = round(feature.get_text_size_links_ratio(), 2)
        assert rounded_ratio == expected
Esempio n. 3
0
    def test_number_of_external_total_links_ratio(self, soup, expected):
        """Check the external/total links ratio of ``soup`` against ``expected``.

        The extracted ratio is rounded to two decimal places before the
        comparison, so ``expected`` must be given at that precision.
        """
        feature = DocumentObjectFeature(
            soup=soup,
            lp_url="https://felp_dummy_internal.com",
        )

        rounded_ratio = round(
            feature.get_number_of_external_total_links_ratio(), 2)
        assert rounded_ratio == expected
Esempio n. 4
0
    def test_get_links(self, soup, expected_internal, expected_external):
        """Check how many internal and external links are extracted from ``soup``.

        ``get_links()`` is expected to return an object exposing ``internal``
        and ``external`` collections; only their lengths are compared.
        """
        feature = DocumentObjectFeature(
            soup=soup,
            lp_url="https://felp_dummy_internal.com",
        )
        found = feature.get_links()

        internal_count = len(found.internal)
        assert internal_count == expected_internal

        external_count = len(found.external)
        assert external_count == expected_external
Esempio n. 5
0
    def test_get_main_text(self, soup, expected_main_text_num):
        """Check the extracted main text against a stored fixture file.

        The expected text is read from
        ``FIXTURES_ROOT / expected_main_text_num / "main_text.txt"`` and must
        match the extractor's output exactly.
        """
        extractor = DocumentObjectFeature(
            lp_url="https://felp_dummy_internal.com", soup=soup)
        main_text = extractor.get_main_text()

        expected_path = self.FIXTURES_ROOT / expected_main_text_num / "main_text.txt"
        # Pin the encoding: without it the file is decoded with the platform's
        # locale encoding, which makes the test fail (or mis-decode) on hosts
        # whose default is not UTF-8.
        # NOTE(review): assumes the fixture files are stored as UTF-8 -- confirm.
        with expected_path.open("r", encoding="utf-8") as rf:
            expected_text = rf.read()

        assert main_text == expected_text
Esempio n. 6
0
 def test_get_external_links(self, soup, expected):
     """Unfinished test for ``get_external_links``.

     Prints the extracted external links and then always raises
     ``NotImplementedError``; ``expected`` is not used yet.
     """
     feature = DocumentObjectFeature(
         soup=soup, lp_url="https://felp_dummy_internal.com")
     print(feature.get_external_links())
     # TODO: replace with a real assertion against ``expected``.
     raise NotImplementedError
Esempio n. 7
0
 def __init__(self, lp_url: str, soup: BeautifulSoup) -> None:
     """Keep the parsed page and build a ``DocumentObjectFeature`` for it.

     Args:
         lp_url: URL passed through to ``DocumentObjectFeature``.
         soup: Parsed document, stored on the instance and also passed to
             ``DocumentObjectFeature``.
     """
     self._soup = soup
     doc_features = DocumentObjectFeature(lp_url, soup)
     self._doc_feat_extractor = doc_features