def test_extract_text_from_paragraph_that_contains_link():
    """A paragraph wrapping a link yields the link label followed by the rest of the text."""
    # Whitespace between the tags is part of the fixture and is kept as-is.
    markup = (
        ' <html>'
        ' <p><a href="">Login</a> at this awesome website</p>'
        ' </html> '
    )
    expected = "Login at this awesome website"

    parsed = BeautifulSoup(markup, 'html.parser')

    assert converter.extract_text(parsed) == expected
def test_extract_text_with_comment_in_javascriptcode():
    """A script block holding an HTML comment and JavaScript yields an empty string."""
    # Whitespace between the tags is part of the fixture and is kept as-is.
    markup = (
        ' <html>'
        ' <header> </header>'
        ' <p>'
        ' <script>'
        ' <!-- Text in comment-->'
        ' (function() {})'
        ' </script>'
        ' </p>'
        ' </html> '
    )
    expected = ""

    parsed = BeautifulSoup(markup, 'html.parser')

    assert converter.extract_text(parsed) == expected
def test_extract_text_from_multiple_paragraphs():
    """Two paragraphs are returned as one space-separated string; the header script is left out."""
    # Whitespace between the tags is part of the fixture and is kept as-is.
    markup = (
        " <html>"
        " <header>"
        " <script>"
        " (function () { log.console('test javascript'); })"
        " </script>"
        " </header>"
        " <p> This is a test text </p>"
        " <p> Another test text! </p>"
        " </html> "
    )
    expected = "This is a test text Another test text!"

    parsed = BeautifulSoup(markup, 'html.parser')

    assert converter.extract_text(parsed) == expected