def test_extract_description(self) -> None:
    """Verify the ``description`` value GenericParser extracts for three pages.

    Cases covered: a page whose only text is a nested ``<p>``, a page that
    carries a ``<meta name="description">`` tag, and a page with an empty
    body (no description expected).
    """
    # No meta tag: the expected description is the text of the nested <p>.
    html = """ <html> <body> <div> <div> <p>Description text</p> </div> </div> </body> </html> """
    parser = GenericParser(html)
    result = parser.extract_data()
    self.assertEqual(result.get('description'), 'Description text')

    # Meta description present: its content attribute is the expected value.
    # NOTE(review): the <meta> tag has no closing ">" before </head>;
    # presumably deliberate (malformed input) -- confirm before "fixing" it.
    html = """ <html> <head><meta name="description" content="description 123"</head> <body></body> </html> """
    parser = GenericParser(html)
    result = parser.extract_data()
    self.assertEqual(result.get('description'), 'description 123')

    # Empty body and no meta tag: no description should be reported.
    html = "<html><body></body></html>"
    parser = GenericParser(html)
    result = parser.extract_data()
    self.assertIsNone(result.get('description'))
def test_extract_description(self) -> None:
    """Check the extracted ``description`` for three byte-string pages.

    Each page is parsed as UTF-8 HTML. The expectations are: the nested
    paragraph text, the meta description content, and ``None`` for a page
    with an empty body.
    """
    content_type = "text/html; charset=UTF-8"

    # Page with only a nested <p>.
    paragraph_page = b""" <html> <body> <div> <div> <p>Description text</p> </div> </div> </body> </html> """
    extracted = GenericParser(paragraph_page, content_type).extract_data()
    self.assertEqual(extracted.get("description"), "Description text")

    # Page with a meta description tag.
    # NOTE(review): the <meta> tag lacks its closing ">"; presumably
    # intentional malformed markup -- confirm.
    meta_page = b""" <html> <head><meta name="description" content="description 123"</head> <body></body> </html> """
    extracted = GenericParser(meta_page, content_type).extract_data()
    self.assertEqual(extracted.get("description"), "description 123")

    # Page with nothing to describe.
    empty_page = b"<html><body></body></html>"
    extracted = GenericParser(empty_page, content_type).extract_data()
    self.assertIsNone(extracted.get("description"))
def test_extract_description(self) -> None:
    """Check the extracted ``description`` for three text pages.

    The expectations are: the nested paragraph text, the meta description
    content, and ``None`` for a page with an empty body.
    """
    # Page with only a nested <p>.
    paragraph_page = """ <html> <body> <div> <div> <p>Description text</p> </div> </div> </body> </html> """
    description = GenericParser(paragraph_page).extract_data().get('description')
    self.assertEqual(description, 'Description text')

    # Page with a meta description tag.
    # NOTE(review): the <meta> tag lacks its closing ">"; presumably
    # intentional malformed markup -- confirm.
    meta_page = """ <html> <head><meta name="description" content="description 123"</head> <body></body> </html> """
    description = GenericParser(meta_page).extract_data().get('description')
    self.assertEqual(description, 'description 123')

    # Page with nothing to describe.
    empty_page = "<html><body></body></html>"
    description = GenericParser(empty_page).extract_data().get('description')
    self.assertIsNone(description)
def test_parser(self) -> None:
    """Parse a small byte-string page and check its title and description.

    The page has both a ``<title>`` and an ``<h1>``; the test expects the
    ``<title>`` text as the title and the ``<p>`` text as the description.
    """
    page = b""" <html> <head><title>Test title</title></head> <body> <h1>Main header</h1> <p>Description text</p> </body> </html> """
    extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()

    title = extracted.get("title")
    self.assertEqual(title, "Test title")
    description = extracted.get("description")
    self.assertEqual(description, "Description text")
def test_parser(self) -> None:
    """Parse a small text page and check its title and description.

    The page has both a ``<title>`` and an ``<h1>``; the test expects the
    ``<title>`` text as the title and the ``<p>`` text as the description.
    """
    page = """ <html> <head><title>Test title</title></head> <body> <h1>Main header</h1> <p>Description text</p> </body> </html> """
    extracted = GenericParser(page).extract_data()

    title = extracted.get('title')
    self.assertEqual(title, 'Test title')
    description = extracted.get('description')
    self.assertEqual(description, 'Description text')
def test_parser(self) -> None:
    """Run GenericParser over a minimal page with a title, header and paragraph.

    Expected extraction: ``title`` is the ``<title>`` text (not the
    ``<h1>``), and ``description`` is the ``<p>`` text.
    """
    markup = """ <html> <head><title>Test title</title></head> <body> <h1>Main header</h1> <p>Description text</p> </body> </html> """
    data = GenericParser(markup).extract_data()

    found_title = data.get('title')
    self.assertEqual(found_title, 'Test title')
    found_description = data.get('description')
    self.assertEqual(found_description, 'Description text')
def test_extract_image(self) -> None:
    """Check title, description and image extraction from a text page.

    The page has no ``<title>``; the test expects the ``<h1>`` text as the
    title, the ``<p>`` text as the description, and the ``<img>`` ``src``
    as the image.
    """
    page = """ <html> <body> <h1>Main header</h1> <img src="http://test.com/test.jpg"> <div> <p>Description text</p> </div> </body> </html> """
    extracted = GenericParser(page).extract_data()

    title = extracted.get('title')
    self.assertEqual(title, 'Main header')
    description = extracted.get('description')
    self.assertEqual(description, 'Description text')
    image = extracted.get('image')
    self.assertEqual(image, 'http://test.com/test.jpg')
def test_extract_image(self) -> None:
    """Run GenericParser over a page containing a header, image and paragraph.

    Expected extraction: ``title`` is the ``<h1>`` text (the page has no
    ``<title>``), ``description`` is the ``<p>`` text, and ``image`` is the
    ``src`` of the ``<img>``.
    """
    markup = """ <html> <body> <h1>Main header</h1> <img src="http://test.com/test.jpg"> <div> <p>Description text</p> </div> </body> </html> """
    data = GenericParser(markup).extract_data()

    found_title = data.get('title')
    self.assertEqual(found_title, 'Main header')
    found_description = data.get('description')
    self.assertEqual(found_description, 'Description text')
    found_image = data.get('image')
    self.assertEqual(found_image, 'http://test.com/test.jpg')
def test_extract_image(self) -> None:
    """Check title, description and image extraction from a byte-string page.

    The first ``<img>`` carries only a ``data-src`` attribute; the test
    expects the ``src`` of the second ``<img>`` as the image. The page has
    no ``<title>``, and the ``<h1>`` text is the expected title.
    """
    page = b""" <html> <body> <h1>Main header</h1> <img data-src="Not an image"> <img src="http://test.com/test.jpg"> <div> <p>Description text</p> </div> </body> </html> """
    extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()

    title = extracted.get("title")
    self.assertEqual(title, "Main header")
    description = extracted.get("description")
    self.assertEqual(description, "Description text")
    image = extracted.get("image")
    self.assertEqual(image, "http://test.com/test.jpg")