Ejemplo n.º 1
0
    def test_extract_description(self):
        # type: () -> None
        # Exercises the 'description' key of GenericParser.extract_data() on
        # three documents: a paragraph nested inside <div>s, a
        # <meta name="description"> tag, and an empty <body>.
        nested_paragraph_page = """
          <html>
            <body>
                <div>
                    <div>
                        <p>Description text</p>
                    </div>
                </div>
            </body>
          </html>
        """
        # NOTE(review): the <meta> tag below is missing its closing '>' before
        # </head>. Kept byte-for-byte; confirm whether the malformed markup is
        # an intentional part of this fixture.
        meta_tag_page = """
          <html>
            <head><meta name="description" content="description 123"</head>
            <body></body>
          </html>
        """
        expectations = (
            (nested_paragraph_page, 'Description text'),
            (meta_tag_page, 'description 123'),
        )
        for markup, expected in expectations:
            extracted = GenericParser(markup).extract_data()
            self.assertEqual(extracted.get('description'), expected)

        # A document with an empty body is expected to yield no description.
        bare_page = "<html><body></body></html>"
        extracted = GenericParser(bare_page).extract_data()
        self.assertIsNone(extracted.get('description'))
Ejemplo n.º 2
0
    def test_extract_description(self) -> None:
        # Checks the "description" value produced by GenericParser for three
        # byte documents: a <p> nested in <div>s, a <meta name="description">
        # tag, and an empty <body>. Every document is parsed with the same
        # content type string.
        content_type = "text/html; charset=UTF-8"

        paragraph_markup = b"""
          <html>
            <body>
                <div>
                    <div>
                        <p>Description text</p>
                    </div>
                </div>
            </body>
          </html>
        """
        from_paragraph = GenericParser(paragraph_markup, content_type).extract_data()
        self.assertEqual(from_paragraph.get("description"), "Description text")

        # NOTE(review): the <meta> tag is not closed with '>' before </head>.
        # Reproduced exactly; confirm whether the broken markup is deliberate.
        meta_markup = b"""
          <html>
            <head><meta name="description" content="description 123"</head>
            <body></body>
          </html>
        """
        from_meta = GenericParser(meta_markup, content_type).extract_data()
        self.assertEqual(from_meta.get("description"), "description 123")

        # An empty body is expected to leave "description" unset (None).
        empty_markup = b"<html><body></body></html>"
        from_empty = GenericParser(empty_markup, content_type).extract_data()
        self.assertIsNone(from_empty.get("description"))
Ejemplo n.º 3
0
    def test_extract_description(self) -> None:
        # Three fixtures are declared up front, then each one is parsed and its
        # 'description' entry is checked in the same order as the fixtures.
        paragraph_doc = """
          <html>
            <body>
                <div>
                    <div>
                        <p>Description text</p>
                    </div>
                </div>
            </body>
          </html>
        """
        # NOTE(review): the <meta> tag has no closing '>' before </head>.
        # Preserved as-is; confirm whether this malformed input is intentional.
        meta_doc = """
          <html>
            <head><meta name="description" content="description 123"</head>
            <body></body>
          </html>
        """
        empty_doc = "<html><body></body></html>"

        # Paragraph text nested inside <div>s is expected as the description.
        paragraph_data = GenericParser(paragraph_doc).extract_data()
        self.assertEqual(paragraph_data.get('description'), 'Description text')

        # The content attribute of the description <meta> tag is expected.
        meta_data = GenericParser(meta_doc).extract_data()
        self.assertEqual(meta_data.get('description'), 'description 123')

        # With an empty body the description is expected to be None.
        empty_data = GenericParser(empty_doc).extract_data()
        self.assertIsNone(empty_data.get('description'))
Ejemplo n.º 4
0
 def test_parser(self) -> None:
     # Parses a page that has a <title>, an <h1> and a <p>, and expects the
     # <title> text under "title" and the <p> text under "description".
     page = b"""
       <html>
         <head><title>Test title</title></head>
         <body>
             <h1>Main header</h1>
             <p>Description text</p>
         </body>
       </html>
     """
     extracted = GenericParser(page, "text/html; charset=UTF-8").extract_data()
     expected_fields = (
         ("title", "Test title"),
         ("description", "Description text"),
     )
     # Checked in this order so the first failing field matches the original.
     for field, expected in expected_fields:
         self.assertEqual(extracted.get(field), expected)
Ejemplo n.º 5
0
 def test_parser(self) -> None:
     # A page with a <title>, an <h1> and a <p>: the <title> text is expected
     # as 'title' and the paragraph text as 'description'.
     document = """
       <html>
         <head><title>Test title</title></head>
         <body>
             <h1>Main header</h1>
             <p>Description text</p>
         </body>
       </html>
     """
     parsed = GenericParser(document).extract_data()
     self.assertEqual(parsed.get('title'), 'Test title')
     self.assertEqual(parsed.get('description'), 'Description text')
Ejemplo n.º 6
0
 def test_parser(self) -> None:
     # Runs GenericParser over a small page and compares the 'title' and
     # 'description' entries of the extracted data with the <title> and <p>
     # texts of that page.
     markup = """
       <html>
         <head><title>Test title</title></head>
         <body>
             <h1>Main header</h1>
             <p>Description text</p>
         </body>
       </html>
     """
     data = GenericParser(markup).extract_data()
     for key, wanted in (
         ('title', 'Test title'),
         ('description', 'Description text'),
     ):
         self.assertEqual(data.get(key), wanted)
Ejemplo n.º 7
0
 def test_extract_image(self) -> None:
     # The page has no <title>. The expectations are: 'title' is the <h1>
     # text, 'description' is the <p> text, and 'image' is the <img> src URL.
     document = """
       <html>
         <body>
             <h1>Main header</h1>
             <img src="http://test.com/test.jpg">
             <div>
                 <p>Description text</p>
             </div>
         </body>
       </html>
     """
     parsed = GenericParser(document).extract_data()
     self.assertEqual(parsed.get('title'), 'Main header')
     self.assertEqual(parsed.get('description'), 'Description text')
     self.assertEqual(parsed.get('image'), 'http://test.com/test.jpg')
Ejemplo n.º 8
0
 def test_extract_image(self) -> None:
     # Parses a page containing an <h1>, one <img> and a <p> inside a <div>,
     # then checks the 'title', 'description' and 'image' entries in turn.
     markup = """
       <html>
         <body>
             <h1>Main header</h1>
             <img src="http://test.com/test.jpg">
             <div>
                 <p>Description text</p>
             </div>
         </body>
       </html>
     """
     data = GenericParser(markup).extract_data()
     wanted_by_key = (
         ('title', 'Main header'),
         ('description', 'Description text'),
         ('image', 'http://test.com/test.jpg'),
     )
     # Same assertion order as before: title, description, image.
     for key, wanted in wanted_by_key:
         self.assertEqual(data.get(key), wanted)
Ejemplo n.º 9
0
 def test_extract_image(self) -> None:
     # The first <img> carries only a data-src attribute. The expected
     # "image" is the src of the second <img>. "title" is expected to be the
     # <h1> text and "description" the <p> text.
     content_type = "text/html; charset=UTF-8"
     page = b"""
       <html>
         <body>
             <h1>Main header</h1>
             <img data-src="Not an image">
             <img src="http://test.com/test.jpg">
             <div>
                 <p>Description text</p>
             </div>
         </body>
       </html>
     """
     extracted = GenericParser(page, content_type).extract_data()
     self.assertEqual(extracted.get("title"), "Main header")
     self.assertEqual(extracted.get("description"), "Description text")
     self.assertEqual(extracted.get("image"), "http://test.com/test.jpg")