Example #1
0
    def test_comment2(self):
        """An HTML comment sitting between text nodes is left out of the description."""
        markup = b"""
        <html>
        <head><title>Foo</title></head>
        <body>
        Some text.
        <!-- HTML comment -->
        Some more text.
        <p>Text</p>
        More text
        </body>
        </html>
        """
        page_url = "http://example.com/test.html"
        # The comment text must not appear; the remaining text chunks are
        # expected to be joined by blank lines.
        expected = {
            "og:title": "Foo",
            "og:description": "Some text.\n\nSome more text.\n\nText\n\nMore text",
        }

        result = _calc_og(decode_body(markup, page_url), page_url)

        self.assertEqual(result, expected)
Example #2
0
 def test_windows_1252(self):
     """A cp1252-encoded body with no declared charset still yields the right title."""
     # The title is the single byte 0xF3, which is "ó" in cp1252.
     markup = b"""
     <html>
     <head><title>\xf3</title></head>
     <body>
     Some text.
     </body>
     </html>
     """
     page_url = "http://example.com/test.html"
     expected = {"og:title": "ó", "og:description": "Some text."}

     result = _calc_og(decode_body(markup, page_url), page_url)

     self.assertEqual(result, expected)
Example #3
0
 def test_invalid_encoding(self):
     """Decoding still succeeds when the supplied character encoding is invalid."""
     markup = b"""
     <html>
     <head><title>Foo</title></head>
     <body>
     Some text.
     </body>
     </html>
     """
     expected = {"og:title": "Foo", "og:description": "Some text."}

     # "invalid-encoding" is deliberately bogus; the body itself is plain ASCII.
     parsed = decode_body(markup, "invalid-encoding")
     result = _calc_og(parsed, "http://example.com/test.html")

     self.assertEqual(result, expected)
Example #4
0
    def test_missing_title(self):
        """A document with no title element reports ``og:title`` as None."""
        markup = b"""
        <html>
        <body>
        Some text.
        </body>
        </html>
        """
        expected = {"og:title": None, "og:description": "Some text."}

        parsed = decode_body(markup)
        result = _calc_og(parsed, "http://example.com/test.html")

        self.assertEqual(result, expected)
Example #5
0
 def test_invalid_encoding2(self):
     """A title holding bytes that are not valid UTF-8 is still decoded."""
     # The two 0xFF bytes in the title form an invalid UTF-8 sequence; the
     # expected title shows each of them coming out as "ÿ".
     markup = b"""
     <html>
     <head><title>\xff\xff Foo</title></head>
     <body>
     Some text.
     </body>
     </html>
     """
     expected = {"og:title": "ÿÿ Foo", "og:description": "Some text."}

     parsed = decode_body(markup)
     result = _calc_og(parsed, "http://example.com/test.html")

     self.assertEqual(result, expected)
Example #6
0
    def test_h1_as_title(self):
        """With no title element, the ``<h1>`` text is reported as ``og:title``."""
        # The description is expected to match the og:description meta tag.
        markup = b"""
        <html>
        <meta property="og:description" content="Some text."/>
        <body>
        <h1>Title</h1>
        </body>
        </html>
        """
        expected = {"og:title": "Title", "og:description": "Some text."}

        parsed = decode_body(markup)
        result = _calc_og(parsed, "http://example.com/test.html")

        self.assertEqual(result, expected)
Example #7
0
    def test_simple(self):
        """A minimal page yields its title and body text as the Open Graph data."""
        markup = b"""
        <html>
        <head><title>Foo</title></head>
        <body>
        Some text.
        </body>
        </html>
        """
        expected = {"og:title": "Foo", "og:description": "Some text."}

        parsed = decode_body(markup)
        result = _calc_og(parsed, "http://example.com/test.html")

        self.assertEqual(result, expected)
Example #8
0
    def test_xml(self):
        """An XHTML document with an XML declaration is decoded and summarised."""
        raw = b"""
        <?xml version="1.0" encoding="UTF-8"?>

        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
        <head><title>Foo</title></head><body>Some text.</body></html>
        """
        # Stripping matters: the xml tag has to start at the very first byte
        # of the body.
        markup = raw.strip()
        page_url = "http://example.com/test.html"
        expected = {"og:title": "Foo", "og:description": "Some text."}

        result = _calc_og(decode_body(markup, page_url), page_url)

        self.assertEqual(result, expected)
Example #9
0
    def test_missing_title_and_broken_h1(self):
        """No title element and an ``<h1>`` without text gives a None ``og:title``."""
        # The only heading wraps a self-closed anchor, so it carries no text.
        markup = b"""
        <html>
        <body>
        <h1><a href="foo"/></h1>
        Some text.
        </body>
        </html>
        """
        page_url = "http://example.com/test.html"
        expected = {"og:title": None, "og:description": "Some text."}

        result = _calc_og(decode_body(markup, page_url), page_url)

        self.assertEqual(result, expected)