Beispiel #1
0
 def format_html_title(title, url=None):
     """Return the formatted title of a minimal HTML page titled *title*.

     The markup is wrapped in an ``HTMLDocument`` (optionally tied to *url*),
     parsed, and passed to ``format_title`` together with an empty dict.
     """
     markup = """
         <html><head><title>%s</title></head><body>Hello</body></html>
     """ % title
     page = HTMLDocument(markup, url=url)
     page.parse()
     return format_title(page, {})
Beispiel #2
0
def test_format_title():
    """Exercise format_title on plain, long, emoji, blank, blacklisted and OGP titles."""

    def format_html_title(title, url=None):
        """Build a small UTF-8 page whose <title> is *title* and format its title."""
        markup = """
            <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html>
        """ % title
        page = HTMLDocument(markup, url=url)
        page.parse()
        return format_title(page, {})

    filler = "a" * 60
    example_url = "http://www.example.com/hello.html"

    # Plain titles: unchanged, whitespace runs collapsed, long ones cut to
    # 70 characters followed by an ellipsis.
    basic_cases = [
        ("A Title!", "A Title!"),
        ("  A  \n Title\t \t!  ", "A Title !"),
        ("a" * 100, "a" * 70 + "..."),
    ]
    for raw, expected in basic_cases:
        assert format_html_title(raw) == expected

    # Emoji characters and symbols must be removed from titles.
    # NOTE(review): the UTF-8 encoded value is interpolated with %s; on
    # Python 3 that would embed the bytes repr - confirm target interpreter.
    encoded_emoji = u"😋  Super Emoji-Land.com  ".encode('utf8')
    assert format_html_title(encoded_emoji) == "Super Emoji-Land.com"

    # Truncation: per the expected values, a trailing word is kept only
    # while the result stays within 70 characters.
    truncation_cases = [
        (" 2345678 1234567", " 2345678..."),
        (" 234567890 1234567", " 234567890..."),
        (" 2345678901 1234567", "..."),
    ]
    for tail, expected_tail in truncation_cases:
        assert format_html_title(filler + tail) == filler + expected_tail

    # Domain fallback: a blank title yields a name taken from the URL.
    assert format_html_title("  ", url=example_url) == "Example"

    # Blacklist: a generic title such as "home" falls back the same way.
    assert format_html_title("  home ", url=example_url) == "Example"

    # Open Graph: og:title is used when the page has no <title>.
    ogp_markup = """<html>
        <head><meta property="og:title" content="Open graph title  " /></head>
        <body>This is &lt;body&gt; text</body>
    </html>"""
    ogp_page = HTMLDocument(ogp_markup).parse()
    assert format_title(ogp_page, {}) == "Open graph title"
def test_format_title():
    """Check format_title against plain, long, emoji, blank, blacklisted and OGP titles."""

    def format_html_title(title, url=None):
        """Render *title* into a small UTF-8 HTML page, parse it, and format its title."""
        source = """
            <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html>
        """ % title
        document = HTMLDocument(source, url=url)
        document.parse()
        return format_title(document, {})

    prefix = "a" * 60
    page_url = "http://www.example.com/hello.html"

    # Each entry: (raw title, expected formatted title).
    for given, wanted in (
        ("A Title!", "A Title!"),
        ("  A  \n Title\t \t!  ", "A Title !"),
        ("a" * 100, ("a" * 70) + "..."),
    ):
        assert format_html_title(given) == wanted

    # Emoji characters and symbols must be removed from titles.
    # NOTE(review): %s with UTF-8 bytes would insert the bytes repr on
    # Python 3 - confirm which interpreter this test targets.
    emoji_bytes = u"😋  Super Emoji-Land.com  ".encode('utf8')
    assert format_html_title(emoji_bytes) == "Super Emoji-Land.com"

    # Long titles: the expected values show the last word survives only
    # when the result fits in 70 characters.
    for suffix, wanted_suffix in (
        (" 2345678 1234567", " 2345678..."),
        (" 234567890 1234567", " 234567890..."),
        (" 2345678901 1234567", "..."),
    ):
        assert format_html_title(prefix + suffix) == prefix + wanted_suffix

    # Domain fallback for a blank title.
    assert format_html_title("  ", url=page_url) == "Example"

    # Blacklisted title ("home") also falls back to the domain name.
    assert format_html_title("  home ", url=page_url) == "Example"

    # Open Graph title is picked up from the og:title meta tag.
    og_source = """<html>
        <head><meta property="og:title" content="Open graph title  " /></head>
        <body>This is &lt;body&gt; text</body>
    </html>"""
    og_document = HTMLDocument(og_source).parse()
    assert format_title(og_document, {}) == "Open graph title"
Beispiel #4
0
def test_format_title():
    """Exercise format_title on plain, long, blank, blacklisted and OGP titles."""

    def format_html_title(title, url=None):
        """Build a small HTML page whose <title> is *title* and format its title."""
        markup = """
            <html><head><title>%s</title></head><body>Hello</body></html>
        """ % title
        page = HTMLDocument(markup, url=url)
        page.parse()
        return format_title(page, {})

    filler = "a" * 60
    example_url = "http://www.example.com/hello.html"

    # Plain titles: unchanged, whitespace runs collapsed, long ones cut to
    # 70 characters followed by an ellipsis.
    basic_cases = [
        ("A Title!", "A Title!"),
        ("  A  \n Title\t \t!  ", "A Title !"),
        ("a" * 100, "a" * 70 + "..."),
    ]
    for raw, expected in basic_cases:
        assert format_html_title(raw) == expected

    # Truncation: per the expected values, a trailing word is kept only
    # while the result stays within 70 characters.
    truncation_cases = [
        (" 2345678 1234567", " 2345678..."),
        (" 234567890 1234567", " 234567890..."),
        (" 2345678901 1234567", "..."),
    ]
    for tail, expected_tail in truncation_cases:
        assert format_html_title(filler + tail) == filler + expected_tail

    # Domain fallback: a blank title yields a name taken from the URL.
    assert format_html_title("  ", url=example_url) == "Example"

    # Blacklist: a generic title such as "home" falls back the same way.
    assert format_html_title("  home ", url=example_url) == "Example"

    # Open Graph: og:title is used when the page has no <title>.
    ogp_markup = """<html>
        <head><meta property="og:title" content="Open graph title  " /></head>
        <body>This is &lt;body&gt; text</body>
    </html>"""
    ogp_page = HTMLDocument(ogp_markup).parse()
    assert format_title(ogp_page, {}) == "Open graph title"
 def format_html_title(title, url=None):
     """Return the formatted title of a minimal UTF-8 HTML page titled *title*.

     The markup is wrapped in an ``HTMLDocument`` (optionally tied to *url*),
     parsed, and passed to ``format_title`` together with an empty dict.
     """
     markup = """
         <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html>
     """ % title
     page = HTMLDocument(markup, url=url)
     page.parse()
     return format_title(page, {})