Beispiel #1
0
def story_readability(content):
    """
    Return the ``ReadabilityDocument`` partial-HTML summary of *content*.

    Falsy input (``None``, ``""``) and input that is empty after
    ``strip()`` short-circuit to ``""`` without building a document.
    Otherwise the result of ``summary(html_partial=True)`` is returned,
    with a falsy summary normalised to ``""``.

    >>> content = '<p>hello <b>world</b><br/>你好<i>世界</i></p>'
    >>> print(story_readability(content))
    <body id="readabilityBody"><p>hello <b>world</b><br/>你好<i>世界</i></p></body>
    """
    # ``and`` short-circuits, so ``strip()`` is never called on falsy input.
    has_text = bool(content) and bool(content.strip())
    if not has_text:
        return ""
    summary = ReadabilityDocument(content).summary(html_partial=True)
    return summary if summary else ""
Beispiel #2
0
def story_readability(content):
    """
    Summarise *content* via ``ReadabilityDocument`` as partial HTML.

    Returns ``""`` when *content* is falsy or contains only whitespace,
    and also when the computed summary itself is falsy.

    >>> content = '<p>hello <b>world</b><br>你好<i>世界</i></p>'
    >>> print(story_readability(content))
    <body id="readabilityBody"><p>hello <b>world</b><br>你好<i>世界</i></p></body>
    >>> content = '<svg height="16" width="16" class="octicon octicon-search"></svg>'
    >>> content in story_readability(content)
    True
    """
    # Guard clause: nothing to parse for None, "" or whitespace-only input.
    if not (content and content.strip()):
        return ""
    document = ReadabilityDocument(content)
    partial_html = document.summary(html_partial=True)
    if partial_html:
        return partial_html
    return ""
Beispiel #3
0
def clean_page(page):
    """Wrap *page* in a ``ReadabilityDocument`` and return that object.

    The document object itself is returned, not a string; this function
    calls no method on it.
    NOTE(review): callers presumably call something like ``.summary()`` on
    the result to obtain the cleaned markup -- confirm against call sites.
    """
    return ReadabilityDocument(page)