Example #1
0
def truncate_xhtml(string, size, _strip_xhtml=False, _decode_entities=False):
    """Truncate a XHTML string to roughly a given size (full words).

    :param string: XHTML
    :type string: unicode
    :param size: Max length
    :param _strip_xhtml: Flag to strip out all XHTML
    :param _decode_entities: Flag to convert XHTML entities to unicode chars
    :rtype: unicode
    """
    if not string:
        return u''

    if _strip_xhtml:
        # Insert whitespace after block elements.
        # So they are separated when we strip the xhtml.
        string = block_spaces.sub(u"\\1 ", string)
        string = strip_xhtml(string)

    string = decode_entities(string)

    if len(string) > size:
        string = text.truncate(string, length=size, whole_word=True)

        if _strip_xhtml:
            if not _decode_entities:
                # re-encode the entities, if we have to.
                string = encode_entities(string)
        else:
            string = clean(string, **cleaner_settings)

    return string.strip()
Example #2
0
def truncate_xhtml(string, size, _strip_xhtml=False, _decode_entities=False):
    """Truncate a XHTML string to roughly a given size (full words).

    :param string: XHTML
    :type string: unicode
    :param size: Max length
    :param _strip_xhtml: Flag to strip out all XHTML
    :param _decode_entities: Flag to convert XHTML entities to unicode chars
    :rtype: unicode
    """
    if not string:
        return u''

    if _strip_xhtml:
        # Insert whitespace after block elements.
        # So they are separated when we strip the xhtml.
        string = block_spaces.sub(u"\\1 ", string)
        string = strip_xhtml(string)

    string = decode_entities(string)

    if len(string) > size:
        string = text.truncate(string, length=size, whole_word=True)

        if _strip_xhtml:
            if not _decode_entities:
                # re-encode the entities, if we have to.
                string = encode_entities(string)
        else:
            string = clean(string, **cleaner_settings)

    return string.strip()
Example #3
0
def strip_xhtml(string, _decode_entities=False):
    """Strip out xhtml and optionally convert HTML entities to unicode.

    :rtype: unicode
    """
    if not string:
        return u''

    string = ''.join(BeautifulSoup(string).findAll(text=True))

    if _decode_entities:
        string = decode_entities(string)

    return string
Example #4
0
def strip_xhtml(string, _decode_entities=False):
    """Strip out xhtml and optionally convert HTML entities to unicode.

    :rtype: unicode
    """
    if not string:
        return u''

    string = ''.join(BeautifulSoup(string).findAll(text=True))

    if _decode_entities:
        string = decode_entities(string)

    return string
Example #5
0
def excerpt_xhtml(string, size, buffer=60):
    """Return an excerpt for the given string.

    Truncate to the given size iff we are removing more than the buffer size.

    :param string: A XHTML string
    :param size: The desired length
    :type size: int
    :param buffer: How much more than the desired length we can go to
        avoid truncating just a couple words etc.
    :type buffer: int
    :returns: XHTML

    """
    if not string:
        return u''
    new_str = decode_entities(string)
    if len(new_str) <= size + buffer:
        return string
    return truncate_xhtml(new_str, size)
Example #6
0
def excerpt_xhtml(string, size, buffer=60):
    """Return an excerpt for the given string.

    Truncate to the given size iff we are removing more than the buffer size.

    :param string: A XHTML string
    :param size: The desired length
    :type size: int
    :param buffer: How much more than the desired length we can go to
        avoid truncating just a couple words etc.
    :type buffer: int
    :returns: XHTML

    """
    if not string:
        return u''
    new_str = decode_entities(string)
    if len(new_str) <= size + buffer:
        return string
    return truncate_xhtml(new_str, size)
Example #7
0
def strip_xhtml(string, _decode_entities=False):
    """Strip out xhtml and optionally convert HTML entities to unicode.

    :rtype: unicode
    """
    if not string:
        return u''
    # Strip all xhtml:
    string = clean(string, strip=True, tags=[], attributes=[])

    # Decode entities and strip hidden xhtml markup:
    string = decode_entities(string)

    # Note that decode_entities() call above takes care of xhtml
    #   striping of hidden markup, and clean() below will re-encode
    #   escapable characters if they got removed and we weren't
    #   supposed to unescape them:
    if not _decode_entities:
        string = clean(string)

    return string
Example #8
0
def strip_xhtml(string, _decode_entities=False):
    """Strip out xhtml and optionally convert HTML entities to unicode.

    :rtype: unicode
    """
    if not string:
        return u''
    # Strip all xhtml:
    string = clean(string, strip=True, tags=[], attributes=[])

    # Decode entities and strip hidden xhtml markup:
    string = decode_entities(string)

    # Note that decode_entities() call above takes care of xhtml
    #   striping of hidden markup, and clean() below will re-encode
    #   escapable characters if they got removed and we weren't
    #   supposed to unescape them:
    if not _decode_entities:
        string = clean(string)

    return string