def wrap_long_words(string, _encode_entities=True):
    """Add ``<wbr />`` break opportunities so the browser can wrap the text.

    The <wbr /> tag is widely deployed and included in HTML5, but it
    isn't XHTML-compliant. See this for more info:
    http://dev.w3.org/html5/spec/text-level-semantics.html#the-wbr-element

    :param string: Text to process.
    :type string: unicode
    :param _encode_entities: When true (the default) the text is passed
        through ``encode_entities`` before any markup is injected.
    :returns: The text with ``<wbr />`` tags injected, wrapped in ``literal``.
    :rtype: literal

    """
    wrapped = encode_entities(string) if _encode_entities else string

    def _break_between(match):
        # Re-emit the first and last captured groups of the ``long_words``
        # match with a break opportunity in between.
        captured = match.groups()
        return u'%s<wbr />%s' % (captured[0], captured[-1])

    wrapped = long_words.sub(_break_between, wrapped)

    # Additionally offer a break opportunity after every period.
    pieces = wrapped.split('.')
    return literal(u'.<wbr />'.join(pieces))
def truncate_xhtml(string, size, _strip_xhtml=False, _decode_entities=False):
    """Truncate a XHTML string to roughly a given size (full words).

    :param string: XHTML
    :type string: unicode
    :param size: Max length
    :param _strip_xhtml: Flag to strip out all XHTML
    :param _decode_entities: Flag to convert XHTML entities to unicode chars
    :returns: The (possibly truncated) text with surrounding whitespace
        stripped; ``u''`` for empty or ``None`` input.
    :rtype: unicode
    """
    if not string:
        return u''

    if _strip_xhtml:
        # Put whitespace after block elements first, so their contents
        # stay separated once the markup is stripped out.
        spaced = block_spaces.sub(u"\\1 ", string)
        string = strip_xhtml(spaced)

    # Entities are always decoded before the length is measured.
    string = decode_entities(string)

    # NOTE(review): the post-processing further down only runs when the text
    # was actually truncated, so input that already fits is returned
    # entity-decoded even when _decode_entities is False -- confirm intended.
    if len(string) <= size:
        return string.strip()

    string = text.truncate(string, length=size, whole_word=True)

    if _strip_xhtml:
        if not _decode_entities:
            # Re-encode the entities, if we have to.
            string = encode_entities(string)
    else:
        # Markup was kept: run the cleaner over the truncated text.
        filters = list(truncate_filters)
        if not _decode_entities:
            # Re-encode the entities, if we have to.
            filters.insert(0, 'encode_xml_specials')
        string = Cleaner(string, *filters, **cleaner_settings)()

    return string.strip()