def htmlentityreplace_errors(exc):
    """Codec error handler that emits HTML entity references.

    For a ``UnicodeEncodeError`` or ``UnicodeTranslateError`` every code
    point in ``exc.object[exc.start:exc.end]`` is replaced by ``&name;``
    when ``encode_entity_map`` has a truthy entry for it, and by a
    hexadecimal reference ``&#x...;`` otherwise.  Two adjacent code units
    accepted by ``utils.isSurrogatePair`` are first combined into a single
    code point with ``utils.surrogatePairToCodepoint``.

    Any other exception type is delegated to ``xmlcharrefreplace_errors``.

    Returns a ``(replacement_text, exc.end)`` tuple.
    """
    if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        return xmlcharrefreplace_errors(exc)

    # Phase 1: turn the offending slice into a list of code points.
    failed = exc.object[exc.start:exc.end]
    codepoints = []
    offset = 0
    while offset < len(failed):
        index = exc.start + offset
        # Look at most two units ahead, never past the end of the error range.
        window = exc.object[index:min([exc.end, index + 2])]
        if utils.isSurrogatePair(window):
            pair = exc.object[index:index + 2]
            codepoints.append(utils.surrogatePairToCodepoint(pair))
            offset += 2  # the second half of the pair is already consumed
        else:
            codepoints.append(ord(failed[offset]))
            offset += 1

    # Phase 2: render each code point as a named or numeric reference.
    pieces = []
    for cp in codepoints:
        name = encode_entity_map.get(cp)
        if not name:
            pieces.append("&#x%s;" % (hex(cp)[2:]))
            continue
        pieces.append("&")
        pieces.append(name)
        # Only add the terminator when the mapped name lacks one.
        if not name.endswith(";"):
            pieces.append(";")
    return (u"".join(pieces), exc.end)
예제 #2
0
 def htmlentityreplace_errors(exc):
     """Replace unencodable characters with HTML entity references.

     Handles ``UnicodeEncodeError`` and ``UnicodeTranslateError``: each code
     point in ``exc.object[exc.start:exc.end]`` becomes ``&name;`` if
     ``encode_entity_map`` maps it to a truthy name, else ``&#x<hex>;``.
     Code units that ``utils.isSurrogatePair`` reports as a pair are merged
     through ``utils.surrogatePairToCodepoint`` before the lookup.

     Every other exception is passed on to ``xmlcharrefreplace_errors``.

     Returns the tuple ``(replacement, exc.end)``.
     """
     if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
         def as_reference(cp):
             # Named entity when the map knows the code point, numeric otherwise.
             name = encode_entity_map.get(cp)
             if name:
                 # Append a terminator only if the mapped name has none.
                 terminator = u"" if name.endswith(u";") else u";"
                 return u"&" + name + terminator
             return u"&#x%s;" % (hex(cp)[2:])

         codepoints = []
         units = enumerate(exc.object[exc.start:exc.end])
         for offset, char in units:
             index = exc.start + offset
             # Peek at most two units, clipped to the end of the error range.
             candidate = exc.object[index:min([exc.end, index + 2])]
             if utils.isSurrogatePair(candidate):
                 codepoints.append(
                     utils.surrogatePairToCodepoint(
                         exc.object[index:index + 2]))
                 # Drop the second unit of the pair from the iteration.
                 next(units, None)
             else:
                 codepoints.append(ord(char))

         replacement = u"".join([as_reference(cp) for cp in codepoints])
         return (replacement, exc.end)
     else:
         return xmlcharrefreplace_errors(exc)