def _init_message(self): """Find a unicode representation of self.error""" try: self.message = compat.text_type(self.error) except UnicodeError: try: self.message = str(self.error) except UnicodeEncodeError: # Fallback to args as neither unicode nor # str(Exception(u'\xe6')) work in Python < 2.6 self.message = self.error.args[0] if not isinstance(self.message, compat.text_type): self.message = compat.text_type(self.message, 'ascii', 'replace')
def decode(x): if isinstance(x, compat.text_type): return x elif not isinstance(x, compat.binary_type): return decode(str(x)) else: return compat.text_type(x, encoding=key)
def escape(self, text): """Replace characters with their character references. Replace characters by their named entity references. Non-ASCII characters, if they do not have a named entity reference, are replaced by numerical character references. The return value is guaranteed to be ASCII. """ return self.__escapable.sub(self.__escape, compat.text_type(text) ).encode('ascii')
def htmlentityreplace_errors(ex): """An encoding error handler. This python `codecs`_ error handler replaces unencodable characters with HTML entities, or, if no HTML entity exists for the character, XML character references. >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace') 'The cost was €12.' """ if isinstance(ex, UnicodeEncodeError): # Handle encoding errors bad_text = ex.object[ex.start:ex.end] text = _html_entities_escaper.escape(bad_text) return (compat.text_type(text), ex.end) raise ex
def __init__(self, codepoint2name, name2codepoint): self.codepoint2entity = dict([(c, compat.text_type('&%s;' % n)) for c, n in codepoint2name.items()]) self.name2codepoint = name2codepoint
def escape_entities(self, text): """Replace characters with their character entity references. Only characters corresponding to a named entity are replaced. """ return compat.text_type(text).translate(self.codepoint2entity)