def escape(matchobj):
    """Return an HTML character reference for the text matched by *matchobj*.

    *matchobj* must expose ``group(0)`` yielding exactly one character
    (so the function is usable as a ``re.sub`` replacement callback for a
    single-character pattern).  A named entity from ``codepoint2name`` is
    preferred; characters without one fall back to a decimal numeric
    reference such as ``&#257;``.
    """
    code = ord(matchobj.group(0))
    entity = codepoint2name.get(code)
    return '&#%d;' % code if entity is None else '&%s;' % entity
def decode_to_entity(s):
    """Return *s* with every character that has a named HTML entity replaced.

    Each character whose code point is a key of ``codepoint2name`` becomes
    ``&name;`` (this includes ``<``, ``>``, ``&`` and ``"``); every other
    character is copied through unchanged.
    """
    def _entity(ch):
        # One dictionary lookup per character instead of ``in`` + ``get``.
        name = codepoint2name.get(ord(ch))
        return ch if name is None else '&' + name + ';'

    # join() avoids the quadratic cost of repeated string concatenation.
    return ''.join(_entity(ch) for ch in s)
def encode(txt):
    """Replace characters of *txt* with named HTML entities, sparing markup.

    The characters ``<``, ``>``, ``"`` and ``&`` are always copied through
    untouched.  Any other character whose code point is a key of
    ``codepoint2name`` becomes ``&name;``; everything else is unchanged.
    """
    skip = {'<', '>', '"', '&'}
    pieces = []
    for c in txt:
        # Skipped characters never reach the entity table.
        name = None if c in skip else codepoint2name.get(ord(c))
        pieces.append(c if name is None else '&' + name + ';')
    # Build the result once rather than concatenating per character.
    return ''.join(pieces)
def get_info(cp):
    """Build an ``Info`` record describing the code point *cp*.

    The record is constructed as ``Info(cp, char, html, block, name)``:

    * ``char``  -- ``chr(cp)``
    * ``html``  -- ``&name;`` when ``codepoint2name`` has an entry for *cp*,
      otherwise a hexadecimal numeric character reference ``&#xHHHH;``
    * ``block`` -- result of ``blockof(char)``
    * ``name``  -- result of ``unicode_name(char, '')`` (presumably the
      Unicode character name with ``''`` as the fallback -- confirm against
      the helper's definition)
    """
    char = chr(cp)
    name = unicode_name(char, '')
    block = blockof(char)
    html_name = codepoint2name.get(cp)
    if html_name is not None:
        html = '&{};'.format(html_name)
    else:
        # A hexadecimal reference needs the ``x`` marker; without it the
        # hex digits are parsed as decimal (or rejected), yielding the
        # wrong character.
        html = '&#x{:x};'.format(cp)
    return Info(cp, char, html, block, name)
def unicode_escape(unistr):
    """
    Tidies up unicode characters into HTML friendly entities

    Takes a unicode string as an argument

    Returns a unicode string in which every character listed in
    ``codepoint2name`` is replaced by ``&name;``, except that an entity
    whose name contains ``nbsp`` is replaced by a plain space.
    """
    parts = []
    for char in unistr:
        name = codepoint2name.get(ord(char))
        if name is None:
            parts.append(char)
        elif 'nbsp' in name:
            # Non-breaking spaces are flattened to ordinary spaces.
            parts.append(' ')
        else:
            parts.append('&%s;' % name)
    # Single join instead of repeated string concatenation.
    return ''.join(parts)
def sanitize(self, text):
    """
    Escapes all non-ASCII characters as well as the special
    characters <>&'" using html entities.

    The text is first passed through the parent class's ``sanitize``.
    Each character that needs escaping becomes ``&name;`` when
    ``html_entities`` has a name for its code point, and a decimal
    numeric reference ``&#N;`` otherwise.  This method takes no
    ``encode`` option; escaping is always applied.
    """
    text = super(HTMLFormat, self).sanitize(text)

    def entity(c):
        code = ord(c)
        # The single quote is escaped too, as documented, so the
        # output is safe inside single-quoted attribute values.
        if code < 128 and c not in '&<>"\'':
            return c
        return "&%s;" % (html_entities.get(code, None) or ("#%i" % code))

    return "".join(entity(c) for c in text)
def encodeEntity(c):
    """Return ``&name;`` for the character *c*, or *c* itself if unnamed.

    The name is looked up by code point in ``reverseentities``; a missing
    entry (or one mapped to ``None``) leaves the character unchanged.
    """
    name = reverseentities.get(ord(c))
    # The 1-tuple keeps %-formatting to a single value whatever its type.
    return c if name is None else '&%s;' % (name,)