def html_escape(text):
    """
    Escape HTML characters / entities in @text.

    Every character that has a named entity in ``entitydefs`` is replaced
    by its ``&name;`` form, and a literal ``&`` becomes ``&amp;``.

    @text may be a byte string or a unicode string; the result has the
    same kind. Byte strings are matched against the raw entity
    definitions as stored in ``entitydefs``.
    """
    # '&' has to go first: every replacement below inserts an '&name;'
    # sequence, and escaping '&' afterwards would mangle those.
    text = text.replace("&", "&amp;")

    for name, value in entitydefs.items():
        # '&' was handled above; doing it again here would mess up the
        # entities that previous replace() calls have produced.
        if value == "&":
            continue
        if isinstance(value, bytes) and not isinstance(text, bytes):
            # Python 2 keeps the definitions as Latin-1 byte strings.
            # Decode them so text.replace() on a unicode @text does not
            # raise UnicodeDecodeError.
            value = value.decode("iso-8859-1")
        text = text.replace(value, "&%s;" % name)
    return text
def html_escape(text):
    """
    Escape HTML characters / entities in @text.

    A literal ``&`` becomes ``&amp;`` and every other character with a
    named entity in ``entitydefs`` becomes ``&name;``. The result is the
    same kind of string (byte or unicode) as @text.
    """
    # We start by replacing '&', because all later replacements insert
    # new '&' characters that must not be escaped again.
    text = text.replace('&', '&amp;')

    # Python 2 stores entity definitions as Latin-1 byte strings; they
    # only need decoding when @text itself is not a byte string
    # (otherwise text.replace() raises UnicodeDecodeError).
    decode_needed = not isinstance(text, bytes)

    for entity_name, definition in entitydefs.items():
        # We don't want '&' here, as it would mess up the replace()
        # calls already done.
        if definition == '&':
            continue
        if decode_needed and isinstance(definition, bytes):
            definition = definition.decode('iso-8859-1')
        text = text.replace(definition, '&%s;' % entity_name)
    return text
def cleanHTML(text, skipchars=(), extra_careful=True):
    """Replace characters in *text* that have a named HTML entity.

    text          -- the string to clean.
    skipchars     -- a character, or a sequence of characters, that must be
                     left untouched even though an entity exists for them.
    extra_careful -- when true (the default), *text* is returned unchanged
                     if it already contains something that looks like an
                     entity (``&name;`` or ``&#123;``), so that text which
                     was cleaned before is not escaped a second time.

    A literal ``&`` is always turned into ``&amp;`` and the byte ``\\x80``
    (the euro sign in Windows-1252) into ``&euro;``.
    """
    # If extra_careful we don't attempt to do anything to the string if it
    # might have been converted already.
    if extra_careful and re.search(r'&\w+;|&#[0-9]+;', text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # definition -> entity name
    entitydefs_inverted = dict((v, k) for k, v in entitydefs.items())

    # Escape each definition so it can never be read as regex syntax.
    badchars_regex = re.compile(
        '|'.join(re.escape(v) for v in entitydefs_inverted))
    found = set(x for x in badchars_regex.findall(text)
                if x not in skipchars)

    # '&' must be escaped before any '&name;' sequences are inserted.
    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&euro;')

    for key in found:
        if key == '&':
            # already handled above
            continue
        text = text.replace(key, '&%s;' % entitydefs_inverted[key])
    return text
def cleanHTML(text, skipchars=(), extra_careful=True):
    """Turn characters of *text* into named HTML entities where one exists.

    Arguments:
      text          -- string to convert.
      skipchars     -- one character, or a sequence of characters, to leave
                       as they are.
      extra_careful -- if true (default), return *text* untouched when it
                       already contains ``&name;`` or ``&#NNN;``, because
                       it may have been converted before.

    ``&`` always becomes ``&amp;`` and ``\\x80`` becomes ``&euro;``.
    """
    been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')
    if extra_careful and been_fixed_regex.search(text) is not None:
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Map each entity definition back to its name.
    name_for = {}
    for name, definition in entitydefs.items():
        name_for[definition] = name

    # re.escape keeps a definition from being interpreted as a pattern.
    alternatives = [re.escape(definition) for definition in name_for]
    badchars_regex = re.compile('|'.join(alternatives))

    to_replace = set()
    for match in badchars_regex.findall(text):
        if match not in skipchars:
            to_replace.add(match)
    # '&' is dealt with separately, below.
    to_replace.discard('&')

    # Escape '&' first so the entities inserted afterwards stay intact.
    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&euro;')
    for definition in to_replace:
        text = text.replace(definition, '&%s;' % name_for[definition])
    return text
} def getMimeFromExt(filepath): """Convenience function: equal to extenionMap.get(ext, extensionMap[None]). """ if filepath.endswith('.htaccess'): return 'config/htaccess' ext = os.path.splitext(filepath)[1] return extensionMap.get(ext, extensionMap[None]) ###### Escaping ###### import types from htmlentitydefs import entitydefs entityify = dict([c,e] for e,c in entitydefs.iteritems()) def escapeMarkup(data): """Escape markup characters (&, >, <). Copied from xml.sax.saxutils. """ # must do ampersand first data = data.replace("&", "&") data = data.replace(">", ">") data = data.replace("<", "<") return data def escapeToNamedASCII(text): """Escapes to named entities where possible and numeric-escapes non-ASCII """ return escapeToNamed(text).encode('ascii', 'xmlcharrefreplace')
(FAN_PIECES[WHITE][BISHOP],'j'),
    (FAN_PIECES[WHITE][ROOK],'l'),
    (FAN_PIECES[WHITE][QUEEN],'m'),
    (FAN_PIECES[WHITE][KING],'n'),
    ('†', '+'),
    ('‡', '+'),
    ('1/2', 'Z'))

def fanconv(fan):
    """Return `fan` with every (search, replacement) pair in lisPieces applied.

    The pairs are applied one after another with str.replace(), in the
    order in which they appear in lisPieces.
    """
    for f,r in lisPieces:
        fan = fan.replace(f,r)
    return fan

# Dictionaries and expressions for parsing diagrams

# Rebinds `entitydefs`: keys become the full "&name;" text and values the
# UTF-8 encoding of the entity's character. Entities whose definition is
# not exactly one character long are dropped.
entitydefs = dict(("&%s;"%a,unichr(ord(b)).encode('utf-8'))
                                for a,b in entitydefs.iteritems() if len(b)==1)
# Reverse mapping of the table above: UTF-8 encoded character -> "&name;".
def2entity = dict((b, a) for a,b in entitydefs.iteritems())

# CSS text; the rules target the "pychess" table class and use the
# "Chess Alpha 2" font.
# NOTE(review): the selector "table.pychess td.numa" appears twice with
# different declarations - confirm whether the second one is intentional.
style = """ @font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"), url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"), url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"), url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"), url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";} table.pychess {display:inline-block; vertical-align:top} table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em} table.pychess td.numa {width:0; text-align:right} table.pychess td.numa {width:0; text-align:right; padding-left:1em} table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em} table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1}"""
__author__ = 'Thomas Perl <*****@*****.**>' __version__ = '0.6.5' __website__ = 'http://gpodder.org/podcastparser/' __license__ = 'ISC License' from xml import sax import re import os import time try: # Python 2 from htmlentitydefs import entitydefs entitydefs = dict((key, value.decode('latin-1')) for key, value in entitydefs.iteritems()) chr = unichr except ImportError: # Python 3 from html.entities import entitydefs try: # Python 2 import urlparse except ImportError: # Python 3 from urllib import parse as urlparse try: # Python 2 from rfc822 import parsedate_tz
# Will be parsed by setup.py to determine package metadata __author__ = 'Thomas Perl <*****@*****.**>' __version__ = '0.6.2' __website__ = 'http://gpodder.org/podcastparser/' __license__ = 'ISC License' from xml import sax import re import os import time try: # Python 2 from htmlentitydefs import entitydefs entitydefs = dict((key, value.decode('latin-1')) for key, value in entitydefs.iteritems()) chr = unichr except ImportError: # Python 3 from html.entities import entitydefs try: # Python 2 import urlparse except ImportError: # Python 3 from urllib import parse as urlparse try: # Python 2 from rfc822 import parsedate_tz
def entity2unicode(text):
    """Replace named HTML entities in *text* by the characters they denote.

    ``&nbsp;`` is turned into a plain space rather than U+00A0. Names that
    are not in ``entitydefs`` and numeric references (``&#233;``) are left
    unchanged.

    The text is scanned once, so the output of one replacement is never
    decoded again: ``&amp;lt;`` yields ``&lt;``, not ``<``.
    """
    import re  # local import: the file's import section is not visible here

    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    def resolve(match):
        name = match.group(1)
        if name == 'nbsp':
            # A non-breaking space is deliberately flattened to a space.
            return ' '
        definition = entitydefs.get(name)
        if definition is None:
            return match.group(0)
        if isinstance(definition, bytes):
            # Python 2 stores the definitions as Latin-1 byte strings.
            definition = definition.decode('iso-8859-1')
        if len(definition) > 1 and definition.startswith('&#'):
            # Python 2 writes characters outside Latin-1 as "&#NNN;".
            definition = to_char(int(definition[2:-1]))
        return definition

    return re.sub(r'&([A-Za-z0-9]+);', resolve, text)
# Will be parsed by setup.py to determine package metadata __author__ = 'Thomas Perl <*****@*****.**>' __version__ = '0.6.0' __website__ = 'http://gpodder.org/podcastparser/' __license__ = 'ISC License' from xml import sax import re import os import time try: # Python 2 from htmlentitydefs import entitydefs entitydefs = dict((key, value.decode('latin-1')) for key, value in entitydefs.iteritems()) chr = unichr except ImportError: # Python 3 from html.entities import entitydefs try: # Python 2 import urlparse except ImportError: # Python 3 from urllib import parse as urlparse try: # Python 2 from rfc822 import mktime_tz, parsedate_tz
def escaping_all_text(content):
    """Replace every named HTML entity in *content* by its definition.

    Each ``&name;`` whose name is a key of ``entitydefs`` is replaced by
    the corresponding definition. ``&amp;`` is handled last, so that the
    ``&`` it produces cannot combine with the following text into a new
    entity and be decoded a second time (``&amp;lt;`` gives ``&lt;``).
    """
    def as_text(value):
        # Python 2 stores definitions as Latin-1 byte strings, Python 3
        # as str; only the former needs decoding.
        if isinstance(value, bytes):
            return value.decode('latin-1')
        return value

    for html_code, definition in entitydefs.items():
        if html_code != 'amp':
            content = content.replace("&%s;" % html_code, as_text(definition))
    return content.replace("&amp;", as_text(entitydefs['amp']))
def escaping_all_text(content):
    """Expand the named HTML entities found in *content*.

    Every ``&name;`` with a name known to ``entitydefs`` is replaced by
    its definition. The ampersand entity is expanded only after all the
    others: expanding it earlier could create a fresh ``&name;`` sequence
    that a later iteration would wrongly expand again.
    """
    ampersand = None
    for html_code, definition in entitydefs.items():
        if isinstance(definition, bytes):
            # Python 2 keeps the definitions as Latin-1 byte strings.
            definition = definition.decode("latin-1")
        if html_code == "amp":
            # Remember it and apply it after the loop.
            ampersand = definition
            continue
        content = content.replace("&%s;" % html_code, definition)
    if ampersand is not None:
        content = content.replace("&amp;", ampersand)
    return content
from xml.sax.saxutils import escape, unescape, quoteattr
try:  # Python 2
    from htmlentitydefs import entitydefs
except ImportError:  # Python 3
    from html.entities import entitydefs
import base64
import urllib


def xmlquote(s):
    'escapes &, <, >, and " with entity references'
    # escape() only handles &, < and > by itself; the double quote has to
    # be supplied as an extra mapping.
    return escape(s, {'"': '&quot;'})


# Character -> '&name;' for every named entity. '&' is left out because
# escape() already converts it, before the extra mappings are applied.
htmlentitymap = dict((char, '&%s;' % name)
                     for name, char in entitydefs.items() if char != '&')


def htmlquote(s):
    'escapes everything possible into htmlentities'
    return escape(s, htmlentitymap)


def _latinquotechar(c):
    """Return *c* as a numeric character reference if its code is >= 160."""
    o = ord(c)
    if o >= 160:
        return '&#%03d;' % o
    return c


def latinquote(s):
    """replaces all characters >=160 with character entities."""
    return ''.join(map(_latinquotechar, s))


def fullquote(s):
    """Escapes all characters to %XX format."""
    return ''.join('%%%02x' % ord(x) for x in s).upper()