Exemple #1
0
def html_escape(text):
    """
    Escape HTML characters / entities in @text.

    Every "&" is rewritten to "&amp;" first; afterwards each character that
    has a named entity in ``entitydefs`` is replaced by its "&name;"
    reference.

    @text: the string to escape (byte string or unicode string).
    Returns the escaped string.
    """
    # The ampersand has to be handled first: every replacement below inserts
    # new '&' characters, and those must not be escaped a second time.
    text = text.replace("&", "&amp;")

    # Entity values may be latin-1 byte strings (Python 2's htmlentitydefs).
    # They only need decoding when @text itself is not a byte string;
    # otherwise text.replace() could raise UnicodeDecodeError.
    decode_values = not isinstance(text, bytes)

    for name, char in entitydefs.items():
        # '&' was dealt with above; replacing it again would mangle the
        # references produced so far.
        if char == "&":
            continue
        if decode_values and isinstance(char, bytes):
            char = char.decode("iso-8859-1")
        text = text.replace(char, "&%s;" % name)
    return text
Exemple #2
0
def html_escape(text):
    """
    Escape HTML characters / entities in @text.

    '&' becomes '&amp;' before anything else, then every character that has
    a named entity in ``entitydefs`` is swapped for its '&name;' reference.

    @text: the string to escape (byte string or unicode string).
    Returns the escaped string.
    """
    # Ampersands go first: the replacements below introduce fresh '&'
    # characters which must not be escaped again.
    text = text.replace('&', '&amp;')

    # Python 2's htmlentitydefs stores latin-1 byte strings. Decode them only
    # when @text is not itself a byte string, to avoid UnicodeDecodeError in
    # text.replace().
    decode_values = not isinstance(text, bytes)

    for name, char in entitydefs.items():
        # We don't want '&' here, as it would mess up the replace() above.
        if char == '&':
            continue
        if decode_values and isinstance(char, bytes):
            char = char.decode('iso-8859-1')
        text = text.replace(char, '&%s;' % name)
    return text
Exemple #3
0
def cleanHTML(text, skipchars=[], extra_careful=True):
    '''Replace special characters in text with named HTML entities.

    text          -- the string to clean.
    skipchars     -- a character, or a sequence of characters, that must be
                     left untouched.  It is only read, never modified.
    extra_careful -- when true, text that already contains something that
                     looks like an entity reference ("&name;" or "&#123;")
                     is returned unchanged, so it is not escaped twice.

    Returns the cleaned string.  Note that "&" is always rewritten to
    "&amp;", even when it is listed in skipchars.
    '''
    been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')

    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and been_fixed_regex.search(text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Map each replacement text back to its entity name.
    entitydefs_inverted = dict(
        (char, name) for name, char in entitydefs.items())

    # Escape every alternative so no value can be read as regex syntax.
    badchars_regex = re.compile(
        '|'.join(re.escape(char) for char in entitydefs.values()))

    # Collect the distinct characters that actually occur in the text
    # before any replacement changes it.
    found = set(char for char in badchars_regex.findall(text)
                if char not in skipchars)

    # Ampersands first, otherwise the '&' of the inserted references
    # would be escaped again.
    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&euro;')
    for char in found:
        if char == '&':
            continue
        text = text.replace(char, '&%s;' % entitydefs_inverted[char])
    return text
Exemple #4
0
def cleanHTML(text, skipchars=[], extra_careful=True):
    '''Replace special characters in text with named HTML entities.

    text          -- the string to clean.
    skipchars     -- a character, or a sequence of characters, that must be
                     left untouched.  It is only read, never modified.
    extra_careful -- when true, text that already contains something that
                     looks like an entity reference ("&name;" or "&#123;")
                     is returned unchanged, so it is not escaped twice.

    Returns the cleaned string.  Note that "&" is always rewritten to
    "&amp;", even when it is listed in skipchars.
    '''
    been_fixed_regex = re.compile(r'&\w+;|&#[0-9]+;')

    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and been_fixed_regex.search(text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Map each replacement text back to its entity name.
    entitydefs_inverted = dict(
        (char, name) for name, char in entitydefs.items())

    # Escape every alternative so no value can be read as regex syntax.
    badchars_regex = re.compile(
        '|'.join(re.escape(char) for char in entitydefs.values()))

    # Collect the distinct characters that actually occur in the text
    # before any replacement changes it.
    found = set(char for char in badchars_regex.findall(text)
                if char not in skipchars)

    # Ampersands first, otherwise the '&' of the inserted references
    # would be escaped again.
    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&euro;')
    for char in found:
        if char == '&':
            continue
        text = text.replace(char, '&%s;' % entitydefs_inverted[char])
    return text
Exemple #5
0
               }

def getMimeFromExt(filepath):
  """Return the MIME type registered for the extension of filepath.

  Paths ending in '.htaccess' are special-cased to 'config/htaccess'.
  Everything else is looked up in extensionMap by the extension that
  os.path.splitext reports, falling back to the entry stored under None.
  """
  if not filepath.endswith('.htaccess'):
    suffix = os.path.splitext(filepath)[1]
    fallback = extensionMap[None]
    return extensionMap.get(suffix, fallback)
  return 'config/htaccess'

###### Escaping ######

import types
from htmlentitydefs import entitydefs

# Reverse lookup table: replacement text -> entity name.
entityify = dict((char, name) for name, char in entitydefs.iteritems())

def escapeMarkup(data):
  """Return data with the markup characters &, > and < turned into entities.

  Same approach as xml.sax.saxutils.escape.
  """
  # The ampersand pair must stay first, otherwise the '&' introduced by the
  # other two replacements would be escaped again.
  substitutions = (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;"))
  for char, entity in substitutions:
    data = data.replace(char, entity)
  return data

def escapeToNamedASCII(text):
  """Escape text to named entities where possible, then force it to ASCII.

  The text is first passed through escapeToNamed (defined outside this
  excerpt; presumably it performs the named-entity substitution). The
  result is encoded as ASCII with the 'xmlcharrefreplace' error handler, so
  any character that is still non-ASCII becomes a numeric reference.
  """
  named = escapeToNamed(text)
  return named.encode('ascii', 'xmlcharrefreplace')
Exemple #6
0
 (FAN_PIECES[WHITE][BISHOP],'j'),
 (FAN_PIECES[WHITE][ROOK],'l'),
 (FAN_PIECES[WHITE][QUEEN],'m'),
 (FAN_PIECES[WHITE][KING],'n'),
 ('†', '+'),
 ('‡', '+'),
 ('1/2', 'Z'))

def fanconv(fan):
    """Apply every (search, replacement) pair of lisPieces to fan, in order.

    Returns the string obtained after all substitutions.
    """
    converted = fan
    for glyph, letter in lisPieces:
        converted = converted.replace(glyph, letter)
    return converted

# Dictionaries and expressions for parsing diagrams
# Rebind entitydefs to map "&name;" references to the UTF-8 encoded
# character, keeping only the entries whose value is a single character.
entitydefs = dict(
    ("&%s;" % name, unichr(ord(char)).encode('utf-8'))
    for name, char in entitydefs.iteritems()
    if len(char) == 1)
# Reverse table: UTF-8 encoded character -> "&name;" reference.
def2entity = dict(
    (encoded, reference) for reference, encoded in entitydefs.iteritems())
    

# Stylesheet text: declares the "Chess Alpha 2" web font (eot, woff, ttf and
# svg sources hosted on pychess.org) and styles `table.pychess` elements.
# NOTE(review): the `td.numa` selector appears twice; the second rule repeats
# the first and adds padding-left, so the first one looks redundant.
style = """
@font-face {font-family: "Chess Alpha 2"; src: local("Chess Alpha 2"),
url("http://pychess.org/fonts/ChessAlpha2.eot?") format("eot"),
url("http://pychess.org/fonts/ChessAlpha2.woff") format("woff"),
url("http://pychess.org/fonts/ChessAlpha2.ttf") format("truetype"),
url("http://pychess.org/fonts/ChessAlpha2.svg#ChessAlpha2") format("svg"); font-weight:"normal"; font-style:"normal";}
table.pychess {display:inline-block; vertical-align:top}
table.pychess td {margin:0; padding:0; font-size:10pt; font-family:"Chess Alpha 2"; padding-left:.5em}
table.pychess td.numa {width:0; text-align:right}
table.pychess td.numa {width:0; text-align:right; padding-left:1em}
table.pychess td.status {text-align:center; font-size:12pt; padding-right:2em}
table.pychess pre {margin:0; padding:0; font-family:"Chess Alpha 2"; font-size:16pt; text-align:center; line-height:1}"""
__author__ = 'Thomas Perl <*****@*****.**>'
__version__ = '0.6.5'
__website__ = 'http://gpodder.org/podcastparser/'
__license__ = 'ISC License'

from xml import sax

import re
import os
import time

try:
    # Python 2
    from htmlentitydefs import entitydefs
    entitydefs = dict((key, value.decode('latin-1'))
                      for key, value in entitydefs.iteritems())
    chr = unichr
except ImportError:
    # Python 3
    from html.entities import entitydefs

try:
    # Python 2
    import urlparse
except ImportError:
    # Python 3
    from urllib import parse as urlparse

try:
    # Python 2
    from rfc822 import parsedate_tz
# Will be parsed by setup.py to determine package metadata
__author__ = 'Thomas Perl <*****@*****.**>'
__version__ = '0.6.2'
__website__ = 'http://gpodder.org/podcastparser/'
__license__ = 'ISC License'

from xml import sax

import re
import os
import time

try:
    # Python 2
    from htmlentitydefs import entitydefs
    entitydefs = dict((key, value.decode('latin-1')) for key, value in entitydefs.iteritems())
    chr = unichr
except ImportError:
    # Python 3
    from html.entities import entitydefs

try:
    # Python 2
    import urlparse
except ImportError:
    # Python 3
    from urllib import parse as urlparse

try:
    # Python 2
    from rfc822 import parsedate_tz
Exemple #9
0
def entity2unicode(text):
    """Replace named HTML entity references in text with their characters.

    Every "&name;" whose name is a key of ``entitydefs`` is replaced by the
    corresponding value (latin-1 byte values are decoded first), and "&#13;"
    is replaced by a single space.

    "&amp;" is decoded last.  Decoding it earlier would turn an escaped
    reference such as "&amp;lt;" into "&lt;" and a later pass would then
    wrongly decode it a second time into "<".

    Returns the decoded string.
    """
    def as_text(value):
        # Python 2's htmlentitydefs stores latin-1 byte strings.
        if isinstance(value, bytes):
            return value.decode('iso-8859-1')
        return value

    ampersand = None
    for entity, iso in entitydefs.items():
        if entity == 'amp':
            # Deferred until every other reference has been handled.
            ampersand = iso
            continue
        text = text.replace('&%s;' % entity, as_text(iso))
    text = text.replace('&#13;', ' ')
    if ampersand is not None:
        text = text.replace('&amp;', as_text(ampersand))
    return text
# Will be parsed by setup.py to determine package metadata
__author__ = 'Thomas Perl <*****@*****.**>'
__version__ = '0.6.0'
__website__ = 'http://gpodder.org/podcastparser/'
__license__ = 'ISC License'

from xml import sax

import re
import os
import time

try:
    # Python 2
    from htmlentitydefs import entitydefs
    entitydefs = dict((key, value.decode('latin-1')) for key, value in entitydefs.iteritems())
    chr = unichr
except ImportError:
    # Python 3
    from html.entities import entitydefs

try:
    # Python 2
    import urlparse
except ImportError:
    # Python 3
    from urllib import parse as urlparse

try:
    # Python 2
    from rfc822 import mktime_tz, parsedate_tz
def escaping_all_text(content):
    """Replace named HTML entity references in content with their characters.

    Despite its name, this function decodes: every "&name;" whose name is a
    key of ``entitydefs`` is replaced by the corresponding value (latin-1
    byte values are decoded first).

    "&amp;" is decoded last, so that an escaped reference such as
    "&amp;lt;" ends up as "&lt;" rather than being decoded twice into "<".

    Returns the decoded string.
    """
    def as_text(value):
        # Python 2's htmlentitydefs stores latin-1 byte strings.
        if isinstance(value, bytes):
            return value.decode('latin-1')
        return value

    ampersand = None
    for html_code, raw_value in entitydefs.items():
        if html_code == 'amp':
            # Deferred until every other reference has been handled.
            ampersand = raw_value
            continue
        content = content.replace("&%s;" % html_code, as_text(raw_value))
    if ampersand is not None:
        content = content.replace("&amp;", as_text(ampersand))
    return content
def escaping_all_text(content):
    """Replace named HTML entity references in content with their characters.

    Despite its name, this function decodes: every "&name;" whose name is a
    key of ``entitydefs`` is replaced by the corresponding value (latin-1
    byte values are decoded first).

    "&amp;" is decoded last, so that an escaped reference such as
    "&amp;lt;" ends up as "&lt;" rather than being decoded twice into "<".

    Returns the decoded string.
    """
    def as_text(value):
        # Python 2's htmlentitydefs stores latin-1 byte strings.
        if isinstance(value, bytes):
            return value.decode("latin-1")
        return value

    ampersand = None
    for html_code, raw_value in entitydefs.items():
        if html_code == "amp":
            # Deferred until every other reference has been handled.
            ampersand = raw_value
            continue
        content = content.replace("&%s;" % html_code, as_text(raw_value))
    if ampersand is not None:
        content = content.replace("&amp;", as_text(ampersand))
    return content
from xml.sax.saxutils import escape, unescape, quoteattr
from htmlentitydefs import entitydefs
import base64
import urllib

def xmlquote(s):
    """Escape &, <, > and the double quote in s with entity references."""
    # escape() handles &, < and > itself; the double quote is passed in as
    # an additional replacement.
    extra_entities = {'"': '&quot;'}
    return escape(s, extra_entities)

# Replacement text -> "&name;" reference for every entity except '&'.
htmlentitymap = dict(
    (char, '&%s;' % name)
    for name, char in entitydefs.iteritems()
    if char != '&')

def htmlquote(s):
    """Escape s using every replacement listed in htmlentitymap.

    escape() takes care of &, < and > and then applies the extra
    replacements from htmlentitymap.
    """
    return escape(s, entities=htmlentitymap)

def _latinquotechar(c):
    """Return c unchanged, or its numeric reference if ord(c) >= 160."""
    codepoint = ord(c)
    if codepoint < 160:
        return c
    return '&#%03d;' % codepoint


def latinquote(s):
    """Replace every character of s whose code is >= 160 with a numeric
    character reference; all other characters are kept as they are."""
    return ''.join(_latinquotechar(char) for char in s)

def fullquote(s):
    """Return s with every character written as %XX (upper-case hex)."""
    # %02X emits the upper-case digits directly.
    return ''.join('%%%02X' % ord(char) for char in s)

Exemple #14
0
               }

def getMimeFromExt(filepath):
  """Return the MIME type registered for the extension of filepath.

  Paths ending in '.htaccess' are special-cased to 'config/htaccess'.
  Everything else is looked up in extensionMap by the extension that
  os.path.splitext reports, falling back to the entry stored under None.
  """
  if not filepath.endswith('.htaccess'):
    suffix = os.path.splitext(filepath)[1]
    fallback = extensionMap[None]
    return extensionMap.get(suffix, fallback)
  return 'config/htaccess'

###### Escaping ######

import types
from htmlentitydefs import entitydefs

# Reverse lookup table: replacement text -> entity name.
entityify = dict((char, name) for name, char in entitydefs.iteritems())

def escapeMarkup(data):
  """Return data with the markup characters &, > and < turned into entities.

  Same approach as xml.sax.saxutils.escape.
  """
  # The ampersand pair must stay first, otherwise the '&' introduced by the
  # other two replacements would be escaped again.
  substitutions = (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;"))
  for char, entity in substitutions:
    data = data.replace(char, entity)
  return data

def escapeToNamedASCII(text):
  """Escape text to named entities where possible, then force it to ASCII.

  The text is first passed through escapeToNamed (defined outside this
  excerpt; presumably it performs the named-entity substitution). The
  result is encoded as ASCII with the 'xmlcharrefreplace' error handler, so
  any character that is still non-ASCII becomes a numeric reference.
  """
  named = escapeToNamed(text)
  return named.encode('ascii', 'xmlcharrefreplace')