Python items Beispiele, htmlentitydefs.entitydefs.items Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: transforms.py Projekt: Vinsurya/Plone

    def __call__(self):
        """Return ``self.orig`` rendered as HTML, encoded as UTF-8.

        ``None`` is treated as the empty string and byte strings are decoded
        as UTF-8 (undecodable bytes are replaced).  ``&`` and every character
        listed in ``entitydefs`` become named entities, the instance's
        ``urlRegexp``, ``emailRegexp`` and ``indentRegexp`` substitutions are
        applied, and line breaks are turned into ``<br />``.
        """
        source = self.orig
        if source is None:
            source = ''
        text = source if isinstance(source, unicode) else unicode(
            source, 'utf-8', 'replace')

        # The ampersand goes first: escaping it after the other entities
        # were inserted would turn "&lt;" into "&amp;lt;".
        text = text.replace('&', '&amp;')
        # Swap every remaining entity character for its named reference.
        for name, char in entitydefs.items():
            if name == 'amp':
                continue
            text = text.replace(char.decode('latin-1'), '&' + name + ';')

        # subn() returns (new_text, count); only the text is of interest.
        text, _ = self.urlRegexp.subn(self.replaceURL, text)
        text, _ = self.emailRegexp.subn(self.replaceEmail, text)
        text, _ = self.indentRegexp.subn(self.indentWhitespace, text)

        # Normalise Windows line endings, then make every newline a <br />.
        text = text.replace('\r\n', '\n').replace('\n', '<br />')

        return text.encode('utf-8')

Beispiel #2

0

Datei anzeigen

Datei: transforms.py Projekt: kkdhanesh/NBADEMO

    def __call__(self):
        """Convert ``self.orig`` to an HTML fragment and return UTF-8 bytes.

        A ``None`` source counts as empty; non-unicode input is decoded as
        UTF-8 with replacement of invalid bytes.  Characters that have a
        named entity in ``entitydefs`` are escaped, the URL, e-mail and
        indent patterns stored on the instance are substituted through their
        callbacks, and newlines end up as ``<br />``.
        """
        raw = self.orig
        text = '' if raw is None else raw
        if not isinstance(text, unicode):
            text = unicode(text, 'utf-8', 'replace')

        # "&" is escaped on its own, before anything else, so that the
        # ampersands of the entities inserted below are not escaped again.
        text = text.replace('&', '&amp;')

        # (character, "&name;") pairs for everything except the ampersand.
        replacements = [(letter.decode('latin-1'), '&' + entity + ';')
                        for entity, letter in entitydefs.items()
                        if entity != 'amp']
        for char, reference in replacements:
            text = text.replace(char, reference)

        linked = self.urlRegexp.subn(self.replaceURL, text)[0]
        mailed = self.emailRegexp.subn(self.replaceEmail, linked)[0]
        text = self.indentRegexp.subn(self.indentWhitespace, mailed)[0]

        # Windows line endings first, then every newline becomes a break.
        unix_text = text.replace('\r\n', '\n')
        html = unix_text.replace('\n', '<br />')

        return html.encode('utf-8')

Beispiel #3

0

Datei anzeigen

Datei: academic_calendar.py Projekt: UCF/Academic-Calendar-Parser

        def _parse_dates(self):
            '''

                Also, strptime returns datetime objects when 
                we really want date objects.

                Possible formats:
                * Friday, July 1, 2011 > Single
                * October 10 - 21, 2011 > Duration - Same Month
                * June 27 - August 26, 2011 > Duration - Different Month
                * October 17, 2011 - January 13, 2012 
                    >  Duration - Different month and year
                * March 21, 2011 - January 8, 2012 
                    and October 24, 2011 - January 8, 2012
                    > Multiple durations
            '''
            
            # Clean Up
            ## Replace any HTML entities and codes with a <space>
            for entity,code in entitydefs.items():
                self._when_text = self._when_text.replace('&'+entity+';', ' ')
                self._when_text = self._when_text.replace(code, ' ')
            ## Force to ascii
            self._when_text = self._when_text.encode('ascii', 'replace')
            ## Replace \xFFFD with <space>
            self._when_text = self._when_text.replace(u'\xFFFD', ' ')
            ## Normalize spacing
            self._when_text = re.sub('\s+', ' ', self._when_text)

            for duration in self._when_text.split('and'):
                date_split = duration.strip().split('-')

                start_date = None
                start_text = date_split[0].strip()
                end_date   = None
                
                if len(date_split) == 1:
                    try:
                        start_date = datetime.strptime(start_text, 
                            UCFAcademicCalendar._Event._DF_SINGLE)
                    except ValueError, e:
                        log.error('Unable to parse single date: ' + start_text)
                        raise UCFAcademicCalendar._Event.ParsingError()
                elif len(date_split) == 2:
                    end_text   = date_split[1].strip()
                    same_month = False

                    try: # Month Day Year
                        end_date = datetime.strptime(end_text,
                            UCFAcademicCalendar._Event._DF_MONTH_DAY_YEAR)
                    except ValueError, e:
                        try: # Day, Year
                            end_date = datetime.strptime(end_text,\
                                UCFAcademicCalendar._Event._DF_DAY_YEAR)
                            same_month = True
                        except ValueError, e:
                            log.error('Unable to parse start date: ' + end_text)
                            raise UCFAcademicCalendar._Event.ParsingFailure()

Beispiel #4

0

Datei anzeigen

Datei: gmailreader.py Projekt: gitGNU/gnu_gmailreader

    def __entitytoletter(self, s):
        """Return ``s`` with named HTML entities replaced by their values.

        Every ``&name;`` reference whose name is a key of ``entitydefs`` is
        substituted with the mapped value.  ``&amp;`` is expanded last: the
        ampersand it produces must not join the text that follows it into a
        new reference that a later replacement would decode a second time
        (``&amp;lt;`` has to become ``&lt;``, never ``<``).
        """
        for name, letter in entitydefs.items():
            if name != 'amp':
                s = s.replace('&' + name + ';', letter)

        # Deferred on purpose, see the docstring.
        if 'amp' in entitydefs:
            s = s.replace('&amp;', entitydefs['amp'])

        return s

Beispiel #5

0

Datei anzeigen

Datei: converter.py Projekt: tecnologiaenegocios/tn.plonemailing

    def _expand_entities(self, body):
        """Return ``body`` with named HTML entities expanded to characters.

        ``&nbsp;`` becomes a plain space.  ``&lt;`` and ``&gt;`` are left in
        place.  ``&amp;`` is expanded after all other entities so that an
        escaped reference such as ``&amp;copy;`` ends up as the literal text
        ``&copy;`` regardless of the dictionary's iteration order.
        """
        body = body.replace('&nbsp;', ' ')
        for entity, letter in entitydefs.items():
            # Let plone.intelligenttext handle &lt; and &gt;, or else we may be
            # creating what looks like tags.  &amp; is handled below.
            if entity in ('lt', 'gt', 'amp'):
                continue
            body = body.replace('&' + entity + ';', letter.decode('latin-1'))

        # Expanding &amp; earlier could create a fresh "&name;" sequence that
        # a later iteration would decode a second time.
        body = body.replace('&amp;', entitydefs['amp'].decode('latin-1'))

        return body

Beispiel #6

0

Datei anzeigen

Datei: tex_to_html.py Projekt: tutor-web/tutorweb.content

    def convert(self, orig, data, **kwargs):
        """Convert ``orig`` to HTML and store the result on ``data``.

        ``kwargs['encoding']`` is both the encoding of ``orig`` and the
        encoding expected for the stored output; only the UTF-8 spellings
        listed below are accepted, anything else raises ``ValueError``.
        When the file ``TTM_BINARY`` exists, ``orig`` (wrapped in the LaTeX
        pre/postamble) is piped through it; otherwise ``orig`` is rendered
        with ``convertWebIntelligentPlainTextToHtml``.  Returns ``data``.
        """
        encoding = kwargs['encoding']
        if encoding not in ['utf-8', 'utf_8', 'U8', 'UTF', 'utf8']:
            raise ValueError('Only support unicode, not %s' % encoding)

        if not os.path.isfile(TTM_BINARY):
            # No converter binary available: fall back to plain-text markup.
            html = convertWebIntelligentPlainTextToHtml(
                orig.decode(encoding)).decode('utf8')
            # Bodge back entities, to save space
            from htmlentitydefs import entitydefs
            for name, char in entitydefs.items():
                if name in ('amp', 'lt', 'gt'):
                    continue
                html = html.replace('&' + name + ';', char.decode('latin-1'))
            data.setData(
                '<div class="parse-as-tex">%s</div>' % html.encode(encoding))
            return data

        command = [
            TTM_BINARY,
            '-a',   # Try to convert picture elements
            '-e3',  # inline epsfbox w/no icon
            '-r',   # Don't output a pre/postamble
            '-u2',  # Unicode please
        ]
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        out, err = process.communicate(
            input=LATEX_PREAMBLE + orig + LATEX_POSTAMBLE)
        rendered = out.strip()
        if '****' in err:
            # Probably an error, show it.
            data.setData('<pre class="ttm-output error">%s</pre>\n<div class="ttm-output">%s</div>' % (
                cgi.escape(err.strip()),
                rendered,
            ))
        else:
            data.setData('<div class="ttm-output">%s</div>' % (rendered,))
        return data

Beispiel #7

0

Datei anzeigen

Datei: __init__.py Projekt: pierre1313/c-t-TV.bundle

def cleanHTML(text, skipchars=None, extra_careful=1):
    """Replace characters that have a named HTML entity with ``&name;``.

    text          -- the string to convert.
    skipchars     -- a single character, or a container of characters, that
                     must be left as they are (``&`` is always escaped).
    extra_careful -- when true, ``text`` is returned untouched if it already
                     contains something that looks like an entity or a
                     numeric character reference, because it may have been
                     converted before.

    Besides the entity characters, ``&`` becomes ``&amp;`` and ``\\x80``
    becomes ``&#8364;``.
    """
    # A fresh list per call instead of a shared mutable default.
    if skipchars is None:
        skipchars = []
    elif isinstance(skipchars, str):
        skipchars = [skipchars]

    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and re.search(r'&\w+;|&#[0-9]+;', text):
        return text

    entitydefs_inverted = dict((v, k) for k, v in entitydefs.items())
    # Escape each alternative so a value can never be read as regex syntax.
    badchars_regex = re.compile(
        '|'.join([re.escape(v) for v in entitydefs.values()]))

    found = set(x for x in badchars_regex.findall(text)
                if x not in skipchars)

    text = text.replace('&', '&amp;')
    text = text.replace('\x80', '&#8364;')
    for each in found:
        if each == '&':
            # Already handled by the blanket replacement above.
            continue

        better = entitydefs_inverted[each]
        if not better.startswith('&#'):
            better = '&%s;' % better

        text = text.replace(each, better)
    return text

Beispiel #8

0

Datei anzeigen

Datei: crawlerx.py Projekt: ttaylordev/z

# Matches URLs that end in a movie file extension (case-insensitive).
match_movie_url = re.compile(r""".*\.(mpg|mpeg|avi)$""", re.I).match

# Matches data starting with the bytes 00 00 01 BA or with a
# "RIFF....AVI LIST" header (presumably MPEG / AVI file signatures).
match_movie = re.compile("^(\x00\x00\x01\xba|RIFF....AVI LIST)").match

# Splits a URL into (prefix, trailing number, optional ".ext").
match_enum_url = re.compile(r"(.*?)(\d+)(\.\w{3,4})?$").match

# Matches http URLs on a direct sub-domain of yimg.com.  The dots are
# escaped so that they only match a literal "." and not any character.
enum_blacklisted = re.compile(r"^http://[^.]*\.yimg\.com").match

	

# matches html entities
from htmlentitydefs import entitydefs

# Names of the entities whose replacement text is a single character.
entities = [name for (name, char) in entitydefs.items() if len(char) == 1]

# Substitution function for every "&name;" reference listed in ``entities``.
entity_sub = re.compile("&(%s);" % '|'.join(entities)).sub


def get_match_entitydef(match):
    """Return the replacement text for the entity name captured by ``match``."""
    return entitydefs[match.group(1)]


def unentify(s):
    """Return ``s`` with the entities listed in ``entities`` unescaped."""
    return entity_sub(get_match_entitydef, s)
    
import Queue
	
class MultiQueue(Queue.Queue):

    def _init(self, (maxq, maxsize)):
        self.maxsize = maxsize

Beispiel #9

0

Datei anzeigen

Datei: utils.py Projekt: EvaSDK/python-sjutils

        if size < 1024.0:
            import math

            if math.floor(size) == size:
                return "%d %s" % (int(size), final_unit)
            else:
                return "%3.1f %s" % (size, final_unit)

        if unit != "Yotta":
            size /= 1024.0

    return "%3.1f %s" % (size, final_unit)


# Reverse lookup table: replacement text -> entity name.
entitydefs_inverted = dict((char, name) for name, char in entitydefs.items())

# Matches any replacement text listed in entitydefs.
_badchars_regex = re.compile("|".join(entitydefs.values()))
# Matches text that already looks like an entity or a numeric reference.
_been_fixed_regex = re.compile("&\w+;|&#[0-9]+;")


def html_entity_fixer(text, skipchars=[], extra_careful=1):
    # if extra_careful we don't attempt to do anything to
    # the string if it might have been converted already.
    if extra_careful and _been_fixed_regex.findall(text):
        return text

    if type(skipchars) == type("s"):
        skipchars = [skipchars]

Beispiel #10

0

Datei anzeigen

from math import floor
from htmlentitydefs import entitydefs
import warnings, pytz
import logging

# import line used by textify
import formatter, htmllib, StringIO

try:
    import markdown
    markdown_converter = markdown.Markdown(safe_mode="escape")
except ImportError:
    markdown_converter = None

# Reverse lookup table: replacement text -> entity name.
entitydefs_inverted = dict((char, name) for name, char in entitydefs.items())

# zope
from Products.PythonScripts.standard import html_quote, newline_to_br, \
         url_quote, url_quote_plus

try:
    # >= Zope 2.12
    from zope.structuredtext import stx2html
except ImportError:
    # < Zope 2.12
    from StructuredText.StructuredText import HTML as stx2html


def structured_text(txt):

Beispiel #11

0

Datei anzeigen

Datei: PCrawler.py Projekt: FergalJPC/PythonProjects

# Matches URLs that embed a second http/ftp URL after the first one.
referlink = re.compile(r"""(http|ftp)://(.*?)(http|ftp)://""", re.I).match

# Matches URLs that end in a movie file extension (case-insensitive).
match_movie_url = re.compile(r""".*\.(mpg|mpeg|avi)$""", re.I).match

# Matches data starting with the bytes 00 00 01 BA or with a
# "RIFF....AVI LIST" header (presumably MPEG / AVI file signatures).
match_movie = re.compile("^(\x00\x00\x01\xba|RIFF....AVI LIST)").match

# Splits a URL into (prefix, trailing number, optional ".ext").
match_enum_url = re.compile(r"(.*?)(\d+)(\.\w{3,4})?$").match

# Matches http URLs on a direct sub-domain of yimg.com.  The dots are
# escaped so that they only match a literal "." and not any character.
enum_blacklisted = re.compile(r"^http://[^.]*\.yimg\.com").match

# matches html entities
from htmlentitydefs import entitydefs

# Names of the entities whose replacement text is a single character.
entities = [name for (name, char) in entitydefs.items() if len(char) == 1]

# Substitution function for every "&name;" reference listed in ``entities``.
entity_sub = re.compile("&(%s);" % '|'.join(entities)).sub


def get_match_entitydef(match):
    """Return the replacement text for the entity name captured by ``match``."""
    return entitydefs[match.group(1)]


def unentify(s):
    """Return ``s`` with the entities listed in ``entities`` unescaped."""
    return entity_sub(get_match_entitydef, s)


import Queue


class MultiQueue(Queue.Queue):

Beispiel #12

0

Datei anzeigen

Datei: jmlr.py Projekt: Heroku-elasa/heroku-buildpack-python-ieee-new

def decodeentities(string):
    """Return ``string`` with named HTML entities replaced by their values.

    Every ``&name;`` reference whose name is a key of ``entitydefs`` is
    substituted with the mapped value.  ``&amp;`` is expanded last so that
    an escaped reference such as ``&amp;lt;`` yields the literal text
    ``&lt;`` instead of being decoded twice into ``<``.
    """
    for htmlent, ch in entitydefs.items():
        if htmlent != 'amp':
            string = string.replace('&' + htmlent + ';', ch)
    # Deferred on purpose: the "&" produced here must not start a new
    # reference that one of the replacements above would pick up.
    if 'amp' in entitydefs:
        string = string.replace('&amp;', entitydefs['amp'])
    return string

Beispiel #13

0

Datei anzeigen

# Map the non-breaking space entity to an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are kept out of the character-reference table below.
sgmlentity = dict(lt='<', gt='>', amp='&', quot='"', apos='\'', ndash='-')
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# Maps both the entity name and its "#<decimal>" form to a unicode string.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _name, _value in entitydefs.items():
    if _name in _sgmlentkeys:
        continue
    if _value.startswith('&#'):
        # The value is itself a numeric reference such as "&#913;".
        dec_code = _value[1:-1]
        _value = unichr(int(dec_code[1:]))
    else:
        # The value is a single latin-1 encoded character.
        dec_code = '#%d' % ord(_value)
        _value = unicode(_value, 'latin_1', 'replace')
    entcharrefs[dec_code] = entcharrefs[_name] = _value
del _sgmlentkeys, _name, _value

# Every spelling of the non-breaking space reference maps to a space.
entcharrefs.update({
    '#160': u' ',
    '#xA0': u' ',
    '#xa0': u' ',
    '#XA0': u' ',
})

Beispiel #14

0

Datei anzeigen

                         ("gacute", 501), ("Hacek", 711), ("Breve", 728),
                         ("DiacriticalDot", 729), ("ring", 730), ("ogon", 731),
                         ("DiacriticalTilde", 732),
                         ("DiacriticalDoubleAcute", 733), ("DownBreve", 785),
                         ("UnderBar", 818), ("varepsilon", 949),
                         ("varsigma", 962), ("varphi", 966), ("vartheta", 977),
                         ("Upsi", 978), ("straightphi", 981), ("varpi", 982),
                         ("Gammad", 988), ("digamma", 989), ("varkappa", 1008),
                         ("varrho", 1009), ("straightepsilon", 1013),
                         ("backepsilon", 1014)]

from htmlentitydefs import entitydefs

# "apos" is the only XML entity that isn't an HTML entity as well.
entitydefs['apos'] = "'"

# (name, code as decimal text) pairs: the digits are taken from a "&#N;"
# value, or computed with ord() when the value is a single character.
List_HTML_Entities = [
    (name, value[2:-1] or str(ord(value)))
    for name, value in entitydefs.items()
]
# Append the MathML2 entities, which must not reuse an HTML entity name.
for name, value in List_MathML2_Entities:
    assert name not in entitydefs
    List_HTML_Entities.append((name, str(value)))

########## CSS properties ##########

List_CSS_Props = "color display font font-family font-size font-style font-weight list-style margin margin-bottom margin-left margin-right margin-top max-width opacity padding padding-bottom padding-left padding-right padding-top page-break-after page-break-before text-align text-decoration text-indent text-underline white-space word-wrap"

########## CSS colors ##########

# array of name/value for css colors, value is what goes inside MKRGB()
# based on https://developer.mozilla.org/en/CSS/color_value
# TODO: add more colors
List_CSS_Colors = [

Beispiel #15

0

Datei anzeigen

Datei: plainhtml.py Projekt: loveinlastnight/blog

def convertWebIntelligentPlainTextToHtml(orig, tab_width=4):
    """Converts text/x-web-intelligent to text/html

    ``orig`` may be ``None`` (treated as empty), a unicode object, or a
    byte string, which is decoded as UTF-8 with invalid bytes replaced.
    ``tab_width`` is the number of ``&nbsp;`` written for each leading tab.
    The result is returned as a UTF-8 encoded byte string.
    """
    # very long urls are abbreviated to allow nicer layout
    def abbreviateUrl(url, max_length=60, ellipsis="[&hellip;]"):
        if len(url) < max_length:
            return url
        protocolend = url.find("//")
        if protocolend == -1:
            protocol = ""
        else:
            protocol = url[0:protocolend + 2]
            url = url[protocolend + 2:]
        parts = url.split("/")
        if len(parts) < 3 or len(parts[0]) + len(parts[-1]) > max_length:
            url = protocol + url
            # Floor division keeps the slice bounds integers even when
            # "/" means true division.
            center = (max_length - 5) // 2
            return url[:center] + ellipsis + url[-center:]

        return protocol + parts[0] + "/" + ellipsis + "/" + parts[-1]

    urlRegexp = re.compile(r'((?:ftp|https?)://(localhost|([12]?[0-9]{1,2}.){3}([12]?[0-9]{1,2})|(?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+(?:com|edu|biz|org|gov|int|info|mil|net|name|museum|coop|aero|[a-z][a-z]))\b(?::\d+)?(?:\/[^"\'<>()\[\]{}\s\x7f-\xff]*(?:[.,?]+[^"\'<>()\[\]{}\s\x7f-\xff]+)*)?)', re.I|re.S|re.U)
    emailRegexp = re.compile(r'["=]?(\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)', re.I|re.S|re.U)
    indentRegexp = re.compile(r'^(\s+)', re.M|re.U)

    text = orig
    if text is None:
        text = ''
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8', 'replace')

    # Do &amp; separately, else, it may replace an already-inserted & from
    # an entity with &amp;, so < becomes &lt; becomes &amp;lt;
    text = text.replace('&', '&amp;')
    # Make funny characters into html entity defs
    for entity, letter in entitydefs.items():
        if entity != 'amp':
            text = text.replace(letter.decode('latin-1'), '&' + entity + ';')

    # Replace hyperlinks with clickable <a> tags
    def replaceURL(match):
        url = match.groups()[0]
        linktext = abbreviateUrl(url)
        # In urls we need the revert our earlier change to the ampersands.
        # We do not want something like:
        # http://google.com/ask?question=everything&amp;answer=42
        url = url.replace('&amp;', '&')
        # Also with <some link> we should only link to some link, not
        # including the brackets.
        end = ''
        # XXX Probably better to fix the regex above.  Maurits
        if url.endswith('&gt;'):
            url = url[:-len('&gt;')]
            linktext = linktext[:-len('&gt;')]
            end = '&gt;'

        # rel="nofollow" shall avoid spamming
        return '<a href="%s" rel="nofollow">%s</a>%s' % (url, linktext, end)
    text = urlRegexp.subn(replaceURL, text)[0]

    # Replace email strings with mailto: links
    def replaceEmail(match):
        url = match.groups()[0]
        # following unicode substitutions shall avoid email spam crawlers to pickup email addresses
        url = url.replace('@', '&#0064;')
        return '<a href="&#0109;ailto&#0058;%s">%s</a>' % (url, url)
    text = emailRegexp.subn(replaceEmail, text)[0]

    # Make leading whitespace on a line into &nbsp; to preserve indents
    def indentWhitespace(match):
        indent = match.groups()[0]
        indent = indent.replace(' ', '&nbsp;')
        return indent.replace('\t', '&nbsp;' * tab_width)
    text = indentRegexp.subn(indentWhitespace, text)[0]

    # Convert windows line endings, so no stray "\r" is left in front of
    # the <br /> inserted below.
    text = text.replace('\r\n', '\n')
    # Finally, make \n's into br's
    text = text.replace('\n', '<br />')

    text = text.encode('utf-8')

    return text

Beispiel #16

0

Datei anzeigen

Datei: chmviewkit.py Projekt: muayyad-alsadi/chmviewkit

def _build_entiries_re():
    """Compile a case-insensitive regex matching ``&name;`` references.

    Only entities from ``entitydefs`` whose replacement text does not
    itself start with ``&`` are included; the name is captured in group 1.
    """
    names = [re.escape(name) for name, value in entitydefs.items()
             if not value.startswith('&')]
    return re.compile("&(%s);" % "|".join(names), re.I)

Beispiel #17

0

Datei anzeigen

Datei: util.py Projekt: MinasAbrahamyan/bobomail

			return string.split(pwd.getpwnam(user.id)[4], ",")[0]
		except: pass
	return ""


def urlquote(s):
    """Return ``s`` quoted by ``urllib.quote_plus`` with no safe characters."""
    return urllib.quote_plus(s, "")
def urlunquote(s):
    """Return ``s`` decoded by ``urllib.unquote_plus``."""
    return urllib.unquote_plus(s)

def quotedtext(text):
    """Return ``text`` with every line prefixed by ``"> "``.

    Lines are split on ``"\\n"`` only; an empty ``text`` counts as one
    empty line.  String methods are used instead of the deprecated
    ``string.split`` / ``string.join`` module functions.
    """
    return "\n".join(["> %s" % line for line in text.split("\n")])


# Character -> HTML replacement: newline, tab, then every entity value
# from entitydefs mapped to its "&name;" reference.
replace_char = {"\n": "<br>\n", "\t": "&nbsp;" * 4}
replace_char.update(
    (value, "&%s;" % key) for key, value in entitydefs.items())


def escape(s, spaces=0):
	nbsp = entitydefs["nbsp"]
	last = ""
	new = StringIO()
	for char in s:
		if spaces and char == " " and last in [" ", nbsp]:
			char = nbsp
		new.write(replace_char.get(char, char))
		last = char
	newstr = new.getvalue()
	if spaces and newstr and newstr[0] == " ":
	    return "&nbsp;%s" % newstr[1:]

Beispiel #18

0

Datei anzeigen

Datei: plainhtml.py Projekt: loveinlastnight/blog

def convertHtmlToWebIntelligentPlainText(orig):
    """Converts text/html to text/x-web-intelligent.

    ``orig`` is the HTML source; ``None`` is treated as the empty string.
    Whitespace is collapsed, block-level tags become newlines or indents,
    remaining tags are dropped and named entities are decoded.  The content
    of ``<pre>`` sections is set aside first and re-inserted unchanged at
    the end.  Returns the resulting plain text.
    """
    preRegex = re.compile(r'<\s*pre[^>]*>(.*?)<\s*/pre\s*>', re.I | re.S)

    tagWhitespaceRegex = re.compile(r'\s+((<[^>]+>)\s+)+')
    whitespaceRegex = re.compile(r'\s+')

    tdRegex = re.compile(r'<\s*(td)([^>])*>', re.I)
    breakRegex = re.compile(r'<\s*(br)\s*/?>', re.I)
    startBlockRegex = re.compile(r'<\s*(dt)[^>]*>', re.I)
    endBlockRegex = re.compile(r'<\s*/\s*(p|div|tr|ul|ol|dl)[^>]*>', re.I)
    indentBlockRegex = re.compile(r'<\s*(blockquote|dd)[^>]*>', re.I)
    listBlockRegex = re.compile(r'<\s*(li)[^>]*>', re.I)

    tagRegex = re.compile(r'<[^>]+>', re.I | re.M)

    # Save all <pre> sections and restore after other transforms
    preSections = {}
    def savePres(match):
        marker = '__pre_marker__%d__' % (len(preSections),)
        preSections[marker] = match.group(1)
        return marker
    if orig is None:
        orig = ''
    text = preRegex.sub(savePres, orig)

    # Make whitespace-tag-whitespace into whitespace-tag. Repeat this
    # in case there are directly nested tags
    def fixTagWhitespace(match):
        # Remove any superfluous whitespace, but preserve one leading space
        return ' ' + whitespaceRegex.sub('', match.group(0))
    text = tagWhitespaceRegex.sub(fixTagWhitespace, text)

    # Make all whitespace into a single space
    text = whitespaceRegex.sub(' ', text)

    # Fix entities
    text = text.replace('&nbsp;', ' ')
    for entity, letter in entitydefs.items():
        # Do &lt; and &gt; later, else we may be creating what looks like
        # tags.  &amp; is done last of all: decoding it here would turn an
        # escaped reference such as &amp;lt; into &lt;, which the later
        # steps would then decode a second time.
        if entity not in ('lt', 'gt', 'amp'):
            text = text.replace('&' + entity + ';', letter)

    # XXX: Remove <head>, <script>, <style> ?

    # Make tabs out of td's
    text = tdRegex.sub('\t', text)

    # Make br's into newlines
    text = breakRegex.sub('\n', text)

    # Make the start of list blocks into paragraphs
    text = startBlockRegex.sub('\n\n', text)

    # Make the close of p's, div's and tr's into paragraphs
    text = endBlockRegex.sub('\n\n', text)

    # Make blockquotes and dd blocks indented
    text = indentBlockRegex.sub('\n\n  ', text)

    # Make list items indented and prefixed with -
    text = listBlockRegex.sub('\n\n  - ', text)

    # Remove other tags
    text = tagRegex.sub('', text)

    # Fix < and > entities
    text = text.replace('&lt;', '<')
    text = text.replace('&gt;', '>')

    # Fix & last, so each reference is decoded exactly once
    text = text.replace('&amp;', '&')

    # Restore pres
    for marker, section in preSections.items():
        text = text.replace(marker, '\n\n' + section + '\n\n')

    return text

Beispiel #19

0

Datei anzeigen

Datei: py2html.py Projekt: bgnori/tonic

#!/usr/bin/env python
# -*- coding: us-ascii -*-
# vim: syntax=python
#
# 2-clause BSD license
# Copyright 2009 Noriyuki Hosaka [email protected]
#

import sys
import os.path
from htmlentitydefs import entitydefs

from tonic.lineparser import LineParser

# Lookup table: entity value -> "&name;" for every entry of entitydefs.
_d = dict((value, '&%s;' % key) for key, value in entitydefs.items())

def escape(s):
  """Return ``s`` with each character found in ``_d`` replaced by its
  ``&name;`` reference; all other characters are kept as they are."""
  pieces = []
  for c in s:
    pieces.append(_d.get(c, c))
  return ''.join(pieces)

def resource(name):
  """Return the raw content of the file ``name``.

  A ``name`` that does not start with the path separator is looked up
  relative to the directory that contains this module.  The file is read
  in binary mode and closed again even if reading fails.
  """
  if not name.startswith(os.path.sep):
    name = os.path.join(os.path.dirname(os.path.abspath(__file__)), name)
  # open() replaces the deprecated file() builtin.
  f = open(name, 'rb')
  try:
    return f.read()
  finally:
    f.close()

class Formatter(LineParser):
  css_src = resource('python.css')

Beispiel #20

0

Datei anzeigen

Datei: utils.py Projekt: Belgar/CouchPotato

                        lastKey=lastKey)


# Handle HTML/XML/SGML entities.
from htmlentitydefs import entitydefs
# Work on a copy so the assignment below does not modify the imported dict.
entitydefs = entitydefs.copy()
entitydefsget = entitydefs.get
# Map the non-breaking space entity to an ordinary space.
entitydefs['nbsp'] = ' '

# Entities that are kept out of the character-reference table below.
sgmlentity = dict(lt='<', gt='>', amp='&', quot='"', apos='\'')
sgmlentityget = sgmlentity.get
_sgmlentkeys = sgmlentity.keys()

# Maps both the entity name and its "#<decimal>" form to a unicode string.
entcharrefs = {}
entcharrefsget = entcharrefs.get
for _name, _value in entitydefs.items():
    if _name in _sgmlentkeys:
        continue
    if _value.startswith('&#'):
        # The value is itself a numeric reference such as "&#913;".
        dec_code = _value[1:-1]
        _value = unichr(int(dec_code[1:]))
    else:
        # The value is a single latin-1 encoded character.
        dec_code = '#%d' % ord(_value)
        _value = unicode(_value, 'latin_1', 'replace')
    entcharrefs[dec_code] = entcharrefs[_name] = _value
del _sgmlentkeys, _name, _value

# Every spelling of the non-breaking space reference maps to a space.
entcharrefs.update({
    '#160': u' ',
    '#xA0': u' ',
    '#xa0': u' ',
    '#XA0': u' ',
})

Beispiel #21

0

Datei anzeigen

Datei: handy.py Projekt: sabren/appengine-workshop

def htmlEncode(s):
    """Return the items of ``s`` joined into one string, with every item
    that has a named HTML entity replaced by ``&name;``.

    Each item is looked up in the inverted ``entitydefs`` table as-is first
    and then by its ``str()`` form; the entity name is read with whichever
    key matched, so a match through ``str()`` no longer raises ``KeyError``.
    """
    entity_names = dict((val, key) for (key, val) in entitydefs.items())
    pieces = []
    for ch in s:
        if ch in entity_names:
            pieces.append("&" + entity_names[ch] + ";")
        elif str(ch) in entity_names:
            pieces.append("&" + entity_names[str(ch)] + ";")
        else:
            pieces.append(ch)
    return ''.join(pieces)

Beispiel #22

0

Datei anzeigen

########## HTML and XML entities ##########

Template_Entities_Comment = """\
// map of entity names to their Unicode runes, cf.
// http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
// and http://www.w3.org/TR/MathML2/bycodes.html
"""

# selection of MathML2 entities that aren't HTML entities
List_MathML2_Entities = [("DoubleDot", 168), ("OverBar", 175), ("PlusMinus", 177), ("Cedilla", 184), ("Amacr", 256), ("amacr", 257), ("Abreve", 258), ("abreve", 259), ("Aogon", 260), ("aogon", 261), ("Cacute", 262), ("cacute", 263), ("Ccirc", 264), ("ccirc", 265), ("Cdot", 266), ("cdot", 267), ("Ccaron", 268), ("ccaron", 269), ("Dcaron", 270), ("dcaron", 271), ("Dstrok", 272), ("dstrok", 273), ("Emacr", 274), ("emacr", 275), ("Edot", 278), ("edot", 279), ("Eogon", 280), ("eogon", 281), ("Ecaron", 282), ("ecaron", 283), ("Gcirc", 284), ("gcirc", 285), ("Gbreve", 286), ("gbreve", 287), ("Gdot", 288), ("gdot", 289), ("Gcedil", 290), ("Hcirc", 292), ("hcirc", 293), ("Hstrok", 294), ("hstrok", 295), ("Itilde", 296), ("itilde", 297), ("Imacr", 298), ("imacr", 299), ("Iogon", 302), ("iogon", 303), ("Idot", 304), ("IJlig", 306), ("ijlig", 307), ("Jcirc", 308), ("jcirc", 309), ("Kcedil", 310), ("kcedil", 311), ("kgreen", 312), ("Lacute", 313), ("lacute", 314), ("Lcedil", 315), ("lcedil", 316), ("Lcaron", 317), ("lcaron", 318), ("Lmidot", 319), ("lmidot", 320), ("Lstrok", 321), ("lstrok", 322), ("Nacute", 323), ("nacute", 324), ("Ncedil", 325), ("ncedil", 326), ("Ncaron", 327), ("ncaron", 328), ("napos", 329), ("ENG", 330), ("eng", 331), ("Omacr", 332), ("omacr", 333), ("Odblac", 336), ("odblac", 337), ("Racute", 340), ("racute", 341), ("Rcedil", 342), ("rcedil", 343), ("Rcaron", 344), ("rcaron", 345), ("Sacute", 346), ("sacute", 347), ("Scirc", 348), ("scirc", 349), ("Scedil", 350), ("scedil", 351), ("Tcedil", 354), ("tcedil", 355), ("Tcaron", 356), ("tcaron", 357), ("Tstrok", 358), ("tstrok", 359), ("Utilde", 360), ("utilde", 361), ("Umacr", 362), ("umacr", 363), ("Ubreve", 364), ("ubreve", 365), ("Uring", 366), ("uring", 367), ("Udblac", 368), ("udblac", 369), ("Uogon", 370), ("uogon", 371), ("Wcirc", 372), ("wcirc", 373), ("Ycirc", 374), ("ycirc", 375), ("Zacute", 377), ("zacute", 378), ("Zdot", 379), ("zdot", 380), ("Zcaron", 381), ("zcaron", 382), ("imped", 437), 
("gacute", 501), ("Hacek", 711), ("Breve", 728), ("DiacriticalDot", 729), ("ring", 730), ("ogon", 731), ("DiacriticalTilde", 732), ("DiacriticalDoubleAcute", 733), ("DownBreve", 785), ("UnderBar", 818), ("varepsilon", 949), ("varsigma", 962), ("varphi", 966), ("vartheta", 977), ("Upsi", 978), ("straightphi", 981), ("varpi", 982), ("Gammad", 988), ("digamma", 989), ("varkappa", 1008), ("varrho", 1009), ("straightepsilon", 1013), ("backepsilon", 1014)]

from htmlentitydefs import entitydefs
# "apos" is the only XML entity that isn't an HTML entity as well.
entitydefs['apos'] = "'"

# (name, code as decimal text) pairs: the digits are taken from a "&#N;"
# value, or computed with ord() when the value is a single character.
List_HTML_Entities = [
    (name, value[2:-1] or str(ord(value)))
    for name, value in entitydefs.items()
]
# Append the MathML2 entities, which must not reuse an HTML entity name.
for name, value in List_MathML2_Entities:
    assert name not in entitydefs
    List_HTML_Entities.append((name, str(value)))

########## CSS properties ##########

List_CSS_Props = "color display font font-family font-size font-style font-weight list-style margin margin-bottom margin-left margin-right margin-top max-width padding padding-bottom padding-left padding-right padding-top page-break-after page-break-before text-align text-decoration text-indent text-underline white-space word-wrap"

########## CSS colors ##########

# array of name/value for css colors, value is what goes inside MKRGB()
# based on https://developer.mozilla.org/en/CSS/color_value
# TODO: add more colors
List_CSS_Colors = [

Beispiel #23

0

Datei anzeigen

def decodeentities(string):
    """Return ``string`` with named HTML entities replaced by their values.

    Every ``&name;`` reference whose name is a key of ``entitydefs`` is
    substituted with the mapped value.  ``&amp;`` is expanded last so that
    an escaped reference such as ``&amp;lt;`` yields the literal text
    ``&lt;`` instead of being decoded twice into ``<``.
    """
    for htmlent, ch in entitydefs.items():
        if htmlent != 'amp':
            string = string.replace('&' + htmlent + ';', ch)
    # Deferred on purpose: the "&" produced here must not start a new
    # reference that one of the replacements above would pick up.
    if 'amp' in entitydefs:
        string = string.replace('&amp;', entitydefs['amp'])
    return string