Exemple #1
0
 def __init__(self, *args, **kw):
     """Initialise the HTMLParser base class and this instance's state.

     All positional and keyword arguments are passed through unchanged
     to ``HTMLParser.__init__``.
     """
     HTMLParser.__init__(self, *args, **kw)

     # Per-instance entity table: a copy of the stock mapping, so adding
     # ``apos`` (U+0027) does not modify the shared module-level dict.
     entities = name2codepoint.copy()
     entities[u'apos'] = 0x0027
     self.name2codepoint = entities

     # Containers start out empty (two distinct dicts and a deque).
     self.styles, self.langs = {}, {}
     self.queue = deque()

     # String attributes start out as empty unicode strings.
     self.sami = self.line = self.last_element = u''
Exemple #2
0
 def __init__(self, *args, **kw):
     """Initialise the HTMLParser base class and this instance's state.

     All positional and keyword arguments are passed through unchanged
     to ``HTMLParser.__init__``.
     """
     HTMLParser.__init__(self, *args, **kw)
     # Assigned only after the base initialiser has run, so False is the
     # value left on the instance whatever the base class set.
     self.convert_charrefs = False

     # Per-instance entity table: a copy of the stock mapping, so adding
     # ``apos`` (U+0027) does not modify the shared module-level dict.
     entities = name2codepoint.copy()
     entities[u"apos"] = 0x0027
     self.name2codepoint = entities

     # Containers start out empty.
     self.styles = {}
     self.langs = set()
     self.queue = deque()

     # String attributes start out as empty unicode strings.
     self.sami = self.line = self.last_element = u""
# Named-entity table used by unescaper(): the stock HTML entities plus
# ``apos``.  Built once at import time instead of on every match.
_UNESCAPER_ENTITIES = name2codepoint.copy()
_UNESCAPER_ENTITIES['apos'] = ord("'")


def unescaper(match):
    """Return the character for one matched character/entity reference.

    Meant to be used as the replacement callback of a regular expression
    whose groups are, in order:

    1. the decimal digits of a numeric reference,
    2. the hexadecimal digits of a numeric reference,
    3. the name of a named entity.

    The first non-empty group decides how the reference is decoded.  The
    whole matched text is returned unchanged when the entity name is
    unknown or when a numeric reference cannot be turned into a character
    (for example a code point beyond the range ``unichr`` accepts), so a
    single bad reference no longer aborts the whole substitution.
    """
    digits, base = match.group(1), 10
    if not digits:
        digits, base = match.group(2), 16

    if digits:
        try:
            return unichr(int(digits, base))
        except (ValueError, OverflowError):
            # Not a representable code point: leave the reference as is.
            return match.group(0)

    name = match.group(3)
    if name in _UNESCAPER_ENTITIES:
        return unichr(_UNESCAPER_ENTITIES[name])

    return match.group(0)
Exemple #4
0
from exe.webui.blockfactory import g_blockFactory
from exe.engine.error import Error
from exe.engine.path import Path, TempDirPath
from exe.engine.version import release
from exe.export.pages import Page, uniquifyNames
from exe import globals as G
from BeautifulSoup import BeautifulSoup
from htmlentitydefs import name2codepoint
from helper import exportMinFileJS
from helper import exportMinFileCSS
from exe.webui.common import getFilesCSSToMinify
from exe.webui.common import getFilesJSToMinify

log = logging.getLogger(__name__)

# Named entities that htmlentitydecode() expands.  amp, lt, gt and quot are
# removed from the copy, so references to them are left untouched.
entitymap = name2codepoint.copy()
for _kept_escaped in ("amp", "lt", "gt", "quot"):
    entitymap.pop(_kept_escaped)
del _kept_escaped

# Compiled once at import time; previously the alternation of every entity
# name was rebuilt on each call.
_ENTITY_RE = re.compile("&(%s);" % "|".join(entitymap))


def _entity_to_char(match):
    """Return the character for the entity name captured in group 1."""
    return unichr(entitymap[match.group(1)])


def htmlentitydecode(s):
    """Replace named HTML entities in *s* with their characters.

    Only the names present in ``entitymap`` are expanded; ``&amp;``,
    ``&lt;``, ``&gt;``, ``&quot;``, numeric references and unknown names
    are returned unchanged.
    """
    return _ENTITY_RE.sub(_entity_to_char, s)


# ===========================================================================
class PublicationEpub3(object):
    """
    EPUB Publications 3.0, defines publication-level semantics and conformance requirements for EPUB 3,
    including the format of the Package Document and rules for how this document and other
Exemple #5
0
from collections import deque
from copy import deepcopy
from itertools import imap


# Matches a run of whitespace; because the run is captured, re.split()
# keeps the whitespace runs in its result.
_word_split_re = re.compile(r'(\s+)')
# Splits a token into three named parts: ``lead`` (any repetition of "(",
# "<" or "&lt;"), ``trail`` (any repetition of ".", ",", ")", ">", a newline
# or "&gt;") and ``middle`` (whatever lies between, matched non-greedily).
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)
# Whole-string match: non-whitespace, "@", then two runs of letters, digits,
# ".", "_" or "-" separated by a dot.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# Matches an HTML comment or anything from "<" up to the next ">".
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches "&...;" and captures the text between "&" and ";".
_entity_re = re.compile(r'&([^;]+);')
# Private copy of the stock entity table, extended with "apos" (code point
# 39, the apostrophe); the copy keeps the imported dict unmodified.
_entities = name2codepoint.copy()
_entities['apos'] = 39

# special singleton representing missing values for the runtime
# (an instance of an ad-hoc class whose repr() is the string 'missing')
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()


# Concatenate a list of strings and convert them to unicode.
# Unfortunately there is a bug in Python 2.4 and lower that causes
# unicode.join to trash the traceback.
_concat = u''.join
try:
    def _test_gen_bug():
        raise TypeError(_test_gen_bug)
        yield None
    _concat(_test_gen_bug())
Exemple #6
0
import uuid
from cgi                           import escape
from zipfile                       import ZipFile, ZIP_DEFLATED, ZIP_STORED
from exe.webui                     import common
from exe.webui.blockfactory        import g_blockFactory
from exe.engine.error              import Error
from exe.engine.path               import Path, TempDirPath
from exe.engine.version            import release
from exe.export.pages              import Page, uniquifyNames
from exe                      	   import globals as G
from BeautifulSoup                 import BeautifulSoup
from htmlentitydefs                import name2codepoint

log = logging.getLogger(__name__)

# Named entities that htmlentitydecode() expands: a copy of the stock table
# without amp, lt, gt and quot, so references to those stay as they are.
entitymap = name2codepoint.copy()
entitymap.pop('amp')
entitymap.pop('lt')
entitymap.pop('gt')
entitymap.pop('quot')

# Built and compiled once at import time rather than on every call.
_entity_pattern = re.compile('&(%s);' % '|'.join(entitymap))


def htmlentitydecode(s):
    """Replace named HTML entities in *s* with their characters.

    Only the names present in ``entitymap`` are expanded; ``&amp;``,
    ``&lt;``, ``&gt;``, ``&quot;``, numeric references and unknown names
    are returned unchanged.
    """
    return _entity_pattern.sub(
        lambda m: unichr(entitymap[m.group(1)]), s)


# ===========================================================================
class PublicationEpub3(object):
    """
    EPUB Publications 3.0, defines publication-level semantics and conformance requirements for EPUB 3,
Exemple #7
0
class HTMLBuilder(object):
    """Small factory for HTML snippets.

    Per default there are two instances of this class, `html` and `xhtml`,
    one per dialect.  Looking up an attribute on an instance yields a
    callable named after the tag: its positional arguments become the
    element's children and its keyword arguments become XML/SGML
    attributes.  A trailing underscore on a keyword is dropped.

    Because Python accepts positional arguments before keyword arguments,
    passing the children as a list with the star-syntax is a good idea:

    >>> html.p(class_='foo', *[html.a('foo', href='foo.html'), ' ',
    ...                        html.a('bar', href='bar.html')])
    u'<p class="foo"><a href="foo.html">foo</a> <a href="bar.html">bar</a></p>'

    The class works around some browser limitations and can not be used
    for arbitrary SGML/XML generation; lxml and similar libraries exist
    for that purpose.

    Calling the builder itself escapes the string passed:

    >>> html.p(html("<foo>"))
    u'<p>&lt;foo&gt;</p>'
    """

    from htmlentitydefs import name2codepoint
    # Matches "&...;" and captures the text between "&" and ";".
    _entity_re = re.compile(r'&([^;]+);')
    # Stock entity table plus "apos" (code point 39), as a private dict.
    _entities = dict(name2codepoint, apos=39)
    # Tags written without a closing tag when they have no children.
    _empty_elements = set([
        'area', 'base', 'basefont', 'br', 'col', 'command',
        'embed', 'frame', 'hr', 'img', 'input', 'keygen',
        'isindex', 'link', 'meta', 'param', 'source', 'wbr',
    ])
    # Attributes rendered by presence only (or as key="key" in xhtml).
    _boolean_attributes = set([
        'selected', 'checked', 'compact', 'declare',
        'defer', 'disabled', 'ismap', 'multiple',
        'nohref', 'noresize', 'noshade', 'nowrap',
    ])
    # Tags whose joined children are escaped before being emitted.
    _plaintext_elements = set(['textarea'])
    # Tags whose content gets a commented CDATA wrapper in xhtml.
    _c_like_cdata = set(['script', 'style'])
    # Keep the imported table out of the class namespace.
    del name2codepoint

    def __init__(self, dialect):
        """Remember the *dialect*; ``'xhtml'`` selects XHTML-style output."""
        self._dialect = dialect

    def __call__(self, s):
        """Return *s* passed through ``escape``."""
        return escape(s)

    def __getattr__(self, tag):
        """Return a function that renders a ``<tag>`` element.

        Names starting with a double underscore raise ``AttributeError``
        instead of being treated as tags.
        """
        if tag.startswith('__'):
            raise AttributeError(tag)

        def proxy(*children, **arguments):
            xhtml = self._dialect == 'xhtml'
            markup = '<' + tag

            for key, value in arguments.iteritems():
                if value is None:
                    # None means "attribute not set".
                    continue
                if key[-1] == '_':
                    key = key[:-1]
                if key not in self._boolean_attributes:
                    value = '="' + escape(value, True) + '"'
                elif not value:
                    # A false boolean attribute is omitted altogether.
                    continue
                elif xhtml:
                    value = '="' + key + '"'
                else:
                    value = ''
                markup += ' ' + key + value

            if tag in self._empty_elements and not children:
                return markup + (' />' if xhtml else '>')

            body = ''.join(
                [unicode(child) for child in children if child is not None])
            if body and tag in self._plaintext_elements:
                body = escape(body)
            elif body and xhtml and tag in self._c_like_cdata:
                body = '/*<![CDATA[*/' + body + '/*]]>*/'

            return markup + '>' + body + '</' + tag + '>'

        return proxy

    def __repr__(self):
        """Return ``<ClassName for 'dialect'>``."""
        return '<%s for %r>' % (type(self).__name__, self._dialect)
from Products.CMFCore.utils import getToolByName
import re
from urlparse import urlsplit, urljoin, urlunsplit
from urllib import unquote_plus
from Acquisition import aq_base
from htmlentitydefs import name2codepoint
from zope.component import getAdapters
from zope.interface import implements
from zope.app.component.hooks import getSite
from plone.outputfilters import apply_filters
from plone.outputfilters.interfaces import IFilter
from plone.outputfilters.browser.resolveuid import uuidToObject, uuidFor
from plone.outputfilters.filters.resolveuid_and_caption import IImageCaptioningEnabler
from plone.outputfilters.filters.resolveuid_and_caption import IResolveUidsEnabler

# Rebind the imported name to a private copy, so that adding 'apos' below
# does not modify the table owned by htmlentitydefs.
name2codepoint = name2codepoint.copy()
name2codepoint['apos']=ord("'")

__revision__ = '$Id$'

# IMAGE_PATTERN matches an image tag on its own, or an image tag
# enclosed in a simple <p> or <div>. In the latter case we strip out
# the enclosing tag since we are going to insert our own.
#
# The pattern is assembled in steps:
#   PATIMG - an <img ...> tag with a class attribute containing "captioned".
#   PATA   - PATIMG wrapped in <a ...>...</a> (the opening <a> tag is
#            captured as group ``atag0``), or PATIMG on its own.
#   PAT0   - PATA captured as group ``pat0``.
#   PAT1   - PAT0 enclosed in <p>/<div>, or PAT0 unenclosed.  The second
#            alternative needs distinct group names, so replace() renames
#            ``atag0``/``pat0`` to ``atag1``/``pat1``.
PATIMG = '\\<img[^>]+class\s*=[^=>]*captioned[^>]+\\>'
PATA = '(?:(?P<atag0>\\<a[^>]*\\>)'+PATIMG+'\\</a\\>)' + '|' + PATIMG
PAT0 = '(?P<pat0>'+PATA+')'
PAT1 = '<(?:p|div)[^>]*>'+PAT0 + '</(?:p|div)>' + '|' + PAT0.replace('0>','1>')
IMAGE_PATTERN = re.compile(PAT1, re.IGNORECASE)

# Regex to match stupid IE attributes. In IE generated HTML an
# attribute may not be enclosed by quotes if it doesn't contain
Exemple #9
0
except ImportError:
    from dummy_thread import allocate_lock
from htmlentitydefs import name2codepoint
from collections import deque
from copy import deepcopy
from itertools import imap

# Matches a run of whitespace; because the run is captured, re.split()
# keeps the whitespace runs in its result.
_word_split_re = re.compile(r'(\s+)')
# Splits a token into three named parts: ``lead`` (any repetition of "(",
# "<" or "&lt;"), ``trail`` (any repetition of ".", ",", ")", ">", a newline
# or "&gt;") and ``middle`` (whatever lies between, matched non-greedily).
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' %
    ('|'.join(imap(re.escape, ('(', '<', '&lt;'))), '|'.join(
        imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))))
# Whole-string match: non-whitespace, "@", then two runs of letters, digits,
# ".", "_" or "-" separated by a dot.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# Matches an HTML comment or anything from "<" up to the next ">".
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches "&...;" and captures the text between "&" and ";".
_entity_re = re.compile(r'&([^;]+);')
# Private copy of the stock entity table, extended with "apos" (code point
# 39, the apostrophe); the copy keeps the imported dict unmodified.
_entities = name2codepoint.copy()
_entities['apos'] = 39

# special singleton representing missing values for the runtime
# (an instance of an ad-hoc class whose repr() is the string 'missing')
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()

# Concatenate a list of strings and convert them to unicode.
# Unfortunately there is a bug in Python 2.4 and lower that causes
# unicode.join to trash the traceback.
_concat = u''.join
try:

    def _test_gen_bug():
        raise TypeError(_test_gen_bug)
        yield None
Exemple #10
0
class HTMLBuilder(object):
    """Small factory for HTML snippets in a given dialect.

    Looking up an attribute on an instance yields a callable named after
    the tag.  Its positional arguments become the element's children and
    its keyword arguments become attributes; a trailing underscore on a
    keyword is dropped.  Calling the builder itself passes the given
    string through ``escape``.
    """

    from htmlentitydefs import name2codepoint
    # Matches "&...;" and captures the text between "&" and ";".
    _entity_re = re.compile('&([^;]+);')
    # Stock entity table plus "apos" (code point 39), as a private dict.
    _entities = dict(name2codepoint, apos=39)
    # Tags written without a closing tag when they have no children.
    _empty_elements = set([
        'area', 'base', 'basefont', 'br', 'col',
        'frame', 'hr', 'img', 'input', 'isindex',
        'link', 'meta', 'param',
    ])
    # Attributes rendered by presence only (or as key="key" in xhtml).
    _boolean_attributes = set([
        'selected', 'checked', 'compact', 'declare',
        'defer', 'disabled', 'ismap', 'multiple',
        'nohref', 'noresize', 'noshade', 'nowrap',
    ])
    # Tags whose joined children are escaped before being emitted.
    _plaintext_elements = set(['textarea'])
    # Tags whose content gets a commented CDATA wrapper in xhtml.
    _c_like_cdata = set(['script', 'style'])
    # Keep the imported table out of the class namespace.
    del name2codepoint

    def __init__(self, dialect):
        """Remember the *dialect*; ``'xhtml'`` selects XHTML-style output."""
        self._dialect = dialect

    def __call__(self, s):
        """Return *s* passed through ``escape``."""
        return escape(s)

    def __getattr__(self, tag):
        """Return a function that renders a ``<tag>`` element.

        Names starting with a double underscore raise ``AttributeError``
        instead of being treated as tags.
        """
        if tag.startswith('__'):
            raise AttributeError(tag)

        def proxy(*children, **arguments):
            xhtml = self._dialect == 'xhtml'
            parts = ['<' + tag]

            for name, value in arguments.iteritems():
                if value is None:
                    # None means "attribute not set".
                    continue
                if name.endswith('_'):
                    name = name[:-1]
                if name in self._boolean_attributes:
                    if not value:
                        # A false boolean attribute is omitted altogether.
                        continue
                    rendered = ('="%s"' % name) if xhtml else ''
                else:
                    rendered = '="%s"' % escape(value, True)
                parts.append(' ' + name + rendered)

            if not children and tag in self._empty_elements:
                parts.append(' />' if xhtml else '>')
                return ''.join(parts)

            content = ''.join(
                [unicode(child) for child in children if child is not None])
            if content:
                if tag in self._plaintext_elements:
                    content = escape(content)
                elif xhtml and tag in self._c_like_cdata:
                    content = '/*<![CDATA[*/%s/*]]>*/' % content

            parts += ['>', content, '</%s>' % tag]
            return ''.join(parts)

        return proxy

    def __repr__(self):
        """Return ``<ClassName for 'dialect'>``."""
        return '<%s for %r>' % (type(self).__name__, self._dialect)
        if code:
            return unichr(int(code, 10))
        else:
            code = match.group(2)
            if code:
                return unichr(int(code, 16))
            else:
                code = match.group(3)
                if code in name2codepoint:
                    return unichr(name2codepoint[code])
        return match.group(0)
    s = format_entity.entity_re.sub(unescape, s)
    s = format_entity.tag_re.sub('', s)
    return s

# Lookup tables and compiled patterns stored as attributes on format_entity.
# Entity table: a copy of the stock mapping extended with 'apos'.
format_entity.name2codepoint = name2codepoint.copy()
format_entity.name2codepoint['apos'] = ord("'")
# Character/entity reference: group 1 holds decimal digits ("&#65;"),
# group 2 hexadecimal digits ("&#x41;"), group 3 an alphabetic entity name.
format_entity.entity_re = re.compile('&(?:#(\d+)|(?:#x([\da-fA-F]+))|([a-zA-Z]+));')
# An opening or closing <span ...> tag.
format_entity.tag_re = re.compile('\</?span[^\>]*>')


def extract_correct_title(fragment, title, contentdb):
    # fragment: text starting with an instance of the 記事名の制約 template
    # extract 'title' parameter by intercepting template expansion
    te = Expander(fragment, pagename=title, wikidb=contentdb)
    found = False
    node_list = [te.parsed]
    while True:
        node = node_list.pop(0)
        if isinstance(node, Template):
            found = True
Exemple #12
0
""" Transform
"""
import re
from htmlentitydefs import name2codepoint
from Products.PortalTransforms.interfaces import itransform
from Products.PortalTransforms.interfaces import ITransform
from DocumentTemplate.DT_Util import html_quote
from DocumentTemplate.DT_Var import newline_to_br
from zope.interface import implements

# Rebind the imported name to a private copy, so that adding 'apos' below
# does not modify the table owned by htmlentitydefs.
name2codepoint = name2codepoint.copy()
name2codepoint['apos'] = ord("'")
# W1401 (anomalous backslash in string) is silenced because the patterns
# below are written as ordinary, non-raw string literals.
# pylint: disable=W1401
# IMAGE_PATTERN matches an image tag on its own, or an image tag
# enclosed in a simple <p> or <div>. In the latter case we strip out
# the enclosing tag since we are going to insert our own.
#
# The pattern is assembled in steps:
#   PATIMG - an <img ...> tag with a class attribute containing "captioned".
#   PATA   - PATIMG wrapped in <a ...>...</a> (the opening <a> tag is
#            captured as group ``atag0``), or PATIMG on its own.
#   PAT0   - PATA captured as group ``pat0``.
#   PAT1   - PAT0 enclosed in <p>/<div>, or PAT0 unenclosed.  The second
#            alternative needs distinct group names, so replace() renames
#            ``atag0``/``pat0`` to ``atag1``/``pat1``.
PATIMG = '\\<img[^>]+class\s*=[^=>]*captioned[^>]+\\>'
PATA = '(?:(?P<atag0>\\<a[^>]*\\>)'+PATIMG+'\\</a\\>)' + '|' + PATIMG
PAT0 = '(?P<pat0>'+PATA+')'
PAT1 = '<(?:p|div)[^>]*>'+PAT0 + '</(?:p|div)>' + '|' + PAT0.replace('0>', '1>')
IMAGE_PATTERN = re.compile(PAT1, re.IGNORECASE)

# Copied from Products.kupu.plone.config
# Matches the start of an <a>, <area>, <img>, <object> or <param> tag up to
# and including a double-quoted src/href/data/value attribute whose value
# contains "resolveuid/".  Groups: ``tag`` is everything up to the opening
# quote, ``url`` the attribute value through the uid, ``uid`` the path
# segment that follows "resolveuid/".
UID_PATTERN = re.compile('(?P<tag><(?:a|area|img|object|param)'
                         '\\s[^>]*(?:src|href|data|value)\s*=\s'
                         '*")(?P<url>[^"]*resolveuid/(?P<uid>[^/"#? ]*))',
                         re.DOTALL | re.IGNORECASE)

# Regex to match stupid IE attributes. In IE generated HTML an
# attribute may not be enclosed by quotes if it doesn't contain
# certain punctuation.