コード例 #1
0
    def _process_html_tree(elt):
        """Recursively converts an element and its subtree into safe_dom nodes.

        Args:
            elt: the element tree node to convert. If its tag has an entry in
                tag_bindings, the bound tag class renders a replacement
                element first.

        Returns:
            A safe_dom.NodeList holding the converted element, followed by a
            safe_dom.Text for the element's tail text when there is any. If
            rendering or conversion raises, the element is replaced by a span
            with class 'gcb-error-tag' containing INVALID_HTML_TAG_MESSAGE.
        """
        node_list = safe_dom.NodeList()

        # Capture the tail before elt is possibly rebound to a rendered
        # replacement below.
        tail = elt.tail

        try:
            # Rendering runs inside the try so that a custom tag which raises
            # is reported inline as an error span, like any other bad tag,
            # rather than aborting the conversion of the whole tree.
            if elt.tag in tag_bindings:
                elt = tag_bindings[elt.tag]().render(elt, handler)

            if elt.tag.lower() == 'script':
                out_elt = safe_dom.ScriptElement()
            else:
                out_elt = safe_dom.Element(elt.tag)
            out_elt.add_attribute(**elt.attrib)

            if elt.text:
                out_elt.add_text(elt.text)
            for child in elt:
                out_elt.add_children(_process_html_tree(child))
        except Exception as e:  # pylint: disable-msg=broad-except
            logging.error('Invalid HTML tag: %s. %s', elt, e)
            out_elt = safe_dom.Element('span')
            out_elt.add_attribute(className='gcb-error-tag')
            out_elt.add_text(INVALID_HTML_TAG_MESSAGE)

        node_list.append(out_elt)
        if tail:
            node_list.append(safe_dom.Text(tail))
        return node_list
コード例 #2
0
    def _process_html_tree(elt):
        """Converts one element, and recursively its subtree, to safe_dom.

        Args:
            elt: the element tree node to convert.

        Returns:
            A safe_dom.NodeList with the converted element and, when present,
            a safe_dom.Text for the tail of the original element. A repeated
            instanceid, or any exception raised while converting, yields the
            node list built by _generate_error_message_node_list instead.
        """
        # A repeated instanceid short-circuits to an error; a new one is
        # recorded so that later repeats can be detected.
        if 'instanceid' in elt.attrib:
            instance_id = elt.attrib['instanceid']
            if instance_id in used_instance_ids:
                return _generate_error_message_node_list(
                    elt, DUPLICATE_INSTANCE_ID_MESSAGE)
            used_instance_ids.add(instance_id)

        # 'elt' stays bound to the element as parsed; 'current' is replaced by
        # whatever a custom tag renders.
        current = elt
        try:
            if render_custom_tags and current.tag in tag_bindings:
                bound_name = current.tag
                tag = tag_bindings[bound_name]()
                if not isinstance(tag, ContextAwareTag):
                    current = tag.render(current, handler)
                else:
                    # All instances of one tag type share a single context,
                    # created the first time that tag type is seen.
                    context = tag_contexts.get(bound_name)
                    if context is None:
                        context = ContextAwareTag.Context(handler, {})
                        tag_contexts[bound_name] = context
                    current = tag.render(current, context)

            tag_name = current.tag
            if tag_name == cElementTree.Comment:
                converted = safe_dom.Comment()
            elif tag_name.lower() == 'script':
                converted = safe_dom.ScriptElement()
            else:
                converted = safe_dom.Element(_remove_namespace(tag_name))
            converted.add_attribute(**current.attrib)

            text = current.text
            if text:
                converted.add_text(text)
            for child in current:
                converted.add_children(_process_html_tree(child))

            result = safe_dom.NodeList()
            result.append(converted)
            # The tail belongs to the element as parsed, not to its rendered
            # replacement.
            if elt.tail:
                result.append(safe_dom.Text(elt.tail))
            return result

        except Exception as e:  # pylint: disable=broad-except
            logging.exception('Error handling tag: %s', current.tag)
            return _generate_error_message_node_list(
                elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))
コード例 #3
0
    def _generate_error_message_node_list(elt, error_message):
        """Builds the node list shown in place of an element that failed.

        Args:
            elt: the offending element; its tag and attributes are logged and
                its tail text, if any, is kept in the output.
            error_message: the text logged and placed in the error span.

        Returns:
            A safe_dom.NodeList holding a span with class 'gcb-error-tag'
            that contains error_message, followed by a safe_dom.Text for the
            tail of elt when it has one.
        """
        logging.error(
            '[%s, %s]: %s.', elt.tag, dict(**elt.attrib), error_message)

        result = safe_dom.NodeList()
        error_span = safe_dom.Element(
            'span', className='gcb-error-tag').add_text(error_message)
        result.append(error_span)

        trailing_text = elt.tail
        if trailing_text:
            result.append(safe_dom.Text(trailing_text))
        return result
コード例 #4
0
ファイル: tags.py プロジェクト: eliesmr4/ULearn
    def _process_html_tree(elt):
        """Recursively converts an element and its subtree into safe_dom nodes.

        Args:
            elt: the element tree node to convert. If its tag has an entry in
                tag_bindings, the bound tag class renders a replacement
                element first.

        Returns:
            A safe_dom.NodeList with the converted element, followed by a
            safe_dom.Text for the tail of the original element when present.
        """
        result = safe_dom.NodeList()

        # Read the tail from the element as parsed, before any custom tag
        # swaps in its rendered replacement.
        trailing_text = elt.tail

        source = elt
        if source.tag in tag_bindings:
            source = tag_bindings[source.tag]().render(source)

        converted = safe_dom.Element(source.tag)
        converted.add_attribute(**source.attrib)
        text = source.text
        if text:
            converted.add_text(text)
        for child in source:
            converted.add_children(_process_html_tree(child))

        result.append(converted)
        if trailing_text:
            result.append(safe_dom.Text(trailing_text))
        return result
コード例 #5
0
def html_to_safe_dom(html_string):
    """Parses an HTML string and rebuilds it as safe_dom nodes.

    Args:
        html_string: the HTML text to convert. A falsy value yields an empty
            node list.

    Returns:
        A safe_dom.NodeList for the fragment: the leading text (if any)
        followed by the converted form of each top-level element.
    """
    tag_bindings = get_tag_bindings()

    node_list = safe_dom.NodeList()
    if not html_string:
        return node_list

    def _process_html_tree(elt):
        """Converts one element and, recursively, its children."""
        result = safe_dom.NodeList()

        # Read the tail from the element as parsed, before any custom tag
        # swaps in its rendered replacement.
        trailing_text = elt.tail

        source = elt
        if source.tag in tag_bindings:
            source = tag_bindings[source.tag]().render(source)

        is_script = source.tag.lower() == 'script'
        if is_script:
            converted = safe_dom.ScriptElement()
        else:
            converted = safe_dom.Element(source.tag)
        converted.add_attribute(**source.attrib)
        text = source.text
        if text:
            converted.add_text(text)
        for child in source:
            converted.add_children(_process_html_tree(child))

        result.append(converted)
        if trailing_text:
            result.append(safe_dom.Text(trailing_text))
        return result

    # The input is wrapped in a single <div> and that first parsed node is
    # used as the root whose text and children are converted below.
    tree_builder = html5lib.treebuilders.getTreeBuilder('etree', cElementTree)
    parser = html5lib.HTMLParser(
        tree=tree_builder, namespaceHTMLElements=False)
    fragment = parser.parseFragment('<div>%s</div>' % html_string)
    root = fragment[0]

    leading_text = root.text
    if leading_text:
        node_list.append(safe_dom.Text(leading_text))

    for top_level_elt in root:
        node_list.append(_process_html_tree(top_level_elt))

    return node_list
コード例 #6
0
    def _process_html_tree(elt, used_instance_ids):
        """Converts one element, and recursively its subtree, to safe_dom.

        Args:
            elt: the element tree node to convert.
            used_instance_ids: set of instanceid values seen so far; updated
                in place when elt carries a new instanceid.

        Returns:
            A safe_dom.NodeList with the converted element and, when present,
            a safe_dom.Text for the tail of the original element. A repeated
            instanceid, or any exception raised while converting, yields the
            node list built by _generate_error_message_node_list instead.
        """
        # A repeated instanceid short-circuits to an error; a new one is
        # recorded so that later repeats can be detected.
        if 'instanceid' in elt.attrib:
            instance_id = elt.attrib['instanceid']
            if instance_id in used_instance_ids:
                return _generate_error_message_node_list(
                    elt, DUPLICATE_INSTANCE_ID_MESSAGE)
            used_instance_ids.add(instance_id)

        # 'elt' stays bound to the element as parsed; 'current' is replaced by
        # whatever a custom tag renders.
        current = elt
        try:
            if current.tag in tag_bindings:
                current = tag_bindings[current.tag]().render(current, handler)

            is_script = current.tag.lower() == 'script'
            if is_script:
                converted = safe_dom.ScriptElement()
            else:
                converted = safe_dom.Element(current.tag)
            converted.add_attribute(**current.attrib)

            text = current.text
            if text:
                converted.add_text(text)
            for child in current:
                converted.add_children(
                    _process_html_tree(child, used_instance_ids))

            result = safe_dom.NodeList()
            result.append(converted)
            # The tail belongs to the element as parsed, not to its rendered
            # replacement.
            if elt.tail:
                result.append(safe_dom.Text(elt.tail))
            return result

        except Exception as e:  # pylint: disable-msg=broad-except
            return _generate_error_message_node_list(
                elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))
コード例 #7
0
def html_to_safe_dom(html_string, handler, render_custom_tags=True):
    """Parses an HTML string and rebuilds it as safe_dom nodes.

    Args:
        html_string: the HTML text to convert. A falsy value yields an empty
            node list.
        handler: passed to custom tags when they render; context-aware tags
            receive it wrapped in a ContextAwareTag.Context.
        render_custom_tags: when False, elements whose tag appears in the tag
            bindings are converted like ordinary elements instead of being
            rendered by their tag class.

    Returns:
        A safe_dom.NodeList for the fragment. For every context-aware tag type
        that was rendered, the converted header from rollup_header_footer is
        inserted at the front and the converted footer appended at the end.
    """
    tag_bindings = get_tag_bindings()

    node_list = safe_dom.NodeList()
    if not html_string:
        return node_list

    # Every instanceid seen so far in this tree; used to reject duplicates.
    used_instance_ids = set()
    # One shared context per context-aware tag type, keyed by tag name.
    tag_contexts = {}

    def _generate_error_message_node_list(elt, error_message):
        """Builds the node list shown in place of an element that failed."""
        logging.error(
            '[%s, %s]: %s.', elt.tag, dict(**elt.attrib), error_message)

        error_nodes = safe_dom.NodeList()
        error_span = safe_dom.Element(
            'span', className='gcb-error-tag').add_text(error_message)
        error_nodes.append(error_span)

        trailing_text = elt.tail
        if trailing_text:
            error_nodes.append(safe_dom.Text(trailing_text))
        return error_nodes

    def _remove_namespace(tag_name):
        """Strips a leading '{namespace}' prefix from a tag name.

        html5lib may qualify tag names with a namespace, e.g.
            {http://www.w3.org/2000/svg}svg
        """
        return re.sub(r'^\{[^\}]+\}', '', tag_name, count=1)

    def _process_html_tree(elt):
        """Converts one element, and recursively its subtree, to safe_dom."""
        # A repeated instanceid short-circuits to an error; a new one is
        # recorded so that later repeats can be detected.
        if 'instanceid' in elt.attrib:
            instance_id = elt.attrib['instanceid']
            if instance_id in used_instance_ids:
                return _generate_error_message_node_list(
                    elt, DUPLICATE_INSTANCE_ID_MESSAGE)
            used_instance_ids.add(instance_id)

        # 'elt' stays bound to the element as parsed; 'current' is replaced by
        # whatever a custom tag renders.
        current = elt
        try:
            if render_custom_tags and current.tag in tag_bindings:
                bound_name = current.tag
                tag = tag_bindings[bound_name]()
                if not isinstance(tag, ContextAwareTag):
                    current = tag.render(current, handler)
                else:
                    # All instances of one tag type share a single context,
                    # created the first time that tag type is seen.
                    context = tag_contexts.get(bound_name)
                    if context is None:
                        context = ContextAwareTag.Context(handler, {})
                        tag_contexts[bound_name] = context
                    current = tag.render(current, context)

            current_tag = current.tag
            if current_tag == cElementTree.Comment:
                converted = safe_dom.Comment()
            elif current_tag.lower() == 'script':
                converted = safe_dom.ScriptElement()
            else:
                converted = safe_dom.Element(_remove_namespace(current_tag))
            converted.add_attribute(**current.attrib)

            text = current.text
            if text:
                converted.add_text(text)
            for child in current:
                converted.add_children(_process_html_tree(child))

            result = safe_dom.NodeList()
            result.append(converted)
            # The tail belongs to the element as parsed, not to its rendered
            # replacement.
            if elt.tail:
                result.append(safe_dom.Text(elt.tail))
            return result

        except Exception as e:  # pylint: disable=broad-except
            logging.exception('Error handling tag: %s', current.tag)
            return _generate_error_message_node_list(
                elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))

    root = html_string_to_element_tree(html_string)
    leading_text = root.text
    if leading_text:
        node_list.append(safe_dom.Text(leading_text))

    for child_elt in root:
        node_list.append(_process_html_tree(child_elt))

    # Once the whole tree is converted, let each context-aware tag type
    # contribute the header/footer it accumulated in its shared context.
    for tag_name, context in tag_contexts.items():
        rolled_up = tag_bindings[tag_name]().rollup_header_footer(context)
        header, footer = rolled_up
        node_list.insert(0, _process_html_tree(header))
        node_list.append(_process_html_tree(footer))

    return node_list
コード例 #8
0
import re
from xml.etree import cElementTree

import html5lib
import safe_dom
import webapp2

import appengine_config
from common import schema_fields
from models import config

# Boolean config property (default True) registered under the name
# 'gcb_can_use_dynamic_tags'; the safe_dom.Text argument carries its
# human-readable description.
CAN_USE_DYNAMIC_TAGS = config.ConfigProperty(
    'gcb_can_use_dynamic_tags',
    bool,
    safe_dom.Text(
        'Whether lesson content can make use of custom HTML tags such as '
        '<gcb-youtube videoid="...">. If this is enabled some legacy content '
        'may be rendered differently. '),
    default_value=True)

# Error text for a custom tag whose id duplicates one already seen.
DUPLICATE_INSTANCE_ID_MESSAGE = (
    'Error processing custom HTML tag: duplicate tag id')
# Error text for a tag that could not be processed.
INVALID_HTML_TAG_MESSAGE = 'Invalid HTML tag'


class BaseTag(object):
    """Base class for the custom HTML tags."""
    @classmethod
    def name(cls):
        return cls.__name__

    @classmethod
コード例 #9
0
ファイル: jinja_utils.py プロジェクト: danieldanciu/schoggi
from common import caching
from models import config
from models import models
from models.counters import PerfCounter

import gae_mini_profiler.profiler
import gae_mini_profiler.templatetags

# Maximum size, in bytes, of the in-process jinja template cache
# (8 * 1024 * 1024 bytes, i.e. 8 MiB).
MAX_GLOBAL_CACHE_SIZE_BYTES = 8 * 1024 * 1024

# Boolean config property (default True) registered under the name
# 'gcb_can_use_jinja2_template_cache'. The cache it governs used to be
# memcache based; it is now in-process.
CAN_USE_JINJA2_TEMPLATE_CACHE = config.ConfigProperty(
    'gcb_can_use_jinja2_template_cache',
    bool,
    safe_dom.Text(
        'Whether jinja2 can cache bytecode of compiled templates in-process.'),
    default_value=True)


def finalize(x):
    """Turns safe_dom values into jinja2 Markup; passes anything else through.

    Args:
        x: the value to finalize.

    Returns:
        jinja2.utils.Markup built from x.sanitized when x is a safe_dom.Node
        or safe_dom.NodeList; otherwise x itself, unchanged.
    """
    if not isinstance(x, (safe_dom.Node, safe_dom.NodeList)):
        return x
    return jinja2.utils.Markup(x.sanitized)


def js_string_raw(data):
    """Escape a string so that it can be put in a JS quote."""
    if not isinstance(data, basestring):
        return data
    data = data.replace('\\', '\\\\')
コード例 #10
0
__author__ = 'John Orr ([email protected])'

import jinja2
from models import config
from models import models
from webapp2_extras import i18n
from models.models import MemcacheManager
import safe_dom
import tags
from jinja2.bccache import BytecodeCache

# Boolean config property (default True) registered under the name
# 'gcb_can_use_jinja2_template_cache'; the safe_dom.Text argument carries its
# human-readable description.
CAN_USE_JINJA2_TEMPLATE_CACHE = config.ConfigProperty(
    'gcb_can_use_jinja2_template_cache',
    bool,
    safe_dom.Text(
        'Whether jinja2 can cache bytecode of compiled templates in memcache.'
    ),
    default_value=True)


def finalize(x):
    """Turns safe_dom values into jinja2 Markup; passes anything else through.

    Args:
        x: the value to finalize.

    Returns:
        jinja2.utils.Markup built from x.sanitized when x is a safe_dom.Node
        or safe_dom.NodeList; otherwise x itself, unchanged.
    """
    if not isinstance(x, (safe_dom.Node, safe_dom.NodeList)):
        return x
    return jinja2.utils.Markup(x.sanitized)


def js_string_raw(data):
    """Escape a string so that it can be put in a JS quote."""
    if not isinstance(data, basestring):
        return data
コード例 #11
0
from webapp2_extras import i18n

import appengine_config
from common import caching
from models import config
from models import models
from models.counters import PerfCounter


# Maximum size, in bytes, of the in-process jinja template cache
# (8 * 1024 * 1024 bytes, i.e. 8 MiB).
MAX_GLOBAL_CACHE_SIZE_BYTES = 8 * 1024 * 1024

# Boolean config property (default True) registered under the name
# 'gcb_can_use_jinja2_template_cache'. The cache it governs used to be
# memcache based; it is now in-process.
CAN_USE_JINJA2_TEMPLATE_CACHE = config.ConfigProperty(
    'gcb_can_use_jinja2_template_cache', bool, safe_dom.Text(
        'Whether jinja2 can cache bytecode of compiled templates in-process.'),
    default_value=True)


def finalize(x):
    """Turns safe_dom values into jinja2 Markup; passes anything else through.

    Args:
        x: the value to finalize.

    Returns:
        jinja2.utils.Markup built from x.sanitized when x is a safe_dom.Node
        or safe_dom.NodeList; otherwise x itself, unchanged.
    """
    if not isinstance(x, (safe_dom.Node, safe_dom.NodeList)):
        return x
    return jinja2.utils.Markup(x.sanitized)


def js_string_raw(data):
    """Escape a string so that it can be put in a JS quote."""
    if not isinstance(data, basestring):
        return data
    data = data.replace('\\', '\\\\')
コード例 #12
0
def html_to_safe_dom(html_string, handler):
    """Parses an HTML string and rebuilds it as safe_dom nodes.

    Args:
        html_string: the HTML text to convert. A falsy value yields an empty
            node list.
        handler: passed to custom tags when they render.

    Returns:
        A safe_dom.NodeList for the fragment: the leading text (if any)
        followed by the converted form of each top-level element.
    """
    tag_bindings = get_tag_bindings()

    node_list = safe_dom.NodeList()
    if not html_string:
        return node_list

    def _generate_error_message_node_list(elt, error_message):
        """Builds the node list shown in place of an element that failed."""
        logging.error(
            '[%s, %s]: %s.', elt.tag, dict(**elt.attrib), error_message)

        error_nodes = safe_dom.NodeList()
        error_span = safe_dom.Element(
            'span', className='gcb-error-tag').add_text(error_message)
        error_nodes.append(error_span)

        trailing_text = elt.tail
        if trailing_text:
            error_nodes.append(safe_dom.Text(trailing_text))
        return error_nodes

    def _process_html_tree(elt, used_instance_ids):
        """Converts one element, and recursively its subtree, to safe_dom."""
        # A repeated instanceid short-circuits to an error; a new one is
        # recorded so that later repeats can be detected.
        if 'instanceid' in elt.attrib:
            instance_id = elt.attrib['instanceid']
            if instance_id in used_instance_ids:
                return _generate_error_message_node_list(
                    elt, DUPLICATE_INSTANCE_ID_MESSAGE)
            used_instance_ids.add(instance_id)

        # 'elt' stays bound to the element as parsed; 'current' is replaced by
        # whatever a custom tag renders.
        current = elt
        try:
            if current.tag in tag_bindings:
                current = tag_bindings[current.tag]().render(current, handler)

            is_script = current.tag.lower() == 'script'
            if is_script:
                converted = safe_dom.ScriptElement()
            else:
                converted = safe_dom.Element(current.tag)
            converted.add_attribute(**current.attrib)

            text = current.text
            if text:
                converted.add_text(text)
            for child in current:
                converted.add_children(
                    _process_html_tree(child, used_instance_ids))

            result = safe_dom.NodeList()
            result.append(converted)
            # The tail belongs to the element as parsed, not to its rendered
            # replacement.
            if elt.tail:
                result.append(safe_dom.Text(elt.tail))
            return result

        except Exception as e:  # pylint: disable-msg=broad-except
            return _generate_error_message_node_list(
                elt, '%s: %s' % (INVALID_HTML_TAG_MESSAGE, e))

    root = html_string_to_element_tree(html_string)
    leading_text = root.text
    if leading_text:
        node_list.append(safe_dom.Text(leading_text))

    # A single set is shared across all top-level elements so that duplicate
    # instanceids are detected anywhere in the tree.
    used_instance_ids = set()
    for top_level_elt in root:
        node_list.append(_process_html_tree(top_level_elt, used_instance_ids))

    return node_list