def contextualize_name(tmpl_name, start_ctx):
     """
     Return the name under which tmpl_name is known when entered in
     start_ctx.

     A template entered in self.start_state keeps its own name.  For any
     other context the name is built as '<tmpl_name>$<context>', where
     <context> is debug.context_to_string(start_ctx) with spaces replaced
     by commas, so that a cloned body can be told apart from the original
     and calls can be rewritten according to the context they appear in.
     The result is memoized in contextualized_names under the key
     (tmpl_name, start_ctx).

     NOTE(review): self and contextualized_names are not parameters;
     presumably they are supplied by an enclosing scope -- confirm.
     """
     if start_ctx == self.start_state:
         return tmpl_name
     cache_key = (tmpl_name, start_ctx)
     cached_name = contextualized_names.get(cache_key)
     if cached_name is not None:
         return cached_name
     stem = '%s$%s' % (
         tmpl_name, debug.context_to_string(start_ctx).replace(' ', ','))
     # Append 0, 1, 2, ... to the stem until the name no longer clashes
     # with a template already present in name_to_body.
     candidate = stem
     suffix = 0
     while candidate in self.name_to_body:
         candidate = '%s%d' % (stem, suffix)
         suffix += 1
     contextualized_names[cache_key] = candidate
     return candidate
 def no_steady_state(self, states, debug_hint=None):
     """
     Choose the resulting context when the given states do not settle.

     If one of states is already an error context, the first such state
     is returned and no further message is queued.  Otherwise a message
     listing every state is passed to self.error and
     context.STATE_ERROR is returned.
     """
     for candidate in states:
         if context.is_error_context(candidate):
             # Hand the existing error context back unchanged.
             return candidate
     described = ', '.join(
         [debug.context_to_string(candidate) for candidate in states])
     self.error(debug_hint, 'loop switches between states (%s)' % described)
     return context.STATE_ERROR
Example #3
0
 def contextualize_name(tmpl_name, start_ctx):
     """
     Map (tmpl_name, start_ctx) to a template name specific to that
     context.

     When start_ctx equals self.start_state the original name is
     returned as is.  Otherwise the name is '<tmpl_name>$<context>',
     with <context> being debug.context_to_string(start_ctx) after
     replacing spaces by commas; this lets cloned bodies be told apart
     from the original so calls can be rewritten per calling context.
     Names are remembered in contextualized_names so that the same pair
     always yields the same name.

     NOTE(review): self and contextualized_names are free variables
     here; presumably provided by an enclosing scope -- confirm.
     """
     if start_ctx == self.start_state:
         return tmpl_name
     lookup_key = (tmpl_name, start_ctx)
     known_name = contextualized_names.get(lookup_key)
     if known_name is not None:
         return known_name
     context_label = debug.context_to_string(start_ctx).replace(' ', ',')
     base_name = '%s$%s' % (tmpl_name, context_label)
     # Keep trying base_name, base_name0, base_name1, ... until the name
     # is not already a key of name_to_body.
     unique_name = base_name
     attempt = 0
     while unique_name in self.name_to_body:
         unique_name = '%s%d' % (base_name, attempt)
         attempt += 1
     contextualized_names[lookup_key] = unique_name
     return unique_name
Example #4
0
 def no_steady_state(self, states, debug_hint=None):
     """
     Resolve a set of states that never reach a steady state.

     Returns the first state in states that is an error context, if
     there is one, without queueing another message.  Otherwise queues
     an error that lists all states (via self.error) and returns
     context.STATE_ERROR.
     """
     for seen_state in states:
         if context.is_error_context(seen_state):
             return seen_state
     state_names = [debug.context_to_string(seen_state)
                    for seen_state in states]
     message = 'loop switches between states (%s)' % ', '.join(state_names)
     self.error(debug_hint, message)
     return context.STATE_ERROR
 def join(self, states, debug_hint=None):
     """
     Combine the contexts in which several branches end into one.

     states are folded together with context_update.context_union.  If
     the folded context is an error context and none of the inputs was
     one, an error naming the branch contexts is queued via self.error.
     The folded context is returned in every case.
     """
     merged = functools.reduce(context_update.context_union, states)
     if not context.is_error_context(merged):
         return merged
     # An input that is already an error context means the problem was
     # reported when that state was produced; do not report it twice.
     for branch_state in states:
         if context.is_error_context(branch_state):
             return merged
     listing = ', '.join(
         [debug.context_to_string(branch_state) for branch_state in states])
     self.error(
         debug_hint, 'branches end in incompatible contexts: %s' % listing)
     return merged
Example #6
0
 def join(self, states, debug_hint=None):
     """
     Reduce the end contexts of several branches to a single context.

     The reduction uses context_update.context_union.  An error is
     queued through self.error only when the union is an error context
     although every individual state was fine; the union is always the
     return value.
     """
     union = functools.reduce(context_update.context_union, states)
     if context.is_error_context(union):
         # If some input already is an error context, that error was
         # reported when the state was produced.
         already_reported = any(
             context.is_error_context(one_state) for one_state in states)
         if not already_reported:
             described = [debug.context_to_string(one_state)
                          for one_state in states]
             self.error(
                 debug_hint,
                 'branches end in incompatible contexts: %s'
                 % ', '.join(described))
     return union
Example #7
0
def escape(name_to_body,
           public_template_names,
           start_state=context.STATE_TEXT):
    """
    Analyze the public templates and rewrite them if no problem is found.

    name_to_body - maps template names to template bodies.
        A template body is an object that implements
        1. reduce_traces(start_state, analyzer) -> end_state
        2. clone() -> a structural copy of the body that is distinct
           according to == and is itself a template body.
        3. the body node interface described below.
    public_template_names - the names that might be called with an empty
        output buffer in the given start state.
    start_state - the state in which the named templates might be called.

    A body node is an object that implements
        1. children() -> a series of nodes
        2. with_children(children) -> a structural copy of the body that
           has the given children in place of children().
    Step values must be body nodes as well, and the nodes transitively
    enumerated from a body must include every step value met when
    following traces that do not include external calls.

    name_to_body may gain new template definitions as a result of this
    call.

    Raises EscapeError, carrying the queued analyzer messages joined by
    newlines, when a public template ends in an error context or in a
    context other than start_state.  If escape exits with an exception,
    it is unsafe to use the templates in name_to_body.
    """
    analyzer = _Analyzer(name_to_body, start_state)

    failed = False
    for name in public_template_names:
        end_state = analyzer.external_call(name, start_state, None)
        if context.is_error_context(end_state):
            failed = True
            continue
        if end_state == start_state:
            continue
        # Templates should start and end in the same context; otherwise
        # concatenating the output of safe templates is not safe.
        mismatch = (
            'template %s does not start and end in the same context: %s'
            % (name, debug.context_to_string(end_state)))
        analyzer.error(None, mismatch)
        failed = True

    if failed:
        raise EscapeError('\n'.join(analyzer.errors))

    analyzer.rewrite()
def escape(name_to_body, public_template_names, start_state=context.STATE_TEXT):
    """
    Check every public template and, if all are fine, apply the rewrite.

    name_to_body - maps template names to template bodies.
        A template body is an object that implements
        1. reduce_traces(start_state, analyzer) -> end_state
        2. clone() -> a structural copy of the body that is distinct
           according to == and is also a template body.
        3. the body node interface described below.
    public_template_names - the names that might be called with an empty
        output buffer in the given start state.
    start_state - the state in which the named templates might be called.

    A body node is an object that implements
        1. children() -> a series of nodes
        2. with_children(children) -> a structural copy of the body but
           with the given children instead of children().
    Step values must also be body nodes, and the transitively enumerated
    nodes of a body must include all step values encountered when
    following traces that do not include external calls.

    name_to_body may be augmented with new template definitions as a
    result of this call.

    An EscapeError whose message is the newline-joined list of queued
    analyzer errors is raised when any public template ends in an error
    context or does not end in start_state.  After such an exception it
    is unsafe to use the templates in name_to_body.
    """
    analyzer = _Analyzer(name_to_body, start_state)

    problem_found = False
    for template_name in public_template_names:
        final_state = analyzer.external_call(template_name, start_state, None)
        ended_in_error = context.is_error_context(final_state)
        if not ended_in_error and final_state != start_state:
            # Templates should start and end in the same context.
            # Otherwise concatenation of the output from safe templates
            # is not safe.
            analyzer.error(
                None,
                'template %s does not start and end in the same context: %s'
                % (template_name, debug.context_to_string(final_state)))
            problem_found = True
        elif ended_in_error:
            problem_found = True

    if problem_found:
        raise EscapeError('\n'.join(analyzer.errors))

    analyzer.rewrite()
def _process_next_token(text, context):
    """
    Consume a leading portion of text and compute the context after it.

    text - the raw text still to be processed; expected to be non-empty.
    context - the context in effect at the start of text.

    Returns (n, context after text[:n], replacement for text[:n]).
    An error context swallows all of text unchanged.  If no applicable
    transition matches, all of text is consumed and the resulting
    context is STATE_ERROR.
    """

    if is_error_context(context):  # The ERROR state is infectious.
        return (len(text), context, text)

    # Track the applicable transition whose pattern matches earliest in
    # the raw text.  The comparison is strict, so on equal start offsets
    # the transition seen first is kept.
    best_start = len(text) + 1
    best_transition = None
    best_match = None

    for transition in _TRANSITIONS[state_of(context)]:
        match = transition.pattern.search(text)
        if (match
                and match.start(0) < best_start
                and transition.is_applicable_to(context, match)):
            best_start = match.start(0)
            best_transition = transition
            best_match = match

    if not best_transition:
        num_consumed = len(text)
        next_context = STATE_ERROR
        normalized_text = text
    else:
        num_consumed = best_match.end(0)
        next_context = best_transition.compute_next_context(
            context, best_match)
        normalized_text = best_transition.raw_text(best_match)

    made_no_progress = (
        not num_consumed
        and state_of(next_context) == state_of(context))
    if made_no_progress:  # pragma: no cover
        # Nothing consumed and the state is unchanged: the caller would
        # loop forever.
        raise Exception('inf loop. for %r in %s' %
                        (text, debug.context_to_string(context)))

    return (num_consumed, next_context, normalized_text)
def _process_next_token(text, context):
    """
    Process the next chunk of text and work out the following context.

    text - remaining raw text; expected to be non-empty.
    context - the context before text.

    Returns (n, context after text[:n], replacement for text[:n]).
    In an error context the whole text is returned untouched with the
    same context.  When none of the transitions for the current state
    applies, the whole text is consumed and STATE_ERROR is the result.
    """

    if is_error_context(context):  # The ERROR state is infectious.
        return (len(text), context, text)

    # Pick the applicable transition with the smallest match offset.
    winner = None
    winner_match = None
    winner_start = len(text) + 1

    for candidate in _TRANSITIONS[state_of(context)]:
        found = candidate.pattern.search(text)
        if not found:
            continue
        offset = found.start(0)
        if offset >= winner_start:
            continue
        if candidate.is_applicable_to(context, found):
            winner_start = offset
            winner = candidate
            winner_match = found

    if winner:
        consumed = winner_match.end(0)
        following_context = winner.compute_next_context(context, winner_match)
        replacement = winner.raw_text(winner_match)
    else:
        consumed = len(text)
        following_context = STATE_ERROR
        replacement = text

    if not consumed:
        if state_of(following_context) == state_of(context):  # pragma: no cover
            # Infinite loop.
            raise Exception('inf loop. for %r in %s'
                            % (text, debug.context_to_string(context)))

    return (consumed, following_context, replacement)
 def _compute_end_context(self, name_and_ctx, body, debug_hint):
     """
     Propagate context over the body and return the end context.

     name_and_ctx - a (template name, start context) pair.
     body - the template body handed to self._escape_template_body.
     debug_hint - passed to self.error when a message is queued.

     A first pass starts from the start context.  If it reports
     problems, a second pass is made using the first pass's result as
     the assumed context, looking for a fixed point.  If that pass is
     clean its context is returned; otherwise the problems of the first
     pass are appended to self.errors and context.STATE_ERROR is
     returned.
     """
     tmpl_name, start_ctx = name_and_ctx
     first_ctx, first_problems = self._escape_template_body(
         name_and_ctx, start_ctx, body)
     if first_problems is None:
         return first_ctx
     # Look for a fixed point by assuming the first result as the output
     # context.
     second_ctx, second_problems = self._escape_template_body(
         name_and_ctx, first_ctx, body)
     if second_problems is None:
         return second_ctx
     if not context.is_error_context(first_ctx):
         # We have not explained the problem yet.
         self.error(
             debug_hint,
             "cannot compute output context for template %s in %s"
             % (tmpl_name, debug.context_to_string(start_ctx)))
     self.errors.extend(first_problems)
     return context.STATE_ERROR
 def step(self, start_state, step_value, debug_hint=None):
     """
     Advance the context over a single step value.

     start_state - the context before step_value.
     step_value - the node being stepped over.  Only objects that expose
         to_raw_content() returning a non-None value are handled in this
         definition.
     debug_hint - handed to self.error with any message that is queued.

     Returns start_state unchanged if it is an error context.  For raw
     text, returns the context computed by
     context_update.process_raw_text, or context.STATE_ERROR when that
     call raises ContextUpdateFailure.  Rewritten text is recorded in
     self.text_values keyed by step_value.

     NOTE(review): any other step value falls off the end and yields
     None; this looks like a truncated excerpt -- confirm against the
     full definition.
     """
     if context.is_error_context(start_state):
         # Simplifies error checking below.
         return start_state
     if hasattr(step_value, 'to_raw_content'):
         # Handle text nodes specified by the template author.
         raw_content = step_value.to_raw_content()
         if raw_content is not None:
             try:
                 end_state, new_content, error_ctx, error_text = (
                     context_update.process_raw_text(
                         raw_content, start_state))
                 if context.is_error_context(end_state):
                     self.error(debug_hint, 'bad content in %s: `%s`' % (
                         debug.context_to_string(error_ctx), error_text))
                 elif new_content != raw_content:
                     # Remember the normalized replacement text.
                     self.text_values[step_value] = new_content
             # "except X as err" is valid on Python 2.6+ and Python 3;
             # the older "except X, err" form is a syntax error on 3.
             except context_update.ContextUpdateFailure as err:
                 self.error(debug_hint, str(err))
                 end_state = context.STATE_ERROR
             return end_state
Example #13
0
 def _compute_end_context(self, name_and_ctx, body, debug_hint):
     """
     Compute the context in which body ends for the given start context.

     name_and_ctx - a (template name, start context) pair.
     body - the template body given to self._escape_template_body.
     debug_hint - forwarded to self.error if a message is queued.

     The body is escaped once from the start context.  Should that
     report problems, it is escaped again from the context the first
     attempt produced, in search of a fixed point.  A clean second
     attempt supplies the result; otherwise the first attempt's problems
     are added to self.errors and context.STATE_ERROR is returned.
     """
     tmpl_name, start_ctx = name_and_ctx
     result_ctx, issues = self._escape_template_body(
         name_and_ctx, start_ctx, body)
     if issues is not None:
         # Retry, assuming the context just computed as the output
         # context, to look for a fixed point.
         retry_ctx, retry_issues = self._escape_template_body(
             name_and_ctx, result_ctx, body)
         if retry_issues is None:
             result_ctx = retry_ctx
             issues = None
     if issues is None:
         return result_ctx
     if not context.is_error_context(result_ctx):
         # We have not explained the problem yet.
         explanation = (
             "cannot compute output context for template %s in %s"
             % (tmpl_name, debug.context_to_string(start_ctx)))
         self.error(debug_hint, explanation)
     self.errors.extend(issues)
     return context.STATE_ERROR
Example #14
0
 def step(self, start_state, step_value, debug_hint=None):
     """
     Compute the context that follows step_value.

     start_state - the context before step_value.
     step_value - the node being stepped over.  This definition only
         handles objects whose to_raw_content() returns a non-None
         value.
     debug_hint - handed to self.error with any message that is queued.

     An error start_state is returned as is.  For raw text the result
     is the context reported by context_update.process_raw_text, or
     context.STATE_ERROR if that call raises ContextUpdateFailure.  When
     the processed text differs from the raw text, the replacement is
     stored in self.text_values under step_value.

     NOTE(review): other step values fall through and yield None; this
     looks like a truncated excerpt -- confirm against the full
     definition.
     """
     if context.is_error_context(start_state):
         # Simplifies error checking below.
         return start_state
     if hasattr(step_value, 'to_raw_content'):
         # Handle text nodes specified by the template author.
         raw_content = step_value.to_raw_content()
         if raw_content is not None:
             try:
                 end_state, new_content, error_ctx, error_text = (
                     context_update.process_raw_text(
                         raw_content, start_state))
                 if context.is_error_context(end_state):
                     self.error(
                         debug_hint, 'bad content in %s: `%s`' %
                         (debug.context_to_string(error_ctx), error_text))
                 elif new_content != raw_content:
                     # Remember the normalized replacement text.
                     self.text_values[step_value] = new_content
             # "except X as err" works on Python 2.6+ and Python 3; the
             # comma form is rejected by Python 3.
             except context_update.ContextUpdateFailure as err:
                 self.error(debug_hint, str(err))
                 end_state = context.STATE_ERROR
             return end_state
Example #15
0
    def test_is_regex_preceder(self):
        """
        Test heuristic that is used to update JS_CTX_*

        Each table entry pairs the JS_CTX_* value expected after a
        snippet of JavaScript with that snippet.  Every snippet is fed
        to js.next_js_ctx from several start contexts; the state bits of
        the start context are expected to be carried over unchanged.
        """
        tests = (
            # Statement terminators precede regexps.
            (context.JS_CTX_REGEX, ";"),
            # This is not airtight.
            #     ({ valueOf: function () { return 1 } } / 2)
            # is valid JavaScript but in practice, devs do not do this.
            # A block followed by a statement starting with a RegExp is
            # much more common:
            #     while (x) {...} /foo/.test(x) || panic()
            (context.JS_CTX_REGEX, "}"),
            # But member, call, grouping, and array expression terminators
            # precede div ops.
            (context.JS_CTX_DIV_OP, ")"),
            (context.JS_CTX_DIV_OP, "]"),
            # At the start of a primary expression, array, or expression
            # statement, expect a regexp.
            (context.JS_CTX_REGEX, "("),
            (context.JS_CTX_REGEX, "["),
            (context.JS_CTX_REGEX, "{"),
            # Assignment operators precede regexps as do all exclusively
            # prefix and binary operators.
            (context.JS_CTX_REGEX, "="),
            (context.JS_CTX_REGEX, "+="),
            (context.JS_CTX_REGEX, "*="),
            (context.JS_CTX_REGEX, "*"),
            (context.JS_CTX_REGEX, "!"),
            # Whether the + or - is infix or prefix, it cannot precede a
            # div op.
            (context.JS_CTX_REGEX, "+"),
            (context.JS_CTX_REGEX, "-"),
            # An incr/decr op precedes a div operator.
            # This is not airtight. In (g = ++/h/i) a regexp follows a
            # pre-increment operator, but in practice devs do not try to
            # increment or decrement regular expressions.
            # (g++/h/i) where ++ is a postfix operator on g is much more
            # common.
            (context.JS_CTX_DIV_OP, "--"),
            (context.JS_CTX_DIV_OP, "++"),
            (context.JS_CTX_DIV_OP, "x--"),
            # When we have many dashes or pluses, then they are grouped
            # left to right.
            (context.JS_CTX_REGEX, "x---"),  # A postfix -- then a -.
            # return followed by a slash returns the regexp literal or the
            # slash starts a regexp literal in an expression statement that
            # is dead code.
            (context.JS_CTX_REGEX, "return"),
            (context.JS_CTX_REGEX, "return "),
            (context.JS_CTX_REGEX, "return\t"),
            (context.JS_CTX_REGEX, "return\n"),
            (context.JS_CTX_REGEX, u"return\u2028"),
            # Identifiers can be divided and cannot validly precede a
            # regular expression. Semicolon insertion cannot happen
            # between an identifier and a regular expression on a new line
            # because the one token lookahead for semicolon insertion has
            # to conclude that it could be a div binary op and treat it as
            # such.
            (context.JS_CTX_DIV_OP, "x"),
            (context.JS_CTX_DIV_OP, "x "),
            (context.JS_CTX_DIV_OP, "x\t"),
            (context.JS_CTX_DIV_OP, "x\n"),
            (context.JS_CTX_DIV_OP, u"x\u2028"),
            (context.JS_CTX_DIV_OP, "preturn"),
            # Numbers precede div ops.
            (context.JS_CTX_DIV_OP, "0"),
            # Dots that are part of a number are div preceders.
            (context.JS_CTX_DIV_OP, "0."),
            )

        for want_ctx, js_code in tests:
            for start in (context.JS_CTX_REGEX, context.JS_CTX_DIV_OP,
                          context.JS_CTX_DIV_OP | context.STATE_JS):
                got = js.next_js_ctx(js_code, start)
                # Only the JS_CTX_* part should change; the state bits of
                # the start context are expected to survive.
                want = want_ctx | context.state_of(start)
                # assertEqual rather than the deprecated assertEquals
                # alias, which newer unittest versions no longer provide.
                self.assertEqual(
                    want, got,
                    "%s: want %s got %s" % (
                        js_code,
                        debug.context_to_string(want),
                        debug.context_to_string(got)))

        # Whitespace-only input must leave the context untouched.
        self.assertEqual(
            context.STATE_JS | context.JS_CTX_REGEX,
            js.next_js_ctx("   ", context.STATE_JS | context.JS_CTX_REGEX),
            "Blank tokens")
        self.assertEqual(
            context.STATE_JS | context.JS_CTX_DIV_OP,
            js.next_js_ctx("   ", context.STATE_JS | context.JS_CTX_DIV_OP),
            "Blank tokens")
Example #16
0
class _Analyzer(trace_analysis.Analyzer):
    """
    Applies the context_update algorithm to text nodes, builds
    side-tables of pipelines that need to be updated, and clones
    templates that are used in non-start contexts.
    """
    def __init__(self, name_to_body, start_state, templates=None):
        """
        name_to_body - maps template names to template bodies.
        start_state - the start state supplied by the caller.
        templates - optional initial contents for self.templates; it is
            copied, so the caller's mapping is not modified through this
            attribute.
        """
        trace_analysis.Analyzer.__init__(self)
        # Maps template names to bodies.
        self.name_to_body = name_to_body
        # The start state this analyzer was created with.
        self.start_state = start_state
        # Maps (template_name, start_context) pairs to end contexts
        self.templates = dict(templates or {})
        # Tracks the set of templates and the contexts in which they are
        # called.  A set (name, start_context)
        self.called = set()
        # Maps interpolation nodes to pipelines and escaping modes
        self.interps = {}
        # Maps text nodes to replacement text.
        self.text_values = {}
        # Maps external calls (step_values) to the contexts
        # in which they occur.
        # This assumes that cloned() step_values are distinct
        # from the original.
        self.calls = {}
        # Messages that explain failure to escape.
        self.errors = []

    def error(self, debug_hint, msg):
        """Queues a message explaining a problem noticed during escaping."""
        if debug_hint:
            msg = '%s: %s' % (debug_hint, msg)
        self.errors.append(msg)

    def step(self, start_state, step_value, debug_hint=None):
        """
        Advance the context over a single step value.

        start_state - the context before step_value.
        step_value - the node being stepped over.  It is probed, in
            order, for to_raw_content(), to_pipeline() and to_callee();
            the first one that exists and returns a non-None value
            decides how the step is handled.
        debug_hint - handed to self.error with any message that is
            queued.

        Returns the context after step_value.  An error start_state is
        returned unchanged, as is the start_state of a step value that
        matches none of the probes.
        """
        if context.is_error_context(start_state):
            # Simplifies error checking below.
            return start_state
        if hasattr(step_value, 'to_raw_content'):
            # Handle text nodes specified by the template author.
            raw_content = step_value.to_raw_content()
            if raw_content is not None:
                try:
                    end_state, new_content, error_ctx, error_text = (
                        context_update.process_raw_text(
                            raw_content, start_state))
                    if context.is_error_context(end_state):
                        self.error(
                            debug_hint, 'bad content in %s: `%s`' %
                            (debug.context_to_string(error_ctx), error_text))
                    elif new_content != raw_content:
                        # Remember the normalized replacement text.
                        self.text_values[step_value] = new_content
                # "except X as err" is valid on Python 2.6+ and Python 3;
                # the older comma form is a syntax error on Python 3.
                except context_update.ContextUpdateFailure as err:
                    self.error(debug_hint, str(err))
                    end_state = context.STATE_ERROR
                return end_state
        if hasattr(step_value, 'to_pipeline'):
            # Handle interpolation of untrusted values.
            pipeline = step_value.to_pipeline()
            if pipeline is not None:
                end_state, esc_modes, problem = (
                    escaping.esc_mode_for_hole(start_state))
                # Recorded even when the hole turns out to be in error.
                self.interps[step_value] = pipeline, esc_modes
                if context.is_error_context(end_state):
                    if problem is None:
                        self.error(
                            debug_hint, 'hole cannot appear in %s' %
                            (debug.context_to_string(start_state)))
                    else:
                        self.error(debug_hint, problem)
                return end_state
        if hasattr(step_value, 'to_callee'):
            # Handle calls to other templates by recursively typing the end
            # context of that template.
            callee = step_value.to_callee()
            if callee is not None:
                end_ctx = self.external_call(callee, start_state, debug_hint)
                self.calls[step_value] = start_state
                # rely on external_call to explain failure.
                return end_ctx
        return start_state
Example #17
0
    def test_escape_text(self):
        """
        Tests the content propagation algorithm.
        """
        tests = (
            (
                "",
                0,
            ),
            (
                'Hello, World!',
                0,
            ),
            (
                # An orphaned "<" is OK.
                'I <3 Ponies!',
                0,
                'I &lt;3 Ponies!',
            ),
            (
                '<a',
                context.STATE_TAG_NAME,
            ),
            (
                '<a ',
                context.STATE_TAG,
            ),
            (
                '<a>',
                context.STATE_TEXT,
            ),
            (
                '<a href',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a on',
                context.STATE_ATTR_NAME | context.ATTR_SCRIPT,
            ),
            (
                '<a href ',
                context.STATE_AFTER_NAME | context.ATTR_URL,
            ),
            (
                '<a style  =  ',
                context.STATE_BEFORE_VALUE | context.ATTR_STYLE,
            ),
            (
                '<a href=',
                context.STATE_BEFORE_VALUE | context.ATTR_URL,
            ),
            (
                '<a href=x',
                context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
                | context.URL_PART_PRE_QUERY,
                '<a href="x',
            ),
            (
                '<a href=x ',
                context.STATE_TAG,
                '<a href="x" ',
            ),
            (
                '<a href=>',
                context.STATE_TEXT,
                '<a href="">',
            ),
            (
                '<a href=x>',
                context.STATE_TEXT,
                '<a href="x">',
            ),
            (
                "<a href ='",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE,
            ),
            (
                "<a href=''",
                context.STATE_TAG,
            ),
            (
                '<a href= "',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a href=""',
                context.STATE_TAG,
            ),
            (
                '<a title="',
                context.STATE_ATTR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                "<a HREF='http:",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a Href='/",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a href='\"",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a href="\'',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a href='&apos;",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                "<a href='&#39;",
            ),
            (
                '<a href="&quot;',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                '<a href="&#34;',
            ),
            (
                '<a href="&#34;',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a href=&quot;',
                context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
                | context.URL_PART_PRE_QUERY,
                '<a href="&#34;',
            ),
            (
                '<a href="/search?q=',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<img alt="1">',
                context.STATE_TEXT,
            ),
            (
                '<img alt="1>"',
                context.STATE_TAG,
                '<img alt="1&gt;"',
            ),
            (
                '<img alt="1>">',
                context.STATE_TEXT,
                '<img alt="1&gt;">',
            ),
            (
                '<input checked type="checkbox"',
                context.STATE_TAG,
            ),
            (
                '<a onclick="',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="//foo',
                context.STATE_JSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="',
            ),
            (
                "<a onclick='//\n",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n",
            ),
            (
                "<a onclick='//\r\n",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n\n",  # \n\n is ok, \n is ok, \r\n is ok
            ),
            (
                u"<a onclick='//\u2028",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n",
            ),
            (
                '<a onclick="/*',
                context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onclick="/*/',
                context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onclick="/**/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onkeypress="&quot;',
                context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onkeypress="&#34;',
            ),
            (
                "<a onclick='&quot;foo&quot;",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE
                | context.JS_CTX_DIV_OP,
                "<a onclick='\"foo\"",
            ),
            (
                '<a onclick=&#39;foo&#39;',
                context.STATE_JS | context.DELIM_SPACE_OR_TAG_END
                | context.JS_CTX_DIV_OP,
                '<a onclick="\'foo\'',
            ),
            (
                '<a onclick=&#39;foo',
                context.STATE_JSSQ_STR | context.DELIM_SPACE_OR_TAG_END,
                '<a onclick="\'foo',
            ),
            (
                '<a onclick="&quot;foo\'',
                context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="&#34;foo\'',
            ),
            (
                '<a onclick="\'foo&quot;',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="\'foo&#34;',
            ),
            (
                '<A ONCLICK="\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="\'foo\'',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<a onclick="\'foo\\\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="\'foo\\\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<script>/foo/ /=',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<a onclick="1 /foo',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<a onclick="1 /*c*/ /foo',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
                '<a onclick="1   /foo',
            ),
            (
                '<a onclick="/foo[/]',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo\\/',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<input checked style="',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="//',
                context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style="',
            ),
            (
                '<a style="//</script>',
                context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style="',
            ),
            (
                "<a style='//\n",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
                "<a style='\n",
            ),
            (
                "<a style='//\r",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
                "<a style='\n",
            ),
            (
                '<a style="/*',
                context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="/*/',
                context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="/**/',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="background: \'',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: &quot;',
                context.STATE_CSSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a style="background: &#34;',
            ),
            (
                '<a style="background: \'/foo?img=',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<a style="background: \'/',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(&#x22;/',
                context.STATE_CSSDQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                '<a style="background: url(&#34;/',
            ),
            (
                '<a style="background: url(\'/',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(\'/)',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(\'/ ',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(/',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url( ',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url( /image?name=',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<a style="background: url(x)',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url(\'x\'',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url( x ',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<!-- foo',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!-->',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!--->',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!-- foo -->',
                context.STATE_TEXT,
                '',
            ),
            (
                '<script',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script src="foo.js" ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                "<script src='foo.js' ",
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script type=text/javascript ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
                '<script type="text/javascript" ',
            ),
            (
                '<script>foo',
                context.STATE_JS | context.JS_CTX_DIV_OP
                | context.ELEMENT_SCRIPT,
            ),
            (
                '<script>foo</script>',
                context.STATE_TEXT,
            ),
            (
                '<script>foo</script><!--',
                context.STATE_HTMLCMT,
                '<script>foo</script>',
            ),
            (
                '<script>document.write("<p>foo</p>");',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                r'<script>document.write("<p>foo<\/script>");',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<script>document.write("<script>alert(1)</script>");',
                context.STATE_TEXT,
            ),
            (
                '<Script>',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<SCRIPT>foo',
                context.STATE_JS | context.JS_CTX_DIV_OP
                | context.ELEMENT_SCRIPT,
            ),
            (
                '<textarea>value',
                context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
            ),
            (
                '<textarea>value</textarea>',
                context.STATE_TEXT,
            ),
            (
                '<textarea>value</TEXTAREA>',
                context.STATE_TEXT,
            ),
            (
                '<textarea name=html><b',
                context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
                '<textarea name="html">&lt;b',
            ),
            (
                '<title>value',
                context.STATE_RCDATA | context.ELEMENT_TITLE,
            ),
            (
                '<style>value',
                context.STATE_CSS | context.ELEMENT_STYLE,
            ),
            (
                '<a xlink:href',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlns',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlns:foo',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlnsxyz',
                context.STATE_ATTR_NAME,
            ),
            (
                '<a data-url',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a data-iconUri',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a data-urlItem',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:',
                context.STATE_ATTR_NAME,
            ),
            (
                '<a g:url',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:iconUri',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:urlItem',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:value',
                context.STATE_ATTR_NAME,
            ),
            (
                "<a svg:style='",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            ),
            (
                '<svg:font-face',
                context.STATE_TAG_NAME,
            ),
            (
                '<svg:a svg:onclick="',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            ))

        for test_case in tests:
            if len(test_case) == 2:
                test_input, want_ctx = test_case
                want_text = test_input
            else:
                test_input, want_ctx, want_text = test_case
            got_ctx, got_text, _, _ = context_update.process_raw_text(
                test_input, 0)
            if got_ctx != want_ctx:
                self.fail("input %r: want context\n\t%s\ngot\n\t%s" %
                          (test_input, debug.context_to_string(want_ctx),
                           debug.context_to_string(got_ctx)))
            self.assertEquals(got_text,
                              want_text,
                              msg=("input %r: want text\n\t%r\ngot\n\t%r" %
                                   (test_input, want_text, got_text)))
    def test_escape_text(self):
        """
        Checks context_update.process_raw_text against a table of raw
        HTML snippets.

        Each test case is a tuple of
            (input text, expected end context[, expected output text]).
        When the expected output text is omitted, the text returned by
        process_raw_text is expected to equal the input unchanged.

        Every snippet is processed with 0 passed as the second argument
        (presumably the starting context -- confirm against
        process_raw_text).
        """
        tests = (
            (
                "",
                0,
            ),
            (
                'Hello, World!',
                0,
            ),
            (
                # An orphaned "<" is OK.
                'I <3 Ponies!',
                0,
                'I &lt;3 Ponies!',
            ),
            (
                '<a',
                context.STATE_TAG_NAME,
            ),
            (
                '<a ',
                context.STATE_TAG,
            ),
            (
                '<a>',
                context.STATE_TEXT,
            ),
            (
                '<a href',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a on',
                context.STATE_ATTR_NAME | context.ATTR_SCRIPT,
            ),
            (
                '<a href ',
                context.STATE_AFTER_NAME | context.ATTR_URL,
            ),
            (
                '<a style  =  ',
                context.STATE_BEFORE_VALUE | context.ATTR_STYLE,
            ),
            (
                '<a href=',
                context.STATE_BEFORE_VALUE | context.ATTR_URL,
            ),
            (
                '<a href=x',
                context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
                | context.URL_PART_PRE_QUERY,
                '<a href="x',
            ),
            (
                '<a href=x ',
                context.STATE_TAG,
                '<a href="x" ',
            ),
            (
                '<a href=>',
                context.STATE_TEXT,
                '<a href="">',
            ),
            (
                '<a href=x>',
                context.STATE_TEXT,
                '<a href="x">',
            ),
            (
                "<a href ='",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE,
            ),
            (
                "<a href=''",
                context.STATE_TAG,
            ),
            (
                '<a href= "',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a href=""',
                context.STATE_TAG,
            ),
            (
                '<a title="',
                context.STATE_ATTR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                "<a HREF='http:",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a Href='/",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a href='\"",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a href="\'',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                "<a href='&apos;",
                context.STATE_URL | context.DELIM_SINGLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                "<a href='&#39;",
            ),
            (
                '<a href="&quot;',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                '<a href="&#34;',
            ),
            (
                '<a href="&#34;',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a href=&quot;',
                context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
                | context.URL_PART_PRE_QUERY,
                '<a href="&#34;',
            ),
            (
                '<a href="/search?q=',
                context.STATE_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<img alt="1">',
                context.STATE_TEXT,
            ),
            (
                '<img alt="1>"',
                context.STATE_TAG,
                '<img alt="1&gt;"',
            ),
            (
                '<img alt="1>">',
                context.STATE_TEXT,
                '<img alt="1&gt;">',
            ),
            (
                '<input checked type="checkbox"',
                context.STATE_TAG,
            ),
            (
                '<a onclick="',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="//foo',
                context.STATE_JSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="',
            ),
            (
                "<a onclick='//\n",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n",
            ),
            (
                "<a onclick='//\r\n",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n\n",  # \n\n is ok, \n is ok, \r\n is ok
            ),
            (
                u"<a onclick='//\u2028",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE,
                "<a onclick='\n",
            ),
            (
                '<a onclick="/*',
                context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onclick="/*/',
                context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onclick="/**/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
                '<a onclick=" ',
            ),
            (
                '<a onkeypress="&quot;',
                context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onkeypress="&#34;',
            ),
            (
                "<a onclick='&quot;foo&quot;",
                context.STATE_JS | context.DELIM_SINGLE_QUOTE
                | context.JS_CTX_DIV_OP,
                "<a onclick='\"foo\"",
            ),
            (
                '<a onclick=&#39;foo&#39;',
                context.STATE_JS | context.DELIM_SPACE_OR_TAG_END
                | context.JS_CTX_DIV_OP,
                '<a onclick="\'foo\'',
            ),
            (
                '<a onclick=&#39;foo',
                context.STATE_JSSQ_STR | context.DELIM_SPACE_OR_TAG_END,
                '<a onclick="\'foo',
            ),
            (
                '<a onclick="&quot;foo\'',
                context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="&#34;foo\'',
            ),
            (
                '<a onclick="\'foo&quot;',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a onclick="\'foo&#34;',
            ),
            (
                '<A ONCLICK="\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="\'foo\'',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<a onclick="\'foo\\\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="\'foo\\\'',
                context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<script>/foo/ /=',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<a onclick="1 /foo',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<a onclick="1 /*c*/ /foo',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
                '<a onclick="1   /foo',
            ),
            (
                '<a onclick="/foo[/]',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo\\/',
                context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a onclick="/foo/',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE
                | context.JS_CTX_DIV_OP,
            ),
            (
                '<input checked style="',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="//',
                context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style="',
            ),
            (
                '<a style="//</script>',
                context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style="',
            ),
            (
                "<a style='//\n",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
                "<a style='\n",
            ),
            (
                "<a style='//\r",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
                "<a style='\n",
            ),
            (
                '<a style="/*',
                context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="/*/',
                context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="/**/',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
                '<a style=" ',
            ),
            (
                '<a style="background: \'',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: &quot;',
                context.STATE_CSSDQ_STR | context.DELIM_DOUBLE_QUOTE,
                '<a style="background: &#34;',
            ),
            (
                '<a style="background: \'/foo?img=',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<a style="background: \'/',
                context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(&#x22;/',
                context.STATE_CSSDQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
                '<a style="background: url(&#34;/',
            ),
            (
                '<a style="background: url(\'/',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(\'/)',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(\'/ ',
                context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url(/',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_PRE_QUERY,
            ),
            (
                '<a style="background: url( ',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url( /image?name=',
                context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
                | context.URL_PART_QUERY_OR_FRAG,
            ),
            (
                '<a style="background: url(x)',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url(\'x\'',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<a style="background: url( x ',
                context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            ),
            (
                '<!-- foo',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!-->',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!--->',
                context.STATE_HTMLCMT,
                '',
            ),
            (
                '<!-- foo -->',
                context.STATE_TEXT,
                '',
            ),
            (
                '<script',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script src="foo.js" ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                "<script src='foo.js' ",
                context.STATE_TAG | context.ELEMENT_SCRIPT,
            ),
            (
                '<script type=text/javascript ',
                context.STATE_TAG | context.ELEMENT_SCRIPT,
                '<script type="text/javascript" ',
            ),
            (
                '<script>foo',
                context.STATE_JS | context.JS_CTX_DIV_OP
                | context.ELEMENT_SCRIPT,
            ),
            (
                '<script>foo</script>',
                context.STATE_TEXT,
            ),
            (
                '<script>foo</script><!--',
                context.STATE_HTMLCMT,
                '<script>foo</script>',
            ),
            (
                '<script>document.write("<p>foo</p>");',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                r'<script>document.write("<p>foo<\/script>");',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<script>document.write("<script>alert(1)</script>");',
                context.STATE_TEXT,
            ),
            (
                '<Script>',
                context.STATE_JS | context.ELEMENT_SCRIPT,
            ),
            (
                '<SCRIPT>foo',
                context.STATE_JS | context.JS_CTX_DIV_OP
                | context.ELEMENT_SCRIPT,
            ),
            (
                '<textarea>value',
                context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
            ),
            (
                '<textarea>value</textarea>',
                context.STATE_TEXT,
            ),
            (
                '<textarea>value</TEXTAREA>',
                context.STATE_TEXT,
            ),
            (
                '<textarea name=html><b',
                context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
                '<textarea name="html">&lt;b',
            ),
            (
                '<title>value',
                context.STATE_RCDATA | context.ELEMENT_TITLE,
            ),
            (
                '<style>value',
                context.STATE_CSS | context.ELEMENT_STYLE,
            ),
            (
                '<a xlink:href',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlns',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlns:foo',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a xmlnsxyz',
                context.STATE_ATTR_NAME,
            ),
            (
                '<a data-url',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a data-iconUri',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a data-urlItem',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:',
                context.STATE_ATTR_NAME,
            ),
            (
                '<a g:url',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:iconUri',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:urlItem',
                context.STATE_ATTR_NAME | context.ATTR_URL,
            ),
            (
                '<a g:value',
                context.STATE_ATTR_NAME,
            ),
            (
                "<a svg:style='",
                context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            ),
            (
                '<svg:font-face',
                context.STATE_TAG_NAME,
            ),
            (
                '<svg:a svg:onclick="',
                context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            ),
        )

        for test_case in tests:
            # Two-element cases omit the expected text: the output is
            # expected to be identical to the input.
            if len(test_case) == 2:
                test_input, want_ctx = test_case
                want_text = test_input
            else:
                test_input, want_ctx, want_text = test_case
            got_ctx, got_text, _, _ = context_update.process_raw_text(
                test_input, 0)
            # Contexts are compared by hand (rather than via assertEqual)
            # so that debug.context_to_string is only called when a
            # mismatch actually has to be reported.
            if got_ctx != want_ctx:
                self.fail("input %r: want context\n\t%s\ngot\n\t%s"
                          % (test_input, debug.context_to_string(want_ctx),
                             debug.context_to_string(got_ctx)))
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in Python 3.12+.
            self.assertEqual(
                got_text, want_text,
                msg=("input %r: want text\n\t%r\ngot\n\t%r"
                     % (test_input, want_text, got_text)))