def contextualize_name(tmpl_name, start_ctx):
    """
    Returns the name under which tmpl_name is known when entered in
    start_ctx.

    Cloned bodies need names distinct from the original so that calls can
    be rewritten according to the context in which they appear, which is
    what lets one helper template be called from several contexts.

    Relies on `self` and `contextualized_names` being supplied by the
    enclosing scope (not visible in this excerpt).
    """
    # A template used in the analyzer's start state keeps its own name.
    if start_ctx == self.start_state:
        return tmpl_name
    cache_key = (tmpl_name, start_ctx)
    cached = contextualized_names.get(cache_key)
    if cached is not None:
        return cached
    stem = '%s$%s' % (
        tmpl_name, debug.context_to_string(start_ctx).replace(' ', ','))
    candidate = stem
    suffix = 0
    # Append a counter until the name collides with nothing in
    # name_to_body.
    while candidate in self.name_to_body:
        candidate = '%s%d' % (stem, suffix)
        suffix += 1
    contextualized_names[cache_key] = candidate
    return candidate
def no_steady_state(self, states, debug_hint=None):
    """
    Resolves a loop whose states never settle.

    If any of the given states is already an error context, the first such
    state is returned unchanged and nothing is reported.  Otherwise an
    error listing every state is queued via self.error and
    context.STATE_ERROR is returned.
    """
    for candidate in states:
        if context.is_error_context(candidate):
            return candidate
    described = ', '.join([debug.context_to_string(s) for s in states])
    self.error(debug_hint, 'loop switches between states (%s)' % described)
    return context.STATE_ERROR
def contextualize_name(tmpl_name, start_ctx):
    """
    Returns the name under which tmpl_name is known when entered in
    start_ctx.

    Cloned bodies need names distinct from the original so that calls can
    be rewritten according to the context in which they appear, which is
    what lets one helper template be called from several contexts.

    Relies on `self` and `contextualized_names` being supplied by the
    enclosing scope (not visible in this excerpt).
    """
    # A template used in the analyzer's start state keeps its own name.
    if start_ctx == self.start_state:
        return tmpl_name
    cache_key = (tmpl_name, start_ctx)
    cached = contextualized_names.get(cache_key)
    if cached is not None:
        return cached
    stem = '%s$%s' % (
        tmpl_name, debug.context_to_string(start_ctx).replace(' ', ','))
    candidate = stem
    suffix = 0
    # Append a counter until the name collides with nothing in
    # name_to_body.
    while candidate in self.name_to_body:
        candidate = '%s%d' % (stem, suffix)
        suffix += 1
    contextualized_names[cache_key] = candidate
    return candidate
def no_steady_state(self, states, debug_hint=None):
    """
    Resolves a loop whose states never settle.

    If any of the given states is already an error context, the first such
    state is returned unchanged and nothing is reported.  Otherwise an
    error listing every state is queued via self.error and
    context.STATE_ERROR is returned.
    """
    for candidate in states:
        if context.is_error_context(candidate):
            return candidate
    described = ', '.join([debug.context_to_string(s) for s in states])
    self.error(debug_hint, 'loop switches between states (%s)' % described)
    return context.STATE_ERROR
def join(self, states, debug_hint=None):
    """
    Merges the end states of several branches into one state.

    The states are folded together with context_update.context_union.  If
    the result is an error context and none of the inputs was already an
    error context, an error naming the incompatible states is queued via
    self.error.  The merged state is returned in every case.
    """
    merged = functools.reduce(context_update.context_union, states)
    if not context.is_error_context(merged):
        return merged
    # Stay quiet when one of the branches already failed: an error was
    # reported when that state was produced.
    for branch_state in states:
        if context.is_error_context(branch_state):
            return merged
    described = ', '.join(
        [debug.context_to_string(branch_state) for branch_state in states])
    self.error(
        debug_hint, 'branches end in incompatible contexts: %s' % described)
    return merged
def join(self, states, debug_hint=None):
    """
    Merges the end states of several branches into one state.

    The states are folded together with context_update.context_union.  If
    the result is an error context and none of the inputs was already an
    error context, an error naming the incompatible states is queued via
    self.error.  The merged state is returned in every case.
    """
    merged = functools.reduce(context_update.context_union, states)
    if not context.is_error_context(merged):
        return merged
    # Stay quiet when one of the branches already failed: an error was
    # reported when that state was produced.
    for branch_state in states:
        if context.is_error_context(branch_state):
            return merged
    described = ', '.join(
        [debug.context_to_string(branch_state) for branch_state in states])
    self.error(
        debug_hint, 'branches end in incompatible contexts: %s' % described)
    return merged
def escape(name_to_body, public_template_names,
           start_state=context.STATE_TEXT):
    """
    Analyzes the public templates and rewrites the template set.

    name_to_body - maps template names to template bodies.
      A template body is an object that implements
      1. reduce_traces(start_state, analyzer) -> end_state
      2. clone() -> a structural copy of the body that is distinct
         according to == and is also a template body.
      3. the body node interface described below.
    public_template_names - the names that might be called with an empty
      output buffer in the given start state.
    start_state - the state in which the named templates might be called.

    A body node is an object that implements
      1. children() -> a series of nodes
      2. with_children(children) -> produces a structural copy of the
         body but with the given children instead of children().
    Step values must also be body nodes, and the transitively enumerated
    nodes of a body must include all step values encountered when
    following traces that do not include external calls.

    name_to_body may be augmented with new template definitions as a
    result of this call.

    Raises EscapeError, carrying every queued message joined by newlines,
    when any public template ends in an error context or in a context
    other than start_state.  If escape exits with an exception, then it is
    unsafe to use the templates in name_to_body.
    """
    analyzer = _Analyzer(name_to_body, start_state)
    failed = False
    for name in public_template_names:
        end_state = analyzer.external_call(name, start_state, None)
        if context.is_error_context(end_state):
            failed = True
            continue
        if end_state == start_state:
            continue
        # Templates should start and end in the same context.  Otherwise
        # concatenation of the output from safe templates is not safe.
        mismatch = (name, debug.context_to_string(end_state))
        analyzer.error(
            None,
            'template %s does not start and end in the same context: %s'
            % mismatch)
        failed = True

    if failed:
        raise EscapeError('\n'.join(analyzer.errors))

    analyzer.rewrite()
def _process_next_token(text, context):
    """
    Consumes a prefix of text and computes the context that follows it.

    text - Non empty.
    context - the context in which text starts.

    Returns (n, context after text[:n], replacement for text[:n]).
    When no transition applies, all of text is consumed unchanged and the
    resulting context is STATE_ERROR.
    """
    if is_error_context(context):
        # The ERROR state is infectious.
        return (len(text), context, text)

    # Pick the applicable transition whose pattern matches earliest in the
    # raw text.  Ties go to the transition listed first.
    best_start = len(text) + 1
    best_transition = None
    best_match = None
    for candidate in _TRANSITIONS[state_of(context)]:
        found = candidate.pattern.search(text)
        if found:
            offset = found.start(0)
            if (offset < best_start
                    and candidate.is_applicable_to(context, found)):
                best_start = offset
                best_transition = candidate
                best_match = found

    if not best_transition:
        num_consumed = len(text)
        next_context = STATE_ERROR
        normalized_text = text
    else:
        num_consumed = best_match.end(0)
        next_context = best_transition.compute_next_context(
            context, best_match)
        normalized_text = best_transition.raw_text(best_match)

    made_no_progress = (
        not num_consumed and state_of(next_context) == state_of(context))
    if made_no_progress:  # pragma: no cover
        # Consuming nothing while staying in the same state would loop
        # forever in the caller.
        raise Exception('inf loop. for %r in %s'
                        % (text, debug.context_to_string(context)))

    return (num_consumed, next_context, normalized_text)
def _process_next_token(text, context):
    """
    Consumes a prefix of text and computes the context that follows it.

    text - Non empty.
    context - the context in which text starts.

    Returns (n, context after text[:n], replacement for text[:n]).
    When no transition applies, all of text is consumed unchanged and the
    resulting context is STATE_ERROR.
    """
    if is_error_context(context):
        # The ERROR state is infectious.
        return (len(text), context, text)

    # Pick the applicable transition whose pattern matches earliest in the
    # raw text.  Ties go to the transition listed first.
    best_start = len(text) + 1
    best_transition = None
    best_match = None
    for candidate in _TRANSITIONS[state_of(context)]:
        found = candidate.pattern.search(text)
        if found:
            offset = found.start(0)
            if (offset < best_start
                    and candidate.is_applicable_to(context, found)):
                best_start = offset
                best_transition = candidate
                best_match = found

    if not best_transition:
        num_consumed = len(text)
        next_context = STATE_ERROR
        normalized_text = text
    else:
        num_consumed = best_match.end(0)
        next_context = best_transition.compute_next_context(
            context, best_match)
        normalized_text = best_transition.raw_text(best_match)

    made_no_progress = (
        not num_consumed and state_of(next_context) == state_of(context))
    if made_no_progress:  # pragma: no cover
        # Consuming nothing while staying in the same state would loop
        # forever in the caller.
        raise Exception('inf loop. for %r in %s'
                        % (text, debug.context_to_string(context)))

    return (num_consumed, next_context, normalized_text)
def _compute_end_context(self, name_and_ctx, body, debug_hint): """Propagate context over the body.""" tmpl_name, start_ctx = name_and_ctx ctx, problems = self._escape_template_body( name_and_ctx, start_ctx, body) if problems is not None: # Look for a fixed point by assuming c1 as the output context. ctx2, problems2 = self._escape_template_body( name_and_ctx, ctx, body) if problems2 is None: ctx, problems = ctx2, None if problems is not None: if not context.is_error_context(ctx): # We have not explained the problem yet. self.error(debug_hint, "cannot compute output context for template %s in %s" % ( tmpl_name, debug.context_to_string(start_ctx))) self.errors.extend(problems) return context.STATE_ERROR return ctx
def step(self, start_state, step_value, debug_hint=None):
    """
    Computes the state that follows a raw-text step value.

    start_state - the state before the step; error states are returned
      unchanged.
    step_value - the node being stepped over.  Only nodes exposing
      to_raw_content() with a non-None result are handled here.
    debug_hint - passed to self.error when a message is queued.

    Returns the end state for handled text nodes.  Rewritten text that
    differs from the original is recorded in self.text_values.

    NOTE(review): in this excerpt any other step value falls off the end
    of the function with no explicit return - confirm that is intended.
    """
    if context.is_error_context(start_state):
        # Simplifies error checking below.
        return start_state
    if hasattr(step_value, 'to_raw_content'):
        # Handle text nodes specified by the template author.
        raw_content = step_value.to_raw_content()
        if raw_content is not None:
            try:
                end_state, new_content, error_ctx, error_text = (
                    context_update.process_raw_text(
                        raw_content, start_state))
                if context.is_error_context(end_state):
                    self.error(
                        debug_hint,
                        'bad content in %s: `%s`'
                        % (debug.context_to_string(error_ctx), error_text))
                elif new_content != raw_content:
                    self.text_values[step_value] = new_content
            # "except X as err" parses on Python 2.6+ and Python 3, unlike
            # the comma form.
            except context_update.ContextUpdateFailure as err:
                self.error(debug_hint, str(err))
                end_state = context.STATE_ERROR
            return end_state
def _compute_end_context(self, name_and_ctx, body, debug_hint): """Propagate context over the body.""" tmpl_name, start_ctx = name_and_ctx ctx, problems = self._escape_template_body(name_and_ctx, start_ctx, body) if problems is not None: # Look for a fixed point by assuming c1 as the output context. ctx2, problems2 = self._escape_template_body( name_and_ctx, ctx, body) if problems2 is None: ctx, problems = ctx2, None if problems is not None: if not context.is_error_context(ctx): # We have not explained the problem yet. self.error( debug_hint, "cannot compute output context for template %s in %s" % (tmpl_name, debug.context_to_string(start_ctx))) self.errors.extend(problems) return context.STATE_ERROR return ctx
def step(self, start_state, step_value, debug_hint=None):
    """
    Computes the state that follows a raw-text step value.

    start_state - the state before the step; error states are returned
      unchanged.
    step_value - the node being stepped over.  Only nodes exposing
      to_raw_content() with a non-None result are handled here.
    debug_hint - passed to self.error when a message is queued.

    Returns the end state for handled text nodes.  Rewritten text that
    differs from the original is recorded in self.text_values.

    NOTE(review): in this excerpt any other step value falls off the end
    of the function with no explicit return - confirm that is intended.
    """
    if context.is_error_context(start_state):
        # Simplifies error checking below.
        return start_state
    if hasattr(step_value, 'to_raw_content'):
        # Handle text nodes specified by the template author.
        raw_content = step_value.to_raw_content()
        if raw_content is not None:
            try:
                end_state, new_content, error_ctx, error_text = (
                    context_update.process_raw_text(
                        raw_content, start_state))
                if context.is_error_context(end_state):
                    self.error(
                        debug_hint,
                        'bad content in %s: `%s`'
                        % (debug.context_to_string(error_ctx), error_text))
                elif new_content != raw_content:
                    self.text_values[step_value] = new_content
            # "except X as err" parses on Python 2.6+ and Python 3, unlike
            # the comma form.
            except context_update.ContextUpdateFailure as err:
                self.error(debug_hint, str(err))
                end_state = context.STATE_ERROR
            return end_state
def test_is_regex_preceder(self):
    """
    Tests the heuristic that is used to update JS_CTX_*.

    Each table row pairs the JS context expected after a snippet with the
    snippet itself.  Every row is checked from three starting contexts;
    the expected result keeps the state bits of the starting context.
    """
    tests = (
        # Statement terminators precede regexps.
        (context.JS_CTX_REGEX, ";"),
        # This is not airtight.
        #     ({ valueOf: function () { return 1 } } / 2)
        # is valid JavaScript but in practice, devs do not do this.
        # A block followed by a statement starting with a RegExp is
        # much more common:
        #     while (x) {...} /foo/.test(x) || panic()
        (context.JS_CTX_REGEX, "}"),
        # But member, call, grouping, and array expression terminators
        # precede div ops.
        (context.JS_CTX_DIV_OP, ")"),
        (context.JS_CTX_DIV_OP, "]"),
        # At the start of a primary expression, array, or expression
        # statement, expect a regexp.
        (context.JS_CTX_REGEX, "("),
        (context.JS_CTX_REGEX, "["),
        (context.JS_CTX_REGEX, "{"),
        # Assignment operators precede regexps as do all exclusively
        # prefix and binary operators.
        (context.JS_CTX_REGEX, "="),
        (context.JS_CTX_REGEX, "+="),
        (context.JS_CTX_REGEX, "*="),
        (context.JS_CTX_REGEX, "*"),
        (context.JS_CTX_REGEX, "!"),
        # Whether the + or - is infix or prefix, it cannot precede a
        # div op.
        (context.JS_CTX_REGEX, "+"),
        (context.JS_CTX_REGEX, "-"),
        # An incr/decr op precedes a div operator.
        # This is not airtight.  In (g = ++/h/i) a regexp follows a
        # pre-increment operator, but in practice devs do not try to
        # increment or decrement regular expressions.
        # (g++/h/i) where ++ is a postfix operator on g is much more
        # common.
        (context.JS_CTX_DIV_OP, "--"),
        (context.JS_CTX_DIV_OP, "++"),
        (context.JS_CTX_DIV_OP, "x--"),
        # When we have many dashes or pluses, then they are grouped
        # left to right.
        (context.JS_CTX_REGEX, "x---"),  # A postfix -- then a -.
        # return followed by a slash returns the regexp literal or the
        # slash starts a regexp literal in an expression statement that
        # is dead code.
        (context.JS_CTX_REGEX, "return"),
        (context.JS_CTX_REGEX, "return "),
        (context.JS_CTX_REGEX, "return\t"),
        (context.JS_CTX_REGEX, "return\n"),
        (context.JS_CTX_REGEX, u"return\u2028"),
        # Identifiers can be divided and cannot validly be preceded by
        # a regular expression.  Semicolon insertion cannot happen
        # between an identifier and a regular expression on a new line
        # because the one token lookahead for semicolon insertion has
        # to conclude that it could be a div binary op and treat it as
        # such.
        (context.JS_CTX_DIV_OP, "x"),
        (context.JS_CTX_DIV_OP, "x "),
        (context.JS_CTX_DIV_OP, "x\t"),
        (context.JS_CTX_DIV_OP, "x\n"),
        (context.JS_CTX_DIV_OP, u"x\u2028"),
        (context.JS_CTX_DIV_OP, "preturn"),
        # Numbers precede div ops.
        (context.JS_CTX_DIV_OP, "0"),
        # Dots that are part of a number are div preceders.
        (context.JS_CTX_DIV_OP, "0."),
    )

    start_contexts = (
        context.JS_CTX_REGEX,
        context.JS_CTX_DIV_OP,
        context.JS_CTX_DIV_OP | context.STATE_JS,
    )
    for want_ctx, js_code in tests:
        for start in start_contexts:
            got = js.next_js_ctx(js_code, start)
            want = want_ctx | context.state_of(start)
            # assertEqual replaces the deprecated assertEquals alias,
            # which newer unittest releases no longer provide.
            self.assertEqual(
                want, got,
                "%s: want %s got %s" % (
                    js_code,
                    debug.context_to_string(want),
                    debug.context_to_string(got)))

    # Blank input leaves the JS context as it was.
    self.assertEqual(
        context.STATE_JS | context.JS_CTX_REGEX,
        js.next_js_ctx(" ", context.STATE_JS | context.JS_CTX_REGEX),
        "Blank tokens")
    self.assertEqual(
        context.STATE_JS | context.JS_CTX_DIV_OP,
        js.next_js_ctx(" ", context.STATE_JS | context.JS_CTX_DIV_OP),
        "Blank tokens")
class _Analyzer(trace_analysis.Analyzer):
    """
    Applies the context_update algorithm to text nodes, builds
    side-tables of pipelines that need to be updated, and clones
    templates that are used in non-start contexts.
    """

    def __init__(self, name_to_body, start_state, templates=None):
        """
        name_to_body - maps template names to bodies.
        start_state - the state supplied by the caller as the start state.
        templates - optional mapping copied into self.templates.
        """
        trace_analysis.Analyzer.__init__(self)
        # Maps template names to bodies.
        self.name_to_body = name_to_body
        # The start state supplied by the caller.
        self.start_state = start_state
        # Maps (template_name, start_context) pairs to end contexts
        self.templates = dict(templates or {})
        # Tracks the set of templates and the contexts in which they are
        # called.  A set (name, start_context)
        self.called = set()
        # Maps interpolation nodes to pipelines and escaping modes
        self.interps = {}
        # Maps text nodes to replacement text.
        self.text_values = {}
        # Maps external calls (step_values) to the contexts
        # in which they occur.
        # This assumes that cloned() step_values are distinct
        # from the original.
        self.calls = {}
        # Messages that explain failure to escape.
        self.errors = []

    def error(self, debug_hint, msg):
        """Queues a message explaining a problem noticed during escaping."""
        if debug_hint:
            msg = '%s: %s' % (debug_hint, msg)
        self.errors.append(msg)

    def step(self, start_state, step_value, debug_hint=None):
        """
        Computes the state that follows step_value when entered in
        start_state.

        Text nodes, interpolations and template calls are recognized by
        the to_raw_content / to_pipeline / to_callee methods they expose.
        A step value matching none of these, or whose accessor returns
        None, leaves the state unchanged.  Error states are returned as
        they are.
        """
        if context.is_error_context(start_state):
            # Simplifies error checking below.
            return start_state
        if hasattr(step_value, 'to_raw_content'):
            # Handle text nodes specified by the template author.
            raw_content = step_value.to_raw_content()
            if raw_content is not None:
                try:
                    end_state, new_content, error_ctx, error_text = (
                        context_update.process_raw_text(
                            raw_content, start_state))
                    if context.is_error_context(end_state):
                        self.error(
                            debug_hint,
                            'bad content in %s: `%s`'
                            % (debug.context_to_string(error_ctx),
                               error_text))
                    elif new_content != raw_content:
                        self.text_values[step_value] = new_content
                # "except X as err" parses on Python 2.6+ and Python 3,
                # unlike the comma form.
                except context_update.ContextUpdateFailure as err:
                    self.error(debug_hint, str(err))
                    end_state = context.STATE_ERROR
                return end_state
        if hasattr(step_value, 'to_pipeline'):
            # Handle interpolation of untrusted values.
            pipeline = step_value.to_pipeline()
            if pipeline is not None:
                end_state, esc_modes, problem = (
                    escaping.esc_mode_for_hole(start_state))
                # Recorded even when the hole turns out to be an error.
                self.interps[step_value] = pipeline, esc_modes
                if context.is_error_context(end_state):
                    if problem is None:
                        self.error(
                            debug_hint,
                            'hole cannot appear in %s'
                            % (debug.context_to_string(start_state)))
                    else:
                        self.error(debug_hint, problem)
                return end_state
        if hasattr(step_value, 'to_callee'):
            # Handle calls to other templates by recursively typing the
            # end context of that template.
            callee = step_value.to_callee()
            if callee is not None:
                end_ctx = self.external_call(
                    callee, start_state, debug_hint)
                self.calls[step_value] = start_state
                # rely on external_call to explain failure.
                return end_ctx
        return start_state
def test_escape_text(self):
    """
    Tests the content propagation algorithm.

    Each table row is (input, expected end context) or
    (input, expected end context, expected rewritten text).  Two-element
    rows expect the text to come back unchanged.  Every input is
    processed starting from context 0.

    NOTE(review): three literals below are not parseable as written, and
    several rows repeat the same input with different expected contexts.
    It looks as though HTML character references inside the literals were
    decoded when this source was extracted - confirm against the upstream
    file before relying on this table.
    """
    tests = (
        (
            "",
            0,
        ),
        (
            'Hello, World!',
            0,
        ),
        (
            # An orphaned "<" is OK.
            'I <3 Ponies!',
            0,
            'I <3 Ponies!',
        ),
        (
            '<a',
            context.STATE_TAG_NAME,
        ),
        (
            '<a ',
            context.STATE_TAG,
        ),
        (
            '<a>',
            context.STATE_TEXT,
        ),
        (
            '<a href',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a on',
            context.STATE_ATTR_NAME | context.ATTR_SCRIPT,
        ),
        (
            '<a href ',
            context.STATE_AFTER_NAME | context.ATTR_URL,
        ),
        (
            '<a style = ',
            context.STATE_BEFORE_VALUE | context.ATTR_STYLE,
        ),
        (
            '<a href=',
            context.STATE_BEFORE_VALUE | context.ATTR_URL,
        ),
        (
            '<a href=x',
            context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
            | context.URL_PART_PRE_QUERY,
            '<a href="x',
        ),
        (
            '<a href=x ',
            context.STATE_TAG,
            '<a href="x" ',
        ),
        (
            '<a href=>',
            context.STATE_TEXT,
            '<a href="">',
        ),
        (
            '<a href=x>',
            context.STATE_TEXT,
            '<a href="x">',
        ),
        (
            "<a href ='",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE,
        ),
        (
            "<a href=''",
            context.STATE_TAG,
        ),
        (
            '<a href= "',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a href=""',
            context.STATE_TAG,
        ),
        (
            '<a title="',
            context.STATE_ATTR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            "<a HREF='http:",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            "<a Href='/",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            "<a href='\"",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a href="\'',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        # NOTE(review): the next four rows reuse inputs that appear above
        # with different expected contexts; presumably the inputs held
        # character references originally - TODO confirm.
        (
            "<a href=''",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            "<a href=''",
        ),
        (
            '<a href=""',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            '<a href=""',
        ),
        (
            '<a href=""',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a href="',
            context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
            | context.URL_PART_PRE_QUERY,
            '<a href=""',
        ),
        (
            '<a href="/search?q=',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        (
            '<img alt="1">',
            context.STATE_TEXT,
        ),
        (
            '<img alt="1>"',
            context.STATE_TAG,
            '<img alt="1>"',
        ),
        (
            '<img alt="1>">',
            context.STATE_TEXT,
            '<img alt="1>">',
        ),
        (
            '<input checked type="checkbox"',
            context.STATE_TAG,
        ),
        (
            '<a onclick="',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="//foo',
            context.STATE_JSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick="',
        ),
        (
            "<a onclick='//\n",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n",
        ),
        (
            "<a onclick='//\r\n",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n\n",  # \n\n is ok, \n is ok, \r\n is ok
        ),
        (
            u"<a onclick='//\u2028",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n",
        ),
        (
            '<a onclick="/*',
            context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onclick="/*/',
            context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onclick="/**/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onkeypress=""',
            context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onkeypress=""',
        ),
        # NOTE(review): the first literal in each of the next three rows
        # does not parse as written (unescaped quotes inside the literal).
        # Presumably the quotes were character references originally -
        # TODO restore from the upstream file.
        (
            "<a onclick='"foo"",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE
            | context.JS_CTX_DIV_OP,
            "<a onclick='\"foo\"",
        ),
        (
            '<a onclick='foo'',
            context.STATE_JS | context.DELIM_SPACE_OR_TAG_END
            | context.JS_CTX_DIV_OP,
            '<a onclick="\'foo\'',
        ),
        (
            '<a onclick='foo',
            context.STATE_JSSQ_STR | context.DELIM_SPACE_OR_TAG_END,
            '<a onclick="\'foo',
        ),
        (
            '<a onclick=""foo\'',
            context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=""foo\'',
        ),
        (
            '<a onclick="\'foo"',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onclick="\'foo"',
        ),
        (
            '<A ONCLICK="\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="\'foo\'',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<a onclick="\'foo\\\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="\'foo\\\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<script>/foo/ /=',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            '<a onclick="1 /foo',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<a onclick="1 /*c*/ /foo',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
            '<a onclick="1 /foo',
        ),
        (
            '<a onclick="/foo[/]',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo\\/',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<input checked style="',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="//',
            context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style="',
        ),
        (
            '<a style="//</script>',
            context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style="',
        ),
        (
            "<a style='//\n",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            "<a style='\n",
        ),
        (
            "<a style='//\r",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            "<a style='\n",
        ),
        (
            '<a style="/*',
            context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="/*/',
            context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="/**/',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="background: \'',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: "',
            context.STATE_CSSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a style="background: "',
        ),
        (
            '<a style="background: \'/foo?img=',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        (
            '<a style="background: \'/',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url("/',
            context.STATE_CSSDQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            '<a style="background: url("/',
        ),
        (
            '<a style="background: url(\'/',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(\'/)',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(\'/ ',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(/',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url( ',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url( /image?name=',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        (
            '<a style="background: url(x)',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url(\'x\'',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url( x ',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<!-- foo',
            context.STATE_HTMLCMT,
            '',
        ),
        (
            '<!-->',
            context.STATE_HTMLCMT,
            '',
        ),
        (
            '<!--->',
            context.STATE_HTMLCMT,
            '',
        ),
        (
            '<!-- foo -->',
            context.STATE_TEXT,
            '',
        ),
        (
            '<script',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            '<script ',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            '<script src="foo.js" ',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            "<script src='foo.js' ",
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            '<script type=text/javascript ',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
            '<script type="text/javascript" ',
        ),
        (
            '<script>foo',
            context.STATE_JS | context.JS_CTX_DIV_OP
            | context.ELEMENT_SCRIPT,
        ),
        (
            '<script>foo</script>',
            context.STATE_TEXT,
        ),
        (
            '<script>foo</script><!--',
            context.STATE_HTMLCMT,
            '<script>foo</script>',
        ),
        (
            '<script>document.write("<p>foo</p>");',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            r'<script>document.write("<p>foo<\/script>");',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            '<script>document.write("<script>alert(1)</script>");',
            context.STATE_TEXT,
        ),
        (
            '<Script>',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            '<SCRIPT>foo',
            context.STATE_JS | context.JS_CTX_DIV_OP
            | context.ELEMENT_SCRIPT,
        ),
        (
            '<textarea>value',
            context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
        ),
        (
            '<textarea>value</textarea>',
            context.STATE_TEXT,
        ),
        (
            '<textarea>value</TEXTAREA>',
            context.STATE_TEXT,
        ),
        (
            '<textarea name=html><b',
            context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
            '<textarea name="html"><b',
        ),
        (
            '<title>value',
            context.STATE_RCDATA | context.ELEMENT_TITLE,
        ),
        (
            '<style>value',
            context.STATE_CSS | context.ELEMENT_STYLE,
        ),
        (
            '<a xlink:href',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a xmlns',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a xmlns:foo',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a xmlnsxyz',
            context.STATE_ATTR_NAME,
        ),
        (
            '<a data-url',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a data-iconUri',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a data-urlItem',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a g:',
            context.STATE_ATTR_NAME,
        ),
        (
            '<a g:url',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a g:iconUri',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a g:urlItem',
            context.STATE_ATTR_NAME | context.ATTR_URL,
        ),
        (
            '<a g:value',
            context.STATE_ATTR_NAME,
        ),
        (
            "<a svg:style='",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
        ),
        (
            '<svg:font-face',
            context.STATE_TAG_NAME,
        ),
        (
            '<svg:a svg:onclick="',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
        ))

    for test_case in tests:
        # Two-element rows expect the text to pass through unchanged.
        if len(test_case) == 2:
            test_input, want_ctx = test_case
            want_text = test_input
        else:
            test_input, want_ctx, want_text = test_case
        # Only the end context and rewritten text are checked; the last
        # two values returned by process_raw_text are ignored here.
        got_ctx, got_text, _, _ = context_update.process_raw_text(
            test_input, 0)
        if got_ctx != want_ctx:
            self.fail("input %r: want context\n\t%s\ngot\n\t%s"
                      % (test_input,
                         debug.context_to_string(want_ctx),
                         debug.context_to_string(got_ctx)))
        self.assertEquals(got_text, want_text,
                          msg=("input %r: want text\n\t%r\ngot\n\t%r"
                               % (test_input, want_text, got_text)))
def test_escape_text(self):
    """
    Checks context propagation over snippets of raw HTML text.

    Each entry of the table is either ``(input, want_ctx)`` or
    ``(input, want_ctx, want_text)``.  The input is handed to
    ``context_update.process_raw_text`` with a starting context of 0 and
    the first two elements of the returned 4-tuple are compared against
    the expected context and the expected (normalized) text.  When
    ``want_text`` is omitted the output text is expected to equal the
    input unchanged.
    """
    # NOTE(review): the string literals in this table look like they were
    # passed through an HTML-entity decoder at some point: several were
    # not valid Python any more, and some rows had become identical
    # inputs with different expected contexts.  Entities were put back
    # wherever the expected context or the tuple shape requires one, but
    # the exact spellings (&quot; vs. &#34; etc.) are reconstructed --
    # confirm them, and the remaining rows, against the upstream file.
    tests = (
        ("", 0),
        ('Hello, World!', 0),
        # An orphaned "<" is OK.
        ('I <3 Ponies!', 0, 'I &lt;3 Ponies!'),
        ('<a', context.STATE_TAG_NAME),
        ('<a ', context.STATE_TAG),
        ('<a>', context.STATE_TEXT),
        ('<a href', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a on', context.STATE_ATTR_NAME | context.ATTR_SCRIPT),
        ('<a href ', context.STATE_AFTER_NAME | context.ATTR_URL),
        ('<a style = ', context.STATE_BEFORE_VALUE | context.ATTR_STYLE),
        ('<a href=', context.STATE_BEFORE_VALUE | context.ATTR_URL),
        (
            '<a href=x',
            context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
            | context.URL_PART_PRE_QUERY,
            '<a href="x',
        ),
        ('<a href=x ', context.STATE_TAG, '<a href="x" '),
        ('<a href=>', context.STATE_TEXT, '<a href="">'),
        ('<a href=x>', context.STATE_TEXT, '<a href="x">'),
        ("<a href ='", context.STATE_URL | context.DELIM_SINGLE_QUOTE),
        ("<a href=''", context.STATE_TAG),
        ('<a href= "', context.STATE_URL | context.DELIM_DOUBLE_QUOTE),
        ('<a href=""', context.STATE_TAG),
        ('<a title="', context.STATE_ATTR | context.DELIM_DOUBLE_QUOTE),
        (
            "<a HREF='http:",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            "<a Href='/",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            "<a href='\"",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a href="\'',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        # An encoded quote is part of the value; it does not close it.
        (
            "<a href='&apos;",
            context.STATE_URL | context.DELIM_SINGLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            "<a href='&#39;",
        ),
        (
            '<a href="&quot;',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            '<a href="&#34;',
        ),
        (
            '<a href="&#34;',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a href=&quot;',
            context.STATE_URL | context.DELIM_SPACE_OR_TAG_END
            | context.URL_PART_PRE_QUERY,
            '<a href="&#34;',
        ),
        (
            '<a href="/search?q=',
            context.STATE_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        ('<img alt="1">', context.STATE_TEXT),
        ('<img alt="1>"', context.STATE_TAG, '<img alt="1&gt;"'),
        ('<img alt="1>">', context.STATE_TEXT, '<img alt="1&gt;">'),
        ('<input checked type="checkbox"', context.STATE_TAG),
        ('<a onclick="', context.STATE_JS | context.DELIM_DOUBLE_QUOTE),
        (
            '<a onclick="//foo',
            context.STATE_JSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick="',
        ),
        (
            "<a onclick='//\n",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n",
        ),
        (
            "<a onclick='//\r\n",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n\n",  # \n\n is ok, \n is ok, \r\n is ok
        ),
        (
            u"<a onclick='//\u2028",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE,
            "<a onclick='\n",
        ),
        (
            '<a onclick="/*',
            context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onclick="/*/',
            context.STATE_JSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onclick="/**/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
            '<a onclick=" ',
        ),
        (
            '<a onkeypress="&quot;',
            context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onkeypress="&#34;',
        ),
        (
            "<a onclick='&quot;foo&quot;",
            context.STATE_JS | context.DELIM_SINGLE_QUOTE
            | context.JS_CTX_DIV_OP,
            "<a onclick='\"foo\"",
        ),
        (
            '<a onclick=&#39;foo&#39;',
            context.STATE_JS | context.DELIM_SPACE_OR_TAG_END
            | context.JS_CTX_DIV_OP,
            '<a onclick="\'foo\'',
        ),
        (
            '<a onclick=&#39;foo',
            context.STATE_JSSQ_STR | context.DELIM_SPACE_OR_TAG_END,
            '<a onclick="\'foo',
        ),
        (
            '<a onclick="&quot;foo\'',
            context.STATE_JSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onclick="&#34;foo\'',
        ),
        (
            '<a onclick="\'foo&quot;',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a onclick="\'foo&#34;',
        ),
        (
            '<A ONCLICK="\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="\'foo\'',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<a onclick="\'foo\\\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="\'foo\\\'',
            context.STATE_JSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        ('<script>/foo/ /=', context.STATE_JS | context.ELEMENT_SCRIPT),
        (
            '<a onclick="1 /foo',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<a onclick="1 /*c*/ /foo',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
            '<a onclick="1 /foo',
        ),
        (
            '<a onclick="/foo[/]',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo\\/',
            context.STATE_JSREGEXP | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a onclick="/foo/',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE
            | context.JS_CTX_DIV_OP,
        ),
        (
            '<input checked style="',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="//',
            context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style="',
        ),
        (
            '<a style="//</script>',
            context.STATE_CSSLINE_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style="',
        ),
        (
            "<a style='//\n",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            "<a style='\n",
        ),
        (
            "<a style='//\r",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
            "<a style='\n",
        ),
        (
            '<a style="/*',
            context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="/*/',
            context.STATE_CSSBLOCK_CMT | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="/**/',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
            '<a style=" ',
        ),
        (
            '<a style="background: \'',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: &quot;',
            context.STATE_CSSDQ_STR | context.DELIM_DOUBLE_QUOTE,
            '<a style="background: &#34;',
        ),
        (
            '<a style="background: \'/foo?img=',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        (
            '<a style="background: \'/',
            context.STATE_CSSSQ_STR | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(&#x22;/',
            context.STATE_CSSDQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
            '<a style="background: url(&#34;/',
        ),
        (
            '<a style="background: url(\'/',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(\'/)',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(\'/ ',
            context.STATE_CSSSQ_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url(/',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_PRE_QUERY,
        ),
        (
            '<a style="background: url( ',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url( /image?name=',
            context.STATE_CSS_URL | context.DELIM_DOUBLE_QUOTE
            | context.URL_PART_QUERY_OR_FRAG,
        ),
        (
            '<a style="background: url(x)',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url(\'x\'',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        (
            '<a style="background: url( x ',
            context.STATE_CSS | context.DELIM_DOUBLE_QUOTE,
        ),
        # HTML comments are expected to produce no output text at all.
        ('<!-- foo', context.STATE_HTMLCMT, ''),
        ('<!-->', context.STATE_HTMLCMT, ''),
        ('<!--->', context.STATE_HTMLCMT, ''),
        ('<!-- foo -->', context.STATE_TEXT, ''),
        ('<script', context.STATE_TAG | context.ELEMENT_SCRIPT),
        ('<script ', context.STATE_TAG | context.ELEMENT_SCRIPT),
        (
            '<script src="foo.js" ',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            "<script src='foo.js' ",
            context.STATE_TAG | context.ELEMENT_SCRIPT,
        ),
        (
            '<script type=text/javascript ',
            context.STATE_TAG | context.ELEMENT_SCRIPT,
            '<script type="text/javascript" ',
        ),
        (
            '<script>foo',
            context.STATE_JS | context.JS_CTX_DIV_OP
            | context.ELEMENT_SCRIPT,
        ),
        ('<script>foo</script>', context.STATE_TEXT),
        (
            '<script>foo</script><!--',
            context.STATE_HTMLCMT,
            '<script>foo</script>',
        ),
        (
            '<script>document.write("<p>foo</p>");',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            r'<script>document.write("<p>foo<\/script>");',
            context.STATE_JS | context.ELEMENT_SCRIPT,
        ),
        (
            '<script>document.write("<script>alert(1)</script>");',
            context.STATE_TEXT,
        ),
        ('<Script>', context.STATE_JS | context.ELEMENT_SCRIPT),
        (
            '<SCRIPT>foo',
            context.STATE_JS | context.JS_CTX_DIV_OP
            | context.ELEMENT_SCRIPT,
        ),
        (
            '<textarea>value',
            context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
        ),
        ('<textarea>value</textarea>', context.STATE_TEXT),
        ('<textarea>value</TEXTAREA>', context.STATE_TEXT),
        (
            '<textarea name=html><b',
            context.STATE_RCDATA | context.ELEMENT_TEXTAREA,
            '<textarea name="html"><b',
        ),
        ('<title>value', context.STATE_RCDATA | context.ELEMENT_TITLE),
        ('<style>value', context.STATE_CSS | context.ELEMENT_STYLE),
        ('<a xlink:href', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a xmlns', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a xmlns:foo', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a xmlnsxyz', context.STATE_ATTR_NAME),
        ('<a data-url', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a data-iconUri', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a data-urlItem', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a g:', context.STATE_ATTR_NAME),
        ('<a g:url', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a g:iconUri', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a g:urlItem', context.STATE_ATTR_NAME | context.ATTR_URL),
        ('<a g:value', context.STATE_ATTR_NAME),
        (
            "<a svg:style='",
            context.STATE_CSS | context.DELIM_SINGLE_QUOTE,
        ),
        ('<svg:font-face', context.STATE_TAG_NAME),
        (
            '<svg:a svg:onclick="',
            context.STATE_JS | context.DELIM_DOUBLE_QUOTE,
        ),
    )

    for test_case in tests:
        if len(test_case) == 2:
            # No explicit expectation: the text must pass through as-is.
            test_input, want_ctx = test_case
            want_text = test_input
        else:
            test_input, want_ctx, want_text = test_case
        got_ctx, got_text, _, _ = context_update.process_raw_text(
            test_input, 0)
        if got_ctx != want_ctx:
            # Contexts are integers; render them symbolically so that a
            # failure is readable.
            self.fail("input %r: want context\n\t%s\ngot\n\t%s"
                      % (test_input,
                         debug.context_to_string(want_ctx),
                         debug.context_to_string(got_ctx)))
        # assertEqual rather than the assertEquals alias, which is
        # deprecated and no longer exists in Python 3.12+.
        self.assertEqual(
            got_text, want_text,
            msg=("input %r: want text\n\t%r\ngot\n\t%r"
                 % (test_input, want_text, got_text)))