def context_to_string(ctx):
    """
    Used in debug mode to render a context, represented as an integer,
    as a diagnostic string of the form "[Context <names...>]".

    The state name is always looked up; every other field is only looked
    up in its name table when the field value is truthy.
    """
    state = context.state_of(ctx)
    # Each remaining field of the context paired with the table that
    # names its values.
    optional_fields = (
        (context.element_type_of(ctx), _ELEMENT_NAMES),
        (context.attr_type_of(ctx), _ATTR_NAMES),
        (context.delim_type_of(ctx), _DELIM_NAMES),
        (context.js_ctx_of(ctx), _JS_CTX_NAMES),
        (context.url_part_of(ctx), _URL_PART_NAMES),
    )

    labels = [_STATE_NAMES[state]]
    for value, names in optional_fields:
        # A falsy field value is kept as-is (no table lookup) and is
        # dropped by the filter below.
        labels.append(value and names[value])

    # NOTE: falsy labels are filtered out before the 'UNKNOWN' fallback is
    # evaluated, so the fallback never actually appears in the output.
    shown = [label or 'UNKNOWN' for label in labels if label]
    return "[Context %s]" % " ".join(shown)
def context_to_string(ctx):
    """
    Used in debug mode to render a context, represented as an integer,
    as a diagnostic string of the form "[Context <names...>]".

    The state name is always looked up; every other field is only looked
    up in its name table when the field value is truthy.
    """
    state = context.state_of(ctx)
    # Each remaining field of the context paired with the table that
    # names its values.
    optional_fields = (
        (context.element_type_of(ctx), _ELEMENT_NAMES),
        (context.attr_type_of(ctx), _ATTR_NAMES),
        (context.delim_type_of(ctx), _DELIM_NAMES),
        (context.js_ctx_of(ctx), _JS_CTX_NAMES),
        (context.url_part_of(ctx), _URL_PART_NAMES),
    )

    labels = [_STATE_NAMES[state]]
    for value, names in optional_fields:
        # A falsy field value is kept as-is (no table lookup) and is
        # dropped by the filter below.
        labels.append(value and names[value])

    # NOTE: falsy labels are filtered out before the 'UNKNOWN' fallback is
    # evaluated, so the fallback never actually appears in the output.
    shown = [label or 'UNKNOWN' for label in labels if label]
    return "[Context %s]" % " ".join(shown)
def test_is_regex_preceder(self):
    """
    Test the heuristic that is used to update JS_CTX_*.

    Each table entry pairs the JS context expected after a snippet of
    JavaScript with that snippet.  Every snippet is fed to
    js.next_js_ctx from several starting contexts, and the result must
    be the expected JS context combined with the state of the starting
    context.
    """
    tests = (
        # Statement terminators precede regexps.
        (context.JS_CTX_REGEX, ";"),
        # This is not airtight.
        #     ({ valueOf: function () { return 1 } } / 2)
        # is valid JavaScript but in practice, devs do not do this.
        # A block followed by a statement starting with a RegExp is
        # much more common:
        #     while (x) {...} /foo/.test(x) || panic()
        (context.JS_CTX_REGEX, "}"),
        # But member, call, grouping, and array expression terminators
        # precede div ops.
        (context.JS_CTX_DIV_OP, ")"),
        (context.JS_CTX_DIV_OP, "]"),
        # At the start of a primary expression, array, or expression
        # statement, expect a regexp.
        (context.JS_CTX_REGEX, "("),
        (context.JS_CTX_REGEX, "["),
        (context.JS_CTX_REGEX, "{"),
        # Assignment operators precede regexps as do all exclusively
        # prefix and binary operators.
        (context.JS_CTX_REGEX, "="),
        (context.JS_CTX_REGEX, "+="),
        (context.JS_CTX_REGEX, "*="),
        (context.JS_CTX_REGEX, "*"),
        (context.JS_CTX_REGEX, "!"),
        # Whether the + or - is infix or prefix, it cannot precede a
        # div op.
        (context.JS_CTX_REGEX, "+"),
        (context.JS_CTX_REGEX, "-"),
        # An incr/decr op precedes a div operator.
        # This is not airtight.  In (g = ++/h/i) a regexp follows a
        # pre-increment operator, but in practice devs do not try to
        # increment or decrement regular expressions.
        # (g++/h/i) where ++ is a postfix operator on g is much more
        # common.
        (context.JS_CTX_DIV_OP, "--"),
        (context.JS_CTX_DIV_OP, "++"),
        (context.JS_CTX_DIV_OP, "x--"),
        # When we have many dashes or pluses, then they are grouped
        # left to right.
        (context.JS_CTX_REGEX, "x---"),  # A postfix -- then a -.
        # return followed by a slash returns the regexp literal or the
        # slash starts a regexp literal in an expression statement that
        # is dead code.
        (context.JS_CTX_REGEX, "return"),
        (context.JS_CTX_REGEX, "return "),
        (context.JS_CTX_REGEX, "return\t"),
        (context.JS_CTX_REGEX, "return\n"),
        (context.JS_CTX_REGEX, u"return\u2028"),
        # Identifiers can be divided and cannot validly be followed by
        # a regular expression.  Semicolon insertion cannot happen
        # between an identifier and a regular expression on a new line
        # because the one token lookahead for semicolon insertion has
        # to conclude that it could be a div binary op and treat it as
        # such.
        (context.JS_CTX_DIV_OP, "x"),
        (context.JS_CTX_DIV_OP, "x "),
        (context.JS_CTX_DIV_OP, "x\t"),
        (context.JS_CTX_DIV_OP, "x\n"),
        (context.JS_CTX_DIV_OP, u"x\u2028"),
        (context.JS_CTX_DIV_OP, "preturn"),
        # Numbers precede div ops.
        (context.JS_CTX_DIV_OP, "0"),
        # Dots that are part of a number are div preceders.
        (context.JS_CTX_DIV_OP, "0."),
    )

    # The starting contexts cover both JS_CTX values and one value that
    # also carries a state, to check that the state is preserved.
    start_contexts = (
        context.JS_CTX_REGEX,
        context.JS_CTX_DIV_OP,
        context.JS_CTX_DIV_OP | context.STATE_JS,
    )

    for want_ctx, js_code in tests:
        for start in start_contexts:
            got = js.next_js_ctx(js_code, start)
            want = want_ctx | context.state_of(start)
            # assertEqual rather than the assertEquals alias, which was
            # deprecated and later removed from unittest.
            self.assertEqual(
                want, got,
                "%s: want %s got %s" % (
                    js_code,
                    debug.context_to_string(want),
                    debug.context_to_string(got)))

    # Blank input must leave the context unchanged.
    self.assertEqual(
        context.STATE_JS | context.JS_CTX_REGEX,
        js.next_js_ctx(" ", context.STATE_JS | context.JS_CTX_REGEX),
        "Blank tokens")
    self.assertEqual(
        context.STATE_JS | context.JS_CTX_DIV_OP,
        js.next_js_ctx(" ", context.STATE_JS | context.JS_CTX_DIV_OP),
        "Blank tokens")
def esc_mode_for_hole(context_before):
    """
    Given a context in which an untrusted value hole appears, computes
    the escaping modes needed to render that untrusted value safe for
    interpolation and the context after the hole.

    context_before - The input context before the substitution.

    Returns a triple (context after, (escaping_modes...,), problem).
    problem is None unless the hole appears in an ambiguous URL context,
    in which case it is a message saying so and the context after is
    derived from context.STATE_ERROR.
    """
    ctx = context.force_epsilon_transition(context_before)
    state = context.state_of(ctx)
    url_part = context.url_part_of(ctx)
    esc_modes = [ESC_MODE_FOR_STATE[state]]
    problem = None

    # Adjust the escaping modes according to how far into a URL we are.
    if url_part == context.URL_PART_NONE:
        # Make sure that at the start of a URL, we filter out dangerous
        # protocols.
        if state in (context.STATE_URL, context.STATE_CSS_URL,
                     context.STATE_CSSDQ_URL, context.STATE_CSSSQ_URL):
            esc_modes = [ESC_MODE_FILTER_URL, ESC_MODE_NORMALIZE_URL]
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
        elif state in (context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR):
            # Filter first, then apply the escaping mode for the state.
            esc_modes.insert(0, ESC_MODE_FILTER_URL)
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
    elif url_part == context.URL_PART_PRE_QUERY:
        if state not in (context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR):
            esc_modes[0] = ESC_MODE_NORMALIZE_URL
    elif url_part == context.URL_PART_QUERY_OR_FRAG:
        esc_modes[0] = ESC_MODE_ESCAPE_URL
    elif url_part == context.URL_PART_UNKNOWN:
        ctx = context.STATE_ERROR
        problem = 'hole appears in an ambiguous URL context'

    # State specific adjustments.  These use the state computed before
    # any error transition above.
    if state == context.STATE_JS:
        ctx = (ctx & ~context.JS_CTX_ALL) | context.JS_CTX_DIV_OP
    elif (state == context.STATE_ATTR_NAME
          and context.attr_type_of(ctx) != context.ATTR_NONE):
        esc_modes[0] = ESC_MODE_FILTER_HTML_ATTR_SUFFIX

    # No escaping mode is available for the first slot.
    if esc_modes[0] is None:
        ctx = context.STATE_ERROR

    final_mode = esc_modes[-1]
    delim_type = context.delim_type_of(ctx)
    if delim_type != context.DELIM_NONE:
        # Figure out how to escape the attribute value.
        if final_mode != ESC_MODE_ESCAPE_HTML_ATTRIBUTE:
            esc_modes.append(ESC_MODE_ESCAPE_HTML_ATTRIBUTE)
        if (context.delim_type_of(context_before) == context.DELIM_NONE
                and delim_type == context.DELIM_SPACE_OR_TAG_END):
            esc_modes.append(ESC_MODE_OPEN_QUOTE)

    # Drop modes made redundant by the most recently kept mode.
    # If, for all x, f(g(x)) == g(x), we can skip f.
    previous = esc_modes[0]
    kept = [previous]
    for mode in esc_modes[1:]:
        if (previous, mode) in REDUNDANT_ESC_MODES:
            continue
        kept.append(mode)
        previous = mode

    return ctx, tuple(kept), problem
def esc_mode_for_hole(context_before):
    """
    Given a context in which an untrusted value hole appears, computes
    the escaping modes needed to render that untrusted value safe for
    interpolation and the context after the hole.

    context_before - The input context before the substitution.

    Returns a triple (context after, (escaping_modes...,), problem).
    problem is None unless the hole appears in an ambiguous URL context,
    in which case it is a message saying so and the context after is
    derived from context.STATE_ERROR.
    """
    ctx = context.force_epsilon_transition(context_before)
    state = context.state_of(ctx)
    url_part = context.url_part_of(ctx)
    esc_modes = [ESC_MODE_FOR_STATE[state]]
    problem = None

    # Adjust the escaping modes according to how far into a URL we are.
    if url_part == context.URL_PART_NONE:
        # Make sure that at the start of a URL, we filter out dangerous
        # protocols.
        if state in (context.STATE_URL, context.STATE_CSS_URL,
                     context.STATE_CSSDQ_URL, context.STATE_CSSSQ_URL):
            esc_modes = [ESC_MODE_FILTER_URL, ESC_MODE_NORMALIZE_URL]
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
        elif state in (context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR):
            # Filter first, then apply the escaping mode for the state.
            esc_modes.insert(0, ESC_MODE_FILTER_URL)
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
    elif url_part == context.URL_PART_PRE_QUERY:
        if state not in (context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR):
            esc_modes[0] = ESC_MODE_NORMALIZE_URL
    elif url_part == context.URL_PART_QUERY_OR_FRAG:
        esc_modes[0] = ESC_MODE_ESCAPE_URL
    elif url_part == context.URL_PART_UNKNOWN:
        ctx = context.STATE_ERROR
        problem = 'hole appears in an ambiguous URL context'

    # State specific adjustments.  These use the state computed before
    # any error transition above.
    if state == context.STATE_JS:
        ctx = (ctx & ~context.JS_CTX_ALL) | context.JS_CTX_DIV_OP
    elif (state == context.STATE_ATTR_NAME
          and context.attr_type_of(ctx) != context.ATTR_NONE):
        esc_modes[0] = ESC_MODE_FILTER_HTML_ATTR_SUFFIX

    # No escaping mode is available for the first slot.
    if esc_modes[0] is None:
        ctx = context.STATE_ERROR

    final_mode = esc_modes[-1]
    delim_type = context.delim_type_of(ctx)
    if delim_type != context.DELIM_NONE:
        # Figure out how to escape the attribute value.
        if final_mode != ESC_MODE_ESCAPE_HTML_ATTRIBUTE:
            esc_modes.append(ESC_MODE_ESCAPE_HTML_ATTRIBUTE)
        if (context.delim_type_of(context_before) == context.DELIM_NONE
                and delim_type == context.DELIM_SPACE_OR_TAG_END):
            esc_modes.append(ESC_MODE_OPEN_QUOTE)

    # Drop modes made redundant by the most recently kept mode.
    # If, for all x, f(g(x)) == g(x), we can skip f.
    previous = esc_modes[0]
    kept = [previous]
    for mode in esc_modes[1:]:
        if (previous, mode) in REDUNDANT_ESC_MODES:
            continue
        kept.append(mode)
        previous = mode

    return ctx, tuple(kept), problem