예제 #1
0
 def testOrderedDict_preservesOrder(self):
     # dict iteration order is not specified prior to Python 3.7, and is
     # observably different from insertion order in CPython 2.7.
     od = collections.OrderedDict()
     for c in string.ascii_lowercase:
         od[c] = c
     self.assertEqual(len(od), 26, od)
     self.assertEqual(list(od), list(json_util.Cleanse(od)))
예제 #2
0
def Respond(
    request,
    content,
    content_type,
    code=200,
    expires=0,
    content_encoding=None,
    encoding="utf-8",
    csp_scripts_sha256s=None,
    headers=None,
):
    """Construct a werkzeug Response.

    Responses are transmitted to the browser with compression if: a) the browser
    supports it; b) it's sane to compress the content_type in question; and c)
    the content isn't already compressed, as indicated by the content_encoding
    parameter.

    Browser and proxy caching is completely disabled by default. If the expires
    parameter is greater than zero then the response will be able to be cached by
    the browser for that many seconds; however, proxies are still forbidden from
    caching so that developers can bypass the cache with Ctrl+Shift+R.

    For textual content that isn't JSON, the encoding parameter is used as the
    transmission charset which is automatically appended to the Content-Type
    header. That is unless of course the content_type parameter contains a
    charset parameter. If the two disagree, the characters in content will be
    transcoded to the latter.

    If content_type declares a JSON media type, then content MAY be a dict, list,
    tuple, or set, in which case this function has an implicit composition with
    json_util.Cleanse and json.dumps. The encoding parameter is used to decode
    byte strings within the JSON object; therefore transmitting binary data
    within JSON is not permitted. JSON is transmitted as ASCII unless the
    content_type parameter explicitly defines a charset parameter, in which case
    the serialized JSON bytes will use that instead of escape sequences.

    Args:
      request: A werkzeug Request object. Used mostly to check the
        Accept-Encoding header.
      content: Payload data as byte string, unicode string, or maybe JSON.
      content_type: Media type and optionally an output charset.
      code: Numeric HTTP status code to use.
      expires: Second duration for browser caching.
      content_encoding: Encoding if content is already encoded, e.g. 'gzip'.
      encoding: Input charset if content parameter has byte strings.
      csp_scripts_sha256s: List of base64 serialized sha256 of whitelisted script
        elements for script-src of the Content-Security-Policy. If it is None, the
        HTML will disallow any script to execute. It is only be used when the
        content_type is text/html.
      headers: Any additional headers to include on the response, as a
        list of key-value tuples: e.g., `[("Allow", "GET")]`. In case of
        conflict, these may be overridden with headers added by this function.

    Returns:
      A werkzeug Response object (a WSGI application).
    """

    mimetype = _EXTRACT_MIMETYPE_PATTERN.search(content_type).group(0)
    charset_match = _EXTRACT_CHARSET_PATTERN.search(content_type)
    charset = charset_match.group(1) if charset_match else encoding
    textual = charset_match or mimetype in _TEXTUAL_MIMETYPES
    if mimetype in _JSON_MIMETYPES and isinstance(content,
                                                  (dict, list, set, tuple)):
        content = json.dumps(json_util.Cleanse(content, encoding),
                             ensure_ascii=not charset_match)

    # Ensure correct output encoding, transcoding if necessary.
    if charset != encoding and isinstance(content, bytes):
        content = content.decode(encoding)
    if isinstance(content, str):
        content = content.encode(charset)

    if textual and not charset_match and mimetype not in _JSON_MIMETYPES:
        content_type += "; charset=" + charset
    gzip_accepted = _ALLOWS_GZIP_PATTERN.search(
        request.headers.get("Accept-Encoding", ""))
    # Automatically gzip uncompressed text data if accepted.
    if textual and not content_encoding and gzip_accepted:
        out = six.BytesIO()
        # Set mtime to zero to make payload for a given input deterministic.
        with gzip.GzipFile(fileobj=out, mode="wb", compresslevel=3,
                           mtime=0) as f:
            f.write(content)
        content = out.getvalue()
        content_encoding = "gzip"

    content_length = len(content)
    direct_passthrough = False
    # Automatically streamwise-gunzip precompressed data if not accepted.
    if content_encoding == "gzip" and not gzip_accepted:
        gzip_file = gzip.GzipFile(fileobj=six.BytesIO(content), mode="rb")
        # Last 4 bytes of gzip formatted data (little-endian) store the original
        # content length mod 2^32; we just assume it's the content length. That
        # means we can't streamwise-gunzip >4 GB precompressed file; this is ok.
        content_length = struct.unpack("<I", content[-4:])[0]
        content = werkzeug.wsgi.wrap_file(request.environ, gzip_file)
        content_encoding = None
        direct_passthrough = True

    headers = list(headers or [])
    headers.append(("Content-Length", str(content_length)))
    headers.append(("X-Content-Type-Options", "nosniff"))
    if content_encoding:
        headers.append(("Content-Encoding", content_encoding))
    if expires > 0:
        e = wsgiref.handlers.format_date_time(time.time() + float(expires))
        headers.append(("Expires", e))
        headers.append(("Cache-Control", "private, max-age=%d" % expires))
    else:
        headers.append(("Expires", "0"))
        headers.append(("Cache-Control", "no-cache, must-revalidate"))
    if mimetype == _HTML_MIMETYPE:
        _CSP_IMG_DOMAINS_WHITELIST
        _CSP_STYLE_DOMAINS_WHITELIST
        _CSP_FONT_DOMAINS_WHITELIST
        _CSP_FRAME_DOMAINS_WHITELIST
        _CSP_SCRIPT_DOMAINS_WHITELIST
        _CSP_CONNECT_DOMAINS_WHITELIST

        frags = (_CSP_SCRIPT_DOMAINS_WHITELIST + [
            "'self'" if _CSP_SCRIPT_SELF else "",
            "'unsafe-eval'" if _CSP_SCRIPT_UNSAFE_EVAL else "",
        ] + [
            "'sha256-{}'".format(sha256)
            for sha256 in (csp_scripts_sha256s or [])
        ])
        script_srcs = _create_csp_string(*frags)

        csp_string = ";".join([
            "default-src 'self'",
            "font-src %s" %
            _create_csp_string("'self'", *_CSP_FONT_DOMAINS_WHITELIST),
            # Dynamic plugins are rendered inside an iframe.
            "frame-src %s" %
            _create_csp_string("'self'", *_CSP_FRAME_DOMAINS_WHITELIST),
            "img-src %s" % _create_csp_string(
                "'self'",
                # used by favicon
                "data:",
                # used by What-If tool for image sprites.
                "blob:",
                *_CSP_IMG_DOMAINS_WHITELIST),
            "object-src 'none'",
            "style-src %s" % _create_csp_string(
                "'self'",
                # used by google-chart
                "https://www.gstatic.com",
                "data:",
                # inline styles: Polymer templates + d3 uses inline styles.
                "'unsafe-inline'",
                *_CSP_STYLE_DOMAINS_WHITELIST),
            "connect-src %s" %
            _create_csp_string("'self'", *_CSP_CONNECT_DOMAINS_WHITELIST),
            "script-src %s" % script_srcs,
        ])

        headers.append(("Content-Security-Policy", csp_string))

    if request.method == "HEAD":
        content = None

    return werkzeug.wrappers.Response(
        response=content,
        status=code,
        headers=headers,
        content_type=content_type,
        direct_passthrough=direct_passthrough,
    )
예제 #3
0
def Respond(request,
            content,
            content_type,
            code=200,
            expires=0,
            content_encoding=None,
            encoding='utf-8'):
    """Construct a werkzeug Response.

  Responses are transmitted to the browser with compression if: a) the browser
  supports it; b) it's sane to compress the content_type in question; and c)
  the content isn't already compressed, as indicated by the content_encoding
  parameter.

  Browser and proxy caching is completely disabled by default. If the expires
  parameter is greater than zero then the response will be able to be cached by
  the browser for that many seconds; however, proxies are still forbidden from
  caching so that developers can bypass the cache with Ctrl+Shift+R.

  For textual content that isn't JSON, the encoding parameter is used as the
  transmission charset which is automatically appended to the Content-Type
  header. That is unless of course the content_type parameter contains a
  charset parameter. If the two disagree, the characters in content will be
  transcoded to the latter.

  If content_type declares a JSON media type, then content MAY be a dict, list,
  tuple, or set, in which case this function has an implicit composition with
  json_util.Cleanse and json.dumps. The encoding parameter is used to decode
  byte strings within the JSON object; therefore transmitting binary data
  within JSON is not permitted. JSON is transmitted as ASCII unless the
  content_type parameter explicitly defines a charset parameter, in which case
  the serialized JSON bytes will use that instead of escape sequences.

  Args:
    request: A werkzeug Request object. Used mostly to check the
      Accept-Encoding header.
    content: Payload data as byte string, unicode string, or maybe JSON.
    content_type: Media type and optionally an output charset.
    code: Numeric HTTP status code to use.
    expires: Second duration for browser caching.
    content_encoding: Encoding if content is already encoded, e.g. 'gzip'.
    encoding: Input charset if content parameter has byte strings.

  Returns:
    A werkzeug Response object (a WSGI application).
  """

    mimetype = _EXTRACT_MIMETYPE_PATTERN.search(content_type).group(0)
    charset_match = _EXTRACT_CHARSET_PATTERN.search(content_type)
    charset = charset_match.group(1) if charset_match else encoding
    textual = charset_match or mimetype in _TEXTUAL_MIMETYPES
    if (mimetype in _JSON_MIMETYPES and isinstance(content,
                                                   (dict, list, set, tuple))):
        content = json.dumps(json_util.Cleanse(content, encoding),
                             ensure_ascii=not charset_match)
    if charset != encoding:
        content = tf.compat.as_text(content, encoding)
    content = tf.compat.as_bytes(content, charset)
    if textual and not charset_match and mimetype not in _JSON_MIMETYPES:
        content_type += '; charset=' + charset
    gzip_accepted = _ALLOWS_GZIP_PATTERN.search(
        request.headers.get('Accept-Encoding', ''))
    # Automatically gzip uncompressed text data if accepted.
    if textual and not content_encoding and gzip_accepted:
        out = six.BytesIO()
        # Set mtime to zero to make payload for a given input deterministic.
        with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=3,
                           mtime=0) as f:
            f.write(content)
        content = out.getvalue()
        content_encoding = 'gzip'

    content_length = len(content)
    direct_passthrough = False
    # Automatically streamwise-gunzip precompressed data if not accepted.
    if content_encoding == 'gzip' and not gzip_accepted:
        gzip_file = gzip.GzipFile(fileobj=six.BytesIO(content), mode='rb')
        # Last 4 bytes of gzip formatted data (little-endian) store the original
        # content length mod 2^32; we just assume it's the content length. That
        # means we can't streamwise-gunzip >4 GB precompressed file; this is ok.
        content_length = struct.unpack('<I', content[-4:])[0]
        content = werkzeug.wsgi.wrap_file(request.environ, gzip_file)
        content_encoding = None
        direct_passthrough = True

    headers = []
    headers.append(('Content-Length', str(content_length)))
    if content_encoding:
        headers.append(('Content-Encoding', content_encoding))
    if expires > 0:
        e = wsgiref.handlers.format_date_time(time.time() + float(expires))
        headers.append(('Expires', e))
        headers.append(('Cache-Control', 'private, max-age=%d' % expires))
    else:
        headers.append(('Expires', '0'))
        headers.append(('Cache-Control', 'no-cache, must-revalidate'))

    if request.method == 'HEAD':
        content = None

    return werkzeug.wrappers.Response(response=content,
                                      status=code,
                                      headers=headers,
                                      content_type=content_type,
                                      direct_passthrough=direct_passthrough)
예제 #4
0
 def testByteString_turnsIntoUnicodeString(self):
     self.assertEqual(json_util.Cleanse(b'\xc2\xa3'),
                      u'\u00a3')  # is # sterling
예제 #5
0
 def testSet_turnsIntoSortedList(self):
     self.assertEqual(json_util.Cleanse(set(['b', 'a'])), ['a', 'b'])
예제 #6
0
 def testTuple_turnsIntoList(self):
     self.assertEqual(json_util.Cleanse(('a', 'b')), ['a', 'b'])
예제 #7
0
 def _assertWrapsAs(self, to_wrap, expected):
     """Asserts that |to_wrap| becomes |expected| when wrapped."""
     actual = json_util.Cleanse(to_wrap)
     for a, e in zip(actual, expected):
         self.assertEqual(e, a)
예제 #8
0
def Respond(request,
            content,
            content_type,
            code=200,
            expires=0,
            content_encoding=None,
            encoding='utf-8',
            csp_scripts_sha256s=None):
    """Construct a werkzeug Response.

  Responses are transmitted to the browser with compression if: a) the browser
  supports it; b) it's sane to compress the content_type in question; and c)
  the content isn't already compressed, as indicated by the content_encoding
  parameter.

  Browser and proxy caching is completely disabled by default. If the expires
  parameter is greater than zero then the response will be able to be cached by
  the browser for that many seconds; however, proxies are still forbidden from
  caching so that developers can bypass the cache with Ctrl+Shift+R.

  For textual content that isn't JSON, the encoding parameter is used as the
  transmission charset which is automatically appended to the Content-Type
  header. That is unless of course the content_type parameter contains a
  charset parameter. If the two disagree, the characters in content will be
  transcoded to the latter.

  If content_type declares a JSON media type, then content MAY be a dict, list,
  tuple, or set, in which case this function has an implicit composition with
  json_util.Cleanse and json.dumps. The encoding parameter is used to decode
  byte strings within the JSON object; therefore transmitting binary data
  within JSON is not permitted. JSON is transmitted as ASCII unless the
  content_type parameter explicitly defines a charset parameter, in which case
  the serialized JSON bytes will use that instead of escape sequences.

  Args:
    request: A werkzeug Request object. Used mostly to check the
      Accept-Encoding header.
    content: Payload data as byte string, unicode string, or maybe JSON.
    content_type: Media type and optionally an output charset.
    code: Numeric HTTP status code to use.
    expires: Second duration for browser caching.
    content_encoding: Encoding if content is already encoded, e.g. 'gzip'.
    encoding: Input charset if content parameter has byte strings.
    csp_scripts_sha256s: List of base64 serialized sha256 of whitelisted script
      elements for script-src of the Content-Security-Policy. If it is None, the
      HTML will disallow any script to execute. It is only be used when the
      content_type is text/html.

  Returns:
    A werkzeug Response object (a WSGI application).
  """

    mimetype = _EXTRACT_MIMETYPE_PATTERN.search(content_type).group(0)
    charset_match = _EXTRACT_CHARSET_PATTERN.search(content_type)
    charset = charset_match.group(1) if charset_match else encoding
    textual = charset_match or mimetype in _TEXTUAL_MIMETYPES
    if (mimetype in _JSON_MIMETYPES and isinstance(content,
                                                   (dict, list, set, tuple))):
        content = json.dumps(json_util.Cleanse(content, encoding),
                             ensure_ascii=not charset_match)
    if charset != encoding:
        content = tf.compat.as_text(content, encoding)
    content = tf.compat.as_bytes(content, charset)
    if textual and not charset_match and mimetype not in _JSON_MIMETYPES:
        content_type += '; charset=' + charset
    gzip_accepted = _ALLOWS_GZIP_PATTERN.search(
        request.headers.get('Accept-Encoding', ''))
    # Automatically gzip uncompressed text data if accepted.
    if textual and not content_encoding and gzip_accepted:
        out = six.BytesIO()
        # Set mtime to zero to make payload for a given input deterministic.
        with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=3,
                           mtime=0) as f:
            f.write(content)
        content = out.getvalue()
        content_encoding = 'gzip'

    content_length = len(content)
    direct_passthrough = False
    # Automatically streamwise-gunzip precompressed data if not accepted.
    if content_encoding == 'gzip' and not gzip_accepted:
        gzip_file = gzip.GzipFile(fileobj=six.BytesIO(content), mode='rb')
        # Last 4 bytes of gzip formatted data (little-endian) store the original
        # content length mod 2^32; we just assume it's the content length. That
        # means we can't streamwise-gunzip >4 GB precompressed file; this is ok.
        content_length = struct.unpack('<I', content[-4:])[0]
        content = werkzeug.wsgi.wrap_file(request.environ, gzip_file)
        content_encoding = None
        direct_passthrough = True

    headers = []
    headers.append(('Content-Length', str(content_length)))
    headers.append(('X-Content-Type-Options', 'nosniff'))
    if content_encoding:
        headers.append(('Content-Encoding', content_encoding))
    if expires > 0:
        e = wsgiref.handlers.format_date_time(time.time() + float(expires))
        headers.append(('Expires', e))
        headers.append(('Cache-Control', 'private, max-age=%d' % expires))
    else:
        headers.append(('Expires', '0'))
        headers.append(('Cache-Control', 'no-cache, must-revalidate'))
    if mimetype == _HTML_MIMETYPE:
        _validate_global_whitelist(_CSP_IMG_DOMAINS_WHITELIST)
        _validate_global_whitelist(_CSP_STYLE_DOMAINS_WHITELIST)
        _validate_global_whitelist(_CSP_FONT_DOMAINS_WHITELIST)
        _validate_global_whitelist(_CSP_SCRIPT_DOMAINS_WHITELIST)

        # TODO(stephanwlee): remove `'strict dynamic'` when dynamic plugin
        # resources can be hashed upfront.
        enable_strict_dynamic = (_CSP_SCRIPT_HASHES_STRICT_DYNAMIC
                                 and csp_scripts_sha256s)
        frags = _CSP_SCRIPT_DOMAINS_WHITELIST + [
            "'self'" if _CSP_SCRIPT_SELF else '',
            'strict-dynamic' if enable_strict_dynamic else '',
        ] + [
            "'sha256-{}'".format(sha256)
            for sha256 in (csp_scripts_sha256s or [])
        ]
        script_srcs = _create_csp_string(*frags)

        csp_string = ';'.join([
            "default-src 'none'",
            "base-uri 'self'",
            "connect-src 'self'",
            'font-src %s' %
            _create_csp_string("'self'", *_CSP_FONT_DOMAINS_WHITELIST),
            # data uri used by favicon
            'img-src %s' %
            _create_csp_string("'self'", 'data:', *_CSP_IMG_DOMAINS_WHITELIST),
            "object-src 'none'",
            'style-src %s' % _create_csp_string(
                # used by google-chart
                'https://www.gstatic.com',
                'data:',
                # inline styles: Polymer templates + d3 uses inline styles.
                "'unsafe-inline'",
                *_CSP_STYLE_DOMAINS_WHITELIST),
            "script-src %s" % script_srcs,
        ])

        headers.append(('Content-Security-Policy', csp_string))

    if request.method == 'HEAD':
        content = None

    return werkzeug.wrappers.Response(response=content,
                                      status=code,
                                      headers=headers,
                                      content_type=content_type,
                                      direct_passthrough=direct_passthrough)
예제 #9
0
 def testSet_turnsIntoSortedList(self):
     self.assertEqual(json_util.Cleanse(set(["b", "a"])), ["a", "b"])
예제 #10
0
 def testTuple_turnsIntoList(self):
     self.assertEqual(json_util.Cleanse(("a", "b")), ["a", "b"])