def _build_wsgi_request_data(request):
    """Build the request-data dict for a WSGI ``environ`` mapping.

    Args:
        request: the WSGI environ (a dict-like object; accessed with
            ``get``, ``items``, ``in`` and item lookup).

    Returns:
        A dict with the keys ``url``, ``user_ip``, ``method`` and
        ``headers``; ``GET`` is added when ``QUERY_STRING`` is present and
        ``body`` is added when a positive ``CONTENT_LENGTH`` is given and
        ``wsgi.input`` supports ``seek``/``tell``.
    """
    request_data = {
        'url': wsgiref.util.request_uri(request),
        'user_ip': _wsgi_extract_user_ip(request),
        'method': request.get('REQUEST_METHOD'),
    }

    if 'QUERY_STRING' in request:
        params = parse_qs(request['QUERY_STRING'], keep_blank_values=True)
        # Collapse single item arrays
        request_data['GET'] = dict(
            (k, v[0] if len(v) == 1 else v) for k, v in params.items())

    request_data['headers'] = _extract_wsgi_headers(request.items())

    try:
        length = int(request.get('CONTENT_LENGTH', 0))
    except (TypeError, ValueError):
        # Empty, non-numeric or None CONTENT_LENGTH: treat as "no body".
        length = 0

    # Named body_stream rather than `input` to avoid shadowing the builtin.
    body_stream = request.get('wsgi.input')
    # Only positive lengths are read: read() with a negative size would
    # consume the entire stream.
    if (length > 0 and body_stream
            and hasattr(body_stream, 'seek') and hasattr(body_stream, 'tell')):
        pos = body_stream.tell()
        body_stream.seek(0, 0)
        try:
            request_data['body'] = body_stream.read(length)
        finally:
            # Always put the stream back where the application left it,
            # even if reading the body fails.
            body_stream.seek(pos, 0)

    return request_data
def redact(self, url_string):
    """Return ``url_string`` with sensitive parts redacted.

    Query parameters whose lower-cased name is in ``self.params_to_scrub``
    have every value passed through the parent class's ``redact``. The
    username and password in the netloc are redacted the same way when
    ``self.scrub_username`` / ``self.scrub_password`` are set.

    Args:
        url_string: the candidate URL. Strings matching
            ``_starts_with_auth_re`` are parsed as if prefixed with ``//``.

    Returns:
        The scrubbed URL string, or ``url_string`` unchanged when it cannot
        be parsed as a URL or has no netloc.
    """
    _redact = super(ScrubUrlTransform, self).redact

    # Parse a prefixed copy so that the caller's original string can be
    # returned untouched on the "not a URL" paths below.
    missing_colon_double_slash = False
    candidate = url_string
    if _starts_with_auth_re.match(url_string):
        missing_colon_double_slash = True
        candidate = '//%s' % url_string

    try:
        url_parts = urlsplit(candidate)
        qs_params = parse_qs(url_parts.query, keep_blank_values=True)
    except Exception:
        # This isn't a URL, return url_string which is a no-op
        # for this transform
        return url_string

    netloc = url_parts.netloc

    # If there's no netloc, give up
    if not netloc:
        return url_string

    for qs_param, vals in iteritems(qs_params):
        if qs_param.lower() in self.params_to_scrub:
            # A real list (not a lazy map object) so that
            # urlencode(doseq=True) can take len() of it.
            qs_params[qs_param] = [_redact(val) for val in vals]

    scrubbed_qs = urlencode(qs_params, doseq=True)

    # Split the netloc the same way urlsplit derives username/password and
    # rebuild it, so that only the credential fields are rewritten and an
    # identical substring in the host (or the other credential) is left
    # alone.
    userinfo, at_sign, hostinfo = netloc.rpartition('@')
    username, colon, password = userinfo.partition(':')

    if self.scrub_username and username:
        username = _redact(username)

    if self.scrub_password and password:
        password = _redact(password)

    netloc = ''.join((username, colon, password, at_sign, hostinfo))

    scrubbed_url = (url_parts.scheme,
                    netloc,
                    url_parts.path,
                    scrubbed_qs,
                    url_parts.fragment)

    scrubbed_url_string = urlunsplit(scrubbed_url)

    # Remove exactly the '//' that was added for parsing.
    if missing_colon_double_slash and scrubbed_url_string.startswith('//'):
        scrubbed_url_string = scrubbed_url_string[2:]

    return scrubbed_url_string
def redact(self, url_string):
    """Return ``url_string`` with sensitive parts redacted.

    Query parameters whose lower-cased name is in ``self.params_to_scrub``
    have every value passed through the parent class's ``redact``. The
    username and password in the netloc are redacted the same way when
    ``self.scrub_username`` / ``self.scrub_password`` are set.

    Args:
        url_string: the candidate URL. Strings matching
            ``_starts_with_auth_re`` are parsed as if prefixed with ``//``.

    Returns:
        The scrubbed URL string, or ``url_string`` unchanged when it cannot
        be parsed as a URL or has no netloc.
    """
    _redact = super(ScrubUrlTransform, self).redact

    # Parse a prefixed copy so that the caller's original string can be
    # returned untouched on the "not a URL" paths below.
    missing_colon_double_slash = False
    candidate = url_string
    if _starts_with_auth_re.match(url_string):
        missing_colon_double_slash = True
        candidate = '//%s' % url_string

    try:
        url_parts = urlsplit(candidate)
        # keep_blank_values=True so that parameters with an empty value
        # (e.g. "?flag=") survive the parse/re-encode round trip instead of
        # being silently dropped from the scrubbed URL.
        qs_params = parse_qs(url_parts.query, keep_blank_values=True)
    except Exception:
        # This isn't a URL, return url_string which is a no-op
        # for this transform
        return url_string

    netloc = url_parts.netloc

    # If there's no netloc, give up
    if not netloc:
        return url_string

    for qs_param, vals in iteritems(qs_params):
        if qs_param.lower() in self.params_to_scrub:
            # A real list (not a lazy map object) so that
            # urlencode(doseq=True) can take len() of it.
            qs_params[qs_param] = [_redact(val) for val in vals]

    scrubbed_qs = urlencode(qs_params, doseq=True)

    # Split the netloc the same way urlsplit derives username/password and
    # rebuild it, so that only the credential fields are rewritten and an
    # identical substring in the host (or the other credential) is left
    # alone.
    userinfo, at_sign, hostinfo = netloc.rpartition('@')
    username, colon, password = userinfo.partition(':')

    if self.scrub_username and username:
        username = _redact(username)

    if self.scrub_password and password:
        password = _redact(password)

    netloc = ''.join((username, colon, password, at_sign, hostinfo))

    scrubbed_url = (url_parts.scheme,
                    netloc,
                    url_parts.path,
                    scrubbed_qs,
                    url_parts.fragment)

    scrubbed_url_string = urlunsplit(scrubbed_url)

    # Remove exactly the '//' that was added for parsing.
    if missing_colon_double_slash and scrubbed_url_string.startswith('//'):
        scrubbed_url_string = scrubbed_url_string[2:]

    return scrubbed_url_string
def _compare_urls(self, url1, url2):
    """Assert that ``url1`` and ``url2`` are equivalent URLs.

    Query strings are compared as parsed dicts, so parameter order does not
    matter. The scheme, netloc, path, params, username, password, hostname
    and port of the two parsed URLs must be equal. Strings matching
    ``_starts_with_auth_re`` are prefixed with ``//`` before parsing.

    Note: the query strings are parsed with ``parse_qs`` defaults, so
    blank-valued parameters are not part of the comparison.
    """
    if _starts_with_auth_re.match(url1):
        url1 = '//%s' % url1

    if _starts_with_auth_re.match(url2):
        url2 = '//%s' % url2

    # Lists rather than map(): each sequence is unpacked by an assertion
    # and some are iterated more than once, which a lazy iterator would
    # not survive.
    parsed_urls = [urlparse(url) for url in (url1, url2)]
    qs_params = [parse_qs(parsed.query) for parsed in parsed_urls]
    num_params = [len(params) for params in qs_params]
    param_names = [set(params.keys()) for params in qs_params]

    self.assertEqual(*num_params)
    self.assertDictEqual(*qs_params)
    self.assertSetEqual(*param_names)

    for facet in ('scheme', 'netloc', 'path', 'params', 'username',
                  'password', 'hostname', 'port'):
        comp = [getattr(parsed, facet) for parsed in parsed_urls]
        self.assertEqual(*comp)