def clean_values(self, init_val):
    if isinstance(init_val, DataContainer) or isinstance(init_val, dict):
        return init_val

    cleaned_vals = []

    # Cleanup whatever came from the wire into a unicode string
    for key, value in init_val:
        # I can do this key, value thing because the headers do NOT
        # have multiple header values like query strings and post-data
        if isinstance(value, basestring):
            value = smart_unicode(value)

        cleaned_vals.append((smart_unicode(key), value))

    return cleaned_vals
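# Minimal usage sketch for clean_values. The raw (key, value) pairs and the
# 'headers_instance' name below are hypothetical illustrations, assuming any
# Headers/DataContainer-like object from the project that exposes this method.
raw_pairs = [('Content-Type', 'text/html'), ('X-Powered-By', 'PHP/5.3')]
cleaned = headers_instance.clean_values(raw_pairs)
# => [(u'Content-Type', u'text/html'), (u'X-Powered-By', u'PHP/5.3')]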
def test_parse_response_with_no_charset_in_header(self):
    # No charset was specified, use the default as well as the default
    # error handling scheme
    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        resp = self.create_resp(Headers([('Content-Type', 'text/xml')]),
                                html)
        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            resp.body)
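# The encoding tests in this excerpt iterate over TEST_RESPONSES.values() and
# unpack (body, charset) pairs. The real fixture is not shown here; a minimal
# sketch of a compatible structure, with made-up keys and sample bodies, is:
TEST_RESPONSES = {
    'utf8': (u'Ver peliculas online en espa\xf1ol', 'utf-8'),
    'latin1': (u'R\xe9sum\xe9 des r\xe9ponses', 'latin-1'),
}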
def test_parse_response_with_wrong_charset(self):
    # A wrong or non-existent charset was set; try to decode the response
    # using the default charset and handling scheme
    from random import choice

    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        headers = Headers([('Content-Type', 'text/xml; charset=%s' %
                            choice(('XXX', 'utf-8')))])
        resp = self.create_resp(headers, html)
        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            resp.body)
def test_dump_case03(self):
    header_value = ''.join(chr(i) for i in xrange(256))

    expected = u'\r\n'.join([u'GET http://w3af.com/a/b/c.php HTTP/1.1',
                             u'Hola: %s' % smart_unicode(header_value),
                             u'',
                             u''])

    headers = Headers([(u'Hola', header_value)])

    # TODO: Note that I'm passing a dc to the FuzzableRequest and it's not
    #       appearing in the dump. It might be a bug...
    fr = FuzzableRequest(self.url, method='GET', dc={u'a': ['b']},
                         headers=headers)

    self.assertEqual(fr.dump(), expected)
def test_from_dict_encodings(self):
    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        resp = self.create_resp(Headers([('Content-Type', 'text/xml')]),
                                html)

        msg = msgpack.dumps(resp.to_dict())
        loaded_dict = msgpack.loads(msg)
        loaded_resp = HTTPResponse.from_dict(loaded_dict)

        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            loaded_resp.body)
def create_fuzzable_requests(resp, request=None, add_self=True):
    '''
    Generates the fuzzable requests based on an HTTP response instance.

    :param resp: An HTTPResponse instance.
    :param request: The HTTP request that generated the resp
    :param add_self: If I should add the current HTTP request
                     (:param request) to the result or not.

    :return: A list of fuzzable requests.
    '''
    res = []

    # Headers for all fuzzable requests created here:
    # And add the fuzzable headers to the dict
    req_headers = dict((h, '') for h in cf.cf.get('fuzzable_headers'))
    req_headers.update(request and request.get_headers() or {})
    req_headers = Headers(req_headers.items())

    # Get the cookie!
    cookieObj = _create_cookie(resp)

    # Create the fuzzable request that represents the request object
    # passed as parameter
    if add_self:
        qsr = HTTPQSRequest(resp.get_uri(),
                            headers=req_headers,
                            cookie=cookieObj)
        res.append(qsr)

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    resp_headers = resp.get_headers()

    for url_header_name in URL_HEADERS:
        url_header_value, _ = resp_headers.iget(url_header_name, '')
        if url_header_value:
            url = smart_unicode(url_header_value, encoding=resp.charset)
            try:
                absolute_location = resp.get_url().url_join(url)
            except ValueError:
                msg = 'The application sent a "%s" redirect that w3af' \
                      ' failed to correctly parse as a URL, the header' \
                      ' value was: "%s"'
                om.out.debug(msg % (url_header_name, url))
            else:
                qsr = HTTPQSRequest(absolute_location,
                                    headers=req_headers,
                                    cookie=cookieObj)
                res.append(qsr)

    # Try to find forms in the document
    try:
        dp = parser_cache.dpc.get_document_parser_for(resp)
    except w3afException:
        # Failed to find a suitable parser for the document
        form_list = []
    else:
        form_list = dp.get_forms()
        same_domain = lambda f: f.get_action().get_domain() == \
                                resp.get_url().get_domain()
        form_list = [f for f in form_list if same_domain(f)]

    if not form_list:
        # Check if it's a wsdl file
        # TODO: Rewrite web service support
        '''
        wsdlp = WSDLParser()
        try:
            wsdlp.set_wsdl(resp.get_body())
        except w3afException:
            pass
        else:
            for rem_meth in wsdlp.get_methods():
                wspdr = WebServiceRequest(rem_meth.get_location(),
                                          rem_meth.get_action(),
                                          rem_meth.get_parameters(),
                                          rem_meth.get_namespace(),
                                          rem_meth.get_methodName(),
                                          req_headers)
                res.append(wspdr)
        '''
    else:
        # Create one HTTPPostDataRequest for each form variant
        mode = cf.cf.get('form_fuzzing_mode')
        for form in form_list:
            for variant in form.get_variants(mode):
                if form.get_method().upper() == 'POST':
                    r = HTTPPostDataRequest(variant.get_action(),
                                            variant.get_method(),
                                            req_headers,
                                            cookieObj,
                                            variant)
                else:
                    # The default is a GET request
                    r = HTTPQSRequest(variant.get_action(),
                                      headers=req_headers,
                                      cookie=cookieObj)
                    r.set_dc(variant)

                res.append(r)

    return res
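# Minimal call sketch for create_fuzzable_requests. 'resp' and 'orig_request'
# are placeholder names assumed to be an existing HTTPResponse and the request
# that produced it; they are not fixtures taken from the project.
fuzzable = create_fuzzable_requests(resp, request=orig_request)

# 'fuzzable' now holds HTTPQSRequest / HTTPPostDataRequest instances built
# from the original URI, any 30X redirect headers and the forms found in the
# parsed document.
for fr in fuzzable:
    om.out.debug('Found fuzzable request: %s' % fr)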
def _charset_handling(self):
    '''
    Decode the body based on the header (or metadata) encoding.
    The implemented algorithm follows the encoding detection logic
    used by Firefox (FF):

        1) First try to find a charset using the following search criteria:
            a) Look in the 'content-type' HTTP header. Example:
                content-type: text/html; charset=iso-8859-1
            b) Look in the 'meta' HTML header. Example:
                <meta .* content="text/html; charset=utf-8" />
            c) Determine the charset using the chardet module (TODO)
            d) Use the DEFAULT_CHARSET

        2) Try to decode the body using the found charset. If it fails,
           then force it to use the DEFAULT_CHARSET

    Finally return the unicode (decoded) body and the used charset.

    Note: If the body is already a unicode string return it as it is.
    '''
    lcase_headers = self.get_lower_case_headers()
    charset = self._charset
    rawbody = self._raw_body

    # Only try to decode <str> strings. Skip <unicode> strings
    if type(rawbody) is unicode:
        _body = rawbody
        assert charset is not None, ("HTTPResponse objects containing "
                                     "unicode body must have an associated "
                                     "charset")
    elif 'content-type' not in lcase_headers:
        _body = rawbody
        charset = DEFAULT_CHARSET
        if len(_body):
            msg = "The remote web server failed to send the 'content-type'" \
                  " header in HTTP response with id %s" % self.id
            om.out.debug(msg)
    elif not self.is_text_or_html():
        # Not text, save as it is.
        _body = rawbody
        charset = charset or DEFAULT_CHARSET
    else:
        # Figure out charset to work with
        if not charset:
            # Start with the headers. Case-insensitive matching is expected
            # to come from the compiled CHARSET_* patterns themselves:
            # passing re.I to a compiled pattern's .search() would be
            # interpreted as a start position, not a flag.
            charset_mo = CHARSET_EXTRACT_RE.search(
                lcase_headers['content-type'])
            if charset_mo:
                # Seems like the response's headers contain a charset
                charset = charset_mo.groups()[0].lower().strip()
            else:
                # Continue with the body's meta tag
                charset_mo = CHARSET_META_RE.search(rawbody)
                if charset_mo:
                    charset = charset_mo.groups()[0].lower().strip()
                else:
                    charset = DEFAULT_CHARSET

        # Now that we have the charset, we use it!
        # The return value of the decode function is a unicode string.
        try:
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)
        except LookupError:
            # Warn about a buggy charset
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, self._charset))
            om.out.debug(msg)

            # Forcing it to use the default
            charset = DEFAULT_CHARSET
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)

    return _body, charset
def _charset_handling(self):
    '''
    Decode the body based on the header (or metadata) encoding.
    The implemented algorithm follows the encoding detection logic
    used by Firefox (FF):

        1) First try to find a charset using the following search criteria:
            a) Look in the 'content-type' HTTP header. Example:
                content-type: text/html; charset=iso-8859-1
            b) Look in the 'meta' HTML header. Example:
                <meta .* content="text/html; charset=utf-8" />
            c) Determine the charset using the chardet module (TODO)
            d) Use the DEFAULT_CHARSET

        2) Try to decode the body using the found charset. If it fails,
           then force it to use the DEFAULT_CHARSET

    Finally return the unicode (decoded) body and the used charset.

    Note: If the body is already a unicode string return it as it is.
    '''
    lcase_headers = self.get_lower_case_headers()
    charset = self._charset
    rawbody = self._raw_body

    # Only try to decode <str> strings. Skip <unicode> strings
    if type(rawbody) is unicode:
        _body = rawbody
        assert charset is not None, ("HTTPResponse objects containing "
                                     "unicode body must have an associated "
                                     "charset")
    elif 'content-type' not in lcase_headers:
        _body = rawbody
        charset = DEFAULT_CHARSET
        if len(_body):
            msg = "The remote web server failed to send the 'content-type'" \
                  " header in HTTP response with id %s" % self.id
            om.out.debug(msg)
    elif not self.is_text_or_html():
        # Not text, save as it is.
        _body = rawbody
        charset = charset or DEFAULT_CHARSET
    else:
        # Figure out charset to work with
        if not charset:
            charset = self.guess_charset(rawbody, lcase_headers)

        # Now that we have the charset, we use it!
        # The return value of the decode function is a unicode string.
        try:
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)
        except LookupError:
            # Warn about a buggy charset
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, self._charset))
            om.out.debug(msg)

            # Forcing it to use the default
            charset = DEFAULT_CHARSET
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)

    return _body, charset
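# The refactored _charset_handling above delegates to self.guess_charset,
# which is not included in this excerpt. A sketch of that helper, reconstructed
# from the inline logic of the previous version (an assumption, not the
# project's actual implementation), could look like this:
def guess_charset(self, rawbody, lcase_headers):
    # Start with the headers, e.g. "text/html; charset=iso-8859-1"
    charset_mo = CHARSET_EXTRACT_RE.search(lcase_headers['content-type'])
    if charset_mo:
        # Seems like the response's headers contain a charset
        return charset_mo.groups()[0].lower().strip()

    # Continue with the body's meta tag, e.g.
    # <meta ... content="text/html; charset=utf-8" />
    charset_mo = CHARSET_META_RE.search(rawbody)
    if charset_mo:
        return charset_mo.groups()[0].lower().strip()

    # Nothing found, fall back to the default
    return DEFAULT_CHARSET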