def test_analyze_cookies_https_value_over_http(self):
    body = ''
    url = URL('https://www.w3af.com/')
    headers = Headers({'content-type': 'text/html',
                       'Set-Cookie': 'abc=defjkluio; secure; httponly;'}.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    # Receive the cookie over HTTPS
    self.plugin.grep(request, response)

    url = URL('http://www.w3af.com/?id=defjkluio')
    headers = Headers({'content-type': 'text/html'}.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    # Send the cookie over HTTP as a parameter value
    self.plugin.grep(request, response)

    security = kb.kb.get('analyze_cookies', 'security')

    self.assertEqual(len(kb.kb.get('analyze_cookies', 'cookies')), 1)
    self.assertEqual(len(security), 1)
    self.assertEqual(len(kb.kb.get('analyze_cookies', 'invalid-cookies')), 0)

    names = [i.get_name() for i in security]
    self.assertIn('Secure cookies over insecure channel', names)
def test_fuzzable_request(self):
    dl = DiskList()

    uri = URL('http://w3af.org/?id=2')
    qsr1 = HTTPQSRequest(uri, method='GET',
                         headers=Headers([('Referer', 'http://w3af.org/')]))

    uri = URL('http://w3af.org/?id=3')
    qsr2 = HTTPQSRequest(uri, method='OPTIONS',
                         headers=Headers([('Referer', 'http://w3af.org/')]))

    uri = URL('http://w3af.org/?id=7')
    qsr3 = HTTPQSRequest(uri, method='FOO',
                         headers=Headers([('Referer', 'http://w3af.org/')]))

    dl.append(qsr1)
    dl.append(qsr2)

    self.assertEqual(dl[0], qsr1)
    self.assertEqual(dl[1], qsr2)
    self.assertFalse(qsr3 in dl)
    self.assertTrue(qsr2 in dl)
def test_doc_type(self):
    # Text or HTML
    text_or_html_mime_types = ('application/javascript', 'text/html',
                               'text/xml', 'text/cmd', 'text/css',
                               'text/csv', 'text/javascript', 'text/plain')
    for mimetype in text_or_html_mime_types:
        resp = self.create_resp(Headers([('Content-Type', mimetype)]))
        self.assertEquals(
            True, resp.is_text_or_html(),
            "MIME type '%s' wasn't recognized as a valid '%s' type"
            % (mimetype, HTTPResponse.DOC_TYPE_TEXT_OR_HTML))

    # PDF
    resp = self.create_resp(Headers([('Content-Type', 'application/pdf')]))
    self.assertEquals(True, resp.is_pdf())

    # SWF
    resp = self.create_resp(
        Headers([('Content-Type', 'application/x-shockwave-flash')]))
    self.assertEquals(True, resp.is_swf())

    # Image
    image_mime_types = ('image/gif', 'image/jpeg', 'image/pjpeg',
                        'image/png', 'image/tiff', 'image/svg+xml',
                        'image/vnd.microsoft.icon')
    for mimetype in image_mime_types:
        resp = self.create_resp(Headers([('Content-Type', mimetype)]))
        self.assertEquals(
            True, resp.is_image(),
            "MIME type '%s' wasn't recognized as a valid '%s' type"
            % (mimetype, HTTPResponse.DOC_TYPE_IMAGE))
def test_analyze_cookies_collect_uniq(self):
    body = ''
    url = URL('http://www.w3af.com/')
    headers = Headers({'content-type': 'text/html',
                       'Set-Cookie': 'abc=def'}.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')
    self.plugin.grep(request, response)

    headers = Headers({'content-type': 'text/html',
                       'Set-Cookie': '123=456'}.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')
    self.plugin.grep(request, response)

    headers = Headers({'content-type': 'text/html',
                       'Set-Cookie': 'abc=456'}.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')
    self.plugin.grep(request, response)

    self.assertEqual(len(kb.kb.get('analyze_cookies', 'cookies')), 2)
    self.assertEqual(len(kb.kb.get('analyze_cookies', 'invalid-cookies')), 0)
def test_mutant_creation(self):
    url = URL('http://moth/?a=1&b=2')
    headers = Headers([('Referer', 'http://moth/')])
    freq = HTTPQSRequest(url, headers=headers)

    created_mutants = HeadersMutant.create_mutants(freq, self.payloads, [],
                                                   False, self.fuzzer_config)

    expected_dc_lst = [Headers([('Referer', 'abc')]),
                       Headers([('Referer', 'def')])]

    created_dc_lst = [i.get_dc() for i in created_mutants]

    self.assertEqual(created_dc_lst, expected_dc_lst)

    self.assertEqual(created_mutants[0].get_var(), 'Referer')
    self.assertEqual(created_mutants[0].get_var_index(), 0)
    self.assertEqual(created_mutants[0].get_original_value(), '')
    self.assertEqual(created_mutants[1].get_var(), 'Referer')
    self.assertEqual(created_mutants[1].get_var_index(), 0)
    self.assertEqual(created_mutants[1].get_original_value(), '')

    self.assertTrue(
        all(isinstance(m, HeadersMutant) for m in created_mutants))
@classmethod
def from_httplib_resp(cls, httplibresp, original_url=None):
    '''
    Factory function. Build a HTTPResponse object from a
    httplib.HTTPResponse instance

    :param httplibresp: httplib.HTTPResponse instance
    :param original_url: Optional 'url_object' instance.

    :return: A HTTPResponse instance
    '''
    resp = httplibresp
    code, msg, hdrs, body = (resp.code, resp.msg, resp.info(), resp.read())
    hdrs = Headers(hdrs.items())

    if original_url:
        url_inst = URL(resp.geturl(), original_url.encoding)
        url_inst = url_inst.url_decode()
    else:
        url_inst = original_url = URL(resp.geturl())

    if isinstance(resp, urllib2.HTTPError):
        # This is possible because in errors.py I do:
        # err = urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
        charset = getattr(resp.fp, 'encoding', None)
    else:
        # The encoding attribute is only set on CachedResponse instances
        charset = getattr(resp, 'encoding', None)

    return cls(code, body, hdrs, url_inst, original_url, msg,
               charset=charset)
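# Hedged usage sketch (not part of the original source): how the factory above
# is typically fed. The import paths are an assumption based on the w3af 1.x
# source layout.
import urllib2

from core.data.parsers.url import URL
from core.data.url.HTTPResponse import HTTPResponse

original_url = URL('http://w3af.org/')
raw_resp = urllib2.urlopen('http://w3af.org/')

# Wrap the low-level response; per the code above, the charset is taken from
# the 'encoding' attribute when the underlying object is a CachedResponse.
w3af_resp = HTTPResponse.from_httplib_resp(raw_resp, original_url=original_url)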
def test_fuzz_headers_no_headers(self):
    cf_singleton.save('fuzzable_headers', ['Referer'])  # This one changed
    cf_singleton.save('fuzz_cookies', False)
    cf_singleton.save('fuzz_url_filenames', False)
    cf_singleton.save('fuzzed_files_extension', 'gif')
    cf_singleton.save('fuzz_form_files', False)
    cf_singleton.save('fuzz_url_parts', False)

    url = URL('http://moth/?id=1')

    # No headers in the original request
    #headers = Headers([('Referer', 'http://moth/foo/bar/')])
    freq = HTTPQSRequest(url)

    generated_mutants = create_mutants(freq, self.payloads)

    expected_urls = ['http://moth/?id=abc',
                     'http://moth/?id=def',
                     'http://moth/?id=1',
                     'http://moth/?id=1', ]

    generated_urls = [m.get_uri().url_string for m in generated_mutants]

    self.assertEqual(generated_urls, expected_urls)

    expected_headers = [Headers(),
                        Headers(),
                        Headers([('Referer', 'abc')]),
                        Headers([('Referer', 'def')]), ]

    generated_headers = [m.get_headers() for m in generated_mutants]

    self.assertEqual(expected_headers, generated_headers)

    self.assertTrue(all(isinstance(m, QSMutant) or isinstance(m, HeadersMutant)
                        for m in generated_mutants))
def test_get_lower_case_headers(self):
    headers = Headers([('Content-Type', 'text/html')])
    lcase_headers = Headers([('content-type', 'text/html')])

    resp = self.create_resp(headers, "<html/>")

    self.assertEqual(resp.get_lower_case_headers(), lcase_headers)
    self.assertIn('content-type', resp.get_lower_case_headers())
def test_http_auth_detect_simple(self):
    body = ''
    hdrs = {'content-type': 'text/html', 'www-authenticate': 'realm-w3af'}
    hdrs = Headers(hdrs.items())
    response = HTTPResponse(401, body, hdrs, self.url, self.url, _id=1)

    self.plugin.grep(self.request, response)

    self.assertEqual(len(kb.kb.get('http_auth_detect', 'auth')), 1)
    self.assertEqual(len(kb.kb.get('http_auth_detect', 'userPassUri')), 0)
def test_analyze_cookies_with_httponly_case_sensitive_expires(self):
    body = ''
    url = URL('https://www.w3af.com/')
    headers = {'content-type': 'text/html',
               'Set-Cookie': 'name2=value2; Expires=Wed, 09-Jun-2021 10:18:14 GMT;Secure;HttpOnly'}
    headers = Headers(headers.items())
    response = HTTPResponse(200, body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    self.plugin.grep(request, response)

    self.assertEqual(len(kb.kb.get('analyze_cookies', 'cookies')), 1)
    self.assertEqual(len(kb.kb.get('analyze_cookies', 'security')), 0)
def _do_GET(self, url, with_rand_ua=True):
    if not isinstance(url, URL):
        msg = 'The url parameter of a _do_GET must'
        msg += ' be of url.URL type.'
        raise ValueError(msg)

    if with_rand_ua:
        random_ua = get_random_user_agent()
        headers = Headers([('User-Agent', random_ua)])
    else:
        # Please note that some tests show that this is useful for the
        # mobile search.
        headers = Headers([('User-Agent', '')])

    return self._uri_opener.GET(url, headers=headers)
def test_provides_cors_features_false(self):
    url = URL('http://moth/')
    fr = FuzzableRequest(url)

    http_response = HTTPResponse(200, '', Headers(), url, url)

    url_opener_mock = Mock()
    url_opener_mock.GET = MagicMock(return_value=http_response)

    cors = provides_cors_features(fr, url_opener_mock)

    call_header = Headers({'Origin': 'www.w3af.org'}.items())
    url_opener_mock.GET.assert_called_with(url, headers=call_header)

    self.assertFalse(cors)
def crawl(self, fuzzable_request):
    '''
    Searches for new URLs by adding and subtracting numbers to the URL
    and its parameters.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    '''
    url = fuzzable_request.get_url()
    headers = Headers([('Referer', url.url_string)])

    original_response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                             cache=True, headers=headers)

    if original_response.is_text_or_html() or self._fuzz_images:
        fr_generator = self._mangle_digits(fuzzable_request)
        response_repeater = repeat(original_response)
        header_repeater = repeat(headers)

        args = izip(fr_generator, response_repeater, header_repeater)

        self.worker_pool.map_multi_args(self._do_request, args)

        # I add myself so the next call to this plugin won't find me...
        # Example: index1.html ---> index2.html --!!--> index1.html
        self._already_visited.add(fuzzable_request.get_uri())
def test_strange_http_codes(self):
    body = ''
    url = URL('http://www.w3af.com/')
    headers = Headers([('content-type', 'text/html')])
    request = FuzzableRequest(url, method='GET')

    resp_200 = HTTPResponse(200, body, headers, url, url, _id=1)
    resp_404 = HTTPResponse(404, body, headers, url, url, _id=1)
    KNOWN_GOOD = [resp_200, resp_404]

    resp_999 = HTTPResponse(999, body, headers, url, url, _id=1)
    resp_123 = HTTPResponse(123, body, headers, url, url, _id=1)
    resp_567 = HTTPResponse(567, body, headers, url, url, _id=1)
    resp_666 = HTTPResponse(666, body, headers, url, url, _id=1)
    resp_777 = HTTPResponse(777, body, headers, url, url, _id=1)
    KNOWN_BAD = [resp_999, resp_123, resp_567, resp_666, resp_777]

    for resp in KNOWN_GOOD:
        kb.kb.cleanup()
        self.plugin.grep(request, resp)
        self.assertEquals(len(kb.kb.get('strange_http_codes',
                                        'strange_http_codes')), 0)

    for resp in KNOWN_BAD:
        kb.kb.cleanup()
        self.plugin.grep(request, resp)
        self.assertEquals(len(kb.kb.get('strange_http_codes',
                                        'strange_http_codes')), 1)
def mangle_response(self, response):
    '''
    This method mangles the response.

    :param response: This is the response to mangle.
    :return: A mangled version of the response.
    '''
    body = response.get_body()

    for regex, string in self._manglers['s']['b']:
        body = regex.sub(string, body)

    response.set_body(body)

    header_string = str(response.get_headers())

    for regex, string in self._manglers['s']['h']:
        header_string = regex.sub(string, header_string)

    try:
        mangled_header = Headers.from_string(header_string)
    except ValueError:
        error = 'Your header modifications created an invalid header'\
                ' string that could NOT be parsed back to a Header object.'
        om.out.error(error)
    else:
        response.set_headers(mangled_header)

    if self._user_option_fix_content_len:
        response = self._fix_content_len(response)

    return response
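# Hedged sketch (not part of the original source) of the mangler table that
# mangle_response() above and mangle_request() further down iterate over.
# The key meanings ('s' for the response/server side, 'q' for the request
# side, 'b' for body, 'h' for headers) are an assumption inferred from how
# the two methods use the table; each entry is a (compiled_regex, replacement)
# pair applied with regex.sub().
import re

example_manglers = {
    's': {'b': [(re.compile('secret'), 'xxxxxx')],              # response body rules
          'h': [(re.compile('Server:.*'), 'Server: hidden')]},  # response header rules
    'q': {'b': [],                                              # request body rules
          'h': []},                                             # request header rules
}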
def test_headers(self):
    hdr = Headers([('foo', 'bar')])
    fr = create_fuzzable_request_from_parts(self.url, add_headers=hdr)

    self.assertEqual(fr.get_url(), self.url)
    self.assertEqual(fr.get_headers(), hdr)
    self.assertEqual(fr.get_method(), 'GET')
def test_blank_body_code(self):
    body = ''
    headers = Headers([('content-type', 'text/html')])
    response = HTTPResponse(401, body, headers, self.url, self.url, _id=1)
    request = FuzzableRequest(self.url, method='GET')
    self.plugin.grep(request, response)

    self.assertEqual(len(kb.kb.get('blank_body', 'blank_body')), 0)
def profile_me():
    '''
    To be profiled
    '''
    for _ in xrange(1):
        for counter in xrange(1, 5):
            file_name = 'test-' + str(counter) + '.html'
            file_path = os.path.join('plugins', 'tests', 'grep', 'data',
                                     file_name)

            body = file(file_path).read()
            hdrs = Headers({'Content-Type': 'text/html'}.items())
            response = HTTPResponse(200, body, hdrs,
                                    URL(self.url_str + str(counter)),
                                    URL(self.url_str + str(counter)),
                                    _id=random.randint(1, 5000))

            request = FuzzableRequest(self.url_inst)

            for pinst in self._plugins:
                pinst.grep(request, response)

    for pinst in self._plugins:
        pinst.end()
def test_str_strange(self):
    header_value = ''.join(chr(i) for i in xrange(256))

    headers = Headers([(u'Hola', header_value)])

    # I don't assert in a stricter way because the output depends on
    # smart_unicode which might change in the future
    self.assertIn('Hola: \x00\x01\x02', str(headers))
def _build_http_response(body_content, content_type):
    headers = Headers()
    headers[u'content-type'] = content_type

    url = URL('http://w3af.com')

    return HTTPResponse(200, body_content, headers, url, url, charset='utf-8')
def test_add_HTTPPostDataRequest(self):
    ds = DiskSet()

    uri = URL('http://w3af.org/?id=2')
    hdr = Headers([('Referer', 'http://w3af.org/')])

    pdr1 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

    uri = URL('http://w3af.org/?id=3')
    pdr2 = HTTPPostDataRequest(uri, method='GET', headers=hdr)

    uri = URL('http://w3af.org/?id=7')
    pdr3 = HTTPPostDataRequest(uri, method='FOO', headers=hdr)

    ds.add(pdr1)
    ds.add(pdr2)
    ds.add(pdr2)
    ds.add(pdr1)

    self.assertEqual(ds[0], pdr1)
    self.assertEqual(ds[1], pdr2)
    self.assertFalse(pdr3 in ds)
    self.assertTrue(pdr2 in ds)
    self.assertEqual(len(ds), 2)

    # This forces an internal change in the URL object
    pdr2.get_url().url_string
    self.assertTrue(pdr2 in ds)
def test_check_case09(self):
    is_vuln = IsVulnerableHelper(200, 301, re.compile('def'),
                                 re.compile('xyz'), re.compile('spam'))
    url = URL('http://moth/')
    http_response = HTTPResponse(301, 'hello world abc def', Headers(),
                                 url, url)
    self.assertTrue(is_vuln.check(http_response))
def __call__(self, uri, data=None, headers=Headers(), cache=False,
             grep=True, cookies=True):
    '''
    :return: An HTTPResponse object that's the result of sending the request
             with a method different from "GET" or "POST".
    '''
    if not isinstance(uri, URL):
        raise TypeError('The uri parameter of AnyMethod.'
                        '__call__() must be of url.URL type.')

    if not isinstance(headers, Headers):
        raise TypeError('The headers parameter of AnyMethod.'
                        '__call__() must be of Headers type.')

    self._xurllib._init()

    req = HTTPRequest(uri, data, cookies=cookies, cache=cache,
                      method=self._method)
    req = self._xurllib._add_headers(req, headers or {})
    return self._xurllib._send(req, grep=grep)
@classmethod
def from_dict(cls, unserialized_dict):
    '''
    * msgpack is MUCH faster than cPickle,
    * msgpack can't serialize python objects,
    * I have to create a dict representation of HTTPRequest to serialize it,
    * and a from_dict to have the object back

    :param unserialized_dict: A dict just as returned by to_dict()
    '''
    udict = unserialized_dict

    method, uri = udict['method'], udict['uri']
    headers, data = udict['headers'], udict['data']
    cookies = udict['cookies']
    cache = udict['cache']

    headers_inst = Headers(headers.items())
    url = URL(uri)

    return cls(url, data=data, headers=headers_inst,
               cookies=cookies, cache=cache, method=method)
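# Hedged sketch (not part of the original source): the dict layout consumed by
# from_dict() above, i.e. the inverse of to_dict(). It assumes the classmethod
# lives on HTTPRequest, as its parameters suggest.
serialized = {
    'method': 'GET',
    'uri': 'http://w3af.org/?id=1',
    'headers': {'Referer': 'http://w3af.org/'},
    'data': '',
    'cookies': True,
    'cache': False,
}

restored_request = HTTPRequest.from_dict(serialized)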
def _create_fuzzable_request(self):
    '''
    Based on the attributes, return a fuzzable request object.

    Important variables used here:
        - self.headers : Stores the headers for the request
        - self.rfile : A file like object that stores the post_data
        - self.path : Stores the URL that was requested by the browser
    '''
    # See HTTPWrapperClass
    if hasattr(self.server, 'chainedHandler'):
        base_path = "https://" + self.server.chainedHandler.path
        path = base_path + self.path
    else:
        path = self.path

    fuzzable_request = FuzzableRequest(
        URL(path),
        self.command,
        Headers(self.headers.dict.items())
    )

    post_data = self._get_post_data()
    if post_data:
        fuzzable_request.set_data(post_data)

    return fuzzable_request
def setUp(self):
    kb.kb.cleanup()
    self.plugin = path_disclosure()

    self.url = URL('http://www.w3af.com/foo/bar.py')
    self.header = Headers([('content-type', 'text/html')])
    self.request = FuzzableRequest(self.url, method='GET')
def test_blank_body_method(self):
    body = ''
    headers = Headers([('content-type', 'text/html')])
    response = HTTPResponse(200, body, headers, self.url, self.url, _id=1)
    request = FuzzableRequest(self.url, method='ARGENTINA')
    self.plugin.grep(request, response)

    self.assertEqual(len(kb.kb.get('blank_body', 'blank_body')), 0)
def __init__(self, url, data=None, headers=Headers(),
             origin_req_host=None, unverifiable=False, follow_redir=True,
             cookies=True, cache=False, method=None):
    '''
    This is a simple wrapper around a urllib2 request object which helps
    with some common tasks like serialization, cache, etc.

    :param method: None means "choose the method in the default way":
                   if self.has_data():
                       return "POST"
                   else:
                       return "GET"
    '''
    #
    # Save some information for later access in an easier way
    #
    self.url_object = url
    self.follow_redir = follow_redir
    self.cookies = cookies
    self.get_from_cache = cache

    self.method = method
    if self.method is None:
        self.method = 'POST' if data else 'GET'

    headers = dict(headers)

    # Call the base class constructor
    urllib2.Request.__init__(self, url.url_encode(), data,
                             headers, origin_req_host, unverifiable)
    RequestMixIn.__init__(self)
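# Hedged sketch (not part of the original source) of the method defaulting
# described in the docstring above; URL construction mirrors the tests in
# this collection.
get_req = HTTPRequest(URL('http://w3af.org/'))                    # no data -> method 'GET'
post_req = HTTPRequest(URL('http://w3af.org/'), data='id=1')      # data present -> 'POST'
trace_req = HTTPRequest(URL('http://w3af.org/'), method='TRACE')  # explicit method is kept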
def create_fuzzable_request_from_request(request, add_headers=None):
    '''
    :return: A fuzzable request with the same info as request
    '''
    if not isinstance(request, HTTPRequest):
        raise TypeError('Requires HTTPRequest to create FuzzableRequest.')

    url = request.url_object
    post_data = str(request.get_data() or '')
    method = request.get_method()

    headers = Headers(request.headers.items())
    headers.update(request.unredirected_hdrs.items())
    headers.update(add_headers or Headers())

    return create_fuzzable_request_from_parts(url, method=method,
                                              post_data=post_data,
                                              add_headers=headers)
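# Hedged sketch (not part of the original source): headers already set on the
# HTTPRequest are merged with the extra add_headers before the FuzzableRequest
# is built; the header names used here are illustrative only.
req = HTTPRequest(URL('http://w3af.org/'),
                  headers=Headers([('Referer', 'http://w3af.org/')]))
extra = Headers([('X-Audited-By', 'w3af')])

fuzzable = create_fuzzable_request_from_request(req, add_headers=extra)
# fuzzable.get_headers() now carries both the Referer and the extra header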
def mangle_request(self, request):
    '''
    This method mangles the request.

    :param request: This is the request to mangle.
    :return: A mangled version of the request.
    '''
    data = request.get_data()
    for regex, string in self._manglers['q']['b']:
        data = regex.sub(string, data)

    header_string = str(request.get_headers())
    for regex, string in self._manglers['q']['h']:
        header_string = regex.sub(string, header_string)
    headers_inst = Headers.from_string(header_string)

    return create_fuzzable_request_from_parts(
        request.get_uri(),
        request.get_method(),
        data,
        headers_inst
    )
class HTTPResponse(object): DOC_TYPE_TEXT_OR_HTML = 'DOC_TYPE_TEXT_OR_HTML' DOC_TYPE_SWF = 'DOC_TYPE_SWF' DOC_TYPE_PDF = 'DOC_TYPE_PDF' DOC_TYPE_IMAGE = 'DOC_TYPE_IMAGE' DOC_TYPE_OTHER = 'DOC_TYPE_OTHER' def __init__(self, code, read, headers, geturl, original_url, msg='OK', _id=None, time=0.2, alias=None, charset=None): ''' :param code: HTTP code :param read: HTTP body text; typically a string :param headers: HTTP headers, typically a dict or a httplib.HTTPMessage :param geturl: URL object instance :param original_url: URL object instance :param msg: HTTP message :param id: Optional response identifier :param time: The time between the request and the response :param alias: Alias for the response, this contains a hash that helps the backend sqlite find http_responses faster by indexing by this attr. :param charset: Response's encoding; obligatory when `read` is unicode ''' if not isinstance(geturl, URL): raise TypeError('Invalid type %s for HTTPResponse ctor param geturl.' % type(geturl)) if not isinstance(original_url, URL): raise TypeError('Invalid type %s for HTTPResponse ctor param original_url.' % type(original_url)) if not isinstance(headers, Headers): raise TypeError('Invalid type %s for HTTPResponse ctor param headers.' % type(headers)) if not isinstance(read, basestring): raise TypeError('Invalid type %s for HTTPResponse ctor param read.' % type(read)) self._charset = charset self._headers = None self._body = None self._raw_body = read self._content_type = None self._dom = None self._clear_text_body = None # A unique id identifier for the response self.id = _id # From cache defaults to False self._from_cache = False # Set the info self._info = headers # Set code self.set_code(code) # Set the URL variables # The URL that we really GET'ed self._realurl = original_url.uri2url() self._uri = original_url # The URL where we were redirected to (equal to original_url # when no redirect) self._redirected_url = geturl self._redirected_uri = geturl.uri2url() # Set the rest self._msg = msg self._time = time self._alias = alias self._doc_type = None # Internal lock self._body_lock = threading.RLock() @classmethod def from_httplib_resp(cls, httplibresp, original_url=None): ''' Factory function. Build a HTTPResponse object from a httplib.HTTPResponse instance :param httplibresp: httplib.HTTPResponse instance :param original_url: Optional 'url_object' instance. 
:return: A HTTPResponse instance ''' resp = httplibresp code, msg, hdrs, body = (resp.code, resp.msg, resp.info(), resp.read()) hdrs = Headers(hdrs.items()) if original_url: url_inst = URL(resp.geturl(), original_url.encoding) url_inst = url_inst.url_decode() else: url_inst = original_url = URL(resp.geturl()) charset = getattr(resp, 'encoding', None) return cls(code, body, hdrs, url_inst, original_url, msg, charset=charset) @classmethod def from_dict(cls, unserialized_dict): ''' * msgpack is MUCH faster than cPickle, * msgpack can't serialize python objects, * I have to create a dict representation of HTTPResponse to serialize it, * and a from_dict to have the object back :param unserialized_dict: A dict just as returned by to_dict() ''' udict = unserialized_dict code, msg, hdrs = udict['code'], udict['msg'], udict['headers'] body, _time, _id = udict['body'], udict['time'], udict['id'] headers_inst = Headers(hdrs.items()) url = URL(udict['uri']) return cls(code, body, headers_inst, url, url, msg=msg, _id=_id, time=_time) def to_dict(self): ''' :return: A dict that represents the current object and is serializable by the json or msgpack modules. ''' serializable_dict = {} sdict = serializable_dict # Note: The Headers() object can be serialized by msgpack because it # inherits from dict() and doesn't mangle it too much sdict['code'], sdict['msg'], sdict['headers'] = (self.get_code(), self.get_msg(), self.get_headers()) sdict['body'], sdict['time'], sdict['id'] = (self.get_body(), self.get_wait_time(), self.get_id()) sdict['uri'] = self.get_uri().url_string return serializable_dict def __contains__(self, string_to_test): ''' Determine if the `string_to_test` is contained by the HTTP response body. :param string_to_test: String to look for in the body ''' return string_to_test in self.body def __eq__(self, other): return self.id == other.id and self._code == other._code and \ self.headers == other.headers and self.body == other.body and \ self._uri == other._uri def __repr__(self): vals = { 'code': self.get_code(), 'url': str(self.get_url()), 'id': self.id and ' | id:%s' % self.id or '', 'fcache': self._from_cache and ' | fromCache:True' or '' } return '<HTTPResponse | %(code)s | %(url)s%(id)s%(fcache)s>' % vals def set_id(self, _id): self.id = _id def get_id(self): return self.id def set_code(self, code): self._code = code def get_code(self): return self._code def get_body(self): with self._body_lock: if self._body is None: self._body, self._charset = self._charset_handling() # Free 'raw_body' self._raw_body = None return self._body def set_body(self, body): ''' Setter for body. @body: A string that represents the body of the HTTP response ''' if not isinstance(body, basestring): msg = 'Invalid type %s for set_body parameter body.' raise TypeError(msg % type(body)) self._body = None self._raw_body = body body = property(get_body, set_body) def get_clear_text_body(self): ''' :return: A clear text representation of the HTTP response body. ''' clear_text_body = self._clear_text_body if clear_text_body is None: # Calculate the clear text body dom = self.get_dom() if dom is not None: clear_text_body = ''.join(dom.itertext()) else: clear_text_body = ANY_TAG_MATCH.sub('', self.get_body()) self._clear_text_body = clear_text_body return clear_text_body def set_dom(self, dom_inst): ''' This setter is part of a performance improvement I'm talking about in get_dom() and sgmlParser._parse(). Without this set_dom() which is called from sgmlParser._parse() when the code runs: sgmlParser( http_response ) ... 
http_response.get_dom() The DOM is calculated twice. We still need to figure out how to solve the other issue which should aim to avoid the double DOM generation when: http_response.get_dom() ... sgmlParser( http_response ) :return: None ''' self._dom = dom_inst def get_dom(self): ''' I don't want to calculate the DOM for all responses, only for those which are needed. This method will first calculate the DOM, and then save it for upcoming calls. @see: TODO: Potential performance improvement in sgmlParser._parse() for ideas on how to reduce CPU usage. :return: The DOM, or None if the HTML normalization failed. ''' if self._dom is None: try: parser = etree.HTMLParser(recover=True) self._dom = etree.fromstring(self.body, parser) except Exception: msg = ('The HTTP body for "%s" could NOT be parsed by lxml.' % self.get_url()) om.out.debug(msg) return self._dom def get_charset(self): if not self._charset: self._body, self._charset = self._charset_handling() # Free 'raw_body' self._raw_body = None return self._charset def set_charset(self, charset): self._charset = charset charset = property(get_charset, set_charset) def set_redir_url(self, ru): self._redirected_url = ru def get_redir_url(self): return self._redirected_url def set_redir_uri(self, ru): self._redirected_uri = ru def get_redir_uri(self): return self._redirected_uri def get_headers(self): if self._headers is None: self.headers = self._info assert self._headers is not None return self._headers def set_headers(self, headers): ''' Sets the headers and also analyzes them in order to get the response mime type (text/html , application/pdf, etc). :param headers: The headers dict. ''' # Fix lowercase in header names from HTTPMessage if isinstance(headers, httplib.HTTPMessage): self._headers = Headers() for header in headers.headers: key, value = header.split(':', 1) self._headers[key.strip()] = value.strip() else: self._headers = headers # Set the type, for easy access. self._doc_type = HTTPResponse.DOC_TYPE_OTHER find_word = lambda w: content_type.find(w) != -1 content_type_hvalue, _ = self._headers.iget('content-type', None) # we need exactly content type but not charset if content_type_hvalue is not None: try: self._content_type = content_type_hvalue.split(';', 1)[0] except: msg = 'Invalid Content-Type value "%s" sent in HTTP response.' om.out.debug(msg % (content_type_hvalue,)) else: content_type = self._content_type.lower() # Set the doc_type if content_type.count('image'): self._doc_type = HTTPResponse.DOC_TYPE_IMAGE elif content_type.count('pdf'): self._doc_type = HTTPResponse.DOC_TYPE_PDF elif content_type.count('x-shockwave-flash'): self._doc_type = HTTPResponse.DOC_TYPE_SWF elif any(imap(find_word, ('text', 'html', 'xml', 'txt', 'javascript'))): self._doc_type = HTTPResponse.DOC_TYPE_TEXT_OR_HTML headers = property(get_headers, set_headers) def get_lower_case_headers(self): ''' If the original headers were: {'Abc-Def': 'F00N3s'} This will return: {'abc-def': 'F00N3s'} The only thing that changes is the header name. ''' lcase_headers = dict( (k.lower(), v) for k, v in self.headers.iteritems()) return Headers(lcase_headers.items()) def set_url(self, url): ''' >>> url = URL('http://www.google.com') >>> r = HTTPResponse(200, '' , Headers(), url, url) >>> r.set_url('http://www.google.com/') Traceback (most recent call last): ... TypeError: The URL of a HTTPResponse object must be of url.URL type. 
>>> r.set_url(url) >>> r.get_url() == url True ''' if not isinstance(url, URL): raise TypeError('The URL of a HTTPResponse object must be of ' 'url.URL type.') self._realurl = url.uri2url() def get_url(self): return self._realurl def set_uri(self, uri): ''' >>> uri = URL('http://www.google.com/') >>> r = HTTPResponse(200, '' , Headers(), uri, uri) >>> r.set_uri('http://www.google.com/') Traceback (most recent call last): ... TypeError: The URI of a HTTPResponse object must be of url.URL type. >>> r.set_uri(uri) >>> r.get_uri() == uri True ''' if not isinstance(uri, URL): raise TypeError('The URI of a HTTPResponse object must be of ' 'url.URL type.') self._uri = uri self._realurl = uri.uri2url() def get_uri(self): return self._uri def was_redirected(self): return self._uri != self._redirected_uri def set_from_cache(self, fcache): ''' :param fcache: True if this response was obtained from the local cache. ''' self._from_cache = fcache def get_from_cache(self): ''' :return: True if this response was obtained from the local cache. ''' return self._from_cache def set_wait_time(self, t): self._time = t def get_wait_time(self): return self._time def set_alias(self, alias): self._alias = alias def get_alias(self): return self._alias def info(self): return self._info def get_status_line(self): '''Return status-line of response.''' return 'HTTP/1.1' + SP + str(self._code) + SP + self._msg + CRLF def get_msg(self): return self._msg def _charset_handling(self): ''' Decode the body based on the header (or metadata) encoding. The implemented algorithm follows the encoding detection logic used by FF: 1) First try to find a charset using the following search criteria: a) Look in the 'content-type' HTTP header. Example: content-type: text/html; charset=iso-8859-1 b) Look in the 'meta' HTML header. Example: <meta .* content="text/html; charset=utf-8" /> c) Determine the charset using the chardet module (TODO) d) Use the DEFAULT_CHARSET 2) Try to decode the body using the found charset. If it fails, then force it to use the DEFAULT_CHARSET Finally return the unicode (decoded) body and the used charset. Note: If the body is already a unicode string return it as it is. ''' lcase_headers = self.get_lower_case_headers() charset = self._charset rawbody = self._raw_body # Only try to decode <str> strings. Skip <unicode> strings if type(rawbody) is unicode: _body = rawbody assert charset is not None, ("HTTPResponse objects containing " "unicode body must have an associated " "charset") elif 'content-type' not in lcase_headers: _body = rawbody charset = DEFAULT_CHARSET if len(_body): msg = "The remote web server failed to send the 'content-type'"\ " header in HTTP response with id %s" % self.id om.out.debug(msg) elif not self.is_text_or_html(): # Not text, save as it is. _body = rawbody charset = charset or DEFAULT_CHARSET else: # Figure out charset to work with if not charset: charset = self.guess_charset(rawbody, lcase_headers) # Now that we have the charset, we use it! # The return value of the decode function is a unicode string. 
try: _body = smart_unicode( rawbody, charset, errors=ESCAPED_CHAR, on_error_guess=False ) except LookupError: # Warn about a buggy charset msg = ('Charset LookupError: unknown charset: %s; ' 'ignored and set to default: %s' % (charset, self._charset)) om.out.debug(msg) # Forcing it to use the default charset = DEFAULT_CHARSET _body = smart_unicode( rawbody, charset, errors=ESCAPED_CHAR, on_error_guess=False ) return _body, charset def guess_charset(self, rawbody, headers): # Start with the headers charset_mo = CHARSET_EXTRACT_RE.search(headers['content-type'], re.I) if charset_mo: # Seems like the response's headers contain a charset charset = charset_mo.groups()[0].lower().strip() else: # Continue with the body's meta tag charset_mo = CHARSET_META_RE.search(rawbody, re.IGNORECASE) if charset_mo: charset = charset_mo.groups()[0].lower().strip() else: charset = DEFAULT_CHARSET return charset @property def content_type(self): ''' The content type of the response ''' if self._content_type is None: self.headers = self._info return self._content_type or '' @property def doc_type(self): if self._doc_type is None: self.headers = self._info assert self._doc_type is not None return self._doc_type def is_text_or_html(self): ''' :return: True if this response is text or html ''' return self.doc_type == HTTPResponse.DOC_TYPE_TEXT_OR_HTML def is_pdf(self): ''' :return: True if this response is a PDF file ''' return self.doc_type == HTTPResponse.DOC_TYPE_PDF def is_swf(self): ''' :return: True if this response is a SWF file ''' return self.doc_type == HTTPResponse.DOC_TYPE_SWF def is_image(self): ''' :return: True if this response is an image file ''' return self.doc_type == HTTPResponse.DOC_TYPE_IMAGE def dump_response_head(self): ''' :return: A string with: HTTP/1.1 /login.html 200 Header1: Value1 Header2: Value2 ''' dump_head = "%s%s" % (self.get_status_line(), self.dump_headers()) if type(dump_head) is unicode: dump_head = dump_head.encode(self.charset) return dump_head def dump(self): ''' Return a DETAILED str representation of this HTTP response object. ''' body = self.body # Images, pdf and binary responses in general are never decoded # to unicode if isinstance(body, unicode): body = body.encode(DEFAULT_CHARSET, 'replace') return "%s%s%s" % (self.dump_response_head(), CRLF, body) def dump_headers(self): ''' :return: a str representation of the headers. ''' if self.headers: return CRLF.join(h + ': ' + hv for h, hv in self.headers.items()) + CRLF else: return '' def copy(self): return copy.deepcopy(self) def __getstate__(self): state = self.__dict__.copy() state.pop('_body_lock') return state def __setstate__(self, state): self.__dict__ = state self._body_lock = threading.RLock()
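# Hedged usage sketch (not part of the original source), mirroring how the
# tests in this collection build responses: construct an HTTPResponse, check
# the detected document type and round-trip it through to_dict()/from_dict().
url = URL('http://w3af.org/')
headers = Headers([('content-type', 'text/html; charset=utf-8')])
resp = HTTPResponse(200, '<html>hello</html>', headers, url, url, _id=1)

assert resp.is_text_or_html()
assert resp.get_lower_case_headers()['content-type'].startswith('text/html')

restored = HTTPResponse.from_dict(resp.to_dict())
assert restored.get_code() == 200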
def test_clone_with_list_values(self):
    headers = Headers([('a', 'b'), ('c', 'd')])
    cloned = headers.clone_with_list_values()

    self.assertEqual(cloned['a'], ['b'])
    self.assertEqual(cloned['c'], ['d'])
def test_from_string(self):
    headers_from_str = Headers.from_string('a: b\r\n')
    headers_from_obj = Headers([('a', 'b')])

    self.assertEqual(headers_from_str, headers_from_obj)
class FuzzableRequest(RequestMixIn, DiskItem): ''' This class represents a fuzzable request. Fuzzable requests were created to allow w3af plugins to be much simpler and don't really care if the vulnerability is in the postdata, querystring, header, cookie or any other variable. Other classes should inherit from this one and change the behaviour of get_uri() and get_data(). For example: the class HTTPQSRequest should return the _dc in the querystring (get_uri) and HTTPPostDataRequest should return the _dc in the POSTDATA (get_data()). :author: Andres Riancho ([email protected]) ''' def __init__(self, uri, method='GET', headers=None, cookie=None, dc=None): super(FuzzableRequest, self).__init__() # Internal variables self._dc = dc or DataContainer() self._method = method self._headers = Headers(headers or ()) self._cookie = cookie or Cookie() self._data = None self.set_uri(uri) # Set the internal variables self._sent_info_comp = None def export(self): ''' Generic version of how they are exported: METHOD,URL,DC Example: GET,http://localhost/index.php?abc=123&def=789, POST,http://localhost/index.php,abc=123&def=789 :return: a csv str representation of the request ''' # # FIXME: What if a comma is inside the URL or DC? # TODO: Why don't we export headers and cookies? # meth = self._method str_res = [meth, ',', str(self._url)] if meth == 'GET': if self._dc: str_res.extend(('?', str(self._dc))) str_res.append(',') else: str_res.append(',') if self._dc: str_res.append(str(self._dc)) return ''.join(str_res) def sent(self, smth_instng): ''' Checks if something similar to `smth_instng` was sent in the request. This is used to remove false positives, e.g. if a grep plugin finds a "strange" string and wants to be sure it was not generated by an audit plugin. This method should only be used by grep plugins which often have false positives. The following example shows that we sent d'z"0 but d\'z"0 will as well be recognised as sent TODO: This function is called MANY times, and under some circumstances it's performance REALLY matters. We need to review this function. >>> f = FuzzableRequest(URL("""http://example.com/a?p=d'z"0&paged=2""")) >>> f.sent('d%5C%27z%5C%220') True >>> f._data = 'p=<SCrIPT>alert("bsMs")</SCrIPT>' >>> f.sent('<SCrIPT>alert(\"bsMs\")</SCrIPT>') True >>> f = FuzzableRequest(URL('http://example.com/?p=<ScRIPT>a=/PlaO/%0Afake_alert(a.source)</SCRiPT>')) >>> f.sent('<ScRIPT>a=/PlaO/fake_alert(a.source)</SCRiPT>') True :param smth_instng: The string :return: True if something similar was sent ''' def make_comp(heterogen_string): ''' This basically removes characters that are hard to compare ''' heterogen_characters = ('\\', '\'', '"', '+', ' ', chr(0), chr(int("0D", 16)), chr(int("0A", 16))) #heterogen_characters.extend(string.whitespace) for hetero_char in heterogen_characters: heterogen_string = heterogen_string.replace(hetero_char, '') return heterogen_string data = self._data or '' # This is the easy part. 
If it was exactly like this in the request if data and smth_instng in data or \ smth_instng in self.get_uri() or \ smth_instng in unquote(data) or \ smth_instng in unicode(self._uri.url_decode()): return True # Ok, it's not in it but maybe something similar # Let's set up something we can compare if self._sent_info_comp is None: dc = self._dc dec_dc = unquote(str(dc)).decode(dc.encoding) data = '%s%s%s' % (unicode(self._uri), data, dec_dc) self._sent_info_comp = make_comp(data + unquote(data)) min_len = 3 # make the smth_instng comparable smth_instng_comps = (make_comp(smth_instng), make_comp(unquote(smth_instng))) for smth_intstng_comp in smth_instng_comps: # We don't want false negatives just because the string is # short after making comparable if smth_intstng_comp in self._sent_info_comp and \ len(smth_intstng_comp) >= min_len: return True # I didn't sent the smth_instng in any way return False def __hash__(self): return hash(str(self._uri)) def __str__(self): ''' :return: A string representation of this fuzzable request. >>> fr = FuzzableRequest(URL("http://www.w3af.com/")) >>> str(fr) 'http://www.w3af.com/ | Method: GET' >>> repr( fr ) '<fuzzable request | GET | http://www.w3af.com/>' >>> fr.set_method('TRACE') >>> str(fr) 'http://www.w3af.com/ | Method: TRACE' ''' strelems = [unicode(self._url)] strelems.append(u' | Method: ' + self._method) if self._dc: strelems.append(u' | Parameters: (') # Mangle the value for printing for pname, values in self._dc.items(): # Because of repeated parameter names, we need to add this: for the_value in values: # the_value is always a string if len(the_value) > 10: the_value = the_value[:10] + '...' the_value = '"' + the_value + '"' strelems.append(pname + '=' + the_value + ', ') strelems[-1] = strelems[-1][:-2] strelems.append(u')') return u''.join(strelems).encode(DEFAULT_ENCODING) def __repr__(self): return '<fuzzable request | %s | %s>' % \ (self.get_method(), self.get_uri()) def __eq__(self, other): ''' Two requests are equal if: - They have the same URL - They have the same method - They have the same parameters - The values for each parameter is equal :return: True if the requests are equal. ''' if isinstance(other, FuzzableRequest): return (self._method == other._method and self._uri == other._uri and self._dc == other._dc) else: return NotImplemented def get_eq_attrs(self): return ['_method', '_uri', '_dc'] def __ne__(self, other): return not self.__eq__(other) def is_variant_of(self, other): ''' Two requests are loosely equal (or variants) if: - They have the same URL - They have the same HTTP method - They have the same parameter names - The values for each parameter have the same type (int / string) :return: True if self and other are variants. ''' dc = self._dc odc = other._dc if (self._method == other._method and self._url == other._url and dc.keys() == odc.keys()): for vself, vother in izip_longest( chain(*dc.values()), chain(*odc.values()), fillvalue=None ): if None in (vself, vother) or \ vself.isdigit() != vother.isdigit(): return False return True return False def set_url(self, url): if not isinstance(url, URL): raise TypeError('The "url" parameter of a %s must be of ' 'url.URL type.' % type(self).__name__) self._url = URL(url.url_string.replace(' ', '%20')) self._uri = self._url def set_uri(self, uri): if not isinstance(uri, URL): raise TypeError('The "uri" parameter of a %s must be of ' 'url.URL type.' 
% type(self).__name__) self._uri = uri self._url = uri.uri2url() def set_method(self, method): self._method = method def set_dc(self, dataCont): if not isinstance(dataCont, DataContainer): raise TypeError('Invalid call to fuzzable_request.set_dc(), the ' 'argument must be a DataContainer instance.') self._dc = dataCont def set_headers(self, headers): self._headers = Headers(headers) def set_referer(self, referer): self._headers['Referer'] = str(referer) def set_cookie(self, c): ''' :param cookie: A Cookie object as defined in core.data.dc.cookie, or a string. ''' if isinstance(c, Cookie): self._cookie = c elif isinstance(c, basestring): self._cookie = Cookie(c) elif c is None: self._cookie = Cookie() else: fmt = '[FuzzableRequest error] set_cookie received: "%s": "%s".' error_str = fmt % (type(c), repr(c)) om.out.error(error_str) raise w3afException(error_str) def get_url(self): return self._url def get_uri(self): return self._uri def set_data(self, d): ''' The data is the string representation of the DataContainer, in most cases it wont be set. ''' self._data = d def get_data(self): ''' The data is the string representation of the DataContainer, in most cases it will be used as the POSTDATA for requests. Sometimes it is also used as the query string data. ''' return self._data def get_method(self): return self._method def get_dc(self): return self._dc def get_headers(self): return self._headers def get_referer(self): return self._headers.get('Referer', None) def get_cookie(self): return self._cookie def get_file_vars(self): return [] def copy(self): return copy.deepcopy(self)
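# Hedged usage sketch (not part of the original source), based on the
# FuzzableRequest class above and on the doctests in sent()/__str__().
fr = FuzzableRequest(URL('http://www.w3af.com/'), method='GET',
                     headers=Headers([('Referer', 'http://www.w3af.com/')]))

fr.set_referer('http://www.w3af.com/index.html')   # overwrites the Referer header

assert str(fr) == 'http://www.w3af.com/ | Method: GET'   # per the doctest above
assert fr.get_referer() == 'http://www.w3af.com/index.html'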
def test_to_str_from_string(self):
    headers_from_obj = Headers([('a', 'b')])
    headers_from_str = Headers.from_string(str(headers_from_obj))

    self.assertEqual(headers_from_str, headers_from_obj)
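# Hedged sketch (not part of the original source): Headers round-trips through
# str()/from_string() as the tests above show, and iget() appears to perform a
# case-insensitive lookup returning a (value, original_name) tuple, as it is
# used elsewhere in this collection.
headers = Headers([('Content-Type', 'text/html'), ('X-Foo', 'bar')])

assert Headers.from_string(str(headers)) == headers

value, real_name = headers.iget('content-type', None)
assert value == 'text/html'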
def get_headers(self):
    headers = Headers(self.headers.items())
    headers.update(self.unredirected_hdrs.items())
    return headers
def create_fuzzable_requests(resp, request=None, add_self=True):
    '''
    Generates the fuzzable requests based on an HTTP response instance.

    :param resp: An HTTPResponse instance.
    :param request: The HTTP request that generated the resp
    :param add_self: If I should add the current HTTP request
                     (:param request) to the result or not.

    :return: A list of fuzzable requests.
    '''
    res = []

    # Headers for all fuzzable requests created here:
    # And add the fuzzable headers to the dict
    req_headers = dict((h, '') for h in cf.cf.get('fuzzable_headers'))
    req_headers.update(request and request.get_headers() or {})
    req_headers = Headers(req_headers.items())

    # Get the cookie!
    cookieObj = _create_cookie(resp)

    # Create the fuzzable request that represents the request object
    # passed as parameter
    if add_self:
        qsr = HTTPQSRequest(
            resp.get_uri(),
            headers=req_headers,
            cookie=cookieObj
        )
        res.append(qsr)

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    resp_headers = resp.get_headers()

    for url_header_name in URL_HEADERS:
        url_header_value, _ = resp_headers.iget(url_header_name, '')
        if url_header_value:
            url = smart_unicode(url_header_value, encoding=resp.charset)
            try:
                absolute_location = resp.get_url().url_join(url)
            except ValueError:
                msg = 'The application sent a "%s" redirect that w3af' \
                      ' failed to correctly parse as an URL, the header' \
                      ' value was: "%s"'
                om.out.debug(msg % (url_header_name, url))
            else:
                qsr = HTTPQSRequest(
                    absolute_location,
                    headers=req_headers,
                    cookie=cookieObj
                )
                res.append(qsr)

    # Try to find forms in the document
    try:
        dp = parser_cache.dpc.get_document_parser_for(resp)
    except w3afException:
        # Failed to find a suitable parser for the document
        form_list = []
    else:
        form_list = dp.get_forms()
        same_domain = lambda f: f.get_action().get_domain() == \
            resp.get_url().get_domain()
        form_list = [f for f in form_list if same_domain(f)]

    if not form_list:
        # Check if its a wsdl file
        #TODO: Rewrite web service support
        '''
        wsdlp = WSDLParser()
        try:
            wsdlp.set_wsdl(resp.get_body())
        except w3afException:
            pass
        else:
            for rem_meth in wsdlp.get_methods():
                wspdr = WebServiceRequest(
                    rem_meth.get_location(),
                    rem_meth.get_action(),
                    rem_meth.get_parameters(),
                    rem_meth.get_namespace(),
                    rem_meth.get_methodName(),
                    req_headers
                )
                res.append(wspdr)
        '''
    else:
        # Create one HTTPPostDataRequest for each form variant
        mode = cf.cf.get('form_fuzzing_mode')
        for form in form_list:
            for variant in form.get_variants(mode):
                if form.get_method().upper() == 'POST':
                    r = HTTPPostDataRequest(
                        variant.get_action(),
                        variant.get_method(),
                        req_headers,
                        cookieObj,
                        variant)
                else:
                    # The default is a GET request
                    r = HTTPQSRequest(
                        variant.get_action(),
                        headers=req_headers,
                        cookie=cookieObj
                    )
                    r.set_dc(variant)
                res.append(r)

    return res
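# Hedged usage sketch (not part of the original source) for the factory above:
# derive fuzzable requests from a redirect response. It assumes 'location' is
# listed in URL_HEADERS and that the framework configuration (fuzzable_headers,
# form_fuzzing_mode) is already loaded.
url = URL('http://w3af.org/')
headers = Headers([('content-type', 'text/html'),
                   ('location', 'http://w3af.org/home')])
resp = HTTPResponse(301, '', headers, url, url, _id=1)

fuzzable_reqs = create_fuzzable_requests(resp, add_self=True)
# Expected: one entry for the response URI itself plus one for the redirect
# target taken from the 'location' header.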
def HTTPRequestParser(head, postdata):
    '''
    This function parses HTTP Requests from a string to a FuzzableRequest.

    :param head: The head of the request.
    :param postdata: The post data of the request
    :return: A FuzzableRequest object with all the corresponding information
             that was sent in head and postdata

    :author: Andres Riancho ([email protected])
    '''
    # Parse the request head, the strip() helps us deal with the \r (if any)
    splitted_head = head.split('\n')
    splitted_head = [h.strip() for h in splitted_head if h]

    if not splitted_head:
        msg = 'The HTTP request is invalid.'
        raise w3afException(msg)

    # Get method, uri, version
    method_uri_version = splitted_head[0]
    first_line = method_uri_version.split(' ')
    if len(first_line) == 3:
        # Ok, we have something like "GET /foo HTTP/1.0". This is the best
        # case for us!
        method, uri, version = first_line
    elif len(first_line) < 3:
        msg = 'The HTTP request has an invalid <method> <uri> <version> token: "'
        msg += method_uri_version + '".'
        raise w3afException(msg)
    elif len(first_line) > 3:
        # GET /hello world.html HTTP/1.0
        # Mostly because we are permissive... we are going to try to parse
        # the request...
        method = first_line[0]
        version = first_line[-1]
        uri = ' '.join(first_line[1:-1])

    check_version_syntax(version)

    # If we got here, we have a nice method, uri, version first line
    # Now we parse the headers (easy!) and finally we send the request
    headers_str = splitted_head[1:]
    headers_inst = Headers()

    for header in headers_str:
        one_splitted_header = header.split(':', 1)
        if len(one_splitted_header) == 1:
            msg = 'The HTTP request has an invalid header: "%s".'
            raise w3afException(msg % header)

        header_name = one_splitted_header[0].strip()
        header_value = one_splitted_header[1].strip()
        if header_name in headers_inst:
            headers_inst[header_name] += ', ' + header_value
        else:
            headers_inst[header_name] = header_value

    host, _ = headers_inst.iget('host', None)

    try:
        uri = URL(check_uri_syntax(uri, host))
    except ValueError, ve:
        raise w3afException(str(ve))
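# Hedged usage sketch (not part of the original source), based on the
# docstring above; the body of the parser is truncated in this excerpt, so
# this only illustrates the documented contract.
head = ('POST /login HTTP/1.1\n'
        'Host: w3af.org\n'
        'Content-Type: application/x-www-form-urlencoded\n')
postdata = 'user=admin&pass=1234'

# Per the docstring: method, URI and headers are taken from `head`, and
# `postdata` ends up as the post-data of the returned FuzzableRequest.
fuzzable = HTTPRequestParser(head, postdata)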