def provides_cors_features(freq, url_opener):
    """
    Detect whether the URL behind freq exposes CORS response headers.

    :param freq: A fuzzableRequest object.
    :param url_opener: "w3af.core.data.url.ExtendedUrllib" class instance to
                       use for HTTP request/response processing.
    :return: True if the URL provides CORS features, False otherwise.
    """
    # First probe: some servers always send Access-Control-Allow-Origin
    plain_response = url_opener.GET(freq.get_url())
    if retrieve_cors_header(plain_response, ACCESS_CONTROL_ALLOW_ORIGIN) is not None:
        return True

    # Second probe: others only answer with it when an Origin header is sent
    origin_headers = Headers({'Origin': 'www.w3af.org'}.items())
    origin_response = url_opener.GET(freq.get_url(), headers=origin_headers)

    return retrieve_cors_header(origin_response, ACCESS_CONTROL_ALLOW_ORIGIN) is not None
def __call__(self, uri, data=None, headers=None, cache=False,
             grep=True, cookies=True, ignore_errors=False):
    """
    Send the request with a method different from "GET" or "POST".

    :param uri: A url.URL instance with the target URI.
    :param data: The request post-data, if any.
    :param headers: A Headers instance; a fresh empty Headers is used
                    when omitted.
    :return: An HTTPResponse object that's the result of sending the request
             with a method different from "GET" or "POST".
    :raise TypeError: When uri is not a URL or headers is not a Headers.
    """
    # Fix: the default used to be a single mutable Headers() instance that
    # was shared by every call to this method; create one per call instead
    if headers is None:
        headers = Headers()

    if not isinstance(uri, URL):
        raise TypeError('The uri parameter of AnyMethod.'
                        '__call__() must be of url.URL type.')

    if not isinstance(headers, Headers):
        raise TypeError('The headers parameter of AnyMethod.'
                        '__call__() must be of Headers type.')

    self._xurllib._init()

    req = HTTPRequest(uri, data, cookies=cookies, cache=cache,
                      method=self._method,
                      ignore_errors=ignore_errors)
    req = self._xurllib._add_headers(req, headers or {})
    return self._xurllib._send(req, grep=grep)
def crawl(self, fuzzable_request, debugging_id):
    """
    Searches for new URLs using fuzzing.

    :param debugging_id: A unique identifier for this call to discover()
    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    """
    url = fuzzable_request.get_url()
    # All fuzzed requests use the original URL as Referer
    self._headers = Headers([('Referer', url.url_string)])

    # Only check once per scan whether HEAD is supported by the server
    if self._first_time:
        self._verify_head_enabled(url)
        self._first_time = False

    # First we need to delete fragments and query strings from URL.
    url = url.uri2url()

    # And we mark this one as a "do not return" URL, because the
    # core already found it using another technique.
    self._seen.add(url)

    self._verify_head_enabled(url)
    # Prefer HEAD (cheaper) when supported, fall back to GET
    if self._head_enabled():
        response = self._uri_opener.HEAD(url, cache=True,
                                         headers=self._headers)
    else:
        response = self._uri_opener.GET(url, cache=True,
                                        headers=self._headers)

    # Only fuzz URLs that look like pages (or images, when configured)
    if response.is_text_or_html() or self._fuzz_images:
        # Lazily chain all mutation strategies into one iterable
        mutants_chain = chain(self._mutate_by_appending(url),
                              self._mutate_path(url),
                              self._mutate_file_type(url),
                              self._mutate_domain_name(url))
        url_repeater = repeat(url)
        # Pair the original url with each mutant: _do_request(url, mutant)
        args = izip(url_repeater, mutants_chain)

        # Send the HTTP requests concurrently through the worker pool
        self.worker_pool.map_multi_args(self._do_request, args)
def test_parser_simple_form(self):
    """A single WML <go> element must be parsed into exactly one form."""
    wml = """<go method="post" href="post.php">
                <postfield name="clave" value="$(clave)"/>
                <postfield name="cuenta" value="$(cuenta)"/>
                <postfield name="tipdat" value="D"/>
             </go>"""
    response = HTTPResponse(200, wml, Headers(), self.url, self.url)

    parser = WMLParser(response)
    parser.parse()
    forms = parser.get_forms()

    self.assertEqual(1, len(forms))

    parsed_form = forms[0]
    self.assertEqual(parsed_form.get_action().url_string,
                     u'http://www.w3af.com/post.php')

    for field_name in ('clave', 'cuenta', 'tipdat'):
        self.assertIn(field_name, parsed_form)
def dc_from_hdrs_post(headers, post_data):
    """
    :param headers: HTTP request headers, most importantly containing the
                    content-type info.
    :param post_data: The HTTP request post-data as a string
    :return: The best-match from POST_DATA_CONTAINERS to hold the information
             in self._post_data @ FuzzableRequest
    """
    if headers is None:
        headers = Headers()

    # Try each container in priority order; the first one that accepts the
    # post-data wins
    for pdc_klass in POST_DATA_CONTAINERS:
        try:
            return pdc_klass.from_postdata(headers, post_data)
        except (ValueError, TypeError):
            pass

    # Fix: this fallback used to live in a `for ... else:` clause, which is
    # misleading since the loop has no `break` (the else always ran when no
    # container matched); a plain fall-through is equivalent and clearer
    content_type, _ = headers.iget('content-type', 'None')
    msg = 'Unknown post-data. Content-type: "%s" and/or post-data "%s"'
    om.out.debug(msg % (content_type, post_data[:50]))

    # Last-resort container that holds the raw post-data untouched
    return PlainContainer.from_postdata(headers, post_data)
def test_strange_headers_positive(self):
    """An uncommon response header must create one info set in the KB."""
    url = URL('http://www.w3af.com/')
    headers = Headers([('content-type', 'text/html'),
                       ('hello-world', 'yes!')])
    request = FuzzableRequest(url, method='GET')
    resp_positive = HTTPResponse(200, 'Hello world', headers, url, url, _id=1)

    self.plugin.grep(request, resp_positive)

    info_sets = kb.kb.get('strange_headers', 'strange_headers')
    self.assertEquals(1, len(info_sets))

    expected_desc = (u'The remote web server sent 1 HTTP responses with'
                     u' the uncommon response header "hello-world", one'
                     u' of the received header values is "yes!". The'
                     u' first ten URLs which sent the uncommon header'
                     u' are:\n - http://www.w3af.com/\n')

    info = info_sets[0]
    self.assertEqual(info.get_name(), 'Strange header')
    self.assertEqual(info.get_url(), url)
    self.assertEqual(info.get_desc(), expected_desc)
def from_dict(cls, unserialized_dict):
    """
    Rebuild an HTTPResponse instance from the dict created by to_dict().

    * msgpack is MUCH faster than cPickle,
    * msgpack can't serialize python objects,
    * I have to create a dict representation of HTTPResponse to serialize it
    * and a from_dict to have the object back

    :param unserialized_dict: A dict just as returned by to_dict()
    """
    udict = unserialized_dict

    headers_inst = Headers(udict['headers'].items())
    url = URL(udict['uri'])

    # When the response was a redirect, point at the final URI; otherwise
    # both URLs are the same
    if udict.get('redirect_uri'):
        redirect_url = URL(udict['redirect_uri'])
    else:
        redirect_url = url

    return cls(udict['code'], udict['body'], headers_inst, redirect_url, url,
               msg=udict['msg'], _id=udict['id'], time=udict['time'],
               charset=udict.get('charset'))
def test_clamav_eicar(self, *args):
    """grep() must flag the EICAR test string as malware."""
    url = URL('http://www.w3af.com/')
    eicar_body = pyclamd.ClamdAgnostic().EICAR()
    headers = Headers([('content-type', 'text/html')])
    response = HTTPResponse(200, eicar_body, headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    self.plugin.grep(request, response)

    # Let the worker pool wait for the clamd response, this is done by
    # the core when run in a real scan
    self.plugin.worker_pool.close()
    self.plugin.worker_pool.join()

    findings = kb.kb.get('clamav', 'malware')
    self.assertEqual(1, len(findings))

    finding = findings[0]
    self.assertEqual('Malware identified', finding.get_name())
    self.assertIn('ClamAV identified malware', finding.get_desc())
    self.assertEqual(url.url_string, finding.get_url().url_string)
def test_page_not_found_with_large_response(self):
    """A large 200 response at a unique URL must not be classified as 404."""
    httpretty.register_uri(httpretty.GET,
                           re.compile('w3af.com/(.*)'),
                           body=self.request_callback,
                           status=200)

    # FIXME: There is an interference issue between unittests, if the same
    #        URL is used for multiple unittests, the test will fail. This
    #        is most likely a cache I'm not clearing in tearDown, but I
    #        was unable to find the root cause.
    success_url = URL('http://w3af.com/fiaasxd322/')

    parts = ['Welcome to our site',
             'Content is being loaded using async JS',
             'Please wait...']
    response_headers = Headers([('Content-Type', 'text/html')])
    success_200 = HTTPResponse(200, self.get_body(parts),
                               response_headers, success_url, success_url)

    self.assertFalse(self.fingerprint_404.is_404(success_200))
def test_retrieve_csp_policies_with_policies_case05(self):
    """
    Test case in which 4 policies are specified using 4 differents CSP
    headers and in which 1 is specified using report only CSP header.
    Test in which we want only report-only policies.
    """
    header_map = {
        CSP_HEADER_W3C: CSP_DIRECTIVE_OBJECT + " 'none'",
        CSP_HEADER_FIREFOX: CSP_DIRECTIVE_IMAGE + " *",
        CSP_HEADER_CHROME: CSP_DIRECTIVE_CONNECTION + " trust.sample.com",
        CSP_HEADER_W3C_REPORT_ONLY: CSP_DIRECTIVE_SCRIPT + " report.sample.com",
    }
    csp_headers = Headers(header_map.items())
    http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)

    # Only the report-only policy should survive the filter
    policies = retrieve_csp_policies(http_response, True)

    self.assertEqual(1, len(policies))
    self.assertEqual(1, len(policies[CSP_DIRECTIVE_SCRIPT]))
    self.assertEqual("report.sample.com", policies[CSP_DIRECTIVE_SCRIPT][0])
def _send_to_server(self, grep=False):
    """
    Send a request that arrived from the browser to the remote web server.

    Important variables used here:
        - self.headers : Stores the headers for the request
        - self.rfile : A file like object that stores the post_data
        - self.path : Stores the URL that was requested by the browser

    :param grep: When True, run the grep plugins on the response.
    """
    # One request per connection between the proxy and the server
    self.headers['Connection'] = 'close'

    # See HTTPWrapperClass
    if hasattr(self.server, 'chainedHandler'):
        # NOTE(review): in the chained (intercepting) case the browser sends
        # a relative path, so rebuild the absolute HTTPS URL — confirm in
        # HTTPWrapperClass
        base_path = "https://" + self.server.chainedHandler.path
        path = base_path + self.path
    else:
        path = self.path

    uri_instance = URL(path)

    #
    # Do the request to the remote server
    #
    post_data = None
    if 'content-length' in self.headers.dict:
        # most likely a POST request
        post_data = self._get_post_data()

    # self.command holds the HTTP verb; the opener exposes one method per
    # verb (GET, POST, ...) so resolve it dynamically
    http_method = getattr(self._uri_opener, self.command)
    headers = Headers(self.headers.items())

    try:
        res = http_method(uri_instance, data=post_data, headers=headers,
                          grep=grep)
    except BaseFrameworkException, w:
        # Log the failure and let the caller deal with the exception
        om.out.error('The proxy request failed, error: ' + str(w))
        raise w
def test_retrieve_csp_policies_with_policies_case03(self):
    """
    Test case in which 3 policies are specified using 3 differents CSP
    headers.
    """
    header_map = {
        CSP_HEADER_W3C: CSP_DIRECTIVE_OBJECT + " 'none'",
        CSP_HEADER_FIREFOX: CSP_DIRECTIVE_IMAGE + " *",
        CSP_HEADER_CHROME: CSP_DIRECTIVE_CONNECTION + " trust.sample.com",
    }
    csp_headers = Headers(header_map.items())
    http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)

    policies = retrieve_csp_policies(http_response)

    self.assertEqual(3, len(policies))

    # Each directive must carry exactly one source value
    expected = {CSP_DIRECTIVE_OBJECT: "none",
                CSP_DIRECTIVE_IMAGE: "*",
                CSP_DIRECTIVE_CONNECTION: "trust.sample.com"}

    for directive, value in expected.items():
        self.assertEqual(1, len(policies[directive]))
        self.assertEqual(value, policies[directive][0])
def test_3234(self):
    """
    is_404 can not handle URLs with : in path #3234
    :see: https://github.com/andresriancho/w3af/issues/3234
    """
    # setup
    httpretty.register_uri(httpretty.GET,
                           re.compile("w3af.com/(.*)"),
                           body="404 found",
                           status=404)

    colon_url = URL('http://w3af.com/d:a')
    colon_resp = HTTPResponse(200, 'body', Headers(), colon_url, colon_url)

    # setup, just to make some config settings values default
    w3af_core = w3afCore()
    w3af_core.scan_start_hook()

    # test
    fingerprint_db = fingerprint_404_singleton()
    self.assertFalse(fingerprint_db._is_404_with_extra_request(colon_resp,
                                                               'body'))
def test_analyze_cookies_secure_over_http(self):
    """A cookie with the secure flag served over plain HTTP is reported."""
    url = URL('http://www.w3af.com/')
    headers = Headers([('content-type', 'text/html'),
                       ('Set-Cookie', 'abc=def; secure;')])
    response = HTTPResponse(200, '', headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    self.plugin.grep(request, response)

    false_secure = kb.kb.get('analyze_cookies', 'false_secure')

    self.assertEqual(1, len(kb.kb.get('analyze_cookies', 'cookies')))
    self.assertEqual(1, len(false_secure))
    self.assertEqual(0, len(kb.kb.get('analyze_cookies', 'invalid-cookies')))

    expected_msg = 'A cookie marked with the secure flag'
    self.assertTrue(any(expected_msg in i.get_desc() for i in false_secure))
def on_request_drop(self, http_request):
    """
    When the UI calls "drop request" we need to modify our queues

    :param http_request: The request to drop
    :return: None, simply queue a "Request drop HTTP response"
    """
    body = render('drop.html', {}).encode('utf-8')
    drop_headers = Headers([('Connection', 'close'),
                            ('Content-type', 'text/html')])

    uri = http_request.get_uri()
    drop_response = HTTPResponse(403, body, drop_headers, uri, uri,
                                 msg='Request drop')

    self.parent_process.requests_already_modified.put(drop_response)
def test_analyze_cookies_fingerprint(self):
    """A PHPSESSID cookie must fingerprint the remote platform as PHP."""
    url = URL('http://www.w3af.com/')
    headers = Headers([('content-type', 'text/html'),
                       ('Set-Cookie', 'PHPSESSID=d98238ab39de038')])
    response = HTTPResponse(200, '', headers, url, url, _id=1)
    request = FuzzableRequest(url, method='GET')

    self.plugin.grep(request, response)

    fingerprint = kb.kb.get('analyze_cookies', 'fingerprint')

    self.assertEqual(1, len(kb.kb.get('analyze_cookies', 'cookies')))
    self.assertEqual(1, len(fingerprint))
    self.assertEqual(0, len(kb.kb.get('analyze_cookies', 'invalid-cookies')))

    expected_msg = 'The remote platform is: "PHP"'
    self.assertTrue(any(expected_msg in i.get_desc() for i in fingerprint))
def test_no_content_sniffing(self):
    """A response missing X-Content-Type-Options must be reported."""
    url = URL('https://www.w3af.com/')
    request = FuzzableRequest(url, method='GET')
    resp = HTTPResponse(200, '', Headers([('content-type', 'text/html')]),
                        url, url, _id=1)

    self.plugin.grep(request, resp)

    findings = kb.kb.get('content_sniffing', 'content_sniffing')
    self.assertEquals(1, len(findings), findings)

    expected_desc = (u'The remote web application sent 1 HTTP responses'
                     u' which do not contain the X-Content-Type-Options'
                     u' header. The first ten URLs which did not send the'
                     u' header are:\n - https://www.w3af.com/\n')

    info_set = findings[0]
    self.assertEqual([1], info_set.get_id())
    self.assertEqual(expected_desc, info_set.get_desc())
    self.assertEqual('Missing X-Content-Type-Options header',
                     info_set.get_name())
def test_retrieve_csp_policies_with_policies_case02(self):
    """
    Test case in which several policies are specified using only 1 CSP
    header but with 7 differents directives.
    """
    header_value = ("default-src 'self'; img-src *;"
                    " object-src media1.example.com media2.example.com"
                    " *.cdn.example.com; script-src trustedscripts.example.com;"
                    " form-action /ctxroot/action1 /ctxroot/action2;"
                    " plugin-types application/pdf application/x-java-applet;"
                    " reflected-xss block")
    csp_headers = Headers({CSP_HEADER_W3C: header_value}.items())
    http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)

    policies = retrieve_csp_policies(http_response)

    self.assertEqual(7, len(policies))

    # Directives that must hold exactly one source value
    single_valued = {CSP_DIRECTIVE_DEFAULT: "self",
                     CSP_DIRECTIVE_IMAGE: "*",
                     CSP_DIRECTIVE_SCRIPT: "trustedscripts.example.com",
                     CSP_DIRECTIVE_XSS: "block"}

    for directive, value in single_valued.items():
        self.assertEqual(1, len(policies[directive]))
        self.assertIn(value, policies[directive])

    # Directives that must hold several source values
    multi_valued = {
        CSP_DIRECTIVE_OBJECT: ["media1.example.com",
                               "media2.example.com",
                               "*.cdn.example.com"],
        CSP_DIRECTIVE_FORM: ["/ctxroot/action1", "/ctxroot/action2"],
        CSP_DIRECTIVE_PLUGIN_TYPES: ["application/pdf",
                                     "application/x-java-applet"],
    }

    for directive, values in multi_valued.items():
        self.assertEqual(len(values), len(policies[directive]))
        for value in values:
            self.assertIn(value, policies[directive])
def test_large_many_endpoints(self):
    """Endpoint count differs with and without swagger spec validation."""
    spec_url = URL('http://moth/swagger.json')
    headers = Headers({'Content-Type': 'application/json'}.items())
    response = HTTPResponse(200, file(self.LARGE_MANY_ENDPOINTS).read(),
                            headers, spec_url, spec_url, _id=1)

    self.assertTrue(OpenAPI.can_parse(response))

    # In some cases with validation enabled (not the default) we find a set
    # of endpoints:
    validating_parser = OpenAPI(response, validate_swagger_spec=True)
    validating_parser.parse()
    self.assertEqual(161, len(validating_parser.get_api_calls()))

    # And without spec validation there is a different set of endpoints:
    parser = OpenAPI(response)
    parser.parse()
    api_calls = parser.get_api_calls()
    self.assertEqual(165, len(api_calls))

    expected_uri = 'https://target.com/api/Partners/3419/Agreement?performedBy=56'
    self.assertEqual(expected_uri, api_calls[0].get_uri().url_string)
def __init__(self, url, data=None, headers=None, origin_req_host=None,
             unverifiable=False, cookies=True, cache=False,
             method=None, ignore_errors=False):
    """
    This is a simple wrapper around a urllib2 request object which helps
    with some common tasks like serialization, cache, etc.

    :param method: None means "choose the method in the default way":
                        if self.has_data():
                            return "POST"
                        else:
                            return "GET"
    """
    # Fix: the default used to be a single mutable Headers() instance
    # shared by every call; create a fresh one per call instead
    if headers is None:
        headers = Headers()

    #
    # Save some information for later access in an easier way
    #
    self.url_object = url
    self.cookies = cookies
    self.get_from_cache = cache
    self.ignore_errors = ignore_errors

    self.method = method
    if self.method is None:
        self.method = 'POST' if data else 'GET'

    # urllib2 expects a plain dict of headers
    headers = dict(headers)

    # Call the base class constructor
    urllib2.Request.__init__(self, url.url_encode(), data, headers,
                             origin_req_host, unverifiable)
    RequestMixIn.__init__(self)
def crawl(self, fuzzable_request):
    """
    Searches for user directories.

    :param fuzzable_request: A fuzzable_request instance that contains
                             (among other things) the URL to test.
    :raise BaseFrameworkException: When the non-existent-user signature
                                   request fails.
    """
    base_url = fuzzable_request.get_url().base_url()
    self._headers = Headers([('Referer', base_url.url_string)])

    # Create a response body to compare with the others
    non_existent_user = '******'
    test_URL = base_url.url_join(non_existent_user)
    try:
        response = self._uri_opener.GET(test_URL, cache=True,
                                        headers=self._headers)
    except Exception:
        # Fix: this used to be a bare "except:" which also swallowed
        # KeyboardInterrupt and SystemExit
        msg = 'user_dir failed to create a non existent signature.'
        raise BaseFrameworkException(msg)

    response_body = response.get_body()
    self._non_existent = response_body.replace(non_existent_user, '')

    # Check the users to see if they exist
    url_user_list = self._create_dirs(base_url)

    # Send the requests using threads:
    self.worker_pool.map_multi_args(self._do_request, url_user_list)

    # Only do this if I already know that users can be identified.
    if kb.kb.get('user_dir', 'users'):
        if self._identify_OS:
            self._advanced_identification(base_url, 'os')

        if self._identify_applications:
            self._advanced_identification(base_url, 'applications')

        # Report findings of remote OS, applications, users, etc.
        self._report_findings()
def __init__(self, url, data=None, headers=None, origin_req_host=None,
             unverifiable=False, cookies=True, cache=False,
             method=None, ignore_errors=False):
    """
    This is a simple wrapper around a urllib2 request object which helps
    with some common tasks like serialization, cache, etc.

    :param method: None means choose the default (POST if data is not None)
    :param data: The post_data as a string
    """
    # Fix: the default used to be a single mutable Headers() instance
    # shared by every call; create a fresh one per call instead
    if headers is None:
        headers = Headers()

    #
    # Save some information for later access in an easier way
    #
    self.url_object = url
    self.cookies = cookies
    self.get_from_cache = cache
    self.ignore_errors = ignore_errors

    self.method = method
    if self.method is None:
        self.method = 'POST' if data else 'GET'

    if isinstance(headers, Headers):
        # NOTE: this mutates the Headers instance the caller passed in
        headers.tokens_to_value()

    # urllib2 expects a plain dict of headers
    headers = dict(headers)

    # Call the base class constructor
    urllib2.Request.__init__(self, url.url_encode(), data, headers,
                             origin_req_host, unverifiable)
    RequestMixIn.__init__(self)
def test_get_clear_text_issue_4402(self):
    """
    :see: https://github.com/andresriancho/w3af/issues/4402
    """
    test_file = os.path.join(ROOT_PATH,
                             'core/data/url/tests/data/encoding_4402.php')
    body = file(test_file, 'rb').read()

    # Exercise every sample encoding plus an empty one and plain utf-8
    encodings = [enc for _, (_, enc) in TEST_RESPONSES.iteritems()]
    encodings += ['', 'utf-8']

    for encoding in encodings:
        content_type = 'text/html; charset=%s' % encoding
        headers = Headers([('Content-Type', content_type)])
        resp = _build_http_response(URL_INST, body, headers)

        parser = _SGMLParser(resp)
        parser._parse(resp)
        parser.get_clear_text_body()
def test_missing_license_name(self):
    """A spec without a license name must still parse into API calls."""
    spec_url = URL('http://moth/swagger.json')
    headers = Headers({'Content-Type': 'application/json'}.items())
    response = HTTPResponse(200, file(self.MISSING_LICENSE).read(),
                            headers, spec_url, spec_url, _id=1)

    parser = OpenAPI(response)
    parser.parse()
    api_calls = parser.get_api_calls()

    self.assertEqual(5, len(api_calls))

    expected_uri = 'http://1.2.3.4/api/prod/2.0/employees/3419'
    self.assertEqual(expected_uri, api_calls[0].get_uri().url_string)
def test_model_with_int_param_json(self):
    """An int model parameter yields one POST operation with JSON body."""
    spec = IntParamJson().get_specification()
    handler = SpecificationHandler(self.generate_response(spec))

    data = list(handler.get_api_information())

    # The specification says that this query string parameter is
    # required and there is only one parameter, so there is no second
    # operation with the optional parameters filled in.
    self.assertEqual(1, len(data))

    fuzzable_request = RequestFactory(*data[0]).get_fuzzable_request()

    expected_headers = Headers([('Content-Type', 'application/json')])

    self.assertEqual('POST', fuzzable_request.get_method())
    self.assertEqual('http://petstore.swagger.io/api/pets',
                     fuzzable_request.get_uri().url_string)
    self.assertEqual(expected_headers, fuzzable_request.get_headers())
    self.assertEqual('{"pet": {"count": 42}}', fuzzable_request.get_data())
def test_pickleable_dom(self):
    """Skipped: pickling an HTTPResponse with a cached lxml DOM."""
    msg = 'lxml DOM objects are NOT pickleable. This is an impediment for' \
          ' having a multiprocess process that will perform all HTTP requests' \
          ' and return HTTP responses over a multiprocessing Queue AND only' \
          ' process the DOM once. Of course I can set the dom to None before' \
          ' pickling.'
    raise SkipTest(msg)

    # Dead code below the skip, kept for when DOM pickling becomes possible
    headers = Headers([('Content-Type', 'text/html')])
    resp = self.create_resp(headers,
                            'header <b>ABC</b>-<b>DEF</b>-<b>XYZ</b> footer')

    # This just calculates the DOM and stores it as an attribute, NEEDS
    # to be done before pickling (dumps) to have a real test.
    dom_before = resp.get_dom()

    round_tripped = cPickle.loads(cPickle.dumps(resp))

    self.assertEqual(round_tripped, resp)
    self.assertEqual(round_tripped.get_dom(), dom_before)
def test_parse_html_performance(self):
    """
    Parse the same large HTML response many times in a row.

    Fix: removed the leftover manual-profiling scaffolding — a
    time.sleep(360) (six minutes!) plus commented-out measure_memory
    calls and a dead `parsers` list — which made the automated test
    suite stall for no benefit.
    """
    headers = Headers()
    headers['content-type'] = 'text/html'
    body = file(self.HTML_FILE).read()
    url = URL('http://www.w3af.org/')
    response = HTTPResponse(200, body, headers, url, url, charset='utf-8')

    # Build the DOM repeatedly; a regression here shows up as a much
    # slower test run
    for _ in xrange(40):
        p = HTMLParser(response)
        p.get_dom()
def discover(self, fuzzable_request):
    """Detect CVE-2015-2080 (JetLeak) by sending a NUL byte in Referer."""
    url = fuzzable_request.get_url()
    referer_headers = Headers([('Referer', '\x00')])
    response = self._uri_opener.GET(url, cache=False, grep=False,
                                    headers=referer_headers)

    # A vulnerable Jetty answers 400 with a very specific status message
    if response.get_code() != 400:
        return

    if 'Illegal character 0x0 in state' not in response.get_msg():
        return

    desc = ('The application appears to be running a version of Jetty'
            ' vulnerable to CVE-2015-2080, which allows attackers to'
            ' read arbitrary server memory buffers')

    vuln = Vuln('JetLeak', desc, HIGH, response.id, self.get_name())
    vuln.set_url(response.get_url())

    self.kb_append_uniq(self, 'jetleak', vuln)
def test_https_without_sts(self):
    """An HTTPS response without Strict-Transport-Security is reported."""
    url = URL('https://www.w3af.com/')
    request = FuzzableRequest(url, method='GET')
    resp = HTTPResponse(200, '', Headers([('content-type', 'text/html')]),
                        url, url, _id=1)

    self.plugin.grep(request, resp)

    findings = kb.kb.get('strict_transport_security',
                         'strict_transport_security')
    self.assertEquals(1, len(findings), findings)

    expected_desc = (u'The remote web server sent 1 HTTPS responses which'
                     u' do not contain the Strict-Transport-Security'
                     u' header. The first ten URLs which did not send the'
                     u' header are:\n - https://www.w3af.com/\n')

    info_set = findings[0]
    self.assertEqual([1], info_set.get_id())
    self.assertEqual(expected_desc, info_set.get_desc())
    self.assertEqual('Missing Strict Transport Security header',
                     info_set.get_name())
def test_mark(self):
    """Only the history item toggled as marked must load with mark=True."""
    mark_id = 3
    url = URL('http://w3af.org/a/b/c.php')

    # Save 500 history items, marking only the one with id == mark_id
    for i in xrange(0, 500):
        request = HTTPRequest(url, data='a=1')
        hdr = Headers([('Content-Type', 'text/html')])
        res = HTTPResponse(200, '<html>', hdr, url, url)
        res.set_id(i)

        history = HistoryItem()
        history.request = request
        history.response = res
        if i == mark_id:
            history.toggle_mark()
        history.save()

    loaded_marked = HistoryItem()
    loaded_marked.load(mark_id)
    self.assertTrue(loaded_marked.mark)

    loaded_unmarked = HistoryItem()
    loaded_unmarked.load(mark_id - 1)
    self.assertFalse(loaded_unmarked.mark)