def batch_injection_test(self, freq, orig_response):
    """
    Look for memcache injections using the batch injection technique.

    For every mutant created from `freq` up to three requests are sent:

        1. ERROR_1: the response must differ from the original response
        2. OK: the response must differ from the ERROR_1 response
        3. ERROR_2: the response must differ from the original response

    The mutant is reported only when the three conditions hold.

    :param freq: The fuzzable request used to create the mutants
    :param orig_response: The HTTP response for the unmodified request
    :return: None, the vulnerabilities are stored with kb_append_uniq()
    """
    send = self._uri_opener.send_clean
    baseline_body = orig_response.get_body()

    for mutant in create_mutants(freq, ['']):

        # Step 1: try to break the normal execution flow. When the response
        # still looks like the original one nothing was broken, so there is
        # nothing to inject into.
        mutant.set_token_value(self.ERROR_1)
        first_error_resp, first_error_body = send(mutant)

        if fuzzy_equal(baseline_body, first_error_body, self._eq_limit):
            continue

        # Step 2: send the well-formed injection. An answer equal to the
        # ERROR_1 one means that this is not a memcached injection.
        mutant.set_token_value(self.OK)
        ok_resp, ok_body = send(mutant)

        if fuzzy_equal(first_error_body, ok_body, self._eq_limit):
            continue

        # Step 3: break it again to confirm, the response must differ from
        # the original one once more.
        mutant.set_token_value(self.ERROR_2)
        second_error_resp, second_error_body = send(mutant)

        if fuzzy_equal(baseline_body, second_error_body, self._eq_limit):
            continue

        evidence_ids = [first_error_resp.id,
                        ok_resp.id,
                        second_error_resp.id]

        desc_fmt = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
        desc = desc_fmt % (mutant.get_url(),
                           mutant.get_method(),
                           mutant.get_token_name())

        vuln = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                severity.HIGH, evidence_ids, 'memcachei',
                                mutant)

        self.kb_append_uniq(self, 'memcachei', vuln)
def matches(self, query):
    """
    Check if the `query` HTTP response body matches the failed login page
    described by this instance (self.body_a and self.body_b).

    :param query: An HTTP response body
    :return: True if `query` is considered equal to the stored failed login
             bodies, False otherwise.
    """
    # Cheapest check first: exact match with any of the two samples
    if self.body_a == query or self.body_b == query:
        return True

    # Really different bodies, there is no need to calculate the diff
    if not fuzzy_equal(self.body_a, query, 0.60):
        return False

    # The diff between the two known samples is calculated only once and
    # then kept in the instance
    if self.diff_a_b is None:
        known_diff, _ = chunked_diff(self.body_a, self.body_b)
        self.diff_a_b = known_diff

    _, query_diff = chunked_diff(self.body_a, query)

    # CSRF tokens might be part of the HTTP response body, they are random
    # (not removed by clean_body) and would "break" the diff comparison.
    # A very small diff is accepted as a match.
    if len(query_diff) < 64:
        return True

    return bool(fuzzy_equal(self.diff_a_b, query_diff, 0.9))
def batch_injection_test(self, freq, orig_response):
    """
    Look for memcache injections using the batch injection technique.

    For every mutant created from `freq` up to three requests are sent:

        1. ERROR_1: the response must differ from the original response
        2. OK: the response must differ from the ERROR_1 response
        3. ERROR_2: the response must differ from the original response

    The mutant is reported only when the three conditions hold.

    :param freq: The fuzzable request used to create the mutants
    :param orig_response: The HTTP response for the unmodified request
    :return: None, the vulnerabilities are stored with kb_append_uniq()
    """
    send = self._uri_opener.send_clean
    baseline_body = orig_response.get_body()

    for mutant in create_mutants(freq, ['']):

        # Step 1: try to break the normal execution flow. When the response
        # still looks like the original one nothing was broken, so there is
        # nothing to inject into.
        mutant.set_token_value(self.ERROR_1)
        first_error_resp, first_error_body = send(mutant)

        if fuzzy_equal(baseline_body, first_error_body, self._eq_limit):
            continue

        # Step 2: send the well-formed injection. An answer equal to the
        # ERROR_1 one means that this is not a memcached injection.
        mutant.set_token_value(self.OK)
        ok_resp, ok_body = send(mutant)

        if fuzzy_equal(first_error_body, ok_body, self._eq_limit):
            continue

        # Step 3: break it again to confirm, the response must differ from
        # the original one once more.
        mutant.set_token_value(self.ERROR_2)
        second_error_resp, second_error_body = send(mutant)

        if fuzzy_equal(baseline_body, second_error_body, self._eq_limit):
            continue

        evidence_ids = [first_error_resp.id,
                        ok_resp.id,
                        second_error_resp.id]

        desc_fmt = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
        desc = desc_fmt % (mutant.get_url(),
                           mutant.get_method(),
                           mutant.get_token_name())

        vuln = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                severity.HIGH, evidence_ids, 'memcachei',
                                mutant)

        self.kb_append_uniq(self, 'memcachei', vuln)
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Does a quick estimation to determine if the two strings (diff_x and
    diff_y) are fuzzy equal.

    Not using fuzzy_equal() directly on the results of applying diff()
    because CSRF tokens and other randomly generated tokens were breaking
    the comparison. This function removes those randomly generated strings
    and then does the compare.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses
                           (0 to 1)
    :return: True if the two results of applying the diff() function are
             fuzzy equal (applying split_by_sep technique)
    """
    # Fast path: identical inputs are trivially equal, no need to pay for
    # splitting, hash removal and the fuzzy comparison.
    if diff_x == diff_y:
        return True

    split_x = remove_hashes(split_by_sep(diff_x))
    split_y = remove_hashes(split_by_sep(diff_y))

    x = '\n'.join(split_x)
    y = '\n'.join(split_y)

    return fuzzy_equal(x, y, threshold=is_equal_ratio)
def _is_404_with_extra_request(self, http_response, clean_resp_body):
    """
    Simple check to verify if a response is a 404: request a sibling URL
    which should not exist and compare its (clean) body with the body of
    the original response. If they are similar the original is a 404.

    The URL to request is built from the original one:
        * With a filename: the filename is replaced by the result of
          self._generate_404_filename(filename)
        * Without a filename: a random eight character directory is joined
          as '../<random>/'

    As a side effect, when the extra request returns a 404 code, the
    domain path of the requested URL is added to
    self._directory_uses_404_codes.

    :param http_response: The original HTTP response
    :param clean_resp_body: The body of http_response after passing it
                            by a cleaner
    :return: True if the original response was a 404 !
    """
    original_url = http_response.get_url()
    original_filename = original_url.get_file_name()

    if original_filename:
        not_found_name = self._generate_404_filename(original_filename)
        url_404 = original_url.copy()
        url_404.set_file_name(not_found_name)
    else:
        random_dir = '../%s/' % rand_alnum(8)
        url_404 = original_url.url_join(random_dir)

    response_404 = self._send_404(url_404)
    clean_body_404 = get_clean_body(response_404)

    if response_404.get_code() == 404:
        domain_path_404 = url_404.get_domain_path()

        if domain_path_404 not in self._directory_uses_404_codes:
            self._directory_uses_404_codes.add(domain_path_404)

    return fuzzy_equal(clean_body_404, clean_resp_body, IS_EQUAL_RATIO)
def _is_404_with_extra_request(self, http_response, clean_resp_body):
    """
    Simple check to verify if a response is a 404: request a sibling URL
    which should not exist and compare its (clean) body with the body of
    the original response. If they are similar the original is a 404.

    The URL to request is built from the original one:
        * With a filename: the filename is replaced by the result of
          self._generate_404_filename(filename)
        * Without a filename: a random eight character directory is joined
          as '../<random>/'

    As a side effect, when the extra request returns a 404 code, the
    domain path of the requested URL is added to
    self._directory_uses_404_codes.

    :param http_response: The original HTTP response
    :param clean_resp_body: The body of http_response after passing it
                            by a cleaner
    :return: True if the original response was a 404 !
    """
    original_url = http_response.get_url()
    original_filename = original_url.get_file_name()

    if original_filename:
        not_found_name = self._generate_404_filename(original_filename)
        url_404 = original_url.copy()
        url_404.set_file_name(not_found_name)
    else:
        random_dir = '../%s/' % rand_alnum(8)
        url_404 = original_url.url_join(random_dir)

    response_404 = self._send_404(url_404)
    clean_body_404 = get_clean_body(response_404)

    if response_404.get_code() == 404:
        domain_path_404 = url_404.get_domain_path()

        if domain_path_404 not in self._directory_uses_404_codes:
            self._directory_uses_404_codes.add(domain_path_404)

    return fuzzy_equal(clean_body_404, clean_resp_body, IS_EQUAL_RATIO)
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Quick estimation to decide if two diff() results are fuzzy equal.

    fuzzy_equal() is not applied directly to the diff() results because
    CSRF tokens and other randomly generated tokens were breaking the
    comparison. Each input is split with split_by_sep(), filtered with
    remove_hashes() and joined with new lines before comparing.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses
                           (0 to 1)
    :return: True if both diff() results are fuzzy equal
    """
    # Identical inputs do not require any further processing
    if diff_x == diff_y:
        return True

    clean_x = '\n'.join(remove_hashes(split_by_sep(diff_x)))
    clean_y = '\n'.join(remove_hashes(split_by_sep(diff_y)))

    return fuzzy_equal(clean_x, clean_y, threshold=is_equal_ratio)
def _filter_errors(self, result, filename): """ Filter out ugly php errors and print a simple "Permission denied" or "File not found" """ #print filename error = None if result.count('Permission denied'): error = PERMISSION_DENIED elif result.count('No such file or directory in'): error = NO_SUCH_FILE elif result.count('Not a directory in'): error = READ_DIRECTORY elif result.count(': failed to open stream: '): error = FAILED_STREAM elif self._file_not_found_str is not None: # The result string has the file I requested inside, so I'm going # to remove it. clean_result = result.replace(filename, '') # Now I compare both strings, if they are VERY similar, then # filename is a non existing file. if fuzzy_equal(self._file_not_found_str, clean_result, 0.9): error = NO_SUCH_FILE # # I want this function to return an empty string on errors. # Not the error itself. # if error is not None: return '' return result
def clean_404_response_db(self):
    """
    During the scan, and because I chose to remove the very broad 404
    database lock, the 404 response database might become untidy: the same
    HTTP response might be appended to the DB multiple times.

    An untidy DB triggers more comparisons between HTTP responses, which
    is CPU-intensive.

    This method cleans the DB every CLEAN_DB_EVERY calls to remove
    duplicates. The first response of each group of similar responses is
    kept, the following ones are removed.

    :return: None. The extended DB is modified.
    """
    self._clean_404_response_db_calls += 1

    if self._clean_404_response_db_calls % CLEAN_DB_EVERY != 0:
        return

    removed_items = 0

    # Responses which are known to be unique so far. Each candidate is only
    # compared against these: comparing against the whole snapshot makes
    # both members of a similar pair match each other, and then *all* the
    # copies are removed from the DB instead of all but one.
    kept = []

    # Work on a snapshot, the DB might be changed by other threads
    for candidate in copy.copy(self._extended_404_responses):

        duplicate_of = None

        for known in kept:
            if fuzzy_equal(candidate.body, known.body, IS_EQUAL_RATIO):
                duplicate_of = known
                break

        if duplicate_of is None:
            kept.append(candidate)
            continue

        # candidate (or something really similar) already exists in
        # self._extended_404_responses, remove it and continue with the
        # next one
        try:
            self._extended_404_responses.remove(candidate)
        except ValueError:
            # The 404 response DB might have been changed by another thread
            continue

        msg = ('Removed 404 response for "%s" (id: %s) from the 404 DB'
               ' because it matches 404 response "%s" (id: %s)')
        args = (candidate.url, candidate.id, duplicate_of.url, duplicate_of.id)
        om.out.debug(msg % args)

        removed_items += 1

    msg = 'Called clean 404 response DB. Removed %s duplicates from DB.'
    args = (removed_items,)
    om.out.debug(msg % args)

    msg = 'The extended 404 response DB contains responses with IDs: %s'
    ids = ', '.join(str(r.id) for r in copy.copy(self._extended_404_responses))
    om.out.debug(msg % (ids,))
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio.

    :param body1: The first response body
    :param body2: The second response body
    :param compare_diff: When set, only the parts of the response bodies
                         which are different (as returned by diff()) are
                         compared.
    :return: The result of fuzzy_equal() for the (optionally diffed) bodies
    """
    if compare_diff:
        left, right = diff(body1, body2)
    else:
        left, right = body1, body2

    return fuzzy_equal(left, right, self._eq_limit)
def _response_is_different(self, vhost_response, orig_resp_body,
                           non_existent_responses):
    """
    Note that 0.35 is used in fuzzy_equal because we want the responses to
    be *really different*.

    :param vhost_response: The HTTP response for the virtual host
    :param orig_resp_body: The original HTTP response body
    :param non_existent_responses: One or more HTTP responses for virtual
                                   hosts that do not exist in the remote
                                   server
    :return: True if vhost_response is different from orig_resp_body and
             from all the non_existent_responses
    """
    vhost_body = vhost_response.get_body()

    if fuzzy_equal(vhost_body, orig_resp_body, 0.35):
        return False

    looks_non_existent = any(fuzzy_equal(vhost_body, ner.get_body(), 0.35)
                             for ner in non_existent_responses)

    return not looks_non_existent
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio.

    :param body1: The first response body
    :param body2: The second response body
    :param compare_diff: When set, only the parts of the response bodies
                         which are different (as returned by
                         chunked_diff()) are compared.
    :return: The result of fuzzy_equal() for the (optionally diffed) bodies
    """
    if compare_diff:
        left, right = chunked_diff(body1, body2)
    else:
        left, right = body1, body2

    return fuzzy_equal(left, right, self._eq_limit)
def _is_resp_equal(self, res1, res2): """ @see: unittest for this method in test_csrf.py """ if res1.get_code() != res2.get_code(): return False if not fuzzy_equal(res1.body, res2.body, self._equal_limit): return False return True
def _matches_failed_login(self, resp_body, login_failed_result_list): """ :return: True if the resp_body matches the previously created responses that are stored in login_failed_result_list. """ for login_failed_result in login_failed_result_list: if fuzzy_equal(resp_body, login_failed_result, 0.65): return True else: # I'm happy! The response_body *IS NOT* a failed login page. return False
def _matches_failed_login(self, resp_body, login_failed_result_list): """ :return: True if the resp_body matches the previously created responses that are stored in login_failed_result_list. """ for login_failed_result in login_failed_result_list: if fuzzy_equal(resp_body, login_failed_result, 0.65): return True else: # I'm happy! The response_body *IS NOT* a failed login page. return False
def test_17092(self):
    """
    The default nginx 404 page and a short unrelated string must not be
    considered fuzzy equal.
    """
    nginx_404_lines = ['<html>',
                       '<head><title>404 Not Found</title></head>',
                       '<body bgcolor="white">',
                       '<center><h1>404 Not Found</h1></center>',
                       '<hr><center>nginx</center>',
                       '</body>',
                       '</html>',
                       '']
    # The trailing empty item adds the final new line
    nginx_404 = '\n'.join(nginx_404_lines)
    itest = 'itest'

    # 0.9 is from fingerprint_404.py
    are_equal = fuzzy_equal(nginx_404, itest, 0.9)
    self.assertFalse(are_equal)
def _is_resp_equal(self, response_1, response_2): """ :param response_1: HTTP response 1 :param response_2: HTTP response 2 :see: unittest for this method in test_csrf.py """ if response_1.get_code() != response_2.get_code(): return False if not fuzzy_equal(response_1.body, response_2.body, self._equal_limit): return False return True
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio. The
    result and the time spent are written using self.debug().

    :param body1: The first response body
    :param body2: The second response body
    :param compare_diff: When set, the result of diff(body1, body2) is
                         compared instead of the complete bodies
    :return: The result of fuzzy_equal() for the (optionally diffed) bodies
    """
    started_at = time.time()

    if compare_diff:
        body1, body2 = diff(body1, body2)

    similar = fuzzy_equal(body1, body2, self._eq_limit)

    verdict = 'ARE' if similar else 'ARE NOT'
    self.debug('Strings %s similar enough (limit: %s)' % (verdict,
                                                          self._eq_limit))

    elapsed = time.time() - started_at
    self.debug('Took %.2f seconds to run equal_with_limit' % elapsed)

    return similar
def equal_with_limit(self, body1, body2, compare_diff=False):
    """
    Decide if two pages are equal using the self._eq_limit ratio. The
    result and the time spent are written using self.debug().

    :param body1: The first response body
    :param body2: The second response body
    :param compare_diff: When set, the result of diff(body1, body2) is
                         compared instead of the complete bodies
    :return: The result of fuzzy_equal() for the (optionally diffed) bodies
    """
    started_at = time.time()

    if compare_diff:
        body1, body2 = diff(body1, body2)

    similar = fuzzy_equal(body1, body2, self._eq_limit)

    verdict = 'ARE' if similar else 'ARE NOT'
    self.debug('Strings %s similar enough (limit: %s)' % (verdict,
                                                          self._eq_limit))

    elapsed = time.time() - started_at
    self.debug('Took %.2f seconds to run equal_with_limit' % elapsed)

    return similar
def test_all(self):
    """
    fuzzy_equal(a, b, threshold) must always agree with
    relative_distance(a, b) >= threshold.
    """
    # (first string, second string, threshold)
    acceptance_tests = [
        ('a', 'a', 1.0),
        ('a', 'a', 0.1),
        ('a', 'a', 0.0),

        ('a', 'b', 1.0),
        ('a', 'b', 0.1),
        ('a', 'b', 0.0),

        ('a', 'ab', 1.0),
        ('a', 'ab', 0.1),

        ('a', 'b', 0.0000000000000000001),
        ('a', 'b' * 100, 1.0),

        ('a', 'ab', 0.66666666666),
        ('a', 'aab', 0.5),
        ('a', 'aaab', 0.4),
        ('a', 'aaaab', 0.33333333333333333333333333333333333333333333333333333333),

        ('a' * 25, 'a', 1.0),
        ('aaa', 'aa', 1.0),
        ('a', 'a', 1.0),

        ('a' * 25, 'a', 0.076923076923076927),
        ('aaa', 'aa', 0.8),

        ('a', 'a', 0.0),
    ]

    msg = ('fuzzy_equal and relative_distance returned'
           ' different results for the same parameters:\n'
           ' - Parameter #1: %s\n'
           ' - Parameter #2: %s\n'
           ' - Threshold: %s\n'
           ' - Result fuzzy_equal: %s\n'
           ' - Result relative_distance: %s\n')

    for first, second, threshold in acceptance_tests:
        fuzzy_result = fuzzy_equal(first, second, threshold)
        distance_result = relative_distance(first, second) >= threshold

        details = (first, second, threshold, fuzzy_result,
                   relative_distance(first, second))

        self.assertEqual(fuzzy_result, distance_result, msg % details)
def _find_OS(self, fuzzable_request): """ Analyze responses and determine if remote web server runs on windows or *nix. @Return: None, the knowledge is saved in the knowledgeBase """ freq_url = fuzzable_request.get_url() filename = freq_url.get_file_name() dirs = freq_url.get_directories()[:-1] # Skipping "domain level" dir. if dirs and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = URL(last_url[0:-1] + '\\' + filename) windows_response = self._uri_opener.GET(windows_url) original_response = self._uri_opener.GET(freq_url) if fuzzy_equal(original_response.get_body(), windows_response.get_body(), 0.98): desc = 'Fingerprinted this host as a Microsoft Windows system.' os_str = 'windows' else: desc = 'Fingerprinted this host as a *nix system. Detection for'\ ' this operating system is weak, "if not windows then'\ ' linux".' os_str = 'unix' response_ids = [windows_response.id, original_response.id] i = Info('Operating system', desc, response_ids, self.get_name()) i.set_url(windows_response.get_url()) kb.kb.raw_write(self, 'operating_system_str', os_str) kb.kb.append(self, 'operating_system', i) om.out.information(i.get_desc()) return True return False
def _find_OS(self, fuzzable_request): """ Analyze responses and determine if remote web server runs on windows or *nix. @Return: None, the knowledge is saved in the knowledgeBase """ freq_url = fuzzable_request.get_url() filename = freq_url.get_file_name() dirs = freq_url.get_directories()[:-1] # Skipping "domain level" dir. if dirs and filename: last_url = dirs[-1] last_url = last_url.url_string windows_url = URL(last_url[0:-1] + '\\' + filename) windows_response = self._uri_opener.GET(windows_url) original_response = self._uri_opener.GET(freq_url) if fuzzy_equal(original_response.get_body(), windows_response.get_body(), 0.98): desc = 'Fingerprinted this host as a Microsoft Windows system.' os_str = 'windows' else: desc = 'Fingerprinted this host as a *nix system. Detection for' \ ' this operating system is weak, "if not windows then' \ ' linux".' os_str = 'unix' response_ids = [windows_response.id, original_response.id] i = Info('Operating system', desc, response_ids, self.get_name()) i.set_url(windows_response.get_url()) kb.kb.raw_write(self, 'operating_system_str', os_str) kb.kb.append(self, 'operating_system', i) om.out.information(i.get_desc()) return True return False
def _test_ip_address(self, original_response, domain):
    """
    Check if http://ip(domain)/ == http://domain/

    The URL of the original response is requested again using the IP
    address of `domain` instead of the domain name. When the new response
    is not fuzzy equal (0.35) to the original one an Info object is
    appended to the knowledge base.

    :param original_response: The HTTP response received for the domain
    :param domain: The domain name to resolve
    :return: None
    """
    try:
        ip_address = socket.gethostbyname(domain)
    except socket.error:
        # The domain can not be resolved, nothing to compare
        return

    ip_url = original_response.get_url().copy()
    ip_url.set_domain(ip_address)

    try:
        modified_response = self._uri_opener.GET(ip_url, cache=True)
    except BaseFrameworkException as bfe:
        msg = ('An error occurred while fetching IP address URL in '
               ' dns_wildcard plugin: "%s"')
        om.out.debug(msg % bfe)
        return

    if is_no_content_response(modified_response):
        return

    # Similar responses: the IP address serves the same content
    if fuzzy_equal(modified_response.get_body(),
                   original_response.get_body(), 0.35):
        return

    desc = 'The contents of %s and %s differ.' % (
        modified_response.get_uri(), original_response.get_uri())

    info = Info('Default virtual host', desc, modified_response.id,
                self.get_name())
    info.set_url(modified_response.get_url())

    kb.kb.append(self, 'dns_wildcard', info)
    om.out.information(info.get_desc())
def _filter_errors(self, result, filename): """ Filter out ugly php errors and print a simple "Permission denied" or "File not found" """ #print filename error = None if result.count('Permission denied'): error = PERMISSION_DENIED elif result.count('No such file or directory in'): error = NO_SUCH_FILE elif result.count('Not a directory in'): error = READ_DIRECTORY elif result.count(': failed to open stream: '): error = FAILED_STREAM elif self._application_file_not_found_error is not None: # The result string has the file I requested inside, so I'm going # to remove it. clean_result = result.replace(filename, '') # Now I compare both strings, if they are VERY similar, then # filename is a non existing file. if fuzzy_equal(self._application_file_not_found_error, clean_result, 0.9): error = NO_SUCH_FILE # # I want this function to return an empty string on errors. # Not the error itself. # if error is not None: return '' return result
def audit(self, freq, orig_response, debugging_id):
    """
    Check if the protocol specified in freq is https and fetch the same
    URL using http.

    ie:
        - input: https://w3af.org/
        - check: http://w3af.org/

    A vulnerability is reported when both responses have the same HTTP
    code and really similar (0.95) bodies.

    :param freq: A FuzzableRequest
    :param orig_response: The HTTP response associated with the fuzzable
                          request
    :param debugging_id: A unique identifier for this call to audit()
    :return: None
    """
    if not self._should_run:
        return

    initial_uri = freq.get_uri()

    if initial_uri.get_port() not in {80, 443}:
        # The original URL looks like http://foo:3921/
        #
        # It's really strange (maybe not even possible?) to find a server
        # that listens for HTTP and HTTPS connections on the same port,
        # since we don't want to guess the port, nor generate errors such
        # as #8871 we just ignore this case
        self._should_run = False
        return

    # Build one request for each protocol
    http_uri = initial_uri.copy()
    https_uri = initial_uri.copy()

    http_uri.set_protocol('http')
    http_fr = copy.deepcopy(freq)
    http_fr.set_url(http_uri)

    https_uri.set_protocol('https')
    https_fr = copy.deepcopy(freq)
    https_fr.set_url(https_uri)

    # Error handling is disabled during these tests, we want the requests
    # to fail quickly and without affecting the library's error rate
    send_mutant = self._uri_opener.send_mutant
    send_kwargs = {'grep': False, 'error_handling': False}

    try:
        http_response = send_mutant(http_fr, **send_kwargs)
        https_response = send_mutant(https_fr, **send_kwargs)
    except (HTTPRequestException, ScanMustStopException):
        # No vulnerability to report since one of these threw an error
        # (because there is nothing listening on that port). It makes
        # no sense to keep running since we already got an error
        self._should_run = False
        return

    if http_response is None or https_response is None:
        # Same as above: one of the requests failed, stop running
        self._should_run = False
        return

    if self._redirects_to_secure(http_response, https_response):
        return

    if http_response.get_code() != https_response.get_code():
        return

    if not fuzzy_equal(http_response.get_body(),
                       https_response.get_body(), 0.95):
        return

    desc_fmt = ('Secure content can be accessed using the insecure'
                ' HTTP protocol. The vulnerable URLs used to verify'
                ' this vulnerability are:\n'
                ' - %s\n'
                ' - %s\n')
    desc = desc_fmt % (https_uri, http_uri)

    response_ids = [http_response.id, https_response.id]

    vuln = Vuln.from_fr('Secure content over insecure channel', desc,
                        severity.MEDIUM, response_ids, self.get_name(),
                        freq)

    self.kb_append(self, 'un_ssl', vuln)

    # In most cases, when one resource is available, all are
    # so we just stop searching for this vulnerability
    self._should_run = False
def generate_404_knowledge(self, url):
    """
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them in
    self._404_responses.

    :param url: The URL used to get the domain path and the extension for
                the files which should not exist
    :return: None, self._404_responses and self._directory_uses_404_codes
             are modified.
    :raise RuntimeError: When the URL opener was not configured
    """
    # This is the case when nobody has properly configured the object in
    # order to use it.
    if self._uri_opener is None:
        raise RuntimeError('404 fingerprint database was incorrectly'
                           ' initialized. URL opener is None.')

    extension = url.get_extension()
    domain_path = url.get_domain_path()

    # The most common handlers: in some configurations the 404 depends on
    # the handler, so we want to catch the 404 for each one
    handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb',
                'action', 'gif', 'htm', 'pl', 'cgi', 'xhtml',
                'htmls', 'foobar'}

    if extension:
        handlers.add(extension)

    test_urls = [domain_path.url_join(rand_alnum(8) + '.' + handler_ext)
                 for handler_ext in handlers]

    # In some cases there is no extension, also request a URL with just a
    # filename
    if not extension:
        test_urls.append(domain_path.url_join(rand_alnum(8)))

    candidates = []
    responses = self._worker_pool.imap_unordered(self._send_404, test_urls)

    for http_response in responses:
        candidates.append(FourOhFourResponseFactory(http_response))

        # Populate self._directory_uses_404_codes with the information we
        # just retrieved from the application
        if http_response.get_code() != 404:
            continue

        uri_404 = http_response.get_uri()
        path_and_extension = (uri_404.get_domain_path(),
                              uri_404.get_extension())

        if path_and_extension not in self._directory_uses_404_codes:
            self._directory_uses_404_codes.add(path_and_extension)

    # The candidates might all look the same, only the ones which are
    # different are stored. The first one is always stored.
    if candidates:
        self._404_responses.append(candidates[0])

    for candidate in candidates:
        for known in self._404_responses:
            # Already stored, or something really similar is stored
            if candidate is known:
                break

            if fuzzy_equal(candidate.body, known.body, IS_EQUAL_RATIO):
                break
        else:
            # None of the stored responses match: new knowledge
            self._404_responses.append(candidate)

    msg_fmt = 'The 404 body result database has a length of %s.'
    om.out.debug(msg_fmt % len(self._404_responses))
def is_404(self, http_response): """ All of my previous versions of is_404 were very complex and tried to struggle with all possible cases. The truth is that in most "strange" cases I was failing miserably, so now I changed my 404 detection once again, but keeping it as simple as possible. Also, and because I was trying to cover ALL CASES, I was performing a lot of requests in order to cover them, which in most situations was unnecessary. So now I go for a much simple approach: 1- Cover the simplest case of all using only 1 HTTP request 2- Give the users the power to configure the 404 detection by setting a string that identifies the 404 response (in case we are missing it for some reason in case #1) :param http_response: The HTTP response which we want to know if it is a 404 or not. """ # # First we handle the user configured exceptions: # domain_path = http_response.get_url().get_domain_path() if domain_path in cf.cf.get('always_404'): return True elif domain_path in cf.cf.get('never_404'): return False # # The user configured setting. "If this string is in the response, # then it is a 404" # if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response: return True # # This is the most simple case, we don't even have to think about this # # If there is some custom website that always returns 404 codes, then # we are screwed, but this is open source, and the pentester working # on that site can modify these lines. # if http_response.get_code() == 404: return True # # Simple, if the file we requested is in a directory that's known to # return 404 codes for files that do not exist, AND this is NOT a 404 # then we're return False! # if domain_path in self._directory_uses_404_codes and \ http_response.get_code() != 404: return False # # Lets start with the rather complex code... 
# with self._lock: if not self._already_analyzed: self.generate_404_knowledge(http_response.get_url()) self._already_analyzed = True # 404_body was already cleaned inside generate_404_knowledge # so we need to clean this one in order to have a fair comparison resp_body = get_clean_body(http_response) resp_content_type = http_response.doc_type resp_path = http_response.get_url().get_domain_path().url_string # See https://github.com/andresriancho/w3af/issues/6646 max_similarity_with_404 = 0.0 resp_path_in_db = False with self._lock: # # Compare this response to all the 404's I have in my DB # for resp_404 in self._404_responses: # Since the fuzzy_equal function is CPU-intensive we want to # avoid calling it for cases where we know it won't match, for # example in comparing an image and an html if resp_content_type != resp_404.doc_type: continue if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO): msg = '"%s" (id:%s) is a 404 [similarity_index > %s]' fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % fmt) return True else: # I could calculate this before and avoid the call to # fuzzy_equal, but I believe it's going to be faster this # way current_ratio = relative_distance(resp_404.body, resp_body) max_similarity_with_404 = max(max_similarity_with_404, current_ratio) # Track if the response path is in the DB if not resp_path_in_db and resp_path == resp_404.path: resp_path_in_db = True # # I get here when the for ends and no body_404_db matched with # the resp_body that was sent as a parameter by the user. This # means one of two things: # * There is not enough knowledge in self._404_responses, or # * The answer is NOT a 404. # # Because we want to reduce the amount of "false positives" that # this method returns, we'll perform some extra checks before # saying that this is NOT a 404. 
# if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO: msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s' ' with sample path in 404 DB].') args = (http_response.get_url(), http_response.id, MUST_VERIFY_RATIO) om.out.debug(msg % args) return False if self._is_404_with_extra_request(http_response, resp_body): # # Aha! It actually was a 404! # four_oh_data = FourOhFourResponseFactory(http_response) self._404_responses.append(four_oh_data) msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).' ' Adding new knowledge to the 404_responses database' ' (length=%s).') fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO, len(self._404_responses)) om.out.debug(msg % fmt) return True msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].' args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO) om.out.debug(msg % args) return False
def generate_404_knowledge(self, url):
    """
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random, non-existent filename is requested for each common handler
    extension (plus the extension of `url`, if it has one) in the directory
    of `url`. Only the responses which do not look alike are stored in
    self._404_responses.

    :param url: The URL which provides the directory and the extra
                extension used to generate the requests
    :return: None, the knowledge is stored in self._404_responses
    :raises RuntimeError: When self._uri_opener is None
    """
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = ('404 fingerprint database was incorrectly initialized.'
               ' URL opener is None.')
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif',
                'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'}
    if extension:
        handlers.add(extension)

    test_urls = []
    for handler_ext in handlers:
        rand_alnum_file = rand_alnum(8) + '.' + handler_ext
        test_urls.append(domain_path.url_join(rand_alnum_file))

    imap_unordered = self._worker_pool.imap_unordered
    not_exist_resp_lst = list(imap_unordered(self._send_404, test_urls))

    #
    #   I have the 404 responses in not_exist_resp_lst, but maybe they
    #   all look the same, so I'll filter the ones that look alike.
    #
    #   Each response is compared only with the ones which were already
    #   selected. The first response is always selected, and any other
    #   response is selected exactly once, and only when it is different
    #   from all the previously selected ones. Comparing every pair of
    #   responses and storing on each mismatch would add the same response
    #   to the DB many times.
    #
    unique_responses = []

    for candidate in not_exist_resp_lst:
        for selected in unique_responses:
            if fuzzy_equal(candidate.body, selected.body, IS_EQUAL_RATIO):
                # candidate (or something really similar) was already
                # selected, no need to compare any further
                break
        else:
            # None of the selected responses match, this one is new
            unique_responses.append(candidate)

    for http_response in unique_responses:
        four_oh_data = FourOhFourResponseFactory(http_response)
        self._404_responses.append(four_oh_data)

    msg_fmt = 'The 404 body result database has a length of %s.'
    om.out.debug(msg_fmt % len(self._404_responses))
def audit(self, freq, orig_response):
    """
    Tests an URL for memcache injection vulnerabilities.

    For each mutant of `freq` three payloads are sent in order
    (self.mci.error_1, self.mci.ok and self.mci.error_2). The bodies
    returned by send_clean() are compared with the body of `orig_response`
    and a vulnerability is stored in the KB only when both error payloads
    change the response and the "ok" payload does not.

    :param freq: The fuzzable request used to create the mutants
    :param orig_response: The HTTP response for the unmodified request
    """
    for mutant in create_mutants(freq, ['']):
        baseline_body = orig_response.get_body()

        # Step 1: try to break the normal execution flow
        mutant.set_token_value(self.mci.error_1)
        first_error, first_error_body = self._uri_opener.send_clean(mutant)

        if fuzzy_equal(baseline_body, first_error_body, self._eq_limit):
            # The execution flow was not broken: no injection here
            continue

        # Step 2: the correct injection should behave like the original
        mutant.set_token_value(self.mci.ok)
        ok_response, ok_body = self._uri_opener.send_clean(mutant)

        if fuzzy_not_equal(baseline_body, ok_body, self._eq_limit):
            # The responses should be equal, the injection failed
            continue

        # Step 3: a second error makes sure step 1 was not random bytes
        mutant.set_token_value(self.mci.error_2)
        second_error, second_error_body = self._uri_opener.send_clean(mutant)

        if fuzzy_equal(baseline_body, second_error_body, self._eq_limit):
            # The responses should be different again, not confirmed
            continue

        response_ids = [first_error.id, ok_response.id, second_error.id]

        desc_fmt = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
        desc = desc_fmt % (mutant.get_url(),
                           mutant.get_method(),
                           mutant.get_token_name())

        vuln = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                severity.HIGH, response_ids, 'memcachei',
                                mutant)

        self.debug(vuln.get_desc())

        vuln['ok_html'] = ok_response.get_body()
        vuln['error_1_html'] = first_error.get_body()
        vuln['error_2_html'] = second_error.get_body()

        self.kb_append_uniq(self, 'memcachei', vuln)
def generate_404_knowledge(self, url):
    """
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random, non-existent filename is requested for each common handler
    extension (plus the extension of `url`, if it has one) in the directory
    of `url`. Only the responses which do not look alike are stored in
    self._404_responses, and the directory is added to
    self._fingerprinted_paths.

    :param url: The URL which provides the directory and the extra
                extension used to generate the requests
    :return: None, the knowledge is stored in self._404_responses
    :raises RuntimeError: When self._uri_opener is None
    """
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = "404 fingerprint database was incorrectly initialized."
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = {
        "py",
        "php",
        "asp",
        "aspx",
        "do",
        "jsp",
        "rb",
        "gif",
        "htm",
        "pl",
        "cgi",
        "xhtml",
        "htmls",
        "foobar",
    }
    if extension:
        handlers.add(extension)

    test_urls = []
    for handler_ext in handlers:
        rand_alnum_file = rand_alnum(8) + "." + handler_ext
        test_urls.append(domain_path.url_join(rand_alnum_file))

    imap_unordered = self._worker_pool.imap_unordered
    not_exist_resp_lst = list(imap_unordered(self._send_404, test_urls))

    #
    #   I have the 404 responses in not_exist_resp_lst, but maybe they
    #   all look the same, so I'll filter the ones that look alike.
    #
    #   Each response is compared only with the ones which were already
    #   selected. This guarantees that at least one response is stored
    #   when all the generated 404s look alike (comparing every pair and
    #   storing only on mismatch would store nothing in that case) and that
    #   no response is stored more than once.
    #
    unique_responses = []

    for candidate in not_exist_resp_lst:
        candidate_body = candidate.get_body()

        for selected in unique_responses:
            if fuzzy_equal(candidate_body, selected.get_body(),
                           IS_EQUAL_RATIO):
                # candidate (or something really similar) was already
                # selected, no need to compare any further
                break
        else:
            # None of the selected responses match, this one is new
            unique_responses.append(candidate)

    for http_response in unique_responses:
        self._404_responses.append(http_response)

    msg_fmt = "The 404 body result database has a length of %s."
    om.out.debug(msg_fmt % len(self._404_responses))

    self._fingerprinted_paths.add(domain_path)
def generate_404_knowledge(self, url):
    """
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random, non-existent filename is requested for each common handler
    extension (plus the extension of `url`, or an extension-less filename
    when `url` has none) in the directory of `url`. The responses which do
    not look alike are stored using self._append_to_base_404_responses()
    and the (path, extension) pairs which answered with a 404 code are
    added to self._directory_uses_404_codes.

    :param url: The URL which provides the directory and the extra
                extension used to generate the requests
    :return: None, the knowledge is stored in the object attributes
    :raises RuntimeError: When self._uri_opener is None
    """
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = ('404 fingerprint database was incorrectly initialized.'
               ' URL opener is None.')
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action',
                'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'}
    if extension:
        handlers.add(extension)

    test_urls = []
    for handler_ext in handlers:
        rand_alnum_file = rand_alnum(8) + '.' + handler_ext
        test_urls.append(domain_path.url_join(rand_alnum_file))

    # Also keep in mind that in some cases we don't have an extension, so
    # we need to create a URL with just a filename
    if not extension:
        test_urls.append(domain_path.url_join(rand_alnum(8)))

    imap_unordered = self._worker_pool.imap_unordered
    not_exist_resp_lst = []

    for not_exist_resp in imap_unordered(self._send_404, test_urls):
        not_exist_resp_lst.append(FourOhFourResponse(not_exist_resp))

        #
        #   Populate the self._directory_uses_404_codes with the information
        #   we just retrieved from the application
        #
        if not_exist_resp.get_code() == 404:
            url_404 = not_exist_resp.get_uri()
            path_extension = (url_404.get_domain_path(),
                              url_404.get_extension())

            # Adding the same key more than once is harmless here
            self._directory_uses_404_codes.add(path_extension)

    #
    #   Sort the HTTP responses by length to try to have the same DB on
    #   each call to generate_404_knowledge(). This is required because of
    #   the imap_unordered() above, which will yield the responses in
    #   unexpected order each time we call it.
    #
    #   A key function is used instead of a cmp-style comparison function:
    #   the result is the same (both sorts are stable) and the key form
    #   is the one that still works where cmp() is not available.
    #
    not_exist_resp_lst.sort(key=lambda four_oh: len(four_oh.body))

    #
    #   I have the 404 responses in not_exist_resp_lst, but maybe they
    #   all look the same, so I'll filter the ones that look alike.
    #
    #   Just add the first one to the 404 responses list, since that one is
    #   "unique"
    #
    if not_exist_resp_lst:
        self._append_to_base_404_responses(not_exist_resp_lst[0])

    # And now add the unique responses
    for candidate in not_exist_resp_lst:
        for stored in self._base_404_responses:
            if candidate is stored:
                break

            if fuzzy_equal(candidate.body, stored.body, IS_EQUAL_RATIO):
                # candidate (or something really similar) already exists
                # in self._base_404_responses, no need to compare any
                # further
                break
        else:
            # None of the stored responses match the candidate. This
            # means that it is new and we should store it in the DB
            self._append_to_base_404_responses(candidate)

    msg = 'The base 404 response DB contains responses with IDs: %s'
    stored_ids = ', '.join(
        str(r.id) for r in copy.copy(self._base_404_responses))
    om.out.debug(msg % stored_ids)
def _is_404_complex_impl(self, http_response, query):
    """
    Verifies if the response is a 404 by comparing it with other responses
    which are known to be 404s, potentially sends HTTP requests to the
    server.

    The checks run from the cheapest to the most expensive: response
    codes, content types, exact body match, fuzzy body match and, only for
    bodies of MAX_FUZZY_LENGTH or more, the tie-breaker implemented in
    self._handle_large_http_responses().

    :param http_response: The HTTP response
    :param query: The HTTP response in FourOhFourResponse form (normalized
                  URL, clean body, etc.)
    :return: True if the HTTP response is a 404
    """
    debugging_id = http_response.get_debugging_id()
    if debugging_id is None:
        debugging_id = rand_alnum(8)

    def response_summary():
        # The first five values used by all the debug messages below
        return (http_response.get_url(),
                http_response.id,
                http_response.get_code(),
                len(http_response.get_body()),
                debugging_id)

    #
    #   Compare query with a known 404 from the DB (or a generated one
    #   if there is none with the same path in the DB)
    #
    known_404 = self._get_404_response(http_response, query, debugging_id)

    # Trivial performance improvement that prevents running fuzzy_equal
    if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [known 404 with ID %s uses 404 code]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return False

    # fuzzy_equal is CPU-intensive, do not call it when we know that the
    # responses will not match (for example an image and an html)
    if query.content_type != known_404.content_type:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [document type mismatch with known 404 with ID %s]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return False

    # The simplest case: 100% equal bodies, no matter how large or
    # complex the responses are, mean that query is a 404
    if known_404.body == query.body:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
               ' [string equals with 404 DB entry with ID %s]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return True

    if not fuzzy_equal(known_404.body, query.body, IS_EQUAL_RATIO):
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [similarity_ratio < %s with known 404 with ID %s]')
        args = response_summary() + (IS_EQUAL_RATIO, known_404.id)
        om.out.debug(msg % args)
        return False

    #
    #   The bodies are fuzzy-equal. That result can only be trusted for
    #   short bodies: some sites have really large headers and footers
    #   which they include for all pages, including 404s, for example:
    #
    #       {header-4000bytes}            {header-4000bytes}
    #       Hello world                   Not found
    #       {footer-4000bytes}            {footer-4000bytes}
    #
    #   A user with a browser clearly identifies one as a valid page and
    #   the other as a 404, but fuzzy_equal() returns True because 99% of
    #   the bytes are the same.
    #
    if len(query.body) >= MAX_FUZZY_LENGTH:
        # Send an extra HTTP request that will act as a tie-breaker
        return self._handle_large_http_responses(http_response,
                                                 query,
                                                 known_404,
                                                 debugging_id)

    # Short and fuzzy-equal: long headers / footers are not interfering
    msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
           ' [similarity_ratio > %s with 404 DB entry with ID %s]')
    args = response_summary() + (IS_EQUAL_RATIO, known_404.id)
    om.out.debug(msg % args)
    return True
def batch_injection_test(self, freq, orig_response):
    """
    Uses the batch injection technique to find memcache injections

    For each mutant of `freq` three payloads are sent in order
    (self.mci.error_1, self.mci.ok and self.mci.error_2). The bodies
    returned by send_clean() are compared with the body of `orig_response`
    and a vulnerability is stored in the KB only when both error payloads
    change the response and the "ok" payload does not.

    :param freq: The fuzzable request used to create the mutants
    :param orig_response: The HTTP response for the unmodified request
    """
    # shortcut
    send = self._uri_opener.send_clean

    def probe(target, payload):
        # Set the payload in the mutant and send it, returns the
        # (response, body) tuple which send_clean() yields
        target.set_token_value(payload)
        return send(target)

    for mutant in create_mutants(freq, ['']):
        baseline_body = orig_response.get_body()

        # Step 1: try to break the normal execution flow
        first_error, first_error_body = probe(mutant, self.mci.error_1)

        if fuzzy_equal(baseline_body, first_error_body, self._eq_limit):
            # The execution flow was not broken: no injection here
            continue

        # Step 2: the correct injection should behave like the original
        ok_response, ok_body = probe(mutant, self.mci.ok)

        if fuzzy_not_equal(baseline_body, ok_body, self._eq_limit):
            # The responses should be equal, the injection failed
            continue

        # Step 3: a second error makes sure step 1 was not random bytes
        second_error, second_error_body = probe(mutant, self.mci.error_2)

        if fuzzy_equal(baseline_body, second_error_body, self._eq_limit):
            # The responses should be different again, not confirmed
            continue

        response_ids = [first_error.id, ok_response.id, second_error.id]

        desc_fmt = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
        desc = desc_fmt % (mutant.get_url(),
                           mutant.get_method(),
                           mutant.get_token_name())

        vuln = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                severity.HIGH, response_ids, 'memcachei',
                                mutant)

        vuln['ok_html'] = ok_response.get_body()
        vuln['error_1_html'] = first_error.get_body()
        vuln['error_2_html'] = second_error.get_body()

        self.kb_append_uniq(self, 'memcachei', vuln)
def is_404(self, http_response):
    """
    Decide if `http_response` is a 404 (not found) page.

    Previous versions of this method tried to cover all possible cases,
    failed in most of the "strange" ones and sent many unnecessary
    requests. The approach is now much simpler:

        1- Cover the simplest case of all using only 1 HTTP request
        2- Give the users the power to configure the 404 detection by
           setting a string that identifies the 404 response (in case we
           are missing it for some reason in case #1)

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True if the response is considered a 404
    """
    #
    #   First we handle the user configured exceptions:
    #
    domain_path = http_response.get_url().get_domain_path()

    if domain_path in cf.cf.get('always_404'):
        return True

    if domain_path in cf.cf.get('never_404'):
        return False

    #
    #   The user configured setting. "If this string is in the response,
    #   then it is a 404"
    #
    if cf.cf.get('string_match_404'):
        if cf.cf.get('string_match_404') in http_response:
            return True

    #
    #   This is the most simple case, we don't even have to think about it
    #
    #   If there is some custom website that always returns 404 codes, then
    #   we are screwed, but this is open source, and the pentester working
    #   on that site can modify these lines.
    #
    if http_response.get_code() == 404:
        return True

    #
    #   The file we requested is in a directory that's known to return 404
    #   codes for files that do not exist, and this response is NOT a 404
    #
    if domain_path in self._directory_uses_404_codes:
        if http_response.get_code() != 404:
            return False

    #
    #   Lets start with the rather complex code...
    #
    with self._lock:
        if not self._already_analyzed:
            self.generate_404_knowledge(http_response.get_url())
            self._already_analyzed = True

    # The 404 bodies were already cleaned inside generate_404_knowledge,
    # clean this one too in order to have a fair comparison
    resp_body = get_clean_body(http_response)
    resp_content_type = http_response.doc_type
    resp_path = http_response.get_url().get_domain_path().url_string

    # See https://github.com/andresriancho/w3af/issues/6646
    best_ratio = 0.0
    path_is_known = False

    # NOTE(review): the extra request and the DB update below are kept
    # inside the lock - confirm against the original indentation
    with self._lock:
        #
        #   Compare this response to all the 404's I have in my DB
        #
        for known_404 in self._404_responses:

            # fuzzy_equal is CPU-intensive, do not call it when we know
            # that it won't match (for example an image and an html)
            if resp_content_type != known_404.doc_type:
                continue

            if fuzzy_equal(known_404.body, resp_body, IS_EQUAL_RATIO):
                msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                fmt = (http_response.get_url(),
                       http_response.id,
                       IS_EQUAL_RATIO)
                om.out.debug(msg % fmt)
                return True

            # Not equal, remember how close we were to a known 404
            ratio = relative_distance(known_404.body, resp_body)
            best_ratio = max(best_ratio, ratio)

            # Track if the response path is in the DB
            if not path_is_known and resp_path == known_404.path:
                path_is_known = True

        #
        #   No body in the 404 DB matched resp_body. This means one of
        #   two things:
        #       * There is not enough knowledge in self._404_responses, or
        #       * The answer is NOT a 404.
        #
        #   Because we want to reduce the amount of "false positives" that
        #   this method returns, we'll perform some extra checks before
        #   saying that this is NOT a 404.
        #
        if path_is_known and best_ratio < MUST_VERIFY_RATIO:
            msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                   ' with sample path in 404 DB].')
            args = (http_response.get_url(),
                    http_response.id,
                    MUST_VERIFY_RATIO)
            om.out.debug(msg % args)
            return False

        if self._is_404_with_extra_request(http_response, resp_body):
            #
            #   Aha! It actually was a 404!
            #
            self._404_responses.append(
                FourOhFourResponseFactory(http_response))

            msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                   ' Adding new knowledge to the 404_responses database'
                   ' (length=%s).')
            fmt = (http_response.get_url(),
                   http_response.id,
                   IS_EQUAL_RATIO,
                   len(self._404_responses))
            om.out.debug(msg % fmt)
            return True

    msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
    args = (http_response.get_url(),
            http_response.id,
            IS_EQUAL_RATIO)
    om.out.debug(msg % args)
    return False
def generate_404_knowledge(self, url):
    """
    Based on a URL, request something that we know is going to be a 404.
    Afterwards analyze the 404's and summarise them.

    One random, non-existent filename is requested for each common handler
    extension (plus the extension of `url`, if it has one) in the directory
    of `url`. Only the responses which do not look alike are stored in
    self._404_responses.

    :param url: The URL which provides the directory and the extra
                extension used to generate the requests
    :return: None, the knowledge is stored in self._404_responses
    :raises RuntimeError: When self._uri_opener is None
    """
    #
    #    This is the case when nobody has properly configured
    #    the object in order to use it.
    #
    if self._uri_opener is None:
        msg = ('404 fingerprint database was incorrectly initialized.'
               ' URL opener is None.')
        raise RuntimeError(msg)

    # Get the filename extension and create a 404 for it
    extension = url.get_extension()
    domain_path = url.get_domain_path()

    #
    #   This is a list of the most common handlers, in some configurations,
    #   the 404 depends on the handler, so I want to make sure that I catch
    #   the 404 for each one
    #
    handlers = {
        'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif', 'htm',
        'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
    }
    if extension:
        handlers.add(extension)

    test_urls = []
    for handler_ext in handlers:
        rand_alnum_file = rand_alnum(8) + '.' + handler_ext
        test_urls.append(domain_path.url_join(rand_alnum_file))

    imap_unordered = self._worker_pool.imap_unordered
    not_exist_resp_lst = list(imap_unordered(self._send_404, test_urls))

    #
    #   I have the 404 responses in not_exist_resp_lst, but maybe they
    #   all look the same, so I'll filter the ones that look alike.
    #
    #   Each response is compared only with the ones which were already
    #   selected. The first response is always selected, and any other
    #   response is selected exactly once, and only when it is different
    #   from all the previously selected ones. Comparing every pair of
    #   responses and storing on each mismatch would add the same response
    #   to the DB many times.
    #
    unique_responses = []

    for candidate in not_exist_resp_lst:
        for selected in unique_responses:
            if fuzzy_equal(candidate.body, selected.body, IS_EQUAL_RATIO):
                # candidate (or something really similar) was already
                # selected, no need to compare any further
                break
        else:
            # None of the selected responses match, this one is new
            unique_responses.append(candidate)

    for http_response in unique_responses:
        four_oh_data = FourOhFourResponseFactory(http_response)
        self._404_responses.append(four_oh_data)

    msg_fmt = 'The 404 body result database has a length of %s.'
    om.out.debug(msg_fmt % len(self._404_responses))
def _is_404_with_extra_request(self, http_response, clean_resp_body,
                               debugging_id):
    """
    Performs a very simple check to verify if this response is a 404 or
    not.

    The original URL is modified (a generated filename, or a random
    sibling directory when the URL has no filename) and requested. The
    response for the modified URL is then compared with the original one:
    when they are equal and the generated response looks like a 404, the
    original response is a 404.

    :param http_response: The original HTTP response
    :param clean_resp_body: The original HTML body you could find in
                            http_response after passing it by a cleaner
    :param debugging_id: The ID sent with the extra request and included
                         in the debug messages
    :return: True if the original response was a 404 !
    """
    #
    #   Generate a request that will trigger a 404
    #
    response_url = http_response.get_url()
    filename = response_url.get_file_name()

    if filename:
        relative_url = generate_404_filename(filename)
        url_404 = response_url.copy()
        url_404.set_file_name(relative_url)
    else:
        relative_url = '../%s/' % rand_alnum(8)
        url_404 = response_url.url_join(relative_url)

    #
    #   Send the 404 request
    #
    response_404 = self._send_404(url_404, debugging_id=debugging_id)
    four_oh_data = FourOhFourResponse(response_404)

    #
    #   Update _directory_uses_404_codes
    #
    if response_404.get_code() == 404:
        path_extension = (url_404.get_domain_path(),
                          url_404.get_extension())
        self._directory_uses_404_codes.add(path_extension)

        if http_response.get_code() != 404:
            # Not a 404! We know because of the new knowledge that this
            # path and extension uses 404
            msg = ('The generated HTTP response for %s (id: %s) has a 404'
                   ' code, which is different from code %s used by the HTTP'
                   ' response passed as parameter (id:%s, did:%s)')
            args = (url_404,
                    response_404.id,
                    http_response.get_code(),
                    http_response.id,
                    debugging_id)
            om.out.debug(msg % args)
            return False

    #
    #   If the HTTP response codes are different, then we're almost
    #   certain the HTTP response received as parameter is not a 404
    #
    if response_404.get_code() != http_response.get_code():
        msg = ('The generated HTTP response for %s (id: %s) has a %s'
               ' code, which is different from code %s used by the HTTP'
               ' response passed as parameter (id:%s, did:%s)')
        args = (url_404,
                response_404.id,
                response_404.get_code(),
                http_response.get_code(),
                http_response.id,
                debugging_id)
        om.out.debug(msg % args)

        # Save the new 404 page to the DB. This might prevent us from
        # sending extra HTTP requests in the future
        self._append_to_extended_404_responses(four_oh_data)
        return False

    #
    #   Compare the "response that MUST BE (*) a 404" with the one
    #   received as parameter.
    #
    #   (*) This works in 95% of the cases, where the application is not
    #       using some kind of URL rewrite rule which completely ignores
    #       the last part of the URL (filename or path)
    #
    bodies_match = fuzzy_equal(four_oh_data.body,
                               clean_resp_body,
                               IS_EQUAL_RATIO)

    if not bodies_match:
        # The URL we generated really triggered a 404, and the response
        # received as parameter is different (not a 404)
        msg = ('The generated HTTP response for %s (id: %s) is different'
               ' from the HTTP response body passed as parameter'
               ' (id: %s, did:%s)')
        args = (url_404, four_oh_data.id, http_response.id, debugging_id)
        om.out.debug(msg % args)

        # Save the new 404 page to the DB. This might prevent us from
        # sending extra HTTP requests in the future
        self._append_to_extended_404_responses(four_oh_data)
        return False

    #
    #   The responses are equal, both can be 404, or both can be the
    #   result of the application ignoring the last part of the URL:
    #
    #       http://w3af.com/foo/ignored
    #       http://w3af.com/foo/also-ignored
    #
    if self._looks_like_404_page(response_404):
        msg = ('The generated HTTP response for %s (id: %s) looks like'
               ' a 404 response AND is similar to the HTTP response body'
               ' passed as parameter (id:%s, did:%s)')
        args = (url_404, four_oh_data.id, http_response.id, debugging_id)
        om.out.debug(msg % args)

        # Save the new 404 page to the DB. This might prevent us from
        # sending extra HTTP requests in the future
        self._append_to_extended_404_responses(four_oh_data)
        return True

    #
    #   This is the worse scenario. The responses are equal, none of the
    #   responses look like a 404. We get here when:
    #
    #       * _looks_like_404_page() has a false negative (the page is a
    #         404, but the method returns False, this is very common,
    #         since the word database is very small)
    #
    #       * The site is ignoring the last part of the URL (the filename
    #         or the last path). So requesting /abc/def and /abc/foo will
    #         both yield the same result.
    #
    #   There is no good answer here... I prefer to return False, which
    #   might add a false positive finding to the KB, instead of returning
    #   True (saying that the response is a 404) and having a false
    #   negative
    #
    msg = ('The generated HTTP response for %s (id: %s) is very similar to'
           ' the HTTP response body passed as parameter (id: %s), and the'
           ' generated response does NOT look like a 404 (did:%s)')
    args = (url_404, four_oh_data.id, http_response.id, debugging_id)
    om.out.debug(msg % args)
    return False
def is_resp_equal(self, resp1, resp2):
    """
    Compare two responses using their status code and content.

    :param resp1: A response with `status_code` and `content` attributes
    :param resp2: The response to compare `resp1` with
    :return: True when the status codes are the same and fuzzy_equal()
             accepts the stringified contents with a 0.99 ratio,
             False otherwise.
    """
    # Different status codes, no need to compare the contents
    if resp1.status_code != resp2.status_code:
        return False

    first_content = str(resp1.content)
    second_content = str(resp2.content)

    return True if fuzzy_equal(first_content, second_content, 0.99) else False
def _is_404_complex(self, http_response):
    """
    Verifies if the response is a 404 by comparing it with other responses
    which are known to be 404s, potentially sends HTTP requests to the
    server.

    The checks run from the cheapest to the most expensive: response
    codes, document types, exact body match, fuzzy body match and, only
    for bodies of MAX_FUZZY_LENGTH or more, the tie-breaker implemented in
    self._handle_large_http_responses().

    :param http_response: The HTTP response
    :return: True if the HTTP response is a 404
    """
    debugging_id = http_response.get_debugging_id()
    if debugging_id is None:
        debugging_id = rand_alnum(8)

    def response_summary():
        # The first five values used by all the debug messages below
        return (http_response.get_url(),
                http_response.id,
                http_response.get_code(),
                len(http_response.get_body()),
                debugging_id)

    # The 404 bodies stored in the DB were cleaned when creating the
    # FourOhFourResponse instances. Wrap the response received as
    # parameter in the same class in order to have a fair comparison
    query = FourOhFourResponse(http_response)

    #
    #   Compare query with a known 404 from the DB (or a generated one
    #   if there is none with the same path in the DB)
    #
    known_404 = self._get_404_response(http_response, query, debugging_id)

    # Trivial performance improvement that prevents running fuzzy_equal
    if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [known 404 with ID %s uses 404 code]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return False

    # fuzzy_equal is CPU-intensive, do not call it when we know that the
    # responses will not match (for example an image and an html)
    if query.doc_type != known_404.doc_type:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [document type mismatch with known 404 with ID %s]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return False

    # The simplest case: 100% equal bodies, no matter how large or
    # complex the responses are, mean that query is a 404
    if known_404.body == query.body:
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
               ' [string equals with 404 DB entry with ID %s]')
        args = response_summary() + (known_404.id,)
        om.out.debug(msg % args)
        return True

    if not fuzzy_equal(known_404.body, query.body, IS_EQUAL_RATIO):
        msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
               ' [similarity_ratio < %s with known 404 with ID %s]')
        args = response_summary() + (IS_EQUAL_RATIO, known_404.id)
        om.out.debug(msg % args)
        return False

    #
    #   The bodies are fuzzy-equal. That result can only be trusted for
    #   short bodies: some sites have really large headers and footers
    #   which they include for all pages, including 404s, for example:
    #
    #       {header-4000bytes}            {header-4000bytes}
    #       Hello world                   Not found
    #       {footer-4000bytes}            {footer-4000bytes}
    #
    #   A user with a browser clearly identifies one as a valid page and
    #   the other as a 404, but fuzzy_equal() returns True because 99% of
    #   the bytes are the same.
    #
    if len(query.body) >= MAX_FUZZY_LENGTH:
        # Send an extra HTTP request that will act as a tie-breaker
        return self._handle_large_http_responses(http_response,
                                                 query,
                                                 known_404,
                                                 debugging_id)

    # Short and fuzzy-equal: long headers / footers are not interfering
    msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
           ' [similarity_ratio > %s with 404 DB entry with ID %s]')
    args = response_summary() + (IS_EQUAL_RATIO, known_404.id)
    om.out.debug(msg % args)
    return True
def is_404(self, http_response):
    """
    Decide if `http_response` has to be handled as a 404 (not found).

    The checks are run in this order and the first one that reaches a
    verdict wins:

        1- The user configured `always_404` / `never_404` domain paths
        2- The user configured `string_match_404` string is in the response
        3- The HTTP response code is 404
        4- The domain path is in self._directory_uses_404_codes, in which
           case a response with a non-404 code is NOT a 404
        5- The clean response body is fuzzy-equal to one of the entries in
           self._404_responses
        6- The extra check done by self._is_404_with_extra_request(), only
           for domain paths which are not in self._fingerprinted_paths

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True if the response is a 404, False otherwise.
    """
    domain_path = http_response.get_url().get_domain_path()

    #
    # First we handle the user configured exceptions
    #
    if domain_path in cf.cf.get('always_404'):
        return True

    if domain_path in cf.cf.get('never_404'):
        return False

    #
    # The user configured setting. "If this string is in the response,
    # then it is a 404". The setting is read only once, that way the
    # truthiness check and the containment test use the same value.
    #
    string_match_404 = cf.cf.get('string_match_404')
    if string_match_404 and string_match_404 in http_response:
        return True

    #
    # This is the most simple case: the server answered with a 404 code
    #
    if http_response.get_code() == 404:
        return True

    #
    # The response code is not 404 here (that case returned above), so if
    # the domain path is known to return 404 codes for files that do not
    # exist, then this response is not a 404.
    #
    if domain_path in self._directory_uses_404_codes:
        return False

    #
    # Generate the 404 knowledge only once. The lock protects the
    # check-and-set of self._already_analyzed.
    #
    with self._lock:
        if not self._already_analyzed:
            self.generate_404_knowledge(http_response.get_url())
            self._already_analyzed = True

    # The body is cleaned in order to have a fair comparison with the
    # known 404 responses
    html_body = get_clean_body(http_response)

    #
    # Compare this response to all the 404's I have in my DB. A copy of
    # self._404_responses is used in order to be able to iterate over it
    # from one thread, while it is changed in another.
    #
    for resp_404 in copy.copy(self._404_responses):
        if fuzzy_equal(resp_404.get_body(), html_body, IS_EQUAL_RATIO):
            msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            fmt = (http_response.get_url(),
                   http_response.id,
                   IS_EQUAL_RATIO)
            om.out.debug(msg % fmt)
            return True

    #
    # None of the known 404 responses matched. This means one of two
    # things:
    #   * There is not enough knowledge in self._404_responses, or
    #   * The answer is NOT a 404.
    #
    # Because we want to reduce the amount of "false positives" that this
    # method returns, we perform one extra check before saying that this
    # is NOT a 404.
    #
    # The loop above never uses `break`, so this code does not need to
    # live in a for/else clause.
    #
    if domain_path not in self._fingerprinted_paths:
        if self._is_404_with_extra_request(http_response, html_body):
            #
            # Aha! It actually was a 404!
            #
            # NOTE(review): the response object itself is stored, while
            # the comparison above uses the *clean* body of the incoming
            # response. Confirm that get_body() of the stored entries is
            # comparable with a clean body.
            #
            self._404_responses.append(http_response)
            self._fingerprinted_paths.add(domain_path)

            msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                   ' Adding new knowledge to the 404_bodies database'
                   ' (length=%s).')
            fmt = (http_response.get_url(),
                   http_response.id,
                   IS_EQUAL_RATIO,
                   len(self._404_responses))
            om.out.debug(msg % fmt)
            return True

    msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
    fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
    om.out.debug(msg % fmt)
    return False
def generate_404_knowledge(self, url):
    """
    Request random file names (one per common handler extension) in the
    directory of `url`, which are expected to be answered with the
    application's "not found" page, and learn from the responses:

        * self._directory_uses_404_codes gets a (domain path, extension)
          tuple for each response with a 404 code
        * self._404_responses gets the responses which are not fuzzy-equal
          to the ones already stored there

    :param url: The URL used to get the domain path and the extension for
                the requests.
    :return: None, the knowledge is stored in the instance attributes.
    :raises RuntimeError: If self._uri_opener is None.
    """
    # Nobody configured the object properly, it is not possible to use it
    if self._uri_opener is None:
        raise RuntimeError('404 fingerprint database was incorrectly'
                           ' initialized. URL opener is None.')

    url_extension = url.get_extension()
    base_path = url.get_domain_path()

    #
    # The most common handlers. In some configurations the 404 depends on
    # the handler, so we want to catch the 404 for each one of them, and
    # also the one for the extension of the URL we received (if any).
    #
    extensions = {
        'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action',
        'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar',
    }
    if url_extension:
        extensions.add(url_extension)

    test_urls = [base_path.url_join(rand_alnum(8) + '.' + ext)
                 for ext in extensions]

    # When the URL has no extension we also need to test a file name
    # without one
    if not url_extension:
        test_urls.append(base_path.url_join(rand_alnum(8)))

    send_all = self._worker_pool.imap_unordered
    candidates = []

    for response in send_all(self._send_404, test_urls):
        candidates.append(FourOhFourResponseFactory(response))

        if response.get_code() != 404:
            continue

        #
        # Remember that this (domain path, extension) is answered with
        # 404 codes by the application
        #
        uri_404 = response.get_uri()
        known_key = (uri_404.get_domain_path(), uri_404.get_extension())
        if known_key not in self._directory_uses_404_codes:
            self._directory_uses_404_codes.add(known_key)

    #
    # The candidates might all look the same, so only the ones which look
    # different are stored. The first one is always stored, since that one
    # is "unique".
    #
    if candidates:
        self._404_responses.append(candidates[0])

    for candidate in candidates:
        # A candidate is already known when it is one of the stored
        # objects or when its body is fuzzy-equal to a stored one. any()
        # stops at the first match, no need to compare any further.
        already_known = any(
            candidate is known or
            fuzzy_equal(candidate.body, known.body, IS_EQUAL_RATIO)
            for known in self._404_responses
        )

        if not already_known:
            # This item is new, store it in the 404_responses db
            self._404_responses.append(candidate)

    msg_fmt = 'The 404 body result database has a length of %s.'
    om.out.debug(msg_fmt % len(self._404_responses))
def is_404(self, http_response):
    """
    Decide if `http_response` has to be handled as a 404 (not found).

    The checks are run in this order and the first one that reaches a
    verdict wins:

        1- The user configured `always_404` / `never_404` domain paths
        2- The user configured `string_match_404` string is in the response
        3- The HTTP response code is 404
        4- The domain path is in self._directory_uses_404_codes, in which
           case a response with a non-404 code is NOT a 404
        5- The clean response body is fuzzy-equal to one of the entries in
           self._404_responses
        6- The extra check done by self._is_404_with_extra_request(), only
           for domain paths which are not in self._fingerprinted_paths

    :param http_response: The HTTP response which we want to know if it
                          is a 404 or not.
    :return: True if the response is a 404, False otherwise.
    """
    domain_path = http_response.get_url().get_domain_path()

    #
    # First we handle the user configured exceptions
    #
    if domain_path in cf.cf.get("always_404"):
        return True

    if domain_path in cf.cf.get("never_404"):
        return False

    #
    # The user configured setting. "If this string is in the response,
    # then it is a 404". The setting is read only once, that way the
    # truthiness check and the containment test use the same value.
    #
    string_match_404 = cf.cf.get("string_match_404")
    if string_match_404 and string_match_404 in http_response:
        return True

    #
    # This is the most simple case: the server answered with a 404 code
    #
    if http_response.get_code() == 404:
        return True

    #
    # The response code is not 404 here (that case returned above), so if
    # the domain path is known to return 404 codes for files that do not
    # exist, then this response is not a 404.
    #
    if domain_path in self._directory_uses_404_codes:
        return False

    #
    # Generate the 404 knowledge only once. The lock protects the
    # check-and-set of self._already_analyzed.
    #
    with self._lock:
        if not self._already_analyzed:
            self.generate_404_knowledge(http_response.get_url())
            self._already_analyzed = True

    # The body is cleaned in order to have a fair comparison with the
    # known 404 responses
    html_body = get_clean_body(http_response)

    #
    # Compare this response to all the 404's I have in my DB. A copy of
    # self._404_responses is used in order to be able to iterate over it
    # from one thread, while it is changed in another.
    #
    for resp_404 in copy.copy(self._404_responses):
        if fuzzy_equal(resp_404.get_body(), html_body, IS_EQUAL_RATIO):
            msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % fmt)
            return True

    #
    # None of the known 404 responses matched. This means one of two
    # things:
    #   * There is not enough knowledge in self._404_responses, or
    #   * The answer is NOT a 404.
    #
    # Because we want to reduce the amount of "false positives" that this
    # method returns, we perform one extra check before saying that this
    # is NOT a 404.
    #
    # The loop above never uses `break`, so this code does not need to
    # live in a for/else clause.
    #
    if domain_path not in self._fingerprinted_paths:
        if self._is_404_with_extra_request(http_response, html_body):
            #
            # Aha! It actually was a 404!
            #
            # NOTE(review): the response object itself is stored, while
            # the comparison above uses the *clean* body of the incoming
            # response. Confirm that get_body() of the stored entries is
            # comparable with a clean body.
            #
            self._404_responses.append(http_response)
            self._fingerprinted_paths.add(domain_path)

            msg = (
                '"%s" (id:%s) is a 404 (similarity_index > %s).'
                " Adding new knowledge to the 404_bodies database"
                " (length=%s)."
            )
            fmt = (
                http_response.get_url(),
                http_response.id,
                IS_EQUAL_RATIO,
                len(self._404_responses),
            )
            om.out.debug(msg % fmt)
            return True

    msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
    fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
    om.out.debug(msg % fmt)
    return False