def diff(a, b):
    """
    Compute the parts of each string that are unique to it.

    :param a: A string
    :param b: A string (similar to a)
    :return: Two strings (a_mod, b_mod) which are basically:
                 a_mod = a - (a intersection b)
                 b_mod = b - (a intersection b)
             Or if you want to see it in another way, the results are the
             parts of the string that make it unique between each other.
    """
    # Work on byte strings to avoid unicode issues in the diff engine
    a = smart_str_ignore(a)
    b = smart_str_ignore(b)

    engine = dmp_module.diff_match_patch()

    # deadline bounds the time spent computing the diff
    changes = engine.diff_main(a, b, checklines=True, deadline=MAX_DIFF_TIME)
    engine.diff_cleanupSemantic(changes)

    # op == -1 marks text only present in `a`, op == 1 text only in `b`
    removed = ''.join(change for op, change in changes if op == -1)
    added = ''.join(change for op, change in changes if op == 1)

    return removed, added
def _should_analyze(self, response):
    """
    Decide if this HTTP response should be analyzed.

    The same URL and the same response body are only analyzed once.

    :param response: HTTP response
    :return: True if we should analyze this HTTP response
    """
    #
    # Avoid running this plugin twice on the same URL
    #
    # smart_str_ignore() is applied before hashing (just like for the
    # body below): hashlib.md5() on a unicode url_string would raise
    # UnicodeEncodeError for non-ASCII URLs
    url_bytes = smart_str_ignore(response.get_url().url_string)
    url_hash = hashlib.md5(url_bytes).hexdigest()
    if url_hash in self._analyzed_hashes:
        return False

    self._analyzed_hashes.add(url_hash)

    #
    # Avoid running this plugin twice on the same file content
    #
    body = smart_str_ignore(response.get_body())
    response_hash = hashlib.md5(body).hexdigest()
    if response_hash in self._analyzed_hashes:
        return False

    self._analyzed_hashes.add(response_hash)
    return True
def clean_fuzzable_request_form(fuzzable_request, dc_handler=clean_data_container):
    """
    Extract data from the fuzzable request and serialize it.

    The main goal of this function is to return a "unique representation"
    of how the HTTP request looks like WITHOUT including the URL.

    Related with https://github.com/andresriancho/w3af/issues/15970

    :param fuzzable_request: The fuzzable request instance to clean
    """
    raw_data = fuzzable_request.get_raw_data()
    uri = fuzzable_request.get_uri()

    # Query string parameters (empty marker when there is no query string)
    if uri.has_query_string():
        query_repr = dc_handler(uri.querystring)
    else:
        query_repr = ''

    # Post-data parameters (empty marker when there is no post-data)
    post_repr = dc_handler(raw_data) if raw_data else ''

    parts = [fuzzable_request.get_method().upper(),  # Method
             raw_data.get_type(),                    # Type
             query_repr,
             post_repr]

    return '|'.join([smart_str_ignore(p) for p in parts])
def _upload_file(self, domain_path, rand_file, debugging_id): """ Upload the file using author.dll :param domain_path: http://localhost/f00/ :param rand_file: <random>.html """ # TODO: The frontpage version should be obtained from the information # saved in the kb by the infrastructure.frontpage_version plugin! # # The 4.0.2.4715 version should be dynamic! version = '4.0.2.4715' file_path = domain_path.get_path() + rand_file data = POST_BODY % (version, file_path) data += rand_file[::-1] data = smart_str_ignore(data) target_url = self._get_author_url() try: res = self._uri_opener.POST(target_url, data=data, debugging_id=debugging_id) except BaseFrameworkException, e: om.out.debug( 'Exception while uploading file using author.dll: %s' % e) return None
def get_response_cache_key(self, http_response, clean_response=None, headers=None):
    """
    Memoizing wrapper around the module-level get_response_cache_key().

    When the clean response is available, use that body to calculate the
    cache key. It has been cleaned (removed request paths and QS
    parameters) so it has a higher chance of being equal to other
    responses / being already in the cache.
    """
    if clean_response is None:
        body = http_response.body
    else:
        body = clean_response.body

    lookup_key = quick_hash('%s%s' % (smart_str_ignore(body), headers))

    cached = self._cache.get(lookup_key, None)
    if cached is not None:
        return cached

    # Cache miss: delegate to the (module-level) implementation and store
    computed = get_response_cache_key(http_response,
                                      clean_response=clean_response,
                                      headers=headers)
    self._cache[lookup_key] = computed
    return computed
def is_csrf_token(self, key, value):
    """
    Heuristic check for CSRF tokens.

    Entropy based algorithm
    http://en.wikipedia.org/wiki/Password_strength

    :param key: The parameter name
    :param value: The parameter value
    :return: True if the pair looks like a CSRF token
    """
    min_length = 5
    max_length = 512
    min_entropy = 2.4

    # Too short to be a token
    if len(value) <= min_length:
        return False

    # I have never seen a CSRF token longer than 256 bytes,
    # doubling that and checking to make sure we don't check
    # parameters which are files in multipart uploads or stuff
    # like that
    if len(value) > max_length:
        return False

    # Well-known CSRF parameter names are a strong signal
    key_lower = key.lower()
    if any(name.lower() in key_lower for name in COMMON_CSRF_NAMES):
        return True

    # Finally fall back to measuring the randomness of the value
    return self.shannon_entropy(smart_str_ignore(value)) >= min_entropy
def log_enabled_plugins(self, plugins_dict, options_dict):
    """
    This method is called from the output manager object. This method
    should take an action for the enabled plugins and their configuration.
    Usually, write the info to a file or print it somewhere.

    :param plugins_dict: A dict with all the plugin types and the
                         enabled plugins for that type of plugin.
    :param options_dict: A dict with the options for every plugin.
    """
    the_time = time.strftime("%c", time.localtime(time.time()))
    timestamp = '[ %s - Enabled plugins ] ' % the_time

    # One formatted section per plugin type
    sections = [self._create_plugin_info(plugin_type,
                                         plugins_dict[plugin_type],
                                         options_dict[plugin_type])
                for plugin_type in plugins_dict]
    to_print = ''.join(sections)

    # And now the target information
    str_targets = ', '.join(smart_str_ignore(u.url_string)
                            for u in cf.cf.get('targets'))

    to_print += 'target\n'
    to_print += ' set target ' + str_targets + '\n'
    to_print += ' back'

    # Prefix every line with the timestamp before writing
    to_print = to_print.replace('\n', '\n' + timestamp) + '\n'
    self._write_to_file(timestamp + to_print)
def clean_fuzzable_request_form(fuzzable_request, dc_handler=clean_data_container):
    """
    Serialize the fuzzable request into a "unique representation" that
    does NOT include the URL.

    Related with https://github.com/andresriancho/w3af/issues/15970

    :param fuzzable_request: The fuzzable request instance to clean
    """
    res = []

    # Method
    res.append(fuzzable_request.get_method().upper())

    # Type
    raw_data = fuzzable_request.get_raw_data()
    res.append(raw_data.get_type())

    # Query string parameters
    uri = fuzzable_request.get_uri()
    res.append(dc_handler(uri.querystring) if uri.has_query_string() else '')

    # Post-data parameters
    res.append(dc_handler(raw_data) if raw_data else '')

    return '|'.join([smart_str_ignore(s) for s in res])
def is_csrf_token(self, key, value):
    """
    Decide whether the (key, value) parameter looks like a CSRF token.

    Entropy based algorithm
    http://en.wikipedia.org/wiki/Password_strength

    :param key: The parameter name
    :param value: The parameter value
    :return: True if the pair looks like a CSRF token
    """
    # Heuristic thresholds for token length and randomness
    min_length = 5
    max_length = 512
    min_entropy = 2.4

    # Check length
    if len(value) <= min_length:
        return False

    if len(value) > max_length:
        # I have never seen a CSRF token longer than 256 bytes,
        # doubling that and checking to make sure we don't check
        # parameters which are files in multipart uploads or stuff
        # like that
        return False

    # Check for common CSRF token names
    for common_csrf_name in COMMON_CSRF_NAMES:
        if common_csrf_name.lower() in key.lower():
            return True

    # Calculate entropy
    entropy = self.shannon_entropy(smart_str_ignore(value))
    if entropy >= min_entropy:
        return True

    return False
def _parse_xml(self, original_value): """ Parse the XML into an object :param original_value: The XML as sent by the application :return: The XML object or None if parsing failed """ # This is a safety measure to prevent us from loading large XML files # into memory (high memory usage) or loading a very complex xml which # might require a lot of CPU time if len(original_value) > 1024 * 1024: return None # Secure, don't introduce XXE in our XXE detection plugin ;-) parser = etree.XMLParser(load_dtd=False, no_network=True, resolve_entities=False) try: xml_root = etree.fromstring(smart_str_ignore(original_value), parser=parser) except Exception, e: msg = 'Failed to parse XML to inject XXE tests. Exception was: "%s"' om.out.debug(msg % e) return None
def _upload_file(self, domain_path, rand_file, debugging_id): """ Upload the file using author.dll :param domain_path: http://localhost/f00/ :param rand_file: <random>.html """ # TODO: The frontpage version should be obtained from the information # saved in the kb by the infrastructure.frontpage_version plugin! # # The 4.0.2.4715 version should be dynamic! version = '4.0.2.4715' file_path = domain_path.get_path() + rand_file data = POST_BODY % (version, file_path) data += rand_file[::-1] data = smart_str_ignore(data) target_url = self._get_author_url() try: res = self._uri_opener.POST(target_url, data=data, debugging_id=debugging_id) except BaseFrameworkException, e: om.out.debug('Exception while uploading file using author.dll: %s' % e) return None
def _get_all_parameters(self, request):
    """
    :param request: The HTTP request
    :yield: All the HTTP request parameters as tuples of (name, value)
    """
    headers = request.get_headers()
    query_string = request.get_uri().get_querystring()
    post_data = dc_from_hdrs_post(headers, request.get_data())

    cookie_str, _ = headers.iget('cookie', '')
    cookie_dc = Cookie(cookie_str)

    # Walk the query string, post-data, header and cookie tokens in order
    all_tokens = itertools.chain(query_string.iter_tokens(),
                                 post_data.iter_tokens(),
                                 headers.iter_tokens(),
                                 cookie_dc.iter_tokens())

    for token in all_tokens:
        name = token.get_name()
        value = smart_str_ignore(token.get_value())

        yield name, value

        # Handle the case where the parameter is base64 encoded
        is_b64, decoded_data = maybe_decode_base64(value)
        if is_b64:
            yield name, decoded_data
def get_response_cache_key(http_response, clean_response=None, headers=None):
    """
    Note: query.body has been cleaned by get_clean_body()

    :param http_response: The HTTP response we want to get a cache key for

    :param clean_response: The FourOhFourResponse associated with the
                           HTTPResponse passed as parameter (optional,
                           will be calculated if not provided)

    :param headers: A string containing the HTTP response headers that
                    have to be used to calculate the hash

    :return: Hash of the HTTP response body
    """
    # Default to an empty string when no headers were provided.
    #
    # Bug fix: the original `headers = '' or headers` was a no-op ('' is
    # falsy, so the expression always evaluated to `headers`), which let
    # None through and leaked the literal string "None" into the key.
    headers = headers or ''

    #
    # Only some HTTP responses benefit from the XML-bones signature
    #
    if _should_use_xml_bones(http_response):
        body = get_xml_bones(http_response.get_body())
        normalized_path = FourOhFourResponse.normalize_path(
            http_response.get_uri())
    else:
        #
        # Get a clean_response if it was not provided
        #
        if clean_response is None:
            clean_response = FourOhFourResponse.from_http_response(
                http_response)

        body = clean_response.body
        normalized_path = clean_response.normalized_path

    #
    # Calculate the hash using all the captured information
    #
    key = ''.join([str(http_response.get_code()),
                   smart_str_ignore(normalized_path),
                   str(headers),
                   smart_str_ignore(body)])

    return quick_hash(key)
def get_hash(self, exclude_headers=None):
    """
    Calculate a hash for this HTTP response head and body.

    :param exclude_headers: Optional list of header names to leave out of
                            the dumped response head before hashing
    :return: The hash string produced by self._quick_hash()
    """
    # Bug fix: the original `exclude_headers = [] or exclude_headers` was
    # a no-op ([] is falsy, so the expression always evaluated to
    # `exclude_headers`) and forwarded None instead of the intended
    # empty-list default.
    exclude_headers = exclude_headers or []

    headers = self.dump_response_head(exclude_headers=exclude_headers)
    body = smart_str_ignore(self.get_body())

    dump = '%s%s' % (headers, body)
    return self._quick_hash(dump)
def found_at(self):
    """
    Return a string representing WHAT was fuzzed. This string
    is used like this:
        - v.set_desc('SQL injection was found at: ' + mutant.found_at())
    """
    dc = self.get_dc()
    token = dc.get_token()

    msg = '"%s", using HTTP method %s. The sent data was: "%s"'
    msg %= (smart_str_ignore(self.get_url()),
            smart_str_ignore(self.get_method()),
            smart_str_ignore(dc.get_short_printable_repr()))

    if token is None:
        return msg

    # Also name the parameter that held the payload
    suffix = ' The modified parameter was "%s".'
    return msg + suffix % smart_str_ignore(token.get_name())
def found_at(self):
    """
    Return a string representing WHAT was fuzzed. This string
    is used like this:
        - v.set_desc('SQL injection was found at: ' + mutant.found_at())
    """
    # The data container holds the sent parameters; the token (if any)
    # points at the parameter that was modified during fuzzing
    dc = self.get_dc()
    dc_short = dc.get_short_printable_repr()
    token = dc.get_token()

    # smart_str_ignore() encodes to a byte string, dropping characters
    # that can't be encoded
    msg = '"%s", using HTTP method %s. The sent data was: "%s"'
    msg %= (smart_str_ignore(self.get_url()),
            smart_str_ignore(self.get_method()),
            smart_str_ignore(dc_short))

    if token is not None:
        msg += ' The modified parameter was "%s".' % smart_str_ignore(
            token.get_name())

    return msg
def set_token(self, set_token_path):
    """
    Sets the token in the DataContainer to point to the variable specified
    in set_token_path. Usually set_token_path will be one of:
        * ('id',) - When the data container doesn't support repeated params
        * ('id', 3) - When it does
        * A DataToken instance which holds the path

    :raises: An exception when the DataContainer does NOT contain the
             specified path in *args to find the variable
    :return: The token if we were able to set it in the DataContainer
    """
    override_token = False

    try:
        # Try to get the path from the parameter, if it is a DataToken
        # instance this will succeed.
        token_path = set_token_path.get_path()
        override_token = True
    except AttributeError:
        # Plain tuple path was passed
        token_path = set_token_path

    for key, val, i_token_path, setter in self.iter_setters():
        if i_token_path == token_path:
            if override_token:
                # Use token provided in parameter
                token = set_token_path
            elif isinstance(val, DataToken):
                # We've already done a set_token(...) for this token path
                # in the past, and now we're doing it again. Don't double
                # wrap the pre-existing token!
                token = val
            else:
                token = DataToken(key, val, i_token_path)

            setter(token)
            self.token = token
            return token

    # The requested path was not found: build an error message that lists
    # every valid path for this DataContainer
    path_str = lambda path: '(%s)' % ', '.join(
        [smart_str_ignore(i) for i in path])

    ppath = path_str(token_path)
    vpath = ' - '.join([path_str(p) for _, _, p, _ in self.iter_setters()])

    if vpath:
        msg = 'Invalid token path "%s". Valid paths are: %s'
        raise RuntimeError(msg % (ppath, vpath))
    else:
        msg = 'Invalid token path "%s". No valid paths for "%s"'
        raise RuntimeError(msg % (ppath, self.get_type()))
def _save_response_to_file(self, response):
    """
    Write the HTTP response body to a temporary file on disk.

    Note: The file needs to have .js extension to force retirejs to
    scan it. Any other extension will be ignored.

    :param response: The HTTP response whose body is dumped
    :return: The name of the file that was written
    """
    temp_dir = self._get_js_temp_directory()
    response_file = tempfile.NamedTemporaryFile(prefix='retirejs-response-',
                                                suffix='.w3af.js',
                                                delete=False,
                                                dir=temp_dir)

    # Encode the body to a byte string before writing it out
    response_file.write(smart_str_ignore(response.get_body()))
    response_file.close()

    return response_file.name
def set_token(self, set_token_path):
    """
    Sets the token in the DataContainer to point to the variable specified
    in set_token_path. Usually set_token_path will be one of:
        * ('id',) - When the data container doesn't support repeated params
        * ('id', 3) - When it does
        * A DataToken instance which holds the path

    :raises: An exception when the DataContainer does NOT contain the
             specified path in *args to find the variable
    :return: The token if we were able to set it in the DataContainer
    """
    override_token = False

    try:
        # Try to get the path from the parameter, if it is a DataToken
        # instance this will succeed.
        token_path = set_token_path.get_path()
        override_token = True
    except AttributeError:
        # A plain tuple path was passed instead of a DataToken
        token_path = set_token_path

    for key, val, i_token_path, setter in self.iter_setters():
        if i_token_path == token_path:
            if override_token:
                # Use token provided in parameter
                token = set_token_path
            elif isinstance(val, DataToken):
                # We've already done a set_token(...) for this token path
                # in the past, and now we're doing it again. Don't double
                # wrap the pre-existing token!
                token = val
            else:
                token = DataToken(key, val, i_token_path)

            setter(token)
            self.token = token
            return token

    # Path not found: raise with a message that lists all valid paths
    path_str = lambda path: '(%s)' % ', '.join([smart_str_ignore(i)
                                                for i in path])

    ppath = path_str(token_path)
    vpath = ' - '.join([path_str(p) for _, _, p, _ in self.iter_setters()])

    if vpath:
        msg = 'Invalid token path "%s". Valid paths are: %s'
        raise RuntimeError(msg % (ppath, vpath))
    else:
        msg = 'Invalid token path "%s". No valid paths for "%s"'
        raise RuntimeError(msg % (ppath, self.get_type()))
def _analyze_domain(self, response, script_full_url, script_tag):
    """
    Checks if the domain is the same, or if it's considered secure.
    """
    url = response.get_url()
    script_domain = script_full_url.get_domain()

    # Same-domain scripts are not an issue
    if script_domain == url.get_domain():
        return

    # We do a "in" because the secure js domains list contains
    # entries such as ".google." which should be match. This is to
    # take into account things like ".google.com.br" without having
    # to list all of them.
    #
    # Not the best, could raise some false negatives, but... bleh!
    if any(secure_domain in script_domain
           for secure_domain in self._secure_js_domains):
        # It's a third party that we trust
        return

    desc = ('The URL: "%s" has a script tag with a source that points'
            ' to a third party site ("%s"). This practice is not'
            ' recommended, the security of the current site is being'
            ' delegated to the external entity.')
    desc %= (smart_str_ignore(url), smart_str_ignore(script_domain))

    i = Info('Cross-domain javascript source', desc,
             response.id, self.get_name())
    i.set_url(url)
    i.add_to_highlight(script_tag.attrib.get('src'))
    i[CrossDomainInfoSet.ITAG] = script_domain

    self.kb_append_uniq_group(self, 'cross_domain_js', i,
                              group_klass=CrossDomainInfoSet)
def diff(a, b):
    """
    Compute the parts of each string that are unique to it.

    :param a: A string
    :param b: A string (similar to a)
    :return: Two strings (a_mod, b_mod) which are basically:
                 a_mod = a - (a intersection b)
                 b_mod = b - (a intersection b)
             Or if you want to see it in another way, the results are the
             parts of the string that make it unique between each other.
    """
    # Work on byte strings to avoid unicode issues in the diff engine
    a = smart_str_ignore(a)
    b = smart_str_ignore(b)

    engine = dmp_module.diff_match_patch()
    # Bound the time the diff algorithm is allowed to run
    engine.Diff_Timeout = MAX_DIFF_TIME

    changes = engine.diff_main(a, b, checklines=True)
    engine.diff_cleanupSemantic(changes)

    # op == -1 marks text only present in `a`, op == 1 text only in `b`
    only_in_a = [text for op, text in changes if op == -1]
    only_in_b = [text for op, text in changes if op == 1]

    return '\n'.join(only_in_a), '\n'.join(only_in_b)
def filter_non_printable(_str):
    """
    Keep the printable characters of the string, collapsing each run of
    non-printable characters into a single NON_PRINTABLE_REPLACE marker.
    """
    out = []

    for character in smart_str_ignore(_str):
        if is_printable_chr(character):
            out.append(character)
            continue

        # Only emit one marker per consecutive run of non-printables
        if not out or out[-1] != NON_PRINTABLE_REPLACE:
            out.append(NON_PRINTABLE_REPLACE)

    return ''.join(out)
def filter_non_printable(_str):
    """
    Keep the printable characters of the string, collapsing each run of
    non-printable characters into a single NON_PRINTABLE_REPLACE marker.
    """
    chars = []

    for c in smart_str_ignore(_str):
        if is_printable_chr(c):
            chars.append(c)
        else:
            # Emit the replacement marker only once per run of
            # non-printable characters
            if not chars:
                chars.append(NON_PRINTABLE_REPLACE)
            elif chars[-1] != NON_PRINTABLE_REPLACE:
                chars.append(NON_PRINTABLE_REPLACE)

    return ''.join(chars)
def _analyze_domain(self, response, script_full_url, script_tag):
    """
    Checks if the domain is the same, or if it's considered secure.

    :param response: The HTTP response holding the HTML with the script tag
    :param script_full_url: The URL found in the script tag's src attribute
    :param script_tag: The parsed script tag
    """
    url = response.get_url()
    script_domain = script_full_url.get_domain()

    # Same-domain scripts are not an issue
    if script_domain == response.get_url().get_domain():
        return

    for secure_domain in self._secure_js_domains:
        # We do a "in" because the secure js domains list contains
        # entries such as ".google." which should be match. This is to
        # take into account things like ".google.com.br" without having
        # to list all of them.
        #
        # Not the best, could raise some false negatives, but... bleh!
        if secure_domain in script_domain:
            # It's a third party that we trust
            return

    to_highlight = script_tag.attrib.get('src')

    desc = ('The URL: "%s" has a script tag with a source that points'
            ' to a third party site ("%s"). This practice is not'
            ' recommended, the security of the current site is being'
            ' delegated to the external entity.')
    desc %= (smart_str_ignore(url),
             smart_str_ignore(script_domain))

    i = Info('Cross-domain javascript source', desc,
             response.id, self.get_name())
    i.set_url(url)
    i.add_to_highlight(to_highlight)
    i[CrossDomainInfoSet.ITAG] = script_domain

    self.kb_append_uniq_group(self, 'cross_domain_js', i,
                              group_klass=CrossDomainInfoSet)
def _analyze_domain(self, response, script_full_url, script_tag):
    """
    Checks if the domain is the same, or if it's considered secure.

    :param response: The HTTP response holding the HTML with the script tag
    :param script_full_url: The URL found in the script tag's src attribute
    :param script_tag: The parsed script tag
    """
    response_url = response.get_url()
    script_domain = script_full_url.get_domain()

    # Same-domain scripts are not an issue
    if script_domain == response_url.get_domain():
        return

    for _ in self._secure_domain_multi_in.query(script_domain):
        # Query the multi in to check if any if the domains we loaded
        # previously match against the script domain we found in the
        # HTML.
        #
        # It's a third party that we trust
        return

    to_highlight = script_tag.attrib.get('src')

    desc = ('The URL: "%s" has a script tag with a source that points'
            ' to a third party site ("%s"). This practice is not'
            ' recommended, the security of the current site is being'
            ' delegated to the external entity.')
    desc %= (smart_str_ignore(response_url),
             smart_str_ignore(script_domain))

    i = Info('Cross-domain javascript source', desc,
             response.id, self.get_name())
    i.set_url(response_url)
    i.add_to_highlight(to_highlight)
    i[CrossDomainInfoSet.ITAG] = script_domain

    self.kb_append_uniq_group(self, 'cross_domain_js', i,
                              group_klass=CrossDomainInfoSet)
def _get_cache_key(self, mutant):
    """
    Build an md5 cache key from the mutant's method, URI, post-data and
    headers.

    :param mutant: The mutant to calculate the cache key for
    :return: An md5 hexdigest string
    """
    key_parts = (mutant.get_method(),
                 mutant.get_uri(),
                 mutant.get_data(),
                 mutant.get_all_headers())

    digest = hashlib.md5()
    digest.update(''.join(smart_str_ignore(part) for part in key_parts))
    return digest.hexdigest()
def _get_cache_key(self, mutant):
    """
    Build an md5 cache key from the mutant's method, URI, post-data and
    headers.

    :param mutant: The mutant to calculate the cache key for
    :return: An md5 hexdigest string
    """
    #
    # Get the cache key for this mutant
    #
    method = mutant.get_method()
    uri = mutant.get_uri()
    data = mutant.get_data()
    headers = mutant.get_all_headers()

    cache_key_parts = [method, uri, data, headers]
    # Encode each part to a byte string before hashing
    cache_key_str = ''.join([smart_str_ignore(i) for i in cache_key_parts])

    m = hashlib.md5()
    m.update(cache_key_str)
    return m.hexdigest()
def _analyze_strange(self, request, response, ref, token_name, token_value):
    """
    Append an Info to the KB when the parameter name / value pair looks
    uncommon.

    :return: True when an Info was added to the KB
    """
    if not self._is_strange(request, token_name, token_value):
        return False

    desc = ('The URI: "%s" has a parameter named: "%s" with value:'
            ' "%s", which is very uncommon. and requires manual'
            ' verification.')
    desc %= tuple(smart_str_ignore(x)
                  for x in (response.get_uri(), token_name, token_value))

    info = Info('Uncommon query string parameter', desc,
                response.id, self.get_name())
    info['parameter_value'] = token_value
    info.add_to_highlight(token_value)
    info.set_uri(ref)

    self.kb_append(self, 'strange_parameters', info)
    return True
def _analyze_strange(self, request, response, ref, token_name, token_value):
    """
    Append an Info to the KB when the parameter name / value pair looks
    uncommon.

    :return: True when an Info was added to the KB, False otherwise
    """
    if self._is_strange(request, token_name, token_value):
        desc = ('The URI: "%s" has a parameter named: "%s" with value:'
                ' "%s", which is very uncommon. and requires manual'
                ' verification.')
        args = (response.get_uri(), token_name, token_value)
        # Encode each part to a byte string before string formatting
        args = tuple(smart_str_ignore(i) for i in args)
        desc %= args

        i = Info('Uncommon query string parameter', desc,
                 response.id, self.get_name())
        i['parameter_value'] = token_value
        i.add_to_highlight(token_value)
        i.set_uri(ref)

        self.kb_append(self, 'strange_parameters', i)
        return True

    return False
def _parse_xml(self, param_name, original_value): """ Parse the XML into an object :param param_name: The name of the parameter as seen by the HTML parser :param original_value: The XML as sent by the application :return: The XML object or None if parsing failed """ # This is a safety measure to prevent us from loading large XML files # into memory (high memory usage) or loading a very complex xml which # might require a lot of CPU time if len(original_value) > 1024 * 1024: return None try: original_value_str = smart_str_ignore(original_value) except Exception, e: msg = ('Failed to encode unicode original value to string' ' in _parse_xml(). Exception: "%s"') om.out.debug(msg % e) return None
def unique_everseen_hash(iterable):
    """
    List unique elements, preserving order. Remember all elements ever
    seen, storing the hash of the element instead of the element itself.
    This will reduce the memory usage in the case where the element is
    large (an HTTP response body for example).

    Recommendation: The iterable should generate strings / unicode.
    """
    seen_hashes = set()

    for item in iterable:
        # Store the md5 digest instead of the (potentially large) item
        item_hash = hashlib.md5(smart_str_ignore(item)).digest()

        if item_hash in seen_hashes:
            continue

        seen_hashes.add(item_hash)
        yield item
def _parse_xml(self, original_value): """ Parse the XML into an object :param original_value: The XML as sent by the application :return: The XML object or None if parsing failed """ # This is a safety measure to prevent us from loading large XML files # into memory (high memory usage) or loading a very complex xml which # might require a lot of CPU time if len(original_value) > 1024 * 1024: return None # Secure, don't introduce XXE in our XXE detection plugin ;-) parser = etree.XMLParser(load_dtd=False, no_network=True, resolve_entities=False) try: xml_root = etree.fromstring(smart_str_ignore(original_value), parser=parser) except Exception, e: msg = 'Failed to parse XML to inject XXE tests. Exception was: "%s"' om.out.debug(msg % e) return None
def sent(self, needle):
    """
    Checks if something similar to `needle` was sent in the request.
    This is used to remove false positives, e.g. if a grep plugin finds
    a "strange" string and wants to be sure it was not generated by an
    audit plugin.

    This method should only be used by grep plugins which often have
    false positives.

    The following example shows that we sent d'z"0 but d\'z"0 will as
    well be recognised as sent

    Note on performance:

        At some point I thought about making all these calls lazy:
            needles.add(unquote(needle))
            needles.add(quote(needle))
            needles.add(quote_plus(needle))
            needles.add(self.make_comp(needle))
            needles.add(self.make_comp(unquote(needle)))

        To avoid the potentially unnecessary call to self.make_comp(...)
        if the needle was found in a haystack before, making the result
        of self.make_comp(...) unnecessary.

        That would help, but the impact in real life is really small,
        since in most scenarios this method will return False, which
        means that all the comparisons need to be done anyways.

    :param needle: The string
    :return: True if something similar was sent
    """
    needle = smart_str_ignore(needle)

    # Build every known encoding / normalization of the needle
    needles = set()
    needles.add(needle)
    needles.add(unquote(needle))
    needles.add(quote(needle))
    needles.add(quote_plus(needle))
    needles.add(self.make_comp(needle))
    needles.add(self.make_comp(unquote(needle)))

    # Filter the short needles
    #
    # We don't want false negatives just because the string is
    # short after making comparable
    needles = {n for n in needles if len(n) >= 3}

    uri = self.get_uri()
    data = smart_str_ignore(self.get_data())
    headers = smart_str_ignore(self.get_all_headers())

    # Build the searchable variants of the URI, post-data and headers
    haystacks = set()
    haystacks.add(smart_str_ignore(uri))
    haystacks.add(smart_str_ignore(uri.url_decode()))
    haystacks.add(self.make_comp(smart_str_ignore(uri.url_decode())))
    haystacks.add(data)
    haystacks.add(unquote(data))
    haystacks.add(self.make_comp(data))
    haystacks.add(self.make_comp(unquote(data)))
    haystacks.add(headers)
    haystacks.add(unquote(headers))

    # Filter the short haystacks
    haystacks = {h for h in haystacks if len(h) >= 3}

    # Join once so that each needle requires a single `in` scan
    haystack = '--'.join(haystacks)

    for needle in needles:
        if needle in haystack:
            return True

    # I didn't send the needle in any way
    return False
def _find_bsql(self, mutant, statement_tuple, statement_type):
    """
    Is the main algorithm for finding blind SQL injections.

    :param mutant: The mutant whose token will receive the SQL payloads
    :param statement_tuple: A (true_statement, false_statement) pair
    :param statement_type: Identifier for the statement family being tested
    :return: A vulnerability object or None if nothing is found
    """
    # shortcuts
    true_statement = statement_tuple[0]
    false_statement = statement_tuple[1]
    send_clean = self._uri_opener.send_clean
    debugging_id = self.get_debugging_id()

    # Send the TRUE and FALSE statements and capture their bodies
    mutant.set_token_value(true_statement)
    true_response, body_true_response = send_clean(mutant,
                                                   debugging_id=debugging_id,
                                                   grep=True)

    mutant.set_token_value(false_statement)
    false_response, body_false_response = send_clean(mutant,
                                                     debugging_id=debugging_id,
                                                     grep=False)

    if body_true_response == body_false_response:
        msg = ('There is NO CHANGE between the true and false responses.'
               ' NO WAY w3af is going to detect a blind SQL injection'
               ' using response diffs in this case.')
        self.debug(msg, mutant=mutant)
        return None

    compare_diff = False

    msg = 'Comparing body_true_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=false_response)

    if self.equal_with_limit(body_true_response,
                             body_false_response,
                             compare_diff):
        #
        # They might be equal because of various reasons, in the best
        # case scenario there IS a blind SQL injection but the % of the
        # HTTP response body controlled by it is so small that the equal
        # ratio is not catching it.
        #
        self.debug('Setting compare_diff to True', mutant=mutant)
        compare_diff = True

    # A broken statement must NOT look like the TRUE response, otherwise
    # the parameter is not interpreting SQL at all
    mutant.set_token_value(self.SYNTAX_ERROR)
    syntax_error_response, body_syntax_error_response = send_clean(
        mutant,
        debugging_id=debugging_id,
        grep=False)

    msg = 'Comparing body_true_response and body_syntax_error_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=syntax_error_response)

    if self.equal_with_limit(body_true_response,
                             body_syntax_error_response,
                             compare_diff):
        return None

    # Check if its a search engine before we dig any deeper...
    search_disambiguator = self._remove_all_special_chars(true_statement)
    mutant.set_token_value(search_disambiguator)
    search_response, body_search_response = send_clean(
        mutant,
        grep=False,
        debugging_id=debugging_id)

    # If they are equal then we have a search engine
    msg = 'Comparing body_true_response and body_search_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=search_response)

    if self.equal_with_limit(body_true_response,
                             body_search_response,
                             compare_diff):
        return None

    # Verify the injection!
    # A second true / false statement pair must reproduce the behavior
    statements = self._get_statements(mutant)
    second_true_stm = statements[statement_type][0]
    second_false_stm = statements[statement_type][1]

    mutant.set_token_value(second_true_stm)
    second_true_response, body_second_true_response = send_clean(
        mutant,
        grep=False,
        debugging_id=debugging_id)

    mutant.set_token_value(second_false_stm)
    second_false_response, body_second_false_response = send_clean(
        mutant,
        grep=False,
        debugging_id=debugging_id)

    msg = 'Comparing body_second_true_response and body_true_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=second_true_response)

    if not self.equal_with_limit(body_second_true_response,
                                 body_true_response,
                                 compare_diff):
        return None

    msg = 'Comparing body_second_false_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=false_response,
               response_2=second_false_response)

    if self.equal_with_limit(body_second_false_response,
                             body_false_response,
                             compare_diff):
        response_ids = [second_false_response.id,
                        second_true_response.id]

        desc = ('Blind SQL injection was found at: "%s", using'
                ' HTTP method %s. The injectable parameter is: "%s"')
        desc %= (smart_str_ignore(mutant.get_url()),
                 smart_str_ignore(mutant.get_method()),
                 smart_str_ignore(mutant.get_token_name()))

        v = Vuln.from_mutant('Blind SQL injection vulnerability', desc,
                             severity.HIGH, response_ids, 'blind_sqli',
                             mutant)

        om.out.debug(v.get_desc())

        v['type'] = statement_type
        v['true_html'] = second_true_response.get_body()
        v['false_html'] = second_false_response.get_body()
        v['error_html'] = syntax_error_response.get_body()
        return v

    return None
def _analyze_result(self, mutant, response): """ Analyze results of the _send_mutant method. Try to find the local file inclusions. """ # # I will only report the vulnerability once. # if self._has_bug(mutant): return # # Identify the vulnerability # for file_pattern_match in self._find_common_file_fragments(response): if file_pattern_match not in mutant.get_original_response_body(): desc = 'Local File Inclusion was found at: %s' desc %= mutant.found_at() v = Vuln.from_mutant('Local file inclusion vulnerability', desc, severity.MEDIUM, response.id, self.get_name(), mutant) v['file_pattern'] = file_pattern_match v.add_to_highlight(file_pattern_match) self.kb_append_uniq(self, 'lfi', v) return # # If the vulnerability could not be identified by matching strings that # commonly appear in "/etc/passwd", then I'll check one more thing... # (note that this is run if no vulns were identified) # # http://host.tld/show_user.php?id=show_user.php # # The calls to smart_str_ignore fix a UnicodeDecoreError which appears when # the token value is a binary string which can't be converted to unicode. # This happens, for example, when trying to upload JPG files to a multipart form # # >>> u'' in '\x80' # ... 
# UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: ordinal not in range(128) # filename = smart_str_ignore(mutant.get_url().get_file_name()) token_value = smart_str_ignore(mutant.get_token_value()) if filename in token_value: match, lang = contains_source_code(response) if match: # We were able to read the source code of the file that is # vulnerable to local file read desc = ('An arbitrary local file read vulnerability was' ' found at: %s') desc %= mutant.found_at() v = Vuln.from_mutant('Local file inclusion vulnerability', desc, severity.MEDIUM, response.id, self.get_name(), mutant) # # Set which part of the source code to match # match_source_code = match.group(0) v['file_pattern'] = match_source_code self.kb_append_uniq(self, 'lfi', v) return # # Check for interesting errors (note that this is run if no vulns were # identified) # body = response.get_body() for _, error_str, _ in self.file_read_error_multi_re.query(body): if error_str not in mutant.get_original_response_body(): desc = 'A file read error was found at: %s' desc %= mutant.found_at() i = Info.from_mutant('File read error', desc, response.id, self.get_name(), mutant) i.add_to_highlight(error_str) self.kb_append_uniq(self, 'error', i)
def inspect_data_to_log(self, pool, inspect_data):
    """
    Print the inspect_threads data to the log files.

    Busy workers running a job for 10+ seconds get a detailed line with
    their (truncated) args / kwargs; idle workers are summarized at the
    end, followed by an idle percentage stat.

    :param pool: The worker pool being inspected (used for worker_names)
    :param inspect_data: List of worker state dicts as returned by the
                         worker's get_state(): func_name, args, kwargs,
                         start_time, idle, job, worker_id, ...
    :return: None
    """
    def shorten(value):
        # repr() may raise UnicodeEncodeError for binary data in py2;
        # fall back to converting the raw value in that case. Truncate
        # long values to keep the log readable.
        try:
            value_repr = repr(value)
        except UnicodeEncodeError:
            value_str = smart_str_ignore(value)
        else:
            value_str = smart_str_ignore(value_repr)

        if len(value_str) > 80:
            value_str = value_str[:80] + "...'"

        return value_str

    name = pool.worker_names

    if not len(inspect_data):
        self.write_to_log('No pool workers at %s.' % (name,))
        return

    #
    # Write the detailed information
    #
    idle_workers = []

    for worker_state in inspect_data:
        if worker_state['idle']:
            idle_workers.append(worker_state)
            continue

        # Workers which have not started a job yet have no start_time;
        # without this guard the subtraction below raises TypeError
        if worker_state['start_time'] is None:
            continue

        spent = time.time() - worker_state['start_time']

        # Save us some disk space and sanity, only log worker state if it has
        # been running for at least 10 seconds
        if spent < 10:
            continue

        args_str = ', '.join(shorten(arg) for arg in worker_state['args'])

        # BUGFIX: iterate the dict's (key, value) pairs; iterating the
        # dict directly yields only keys and the unpacking fails
        short_kwargs = {}
        for key, value in worker_state['kwargs'].items():
            short_kwargs[key] = shorten(value)

        kwargs_str = smart_str_ignore(short_kwargs)

        func_name = smart_str_ignore(worker_state['func_name'])
        func_name = self.clean_function_name(func_name)

        message = ('Worker with ID %s(%s) has been running job %s for %.2f seconds.'
                   ' The job is: %s(%s, kwargs=%s)')
        message %= (worker_state['name'],
                    worker_state['worker_id'],
                    worker_state['job'],
                    spent,
                    func_name,
                    args_str,
                    kwargs_str)

        trace = worker_state.get('trace', None)
        if trace is not None:
            message += '. Function call tree: %s' % trace

        self.write_to_log(message)

    #
    # Write the idle workers all together at the end, this makes
    # the log easier to read
    #
    for worker_state in idle_workers:
        message = 'Worker with ID %s(%s) is idle.'
        message %= (worker_state['name'], worker_state['worker_id'])
        self.write_to_log(message)

    #
    # Write some stats
    #
    total_workers = len(inspect_data)

    idle_workers = 0.0
    for worker_state in inspect_data:
        if worker_state['idle']:
            idle_workers += 1

    idle_perc = (idle_workers / total_workers) * 100
    self.write_to_log('%i%% of %s workers are idle.' % (idle_perc, name))
def to_string(self): """ :return: An xml node (as a string) representing the HTTP request / response. <http-transaction id="..."> <http-request> <status></status> <headers> <header> <field></field> <content></content> </header> </headers> <body content-encoding="base64"></body> </http-request> <http-response> <status></status> <headers> <header> <field></field> <content></content> </header> </headers> <body content-encoding="base64"></body> </http-response> </http-transaction> One of the differences this class has with the previous implementation is that the body is always encoded, no matter the content-type. This helps prevent encoding issues. """ # Get the data from the cache node = self.get_node_from_cache() if node is not None: return node # HistoryItem to get requests/responses req_history = HistoryItem() # This might raise a DBException in some cases (which I still # need to identify and fix). When an exception is raised here # the caller needs to handle it by ignoring this part of the # HTTP transaction request, response = req_history.load_from_file(self._id) data = request.get_data() or '' b64_encoded_request_body = base64.encodestring(smart_str_ignore(data)) body = response.get_body() or '' b64_encoded_response_body = base64.encodestring(smart_str_ignore(body)) context = {'id': self._id, 'request': {'status': request.get_request_line().strip(), 'headers': request.get_headers(), 'body': b64_encoded_request_body}, 'response': {'status': response.get_status_line().strip(), 'headers': response.get_headers(), 'body': b64_encoded_response_body}} context = dotdict(context) template = self.get_template(self.TEMPLATE) transaction = template.render(context) self.save_node_to_cache(transaction) return transaction
def _find_bsql(self, mutant, statement_tuple, statement_type):
    """
    Is the main algorithm for finding blind SQL injections.

    Injects the true/false statements from `statement_tuple` and compares
    the response bodies. Extra HTTP requests (an intentional syntax error,
    a search-engine disambiguation payload and a second true/false pair)
    are sent to reduce false positives before reporting.

    :param mutant: The mutant whose token receives the SQL statements
    :param statement_tuple: Tuple with (true_statement, false_statement)
    :param statement_type: Key identifying the statement pair; also used
                           to look up the confirmation pair from
                           self._get_statements() and stored in the vuln
    :return: A vulnerability object or None if nothing is found
    """
    # shortcuts
    true_statement = statement_tuple[0]
    false_statement = statement_tuple[1]
    send_clean = self._uri_opener.send_clean
    debugging_id = self.get_debugging_id()

    mutant.set_token_value(true_statement)
    true_response, body_true_response = send_clean(mutant,
                                                   debugging_id=debugging_id,
                                                   grep=True)

    mutant.set_token_value(false_statement)
    false_response, body_false_response = send_clean(mutant,
                                                     debugging_id=debugging_id,
                                                     grep=False)

    # Identical bodies: response diffs can never reveal this injection
    if body_true_response == body_false_response:
        msg = ('There is NO CHANGE between the true and false responses.'
               ' NO WAY w3af is going to detect a blind SQL injection'
               ' using response diffs in this case.')
        self.debug(msg, mutant=mutant)
        return None

    compare_diff = False

    msg = 'Comparing body_true_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=false_response)

    if self.equal_with_limit(body_true_response,
                             body_false_response,
                             compare_diff):
        #
        # They might be equal because of various reasons, in the best
        # case scenario there IS a blind SQL injection but the % of the
        # HTTP response body controlled by it is so small that the equal
        # ratio is not catching it.
        #
        self.debug('Setting compare_diff to True', mutant=mutant)
        compare_diff = True

    mutant.set_token_value(self.SYNTAX_ERROR)
    syntax_error_response, body_syntax_error_response = send_clean(mutant,
                                                                   debugging_id=debugging_id,
                                                                   grep=False)

    msg = 'Comparing body_true_response and body_syntax_error_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=syntax_error_response)

    # If an intentional syntax error yields the same body as the true
    # statement, the parameter is most likely not injectable
    if self.equal_with_limit(body_true_response,
                             body_syntax_error_response,
                             compare_diff):
        return None

    # Check if its a search engine before we dig any deeper...
    search_disambiguator = self._remove_all_special_chars(true_statement)
    mutant.set_token_value(search_disambiguator)
    search_response, body_search_response = send_clean(mutant,
                                                       grep=False,
                                                       debugging_id=debugging_id)

    # If they are equal then we have a search engine
    msg = 'Comparing body_true_response and body_search_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=search_response)

    if self.equal_with_limit(body_true_response,
                             body_search_response,
                             compare_diff):
        return None

    # Now a nice trick from real-life. In some search engines when
    # searching for `46" OR "46"="46" OR "46"="46` we get only a
    # couple of results, which I assume is because the search
    # engine is trying to search for more terms.
    #
    # Removing the special characters will make w3af search for
    # `46 OR 46 46 OR 46 46`, which yields many results in
    # the application's search engine, which I assume is because the
    # search engine just needs to match objects with 46 / OR.
    #
    # So, this means that the responses ARE different, but they came
    # from a search engine. The check above is NOT going to catch that
    # and will yield a false positive.
    #
    # If this is not a search engine, or is a search engine with a blind
    # sql injection, the result with `46" OR "46"="46" OR "46"="46` should
    # be have a larger HTTP response body: "all results" should be there.
    #
    # If it is a search engine, then the result for the search string
    # without special characters will be larger.
    if len(body_search_response) * 0.8 > len(body_true_response):
        msg = 'Search engine detected using response length, stop.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=search_response)
        return None

    # Verify the injection!
    statements = self._get_statements(mutant)
    second_true_stm = statements[statement_type][0]
    second_false_stm = statements[statement_type][1]

    mutant.set_token_value(second_true_stm)
    second_true_response, body_second_true_response = send_clean(mutant,
                                                                 grep=False,
                                                                 debugging_id=debugging_id)

    mutant.set_token_value(second_false_stm)
    second_false_response, body_second_false_response = send_clean(mutant,
                                                                   grep=False,
                                                                   debugging_id=debugging_id)

    msg = 'Comparing body_second_true_response and body_true_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=second_true_response)

    # The second true statement must behave like the first one...
    if not self.equal_with_limit(body_second_true_response,
                                 body_true_response,
                                 compare_diff):
        return None

    msg = 'Comparing body_second_false_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=false_response,
               response_2=second_false_response)

    # ...and the second false statement like the first false one
    if not self.equal_with_limit(body_second_false_response,
                                 body_false_response,
                                 compare_diff):
        return None

    response_ids = [second_false_response.id,
                    second_true_response.id]

    desc = ('Blind SQL injection was found at: "%s", using'
            ' HTTP method %s. The injectable parameter is: "%s"')
    desc %= (smart_str_ignore(mutant.get_url()),
             smart_str_ignore(mutant.get_method()),
             smart_str_ignore(mutant.get_token_name()))

    v = Vuln.from_mutant('Blind SQL injection vulnerability',
                         desc,
                         severity.HIGH,
                         response_ids,
                         'blind_sqli',
                         mutant)

    om.out.debug(v.get_desc())
    self.debug(v.get_desc(),
               statement_type=statement_type,
               mutant=mutant,
               response_1=false_response,
               response_2=second_false_response)

    v['type'] = statement_type
    v['true_html'] = second_true_response.get_body()
    v['false_html'] = second_false_response.get_body()
    v['error_html'] = syntax_error_response.get_body()

    return v
def inspect_data_to_log(self, pool, inspect_data): """ Print the inspect_threads data to the log files def get_state(self): return {'func_name': self.func_name, 'args': self.args, 'kwargs': self.kwargs, 'start_time': self.start_time, 'idle': self.is_idle(), 'job': self.job, 'worker_id': self.id} :return: None """ name = pool.worker_names if not len(inspect_data): self.write_to_log('No pool workers at %s.' % (name, )) return # # Write the detailed information # idle_workers = [] for worker_state in inspect_data: if worker_state['idle']: idle_workers.append(worker_state) continue spent = time.time() - worker_state['start_time'] # Save us some disk space and sanity, only log worker state if it has # been running for at least 10 seconds if spent < 10: continue args_str = ', '.join( smart_str_ignore(repr(arg)) for arg in worker_state['args']) kwargs_str = smart_str_ignore(worker_state['kwargs']) func_name = smart_str_ignore(worker_state['func_name']) func_name = self.clean_function_name(func_name) message = ( 'Worker with ID %s(%s) has been running job %s for %.2f seconds.' ' The job is: %s(%s, kwargs=%s)') message %= (worker_state['name'], worker_state['worker_id'], worker_state['job'], spent, func_name, args_str, kwargs_str) trace = worker_state.get('trace', None) if trace is not None: message += '. Function call tree: %s' % trace self.write_to_log(message) # # Write the idle workers all together at the end, this makes # the log easier to read # for worker_state in idle_workers: message = 'Worker with ID %s(%s) is idle.' message %= (worker_state['name'], worker_state['worker_id']) self.write_to_log(message) # # Write some stats # total_workers = len(inspect_data) idle_workers = 0.0 for worker_state in inspect_data: if worker_state['idle']: idle_workers += 1 idle_perc = (idle_workers / total_workers) * 100 self.write_to_log('%i%% of %s workers are idle.' % (idle_perc, name))
def sent(self, needle):
    """
    Checks if something similar to `needle` was sent in the request.

    Grep plugins use this to discard false positives: a "strange" string
    found in a response might simply be the echo of a payload which an
    audit plugin sent. Several encoded/decoded/comparable variants of the
    needle are matched against the URI, post-data and headers, so for
    example both d'z"0 and d\'z"0 are recognised as sent.

    Performance note: the variant sets could be built lazily to skip the
    make_comp(...) calls when an earlier variant already matches, but in
    practice this method usually returns False, which means every
    comparison runs anyway — so eager construction costs nothing real.

    :param needle: The string
    :return: True if something similar was sent
    """
    needle = smart_str_ignore(needle)

    # All the ways the needle may have appeared on the wire
    candidate_needles = {
        needle,
        unquote(needle),
        quote(needle),
        quote_plus(needle),
        self.make_comp(needle),
        self.make_comp(unquote(needle)),
    }

    # Drop needles that became shorter than three characters: matching
    # them would produce false negatives, not useful signal
    candidate_needles = {n for n in candidate_needles if len(n) >= 3}

    uri = self.get_uri()
    data = smart_str_ignore(self.get_data())
    headers = smart_str_ignore(self.get_all_headers())

    # Every request surface the needle could live in, in several
    # decoded / comparable forms
    candidate_haystacks = {
        smart_str_ignore(uri),
        smart_str_ignore(uri.url_decode()),
        self.make_comp(smart_str_ignore(uri.url_decode())),
        data,
        unquote(data),
        self.make_comp(data),
        self.make_comp(unquote(data)),
        headers,
        unquote(headers),
    }

    # Same minimum-length rule for the haystacks
    candidate_haystacks = {h for h in candidate_haystacks if len(h) >= 3}

    # True as soon as any needle variant appears in any haystack variant
    return any(n in h
               for h in candidate_haystacks
               for n in candidate_needles)
def should_grep(self, request, response):
    """
    :param request: The HTTP request sent by the framework
    :param response: The HTTP response received for that request
    :return: True if I should grep this request/response pair.

    This method replaces some of the logic that before was in
    grep_plugin.py, but because of the requirement of a central location
    to store a bloom filter was moved here.
    """
    # No grep plugins enabled: nothing would consume the pair anyway
    if not self._consumer_plugins:
        return False

    self._print_should_grep_stats()

    # This cache is here to avoid a query to the cf each time a request
    # goes to a grep plugin. Given that in the future the cf will be a
    # sqlite database, this is an important improvement.
    if self._target_domains is None:
        self._target_domains = cf.cf.get('target_domains')

    if response.get_url().get_domain() not in self._target_domains:
        self._should_grep_stats['reject-out-of-scope'] += 1
        return False

    #
    # This prevents responses for the same URL from being analyze twice
    #
    # Sometimes the HTTP responses vary in one byte, which will completely
    # break the filter we have implemented below (it uses a hash for
    # the response headers and xml-bones body).
    #
    # This filter is less effective, mainly during the audit phase where the
    # plugins are heavily changing the query-string, but will prevent some HTTP
    # requests and responses from making it to the grep plugins
    #
    # NOTE(review): this assumes .add() returns a falsy value when the URI
    # was already present in the filter — confirm against the filter class
    #
    if not self._already_analyzed_url.add(response.get_uri()):
        self._should_grep_stats['reject-seen-url'] += 1
        return False

    #
    # This prevents the same HTTP response from being analyze twice
    #
    # The great majority of grep plugins analyze HTTP response bodies,
    # some analyze HTTP response headers, and a very small subset analyzes
    # HTTP requests. Based on these facts it was possible to add these
    # lines to prevent the same HTTP response from being analyzed twice.
    #
    # One of the options I had was to use get_response_cache_key() below,
    # to prevent double processing of HTTP response bodies, but that
    # strategy had more chances of "hiding" some HTTP responses from grep
    # plugins:
    #
    #   * HTTP response A contains header set X and body Y. It will be
    #     processed because it is the first time body Y is seen.
    #
    #   * HTTP response A contains header set Z and body Y. It will be
    #     ignored because Y was already seen.
    #
    # So I decided to use both the headers and body. The filter might be
    # degraded on sites that use HTTP response headers that contain dates
    # or some other value that changes a lot, this issue was reduced by
    # using EXCLUDE_HEADERS_FOR_HASH
    #
    headers = response.dump_headers(exclude_headers=self.EXCLUDE_HEADERS_FOR_HASH)
    headers = smart_str_ignore(headers)

    #
    # Note that using cached_get_response_cache_key() here gives a performance
    # boost, this cache uses the HTTP response body and headers (at least some)
    # as a key. In initial tests using this cache strategy made the
    # `test_should_grep_speed` unittest go from 26 to 9 seconds.
    #
    response_hash = self._response_cache_key_cache.get_response_cache_key(response,
                                                                          headers=headers)

    if not self._already_analyzed_body.add(response_hash):
        self._should_grep_stats['reject-seen-body'] += 1
        return False

    self._should_grep_stats['accept'] += 1
    return True
def _find_bsql(self, mutant, statement_tuple, statement_type):
    """
    Is the main algorithm for finding blind SQL injections.

    Injects the true/false statements from `statement_tuple` and compares
    the response bodies. Extra HTTP requests (an intentional syntax error,
    a search-engine disambiguation payload and a second true/false pair)
    are sent to reduce false positives before reporting.

    :param mutant: The mutant whose token receives the SQL statements
    :param statement_tuple: Tuple with (true_statement, false_statement)
    :param statement_type: Key identifying the statement pair; also used
                           to look up the confirmation pair from
                           self._get_statements() and stored in the vuln
    :return: A vulnerability object or None if nothing is found
    """
    # shortcuts
    true_statement = statement_tuple[0]
    false_statement = statement_tuple[1]
    send_clean = self._uri_opener.send_clean
    debugging_id = self.get_debugging_id()

    mutant.set_token_value(true_statement)
    true_response, body_true_response = send_clean(mutant,
                                                   debugging_id=debugging_id,
                                                   grep=True)

    mutant.set_token_value(false_statement)
    false_response, body_false_response = send_clean(mutant,
                                                     debugging_id=debugging_id,
                                                     grep=False)

    # Identical bodies: response diffs can never reveal this injection
    if body_true_response == body_false_response:
        msg = ('There is NO CHANGE between the true and false responses.'
               ' NO WAY w3af is going to detect a blind SQL injection'
               ' using response diffs in this case.')
        self.debug(msg, mutant=mutant)
        return None

    compare_diff = False

    msg = 'Comparing body_true_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=false_response)

    if self.equal_with_limit(body_true_response,
                             body_false_response,
                             compare_diff):
        #
        # They might be equal because of various reasons, in the best
        # case scenario there IS a blind SQL injection but the % of the
        # HTTP response body controlled by it is so small that the equal
        # ratio is not catching it.
        #
        self.debug('Setting compare_diff to True', mutant=mutant)
        compare_diff = True

    mutant.set_token_value(self.SYNTAX_ERROR)
    syntax_error_response, body_syntax_error_response = send_clean(mutant,
                                                                   debugging_id=debugging_id,
                                                                   grep=False)

    msg = 'Comparing body_true_response and body_syntax_error_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=syntax_error_response)

    # If an intentional syntax error yields the same body as the true
    # statement, the parameter is most likely not injectable
    if self.equal_with_limit(body_true_response,
                             body_syntax_error_response,
                             compare_diff):
        return None

    # Check if its a search engine before we dig any deeper...
    search_disambiguator = self._remove_all_special_chars(true_statement)
    mutant.set_token_value(search_disambiguator)
    search_response, body_search_response = send_clean(mutant,
                                                       grep=False,
                                                       debugging_id=debugging_id)

    # If they are equal then we have a search engine
    msg = 'Comparing body_true_response and body_search_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=search_response)

    if self.equal_with_limit(body_true_response,
                             body_search_response,
                             compare_diff):
        return None

    # Now a nice trick from real-life. In some search engines when
    # searching for `46" OR "46"="46" OR "46"="46` we get only a
    # couple of results, which I assume is because the search
    # engine is trying to search for more terms.
    #
    # Removing the special characters will make w3af search for
    # `46 OR 46 46 OR 46 46`, which yields many results in
    # the application's search engine, which I assume is because the
    # search engine just needs to match objects with 46 / OR.
    #
    # So, this means that the responses ARE different, but they came
    # from a search engine. The check above is NOT going to catch that
    # and will yield a false positive.
    #
    # If this is not a search engine, or is a search engine with a blind
    # sql injection, the result with `46" OR "46"="46" OR "46"="46` should
    # be have a larger HTTP response body: "all results" should be there.
    #
    # If it is a search engine, then the result for the search string
    # without special characters will be larger.
    if len(body_search_response) * 0.8 > len(body_true_response):
        msg = 'Search engine detected using response length, stop.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=search_response)
        return None

    # Verify the injection!
    statements = self._get_statements(mutant)
    second_true_stm = statements[statement_type][0]
    second_false_stm = statements[statement_type][1]

    mutant.set_token_value(second_true_stm)
    second_true_response, body_second_true_response = send_clean(mutant,
                                                                 grep=False,
                                                                 debugging_id=debugging_id)

    mutant.set_token_value(second_false_stm)
    second_false_response, body_second_false_response = send_clean(mutant,
                                                                   grep=False,
                                                                   debugging_id=debugging_id)

    msg = 'Comparing body_second_true_response and body_true_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=true_response,
               response_2=second_true_response)

    # The second true statement must behave like the first one...
    if not self.equal_with_limit(body_second_true_response,
                                 body_true_response,
                                 compare_diff):
        return None

    msg = 'Comparing body_second_false_response and body_false_response.'
    self.debug(msg,
               statement_type=statement_type,
               mutant=mutant,
               response_1=false_response,
               response_2=second_false_response)

    # ...and the second false statement like the first false one
    if not self.equal_with_limit(body_second_false_response,
                                 body_false_response,
                                 compare_diff):
        return None

    response_ids = [second_false_response.id,
                    second_true_response.id]

    desc = ('Blind SQL injection was found at: "%s", using'
            ' HTTP method %s. The injectable parameter is: "%s"')
    desc %= (smart_str_ignore(mutant.get_url()),
             smart_str_ignore(mutant.get_method()),
             smart_str_ignore(mutant.get_token_name()))

    v = Vuln.from_mutant('Blind SQL injection vulnerability',
                         desc,
                         severity.HIGH,
                         response_ids,
                         'blind_sqli',
                         mutant)

    om.out.debug(v.get_desc())
    self.debug(v.get_desc(),
               statement_type=statement_type,
               mutant=mutant,
               response_1=false_response,
               response_2=second_false_response)

    v['type'] = statement_type
    v['true_html'] = second_true_response.get_body()
    v['false_html'] = second_false_response.get_body()
    v['error_html'] = syntax_error_response.get_body()

    return v
def _analyze_result(self, mutant, response): """ Analyze results of the _send_mutant method. Try to find the local file inclusions. """ # # I will only report the vulnerability once. # if self._has_bug(mutant): return # # Identify the vulnerability # for file_pattern_match in self._find_common_file_fragments(response): if file_pattern_match not in mutant.get_original_response_body(): desc = 'Local File Inclusion was found at: %s' desc %= mutant.found_at() v = Vuln.from_mutant('Local file inclusion vulnerability', desc, severity.MEDIUM, response.id, self.get_name(), mutant) v['file_pattern'] = file_pattern_match v.add_to_highlight(file_pattern_match) self.kb_append_uniq(self, 'lfi', v) return # # If the vulnerability could not be identified by matching strings that # commonly appear in "/etc/passwd", then I'll check one more thing... # (note that this is run if no vulns were identified) # # http://host.tld/show_user.php?id=show_user.php # # The calls to smart_str_ignore fix a UnicodeDecoreError which appears when # the token value is a binary string which can't be converted to unicode. # This happens, for example, when trying to upload JPG files to a multipart form # # >>> u'' in '\x80' # ... 
# UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: ordinal not in range(128) # filename = smart_str_ignore(mutant.get_url().get_file_name()) token_value = smart_str_ignore(mutant.get_token_value()) if filename in token_value: match, lang = contains_source_code(response) if match: # We were able to read the source code of the file that is # vulnerable to local file read desc = ('An arbitrary local file read vulnerability was' ' found at: %s') desc %= mutant.found_at() v = Vuln.from_mutant('Local file inclusion vulnerability', desc, severity.MEDIUM, response.id, self.get_name(), mutant) # # Set which part of the source code to match # match_source_code = match.group(0) v['file_pattern'] = match_source_code self.kb_append_uniq(self, 'lfi', v) return # # Check for interesting errors (note that this is run if no vulns were # identified) # body = response.get_body() for _, error_str, _ in self.file_read_error_multi_re.query(body): if error_str not in mutant.get_original_response_body(): desc = 'A file read error was found at: %s' desc %= mutant.found_at() i = Info.from_mutant('File read error', desc, response.id, self.get_name(), mutant) i.add_to_highlight(error_str) self.kb_append_uniq(self, 'error', i)
def get_body_hash(self): body = smart_str_ignore(self.get_body()) return self._quick_hash(body)
def to_string(self): """ :return: An xml node (as a string) representing the HTTP request / response. <http-transaction id="..."> <http-request> <status></status> <headers> <header> <field></field> <content></content> </header> </headers> <body content-encoding="base64"></body> </http-request> <http-response> <status></status> <headers> <header> <field></field> <content></content> </header> </headers> <body content-encoding="base64"></body> </http-response> </http-transaction> One of the differences this class has with the previous implementation is that the body is always encoded, no matter the content-type. This helps prevent encoding issues. """ # Get the data from the cache node = self.get_node_from_cache() if node is not None: return node # HistoryItem to get requests/responses req_history = HistoryItem() # This might raise a DBException in some cases (which I still # need to identify and fix). When an exception is raised here # the caller needs to handle it by ignoring this part of the # HTTP transaction request, response = req_history.load_from_file(self._id) data = request.get_data() or '' b64_encoded_request_body = base64.encodestring(smart_str_ignore(data)) body = response.get_body() or '' b64_encoded_response_body = base64.encodestring(smart_str_ignore(body)) context = { 'id': self._id, 'request': { 'status': request.get_request_line().strip(), 'headers': request.get_headers(), 'body': b64_encoded_request_body }, 'response': { 'status': response.get_status_line().strip(), 'headers': response.get_headers(), 'body': b64_encoded_response_body } } context = dotdict(context) template = self.get_template(self.TEMPLATE) transaction = template.render(context) self.save_node_to_cache(transaction) return transaction
def quick_hash(text): text = smart_str_ignore(text) return '%s%s' % (hash(text), zlib.adler32(text))
def _find_bsql(self, mutant, statement_tuple, statement_type):
    """
    Is the main algorithm for finding blind SQL injections.

    Injects the true/false statements from `statement_tuple` and compares
    the response bodies. Extra HTTP requests (an intentional syntax error,
    a search-engine disambiguation payload and a second true/false pair)
    are sent to reduce false positives before reporting.

    :param mutant: The mutant whose token receives the SQL statements
    :param statement_tuple: Tuple with (true_statement, false_statement)
    :param statement_type: Key identifying the statement pair; also used
                           to look up the confirmation pair from
                           self._get_statements() and stored in the vuln
    :return: A vulnerability object or None if nothing is found
    """
    # shortcuts
    true_statement = statement_tuple[0]
    false_statement = statement_tuple[1]
    send_clean = self._uri_opener.send_clean
    debugging_id = self.get_debugging_id()

    mutant.set_token_value(true_statement)
    _, body_true_response = send_clean(mutant,
                                       debugging_id=debugging_id,
                                       grep=True)

    mutant.set_token_value(false_statement)
    _, body_false_response = send_clean(mutant,
                                        debugging_id=debugging_id,
                                        grep=False)

    # Identical bodies: response diffs can never reveal this injection
    if body_true_response == body_false_response:
        msg = ('There is NO CHANGE between the true and false responses.'
               ' NO WAY w3af is going to detect a blind SQL injection'
               ' using response diffs in this case.')
        self.debug(msg, mutant)
        return None

    compare_diff = False

    self.debug('[%s] Comparing body_true_response and'
               ' body_false_response.' % statement_type, mutant)

    if self.equal_with_limit(body_true_response,
                             body_false_response,
                             compare_diff):
        #
        # They might be equal because of various reasons, in the best
        # case scenario there IS a blind SQL injection but the % of the
        # HTTP response body controlled by it is so small that the equal
        # ratio is not catching it.
        #
        self.debug('Setting compare_diff to True', mutant)
        compare_diff = True

    mutant.set_token_value(self.SYNTAX_ERROR)
    syntax_error_response, body_syntax_error_response = send_clean(
        mutant, debugging_id=debugging_id, grep=False)

    self.debug('[%s] Comparing body_true_response and'
               ' body_syntax_error_response.' % statement_type, mutant)

    # If an intentional syntax error yields the same body as the true
    # statement, the parameter is most likely not injectable
    if self.equal_with_limit(body_true_response,
                             body_syntax_error_response,
                             compare_diff):
        return None

    # Check if its a search engine before we dig any deeper...
    search_disambiguator = self._remove_all_special_chars(true_statement)
    mutant.set_token_value(search_disambiguator)
    _, body_search_response = send_clean(mutant,
                                         grep=False,
                                         debugging_id=debugging_id)

    # If they are equal then we have a search engine
    self.debug('[%s] Comparing body_true_response and'
               ' body_search_response.' % statement_type, mutant)

    if self.equal_with_limit(body_true_response,
                             body_search_response,
                             compare_diff):
        return None

    # Verify the injection!
    statements = self._get_statements(mutant)
    second_true_stm = statements[statement_type][0]
    second_false_stm = statements[statement_type][1]

    mutant.set_token_value(second_true_stm)
    second_true_response, body_second_true_response = send_clean(
        mutant, grep=False, debugging_id=debugging_id)

    mutant.set_token_value(second_false_stm)
    second_false_response, body_second_false_response = send_clean(
        mutant, grep=False, debugging_id=debugging_id)

    self.debug('[%s] Comparing body_second_true_response and'
               ' body_true_response.' % statement_type, mutant)

    # The second true statement must behave like the first one
    if not self.equal_with_limit(body_second_true_response,
                                 body_true_response,
                                 compare_diff):
        return None

    self.debug('[%s] Comparing body_second_false_response and'
               ' body_false_response.' % statement_type, mutant)

    # Report only when the second false statement also behaves like the
    # first false one
    if self.equal_with_limit(body_second_false_response,
                             body_false_response,
                             compare_diff):
        response_ids = [second_false_response.id,
                        second_true_response.id]

        desc = 'Blind SQL injection was found at: "%s", using'\
               ' HTTP method %s. The injectable parameter is: "%s"'
        desc %= (smart_str_ignore(mutant.get_url()),
                 smart_str_ignore(mutant.get_method()),
                 smart_str_ignore(mutant.get_token_name()))

        v = Vuln.from_mutant('Blind SQL injection vulnerability',
                             desc,
                             severity.HIGH,
                             response_ids,
                             'blind_sqli',
                             mutant)

        om.out.debug(v.get_desc())

        v['type'] = statement_type
        v['true_html'] = second_true_response.get_body()
        v['false_html'] = second_false_response.get_body()
        v['error_html'] = syntax_error_response.get_body()
        return v

    return None