Example #1
    def batch_injection_test(self, freq, orig_response):
        # NOTE(review): this listing is damaged. The docstring line below has
        # lost its triple quotes, the three `if fuzzy_equal(...)` checks have
        # no body, and several bracketed expressions are never closed, so the
        # method does not parse as shown. Recover the missing lines from the
        # real source before changing any logic here.
        Uses the batch injection technique to find memcache injections
        # shortcuts
        send_clean = self._uri_opener.send_clean
        orig_body = orig_response.get_body()

        for mutant in create_mutants(freq, ['']):

            # trying to break normal execution flow with ERROR_1 payload
            # NOTE(review): nothing visible sets a payload on the mutant before
            # any of the three requests -- presumably those lines were dropped.
            error_1_response, body_error_1_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_1_response, self._eq_limit):
                # if we manage to break execution flow, there is a potential
                # injection otherwise - no injection!
                # NOTE(review): branch body missing (presumably skips this
                # mutant) -- confirm.

            # trying the correct injection request, to confirm that we've found
            # it!
            ok_response, body_ok_response = send_clean(mutant)

            # NOTE(review): the call below is never closed; its last argument
            # and the branch body are missing.
            if fuzzy_equal(body_error_1_response, body_ok_response,
                # The "OK" and "ERROR_1" responses are equal, this means that
                # we're not in a memcached injection

            # ERROR_2 request to just make sure that we're in a memcached case
            error_2_response, body_error_2_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_2_response, self._eq_limit):
                # now requests should be different again, otherwise injection
                # is not confirmed
                # NOTE(review): branch body missing -- confirm.

            # NOTE(review): the list, the tuple and the Vuln.from_mutant() call
            # below are all left open; their remaining elements are missing.
            response_ids = [error_1_response.id,

            desc = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
            desc %= (mutant.get_url(),

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',

            # Stores the finding in the knowledge base under 'memcachei'.
            self.kb_append_uniq(self, 'memcachei', v)
Example #2
    def matches(self, query):
        """
        Check if the `query` HTTP response body matches the failed login page
        instance.

        The checks run from cheapest to most expensive: exact equality with
        either stored body, a coarse fuzzy comparison against `body_a`, and
        finally a comparison between the (a, b) diff and the (a, query) diff.

        :param query: An HTTP response body
        :return: True if the `query` response body is equal to the failed login
                 bodies stored in `self.body_a` / `self.body_b`.
        """
        if self.body_a == query:
            return True

        if self.body_b == query:
            return True

        if not fuzzy_equal(self.body_a, query, 0.60):
            # They are really different, no need to calculate diff()
            return False

        # The diff between the two known failed-login bodies is computed
        # lazily and cached on the instance for later calls.
        if self.diff_a_b is None:
            self.diff_a_b, _ = chunked_diff(self.body_a, self.body_b)

        _, diff_query_a = chunked_diff(self.body_a, query)

        # Had to add this in order to prevent issues with CSRF tokens, which
        # might be part of the HTTP response body, are random (not removed by
        # clean_body) and will "break" the diff
        if len(diff_query_a) < 64:
            return True

        if fuzzy_equal(self.diff_a_b, diff_query_a, 0.9):
            return True

        return False
Example #3
    def batch_injection_test(self, freq, orig_response):
        # NOTE(review): this listing is damaged. The docstring line below has
        # lost its triple quotes, the three `if fuzzy_equal(...)` checks have
        # no body, and several bracketed expressions are never closed, so the
        # method does not parse as shown. Recover the missing lines from the
        # real source before changing any logic here.
        Uses the batch injection technique to find memcache injections
        # shortcuts
        send_clean = self._uri_opener.send_clean
        orig_body = orig_response.get_body()

        for mutant in create_mutants(freq, ['']):

            # trying to break normal execution flow with ERROR_1 payload
            # NOTE(review): nothing visible sets a payload on the mutant before
            # any of the three requests -- presumably those lines were dropped.
            error_1_response, body_error_1_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_1_response, self._eq_limit):
                # if we manage to break execution flow, there is a potential
                # injection otherwise - no injection!
                # NOTE(review): branch body missing (presumably skips this
                # mutant) -- confirm.

            # trying the correct injection request, to confirm that we've found
            # it!
            ok_response, body_ok_response = send_clean(mutant)

            # NOTE(review): the call below is never closed; its last argument
            # and the branch body are missing.
            if fuzzy_equal(body_error_1_response, body_ok_response,
                # The "OK" and "ERROR_1" responses are equal, this means that
                # we're not in a memcached injection

            # ERROR_2 request to just make sure that we're in a memcached case
            error_2_response, body_error_2_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_2_response, self._eq_limit):
                # now requests should be different again, otherwise injection
                # is not confirmed
                # NOTE(review): branch body missing -- confirm.

            # NOTE(review): the list, the tuple and the Vuln.from_mutant() call
            # below are left open; the closing bracket / last argument of each
            # is missing.
            response_ids = [
                error_1_response.id, ok_response.id, error_2_response.id

            desc = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
            desc %= (mutant.get_url(), mutant.get_method(),

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',

            # Stores the finding in the knowledge base under 'memcachei'.
            self.kb_append_uniq(self, 'memcachei', v)
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Does a quick estimation to determine if the two strings (diff_x and diff_y)
    are fuzzy equal.

    Not using fuzzy_equal() directly to compare results of applying diff()
    because of CSRF tokens and other randomly generated tokens which were
    breaking the comparison.

    This function splits both inputs with split_by_sep(), removes those
    randomly generated strings with remove_hashes() and then does the
    fuzzy_equal() comparison on the re-joined text.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses (0 to 1)
    :return: True if the two results of applying the diff() function are
             fuzzy equal (applying split_by_sep technique)
    """
    if diff_x == diff_y:
        # Identical inputs need no splitting, cleaning or fuzzy comparison
        return True

    split_x = remove_hashes(split_by_sep(diff_x))
    split_y = remove_hashes(split_by_sep(diff_y))

    x = '\n'.join(split_x)
    y = '\n'.join(split_y)

    return fuzzy_equal(x, y, threshold=is_equal_ratio)
    def _is_404_with_extra_request(self, http_response, clean_resp_body):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and several statements are missing (flagged
        # inline), so the method does not parse as shown.
        Performs a very simple check to verify if this response is a 404 or not.

        It takes the original URL and modifies it by pre-pending a "not-" to the
        filename, then performs a request to that URL and compares the original
        response with the modified one. If they are equal then the original
        request is a 404.

        :param http_response: The original HTTP response
        :param clean_resp_body: The original HTML body you could find in
                                http_response after passing it by a cleaner

        :return: True if the original response was a 404 !
        response_url = http_response.get_url()
        filename = response_url.get_file_name()
        if not filename:
            # No file name in the URL: build a random sibling directory instead
            relative_url = '../%s/' % rand_alnum(8)
            url_404 = response_url.url_join(relative_url)
            # NOTE(review): the two lines below overwrite the two above, which
            # suggests an `else:` was lost here, and probably a line applying
            # `relative_url` to the copied URL -- confirm.
            relative_url = self._generate_404_filename(filename)
            url_404 = response_url.copy()

        response_404 = self._send_404(url_404)
        clean_response_404_body = get_clean_body(response_404)

        # NOTE(review): the body of this `if` is missing.
        if response_404.get_code() == 404 and \
        url_404.get_domain_path() not in self._directory_uses_404_codes:

        # NOTE(review): the call below is never closed; its last argument
        # (presumably the similarity threshold) is missing.
        return fuzzy_equal(clean_response_404_body, clean_resp_body,
Example #6
    def _is_404_with_extra_request(self, http_response, clean_resp_body):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and several statements are missing (flagged
        # inline), so the method does not parse as shown.
        Performs a very simple check to verify if this response is a 404 or not.

        It takes the original URL and modifies it by pre-pending a "not-" to the
        filename, then performs a request to that URL and compares the original
        response with the modified one. If they are equal then the original
        request is a 404.

        :param http_response: The original HTTP response
        :param clean_resp_body: The original HTML body you could find in
                                http_response after passing it by a cleaner

        :return: True if the original response was a 404 !
        response_url = http_response.get_url()
        filename = response_url.get_file_name()
        if not filename:
            # No file name in the URL: build a random sibling directory instead
            relative_url = '../%s/' % rand_alnum(8)
            url_404 = response_url.url_join(relative_url)
            # NOTE(review): the two lines below overwrite the two above, which
            # suggests an `else:` was lost here, and probably a line applying
            # `relative_url` to the copied URL -- confirm.
            relative_url = self._generate_404_filename(filename)
            url_404 = response_url.copy()

        response_404 = self._send_404(url_404)
        clean_response_404_body = get_clean_body(response_404)

        # NOTE(review): the body of this `if` is missing.
        if response_404.get_code() == 404 and \
        url_404.get_domain_path() not in self._directory_uses_404_codes:

        # NOTE(review): the call below is never closed; its last argument
        # (presumably the similarity threshold) is missing.
        return fuzzy_equal(clean_response_404_body, clean_resp_body,
Example #7
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Does a quick estimation to determine if the two strings (diff_x and diff_y)
    are fuzzy equal.

    Not using fuzzy_equal() directly to compare results of applying diff()
    because of CSRF tokens and other randomly generated tokens which were
    breaking the comparison.

    This function splits both inputs with split_by_sep(), removes those
    randomly generated strings with remove_hashes() and then does the
    fuzzy_equal() comparison on the re-joined text.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses (0 to 1)
    :return: True if the two results of applying the diff() function are
             fuzzy equal (applying split_by_sep technique)
    """
    if diff_x == diff_y:
        # Identical inputs need no splitting, cleaning or fuzzy comparison
        return True

    split_x = remove_hashes(split_by_sep(diff_x))
    split_y = remove_hashes(split_by_sep(diff_y))

    x = '\n'.join(split_x)
    y = '\n'.join(split_y)

    return fuzzy_equal(x, y, threshold=is_equal_ratio)
Example #8
    def _filter_errors(self, result, filename):
        """
        Filter out ugly php errors: when `result` is recognised as an error
        message an empty string is returned instead of the error itself.

        :param result: The text obtained when trying to read `filename`
        :param filename: The name of the requested file; it is removed from
                         `result` before comparing with the known
                         "file not found" text
        :return: '' if `result` is an error message, `result` unchanged
                 otherwise
        """
        # Well known PHP error fragments: permission denied, missing file,
        # path is not a directory and failed stream.
        php_error_markers = (
            'Permission denied',
            'No such file or directory in',
            'Not a directory in',
            ': failed to open stream: ',
        )

        if any(marker in result for marker in php_error_markers):
            return ''

        if self._file_not_found_str is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if fuzzy_equal(self._file_not_found_str, clean_result, 0.9):
                return ''

        return result
Example #9
    def clean_404_response_db(self):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and several statements are missing (flagged
        # inline), so the method does not parse as shown.
        During the scan, and because I chose to remove the very broad 404
        database lock, the 404 response database might become untidy: the same
        HTTP response might be appended to the DB multiple times.

        An untidy DB triggers more comparisons between HTTP responses, which
        is CPU-intensive.

        This method cleans the DB every N calls to reduce any duplicates.

        :return: None. The extended DB is modified.
        self._clean_404_response_db_calls += 1

        # NOTE(review): branch body missing (presumably an early return so the
        # clean-up only runs every CLEAN_DB_EVERY calls) -- confirm.
        if self._clean_404_response_db_calls % CLEAN_DB_EVERY != 0:

        removed_items = 0
        # Iterate over a shallow copy of the DB rather than the DB itself
        extended_404_response_copy = copy.copy(self._extended_404_responses)

        for i in extended_404_response_copy:
            for j in extended_404_response_copy:

                # NOTE(review): the bodies of the next two `if` statements are
                # missing (presumably `continue`) -- confirm.
                if i is j:

                if not fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):

                # i (or something really similar) already exists in
                # self._extended_404_responses, no need to compare any further
                # just remove it and continue with the next
                # NOTE(review): the `try:` with the actual removal, the body
                # of `except ValueError:` and what looks like an `else:` in
                # front of the debug message are all missing here.
                except ValueError:
                    # The 404 response DB might have been changed by another thread
                    msg = (
                        'Removed 404 response for "%s" (id: %s) from the 404 DB'
                        ' because it matches 404 response "%s" (id: %s)')
                    args = (i.url, i.id, j.url, j.id)
                    om.out.debug(msg % args)

                    removed_items += 1


        msg = 'Called clean 404 response DB. Removed %s duplicates from DB.'
        args = (removed_items, )
        om.out.debug(msg % args)

        msg = 'The extended 404 response DB contains responses with IDs: %s'
        # `args` is a plain string here (the outer parentheses do not create a
        # tuple); `msg % args` still works because there is a single %s.
        args = (', '.join(
            str(r.id) for r in copy.copy(self._extended_404_responses)))
        om.out.debug(msg % args)
Example #10
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determines if two pages are equal using a ratio, if compare_diff is set
        then we just compare the parts of the response bodies which are
        different.

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When true, both bodies are replaced by the pair
                             returned by diff(body1, body2) before comparing
        :return: The result of fuzzy_equal() using self._eq_limit as threshold
        """
        if compare_diff:
            body1, body2 = diff(body1, body2)

        return fuzzy_equal(body1, body2, self._eq_limit)
Example #11
    def _response_is_different(self, vhost_response, orig_resp_body, non_existent_responses):
        """
        Note that we use 0.35 in fuzzy_equal because we want the responses to be
        *really different*.

        :param vhost_response: The HTTP response for the virtual host (its
                               body is read with get_body())
        :param orig_resp_body: The original HTTP response body
        :param non_existent_responses: One or more HTTP responses for virtual hosts
                                       that do not exist in the remote server
        :return: True if vhost_response is different from orig_resp_body and
                 from every response in non_existent_responses
        """
        # Read the body once instead of once per comparison
        vhost_body = vhost_response.get_body()

        if fuzzy_equal(vhost_body, orig_resp_body, 0.35):
            return False

        # any() stops at the first similar response, like the explicit loop
        return not any(fuzzy_equal(vhost_body, ner.get_body(), 0.35)
                       for ner in non_existent_responses)
Example #12
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determines if two pages are equal using a ratio, if compare_diff is set
        then we just compare the parts of the response bodies which are
        different.

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When true, both bodies are replaced by the pair
                             returned by chunked_diff(body1, body2) before
                             comparing
        :return: The result of fuzzy_equal() using self._eq_limit as threshold
        """
        if compare_diff:
            body1, body2 = chunked_diff(body1, body2)

        return fuzzy_equal(body1, body2, self._eq_limit)
Example #13
    def _is_resp_equal(self, res1, res2):
        @see: unittest for this method in test_csrf.py
        if res1.get_code() != res2.get_code():
            return False

        if not fuzzy_equal(res1.body, res2.body, self._equal_limit):
            return False

        return True
Example #14
 def _matches_failed_login(self, resp_body, login_failed_result_list):
     :return: True if the resp_body matches the previously created
              responses that are stored in login_failed_result_list.
     for login_failed_result in login_failed_result_list:
         if fuzzy_equal(resp_body, login_failed_result, 0.65):
             return True
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
Example #15
 def _matches_failed_login(self, resp_body, login_failed_result_list):
     :return: True if the resp_body matches the previously created
              responses that are stored in login_failed_result_list.
     for login_failed_result in login_failed_result_list:
         if fuzzy_equal(resp_body, login_failed_result, 0.65):
             return True
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
    def test_17092(self):
        # Checks that fuzzy_equal() does not treat an nginx 404 page and the
        # short string 'itest' as equal at the 0.9 threshold.
        # NOTE(review): the parenthesised string literal below is never
        # closed -- the rest of the page and the closing `)` are missing from
        # this listing, so the method does not parse as shown.
        nginx_404 = ('<html>\n'
                     '<head><title>404 Not Found</title></head>\n'
                     '<body bgcolor="white">\n'
                     '<center><h1>404 Not Found</h1></center>\n'

        itest = 'itest'

        # 0.9 is from fingerprint_404.py
        self.assertFalse(fuzzy_equal(nginx_404, itest, 0.9))
Example #17
    def _is_resp_equal(self, response_1, response_2):
        :param response_1: HTTP response 1
        :param response_2: HTTP response 2
        :see: unittest for this method in test_csrf.py
        if response_1.get_code() != response_2.get_code():
            return False

        if not fuzzy_equal(response_1.body, response_2.body,
            return False

        return True
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determines if two pages are equal using a ratio.

        The outcome and the time spent are reported through self.debug().

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When true, both bodies are replaced by the pair
                             returned by diff(body1, body2) before comparing
        :return: The result of fuzzy_equal() using self._eq_limit as threshold
        """
        start = time.time()

        if compare_diff:
            body1, body2 = diff(body1, body2)

        cmp_res = fuzzy_equal(body1, body2, self._eq_limit)

        are = 'ARE' if cmp_res else 'ARE NOT'
        args = (are, self._eq_limit)
        self.debug('Strings %s similar enough (limit: %s)' % args)

        # The measured time covers the optional diff and the comparison
        spent = time.time() - start
        self.debug('Took %.2f seconds to run equal_with_limit' % spent)

        return cmp_res
Example #19
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determines if two pages are equal using a ratio.

        The outcome and the time spent are reported through self.debug().

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When true, both bodies are replaced by the pair
                             returned by diff(body1, body2) before comparing
        :return: The result of fuzzy_equal() using self._eq_limit as threshold
        """
        start = time.time()

        if compare_diff:
            body1, body2 = diff(body1, body2)

        cmp_res = fuzzy_equal(body1, body2, self._eq_limit)

        are = 'ARE' if cmp_res else 'ARE NOT'
        args = (are, self._eq_limit)
        self.debug('Strings %s similar enough (limit: %s)' % args)

        # The measured time covers the optional diff and the comparison
        spent = time.time() - start
        self.debug('Took %.2f seconds to run equal_with_limit' % spent)

        return cmp_res
    def test_all(self):
        """
        For a fixed table of (string, string, threshold) cases, assert that
        fuzzy_equal() gives the same answer as checking
        `relative_distance() >= threshold`.
        """
        cases = [
            ('a', 'a', 1.0),
            ('a', 'a', 0.1),
            ('a', 'a', 0.0),

            ('a', 'b', 1.0),
            ('a', 'b', 0.1),
            ('a', 'b', 0.0),

            ('a', 'ab', 1.0),
            ('a', 'ab', 0.1),

            ('a', 'b', 0.0000000000000000001),
            ('a', 'b' * 100, 1.0),

            ('a', 'ab', 0.66666666666),
            ('a', 'aab', 0.5),
            ('a', 'aaab', 0.4),
            ('a', 'aaaab', 0.33333333333333333333333333333333333333333333333333333333),

            ('a' * 25, 'a', 1.0),
            ('aaa', 'aa', 1.0),
            ('a', 'a', 1.0),

            ('a' * 25, 'a', 0.076923076923076927),
            ('aaa', 'aa', 0.8),

            ('a', 'a', 0.0),
        ]

        failure_fmt = ('fuzzy_equal and relative_distance returned'
                       ' different results for the same parameters:\n'
                       '    - Parameter #1: %s\n'
                       '    - Parameter #2: %s\n'
                       '    - Threshold: %s\n'
                       '    - Result fuzzy_equal: %s\n'
                       '    - Result relative_distance: %s\n')

        for left, right, threshold in cases:
            fuzzy_result = fuzzy_equal(left, right, threshold)
            distance_result = relative_distance(left, right) >= threshold

            # The raw distance is shown in the failure message
            details = (left, right, threshold, fuzzy_result,
                       relative_distance(left, right))
            self.assertEqual(fuzzy_result, distance_result,
                             failure_fmt % details)
Example #21
    def _find_OS(self, fuzzable_request):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and some statements are missing (flagged inline),
        # so the method does not parse as shown.
        # NOTE(review): the docstring says the return value is None, but the
        # visible code returns True / False.
        Analyze responses and determine if remote web server runs on windows
        or *nix.

        @Return: None, the knowledge is saved in the knowledgeBase
        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if dirs and filename:

            last_url = dirs[-1]
            last_url = last_url.url_string

            # Drop the last character of the directory URL and join the file
            # name with a backslash instead.
            windows_url = URL(last_url[0:-1] + '\\' + filename)
            windows_response = self._uri_opener.GET(windows_url)

            original_response = self._uri_opener.GET(freq_url)

            if fuzzy_equal(original_response.get_body(),
                                    windows_response.get_body(), 0.98):
                desc = 'Fingerprinted this host as a Microsoft Windows system.'
                os_str = 'windows'
                # NOTE(review): `desc` / `os_str` are overwritten right away,
                # which suggests an `else:` was lost before the next line --
                # confirm.
                desc = 'Fingerprinted this host as a *nix system. Detection for'\
                       ' this operating system is weak, "if not windows then'\
                       ' linux".'
                os_str = 'unix'

            response_ids = [windows_response.id, original_response.id]
            # NOTE(review): the Info(...) call below is never closed; its
            # remaining argument(s) are missing.
            i = Info('Operating system', desc, response_ids,
            kb.kb.raw_write(self, 'operating_system_str', os_str)
            kb.kb.append(self, 'operating_system', i)
            return True

        return False
Example #22
    def _find_OS(self, fuzzable_request):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and some statements are missing (flagged inline),
        # so the method does not parse as shown.
        # NOTE(review): the docstring says the return value is None, but the
        # visible code returns True / False.
        Analyze responses and determine if remote web server runs on windows
        or *nix.

        @Return: None, the knowledge is saved in the knowledgeBase
        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if dirs and filename:

            last_url = dirs[-1]
            last_url = last_url.url_string

            # Drop the last character of the directory URL and join the file
            # name with a backslash instead.
            windows_url = URL(last_url[0:-1] + '\\' + filename)
            windows_response = self._uri_opener.GET(windows_url)

            original_response = self._uri_opener.GET(freq_url)

            if fuzzy_equal(original_response.get_body(),
                           windows_response.get_body(), 0.98):
                desc = 'Fingerprinted this host as a Microsoft Windows system.'
                os_str = 'windows'
                # NOTE(review): `desc` / `os_str` are overwritten right away,
                # which suggests an `else:` was lost before the next line --
                # confirm.
                desc = 'Fingerprinted this host as a *nix system. Detection for' \
                       ' this operating system is weak, "if not windows then' \
                       ' linux".'
                os_str = 'unix'

            response_ids = [windows_response.id, original_response.id]
            # NOTE(review): the Info(...) call below is never closed; its
            # remaining argument(s) are missing.
            i = Info('Operating system', desc, response_ids,

            kb.kb.raw_write(self, 'operating_system_str', os_str)
            kb.kb.append(self, 'operating_system', i)
            return True

        return False
Example #23
    def _test_ip_address(self, original_response, domain):
        # NOTE(review): this listing is damaged. The docstring line below has
        # lost its triple quotes, both `try:` lines are missing, several
        # branch bodies are empty and the Info(...) call is never closed, so
        # the method does not parse as shown.
        Check if http://ip(domain)/ == http://domain/
            ip_address = socket.gethostbyname(domain)
        except socket.error:
            # NOTE(review): handler body missing (presumably returns).

        url = original_response.get_url()
        ip_url = url.copy()
        # NOTE(review): `ip_address` is never applied to `ip_url` in the
        # visible code -- presumably that line was dropped.

            modified_response = self._uri_opener.GET(ip_url, cache=True)
        except BaseFrameworkException as bfe:
            msg = ('An error occurred while fetching IP address URL in '
                   ' dns_wildcard plugin: "%s"')
            om.out.debug(msg % bfe)

        # NOTE(review): the bodies of the next two `if` statements are missing
        # (presumably early returns) -- confirm.
        if is_no_content_response(modified_response):

        if fuzzy_equal(modified_response.get_body(), original_response.get_body(), 0.35):

        desc = 'The contents of %s and %s differ.'
        args = (modified_response.get_uri(), original_response.get_uri())
        desc %= args

        # NOTE(review): remaining Info(...) arguments are missing.
        i = Info('Default virtual host',

        kb.kb.append(self, 'dns_wildcard', i)
Example #24
    def _filter_errors(self, result, filename):
        """
        Filter out ugly php errors: when `result` is recognised as an error
        message an empty string is returned instead of the error itself.

        :param result: The text obtained when trying to read `filename`
        :param filename: The name of the requested file; it is removed from
                         `result` before comparing with the known
                         "file not found" text
        :return: '' if `result` is an error message, `result` unchanged
                 otherwise
        """
        # Well known PHP error fragments: permission denied, missing file,
        # path is not a directory and failed stream.
        php_error_markers = (
            'Permission denied',
            'No such file or directory in',
            'Not a directory in',
            ': failed to open stream: ',
        )

        if any(marker in result for marker in php_error_markers):
            return ''

        if self._application_file_not_found_error is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if fuzzy_equal(self._application_file_not_found_error,
                           clean_result, 0.9):
                return ''

        return result
Example #25
    def audit(self, freq, orig_response, debugging_id):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and many statements are missing (flagged inline),
        # so the method does not parse as shown.
        Check if the protocol specified in freq is https and fetch the same URL
        using http. ie:
            - input: https://w3af.org/
            - check: http://w3af.org/

        :param freq: A FuzzableRequest
        :param orig_response: The HTTP response associated with the fuzzable request
        :param debugging_id: A unique identifier for this call to audit()
        if not self._should_run:
            # NOTE(review): branch body missing (presumably an early return).

        initial_uri = freq.get_uri()
        if initial_uri.get_port() not in {80, 443}:
            # We get here then the original URL looks like http://foo:3921/
            # It's really strange (maybe not even possible?) to find a server
            # that listens for HTTP and HTTPS connections on the same port,
            # since we don't want to guess the port, nor generate errors such
            # as #8871 we just ignore this case
            self._should_run = False
            # NOTE(review): presumably followed by a return -- confirm.

        # Define some variables
        # NOTE(review): both URIs and both requests are plain copies in the
        # visible code; the lines that make one of them http and the other
        # https, and attach them to the copied requests, appear to be missing.
        insecure_uri = initial_uri.copy()
        secure_uri = initial_uri.copy()

        insecure_fr = copy.deepcopy(freq)

        secure_fr = copy.deepcopy(freq)

        # Make sure that we disable error handling during these tests, we want
        # the requests to fail quickly and without affecting the library's error
        # rate
        send_mutant = self._uri_opener.send_mutant
        kwargs = {'grep': False, 'error_handling': False}

        # NOTE(review): the `try:` line for the two requests below is missing.
            insecure_response = send_mutant(insecure_fr, **kwargs)
            secure_response = send_mutant(secure_fr, **kwargs)
        except (HTTPRequestException, ScanMustStopException):
            # No vulnerability to report since one of these threw an error
            # (because there is nothing listening on that port). It makes
            # no sense to keep running since we already got an error
            self._should_run = False

        # NOTE(review): the extra indentation below suggests an `else:` was
        # lost here; the returns after the first two checks are missing too.
            if insecure_response is None or secure_response is None:
                # No vulnerability to report since one of these threw an
                # error (because there is nothing listening on that port).
                # It makes no sense to keep running since we already got an
                # error
                self._should_run = False

            if self._redirects_to_secure(insecure_response, secure_response):

            # NOTE(review): the fuzzy_equal(...) call below is never closed;
            # its remaining arguments and the `:` are missing.
            if insecure_response.get_code() == secure_response.get_code()\
            and fuzzy_equal(insecure_response.get_body(),

                desc = ('Secure content can be accessed using the insecure'
                        ' HTTP protocol. The vulnerable URLs used to verify'
                        ' this vulnerability are:\n'
                        ' - %s\n'
                        ' - %s\n')
                desc %= (secure_uri, insecure_uri)

                response_ids = [insecure_response.id, secure_response.id]

                v = Vuln.from_fr('Secure content over insecure channel',
                                 desc, severity.MEDIUM, response_ids,
                                 self.get_name(), freq)

                self.kb_append(self, 'un_ssl', v)

                # In most cases, when one resource is available, all are
                # so we just stop searching for this vulnerability
                self._should_run = False
Example #26
    def generate_404_knowledge(self, url):
        # NOTE(review): this listing is damaged. The docstring below has lost
        # its triple quotes and many statements are missing (flagged inline),
        # so the method does not parse as shown.
        # NOTE(review): the docstring promises a list of 404 bodies, but no
        # `return` statement is visible in this listing.
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        :return: A list with 404 bodies.
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action',
                    'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'}
        # NOTE(review): branch body missing (presumably adds `extension` to
        # `handlers`) -- confirm.
        if extension:

        test_urls = []

        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            # NOTE(review): `url404` is never stored in `test_urls` in the
            # visible code -- presumably an append was dropped here and in
            # the `if not extension:` block below.

        # Also keep in mind that in some cases we don't have an extension, so
        # we need to create a URL with just a filename
        if not extension:
            rand_alnum_file = rand_alnum(8)
            url404 = domain_path.url_join(rand_alnum_file)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []
        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            four_oh_data = FourOhFourResponseFactory(not_exist_resp)
            # NOTE(review): `four_oh_data` is never added to
            # `not_exist_resp_lst` in the visible code -- presumably dropped.

            # Populate the self._directory_uses_404_codes with the information
            # we just retrieved from the application
            if not_exist_resp.get_code() == 404:

                url_404 = not_exist_resp.get_uri()

                # NOTE(review): the tuple below is never closed and the body
                # of the following `if` is missing.
                path_extension = (url_404.get_domain_path(),

                if path_extension not in self._directory_uses_404_codes:

        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        if len(not_exist_resp_lst):
            four_oh_data = not_exist_resp_lst[0]
            # NOTE(review): the statement that stores `four_oh_data` in the
            # 404 responses list is missing.

        # And now add the unique responses
        for i in not_exist_resp_lst:
            for j in self._404_responses:

                # NOTE(review): the bodies of the next two `if` statements and
                # the branch that stores a new response (the trailing comments
                # read like a `for ... else`) are missing.
                if i is j:

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # i already exists in the self._404_responses, no need
                    # to compare any further
                # None of the 404_responses match the item from not_exist_resp_lst
                # This means that this item is new and we should store it in the
                # 404_responses db

        # And I return the ones I need
        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #27
    def is_404(self, http_response):
        # Decide whether `http_response` should be treated as a 404. The
        # visible code returns True/False from, in order: the user-configured
        # 'always_404' / 'never_404' / 'string_match_404' settings, the HTTP
        # status code, the self._directory_uses_404_codes lookup, a fuzzy
        # comparison against self._404_responses and, as a last resort,
        # self._is_404_with_extra_request().
        #
        # NOTE(review): the free text below appears to be the method's
        # docstring with its triple-quote delimiters stripped, and one of its
        # sentences is cut short ("which in most situations was"). Several
        # statements further down are also missing from this copy; each gap
        # is flagged inline.
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        #   First we handle the user configured exceptions:
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response:
            return True

        #   This is the most simple case, we don't even have to think about this
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        if http_response.get_code() == 404:
            return True

        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we're return False!
        # NOTE(review): a 404 status code already returned True just above, so
        # the `!= 404` half of this condition is always true when we get here.
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #   Lets start with the rather complex code...
        with self._lock:
            if not self._already_analyzed:
                # NOTE(review): only the flag is set here. The comment below
                # mentions generate_404_knowledge, so a call to it was
                # presumably made at this point - confirm against upstream.
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        with self._lock:
            #   Compare this response to all the 404's I have in my DB
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                if resp_content_type != resp_404.doc_type:
                    # NOTE(review): the body of this `if` is missing;
                    # presumably `continue`, given the comment above.

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    # NOTE(review): this tuple is never closed; msg has three
                    # placeholders but only one value is visible.
                    fmt = (http_response.get_url(),
                    om.out.debug(msg % fmt)
                    return True
                    # NOTE(review): as written, the lines below follow a
                    # `return` at the same indentation and can never run;
                    # presumably an `else:` was dropped here. The max() call
                    # is also missing its second argument and closing paren.
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,

                # Track if the response path is in the DB
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                # NOTE(review): unclosed tuple; the remaining format values
                # are missing from this copy.
                args = (http_response.get_url(),
                om.out.debug(msg % args)
                return False

            if self._is_404_with_extra_request(http_response, resp_body):
                #   Aha! It actually was a 404!
                # NOTE(review): four_oh_data is built but never used in the
                # visible code, although the log message says it is added to
                # the 404_responses database - the append seems to be missing.
                four_oh_data = FourOhFourResponseFactory(http_response)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False
Example #28
    def generate_404_knowledge(self, url):
        # Request random, presumably non-existent file names (one per handler
        # extension) under the domain path of `url`, via self._send_404 on
        # the worker pool, and log the size of self._404_responses.
        #
        # NOTE(review): the text below appears to be the docstring with its
        # triple-quote delimiters stripped. It promises ":return: A list with
        # 404 bodies" but no `return` statement is visible in this copy.
        # Several single-statement bodies are missing as well; each gap is
        # flagged inline.
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        :return: A list with 404 bodies.
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        # NOTE(review): 'do' is listed twice; harmless in a set literal.
        handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'do',
                    'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'}
        if extension:
            # NOTE(review): body missing; presumably adds `extension` to
            # `handlers` - confirm.

        test_urls = []

        # NOTE(review): the loop variable reuses the name `extension` assigned
        # above, and url404 is computed but never stored in test_urls in the
        # visible code.
        for extension in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + extension
            url404 = domain_path.url_join(rand_alnum_file)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []
        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            # NOTE(review): loop body missing; presumably collects each
            # response into not_exist_resp_lst.

        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        if len(not_exist_resp_lst):
            http_response = not_exist_resp_lst[0]
            # NOTE(review): four_oh_data is not stored anywhere in the
            # visible code.
            four_oh_data = FourOhFourResponseFactory(http_response)

        # And now add the unique responses
        # NOTE(review): both loops walk not_exist_resp_lst. The `if i is j:`
        # body is missing, and the two comments inside the fuzzy_equal branch
        # suggest an if/else whose `else:` was dropped.
        for i in not_exist_resp_lst:
            for j in not_exist_resp_lst:

                if i is j:

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # They are equal, just ignore it
                    # They are no equal, this means that we'll have to add this
                    # one to the 404 responses
                    four_oh_data = FourOhFourResponseFactory(j)

        # And I return the ones I need
        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #29
    def audit(self, freq, orig_response):
        # For every mutant of `freq`, send three requests (error 1, ok,
        # error 2), compare each body with the original response body using
        # fuzzy_equal / fuzzy_not_equal, and store a HIGH severity Vuln in
        # the knowledge base under 'memcachei'.
        #
        # NOTE(review): the line below appears to be the docstring with its
        # triple-quote delimiters stripped. The same `mutant` is sent three
        # times with no visible change in between, so the statements that set
        # the error/ok payloads are presumably missing, along with the `if`
        # bodies and closing brackets flagged inline.
        Tests an URL for memcache injection vulnerabilities.

        # first checking error response
        fake_mutants = create_mutants(freq, ['', ])

        for mutant in fake_mutants:

            # NOTE(review): orig_response does not change inside the loop, so
            # this value is the same on every iteration.
            orig_body = orig_response.get_body()

            #trying to break normal execution flow with error1 payload
            error_1_response, body_error_1_response = self._uri_opener.send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_1_response, self._eq_limit):
                # if we manage to break execution flow, there is a potential
                # injection otherwise - no injection!
                # NOTE(review): body missing; presumably `continue`.

            # trying the correct injection request, to confirm that we've found
            # it!

            ok_response, body_ok_response = self._uri_opener.send_clean(mutant)

            if fuzzy_not_equal(orig_body, body_ok_response, self._eq_limit):
                #  now requests should be equal, otherwise injection failed!
                # NOTE(review): body missing; presumably `continue`.

            #error2 request to just make sure that wasn't random bytes

            error_2_response, body_error_2_response = self._uri_opener.send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_2_response, self._eq_limit):
                # now requests should be different again, otherwise injection
                # is not confirmed
                # NOTE(review): body missing; presumably `continue`.

            # NOTE(review): the list, the format tuple and the
            # Vuln.from_mutant() call below are all left unclosed in this
            # copy.
            response_ids = [error_1_response.id,

            desc = 'Memcache injection was found at: "%s", using'\
                   ' HTTP method %s. The injectable parameter is: "%s"'
            desc = desc % (mutant.get_url(),

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',


            # Keep the three raw bodies on the vulnerability object.
            v['ok_html'] = ok_response.get_body()
            v['error_1_html'] = error_1_response.get_body()
            v['error_2_html'] = error_2_response.get_body()

            self.kb_append_uniq(self, 'memcachei', v)

Example #30
    def generate_404_knowledge(self, url):
        # Request random, presumably non-existent file names (one per handler
        # extension) under the domain path of `url`, via self._send_404 on
        # the worker pool, and log the size of self._404_responses.
        #
        # NOTE(review): the text below appears to be the docstring with its
        # triple-quote delimiters stripped. It promises ":return: A list with
        # 404 bodies" but no `return` statement is visible in this copy.
        # Several statements are missing as well; each gap is flagged inline.
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        :return: A list with 404 bodies.
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = "404 fingerprint database was incorrectly initialized."
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        # NOTE(review): the set literal is opened but its members and closing
        # brace are missing from this copy.
        handlers = {
        if extension:
            # NOTE(review): body missing; presumably adds `extension` to
            # `handlers` - confirm.

        test_urls = []

        # NOTE(review): the loop variable reuses the name `extension` assigned
        # above, and url404 is computed but never stored in test_urls in the
        # visible code.
        for extension in handlers:
            rand_alnum_file = rand_alnum(8) + "." + extension
            url404 = domain_path.url_join(rand_alnum_file)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            # NOTE(review): loop body missing; presumably collects each
            # response into not_exist_resp_lst.

        #   I have the 404 responses in not_exist_resp_lst, but maybe they
        #   all look the same, so I'll filter the ones that look alike.
        # NOTE(review): the `if i is j:` body is missing, and the two comments
        # inside the fuzzy_equal branch suggest an if/else whose `else:` and
        # statements were dropped.
        for i in not_exist_resp_lst:
            for j in not_exist_resp_lst:

                if i is j:

                if fuzzy_equal(i.get_body(), j.get_body(), IS_EQUAL_RATIO):
                    # They are equal, just ignore it
                    # They are no equal, this means that we'll have to add this
                    # one to the 404 responses

        # And I return the ones I need
        msg_fmt = "The 404 body result database has a length of %s."
        om.out.debug(msg_fmt % len(self._404_responses))
Example #31
    def generate_404_knowledge(self, url):
        # Request random, presumably non-existent file names (one per handler
        # extension, plus an extension-less one when `url` has no extension)
        # under the domain path of `url`, wrap each response in
        # FourOhFourResponse, and log the IDs held in
        # self._base_404_responses.
        #
        # NOTE(review): the text below appears to be the docstring with its
        # triple-quote delimiters stripped. It promises ":return: A list with
        # 404 bodies" but no `return` statement is visible in this copy.
        # Several statements are missing as well; each gap is flagged inline.
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        :return: A list with 404 bodies.
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        # NOTE(review): the closing brace of this set literal is missing.
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action', 'gif',
            'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
        if extension:
            # NOTE(review): body missing; presumably adds `extension` to
            # `handlers` - confirm.

        test_urls = []

        # NOTE(review): url404 is computed here and in the `if not extension`
        # branch below, but never stored in test_urls in the visible code.
        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)

        # Also keep in mind that in some cases we don't have an extension, so
        # we need to create a URL with just a filename
        if not extension:
            rand_alnum_file = rand_alnum(8)
            url404 = domain_path.url_join(rand_alnum_file)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            # NOTE(review): four_oh_data is never appended to
            # not_exist_resp_lst in the visible code.
            four_oh_data = FourOhFourResponse(not_exist_resp)

            # Populate the self._directory_uses_404_codes with the information
            # we just retrieved from the application
            if not_exist_resp.get_code() == 404:

                url_404 = not_exist_resp.get_uri()

                # NOTE(review): unclosed tuple, and the statement that stores
                # it in self._directory_uses_404_codes is missing.
                path_extension = (url_404.get_domain_path(),

                # No need to check if the ScalableBloomFilter contains the key
                # It is a "set", adding duplicates is a no-op.

        # Sort the HTTP responses by length to try to have the same DB on
        # each call to generate_404_knowledge(). This is required because of
        # the imap_unordered() above, which will yield the responses in
        # unexpected order each time we call it.
        # NOTE(review): cmp() is a Python 2 builtin and does not exist in
        # Python 3. This comparator is also never used in the visible code;
        # the sort call that consumed it is presumably missing.
        def sort_by_response_length(a, b):
            return cmp(len(a.body), len(b.body))


        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        if len(not_exist_resp_lst):
            # NOTE(review): four_oh_data is not stored anywhere in the
            # visible code.
            four_oh_data = not_exist_resp_lst[0]

        # And now add the unique responses
        # NOTE(review): the bodies of both `if` statements are missing, and
        # the "None of the 404_responses match" comment reads like the body
        # of a `for ... else` whose `else:` and append were dropped - confirm.
        for i in not_exist_resp_lst:
            for j in self._base_404_responses:

                if i is j:

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # i (or something really similar) already exists in the
                    # self._base_404_responses, no need to compare any further
                # None of the 404_responses match the item from not_exist_resp_lst
                # This means that this item is new and we should store it in the
                # 404_responses db

        msg = 'The base 404 response DB contains responses with IDs: %s'
        # The log line is built from a shallow copy of
        # self._base_404_responses.
        args = (', '.join(
            str(r.id) for r in copy.copy(self._base_404_responses)))
        om.out.debug(msg % args)
Example #32
    def _is_404_complex_impl(self, http_response, query):
        # Compare `query` with one known 404 obtained from
        # self._get_404_response(). The checks run cheapest first: status
        # code, content type, exact body equality, then fuzzy_equal. Every
        # decision is logged through om.out.debug.
        #
        # NOTE(review): the text below appears to be the docstring with its
        # triple-quote delimiters stripped, and its first sentence is cut
        # short ("potentially sends HTTP requests to the"). Closing brackets
        # and an `else:` are also missing; each gap is flagged inline.
        Verifies if the response is a 404 by comparing it with other responses
        which are known to be 404s, potentially sends HTTP requests to the

        :param http_response: The HTTP response
        :param query: The HTTP response in FourOhFourResponse form (normalized
                      URL, clean body, etc.)
        :return: True if the HTTP response is a 404
        response_did = http_response.get_debugging_id()
        # NOTE(review): the rand_alnum( call is left unclosed; its argument
        # is missing from this copy.
        debugging_id = response_did if response_did is not None else rand_alnum(

        # Compare query with a known 404 from the DB (or a generated one
        # if there is none with the same path in the DB)
        known_404 = self._get_404_response(http_response, query, debugging_id)

        # Trivial performance improvement that prevents running fuzzy_equal
        if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [known 404 with ID %s uses 404 code]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, known_404.id)
            om.out.debug(msg % args)
            return False

        # Since the fuzzy_equal function is CPU-intensive we want to
        # avoid calling it for cases where we know it won't match, for
        # example in comparing an image and an html
        if query.content_type != known_404.content_type:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [document type mismatch with known 404 with ID %s]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, known_404.id)
            om.out.debug(msg % args)
            return False

        # This is the simplest case. If they are 100% equal, no matter how
        # large or complex the responses are, then query is a 404
        if known_404.body == query.body:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [string equals with 404 DB entry with ID %s]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, known_404.id)
            om.out.debug(msg % args)
            return True

        # NOTE(review): the fuzzy_equal( call is left unclosed; the ratio
        # argument is missing (the log message below refers to
        # IS_EQUAL_RATIO).
        is_fuzzy_equal = fuzzy_equal(known_404.body, query.body,

        if not is_fuzzy_equal:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [similarity_ratio < %s with known 404 with ID %s]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, IS_EQUAL_RATIO, known_404.id)
            om.out.debug(msg % args)
            return False

        if len(query.body) < MAX_FUZZY_LENGTH:
            # The response bodies are fuzzy-equal, and the length is less than
            # MAX_FUZZY_LENGTH. This is good, it means that they are equal and
            # long headers / footers in HTTP response bodies are not
            # interfering with fuzzy-equals.
            # Some sites have really large headers and footers which they
            # include for all pages, including 404s. When that happens one page
            # might look like:
            #   {header-4000bytes}
            #   Hello world
            #   {footer-4000bytes}
            # The header might contain large CSS and the footer might include
            # JQuery or some other large JS. Then, the 404 might look like:
            #   {header-4000bytes}
            #   Not found
            #   {footer-4000bytes}
            # A user with a browser might only see the text, and clearly
            # identify one as a valid page and another as a 404, but the
            # fuzzy_equal() function will return True, indicating that they
            # are equal because 99% of the bytes are the same.
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_ratio > %s with 404 DB entry with ID %s]')
            args = (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id, IS_EQUAL_RATIO, known_404.id)
            om.out.debug(msg % args)
            return True

            # NOTE(review): as written, the return below follows another
            # `return` at the same indentation and can never run; presumably
            # an `else:` was dropped, making this the branch for bodies of
            # MAX_FUZZY_LENGTH or longer.
            # See the large comment above on why we need to check for
            # MAX_FUZZY_LENGTH.
            # The way to handle this case is to send an extra HTTP
            # request that will act as a tie-breaker.
            return self._handle_large_http_responses(http_response, query,
                                                     known_404, debugging_id)
Example #33
    def batch_injection_test(self, freq, orig_response):
        # For every mutant of `freq`, send three requests (error 1, ok,
        # error 2), compare each body with the original response body using
        # fuzzy_equal / fuzzy_not_equal, and store a HIGH severity Vuln in
        # the knowledge base under 'memcachei'.
        #
        # NOTE(review): the line below appears to be the docstring with its
        # triple-quote delimiters stripped. The same `mutant` is sent three
        # times with no visible change in between, so the statements that set
        # the error/ok payloads are presumably missing, along with the `if`
        # bodies and closing brackets flagged inline.
        Uses the batch injection technique to find memcache injections
        # shortcut
        send_clean = self._uri_opener.send_clean

        # first checking error response
        # NOTE(review): the create_mutants( call is left unclosed; the
        # payload list is missing from this copy.
        fake_mutants = create_mutants(freq, [

        for mutant in fake_mutants:

            # NOTE(review): orig_response does not change inside the loop, so
            # this value is the same on every iteration.
            orig_body = orig_response.get_body()

            # trying to break normal execution flow with error1 payload
            error_1_response, body_error_1_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_1_response, self._eq_limit):
                # if we manage to break execution flow, there is a potential
                # injection otherwise - no injection!
                # NOTE(review): body missing; presumably `continue`.

            # trying the correct injection request, to confirm that we've found
            # it!

            ok_response, body_ok_response = send_clean(mutant)

            if fuzzy_not_equal(orig_body, body_ok_response, self._eq_limit):
                #  now requests should be equal, otherwise injection failed!
                # NOTE(review): body missing; presumably `continue`.

            # error2 request to just make sure that wasn't random bytes

            error_2_response, body_error_2_response = send_clean(mutant)

            if fuzzy_equal(orig_body, body_error_2_response, self._eq_limit):
                # now requests should be different again, otherwise injection
                # is not confirmed
                # NOTE(review): body missing; presumably `continue`.

            # NOTE(review): the list, the format tuple and the
            # Vuln.from_mutant() call below are all left unclosed in this
            # copy.
            response_ids = [
                error_1_response.id, ok_response.id, error_2_response.id

            desc = 'Memcache injection was found at: "%s", using' \
                   ' HTTP method %s. The injectable parameter is: "%s"'
            desc = desc % (mutant.get_url(), mutant.get_method(),

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',

            # Keep the three raw bodies on the vulnerability object.
            v['ok_html'] = ok_response.get_body()
            v['error_1_html'] = error_1_response.get_body()
            v['error_2_html'] = error_2_response.get_body()

            self.kb_append_uniq(self, 'memcachei', v)

    def is_404(self, http_response):
        # Decide whether `http_response` should be treated as a 404. The
        # visible code returns True/False from, in order: the user-configured
        # 'always_404' / 'never_404' / 'string_match_404' settings, the HTTP
        # status code, the self._directory_uses_404_codes lookup, a fuzzy
        # comparison against self._404_responses and, as a last resort,
        # self._is_404_with_extra_request().
        #
        # NOTE(review): the free text below appears to be the method's
        # docstring with its triple-quote delimiters stripped, and one of its
        # sentences is cut short ("which in most situations was"). Several
        # statements further down are also missing from this copy; each gap
        # is flagged inline.
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        #   First we handle the user configured exceptions:
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        if cf.cf.get('string_match_404') and cf.cf.get(
                'string_match_404') in http_response:
            return True

        #   This is the most simple case, we don't even have to think about this
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        if http_response.get_code() == 404:
            return True

        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we're return False!
        # NOTE(review): a 404 status code already returned True just above, so
        # the `!= 404` half of this condition is always true when we get here.
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #   Lets start with the rather complex code...
        with self._lock:
            if not self._already_analyzed:
                # NOTE(review): only the flag is set here. The comment below
                # mentions generate_404_knowledge, so a call to it was
                # presumably made at this point - confirm against upstream.
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        with self._lock:
            #   Compare this response to all the 404's I have in my DB
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                if resp_content_type != resp_404.doc_type:
                    # NOTE(review): the body of this `if` is missing;
                    # presumably `continue`, given the comment above.

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    # NOTE(review): this tuple is never closed; msg has three
                    # placeholders but only two values are visible.
                    fmt = (http_response.get_url(), http_response.id,
                    om.out.debug(msg % fmt)
                    return True
                    # NOTE(review): as written, the lines below follow a
                    # `return` at the same indentation and can never run;
                    # presumably an `else:` was dropped here. The max() call
                    # is also missing its second argument and closing paren.
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,

                # Track if the response path is in the DB
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                # NOTE(review): unclosed tuple; the third format value is
                # missing from this copy.
                args = (http_response.get_url(), http_response.id,
                om.out.debug(msg % args)
                return False

            if self._is_404_with_extra_request(http_response, resp_body):
                #   Aha! It actually was a 404!
                # NOTE(review): four_oh_data is built but never used in the
                # visible code, although the log message says it is added to
                # the 404_responses database - the append seems to be missing.
                four_oh_data = FourOhFourResponseFactory(http_response)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False
    def generate_404_knowledge(self, url):
        # Request random, presumably non-existent file names (one per handler
        # extension) under the domain path of `url`, via self._send_404 on
        # the worker pool, and log the size of self._404_responses.
        #
        # NOTE(review): the text below appears to be the docstring with its
        # triple-quote delimiters stripped. It promises ":return: A list with
        # 404 bodies" but no `return` statement is visible in this copy.
        # Several statements are missing as well; each gap is flagged inline.
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        :return: A list with 404 bodies.
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        # NOTE(review): 'do' is listed twice (harmless in a set literal) and
        # the closing brace of the literal is missing.
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'do', 'gif', 'htm',
            'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
        if extension:
            # NOTE(review): body missing; presumably adds `extension` to
            # `handlers` - confirm.

        test_urls = []

        # NOTE(review): the loop variable reuses the name `extension` assigned
        # above, and url404 is computed but never stored in test_urls in the
        # visible code.
        for extension in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + extension
            url404 = domain_path.url_join(rand_alnum_file)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            # NOTE(review): loop body missing; presumably collects each
            # response into not_exist_resp_lst.

        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        if len(not_exist_resp_lst):
            http_response = not_exist_resp_lst[0]
            # NOTE(review): four_oh_data is not stored anywhere in the
            # visible code.
            four_oh_data = FourOhFourResponseFactory(http_response)

        # And now add the unique responses
        # NOTE(review): both loops walk not_exist_resp_lst. The `if i is j:`
        # body is missing, and the two comments inside the fuzzy_equal branch
        # suggest an if/else whose `else:` was dropped.
        for i in not_exist_resp_lst:
            for j in not_exist_resp_lst:

                if i is j:

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # They are equal, just ignore it
                    # They are no equal, this means that we'll have to add this
                    # one to the 404 responses
                    four_oh_data = FourOhFourResponseFactory(j)

        # And I return the ones I need
        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #36
    def _is_404_with_extra_request(self, http_response, clean_resp_body,
        Performs a very simple check to verify if this response is a 404 or not.

        It takes the original URL and modifies it by flipping some bytes in the
        filename, then performs a request to that URL and compares the original
        response with the modified one. If they are equal then the original
        request is a 404.

        :param http_response: The original HTTP response
        :param clean_resp_body: The original HTML body you could find in
                                http_response after passing it by a cleaner

        :return: True if the original response was a 404 !
        #   Generate a request that will trigger a 404
        response_url = http_response.get_url()
        filename = response_url.get_file_name()

        if not filename:
            relative_url = '../%s/' % rand_alnum(8)
            url_404 = response_url.url_join(relative_url)
            relative_url = generate_404_filename(filename)
            url_404 = response_url.copy()

        #   Send the 404 request
        response_404 = self._send_404(url_404, debugging_id=debugging_id)
        four_oh_data = FourOhFourResponse(response_404)

        #   Update _directory_uses_404_codes
        if response_404.get_code() == 404:
            path_extension = (url_404.get_domain_path(),


            if http_response.get_code() != 404:
                # Not a 404! We know because of the new knowledge that this path
                # and extension uses 404
                msg = (
                    'The generated HTTP response for %s (id: %s) has a 404'
                    ' code, which is different from code %s used by the HTTP'
                    ' response passed as parameter (id:%s, did:%s)')
                args = (url_404, response_404.id, http_response.get_code(),
                        http_response.id, debugging_id)
                om.out.debug(msg % args)
                return False

        #   If the HTTP response codes are different, then we're almost certain
        #   the HTTP response received as parameter is not a 404
        if response_404.get_code() != http_response.get_code():
            msg = ('The generated HTTP response for %s (id: %s) has a %s'
                   ' code, which is different from code %s used by the HTTP'
                   ' response passed as parameter (id:%s, did:%s)')
            args = (url_404, response_404.id, response_404.get_code(),
                    http_response.get_code(), http_response.id, debugging_id)
            om.out.debug(msg % args)

            #   Save the new 404 page to the DB. This might prevent us from
            #   sending extra HTTP requests in the future

            return False

        #   Compare the "response that MUST BE (*) a 404" with the one
        #   received as parameter.
        #   (*) This works in 95% of the cases, where the application is not
        #       using some kind of URL rewrite rule which completely ignores
        #       the last part of the URL (filename or path)
        is_fuzzy_equal = fuzzy_equal(four_oh_data.body, clean_resp_body,

        #   Not equal! This means that the URL we generated really triggered
        #   a 404, and that the response received as parameter is different
        #   (not a 404)
        if not is_fuzzy_equal:
            msg = ('The generated HTTP response for %s (id: %s) is different'
                   ' from the HTTP response body passed as parameter'
                   ' (id: %s, did:%s)')
            args = (url_404, four_oh_data.id, http_response.id, debugging_id)
            om.out.debug(msg % args)

            #   Save the new 404 page to the DB. This might prevent us from
            #   sending extra HTTP requests in the future

            return False

        #   The responses are equal, both can be 404, or both can be the result
        #   of the application ignoring the last part of the URL, example:
        #       http://w3af.com/foo/ignored
        #       http://w3af.com/foo/also-ignored
        if self._looks_like_404_page(response_404):
            msg = ('The generated HTTP response for %s (id: %s) looks like'
                   ' a 404 response AND is similar to the HTTP response body'
                   ' passed as parameter (id:%s, did:%s)')
            args = (url_404, four_oh_data.id, http_response.id, debugging_id)
            om.out.debug(msg % args)

            #   Save the new 404 page to the DB. This might prevent us from
            #   sending extra HTTP requests in the future

            return True

        #   This is the worse scenario. The responses are equal, none of the
        #   responses look like a 404. We get here when:
        #       * _looks_like_404_page() has a false negative (the page is a 404,
        #         but the method returns False, this is very common, since the
        #         word database is very small)
        #       * The site is ignoring the last part of the URL (the filename or
        #         the last path). So requesting /abc/def and /abc/foo will both
        #         yield the same result.
        #   There is no good answer here... I prefer to return False, which
        #   might add a false positive finding to the KB, instead of returning
        #   True (saying that the response is a 404) and having a false negative
        msg = ('The generated HTTP response for %s (id: %s) is very similar to'
               ' the HTTP response body passed as parameter (id: %s), and the'
               ' generated response does NOT look like a 404 (did:%s)')
        args = (url_404, four_oh_data.id, http_response.id, debugging_id)
        om.out.debug(msg % args)
        return False
Example #37
 def is_resp_equal(self, resp1, resp2):
     """
     Decide whether two responses should be treated as equal.

     The responses are equal when their `status_code` attributes match and
     fuzzy_equal() accepts the string form of their `content` attributes
     with 0.99 passed as its third argument.

     :param resp1: First response (needs `status_code` and `content`).
     :param resp2: Second response (needs `status_code` and `content`).
     :return: True when both checks pass, False otherwise.
     """
     # Status codes are compared first so the body comparison is skipped
     # entirely when they already differ.
     if resp1.status_code != resp2.status_code:
         return False

     first_body = str(resp1.content)
     second_body = str(resp2.content)
     return True if fuzzy_equal(first_body, second_body, 0.99) else False
Example #38
    def _is_404_complex(self, http_response):
        """
        Verifies if the response is a 404 by comparing it with other responses
        which are known to be 404s, potentially sends HTTP requests to the
        server.

        :param http_response: The HTTP response
        :return: True if the HTTP response is a 404
        """
        response_did = http_response.get_debugging_id()
        debugging_id = response_did if response_did is not None else rand_alnum(8)

        # 404_body stored in the DB was cleaned when creating the
        # FourOhFourResponse class.
        # Clean the body received as parameter in order to have a fair
        # comparison
        query = FourOhFourResponse(http_response)

        # Compare query with a known 404 from the DB (or a generated one
        # if there is none with the same path in the DB)
        known_404 = self._get_404_response(http_response, query, debugging_id)

        # Values shared by every debug message below, in the order of the
        # '"%s" (id:%s, code:%s, len:%s, did:%s)' placeholders.
        # NOTE(review): only the first element (the URL) was present in the
        # original tuples; the remaining ones were reconstructed from the
        # placeholder names -- confirm they match the intended values.
        response_info = (http_response.get_url(),
                         http_response.id,
                         http_response.get_code(),
                         len(http_response.get_body()),
                         debugging_id)

        # Trivial performance improvement that prevents running fuzzy_equal
        if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [known 404 with ID %s uses 404 code]')
            args = response_info + (known_404.id,)
            om.out.debug(msg % args)
            return False

        # Since the fuzzy_equal function is CPU-intensive we want to
        # avoid calling it for cases where we know it won't match, for
        # example in comparing an image and an html
        if query.doc_type != known_404.doc_type:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [document type mismatch with known 404 with ID %s]')
            args = response_info + (known_404.id,)
            om.out.debug(msg % args)
            return False

        # This is the simplest case. If they are 100% equal, no matter how
        # large or complex the responses are, then query is a 404
        if known_404.body == query.body:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [string equals with 404 DB entry with ID %s]')
            args = response_info + (known_404.id,)
            om.out.debug(msg % args)
            return True

        is_fuzzy_equal = fuzzy_equal(known_404.body, query.body, IS_EQUAL_RATIO)

        if not is_fuzzy_equal:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [similarity_ratio < %s with known 404 with ID %s]')
            args = response_info + (IS_EQUAL_RATIO, known_404.id)
            om.out.debug(msg % args)
            return False

        if len(query.body) < MAX_FUZZY_LENGTH:
            # The response bodies are fuzzy-equal, and the length is less than
            # MAX_FUZZY_LENGTH. This is good, it means that they are equal and
            # long headers / footers in HTTP response bodies are not
            # interfering with fuzzy-equals.
            #
            # Some sites have really large headers and footers which they
            # include for all pages, including 404s. When that happens one page
            # might look like:
            #
            #   {header-4000bytes}
            #   Hello world
            #   {footer-4000bytes}
            #
            # The header might contain large CSS and the footer might include
            # JQuery or some other large JS. Then, the 404 might look like:
            #
            #   {header-4000bytes}
            #   Not found
            #   {footer-4000bytes}
            #
            # A user with a browser might only see the text, and clearly
            # identify one as a valid page and another as a 404, but the
            # fuzzy_equal() function will return True, indicating that they
            # are equal because 99% of the bytes are the same.
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_ratio > %s with 404 DB entry with ID %s]')
            args = response_info + (IS_EQUAL_RATIO, known_404.id)
            om.out.debug(msg % args)
            return True

        # See the large comment above on why we need to check for
        # MAX_FUZZY_LENGTH.
        # The way to handle this case is to send an extra HTTP
        # request that will act as a tie-breaker.
        # NOTE(review): only the first argument of this call was present in
        # the original; the others are an assumption -- confirm against the
        # signature of _handle_large_http_responses().
        return self._handle_large_http_responses(http_response,
                                                 query,
                                                 known_404,
                                                 debugging_id)
Example #39
    def is_404(self, http_response):
        """
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the response is considered a 404, False otherwise.
        """
        #   First we handle the user configured exceptions:
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        string_match_404 = cf.cf.get('string_match_404')
        if string_match_404 and string_match_404 in http_response:
            return True

        #   This is the most simple case, we don't even have to think about this
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        if http_response.get_code() == 404:
            return True

        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we return False! (The response code can not be 404 at this
        #    point, that case returned above.)
        if domain_path in self._directory_uses_404_codes:
            return False

        #   Lets start with the rather complex code...
        with self._lock:
            if not self._already_analyzed:
                # NOTE(review): the original only flipped the flag here; the
                # call below was restored based on the comment that follows
                # ("404_body was already cleaned inside
                # generate_404_knowledge") -- confirm.
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        html_body = get_clean_body(http_response)

        #   Compare this response to all the 404's I have in my DB
        #   Copy the 404_responses deque in order to be able to iterate over
        #   it from one thread, while it is changed in another.
        copy_404_responses = copy.copy(self._404_responses)

        for resp_404 in copy_404_responses:

            if fuzzy_equal(resp_404.get_body(), html_body, IS_EQUAL_RATIO):
                msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO)
                om.out.debug(msg % fmt)
                return True

        #    I get here when the for ends and no body_404_db matched with
        #    the html_body that was sent as a parameter by the user. This
        #    means one of two things:
        #        * There is not enough knowledge in self._404_responses, or
        #        * The answer is NOT a 404.
        #    Because we want to reduce the amount of "false positives" that
        #    this method returns, we'll perform one extra check before
        #    saying that this is NOT a 404.
        #
        #    This code must live after the loop (not inside its body),
        #    otherwise only the first stored 404 would ever be compared and
        #    an empty DB would make the method fall through returning None.
        if domain_path not in self._fingerprinted_paths:

            if self._is_404_with_extra_request(http_response, html_body):
                #   Aha! It actually was a 404!

                msg = '"%s" (id:%s) is a 404 (similarity_index > %s).'\
                      ' Adding new knowledge to the 404_bodies database'\
                      ' (length=%s).'
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)

                return True

        msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
        om.out.debug(msg % fmt)

        return False
Example #40
    def generate_404_knowledge(self, url):
        """
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        The responses which are not similar to the ones already known are
        stored in self._404_responses; the method has no explicit return
        statement.

        :param url: The URL used to build the requests for files which are
                    not expected to exist.
        :raise RuntimeError: When self._uri_opener is None.
        """
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action', 'gif',
            'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar',
        }
        if extension:
            handlers.add(extension)

        test_urls = []

        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        # Also keep in mind that in some cases we don't have an extension, so
        # we need to create a URL with just a filename
        if not extension:
            rand_alnum_file = rand_alnum(8)
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            four_oh_data = FourOhFourResponseFactory(not_exist_resp)
            not_exist_resp_lst.append(four_oh_data)

            # Populate the self._directory_uses_404_codes with the information
            # we just retrieved from the application
            if not_exist_resp.get_code() == 404:

                url_404 = not_exist_resp.get_uri()

                path_extension = (url_404.get_domain_path(),
                                  url_404.get_extension())

                if path_extension not in self._directory_uses_404_codes:
                    # NOTE(review): assumes the container exposes a set-like
                    # add() -- confirm against its declaration.
                    self._directory_uses_404_codes.add(path_extension)

        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        if not_exist_resp_lst:
            four_oh_data = not_exist_resp_lst[0]
            self._404_responses.append(four_oh_data)

        # And now add the unique responses
        for i in not_exist_resp_lst:
            for j in self._404_responses:

                if i is j:
                    break

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # i already exists in the self._404_responses, no need
                    # to compare any further
                    break
            else:
                # None of the 404_responses match the item from not_exist_resp_lst
                # This means that this item is new and we should store it in the
                # 404_responses db
                self._404_responses.append(i)

        # Log the size of the 404 database after the update
        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #41
    def is_404(self, http_response):
        """
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the response is considered a 404, False otherwise.
        """
        #   First we handle the user configured exceptions:
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get("always_404"):
            return True
        elif domain_path in cf.cf.get("never_404"):
            return False

        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        string_match_404 = cf.cf.get("string_match_404")
        if string_match_404 and string_match_404 in http_response:
            return True

        #   This is the most simple case, we don't even have to think about this
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        if http_response.get_code() == 404:
            return True

        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we return False! (The response code can not be 404 at this
        #    point, that case returned above.)
        if domain_path in self._directory_uses_404_codes:
            return False

        #   Lets start with the rather complex code...
        with self._lock:
            if not self._already_analyzed:
                # NOTE(review): the original only flipped the flag here; the
                # call below was restored based on the comment that follows
                # ("404_body was already cleaned inside
                # generate_404_knowledge") -- confirm.
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        html_body = get_clean_body(http_response)

        #   Compare this response to all the 404's I have in my DB
        #   Copy the 404_responses deque in order to be able to iterate over
        #   it from one thread, while it is changed in another.
        copy_404_responses = copy.copy(self._404_responses)

        for resp_404 in copy_404_responses:

            if fuzzy_equal(resp_404.get_body(), html_body, IS_EQUAL_RATIO):
                msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
                om.out.debug(msg % fmt)
                return True

        #    I get here when the for ends and no body_404_db matched with
        #    the html_body that was sent as a parameter by the user. This
        #    means one of two things:
        #        * There is not enough knowledge in self._404_responses, or
        #        * The answer is NOT a 404.
        #    Because we want to reduce the amount of "false positives" that
        #    this method returns, we'll perform one extra check before
        #    saying that this is NOT a 404.
        #
        #    This code must live after the loop (not inside its body),
        #    otherwise only the first stored 404 would ever be compared and
        #    an empty DB would make the method fall through returning None.
        if domain_path not in self._fingerprinted_paths:

            if self._is_404_with_extra_request(http_response, html_body):
                #   Aha! It actually was a 404!

                msg = (
                    '"%s" (id:%s) is a 404 (similarity_index > %s).'
                    " Adding new knowledge to the 404_bodies database"
                    " (length=%s)."
                )
                fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)

                return True

        msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
        om.out.debug(msg % fmt)

        return False