Example #1
0
    def batch_injection_test(self, freq, orig_response):
        """
        Look for memcache injections using the batch injection technique.

        Every mutant created from `freq` receives three payloads, always in
        this order: self.ERROR_1, self.OK and self.ERROR_2. A vulnerability is
        reported only when all of these hold:

            * the ERROR_1 body is NOT fuzzy-equal to the original body
            * the OK body is NOT fuzzy-equal to the ERROR_1 body
            * the ERROR_2 body is NOT fuzzy-equal to the original body

        As soon as one check fails the remaining payloads for that mutant are
        not sent.

        :param freq: The fuzzable request used to create the mutants
        :param orig_response: The HTTP response for the unmodified request
        :return: None, findings are stored using self.kb_append_uniq()
        """
        send_clean = self._uri_opener.send_clean
        baseline_body = orig_response.get_body()

        def send_payload(target, payload):
            # Put the payload in the mutant token and send it; send_clean()
            # gives back a (response, body) pair
            target.set_token_value(payload)
            return send_clean(target)

        for mutant in create_mutants(freq, ['']):

            # Step 1: ERROR_1 must break the normal execution flow. When the
            # body still looks like the original one there is no injection.
            err1_resp, err1_body = send_payload(mutant, self.ERROR_1)
            if fuzzy_equal(baseline_body, err1_body, self._eq_limit):
                continue

            # Step 2: the well formed injection must answer differently than
            # the broken one, otherwise this is not a memcached injection.
            ok_resp, ok_body = send_payload(mutant, self.OK)
            if fuzzy_equal(err1_body, ok_body, self._eq_limit):
                continue

            # Step 3: a second broken payload must differ from the original
            # response again, otherwise the injection is not confirmed.
            err2_resp, err2_body = send_payload(mutant, self.ERROR_2)
            if fuzzy_equal(baseline_body, err2_body, self._eq_limit):
                continue

            # All three checks passed: report the finding
            evidence_ids = [err1_resp.id, ok_resp.id, err2_resp.id]

            desc_fmt = ('Memcache injection was found at: "%s", using'
                        ' HTTP method %s. The injectable parameter is: "%s"')
            desc = desc_fmt % (mutant.get_url(),
                               mutant.get_method(),
                               mutant.get_token_name())

            vuln = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                    severity.HIGH, evidence_ids, 'memcachei',
                                    mutant)

            self.kb_append_uniq(self, 'memcachei', vuln)
Example #2
0
    def matches(self, query):
        """
        Tell if the `query` HTTP response body looks like the failed login
        page represented by this instance.

        The check goes from cheap to expensive: exact comparison against
        self.body_a and self.body_b, a coarse fuzzy comparison against
        self.body_a and finally a comparison between diffs.

        :param query: An HTTP response body
        :return: True if the `query` response body matches the failed login
                 bodies stored in self.body_a / self.body_b (presumably
                 populated in __init__()), False otherwise.
        """
        # Exact matches need no further analysis
        if self.body_a == query or self.body_b == query:
            return True

        # When the bodies are really different there is no need to pay the
        # price of calculating the diff
        if not fuzzy_equal(self.body_a, query, 0.60):
            return False

        # The diff between the two known failed login bodies is calculated
        # only once and then kept in the instance
        if self.diff_a_b is None:
            self.diff_a_b, _ = chunked_diff(self.body_a, self.body_b)

        _, query_only_diff = chunked_diff(self.body_a, query)

        # Very small diffs are accepted right away. This prevents issues with
        # CSRF tokens, which might be part of the HTTP response body, are
        # random (not removed by clean_body) and would "break" the diff
        # comparison below
        if len(query_only_diff) < 64:
            return True

        return bool(fuzzy_equal(self.diff_a_b, query_only_diff, 0.9))
Example #3
0
    def batch_injection_test(self, freq, orig_response):
        """
        Find memcache injections by means of the batch injection technique.

        For each mutant of `freq` the ERROR_1, OK and ERROR_2 payloads are
        sent one after the other. The mutant is discarded as soon as one of
        the responses does not behave as expected:

            1. ERROR_1 body fuzzy-equal to the original body -> discard
            2. OK body fuzzy-equal to the ERROR_1 body -> discard
            3. ERROR_2 body fuzzy-equal to the original body -> discard

        Mutants which survive the three steps are reported as vulnerable.

        :param freq: The fuzzable request used to create the mutants
        :param orig_response: The HTTP response for the unmodified request
        :return: None, findings are stored using self.kb_append_uniq()
        """
        send_clean = self._uri_opener.send_clean
        original_body = orig_response.get_body()

        def similar(body_a, body_b):
            # Shortcut: fuzzy comparison using the configured limit
            return fuzzy_equal(body_a, body_b, self._eq_limit)

        for mutant in create_mutants(freq, ['']):

            # Try to break the normal execution flow with the ERROR_1 payload
            mutant.set_token_value(self.ERROR_1)
            first_error, first_error_body = send_clean(mutant)

            if similar(original_body, first_error_body):
                # Execution flow was not broken: no injection here
                continue

            # Send the correct injection request to confirm the finding
            mutant.set_token_value(self.OK)
            ok_response, ok_body = send_clean(mutant)

            if similar(first_error_body, ok_body):
                # "OK" and "ERROR_1" are answered in the same way, this is
                # not a memcached injection
                continue

            # ERROR_2 makes sure that we really are in a memcached case
            mutant.set_token_value(self.ERROR_2)
            second_error, second_error_body = send_clean(mutant)

            if similar(original_body, second_error_body):
                # The response should be different again, injection is not
                # confirmed
                continue

            ids = [first_error.id, ok_response.id, second_error.id]

            template = ('Memcache injection was found at: "%s", using'
                        ' HTTP method %s. The injectable parameter is: "%s"')
            url = mutant.get_url()
            method = mutant.get_method()
            token_name = mutant.get_token_name()
            desc = template % (url, method, token_name)

            finding = Vuln.from_mutant('Memcache injection vulnerability',
                                       desc, severity.HIGH, ids, 'memcachei',
                                       mutant)

            self.kb_append_uniq(self, 'memcachei', finding)
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Does a quick estimation to determine if the two strings (diff_x and diff_y)
    are fuzzy equal.

    Not using fuzzy_equal() to compare results of applying diff() because of
    CSRF tokens and other randomly generated tokens which were breaking the
    comparison.

    This function splits both inputs with split_by_sep(), filters the pieces
    with remove_hashes() (presumably dropping the randomly generated strings)
    and then compares what is left.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses (0 to 1)
    :return: True if the two results of applying the diff() function are
             fuzzy equal (applying split_by_sep technique)
    """
    # Fast path: identical inputs are equal, there is no need to split,
    # filter and run the (more expensive) fuzzy comparison
    if diff_x == diff_y:
        return True

    split_x = split_by_sep(diff_x)
    split_y = split_by_sep(diff_y)

    split_x = remove_hashes(split_x)
    split_y = remove_hashes(split_y)

    x = '\n'.join(split_x)
    y = '\n'.join(split_y)

    return fuzzy_equal(x, y, threshold=is_equal_ratio)
    def _is_404_with_extra_request(self, http_response, clean_resp_body):
        """
        Send one extra HTTP request to verify if a response is a 404.

        A URL which should not exist is built from the original one:

            * When the original URL has a filename, the filename is replaced
              with the output of self._generate_404_filename()
            * When there is no filename, '../<rand_alnum(8)>/' is joined to
              the original URL

        That URL is requested using self._send_404() and the cleaned body is
        compared with `clean_resp_body`. If they are fuzzy-equal the original
        response is considered a 404.

        As a side effect, when the extra request is answered with a 404 code
        the domain path of the requested URL is added to
        self._directory_uses_404_codes.

        :param http_response: The original HTTP response
        :param clean_resp_body: The original HTML body you could find in
                                http_response after passing it by a cleaner

        :return: True if the original response was a 404 !
        """
        original_url = http_response.get_url()
        original_filename = original_url.get_file_name()

        if original_filename:
            # Same path, different (should not exist) filename
            new_filename = self._generate_404_filename(original_filename)
            url_404 = original_url.copy()
            url_404.set_file_name(new_filename)
        else:
            # No filename to modify, request a sibling directory instead
            url_404 = original_url.url_join('../%s/' % rand_alnum(8))

        response_404 = self._send_404(url_404)
        body_404 = get_clean_body(response_404)

        # Remember the paths where the application uses real 404 codes
        if response_404.get_code() == 404:
            if url_404.get_domain_path() not in self._directory_uses_404_codes:
                self._directory_uses_404_codes.add(url_404.get_domain_path())

        return fuzzy_equal(body_404, clean_resp_body, IS_EQUAL_RATIO)
Example #6
0
    def _is_404_with_extra_request(self, http_response, clean_resp_body):
        """
        Performs a very simple check to verify if this response is a 404 or
        not, using one extra HTTP request.

        The URL for the extra request is derived from the original URL: the
        filename is replaced with the result of self._generate_404_filename()
        or, if the URL has no filename, the relative path
        '../<rand_alnum(8)>/' is joined to it. The cleaned body of the extra
        response is then fuzzy-compared with `clean_resp_body`; if they are
        equal then the original request is a 404.

        When the extra response has a 404 code, the domain path of the
        requested URL is stored in self._directory_uses_404_codes.

        :param http_response: The original HTTP response
        :param clean_resp_body: The original HTML body you could find in
                                http_response after passing it by a cleaner

        :return: True if the original response was a 404 !
        """
        base_url = http_response.get_url()
        name = base_url.get_file_name()

        if not name:
            missing_dir = '../%s/' % rand_alnum(8)
            probe_url = base_url.url_join(missing_dir)
        else:
            missing_name = self._generate_404_filename(name)
            probe_url = base_url.copy()
            probe_url.set_file_name(missing_name)

        probe_response = self._send_404(probe_url)
        probe_body = get_clean_body(probe_response)

        answered_with_404 = probe_response.get_code() == 404
        known_paths = self._directory_uses_404_codes

        if answered_with_404 and probe_url.get_domain_path() not in known_paths:
            known_paths.add(probe_url.get_domain_path())

        return fuzzy_equal(probe_body, clean_resp_body, IS_EQUAL_RATIO)
Example #7
0
def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Quickly estimate if two diff() results are fuzzy equal.

    fuzzy_equal() is not applied directly to the diff() results because CSRF
    tokens and other randomly generated tokens were breaking the comparison.
    Instead each input is split with split_by_sep(), filtered with
    remove_hashes() (presumably removing those random strings) and joined
    again using new lines before calling fuzzy_equal().

    Identical inputs are reported as equal without any processing.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses (0 to 1)
    :return: True if the two results of applying the diff() function are
             fuzzy equal (applying split_by_sep technique)
    """
    if diff_x == diff_y:
        return True

    def without_hashes(diff_result):
        # split -> filter -> join, the same steps for both inputs
        pieces = remove_hashes(split_by_sep(diff_result))
        return '\n'.join(pieces)

    normalized_x = without_hashes(diff_x)
    normalized_y = without_hashes(diff_y)

    return fuzzy_equal(normalized_x, normalized_y, threshold=is_equal_ratio)
Example #8
0
    def _filter_errors(self, result, filename):
        """
        Filter out ugly php errors and print a simple "Permission denied"
        or "File not found"
        """
        #print filename
        error = None

        if result.count('Permission denied'):
            error = PERMISSION_DENIED
        elif result.count('No such file or directory in'):
            error = NO_SUCH_FILE
        elif result.count('Not a directory in'):
            error = READ_DIRECTORY
        elif result.count(': failed to open stream: '):
            error = FAILED_STREAM

        elif self._file_not_found_str is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if fuzzy_equal(self._file_not_found_str, clean_result, 0.9):
                error = NO_SUCH_FILE

        #
        #    I want this function to return an empty string on errors.
        #    Not the error itself.
        #
        if error is not None:
            return ''

        return result
Example #9
0
    def clean_404_response_db(self):
        """
        During the scan, and because I chose to remove the very broad 404
        database lock, the 404 response database might become untidy: the same
        HTTP response might be appended to the DB multiple times.

        An untidy DB triggers more comparisons between HTTP responses, which
        is CPU-intensive.

        This method cleans the DB every CLEAN_DB_EVERY calls to reduce any
        duplicates. For each group of responses with fuzzy-equal bodies the
        first one found is kept and the others are removed.

        :return: None. The extended DB is modified.
        """
        self._clean_404_response_db_calls += 1

        if self._clean_404_response_db_calls % CLEAN_DB_EVERY != 0:
            return

        removed_items = 0

        # Work on a snapshot: the DB might be changed by other threads while
        # this method is running
        extended_404_response_copy = copy.copy(self._extended_404_responses)

        # Responses which were already inspected and stay in the DB.
        #
        # Candidates are only compared against these. Comparing against the
        # whole snapshot (which still holds the entries that were just
        # removed) drops *every* member of a group of similar responses,
        # leaving no representative of that 404 in the DB.
        kept = []

        for i in extended_404_response_copy:
            matching = None

            for j in kept:
                if i is j:
                    continue

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    matching = j
                    break

            if matching is None:
                # Nothing similar was kept so far, i is "unique"
                kept.append(i)
                continue

            # i (or something really similar) already exists in
            # self._extended_404_responses, just remove it and continue with
            # the next
            try:
                self._extended_404_responses.remove(i)
            except ValueError:
                # The 404 response DB might have been changed by another thread
                continue

            msg = ('Removed 404 response for "%s" (id: %s) from the 404 DB'
                   ' because it matches 404 response "%s" (id: %s)')
            args = (i.url, i.id, matching.url, matching.id)
            om.out.debug(msg % args)

            removed_items += 1

        msg = 'Called clean 404 response DB. Removed %s duplicates from DB.'
        args = (removed_items, )
        om.out.debug(msg % args)

        msg = 'The extended 404 response DB contains responses with IDs: %s'
        args = (', '.join(
            str(r.id) for r in copy.copy(self._extended_404_responses)))
        om.out.debug(msg % args)
Example #10
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Fuzzy-compare two response bodies using self._eq_limit as the ratio.

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When set, the bodies are first passed to diff()
                             and only the parts returned by it are compared
        :return: The result of fuzzy_equal() for the (maybe diffed) bodies
        """
        if compare_diff:
            left, right = diff(body1, body2)
        else:
            left, right = body1, body2

        return fuzzy_equal(left, right, self._eq_limit)
Example #11
0
    def _response_is_different(self, vhost_response, orig_resp_body, non_existent_responses):
        """
        Check that the virtual host response does not look like any of the
        responses we already know.

        Note that we use 0.35 in fuzzy_equal because we want the responses to
        be *really different*.

        :param vhost_response: The HTTP response for the virtual host (its
                               body is read using get_body())
        :param orig_resp_body: The original HTTP response body
        :param non_existent_responses: One or more HTTP responses for virtual hosts
                                       that do not exist in the remote server
        :return: True if vhost_response is different from orig_resp_body and non_existent_responses
        """
        vhost_body = vhost_response.get_body()

        if fuzzy_equal(vhost_body, orig_resp_body, 0.35):
            return False

        looks_like_non_existent = any(
            fuzzy_equal(vhost_body, ner.get_body(), 0.35)
            for ner in non_existent_responses
        )

        return not looks_like_non_existent
Example #12
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determine if two pages are equal using the self._eq_limit ratio.

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: If set, chunked_diff() is applied to the bodies
                             and we just compare the parts returned by it
        :return: The result of fuzzy_equal() for the (maybe diffed) bodies
        """
        to_compare = (body1, body2)

        if compare_diff:
            to_compare = chunked_diff(body1, body2)

        first, second = to_compare
        return fuzzy_equal(first, second, self._eq_limit)
Example #13
0
    def _is_resp_equal(self, res1, res2):
        """
        @see: unittest for this method in test_csrf.py
        """
        if res1.get_code() != res2.get_code():
            return False

        if not fuzzy_equal(res1.body, res2.body, self._equal_limit):
            return False

        return True
Example #14
0
 def _matches_failed_login(self, resp_body, login_failed_result_list):
     """
     :return: True if the resp_body matches the previously created
              responses that are stored in login_failed_result_list.
     """
     for login_failed_result in login_failed_result_list:
         if fuzzy_equal(resp_body, login_failed_result, 0.65):
             return True
     else:
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
Example #15
0
 def _matches_failed_login(self, resp_body, login_failed_result_list):
     """
     :return: True if the resp_body matches the previously created
              responses that are stored in login_failed_result_list.
     """
     for login_failed_result in login_failed_result_list:
         if fuzzy_equal(resp_body, login_failed_result, 0.65):
             return True
     else:
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
    def test_17092(self):
        """
        An nginx 404 page and a very short string must not be fuzzy-equal
        when using the 0.9 ratio.
        """
        nginx_404_lines = ('<html>\n',
                           '<head><title>404 Not Found</title></head>\n',
                           '<body bgcolor="white">\n',
                           '<center><h1>404 Not Found</h1></center>\n',
                           '<hr><center>nginx</center>\n',
                           '</body>\n',
                           '</html>\n')
        nginx_404 = ''.join(nginx_404_lines)

        short_body = 'itest'

        # 0.9 is from fingerprint_404.py
        are_equal = fuzzy_equal(nginx_404, short_body, 0.9)
        self.assertFalse(are_equal)
Example #17
0
    def _is_resp_equal(self, response_1, response_2):
        """
        :param response_1: HTTP response 1
        :param response_2: HTTP response 2
        :see: unittest for this method in test_csrf.py
        """
        if response_1.get_code() != response_2.get_code():
            return False

        if not fuzzy_equal(response_1.body, response_2.body,
                           self._equal_limit):
            return False

        return True
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Determines if two pages are equal using the self._eq_limit ratio.

        The result of the comparison and the time spent are sent to
        self.debug().

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: When set, the bodies are first passed to diff()
                             and only the parts returned by it are compared
        :return: The result of fuzzy_equal() for the (maybe diffed) bodies
        """
        started_at = time.time()

        if compare_diff:
            body1, body2 = diff(body1, body2)

        cmp_res = fuzzy_equal(body1, body2, self._eq_limit)

        verdict = 'ARE' if cmp_res else 'ARE NOT'
        self.debug('Strings %s similar enough (limit: %s)' % (verdict,
                                                              self._eq_limit))

        elapsed = time.time() - started_at
        self.debug('Took %.2f seconds to run equal_with_limit' % elapsed)

        return cmp_res
Example #19
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Fuzzy-compare two pages using self._eq_limit and log the outcome.

        Two debug messages are written using self.debug(): one with the
        result of the comparison and one with the time it took to run.

        :param body1: First response body
        :param body2: Second response body
        :param compare_diff: If True, diff() is applied to the bodies first
                             and its two results are the ones compared
        :return: The result of fuzzy_equal() for the (maybe diffed) bodies
        """
        start = time.time()

        first, second = diff(body1, body2) if compare_diff else (body1, body2)
        cmp_res = fuzzy_equal(first, second, self._eq_limit)

        if cmp_res:
            are = 'ARE'
        else:
            are = 'ARE NOT'

        result_msg = 'Strings %s similar enough (limit: %s)'
        self.debug(result_msg % (are, self._eq_limit))

        time_msg = 'Took %.2f seconds to run equal_with_limit'
        self.debug(time_msg % (time.time() - start))

        return cmp_res
    def test_all(self):
        """
        For every (string, string, threshold) case, fuzzy_equal() must return
        the same as comparing relative_distance() against the threshold.
        """
        acceptance_tests = [
            ('a', 'a', 1.0),
            ('a', 'a', 0.1),
            ('a', 'a', 0.0),

            ('a', 'b', 1.0),
            ('a', 'b', 0.1),
            ('a', 'b', 0.0),

            ('a', 'ab', 1.0),
            ('a', 'ab', 0.1),

            ('a', 'b', 0.0000000000000000001),
            ('a', 'b' * 100, 1.0),

            ('a', 'ab', 0.66666666666),
            ('a', 'aab', 0.5),
            ('a', 'aaab', 0.4),
            ('a', 'aaaab', 0.33333333333333333333333333333333333333333333333333333333),

            ('a' * 25, 'a', 1.0),
            ('aaa', 'aa', 1.0),
            ('a', 'a', 1.0),

            ('a' * 25, 'a', 0.076923076923076927),
            ('aaa', 'aa', 0.8),

            ('a', 'a', 0.0),
        ]

        msg = ('fuzzy_equal and relative_distance returned'
               ' different results for the same parameters:\n'
               '    - Parameter #1: %s\n'
               '    - Parameter #2: %s\n'
               '    - Threshold: %s\n'
               '    - Result fuzzy_equal: %s\n'
               '    - Result relative_distance: %s\n')

        for e, d, f in acceptance_tests:
            fuzzy_result = fuzzy_equal(e, d, f)
            distance = relative_distance(e, d)
            distance_result = distance >= f

            self.assertEqual(fuzzy_result, distance_result,
                             msg % (e, d, f, fuzzy_result, distance))
Example #21
0
    def _find_OS(self, fuzzable_request):
        """
        Analyze responses and determine if remote web server runs on windows
        or *nix.

        @Return: None, the knowledge is saved in the knowledgeBase
        """
        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if dirs and filename:

            last_url = dirs[-1]
            last_url = last_url.url_string

            windows_url = URL(last_url[0:-1] + '\\' + filename)
            windows_response = self._uri_opener.GET(windows_url)

            original_response = self._uri_opener.GET(freq_url)

            if fuzzy_equal(original_response.get_body(),
                                    windows_response.get_body(), 0.98):
                desc = 'Fingerprinted this host as a Microsoft Windows system.'
                os_str = 'windows'
            else:
                desc = 'Fingerprinted this host as a *nix system. Detection for'\
                       ' this operating system is weak, "if not windows then'\
                       ' linux".'
                os_str = 'unix'

            response_ids = [windows_response.id, original_response.id]
            i = Info('Operating system', desc, response_ids,
                     self.get_name())
            i.set_url(windows_response.get_url())
            
            kb.kb.raw_write(self, 'operating_system_str', os_str)
            kb.kb.append(self, 'operating_system', i)
            om.out.information(i.get_desc())
            return True

        return False
Example #22
0
    def _find_OS(self, fuzzable_request):
        """
        Analyze responses and determine if remote web server runs on windows
        or *nix.

        @Return: None, the knowledge is saved in the knowledgeBase
        """
        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if dirs and filename:

            last_url = dirs[-1]
            last_url = last_url.url_string

            windows_url = URL(last_url[0:-1] + '\\' + filename)
            windows_response = self._uri_opener.GET(windows_url)

            original_response = self._uri_opener.GET(freq_url)

            if fuzzy_equal(original_response.get_body(),
                           windows_response.get_body(), 0.98):
                desc = 'Fingerprinted this host as a Microsoft Windows system.'
                os_str = 'windows'
            else:
                desc = 'Fingerprinted this host as a *nix system. Detection for' \
                       ' this operating system is weak, "if not windows then' \
                       ' linux".'
                os_str = 'unix'

            response_ids = [windows_response.id, original_response.id]
            i = Info('Operating system', desc, response_ids,
                     self.get_name())
            i.set_url(windows_response.get_url())

            kb.kb.raw_write(self, 'operating_system_str', os_str)
            kb.kb.append(self, 'operating_system', i)
            om.out.information(i.get_desc())
            return True

        return False
Example #23
0
    def _test_ip_address(self, original_response, domain):
        """
        Check if http://ip(domain)/ == http://domain/

        The domain is resolved, the original URL is requested again using the
        IP address instead of the domain name, and an Info is saved to the KB
        when the two bodies are not fuzzy-equal (0.35 ratio).

        Nothing is reported when the domain can not be resolved, the request
        fails or the new response has no content.

        :param original_response: The HTTP response obtained using the domain
        :param domain: The domain name to resolve
        :return: None
        """
        try:
            resolved_ip = socket.gethostbyname(domain)
        except socket.error:
            return

        ip_url = original_response.get_url().copy()
        ip_url.set_domain(resolved_ip)

        try:
            ip_response = self._uri_opener.GET(ip_url, cache=True)
        except BaseFrameworkException as bfe:
            msg = ('An error occurred while fetching IP address URL in '
                   ' dns_wildcard plugin: "%s"')
            om.out.debug(msg % bfe)
            return

        if is_no_content_response(ip_response):
            return

        bodies_match = fuzzy_equal(ip_response.get_body(),
                                   original_response.get_body(),
                                   0.35)
        if bodies_match:
            return

        desc = 'The contents of %s and %s differ.' % (
            ip_response.get_uri(), original_response.get_uri())

        info = Info('Default virtual host',
                    desc,
                    ip_response.id,
                    self.get_name())
        info.set_url(ip_response.get_url())

        kb.kb.append(self, 'dns_wildcard', info)
        om.out.information(info.get_desc())
Example #24
0
    def _filter_errors(self, result, filename):
        """
        Filter out ugly php errors and print a simple "Permission denied"
        or "File not found"
        """
        #print filename
        error = None

        if result.count('Permission denied'):
            error = PERMISSION_DENIED
        elif result.count('No such file or directory in'):
            error = NO_SUCH_FILE
        elif result.count('Not a directory in'):
            error = READ_DIRECTORY
        elif result.count(': failed to open stream: '):
            error = FAILED_STREAM

        elif self._application_file_not_found_error is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if fuzzy_equal(self._application_file_not_found_error,
                                    clean_result, 0.9):
                error = NO_SUCH_FILE

        #
        #    I want this function to return an empty string on errors.
        #    Not the error itself.
        #
        if error is not None:
            return ''

        return result
Example #25
0
    def audit(self, freq, orig_response, debugging_id):
        """
        Fetch the URL in freq using both http and https and report a
        vulnerability when the secure content is also available over the
        insecure channel. ie:
            - input: https://w3af.org/
            - check: http://w3af.org/

        The check is disabled for the rest of the scan (self._should_run is
        set to False) after a port other than 80/443 is found, after any of
        the requests fails and after the first vulnerability is reported.

        :param freq: A FuzzableRequest
        :param orig_response: The HTTP response associated with the fuzzable request
        :param debugging_id: A unique identifier for this call to audit()
        :return: None, vulnerabilities are stored using self.kb_append()
        """
        if not self._should_run:
            return

        initial_uri = freq.get_uri()
        if initial_uri.get_port() not in {80, 443}:
            # The original URL looks like http://foo:3921/
            #
            # It's really strange (maybe not even possible?) to find a server
            # that listens for HTTP and HTTPS connections on the same port,
            # since we don't want to guess the port, nor generate errors such
            # as #8871 we just ignore this case
            self._should_run = False
            return

        def with_protocol(protocol):
            # Copies of the URI and the request which use `protocol`
            uri = initial_uri.copy()
            uri.set_protocol(protocol)

            request = copy.deepcopy(freq)
            request.set_url(uri)
            return uri, request

        insecure_uri, insecure_fr = with_protocol('http')
        secure_uri, secure_fr = with_protocol('https')

        # Error handling is disabled during these tests, we want the requests
        # to fail quickly and without affecting the library's error rate
        send_mutant = self._uri_opener.send_mutant

        try:
            insecure_response = send_mutant(insecure_fr, grep=False,
                                            error_handling=False)
            secure_response = send_mutant(secure_fr, grep=False,
                                          error_handling=False)
        except (HTTPRequestException, ScanMustStopException):
            # No vulnerability to report since one of these threw an error
            # (because there is nothing listening on that port). It makes
            # no sense to keep running since we already got an error
            self._should_run = False
            return

        if insecure_response is None or secure_response is None:
            # Same as above: one of the requests failed, nothing to report
            # and no reason to keep running
            self._should_run = False
            return

        if self._redirects_to_secure(insecure_response, secure_response):
            return

        if insecure_response.get_code() != secure_response.get_code():
            return

        same_content = fuzzy_equal(insecure_response.get_body(),
                                   secure_response.get_body(),
                                   0.95)
        if not same_content:
            return

        desc = ('Secure content can be accessed using the insecure'
                ' HTTP protocol. The vulnerable URLs used to verify'
                ' this vulnerability are:\n'
                ' - %s\n'
                ' - %s\n')
        desc %= (secure_uri, insecure_uri)

        response_ids = [insecure_response.id, secure_response.id]

        vuln = Vuln.from_fr('Secure content over insecure channel',
                            desc, severity.MEDIUM, response_ids,
                            self.get_name(), freq)

        self.kb_append(self, 'un_ssl', vuln)

        # In most cases, when one resource is available, all are
        # so we just stop searching for this vulnerability
        self._should_run = False
Example #26
0
    def generate_404_knowledge(self, url):
        """
        Fingerprint the "not found" responses of the directory that holds
        `url` and keep the distinct ones in self._404_responses.

        A random file name is requested (through self._send_404) for each
        well-known handler extension, plus the extension of `url` itself or,
        when `url` has no extension, a file name without one. Each
        (domain path, extension) pair that was answered with a real 404 code
        is also recorded in self._directory_uses_404_codes.

        :param url: The URL used to choose the directory and extension to
                    fingerprint.
        :return: None, the results are stored in instance attributes.
        :raises RuntimeError: When self._uri_opener is None.
        """
        # Nobody configured the object before using it
        if self._uri_opener is None:
            raise RuntimeError('404 fingerprint database was incorrectly initialized.'
                               ' URL opener is None.')

        url_extension = url.get_extension()
        base_path = url.get_domain_path()

        # In some configurations the 404 page depends on the handler, so one
        # probe is sent for each of the most common ones
        probe_extensions = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb',
                            'action', 'gif', 'htm', 'pl', 'cgi', 'xhtml',
                            'htmls', 'foobar'}
        if url_extension:
            probe_extensions.add(url_extension)

        probe_urls = [base_path.url_join(rand_alnum(8) + '.' + ext)
                      for ext in probe_extensions]

        # URLs without an extension get a probe with just a file name
        if not url_extension:
            probe_urls.append(base_path.url_join(rand_alnum(8)))

        send_in_parallel = self._worker_pool.imap_unordered
        fingerprints = []

        for response in send_in_parallel(self._send_404, probe_urls):
            fingerprints.append(FourOhFourResponseFactory(response))

            if response.get_code() != 404:
                continue

            # Remember that this path / extension answers with 404 codes
            probed_uri = response.get_uri()
            uses_404_key = (probed_uri.get_domain_path(),
                            probed_uri.get_extension())

            if uses_404_key not in self._directory_uses_404_codes:
                self._directory_uses_404_codes.add(uses_404_key)

        # The first fingerprint is always stored
        if fingerprints:
            self._404_responses.append(fingerprints[0])

        # The remaining ones are stored only when they are not the same
        # object as, or fuzzy-equal to, something which is already in the DB
        for candidate in fingerprints:
            already_known = any(
                candidate is known or
                fuzzy_equal(candidate.body, known.body, IS_EQUAL_RATIO)
                for known in self._404_responses)

            if not already_known:
                self._404_responses.append(candidate)

        om.out.debug('The 404 body result database has a length of %s.'
                     % len(self._404_responses))
Example #27
0
    def is_404(self, http_response):
        """
        Decide if an HTTP response is a "not found" page.

        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simpler approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        The checks are applied in this order, the first one that matches
        decides the result:
            * User configured 'always_404' / 'never_404' paths
            * User configured 'string_match_404' string
            * HTTP response code 404
            * Path is in self._directory_uses_404_codes (not a 404)
            * Fuzzy comparison with the bodies stored in self._404_responses
            * self._is_404_with_extra_request() as the last resort

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the response is considered a 404, False otherwise.
        """
        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this
        #
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we return False!
        #
        #    NOTE(review): responses with code 404 already returned True above,
        #    so the second half of this condition looks redundant here.
        #
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #
        #   Lets start with the rather complex code...
        #
        #   The 404 knowledge is generated only once per instance, the lock
        #   and the self._already_analyzed flag make sure of that.
        #
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        #
        # max_similarity_with_404: highest relative_distance() result seen
        #                          when comparing with the stored 404s
        # resp_path_in_db: True when a compared 404 has the same path as
        #                  this response
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        # Everything below, including the call to
        # self._is_404_with_extra_request(), runs while holding self._lock
        with self._lock:
            #
            #   Compare this response to all the 404's I have in my DB
            #
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                if resp_content_type != resp_404.doc_type:
                    continue

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    fmt = (http_response.get_url(),
                           http_response.id,
                           IS_EQUAL_RATIO)
                    om.out.debug(msg % fmt)
                    return True
                else:
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,
                                                  current_ratio)

                # Track if the response path is in the DB. Entries with a
                # different doc_type were skipped above, so they are never
                # taken into account here.
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            #
            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            #
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            #
            # When the DB holds a 404 for this same path and the response is
            # not even close to any stored 404, there is no need to verify.
            #
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                args = (http_response.get_url(),
                        http_response.id,
                        MUST_VERIFY_RATIO)
                om.out.debug(msg % args)
                return False

            if self._is_404_with_extra_request(http_response, resp_body):
                #
                #   Aha! It actually was a 404! Store it in the DB so that
                #   similar responses are matched by the loop above.
                #
                four_oh_data = FourOhFourResponseFactory(http_response)
                self._404_responses.append(four_oh_data)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False
Example #28
0
    def generate_404_knowledge(self, url):
        """
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and store the distinct ones in
        self._404_responses.

        :param url: The URL whose directory (and extension) is fingerprinted.
        :return: None, the knowledge is appended to self._404_responses.
        :raises RuntimeError: When self._uri_opener is None.
        """
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #
        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        #
        handlers = {'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif',
                    'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'}
        if extension:
            handlers.add(extension)

        test_urls = []

        # A dedicated loop variable is used so that `extension` keeps holding
        # the extension of the URL received as parameter
        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = list(imap_unordered(self._send_404, test_urls))

        #
        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        #
        # Each response is compared only with the ones which were already
        # accepted as unique: the first one is always kept, and any other is
        # kept exactly once, when it is not fuzzy-equal to an accepted one.
        #
        unique_responses = []

        for candidate in not_exist_resp_lst:
            for known in unique_responses:
                if fuzzy_equal(candidate.body, known.body, IS_EQUAL_RATIO):
                    # Something really similar was already accepted
                    break
            else:
                unique_responses.append(candidate)

        for http_response in unique_responses:
            four_oh_data = FourOhFourResponseFactory(http_response)
            self._404_responses.append(four_oh_data)

        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #29
0
    def audit(self, freq, orig_response):
        """
        Tests an URL for memcache injection vulnerabilities.

        Three payloads are sent for every mutant: self.mci.error_1,
        self.mci.ok and self.mci.error_2. The finding is reported only when
        both error payloads make the response body differ from the original
        one, and the "ok" payload does not.

        :param freq: The fuzzable request to audit.
        :param orig_response: The HTTP response for the unmodified request.
        :return: None, findings are stored with self.kb_append_uniq().
        """
        for mutant in create_mutants(freq, ['']):

            baseline_body = orig_response.get_body()

            # First payload: try to break the normal execution flow. When the
            # body stays the same nothing was broken, so there is no injection
            mutant.set_token_value(self.mci.error_1)
            error_1_response, error_1_body = self._uri_opener.send_clean(mutant)

            if fuzzy_equal(baseline_body, error_1_body, self._eq_limit):
                continue

            # Second payload: the correct injection, the body is expected to
            # be equal to the original one again
            mutant.set_token_value(self.mci.ok)
            ok_response, ok_body = self._uri_opener.send_clean(mutant)

            if fuzzy_not_equal(baseline_body, ok_body, self._eq_limit):
                continue

            # Third payload: a second error, to make sure that the first
            # difference was not caused by random bytes
            mutant.set_token_value(self.mci.error_2)
            error_2_response, error_2_body = self._uri_opener.send_clean(mutant)

            if fuzzy_equal(baseline_body, error_2_body, self._eq_limit):
                continue

            response_ids = [response.id for response in (error_1_response,
                                                         ok_response,
                                                         error_2_response)]

            desc = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
            desc %= (mutant.get_url(),
                     mutant.get_method(),
                     mutant.get_token_name())

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',
                                 mutant)

            self.debug(v.get_desc())

            # Keep the three bodies as evidence in the vulnerability
            evidence = (('ok_html', ok_response),
                        ('error_1_html', error_1_response),
                        ('error_2_html', error_2_response))

            for evidence_key, response in evidence:
                v[evidence_key] = response.get_body()

            self.kb_append_uniq(self, 'memcachei', v)
Example #30
0
    def generate_404_knowledge(self, url):
        """
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and store the distinct ones in
        self._404_responses.

        :param url: The URL whose directory (and extension) is fingerprinted.
        :return: None, the knowledge is appended to self._404_responses and
                 the domain path is added to self._fingerprinted_paths.
        :raises RuntimeError: When self._uri_opener is None.
        """
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = "404 fingerprint database was incorrectly initialized."
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #
        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        #
        handlers = {
            "py",
            "php",
            "asp",
            "aspx",
            "do",
            "jsp",
            "rb",
            "gif",
            "htm",
            "pl",
            "cgi",
            "xhtml",
            "htmls",
            "foobar",
        }
        if extension:
            handlers.add(extension)

        test_urls = []

        # A dedicated loop variable is used so that `extension` keeps holding
        # the extension of the URL received as parameter
        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + "." + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = list(imap_unordered(self._send_404, test_urls))

        #
        #   I have the 404 responses in not_exist_resp_lst, but maybe they
        #   all look the same, so I'll filter the ones that look alike.
        #
        #   Each response is compared only with the ones which were already
        #   accepted as unique. This keeps one sample when all the responses
        #   look alike (or when there is only one response), and stores each
        #   different response exactly once.
        #
        unique_responses = []

        for candidate in not_exist_resp_lst:
            candidate_body = candidate.get_body()

            for known in unique_responses:
                if fuzzy_equal(candidate_body, known.get_body(), IS_EQUAL_RATIO):
                    # Something really similar was already accepted
                    break
            else:
                unique_responses.append(candidate)

        self._404_responses.extend(unique_responses)

        msg_fmt = "The 404 body result database has a length of %s."
        om.out.debug(msg_fmt % len(self._404_responses))
        self._fingerprinted_paths.add(domain_path)
Example #31
0
    def generate_404_knowledge(self, url):
        """
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and store the distinct ones using
        self._append_to_base_404_responses().

        Each (domain path, extension) pair that was answered with a real 404
        code is also recorded in self._directory_uses_404_codes.

        :param url: The URL whose directory (and extension) is fingerprinted.
        :return: None, the results are stored in instance attributes.
        :raises RuntimeError: When self._uri_opener is None.
        """
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #
        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        #
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action', 'gif',
            'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
        }
        if extension:
            handlers.add(extension)

        test_urls = []

        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        # Also keep in mind that in some cases we don't have an extension, so
        # we need to create a URL with just a filename
        if not extension:
            rand_alnum_file = rand_alnum(8)
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            four_oh_data = FourOhFourResponse(not_exist_resp)
            not_exist_resp_lst.append(four_oh_data)

            #
            # Populate the self._directory_uses_404_codes with the information
            # we just retrieved from the application
            #
            if not_exist_resp.get_code() == 404:

                url_404 = not_exist_resp.get_uri()

                path_extension = (url_404.get_domain_path(),
                                  url_404.get_extension())

                # No need to check if the ScalableBloomFilter contains the key
                # It is a "set", adding duplicates is a no-op.
                self._directory_uses_404_codes.add(path_extension)

        #
        # Sort the HTTP responses by length to try to have the same DB on
        # each call to generate_404_knowledge(). This is required because of
        # the imap_unordered() above, which will yield the responses in
        # unexpected order each time we call it.
        #
        # A key function is used instead of a cmp-style comparator: the sort
        # is stable in both cases so the resulting order is the same, and
        # key= is the only form which also works in Python 3.
        #
        not_exist_resp_lst.sort(key=lambda four_oh: len(four_oh.body))

        #
        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        #
        # Just add the first one to the 404 responses list, since that one is
        # "unique"
        #
        if not_exist_resp_lst:
            four_oh_data = not_exist_resp_lst[0]
            self._append_to_base_404_responses(four_oh_data)

        # And now add the unique responses
        for i in not_exist_resp_lst:
            for j in self._base_404_responses:

                if i is j:
                    break

                if fuzzy_equal(i.body, j.body, IS_EQUAL_RATIO):
                    # i (or something really similar) already exists in the
                    # self._base_404_responses, no need to compare any further
                    break
            else:
                # None of the 404_responses match the item from not_exist_resp_lst
                # This means that this item is new and we should store it in the
                # 404_responses db
                self._append_to_base_404_responses(i)

        # The IDs are read from a shallow copy of the list
        msg = 'The base 404 response DB contains responses with IDs: %s'
        args = (', '.join(
            str(r.id) for r in copy.copy(self._base_404_responses)))
        om.out.debug(msg % args)
Example #32
0
    def _is_404_complex_impl(self, http_response, query):
        """
        Decide if `http_response` is a 404 by comparing it with a response
        which is known to be a 404 (obtained using self._get_404_response).

        The cheap checks run first (response code, content type and exact
        body match) and fuzzy_equal() is only used when they do not decide.
        Fuzzy-equal bodies of MAX_FUZZY_LENGTH or more are delegated to
        self._handle_large_http_responses().

        :param http_response: The HTTP response
        :param query: The HTTP response in FourOhFourResponse form (normalized
                      URL, clean body, etc.)
        :return: True if the HTTP response is a 404
        """
        debugging_id = http_response.get_debugging_id()
        if debugging_id is None:
            debugging_id = rand_alnum(8)

        # The known 404 comes from the DB, or is generated when the DB has
        # none with the same path
        known_404 = self._get_404_response(http_response, query, debugging_id)

        def response_details():
            # Shared prefix of all the debug messages below. It is built only
            # when a message is actually written.
            return (http_response.get_url(), http_response.id,
                    http_response.get_code(), len(http_response.get_body()),
                    debugging_id)

        # Trivial performance improvement that prevents running fuzzy_equal
        if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [known 404 with ID %s uses 404 code]')
            om.out.debug(msg % (response_details() + (known_404.id,)))
            return False

        # fuzzy_equal is CPU-intensive, there is no point in comparing (for
        # example) an image with an HTML document
        if query.content_type != known_404.content_type:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [document type mismatch with known 404 with ID %s]')
            om.out.debug(msg % (response_details() + (known_404.id,)))
            return False

        # Identical bodies are a 404, no matter how large or complex
        if known_404.body == query.body:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [string equals with 404 DB entry with ID %s]')
            om.out.debug(msg % (response_details() + (known_404.id,)))
            return True

        if not fuzzy_equal(known_404.body, query.body, IS_EQUAL_RATIO):
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [similarity_ratio < %s with known 404 with ID %s]')
            om.out.debug(msg % (response_details() +
                                (IS_EQUAL_RATIO, known_404.id)))
            return False

        #
        # The bodies are fuzzy-equal, but that is only trusted for bodies
        # which are shorter than MAX_FUZZY_LENGTH.
        #
        # Some sites include really large headers and footers (CSS, JS) in
        # all pages, including 404s:
        #
        #   {header-4000bytes}              {header-4000bytes}
        #   Hello world                     Not found
        #   {footer-4000bytes}              {footer-4000bytes}
        #
        # A user with a browser clearly sees a valid page and a 404, but
        # fuzzy_equal() returns True because 99% of the bytes are the same.
        #
        if len(query.body) < MAX_FUZZY_LENGTH:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_ratio > %s with 404 DB entry with ID %s]')
            om.out.debug(msg % (response_details() +
                                (IS_EQUAL_RATIO, known_404.id)))
            return True

        # Large responses: an extra HTTP request acts as a tie-breaker
        return self._handle_large_http_responses(http_response, query,
                                                 known_404, debugging_id)
Example #33
0
    def batch_injection_test(self, freq, orig_response):
        """
        Uses the batch injection technique to find memcache injections.

        For every mutant the self.mci.error_1, self.mci.ok and
        self.mci.error_2 payloads are sent, in that order. The vulnerability
        is stored only when both error payloads change the response body
        (compared with the original one) and the "ok" payload does not.

        :param freq: The fuzzable request to test.
        :param orig_response: The HTTP response for the unmodified request.
        :return: None, findings are stored with self.kb_append_uniq().
        """
        # shortcut
        send_clean = self._uri_opener.send_clean

        for mutant in create_mutants(freq, ['']):

            reference_body = orig_response.get_body()

            # The first error payload has to break the normal execution flow,
            # an unchanged body means that there is no injection
            mutant.set_token_value(self.mci.error_1)
            error_1_response, error_1_body = send_clean(mutant)

            if fuzzy_equal(reference_body, error_1_body, self._eq_limit):
                continue

            # The correct injection has to bring the original body back,
            # otherwise the injection failed
            mutant.set_token_value(self.mci.ok)
            ok_response, ok_body = send_clean(mutant)

            if fuzzy_not_equal(reference_body, ok_body, self._eq_limit):
                continue

            # The second error payload has to change the body again, this
            # confirms that the first difference wasn't due to random bytes
            mutant.set_token_value(self.mci.error_2)
            error_2_response, error_2_body = send_clean(mutant)

            if fuzzy_equal(reference_body, error_2_body, self._eq_limit):
                continue

            confirming_responses = (error_1_response,
                                    ok_response,
                                    error_2_response)
            response_ids = [response.id for response in confirming_responses]

            desc = ('Memcache injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
            desc %= (mutant.get_url(), mutant.get_method(),
                     mutant.get_token_name())

            v = Vuln.from_mutant('Memcache injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'memcachei',
                                 mutant)

            # Keep the three bodies as evidence in the vulnerability
            evidence = (('ok_html', ok_response),
                        ('error_1_html', error_1_response),
                        ('error_2_html', error_2_response))

            for evidence_key, response in evidence:
                v[evidence_key] = response.get_body()

            self.kb_append_uniq(self, 'memcachei', v)
    def is_404(self, http_response):
        """
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecessary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        """
        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get(
                'string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this
        #
        #   If there is some custom website that always returns 404 codes, then
        #   we are screwed, but this is open source, and the pentester working
        #   on that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we're return False!
        #
        if domain_path in self._directory_uses_404_codes and \
        http_response.get_code() != 404:
            return False

        #
        #   Lets start with the rather complex code...
        #
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # 404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        resp_body = get_clean_body(http_response)
        resp_content_type = http_response.doc_type
        resp_path = http_response.get_url().get_domain_path().url_string

        # See https://github.com/andresriancho/w3af/issues/6646
        max_similarity_with_404 = 0.0
        resp_path_in_db = False

        with self._lock:
            #
            #   Compare this response to all the 404's I have in my DB
            #
            for resp_404 in self._404_responses:

                # Since the fuzzy_equal function is CPU-intensive we want to
                # avoid calling it for cases where we know it won't match, for
                # example in comparing an image and an html
                if resp_content_type != resp_404.doc_type:
                    continue

                if fuzzy_equal(resp_404.body, resp_body, IS_EQUAL_RATIO):
                    msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                    fmt = (http_response.get_url(), http_response.id,
                           IS_EQUAL_RATIO)
                    om.out.debug(msg % fmt)
                    return True
                else:
                    # I could calculate this before and avoid the call to
                    # fuzzy_equal, but I believe it's going to be faster this
                    # way
                    current_ratio = relative_distance(resp_404.body, resp_body)
                    max_similarity_with_404 = max(max_similarity_with_404,
                                                  current_ratio)

                # Track if the response path is in the DB
                if not resp_path_in_db and resp_path == resp_404.path:
                    resp_path_in_db = True

            #
            # I get here when the for ends and no body_404_db matched with
            # the resp_body that was sent as a parameter by the user. This
            # means one of two things:
            #     * There is not enough knowledge in self._404_responses, or
            #     * The answer is NOT a 404.
            #
            # Because we want to reduce the amount of "false positives" that
            # this method returns, we'll perform some extra checks before
            # saying that this is NOT a 404.
            #
            if resp_path_in_db and max_similarity_with_404 < MUST_VERIFY_RATIO:
                msg = ('"%s" (id:%s) is NOT a 404 [similarity_index < %s'
                       ' with sample path in 404 DB].')
                args = (http_response.get_url(), http_response.id,
                        MUST_VERIFY_RATIO)
                om.out.debug(msg % args)
                return False

            if self._is_404_with_extra_request(http_response, resp_body):
                #
                #   Aha! It actually was a 404!
                #
                four_oh_data = FourOhFourResponseFactory(http_response)
                self._404_responses.append(four_oh_data)

                msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                       ' Adding new knowledge to the 404_responses database'
                       ' (length=%s).')
                fmt = (http_response.get_url(), http_response.id,
                       IS_EQUAL_RATIO, len(self._404_responses))
                om.out.debug(msg % fmt)
                return True

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            args = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % args)

            return False
    def generate_404_knowledge(self, url):
        """
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        One random, non-existent filename is requested for each known handler
        extension (plus the extension of `url`, when it has one). Only the
        responses whose body is not fuzzy-equal to a response that was already
        kept are wrapped with FourOhFourResponseFactory and appended to
        self._404_responses.

        :param url: The URL from which the domain path and the extension used
                    to build the test URLs are extracted.
        :return: None, the knowledge is stored in self._404_responses.
        :raises RuntimeError: When self._uri_opener is None.
        """
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = ('404 fingerprint database was incorrectly initialized.'
                   ' URL opener is None.')
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        #
        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        #
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif', 'htm',
            'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
        }
        if extension:
            handlers.add(extension)

        test_urls = []

        # Use a dedicated loop variable so that the extension extracted from
        # the URL is not overwritten while iterating
        for handler_ext in handlers:
            rand_alnum_file = rand_alnum(8) + '.' + handler_ext
            url404 = domain_path.url_join(rand_alnum_file)
            test_urls.append(url404)

        imap_unordered = self._worker_pool.imap_unordered
        not_exist_resp_lst = []

        for not_exist_resp in imap_unordered(self._send_404, test_urls):
            not_exist_resp_lst.append(not_exist_resp)

        #
        # I have the 404 responses in not_exist_resp_lst, but maybe they
        # all look the same, so I'll filter the ones that look alike.
        #
        # Each response is compared only against the responses that were
        # already kept: the first one is always kept (there is nothing to
        # compare it with) and any later response is kept only when it is
        # different from all the kept ones. This guarantees that a response
        # is never stored more than once.
        #
        unique_responses = []

        for not_exist_resp in not_exist_resp_lst:
            for unique_resp in unique_responses:
                if fuzzy_equal(not_exist_resp.body, unique_resp.body,
                               IS_EQUAL_RATIO):
                    # Already have one that looks like this, ignore it
                    break
            else:
                # None of the kept responses looks like this one
                unique_responses.append(not_exist_resp)

        # Store the unique responses in the 404 responses database
        for unique_resp in unique_responses:
            four_oh_data = FourOhFourResponseFactory(unique_resp)
            self._404_responses.append(four_oh_data)

        msg_fmt = 'The 404 body result database has a length of %s.'
        om.out.debug(msg_fmt % len(self._404_responses))
Example #36
0
    def _is_404_with_extra_request(self, http_response, clean_resp_body,
                                   debugging_id):
        """
        Decide if http_response is a 404 by requesting a sibling URL that
        should not exist and comparing both responses.

        The sibling URL is built from the URL of http_response: when it has a
        filename the filename is replaced with the output of
        generate_404_filename(), otherwise a random directory is joined to
        it. The response for the sibling URL is then compared (code first,
        body second) with the response received as parameter.

        :param http_response: The original HTTP response
        :param clean_resp_body: The body of http_response after passing it
                                by a cleaner
        :param debugging_id: Identifier forwarded to self._send_404 and
                             included in the debug messages

        :return: True if the original response was a 404 !
        """
        #
        #   Build the URL which is expected to trigger a 404
        #
        original_url = http_response.get_url()
        original_filename = original_url.get_file_name()

        if original_filename:
            forced_name = generate_404_filename(original_filename)
            url_404 = original_url.copy()
            url_404.set_file_name(forced_name)
        else:
            forced_name = '../%s/' % rand_alnum(8)
            url_404 = original_url.url_join(forced_name)

        #
        #   Request it
        #
        forced_response = self._send_404(url_404, debugging_id=debugging_id)
        forced_data = FourOhFourResponse(forced_response)

        #
        #   Remember that this path + extension answers with 404 codes
        #
        if forced_response.get_code() == 404:
            self._directory_uses_404_codes.add((url_404.get_domain_path(),
                                                url_404.get_extension()))

            if http_response.get_code() != 404:
                # The forced request got a 404 code and the response under
                # analysis did not: it is not a 404
                msg = (
                    'The generated HTTP response for %s (id: %s) has a 404'
                    ' code, which is different from code %s used by the HTTP'
                    ' response passed as parameter (id:%s, did:%s)')
                om.out.debug(msg % (url_404,
                                    forced_response.id,
                                    http_response.get_code(),
                                    http_response.id,
                                    debugging_id))
                return False

        #
        #   Different HTTP response codes: almost certainly not a 404
        #
        if forced_response.get_code() != http_response.get_code():
            msg = ('The generated HTTP response for %s (id: %s) has a %s'
                   ' code, which is different from code %s used by the HTTP'
                   ' response passed as parameter (id:%s, did:%s)')
            om.out.debug(msg % (url_404,
                                forced_response.id,
                                forced_response.get_code(),
                                http_response.get_code(),
                                http_response.id,
                                debugging_id))

            # Keep the forced response, it might save HTTP requests later
            self._append_to_extended_404_responses(forced_data)
            return False

        #
        #   Same code, now compare the bodies. The forced response "must be"
        #   a 404, which holds unless the application is using a URL rewrite
        #   rule that completely ignores the last part of the URL.
        #
        if not fuzzy_equal(forced_data.body, clean_resp_body, IS_EQUAL_RATIO):
            # Bodies differ: the forced URL triggered a 404 and the response
            # under analysis is something else
            msg = ('The generated HTTP response for %s (id: %s) is different'
                   ' from the HTTP response body passed as parameter'
                   ' (id: %s, did:%s)')
            om.out.debug(msg % (url_404,
                                forced_data.id,
                                http_response.id,
                                debugging_id))

            # Keep the forced response, it might save HTTP requests later
            self._append_to_extended_404_responses(forced_data)
            return False

        #
        #   Bodies are equal. Either both are 404s or the application is
        #   ignoring the last part of the URL, example:
        #
        #       http://w3af.com/foo/ignored
        #       http://w3af.com/foo/also-ignored
        #
        if self._looks_like_404_page(forced_response):
            msg = ('The generated HTTP response for %s (id: %s) looks like'
                   ' a 404 response AND is similar to the HTTP response body'
                   ' passed as parameter (id:%s, did:%s)')
            om.out.debug(msg % (url_404,
                                forced_data.id,
                                http_response.id,
                                debugging_id))

            # Keep the forced response, it might save HTTP requests later
            self._append_to_extended_404_responses(forced_data)
            return True

        #
        #   Worst case: equal bodies and nothing that looks like a 404. This
        #   happens when _looks_like_404_page() has a false negative, or when
        #   the site ignores the last part of the URL (/abc/def and /abc/foo
        #   yield the same result).
        #
        #   Returning False might add a false positive finding to the KB,
        #   which is preferred over a false negative.
        #
        msg = ('The generated HTTP response for %s (id: %s) is very similar to'
               ' the HTTP response body passed as parameter (id: %s), and the'
               ' generated response does NOT look like a 404 (did:%s)')
        om.out.debug(msg % (url_404,
                            forced_data.id,
                            http_response.id,
                            debugging_id))
        return False
Example #37
0
 def is_resp_equal(self, resp1, resp2):
     """
     Compare two responses by status code and by body similarity.

     :param resp1: First response, read through .status_code and .content
     :param resp2: Second response, read through .status_code and .content
     :return: True when both status codes are equal and fuzzy_equal()
              accepts the string form of both contents with a 0.99 ratio
     """
     # Cheap check first, it avoids the body comparison
     if resp1.status_code != resp2.status_code:
         return False

     first_body = str(resp1.content)
     second_body = str(resp2.content)
     return True if fuzzy_equal(first_body, second_body, 0.99) else False
Example #38
0
    def _is_404_complex(self, http_response):
        """
        Compare the HTTP response with a known 404 in order to decide if it
        is a 404 too. The known 404 is obtained from self._get_404_response,
        and for large responses the final decision is delegated to
        self._handle_large_http_responses.

        :param http_response: The HTTP response
        :return: True if the HTTP response is a 404
        """
        debugging_id = http_response.get_debugging_id()
        if debugging_id is None:
            debugging_id = rand_alnum(8)

        def log_verdict(template, *extra_args):
            # All the debug messages written here start with the same five
            # values, callers only provide the ones that change
            common_args = (http_response.get_url(),
                           http_response.id,
                           http_response.get_code(),
                           len(http_response.get_body()),
                           debugging_id)
            om.out.debug(template % (common_args + extra_args))

        # The 404 bodies stored in the DB were cleaned when creating the
        # FourOhFourResponse instances, wrap the response received as
        # parameter in the same class to have a fair comparison
        query = FourOhFourResponse(http_response)

        # Known 404 from the DB (or a generated one if there is none with
        # the same path in the DB)
        known_404 = self._get_404_response(http_response, query, debugging_id)

        # Trivial performance improvement that prevents running fuzzy_equal
        if query.code in NOT_404_RESPONSE_CODES and known_404.code == 404:
            log_verdict('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                        ' [known 404 with ID %s uses 404 code]',
                        known_404.id)
            return False

        # fuzzy_equal is CPU-intensive, do not call it when we know it will
        # not match, for example when comparing an image and an html
        if query.doc_type != known_404.doc_type:
            log_verdict('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                        ' [document type mismatch with known 404 with ID %s]',
                        known_404.id)
            return False

        # Simplest case: 100% equal bodies, no matter how large or complex
        if known_404.body == query.body:
            log_verdict('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                        ' [string equals with 404 DB entry with ID %s]',
                        known_404.id)
            return True

        if not fuzzy_equal(known_404.body, query.body, IS_EQUAL_RATIO):
            log_verdict('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                        ' [similarity_ratio < %s with known 404 with ID %s]',
                        IS_EQUAL_RATIO,
                        known_404.id)
            return False

        # The bodies are fuzzy-equal, but that is only trusted for bodies
        # shorter than MAX_FUZZY_LENGTH.
        #
        # Some sites include really large headers and footers (CSS, JQuery
        # or some other large JS) in all pages, including 404s:
        #
        #   {header-4000bytes}          {header-4000bytes}
        #   Hello world                 Not found
        #   {footer-4000bytes}          {footer-4000bytes}
        #
        # A user with a browser clearly identifies one as a valid page and
        # the other as a 404, but fuzzy_equal() returns True because 99% of
        # the bytes are the same.
        if len(query.body) >= MAX_FUZZY_LENGTH:
            # An extra HTTP request is sent to act as a tie-breaker
            return self._handle_large_http_responses(http_response,
                                                     query,
                                                     known_404,
                                                     debugging_id)

        # Short enough: long headers / footers are not interfering with
        # fuzzy-equals
        log_verdict('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                    ' [similarity_ratio > %s with 404 DB entry with ID %s]',
                    IS_EQUAL_RATIO,
                    known_404.id)
        return True
Example #39
0
    def is_404(self, http_response):
        """
        Decide if the HTTP response is a 404, keeping the detection as simple
        as possible. The checks are performed in this order:

            1- User configured exceptions ('always_404' and 'never_404')
            2- User configured string ('string_match_404')
            3- The HTTP response code
            4- Directories that are known to use 404 codes
            5- Similarity with the bodies stored in the 404 DB
            6- One extra HTTP request, only for paths that were not
               fingerprinted yet

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the HTTP response is a 404
        """
        domain_path = http_response.get_url().get_domain_path()

        # The user configured exceptions go first
        if domain_path in cf.cf.get('always_404'):
            return True

        if domain_path in cf.cf.get('never_404'):
            return False

        # The user configured setting. "If this string is in the response,
        # then it is a 404"
        if cf.cf.get('string_match_404'):
            if cf.cf.get('string_match_404') in http_response:
                return True

        # The most simple case. A custom website that always returns 404
        # codes is not supported, the pentester working on that site can
        # modify these lines.
        if http_response.get_code() == 404:
            return True

        # The file is in a directory that's known to return 404 codes for
        # files that do not exist, and this is NOT a 404 code
        if domain_path in self._directory_uses_404_codes:
            if http_response.get_code() != 404:
                return False

        # The rather complex code starts here: generate the 404 knowledge
        # only once
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # The bodies in the DB were cleaned inside generate_404_knowledge,
        # clean this one too in order to have a fair comparison
        cleaned_body = get_clean_body(http_response)

        # Iterate over a copy of the 404 DB, the original might be changed
        # by another thread while this one is reading it
        known_404_responses = copy.copy(self._404_responses)

        for known_404 in known_404_responses:
            if not fuzzy_equal(known_404.get_body(), cleaned_body,
                               IS_EQUAL_RATIO):
                continue

            msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            om.out.debug(msg % (http_response.get_url(),
                                http_response.id,
                                IS_EQUAL_RATIO))
            return True

        # None of the bodies in the DB matched, which means that:
        #     * There is not enough knowledge in self._404_responses, or
        #     * The answer is NOT a 404.
        #
        # In order to reduce the amount of "false positives" one extra check
        # is performed before saying that this is NOT a 404.
        path_key = http_response.get_url().get_domain_path()
        not_fingerprinted = path_key not in self._fingerprinted_paths

        if not_fingerprinted and self._is_404_with_extra_request(http_response,
                                                                 cleaned_body):
            # It actually was a 404! Save the new knowledge
            self._404_responses.append(http_response)
            self._fingerprinted_paths.add(path_key)

            msg = ('"%s" (id:%s) is a 404 (similarity_index > %s).'
                   ' Adding new knowledge to the 404_bodies database'
                   ' (length=%s).')
            om.out.debug(msg % (http_response.get_url(),
                                http_response.id,
                                IS_EQUAL_RATIO,
                                len(self._404_responses)))
            return True

        msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        om.out.debug(msg % (http_response.get_url(),
                            http_response.id,
                            IS_EQUAL_RATIO))
        return False
Example #40
0
    def generate_404_knowledge(self, url):
        """
        Request random filenames that are expected to trigger a 404, one for
        each common handler extension, and store the responses that do not
        look alike in self._404_responses.

        self._directory_uses_404_codes is also updated with the domain path
        and extension of each generated URL that was answered with a 404
        code.

        :param url: The URL from which the domain path and the extension used
                    to build the test URLs are extracted.
        :return: None, the knowledge is stored in the instance attributes.
        """
        # Nobody has properly configured the object in order to use it
        if self._uri_opener is None:
            raise RuntimeError('404 fingerprint database was incorrectly'
                               ' initialized. URL opener is None.')

        extension = url.get_extension()
        domain_path = url.get_domain_path()

        # In some configurations the 404 depends on the handler, these are
        # the most common ones and we want to catch the 404 for each of them
        handlers = {
            'py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'action', 'gif',
            'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'
        }
        if extension:
            handlers.add(extension)

        test_urls = [domain_path.url_join(rand_alnum(8) + '.' + handler_ext)
                     for handler_ext in handlers]

        # The URL had no extension, add a test URL with just a filename
        if not extension:
            test_urls.append(domain_path.url_join(rand_alnum(8)))

        send_all = self._worker_pool.imap_unordered
        candidates = []

        for raw_response in send_all(self._send_404, test_urls):
            candidates.append(FourOhFourResponseFactory(raw_response))

            if raw_response.get_code() != 404:
                continue

            # The application answered with a 404 code, remember the path
            # and extension which triggered it
            generated_uri = raw_response.get_uri()
            uses_404_key = (generated_uri.get_domain_path(),
                            generated_uri.get_extension())

            if uses_404_key not in self._directory_uses_404_codes:
                self._directory_uses_404_codes.add(uses_404_key)

        # The first candidate is always stored, since that one is "unique"
        if candidates:
            self._404_responses.append(candidates[0])

        # The remaining candidates are stored only when they do not look
        # like any of the responses which are already in the DB
        for candidate in candidates:
            already_known = any(
                candidate is known or fuzzy_equal(candidate.body,
                                                  known.body,
                                                  IS_EQUAL_RATIO)
                for known in self._404_responses)

            if not already_known:
                self._404_responses.append(candidate)

        om.out.debug('The 404 body result database has a length of %s.'
                     % len(self._404_responses))
Example #41
0
    def is_404(self, http_response):
        """
        Simple 404 detection. The HTTP response is considered a 404 when any
        of these is true, checked in order:

            * Its domain path is in the user configured "always_404" list
              (and it is never a 404 when it is in "never_404")
            * It contains the user configured "string_match_404" string
            * Its HTTP response code is 404
            * Its clean body is similar to one of the known 404 responses
            * The extra request performed for paths that were not
              fingerprinted yet says so

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True if the HTTP response is a 404
        """
        response_url = http_response.get_url()
        domain_path = response_url.get_domain_path()

        # User configured exceptions
        if domain_path in cf.cf.get("always_404"):
            return True

        if domain_path in cf.cf.get("never_404"):
            return False

        # User configured string: "If this string is in the response, then
        # it is a 404"
        if cf.cf.get("string_match_404"):
            if cf.cf.get("string_match_404") in http_response:
                return True

        # Most simple case. Sites that always return 404 codes are not
        # handled, the pentester working on that site can modify these lines.
        if http_response.get_code() == 404:
            return True

        # Directory known to return 404 codes for files that do not exist,
        # and this response does NOT have a 404 code
        if domain_path in self._directory_uses_404_codes:
            if http_response.get_code() != 404:
                return False

        # The 404 knowledge is generated only once
        with self._lock:
            if not self._already_analyzed:
                self.generate_404_knowledge(http_response.get_url())
                self._already_analyzed = True

        # Clean the body to have a fair comparison, the 404 bodies were
        # already cleaned inside generate_404_knowledge
        html_body = get_clean_body(http_response)

        # A copy of the 404 responses is used in order to be able to iterate
        # over it from one thread, while it is changed in another
        known_404_responses = copy.copy(self._404_responses)

        matches_known_404 = any(
            fuzzy_equal(known_404.get_body(), html_body, IS_EQUAL_RATIO)
            for known_404 in known_404_responses
        )

        if matches_known_404:
            template = '"%s" (id:%s) is a 404 [similarity_index > %s]'
            values = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(template % values)
            return True

        # No known 404 matched the body, so one of these is true:
        #     * There is not enough knowledge in self._404_responses, or
        #     * The answer is NOT a 404.
        #
        # To reduce the amount of "false positives", paths that were not
        # fingerprinted yet get one extra check before saying NOT a 404.
        fingerprint_path = http_response.get_url().get_domain_path()

        if fingerprint_path not in self._fingerprinted_paths:
            if self._is_404_with_extra_request(http_response, html_body):
                # It was a 404, save the new knowledge
                self._404_responses.append(http_response)
                self._fingerprinted_paths.add(fingerprint_path)

                template = (
                    '"%s" (id:%s) is a 404 (similarity_index > %s).'
                    " Adding new knowledge to the 404_bodies database"
                    " (length=%s)."
                )
                values = (
                    http_response.get_url(),
                    http_response.id,
                    IS_EQUAL_RATIO,
                    len(self._404_responses),
                )
                om.out.debug(template % values)
                return True

        template = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
        values = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
        om.out.debug(template % values)
        return False