Ejemplo n.º 1
0
 def _matchesFailedLogin(self, resp_body):
     '''
     @return: True if the resp_body matches the previously created 
     responses that are stored in self._login_failed_result_list.
     '''
     lfrl = self._login_failed_result_list
     # 0.65 gives a good measure of similarity
     if relative_distance_ge(resp_body, lfrl[0], 0.65) or \
             relative_distance_ge(resp_body, lfrl[1], 0.65):
         return True
     else:
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
Ejemplo n.º 2
0
    def _single_404_check(self, http_response, html_body):
        '''
        Decide if a response is a 404 by comparing it with a forced 404.

        A second URL is derived from the original one: "not-" is prepended
        to the filename or, when the URL has no filename, the relative path
        "../<8 random chars>/" is used. That URL is requested and its cleaned
        body is compared with html_body.

        As a side effect, when the forced request is answered with a 404
        status code, the domain path of the forced URL is stored in
        self._directory_uses_404_codes.

        :param http_response: The original HTTP response
        :param html_body: The original HTML body after passing it by a cleaner

        :return: True if the original response was a 404 !
        '''
        original_url = http_response.get_url()
        original_name = original_url.get_file_name()

        if original_name:
            forced_path = 'not-%s' % original_name
        else:
            forced_path = '../%s/' % rand_alnum(8)
        forced_url = original_url.url_join(forced_path)

        forced_response = self._send_404(forced_url, store=False)
        forced_body = get_clean_body(forced_response)

        # Remember the directories that answer with real 404 status codes
        if forced_response.get_code() == 404:
            known_dirs = self._directory_uses_404_codes
            if forced_url.get_domain_path() not in known_dirs:
                known_dirs.add(forced_url.get_domain_path())

        return relative_distance_ge(forced_body, html_body, IS_EQUAL_RATIO)
Ejemplo n.º 3
0
    def _single_404_check(self, http_response, html_body):
        '''
        Decide if a response is a 404 by comparing it with a forced 404.

        A second URL is derived from the original one: "not-" is prepended
        to the filename or, when the URL has no filename, the relative path
        "../<8 random chars>/" is used. That URL is requested and its cleaned
        body is compared with html_body.

        @param http_response: The original HTTP response
        @param html_body: The original HTML body after passing it by a cleaner

        @return: True if the original response was a 404 !
        '''
        original_url = http_response.getURL()
        original_name = original_url.getFileName()

        if original_name:
            forced_path = 'not-%s' % original_name
        else:
            forced_path = '../%s/' % createRandAlNum(8)

        forced_url = original_url.urlJoin(forced_path)
        forced_response = self._send_404(forced_url, store=False)

        return relative_distance_ge(get_clean_body(forced_response), html_body,
                                    IS_EQUAL_RATIO)
Ejemplo n.º 4
0
    def _single_404_check(self, http_response, html_body):
        '''
        Compare a response with the answer to a request that should be a 404.

        The URL of the forced request is built from the original URL, either
        by pre-pending "not-" to the filename or, if there is no filename, by
        joining the relative path "../<8 random chars>/". If the cleaned body
        of that answer is equal to html_body then the original request is
        a 404.

        When the forced request gets a 404 status code, the domain path of
        its URL is added to self._directory_uses_404_codes.

        :param http_response: The original HTTP response
        :param html_body: The original HTML body after passing it by a cleaner

        :return: True if the original response was a 404 !
        '''
        source_url = http_response.get_url()
        name = source_url.get_file_name()

        suffix = ('not-%s' % name) if name else ('../%s/' % rand_alnum(8))
        probe_url = source_url.url_join(suffix)

        probe_response = self._send_404(probe_url, store=False)
        probe_body = get_clean_body(probe_response)

        if probe_response.get_code() == 404:
            if probe_url.get_domain_path() not in self._directory_uses_404_codes:
                self._directory_uses_404_codes.add(probe_url.get_domain_path())

        return relative_distance_ge(probe_body, html_body, IS_EQUAL_RATIO)
Ejemplo n.º 5
0
 def test_prox_req_ok(self):
     '''A page fetched through the proxy must match the direct fetch.'''
     target = 'http://moth'

     # First through the proxy, then directly
     body_via_proxy = self.proxy_opener.open(target).read()
     body_direct = urllib2.urlopen(target).read()

     # Both bodies must be very similar
     are_similar = relative_distance_ge(body_direct, body_via_proxy, 0.9)
     self.assertTrue(are_similar)
Ejemplo n.º 6
0
    def _generate_404_knowledge( self, url ):
        '''
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        One random, non existing, file name is requested for each of the most
        common handler extensions, plus the extension of the given URL when it
        has one. The requests are sent using threads by self._send_404, which
        is expected to leave the bodies in self._response_body_list.

        @parameter url: The URL used to get the domain path (where the random
        files are requested) and the extra extension.
        @return: A list with 404 bodies in which no body looks like another
        one. The list is empty if no body was collected.
        '''
        # Get the filename extension and create a 404 for it
        extension = urlParser.getExtension( url )
        domain_path = urlParser.getDomainPath( url )

        # the result
        self._response_body_list = []

        #
        #   This is a list of the most common handlers, in some configurations, the 404
        #   depends on the handler, so I want to make sure that I catch the 404 for each one
        #
        handlers = set(['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'gif', 'htm'])
        handlers.update(['pl', 'cgi', 'xhtml', 'htmls'])
        if extension:
            # A URL without extension must not generate a "name." request
            handlers.add( extension )

        for handler_extension in handlers:

            rand_alnum_file = createRandAlNum( 8 ) + '.' + handler_extension

            url404 = urlParser.urlJoin(  domain_path , rand_alnum_file )

            #   Send the requests using threads:
            targs = ( url404,  )
            tm.startFunction( target=self._send_404, args=targs , ownerObj=self )

        # Wait for all threads to finish sending the requests.
        tm.join( self )

        #
        #   I have the bodies in self._response_body_list , but maybe they all look the same, so I'll
        #   filter the ones that look alike: a body is only kept when it does not look like any of
        #   the bodies that were already kept.
        #
        result = []
        for body in self._response_body_list:
            for known_body in result:
                if body == known_body or \
                relative_distance_ge(body, known_body, IS_EQUAL_RATIO):
                    # Looks like something I already have, ignore it
                    break
            else:
                # Not equal to any of the known bodies, keep it
                result.append(body)

        # I don't need these anymore
        self._response_body_list = None

        # And I return the ones I need
        om.out.debug('The 404 body result database has a lenght of ' + str(len(result)) +'.')

        return result
Ejemplo n.º 7
0
 def _matches_failed_login(self, resp_body, login_failed_result_list):
     '''
     :return: True if the resp_body matches the previously created
     responses that are stored in self._login_failed_result_list.
     '''
     for login_failed_result in login_failed_result_list:
         if relative_distance_ge(resp_body, login_failed_result, 0.65):
             return True
     else:
         # I'm happy! The response_body *IS NOT* a failed login page.
         return False
Ejemplo n.º 8
0
    def _find_OS(self, fuzzableRequest):
        '''
        Analyze responses and determine if remote web server runs on windows or *nix.

        The file is requested twice: with its original URL and with the last
        slash of the path replaced by a backslash. If both bodies are almost
        equal (ratio 0.98) the host is reported as windows, otherwise as unix.
        The finding is saved in the knowledgeBase.

        @parameter fuzzableRequest: The request whose URL is used for the test.
        @Return: True if the test was performed, False if the URL has no
        filename or no directory besides the "domain level" one.
        '''
        freq_url = fuzzableRequest.getURL()
        filename = freq_url.getFileName()
        dirs = freq_url.getDirectories()[:-1] # Skipping "domain level" dir.

        if not dirs or not filename:
            return False

        parent_dir = dirs[-1].url_string

        windows_url = url_object(parent_dir[0:-1] + '\\' + filename)
        windows_response = self._uri_opener.GET(windows_url)
        original_response = self._uri_opener.GET(freq_url)

        if relative_distance_ge(original_response.getBody(),
                                windows_response.getBody(), 0.98):
            first, second = windows_response, original_response
            os_str = 'windows'
            desc = 'Fingerprinted this host as a Microsoft Windows system.'
        else:
            first, second = original_response, windows_response
            os_str = 'unix'
            desc = 'Fingerprinted this host as a *nix system. Detection for this operating'
            desc += ' system is weak, "if not windows: is linux".'

        # The response that decided the result goes first in the info object
        i = info.info()
        i.setPluginName(self.getName())
        i.setName('Operating system')
        i.setURL( first.getURL() )
        i.setMethod( 'GET' )
        i.setDesc( desc )
        i.setId( [first.id, second.id] )
        kb.kb.append( self, 'operating_system_str', os_str )
        kb.kb.append( self, 'operating_system', i )
        om.out.information( i.getDesc() )

        return True
Ejemplo n.º 9
0
    def _find_OS(self, fuzzableRequest):
        """
        Analyze responses and determine if remote web server runs on windows or *nix.

        The file is requested twice: with its original URL and with the last
        slash of the path replaced by a backslash. If both bodies are almost
        equal (ratio 0.98) the host is reported as windows, otherwise as unix.
        Nothing is done when the URL has no filename or less than two
        directories. self._found_OS is set to True once both requests were sent.

        @parameter fuzzableRequest: The request whose URL is used for the test.
        @Return: None, the knowledge is saved in the knowledgeBase
        """
        dirs = fuzzableRequest.getURL().getDirectories()
        filename = fuzzableRequest.getURL().getFileName()

        if len(dirs) <= 1 or not filename:
            return

        parent_dir = dirs[-1].url_string

        windows_url = url_object(parent_dir[0:-1] + "\\" + filename)
        windows_response = self._urlOpener.GET(windows_url)
        original_response = self._urlOpener.GET(fuzzableRequest.getURL())
        self._found_OS = True

        if relative_distance_ge(original_response.getBody(), windows_response.getBody(), 0.98):
            first, second = windows_response, original_response
            os_str = "windows"
            desc = "Fingerprinted this host as a Microsoft Windows system."
        else:
            first, second = original_response, windows_response
            os_str = "unix"
            desc = "Fingerprinted this host as a *nix system. Detection for this operating"
            desc += ' system is weak, "if not windows: is linux".'

        # The response that decided the result goes first in the info object
        i = info.info()
        i.setPluginName(self.getName())
        i.setName("Operating system")
        i.setURL(first.getURL())
        i.setMethod("GET")
        i.setDesc(desc)
        i.setId([first.id, second.id])
        kb.kb.append(self, "operating_system_str", os_str)
        kb.kb.append(self, "operating_system", i)
        om.out.information(i.getDesc())
Ejemplo n.º 10
0
    def _find_OS(self, fuzzable_request):
        '''
        Analyze responses and determine if remote web server runs on windows
        or *nix.

        The file is requested twice: with its original URL and with the last
        slash of the path replaced by a backslash. If both bodies are almost
        equal (ratio 0.98) the host is reported as windows, otherwise as unix.
        The knowledge is saved in the knowledgeBase.

        @param fuzzable_request: The request whose URL is used for the test.
        @Return: True if the test was performed, False if the URL has no
                 filename or no directory besides the "domain level" one.
        '''
        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if not dirs or not filename:
            return False

        parent_dir = dirs[-1].url_string

        windows_url = URL(parent_dir[0:-1] + '\\' + filename)
        windows_response = self._uri_opener.GET(windows_url)
        original_response = self._uri_opener.GET(freq_url)

        bodies_match = relative_distance_ge(original_response.get_body(),
                                            windows_response.get_body(), 0.98)
        if bodies_match:
            os_str = 'windows'
            desc = 'Fingerprinted this host as a Microsoft Windows system.'
        else:
            os_str = 'unix'
            desc = ('Fingerprinted this host as a *nix system. Detection for'
                    ' this operating system is weak, "if not windows then'
                    ' linux".')

        i = Info('Operating system', desc,
                 [windows_response.id, original_response.id],
                 self.get_name())
        i.set_url(windows_response.get_url())

        kb.kb.raw_write(self, 'operating_system_str', os_str)
        kb.kb.append(self, 'operating_system', i)
        om.out.information(i.get_desc())
        return True
Ejemplo n.º 11
0
    def _find_OS(self, fuzzable_request):
        '''
        Fingerprint the remote operating system as windows or *nix.

        Two requests are sent: one to the original URL and one to the same
        URL with the last slash of the path replaced by a backslash. Bodies
        that are almost equal (ratio 0.98) mean windows, anything else is
        reported as unix. The knowledge is saved in the knowledgeBase.

        @param fuzzable_request: The request whose URL is used for the test.
        @Return: True if the test was performed, False if the URL has no
                 filename or no directory besides the "domain level" one.
        '''
        windows_desc = 'Fingerprinted this host as a Microsoft Windows system.'
        unix_desc = 'Fingerprinted this host as a *nix system. Detection for'\
                    ' this operating system is weak, "if not windows then'\
                    ' linux".'

        freq_url = fuzzable_request.get_url()
        filename = freq_url.get_file_name()
        dirs = freq_url.get_directories()[:-1]  # Skipping "domain level" dir.

        if not (dirs and filename):
            return False

        last_dir = dirs[-1].url_string
        windows_response = self._uri_opener.GET(
            URL(last_dir[0:-1] + '\\' + filename))
        original_response = self._uri_opener.GET(freq_url)

        if relative_distance_ge(original_response.get_body(),
                                windows_response.get_body(), 0.98):
            desc, os_str = windows_desc, 'windows'
        else:
            desc, os_str = unix_desc, 'unix'

        response_ids = [windows_response.id, original_response.id]
        i = Info('Operating system', desc, response_ids, self.get_name())
        i.set_url(windows_response.get_url())

        kb.kb.raw_write(self, 'operating_system_str', os_str)
        kb.kb.append(self, 'operating_system', i)
        om.out.information(i.get_desc())
        return True
Ejemplo n.º 12
0
    def _find_OS( self, fuzzableRequest ):
        '''
        Analyze responses and determine if remote web server runs on windows or *nix.

        The file is requested twice: with its original URL and with the last
        slash of the path replaced by a backslash. If both bodies are almost
        equal (ratio 0.98) the host is reported as windows, otherwise as unix.
        Nothing is done when the URL has no filename or less than two
        directories. self._found_OS is set to True once both requests were sent.

        @parameter fuzzableRequest: The request whose URL is used for the test.
        @Return: None, the knowledge is saved in the knowledgeBase
        '''
        dirs = urlParser.getDirectories( fuzzableRequest.getURL() )
        filename = urlParser.getFileName( fuzzableRequest.getURL() )
        if len( dirs ) <= 1 or not filename:
            return

        parent_dir = dirs[-1]
        windowsURL = parent_dir[0:-1] + '\\' + filename
        windows_response = self._urlOpener.GET( windowsURL )
        original_response = self._urlOpener.GET( fuzzableRequest.getURL() )
        self._found_OS = True

        if relative_distance_ge(original_response.getBody(),
                                windows_response.getBody(), 0.98):
            first, second = windows_response, original_response
            os_str = 'windows'
            desc = 'Fingerprinted this host as a Microsoft Windows system.'
        else:
            first, second = original_response, windows_response
            os_str = 'unix'
            desc = 'Fingerprinted this host as a *nix system. Detection for this operating'
            desc += ' system is weak, "if not windows: is linux".'

        # The response that decided the result goes first in the info object
        i = info.info()
        i.setPluginName(self.getName())
        i.setName('Operating system')
        i.setURL( first.getURL() )
        i.setMethod( 'GET' )
        i.setDesc( desc )
        i.setId( [first.id, second.id] )
        kb.kb.append( self, 'operating_system_str', os_str )
        kb.kb.append( self, 'operating_system', i )
        om.out.information( i.getDesc() )
Ejemplo n.º 13
0
    def _filter_errors( self, result,  filename ):
        '''
        Filter out ugly php errors and print a simple "Permission denied" or "File not found"
        '''
        filtered = ''
        
        if result.count('<b>Warning</b>'):
            if result.count( 'Permission denied' ):
                filtered = PERMISSION_DENIED
            elif result.count( 'No such file or directory in' ):
                filtered = NO_SUCH_FILE
            elif result.count( 'Not a directory in' ):
                filtered = READ_DIRECTORY
            elif result.count('</a>]: failed to open stream:'):
                filtered = FAILED_STREAM
                
        elif self._application_file_not_found_error is not None:
            #   The application file not found error string that I have has the "not_exist0.txt"
            #   string in it, so I'm going to remove that string from it.
            app_error = self._application_file_not_found_error.replace("not_exist0.txt",  '')
            
            #   The result string has the file I requested inside, so I'm going to remove it.
            trimmed_result = result.replace( filename,  '')
            
            #   Now I compare both strings, if they are VERY similar, then filename is a non 
            #   existing file.
            if relative_distance_ge(app_error, trimmed_result, 0.9):
                filtered = NO_SUCH_FILE

        #
        #   I want this function to return an empty string on errors. Not the error itself.
        #
        if filtered != '':
            return ''
        
        return result
Ejemplo n.º 14
0
    def _filter_errors(self, result, filename):
        '''
        Filter out ugly php errors and print a simple "Permission denied"
        or "File not found"
        '''
        #print filename
        error = None

        if result.count('Permission denied'):
            error = PERMISSION_DENIED
        elif result.count('No such file or directory in'):
            error = NO_SUCH_FILE
        elif result.count('Not a directory in'):
            error = READ_DIRECTORY
        elif result.count(': failed to open stream: '):
            error = FAILED_STREAM

        elif self._application_file_not_found_error is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if relative_distance_ge(self._application_file_not_found_error,
                                    clean_result, 0.9):
                error = NO_SUCH_FILE

        #
        #    I want this function to return an empty string on errors.
        #    Not the error itself.
        #
        if error is not None:
            return ''

        return result
Ejemplo n.º 15
0
    def _filter_errors(self, result, filename):
        '''
        Filter out ugly php errors and print a simple "Permission denied"
        or "File not found"
        '''
        #print filename
        error = None

        if result.count('Permission denied'):
            error = PERMISSION_DENIED
        elif result.count('No such file or directory in'):
            error = NO_SUCH_FILE
        elif result.count('Not a directory in'):
            error = READ_DIRECTORY
        elif result.count(': failed to open stream: '):
            error = FAILED_STREAM

        elif self._application_file_not_found_error is not None:
            # The result string has the file I requested inside, so I'm going
            # to remove it.
            clean_result = result.replace(filename, '')

            # Now I compare both strings, if they are VERY similar, then
            # filename is a non existing file.
            if relative_distance_ge(self._application_file_not_found_error,
                                    clean_result, 0.9):
                error = NO_SUCH_FILE

        #
        #    I want this function to return an empty string on errors.
        #    Not the error itself.
        #
        if error is not None:
            return ''

        return result
Ejemplo n.º 16
0
                        /es/ga.js/google-analytics.com/ga.js/google-analytics.com/ga.js/
                        /ga.js/google-analytics.com/ga.js/google-analytics.com/ga.js/
                        /ga.js/google-analytics.com/ga.js/google-analytics.com/
                        /ga.js/google-analytics.com/ga.js/google-analytics.com/google-analytics.com/ga.js
                        """
                        filename = urlParser.getFileName(reference)
                        if filename:
                            rindex = reference.rindex(filename)
                            # 'ar9k' is just a random string to get a 404
                            new_reference = reference[:rindex] + "ar9k" + reference[rindex:]

                            check_response = self._urlOpener.GET(new_reference, useCache=True, headers=headers)
                            resp_body = response.getBody()
                            check_resp_body = check_response.getBody()

                            if relative_distance_ge(resp_body, check_resp_body, IS_EQUAL_RATIO):
                                # If they are equal, then they are both a 404 (or something invalid)
                                # om.out.debug( reference + ' was broken!')
                                return

                            else:
                                # The URL was possibly_broken, but after testing we found out that
                                # it was not, so not we use it!
                                om.out.debug('Adding relative reference "' + reference + '" to the response.')
                                fuzzable_request_list.extend(
                                    self._createFuzzableRequests(response, request=original_request)
                                )

                    else:  # Not possibly_broken:
                        fuzzable_request_list = self._createFuzzableRequests(response, request=original_request)
Ejemplo n.º 17
0
    def is_404(self, http_response):
        '''
        All of my previous versions of is_404 were very complex and tried to struggle with all
        possible cases. The truth is that in most "strange" cases I was failing miserably, so now
        I changed my 404 detection once again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a lot of
        requests in order to cover them, which in most situations was unnecesary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by setting a string that
            identifies the 404 response (in case we are missing it for some reason in case #1)

        The checks are performed in this order: test database, user configured always/never
        404 paths, 404 response code, user configured 404 string, cached result for the
        response id and finally the comparison with the known 404 bodies.

        @parameter http_response: The HTTP response which we want to know if it is a 404 or not.
        @return: True if the response is a 404, False otherwise. When a test database is
        set, the next value stored in it is returned instead.
        @raise Exception: If a test database is set and it has no more values.
        '''

        #   This is here for testing.
        if self._test_db:
            i = self._test_db_index
            try:
                result = self._test_db[ i ]
            except LookupError:
                # Only a missing entry means that the test_db ran out of values,
                # any other problem must not be hidden behind this message.
                raise Exception('Your test_db is incomplete!')
            else:
                self._test_db_index = i + 1
                return result

        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.getURL().getDomainPath()
        if domain_path in cf.cf.getData('always404'):
            return True
        elif domain_path in cf.cf.getData('never404'):
            return False

        #
        #   This is the most simple case, we don't even have to think about this.
        #
        #   If there is some custom website that always returns 404 codes, then we are
        #   screwed, but this is open source, and the pentester working on that site can modify
        #   these lines.
        #
        if http_response.getCode() == 404:
            return True

        #
        #   The user configured setting. "If this string is in the response, then it is a 404"
        #
        string_404 = cf.cf.getData('404string')
        if string_404 and string_404 in http_response:
            return True

        #
        #   Before actually working, I'll check if this response is in the LRU, if it is I just return
        #   the value stored there.
        #
        if http_response.id in self.is_404_LRU:
            return self.is_404_LRU[ http_response.id ]

        if self.need_analysis():
            self.generate_404_knowledge( http_response.getURL() )

        # self._404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one.
        html_body = get_clean_body( http_response )

        #
        #   Compare this response to all the 404's I have in my DB. If it is not eq to one of the
        #   404 responses, that does not mean that it won't match the next one, so I keep looking.
        #
        for body_404_db in self._404_bodies:

            if relative_distance_ge(body_404_db, html_body, IS_EQUAL_RATIO):
                msg = '"%s" is a 404. [similarity_index > %s]' % \
                    (http_response.getURL(), IS_EQUAL_RATIO)
                om.out.debug(msg)
                self.is_404_LRU[ http_response.id ] = True
                return True

        #
        #   I get here when the for ends and no 404 is matched.
        #
        msg = '"%s" is NOT a 404. [similarity_index < %s]' % \
            (http_response.getURL(), IS_EQUAL_RATIO)
        om.out.debug(msg)
        self.is_404_LRU[ http_response.id ] = False
        return False
Ejemplo n.º 18
0
    def generate_404_knowledge(self, url):
        '''
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        One random, non existing, file name is requested for each of the most
        common handler extensions, plus the extension of the given URL when
        it has one. The requests are sent by self._send_404 through the
        worker pool; it is expected to leave the bodies in
        self._response_body_list.

        The summarised bodies are stored in self._404_bodies, the object is
        flagged as already analyzed and the domain path of the URL is added
        to self._fingerprinted_paths.

        :param url: The URL used to get the domain path (where the random
                    files are requested) and the extra extension.
        :return: None, the knowledge is saved in self._404_bodies. The list
                 is empty if no body was collected.
        :raise RuntimeError: If self._uri_opener was not configured.
        '''
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = '404 fingerprint database was incorrectly initialized.'
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        extension = url.get_extension()
        domain_path = url.get_domain_path()

        # the result
        self._response_body_list = []

        #
        #   This is a list of the most common handlers, in some configurations, the 404
        #   depends on the handler, so I want to make sure that I catch the 404 for each one
        #
        handlers = set()
        handlers.update(
            ['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb', 'do'])
        handlers.update(
            ['gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls', 'foobar'])
        if extension:
            handlers.add(extension)

        args_list = [domain_path.url_join(rand_alnum(8) + '.' + handler_ext)
                     for handler_ext in handlers]

        self._worker_pool.map(self._send_404, args_list)

        #
        #   I have the bodies in self._response_body_list , but maybe they
        #   all look the same, so I'll filter the ones that look alike: a
        #   body is only kept when it does not look like any of the bodies
        #   that were already kept.
        #
        result = []
        for body in self._response_body_list:
            for known_body in result:
                if body == known_body or \
                        relative_distance_ge(body, known_body, IS_EQUAL_RATIO):
                    # Looks like something I already have, ignore it
                    break
            else:
                # Not equal to any of the known bodies, keep it
                result.append(body)

        # I don't need these anymore
        self._response_body_list = None

        om.out.debug('The 404 body result database has a length of ' +
                     str(len(result)) + '.')

        self._404_bodies = result
        self._already_analyzed = True
        self._fingerprinted_paths.add(domain_path)
Ejemplo n.º 19
0
    def is_404(self, http_response):
        '''
        All of my previous versions of is_404 were very complex and tried to
        struggle with all possible cases. The truth is that in most "strange"
        cases I was failing miserably, so now I changed my 404 detection once
        again, but keeping it as simple as possible.

        Also, and because I was trying to cover ALL CASES, I was performing a
        lot of requests in order to cover them, which in most situations was
        unnecesary.

        So now I go for a much simple approach:
            1- Cover the simplest case of all using only 1 HTTP request
            2- Give the users the power to configure the 404 detection by
               setting a string that identifies the 404 response (in case we
               are missing it for some reason in case #1)

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        '''
        #
        #   First we handle the user configured exceptions:
        #
        domain_path = http_response.get_url().get_domain_path()
        if domain_path in cf.cf.get('always_404'):
            return True
        elif domain_path in cf.cf.get('never_404'):
            return False

        #
        #    The user configured setting. "If this string is in the response,
        #    then it is a 404"
        #
        if cf.cf.get('string_match_404') and cf.cf.get('string_match_404') in http_response:
            return True

        #
        #   This is the most simple case, we don't even have to think about this.
        #
        #   If there is some custom website that always returns 404 codes, then we
        #   are screwed, but this is open source, and the pentester working on
        #   that site can modify these lines.
        #
        if http_response.get_code() == 404:
            return True

        #
        #    Simple, if the file we requested is in a directory that's known to
        #    return 404 codes for files that do not exist, AND this is NOT a 404
        #    then we're return False!
        #
        if domain_path in self._directory_uses_404_codes and \
                http_response.get_code() != 404:
            return False

        #
        #   Before actually working, I'll check if this response is in the LRU,
        #   if it is I just return the value stored there.
        #
        if http_response.get_url().get_path() in self.is_404_LRU:
            return self.is_404_LRU[http_response.get_url().get_path()]

        with self._lock:
            if self.need_analysis():
                self.generate_404_knowledge(http_response.get_url())

        # self._404_body was already cleaned inside generate_404_knowledge
        # so we need to clean this one in order to have a fair comparison
        html_body = get_clean_body(http_response)

        #
        #    Compare this response to all the 404's I have in my DB
        #
        #    Note: while self._404_bodies is a list, we can perform this for loop
        #          without "with self._lock", read comments in stackoverflow:
        #          http://stackoverflow.com/questions/9515364/does-python-freeze-the-list-before-for-loop
        #
        for body_404_db in self._404_bodies:

            if relative_distance_ge(body_404_db, html_body, IS_EQUAL_RATIO):
                msg = '"%s" (id:%s) is a 404 [similarity_index > %s]'
                fmt = (
                    http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
                om.out.debug(msg % fmt)
                return self._fingerprinted_as_404(http_response)

        else:
            #
            #    I get here when the for ends and no body_404_db matched with the
            #    html_body that was sent as a parameter by the user. This means one
            #    of two things:
            #        * There is not enough knowledge in self._404_bodies, or
            #        * The answer is NOT a 404.
            #
            #    Because we want to reduce the amount of "false positives" that
            #    this method returns, we'll perform one extra check before saying
            #    that this is NOT a 404.
            if http_response.get_url().get_domain_path() not in self._fingerprinted_paths:
                if self._single_404_check(http_response, html_body):
                    self._404_bodies.append(html_body)
                    self._fingerprinted_paths.add(
                        http_response.get_url().get_domain_path())

                    msg = '"%s" (id:%s) is a 404 (similarity_index > %s). Adding new'
                    msg += ' knowledge to the 404_bodies database (length=%s).'
                    fmt = (http_response.get_url(), http_response.id,
                           IS_EQUAL_RATIO, len(self._404_bodies))
                    om.out.debug(msg % fmt)

                    return self._fingerprinted_as_404(http_response)

            msg = '"%s" (id:%s) is NOT a 404 [similarity_index < %s].'
            fmt = (http_response.get_url(), http_response.id, IS_EQUAL_RATIO)
            om.out.debug(msg % fmt)
            return self._fingerprinted_as_200(http_response)
Ejemplo n.º 20
0
    def generate_404_knowledge(self, url):
        '''
        Based on a URL, request something that we know is going to be a 404.
        Afterwards analyze the 404's and summarise them.

        One random, non-existent file name is requested for each well known
        handler extension (plus the extension of `url`, if it has one). The
        collected bodies are then reduced so that only one representative of
        each group of similar bodies is kept.

        Side effects: sets self._404_bodies to the summarised list, sets
        self._already_analyzed to True and adds the domain path of `url` to
        self._fingerprinted_paths.

        :param url: The URL object used as a base to build the 404 URLs.
        :return: None, the result is stored in self._404_bodies.
        :raises RuntimeError: If self._uri_opener was never configured.
        '''
        #
        #    This is the case when nobody has properly configured
        #    the object in order to use it.
        #
        if self._uri_opener is None:
            msg = '404 fingerprint database was incorrectly initialized.'
            raise RuntimeError(msg)

        # Get the filename extension and create a 404 for it
        url_extension = url.get_extension()
        domain_path = url.get_domain_path()

        # Collects the bodies of the requests sent below.
        # NOTE(review): presumably self._send_404 appends to this list, that
        # method is not visible from here - confirm.
        self._response_body_list = []

        #
        #   This is a list of the most common handlers, in some configurations,
        #   the 404 depends on the handler, so I want to make sure that I catch
        #   the 404 for each one
        #
        handlers = set(['py', 'php', 'asp', 'aspx', 'do', 'jsp', 'rb',
                        'gif', 'htm', 'pl', 'cgi', 'xhtml', 'htmls',
                        'foobar'])
        if url_extension:
            handlers.add(url_extension)

        args_list = [domain_path.url_join(rand_alnum(8) + '.' + handler)
                     for handler in handlers]

        self._worker_pool.map(self._send_404, args_list)

        #
        #   I have the bodies in self._response_body_list , but maybe they
        #   all look the same, so I'll filter the ones that look alike.
        #
        #   A body is only kept when it is NOT similar to any of the bodies
        #   that were already kept. Starting from an empty list also makes
        #   this safe when no response body was collected at all.
        #
        result = []
        for body in self._response_body_list:
            for known_body in result:
                if relative_distance_ge(body, known_body, IS_EQUAL_RATIO):
                    # Already have a representative for this kind of 404
                    break
            else:
                result.append(body)

        # I don't need these anymore
        self._response_body_list = None

        om.out.debug('The 404 body result database has a length of ' +
                     str(len(result)) + '.')

        self._404_bodies = result
        self._already_analyzed = True
        self._fingerprinted_paths.add(domain_path)
Ejemplo n.º 21
0
    def is_404(self, http_response):
        '''
        Decide if an HTTP response has to be handled as a 404.

        The checks are performed in this order, the first one that is
        conclusive wins:
            1- The user configured 'always_404' and 'never_404' paths
            2- The user configured 'string_match_404' string
            3- The HTTP response code being 404
            4- The directory being known to answer with real 404 codes
            5- The value stored in self.is_404_LRU for the URL path
            6- The similarity of the cleaned body with the known 404 bodies
            7- For domain paths which were not fingerprinted yet, one extra
               request performed by self._single_404_check

        :param http_response: The HTTP response which we want to know if it
                                  is a 404 or not.
        :return: True or False for the first four checks, the cached value
                 for the fifth, and for the rest whatever
                 self._fingerprinted_as_404 / self._fingerprinted_as_200
                 return (presumably the boolean verdict - those helpers are
                 not visible from here).
        '''
        url = http_response.get_url()
        domain_path = url.get_domain_path()

        # User configured exceptions always win
        if domain_path in cf.cf.get('always_404'):
            return True
        if domain_path in cf.cf.get('never_404'):
            return False

        # "If this string is in the response, then it is a 404"
        match_string = cf.cf.get('string_match_404')
        if match_string and match_string in http_response:
            return True

        # The trivial case: the server said it is a 404
        code = http_response.get_code()
        if code == 404:
            return True

        # The directory is known to return 404 codes for files that do not
        # exist and this response is not one of them
        if code != 404 and domain_path in self._directory_uses_404_codes:
            return False

        # Maybe this path was already analyzed and the answer is cached
        path = url.get_path()
        if path in self.is_404_LRU:
            return self.is_404_LRU[path]

        with self._lock:
            if self.need_analysis():
                self.generate_404_knowledge(url)

        # The stored 404 bodies were cleaned when they were generated, clean
        # this one too in order to have a fair comparison
        html_body = get_clean_body(http_response)

        # Iterating self._404_bodies without holding self._lock, see:
        # http://stackoverflow.com/questions/9515364/does-python-freeze-the-list-before-for-loop
        for known_404_body in self._404_bodies:
            if not relative_distance_ge(known_404_body, html_body,
                                        IS_EQUAL_RATIO):
                continue

            args = (url, http_response.id, IS_EQUAL_RATIO)
            om.out.debug('"%s" (id:%s) is a 404 [similarity_index > %s]' % args)
            return self._fingerprinted_as_404(http_response)

        # None of the known 404 bodies matched. Either there is not enough
        # knowledge in self._404_bodies or this is NOT a 404; to reduce the
        # false positives one extra check is performed for the domain paths
        # that were not fingerprinted yet.
        if domain_path not in self._fingerprinted_paths and \
                self._single_404_check(http_response, html_body):
            self._404_bodies.append(html_body)
            self._fingerprinted_paths.add(domain_path)

            template = ('"%s" (id:%s) is a 404 (similarity_index > %s). Adding new'
                        ' knowledge to the 404_bodies database (length=%s).')
            args = (url, http_response.id, IS_EQUAL_RATIO,
                    len(self._404_bodies))
            om.out.debug(template % args)

            return self._fingerprinted_as_404(http_response)

        args = (url, http_response.id, IS_EQUAL_RATIO)
        om.out.debug('"%s" (id:%s) is NOT a 404 [similarity_index < %s].' % args)
        return self._fingerprinted_as_200(http_response)