Example #1
0
def diff(a, b):
    """
    Compute the parts of two similar strings which are unique to each one.

    :param a: A string
    :param b: A string (similar to a)
    :return: Two strings (a_mod, b_mod) which are basically:

                a_mod = a - (a intersection b)
                b_mod = b - (a intersection b)

             Or if you want to see it in another way, the results are the
             parts of the string that make it unique between each other.
    """
    a = smart_str_ignore(a)
    b = smart_str_ignore(b)

    dmp = dmp_module.diff_match_patch()

    # The time limit is configured through Diff_Timeout. The `deadline`
    # argument of diff_main() is an absolute point in time which the library
    # uses for its internal recursive calls, so a duration must not be
    # passed there.
    #
    # NOTE(review): assumes MAX_DIFF_TIME is a duration in seconds - confirm
    dmp.Diff_Timeout = MAX_DIFF_TIME

    changes = dmp.diff_main(a, b, checklines=True)

    dmp.diff_cleanupSemantic(changes)

    a_changes = []
    b_changes = []

    for op, change in changes:
        if op == -1:
            # Deletion: the text is only present in `a`
            a_changes.append(change)
        elif op == 1:
            # Insertion: the text is only present in `b`
            b_changes.append(change)

    return ''.join(a_changes), ''.join(b_changes)
Example #2
0
    def _should_analyze(self, response):
        """
        Decide if the HTTP response has to be analyzed, remembering the
        hashes of both the URL and the body in self._analyzed_hashes.

        :param response: HTTP response
        :return: True if we should analyze this HTTP response, False when
                 either the URL or the body were already seen
        """
        #
        # Avoid running this plugin twice on the same URL
        #
        # The URL is converted to a byte string before hashing (just like
        # the body below) so the hash never depends on an implicit text
        # encoding step.
        #
        # NOTE(review): url_string is presumably unicode - confirm
        #
        url_string = smart_str_ignore(response.get_url().url_string)
        url_hash = hashlib.md5(url_string).hexdigest()
        if url_hash in self._analyzed_hashes:
            return False

        self._analyzed_hashes.add(url_hash)

        #
        # Avoid running this plugin twice on the same file content
        #
        body = smart_str_ignore(response.get_body())
        response_hash = hashlib.md5(body).hexdigest()

        if response_hash in self._analyzed_hashes:
            return False

        self._analyzed_hashes.add(response_hash)
        return True
Example #3
0
def clean_fuzzable_request_form(fuzzable_request,
                                dc_handler=clean_data_container):
    """
    Serialize the relevant parts of a fuzzable request into a single string.

    The goal is to obtain a "unique representation" of how the HTTP request
    looks like WITHOUT including the URL.

    Related with https://github.com/andresriancho/w3af/issues/15970

    :param fuzzable_request: The fuzzable request instance to clean
    :param dc_handler: Callable used to serialize the query string and the
                       post-data containers
    :return: The method, data type, query string and post-data
             representations joined with "|"
    """
    http_method = fuzzable_request.get_method().upper()

    post_data = fuzzable_request.get_raw_data()
    data_type = post_data.get_type()

    # An empty string is used as placeholder when there is no query string
    request_uri = fuzzable_request.get_uri()
    if request_uri.has_query_string():
        query_string_repr = dc_handler(request_uri.querystring)
    else:
        query_string_repr = ''

    # The same placeholder is used when the post-data evaluates to false
    post_data_repr = dc_handler(post_data) if post_data else ''

    parts = (http_method, data_type, query_string_repr, post_data_repr)
    return '|'.join([smart_str_ignore(part) for part in parts])
Example #4
0
    def _upload_file(self, domain_path, rand_file, debugging_id):
        """
        Upload the file using author.dll

        :param domain_path: http://localhost/f00/
        :param rand_file: <random>.html
        :param debugging_id: Identifier forwarded to the HTTP client together
                             with the POST request
        :return: None when the HTTP request raised BaseFrameworkException
        """
        # TODO: The frontpage version should be obtained from the information
        # saved in the kb by the infrastructure.frontpage_version plugin!
        #
        # The 4.0.2.4715 version should be dynamic!
        version = '4.0.2.4715'

        file_path = domain_path.get_path() + rand_file

        # The request body is the POST_BODY template followed by the file
        # name reversed
        data = POST_BODY % (version, file_path)
        data += rand_file[::-1]
        data = smart_str_ignore(data)

        target_url = self._get_author_url()

        try:
            res = self._uri_opener.POST(target_url,
                                        data=data,
                                        debugging_id=debugging_id)
        except BaseFrameworkException as e:
            om.out.debug(
                'Exception while uploading file using author.dll: %s' % e)
            return None
Example #5
0
    def get_response_cache_key(self,
                               http_response,
                               clean_response=None,
                               headers=None):
        """
        Cached front-end for the get_response_cache_key() function which is
        resolved from the enclosing module scope.

        :param http_response: The HTTP response to get the cache key for
        :param clean_response: Optional cleaned version of the response
        :param headers: Optional headers, used as part of the lookup key and
                        forwarded to get_response_cache_key()
        :return: The value stored in self._cache for this response, computing
                 and storing it first when it is missing
        """
        # The clean response body is preferred when available: it has been
        # cleaned (removed request paths and QS parameters) so it has a
        # higher chance of being equal to other responses / being already
        # in the cache
        body_source = http_response if clean_response is None else clean_response
        body = body_source.body

        lookup_key = quick_hash('%s%s' % (smart_str_ignore(body), headers))

        cached = self._cache.get(lookup_key, None)
        if cached is not None:
            return cached

        computed = get_response_cache_key(http_response,
                                          clean_response=clean_response,
                                          headers=headers)
        self._cache[lookup_key] = computed
        return computed
Example #6
0
    def is_csrf_token(self, key, value):
        """
        Entropy based algorithm
        http://en.wikipedia.org/wiki/Password_strength

        :param key: The parameter name
        :param value: The parameter value
        :return: True when the value length is in the accepted range and
                 either the name contains a common CSRF token name or the
                 value entropy reaches the minimum
        """
        min_length = 5
        max_length = 512
        min_entropy = 2.4

        # Too short to be a token
        if len(value) <= min_length:
            return False

        # I have never seen a CSRF token longer than 256 bytes, doubling
        # that and checking to make sure we don't check parameters which
        # are files in multipart uploads or stuff like that
        if len(value) > max_length:
            return False

        # Common CSRF token names are accepted without checking the entropy
        if any(name.lower() in key.lower() for name in COMMON_CSRF_NAMES):
            return True

        # Fall back to the entropy of the value
        value_entropy = self.shannon_entropy(smart_str_ignore(value))
        return True if value_entropy >= min_entropy else False
Example #7
0
    def log_enabled_plugins(self, plugins_dict, options_dict):
        """
        This method is called from the output manager object. It writes the
        enabled plugins, their configuration and the targets to the output
        file, prefixing every line with a timestamp.

        :param plugins_dict: A dict with all the plugin types and the enabled
                                plugins for that type of plugin.
        :param options_dict: A dict with the options for every plugin.
        """
        the_time = time.strftime("%c", time.localtime(time.time()))
        timestamp = '[ %s - Enabled plugins ] ' % the_time

        # One section per plugin type
        sections = [self._create_plugin_info(plugin_type,
                                             plugins_dict[plugin_type],
                                             options_dict[plugin_type])
                    for plugin_type in plugins_dict]

        # And now the target information
        str_targets = ', '.join(
            smart_str_ignore(u.url_string) for u in cf.cf.get('targets'))
        sections.append('target\n')
        sections.append('    set target ' + str_targets + '\n')
        sections.append('    back')

        # Every line gets the timestamp prefix
        to_print = ''.join(sections)
        to_print = to_print.replace('\n', '\n' + timestamp) + '\n'

        self._write_to_file(timestamp + to_print)
Example #8
0
def clean_fuzzable_request_form(fuzzable_request, dc_handler=clean_data_container):
    """
    Extract data from the fuzzable request and serialize it.

    The result is a "unique representation" of how the HTTP request looks
    like WITHOUT including the URL.

    Related with https://github.com/andresriancho/w3af/issues/15970

    :param fuzzable_request: The fuzzable request instance to clean
    :param dc_handler: Callable which serializes a data container
    :return: A "|" separated string with the method, the data type, the
             query string and the post-data
    """
    # Method
    pieces = [fuzzable_request.get_method().upper()]

    # Type
    body = fuzzable_request.get_raw_data()
    pieces.append(body.get_type())

    # Query string parameters (empty placeholder when missing)
    target = fuzzable_request.get_uri()
    pieces.append(dc_handler(target.querystring)
                  if target.has_query_string() else '')

    # Post-data parameters (empty placeholder when missing)
    pieces.append(dc_handler(body) if body else '')

    return '|'.join(map(smart_str_ignore, pieces))
Example #9
0
    def is_csrf_token(self, key, value):
        """
        Entropy based algorithm
        http://en.wikipedia.org/wiki/Password_strength

        :param key: The parameter name
        :param value: The parameter value
        :return: True if the parameter looks like a CSRF token: the value
                 length is in range and the name is a common CSRF token
                 name or the value has enough entropy
        """
        min_length = 5
        max_length = 512
        min_entropy = 2.4

        # Check length
        #
        # I have never seen a CSRF token longer than 256 bytes, doubling
        # that and checking to make sure we don't check parameters which
        # are files in multipart uploads or stuff like that
        if not (min_length < len(value) <= max_length):
            return False

        # Check for common CSRF token names
        for known_name in COMMON_CSRF_NAMES:
            if known_name.lower() in key.lower():
                return True

        # Calculate entropy
        if self.shannon_entropy(smart_str_ignore(value)) >= min_entropy:
            return True
        else:
            return False
Example #10
0
File: xxe.py Project: zsdlove/w3af
    def _parse_xml(self, original_value):
        """
        Parse the XML into an object

        :param original_value: The XML as sent by the application
        :return: The XML object or None if parsing failed
        """
        # This is a safety measure to prevent us from loading large XML files
        # into memory (high memory usage) or loading a very complex xml which
        # might require a lot of CPU time
        if len(original_value) > 1024 * 1024:
            return None

        # Secure, don't introduce XXE in our XXE detection plugin ;-)
        parser = etree.XMLParser(load_dtd=False,
                                 no_network=True,
                                 resolve_entities=False)

        try:
            xml_root = etree.fromstring(smart_str_ignore(original_value),
                                        parser=parser)
        except Exception, e:
            msg = 'Failed to parse XML to inject XXE tests. Exception was: "%s"'
            om.out.debug(msg % e)
            return None
Example #11
0
    def _upload_file(self, domain_path, rand_file, debugging_id):
        """
        Upload the file using author.dll

        :param domain_path: http://localhost/f00/
        :param rand_file: <random>.html
        :param debugging_id: Identifier sent to the HTTP client along with
                             the POST request
        :return: None when the HTTP request raised BaseFrameworkException
        """
        # TODO: The frontpage version should be obtained from the information
        # saved in the kb by the infrastructure.frontpage_version plugin!
        #
        # The 4.0.2.4715 version should be dynamic!
        version = '4.0.2.4715'

        file_path = domain_path.get_path() + rand_file

        # POST_BODY template, then the reversed file name, all converted to
        # a byte string before sending
        data = POST_BODY % (version, file_path)
        data += rand_file[::-1]
        data = smart_str_ignore(data)

        target_url = self._get_author_url()

        try:
            res = self._uri_opener.POST(target_url,
                                        data=data,
                                        debugging_id=debugging_id)
        except BaseFrameworkException as e:
            om.out.debug('Exception while uploading file using author.dll: %s' % e)
            return None
Example #12
0
    def _get_all_parameters(self, request):
        """
        Iterate over the tokens found in the query string, the post-data,
        the headers and the cookie of the request.

        :param request: The HTTP request
        :yield: All the HTTP request parameters as tuples of (name, value).
                When the value can be decoded as base64 an extra tuple with
                the decoded data is yielded right after it.
        """
        headers = request.get_headers()
        query_string = request.get_uri().get_querystring()
        post_data = dc_from_hdrs_post(headers, request.get_data())

        cookie_header, _ = headers.iget('cookie', '')
        cookies = Cookie(cookie_header)

        containers = (query_string, post_data, headers, cookies)
        all_tokens = itertools.chain(*[c.iter_tokens() for c in containers])

        for token in all_tokens:
            name = token.get_name()
            value = smart_str_ignore(token.get_value())

            yield name, value

            # Handle the case where the parameter is base64 encoded
            is_b64, decoded = maybe_decode_base64(value)
            if is_b64:
                yield name, decoded
Example #13
0
def get_response_cache_key(http_response, clean_response=None, headers=None):
    """
    Calculate a hash which identifies the HTTP response, based on the
    response code, the normalized path, the headers and the body.

    Note: query.body has been cleaned by get_clean_body()

    :param http_response: The HTTP response we want to get a cache key for

    :param clean_response: The FourOhFourResponse associated with the HTTPResponse
                           passed as parameter (optional, will be calculated if not
                           provided)

    :param headers: A string containing the HTTP response headers that have to be
                    used to calculate the hash (optional, an empty string is
                    used when not provided)

    :return: Hash of the HTTP response body
    """
    # Headers which were not provided must contribute nothing to the key,
    # instead of the text representation of None
    if headers is None:
        headers = ''

    #
    # Only some HTTP responses benefit from the XML-bones signature
    #
    if _should_use_xml_bones(http_response):
        body = get_xml_bones(http_response.get_body())
        normalized_path = FourOhFourResponse.normalize_path(
            http_response.get_uri())
    else:
        #
        # Get a clean_response if it was not provided
        #
        if clean_response is None:
            clean_response = FourOhFourResponse.from_http_response(
                http_response)

        body = clean_response.body
        normalized_path = clean_response.normalized_path

    #
    # Calculate the hash using all the captured information
    #
    key = ''.join([
        str(http_response.get_code()),
        smart_str_ignore(normalized_path),
        str(headers),
        smart_str_ignore(body)
    ])

    return quick_hash(key)
Example #14
0
    def get_hash(self, exclude_headers=None):
        """
        Calculate a hash of the response head and body.

        :param exclude_headers: Value forwarded to dump_response_head(), an
                                empty list is used when it is not provided
        :return: The result of self._quick_hash() over the dumped response
                 head followed by the body
        """
        # Always hand a list to dump_response_head(), never None
        if exclude_headers is None:
            exclude_headers = []

        headers = self.dump_response_head(exclude_headers=exclude_headers)
        body = smart_str_ignore(self.get_body())

        dump = '%s%s' % (headers, body)

        return self._quick_hash(dump)
Example #15
0
    def found_at(self):
        """
        Return a string representing WHAT was fuzzed. This string
        is used like this:
            - v.set_desc('SQL injection was found at: ' + mutant.found_at())

        :return: A message with the URL, the HTTP method and the short
                 representation of the sent data, followed by the modified
                 parameter name when the data container has a token
        """
        dc = self.get_dc()
        short_repr = dc.get_short_printable_repr()
        token = dc.get_token()

        template = '"%s", using HTTP method %s. The sent data was: "%s"'
        found_at_msg = template % (smart_str_ignore(self.get_url()),
                                   smart_str_ignore(self.get_method()),
                                   smart_str_ignore(short_repr))

        if token is None:
            return found_at_msg

        token_name = smart_str_ignore(token.get_name())
        return found_at_msg + ' The modified parameter was "%s".' % token_name
Example #16
0
    def found_at(self):
        """
        Return a string representing WHAT was fuzzed. This string
        is used like this:
            - v.set_desc('SQL injection was found at: ' + mutant.found_at())

        :return: The description of the URL, HTTP method and sent data; the
                 name of the modified parameter is appended when the data
                 container has a token set
        """
        dc = self.get_dc()
        dc_short = dc.get_short_printable_repr()
        token = dc.get_token()

        url_str = smart_str_ignore(self.get_url())
        method_str = smart_str_ignore(self.get_method())
        data_str = smart_str_ignore(dc_short)

        sentences = ['"%s", using HTTP method %s. The sent data was: "%s"'
                     % (url_str, method_str, data_str)]

        if token is not None:
            param_name = smart_str_ignore(token.get_name())
            sentences.append(' The modified parameter was "%s".' % param_name)

        return ''.join(sentences)
Example #17
0
    def set_token(self, set_token_path):
        """
        Sets the token in the DataContainer to point to the variable specified
        in set_token_path. Usually set_token_path will be one of:
            * ('id',) - When the data container doesn't support repeated params
            * ('id', 3) - When it does
            * A DataToken instance which holds the path

        :raises: RuntimeError when the DataContainer does NOT contain the
                 specified path
        :return: The token if we were able to set it in the DataContainer
        """
        try:
            # When the parameter is a DataToken instance it knows its path
            token_path = set_token_path.get_path()
        except AttributeError:
            # Otherwise the parameter is the path itself
            token_path = set_token_path
            received_token = False
        else:
            received_token = True

        for key, val, candidate_path, setter in self.iter_setters():
            if not candidate_path == token_path:
                continue

            if received_token:
                # Use token provided in parameter
                new_token = set_token_path
            elif isinstance(val, DataToken):
                # We've already done a set_token(...) for this token path
                # in the past, and now we're doing it again. Don't double
                # wrap the pre-existing token!
                new_token = val
            else:
                new_token = DataToken(key, val, candidate_path)

            setter(new_token)
            self.token = new_token

            return new_token

        # No setter matched the requested path, build the error message
        def format_path(path):
            return '(%s)' % ', '.join([smart_str_ignore(item) for item in path])

        requested = format_path(token_path)
        valid = ' - '.join([format_path(p) for _, _, p, _ in self.iter_setters()])

        if not valid:
            msg = 'Invalid token path "%s". No valid paths for "%s"'
            raise RuntimeError(msg % (requested, self.get_type()))

        msg = 'Invalid token path "%s". Valid paths are: %s'
        raise RuntimeError(msg % (requested, valid))
Example #18
0
    def _save_response_to_file(self, response):
        """
        Write the HTTP response body to a new temporary file.

        :param response: The HTTP response to save
        :return: The name of the file where the body was written. The file
                 is created with delete=False, so it is not removed when it
                 is closed.
        """
        # Note: The file needs to have .js extension to force retirejs to
        #       scan it. Any other extension will be ignored.
        response_file = tempfile.NamedTemporaryFile(prefix='retirejs-response-',
                                                    suffix='.w3af.js',
                                                    delete=False,
                                                    dir=self._get_js_temp_directory())

        try:
            body = smart_str_ignore(response.get_body())
            response_file.write(body)
        finally:
            # Release the file handle even when reading or writing the body
            # raised an exception
            response_file.close()

        return response_file.name
Example #19
0
    def set_token(self, set_token_path):
        """
        Sets the token in the DataContainer to point to the variable specified
        in set_token_path. Usually set_token_path will be one of:
            * ('id',) - When the data container doesn't support repeated params
            * ('id', 3) - When it does
            * A DataToken instance which holds the path

        :raises: RuntimeError when the DataContainer does NOT contain the
                 specified path
        :return: The token if we were able to set it in the DataContainer
        """
        use_given_token = True
        try:
            # This succeeds when the parameter is a DataToken instance
            wanted_path = set_token_path.get_path()
        except AttributeError:
            wanted_path = set_token_path
            use_given_token = False

        for key, val, current_path, setter in self.iter_setters():
            if current_path == wanted_path:
                token = self._choose_token = None
                del self._choose_token

                if use_given_token:
                    # Use token provided in parameter
                    token = set_token_path
                elif isinstance(val, DataToken):
                    # We've already done a set_token(...) for this token path
                    # in the past, and now we're doing it again. Don't double
                    # wrap the pre-existing token!
                    token = val
                else:
                    token = DataToken(key, val, current_path)

                setter(token)
                self.token = token

                return token

        def path_str(path):
            # Printable representation of one token path
            return '(%s)' % ', '.join([smart_str_ignore(i) for i in path])

        ppath = path_str(wanted_path)
        vpath = ' - '.join([path_str(p) for _, _, p, _ in self.iter_setters()])

        if vpath:
            msg = 'Invalid token path "%s". Valid paths are: %s'
            error_args = (ppath, vpath)
        else:
            msg = 'Invalid token path "%s". No valid paths for "%s"'
            error_args = (ppath, self.get_type())

        raise RuntimeError(msg % error_args)
Example #20
0
    def _analyze_domain(self, response, script_full_url, script_tag):
        """
        Checks if the domain is the same, or if it's considered secure. When
        it is neither, an Info is created and appended to the knowledge base.

        :param response: The HTTP response where the script tag was found
        :param script_full_url: The URL the script tag source points to
        :param script_tag: The script tag, its "src" attribute is highlighted
        """
        url = response.get_url()
        script_domain = script_full_url.get_domain()

        # Same domain, nothing to report
        if script_domain == response.get_url().get_domain():
            return

        # We do a "in" because the secure js domains list contains
        # entries such as ".google." which should be match. This is to
        # take into account things like ".google.com.br" without having
        # to list all of them.
        #
        # Not the best, could raise some false negatives, but... bleh!
        if any(trusted in script_domain for trusted in self._secure_js_domains):
            # It's a third party that we trust
            return

        template = ('The URL: "%s" has a script tag with a source that points'
                    ' to a third party site ("%s"). This practice is not'
                    ' recommended, the security of the current site is being'
                    ' delegated to the external entity.')
        desc = template % (smart_str_ignore(url),
                           smart_str_ignore(script_domain))

        info = Info('Cross-domain javascript source', desc, response.id,
                    self.get_name())
        info.set_url(url)
        info.add_to_highlight(script_tag.attrib.get('src'))
        info[CrossDomainInfoSet.ITAG] = script_domain

        self.kb_append_uniq_group(self,
                                  'cross_domain_js',
                                  info,
                                  group_klass=CrossDomainInfoSet)
Example #21
0
def diff(a, b):
    """
    Return the parts which are unique to each one of two similar strings.

    :param a: A string
    :param b: A string (similar to a)
    :return: Two strings (a_mod, b_mod) which are basically:

                a_mod = a - (a intersection b)
                b_mod = b - (a intersection b)

             Or if you want to see it in another way, the results are the
             parts of the string that make it unique between each other.
             The unique chunks of each string are joined with new lines.
    """
    text_a = smart_str_ignore(a)
    text_b = smart_str_ignore(b)

    differ = dmp_module.diff_match_patch()
    differ.Diff_Timeout = MAX_DIFF_TIME

    changes = differ.diff_main(text_a, text_b, checklines=True)
    differ.diff_cleanupSemantic(changes)

    # -1 marks text removed from `a`, 1 marks text added in `b`
    only_in_a = [chunk for op, chunk in changes if op == -1]
    only_in_b = [chunk for op, chunk in changes if op == 1]

    return '\n'.join(only_in_a), '\n'.join(only_in_b)
Example #22
0
def filter_non_printable(_str):
    """
    Replace the non-printable characters of a string.

    :param _str: The string to filter
    :return: A string where printable characters are kept and
             NON_PRINTABLE_REPLACE is written for non-printable ones, unless
             the previous output character already is NON_PRINTABLE_REPLACE
    """
    filtered = []

    for char in smart_str_ignore(_str):
        if is_printable_chr(char):
            filtered.append(char)
            continue

        # Never write two replacement characters in a row
        if not filtered or filtered[-1] != NON_PRINTABLE_REPLACE:
            filtered.append(NON_PRINTABLE_REPLACE)

    return ''.join(filtered)
Example #23
0
def filter_non_printable(_str):
    """
    Filter the non-printable characters out of a string.

    :param _str: The string to filter
    :return: The printable characters of the string, with
             NON_PRINTABLE_REPLACE in the place of non-printable characters.
             The replacement is skipped when the last character written is
             already NON_PRINTABLE_REPLACE.
    """
    output = []

    for current in smart_str_ignore(_str):
        printable = is_printable_chr(current)

        if not printable and output and output[-1] == NON_PRINTABLE_REPLACE:
            # The replacement was already written for this position
            continue

        output.append(current if printable else NON_PRINTABLE_REPLACE)

    return ''.join(output)
Example #24
0
    def _analyze_domain(self, response, script_full_url, script_tag):
        """
        Checks if the domain is the same, or if it's considered secure. An
        Info is stored in the knowledge base when the script source points
        to a different domain which is not in the secure domain list.

        :param response: The HTTP response which contains the script tag
        :param script_full_url: The URL set as source of the script tag
        :param script_tag: The script tag, "src" is read from its attributes
        """
        url = response.get_url()
        script_domain = script_full_url.get_domain()

        if script_domain == response.get_url().get_domain():
            return

        for trusted_domain in self._secure_js_domains:
            # We do a "in" because the secure js domains list contains
            # entries such as ".google." which should be match. This is to
            # take into account things like ".google.com.br" without having
            # to list all of them.
            #
            # Not the best, could raise some false negatives, but... bleh!
            if trusted_domain in script_domain:
                # It's a third party that we trust
                return

        script_src = script_tag.attrib.get('src')

        desc_args = (smart_str_ignore(url), smart_str_ignore(script_domain))
        desc = ('The URL: "%s" has a script tag with a source that points'
                ' to a third party site ("%s"). This practice is not'
                ' recommended, the security of the current site is being'
                ' delegated to the external entity.') % desc_args

        finding = Info('Cross-domain javascript source', desc,
                       response.id, self.get_name())
        finding.set_url(url)
        finding.add_to_highlight(script_src)
        finding[CrossDomainInfoSet.ITAG] = script_domain

        self.kb_append_uniq_group(self, 'cross_domain_js', finding,
                                  group_klass=CrossDomainInfoSet)
Example #25
0
    def _analyze_domain(self, response, script_full_url, script_tag):
        """
        Checks if the domain is the same, or if it's considered secure. If
        none of those is true an Info is appended to the knowledge base.

        :param response: The HTTP response which contains the script tag
        :param script_full_url: The URL the script tag source points to
        :param script_tag: The script tag, its "src" attribute is highlighted
        """
        response_url = response.get_url()
        script_domain = script_full_url.get_domain()

        if script_domain == response_url.get_domain():
            return

        # Query the multi in to check if any if the domains we loaded
        # previously match against the script domain we found in the
        # HTML.
        trusted_matches = self._secure_domain_multi_in.query(script_domain)
        if any(True for _ in trusted_matches):
            # It's a third party that we trust
            return

        template = ('The URL: "%s" has a script tag with a source that points'
                    ' to a third party site ("%s"). This practice is not'
                    ' recommended, the security of the current site is being'
                    ' delegated to the external entity.')
        desc = template % (smart_str_ignore(response_url),
                           smart_str_ignore(script_domain))

        info = Info('Cross-domain javascript source', desc, response.id,
                    self.get_name())
        info.set_url(response_url)
        info.add_to_highlight(script_tag.attrib.get('src'))
        info[CrossDomainInfoSet.ITAG] = script_domain

        self.kb_append_uniq_group(self,
                                  'cross_domain_js',
                                  info,
                                  group_klass=CrossDomainInfoSet)
Example #26
0
    def _get_cache_key(self, mutant):
        """
        Get the cache key for this mutant

        :param mutant: The mutant to calculate the key for
        :return: The MD5 hex digest of the method, URI, data and headers of
                 the mutant, concatenated in that order
        """
        key_parts = (mutant.get_method(),
                     mutant.get_uri(),
                     mutant.get_data(),
                     mutant.get_all_headers())

        serialized = ''.join([smart_str_ignore(part) for part in key_parts])

        return hashlib.md5(serialized).hexdigest()
Example #27
0
    def _get_cache_key(self, mutant):
        """
        Get the cache key for this mutant

        :param mutant: The mutant which provides the method, URI, data and
                       headers used to build the key
        :return: MD5 hex digest of those four values, in that order
        """
        method = mutant.get_method()
        uri = mutant.get_uri()
        data = mutant.get_data()
        headers = mutant.get_all_headers()

        hashed_parts = [smart_str_ignore(part)
                        for part in (method, uri, data, headers)]

        digest = hashlib.md5()
        digest.update(''.join(hashed_parts))
        return digest.hexdigest()
Example #28
0
    def _analyze_strange(self, request, response, ref, token_name, token_value):
        """
        Report the parameter when self._is_strange() flags it.

        :param request: The HTTP request
        :param response: The HTTP response
        :param ref: The URI which is set in the reported Info
        :param token_name: The parameter name
        :param token_value: The parameter value
        :return: True if an Info was appended to the knowledge base
        """
        if not self._is_strange(request, token_name, token_value):
            return False

        template = ('The URI: "%s" has a parameter named: "%s" with value:'
                    ' "%s", which is very uncommon. and requires manual'
                    ' verification.')
        raw_args = (response.get_uri(), token_name, token_value)
        desc = template % tuple(smart_str_ignore(arg) for arg in raw_args)

        info = Info('Uncommon query string parameter', desc, response.id,
                    self.get_name())
        info['parameter_value'] = token_value
        info.add_to_highlight(token_value)
        info.set_uri(ref)

        self.kb_append(self, 'strange_parameters', info)
        return True
Example #29
0
    def _analyze_strange(self, request, response, ref, token_name,
                         token_value):
        """
        Store an Info in the knowledge base when self._is_strange() says the
        parameter is strange.

        :param request: The HTTP request
        :param response: The HTTP response
        :param ref: The URI to associate with the Info
        :param token_name: The parameter name
        :param token_value: The parameter value
        :return: True when the parameter was reported, False otherwise
        """
        is_strange = self._is_strange(request, token_name, token_value)

        if is_strange:
            desc_args = tuple(map(smart_str_ignore,
                                  (response.get_uri(), token_name, token_value)))
            desc = ('The URI: "%s" has a parameter named: "%s" with value:'
                    ' "%s", which is very uncommon. and requires manual'
                    ' verification.') % desc_args

            finding = Info('Uncommon query string parameter', desc,
                           response.id, self.get_name())
            finding['parameter_value'] = token_value
            finding.add_to_highlight(token_value)
            finding.set_uri(ref)

            self.kb_append(self, 'strange_parameters', finding)
            return True

        return False
Example #30
0
    def _parse_xml(self, param_name, original_value):
        """
        Parse the XML into an object

        :param param_name: The name of the parameter as seen by the HTML parser
        :param original_value: The XML as sent by the application
        :return: The XML object or None if parsing failed
        """
        # This is a safety measure to prevent us from loading large XML files
        # into memory (high memory usage) or loading a very complex xml which
        # might require a lot of CPU time
        if len(original_value) > 1024 * 1024:
            return None

        try:
            original_value_str = smart_str_ignore(original_value)
        except Exception, e:
            msg = ('Failed to encode unicode original value to string'
                   ' in _parse_xml(). Exception: "%s"')
            om.out.debug(msg % e)
            return None
Example #31
0
def unique_everseen_hash(iterable):
    """
    Yield the elements of `iterable` in their original order, skipping the
    ones which were already yielded.

    Only the MD5 digest of each element is remembered (not the element
    itself), which keeps memory usage low when the elements are large, for
    example HTTP response bodies.

    Recommendation: The iterable should generate strings / unicode.

    :param iterable: The elements to filter
    :return: A generator which yields each distinct element once
    """
    known_digests = set()

    for item in iterable:
        digest = hashlib.md5(smart_str_ignore(item)).digest()

        if digest not in known_digests:
            known_digests.add(digest)
            yield item
Example #32
0
    def _parse_xml(self, original_value):
        """
        Parse the XML into an object

        :param original_value: The XML as sent by the application
        :return: The XML object or None if parsing failed
        """
        # This is a safety measure to prevent us from loading large XML files
        # into memory (high memory usage) or loading a very complex xml which
        # might require a lot of CPU time
        if len(original_value) > 1024 * 1024:
            return None

        # Secure, don't introduce XXE in our XXE detection plugin ;-)
        parser = etree.XMLParser(load_dtd=False,
                                 no_network=True,
                                 resolve_entities=False)

        try:
            xml_root = etree.fromstring(smart_str_ignore(original_value), parser=parser)
        except Exception, e:
            msg = 'Failed to parse XML to inject XXE tests. Exception was: "%s"'
            om.out.debug(msg % e)
            return None
Example #33
0
    def sent(self, needle):
        """
        Check if `needle`, or something similar to it, is part of this
        request.

        Grep plugins which often have false positives use this method to
        make sure that a "strange" string they found was not generated by
        an audit plugin. It should only be used by that type of plugins.

        Similar means that encoded / decoded / comparable variants are also
        taken into account, for example if d'z"0 was sent then d\'z"0 will
        be recognised as sent too.

        Note on performance:

            All the variants of the needle are calculated up-front. Making
            these calls lazy was considered:

                needles.add(unquote(needle))
                needles.add(quote(needle))
                needles.add(quote_plus(needle))
                needles.add(self.make_comp(needle))
                needles.add(self.make_comp(unquote(needle)))

            The idea was to skip the potentially unnecessary calls to
            self.make_comp(...) when the needle was already found using a
            cheaper variant.

            The impact in real life is really small: in most scenarios
            this method returns False, which means that all the comparisons
            need to be done anyways.

        :param needle: The string
        :return: True if something similar was sent
        """
        needle = smart_str_ignore(needle)
        unquoted_needle = unquote(needle)

        candidates = set()
        candidates.update((needle,
                           unquoted_needle,
                           quote(needle),
                           quote_plus(needle),
                           self.make_comp(needle),
                           self.make_comp(unquoted_needle)))

        # Ignore the short variants
        #
        # We don't want false negatives just because the string is
        # short after making comparable
        candidates = set(c for c in candidates if len(c) >= 3)

        uri = self.get_uri()
        data = smart_str_ignore(self.get_data())
        headers = smart_str_ignore(self.get_all_headers())

        uri_str = smart_str_ignore(uri)
        decoded_uri_str = smart_str_ignore(uri.url_decode())
        unquoted_data = unquote(data)

        sources = set()
        sources.update((uri_str,
                        decoded_uri_str,
                        self.make_comp(decoded_uri_str),
                        data,
                        unquoted_data,
                        self.make_comp(data),
                        self.make_comp(unquoted_data),
                        headers,
                        unquote(headers)))

        # Ignore the short request parts too
        sources = set(s for s in sources if len(s) >= 3)

        # All the request parts are searched at once
        haystack = '--'.join(sources)

        # False means that the needle was not sent in any way
        return any(candidate in haystack for candidate in candidates)
Example #34
0
    def _find_bsql(self, mutant, statement_tuple, statement_type):
        """
        Is the main algorithm for finding blind SQL injections.

        :return: A vulnerability object or None if nothing is found
        """
        # shortcuts
        true_statement = statement_tuple[0]
        false_statement = statement_tuple[1]
        send_clean = self._uri_opener.send_clean
        debugging_id = self.get_debugging_id()

        mutant.set_token_value(true_statement)
        true_response, body_true_response = send_clean(mutant,
                                                       debugging_id=debugging_id,
                                                       grep=True)

        mutant.set_token_value(false_statement)
        false_response, body_false_response = send_clean(mutant,
                                                         debugging_id=debugging_id,
                                                         grep=False)

        if body_true_response == body_false_response:
            msg = ('There is NO CHANGE between the true and false responses.'
                   ' NO WAY w3af is going to detect a blind SQL injection'
                   ' using response diffs in this case.')
            self.debug(msg, mutant=mutant)
            return None

        compare_diff = False

        msg = 'Comparing body_true_response and body_false_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=false_response)

        if self.equal_with_limit(body_true_response,
                                 body_false_response,
                                 compare_diff):
            #
            # They might be equal because of various reasons, in the best
            # case scenario there IS a blind SQL injection but the % of the
            # HTTP response body controlled by it is so small that the equal
            # ratio is not catching it.
            #
            self.debug('Setting compare_diff to True', mutant=mutant)
            compare_diff = True

        mutant.set_token_value(self.SYNTAX_ERROR)
        syntax_error_response, body_syntax_error_response = send_clean(mutant,
                                                                       debugging_id=debugging_id,
                                                                       grep=False)

        msg = 'Comparing body_true_response and body_syntax_error_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=syntax_error_response)

        if self.equal_with_limit(body_true_response,
                                 body_syntax_error_response,
                                 compare_diff):
            return None

        # Check if its a search engine before we dig any deeper...
        search_disambiguator = self._remove_all_special_chars(true_statement)
        mutant.set_token_value(search_disambiguator)
        search_response, body_search_response = send_clean(mutant,
                                                           grep=False,
                                                           debugging_id=debugging_id)

        # If they are equal then we have a search engine
        msg = 'Comparing body_true_response and body_search_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=search_response)

        if self.equal_with_limit(body_true_response,
                                 body_search_response,
                                 compare_diff):
            return None

        # Verify the injection!
        statements = self._get_statements(mutant)
        second_true_stm = statements[statement_type][0]
        second_false_stm = statements[statement_type][1]

        mutant.set_token_value(second_true_stm)
        second_true_response, body_second_true_response = send_clean(mutant,
                                                                     grep=False,
                                                                     debugging_id=debugging_id)

        mutant.set_token_value(second_false_stm)
        second_false_response, body_second_false_response = send_clean(mutant,
                                                                       grep=False,
                                                                       debugging_id=debugging_id)

        msg = 'Comparing body_second_true_response and body_true_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=second_true_response)

        if not self.equal_with_limit(body_second_true_response,
                                     body_true_response,
                                     compare_diff):
            return None

        msg = 'Comparing body_second_false_response and body_false_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=false_response,
                   response_2=second_false_response)

        if self.equal_with_limit(body_second_false_response,
                                 body_false_response,
                                 compare_diff):
            
            response_ids = [second_false_response.id,
                            second_true_response.id]
            
            desc = ('Blind SQL injection was found at: "%s", using'
                    ' HTTP method %s. The injectable parameter is: "%s"')
            desc %= (smart_str_ignore(mutant.get_url()),
                     smart_str_ignore(mutant.get_method()),
                     smart_str_ignore(mutant.get_token_name()))
            
            v = Vuln.from_mutant('Blind SQL injection vulnerability', desc,
                                 severity.HIGH, response_ids, 'blind_sqli',
                                 mutant)
            
            om.out.debug(v.get_desc())

            v['type'] = statement_type
            v['true_html'] = second_true_response.get_body()
            v['false_html'] = second_false_response.get_body()
            v['error_html'] = syntax_error_response.get_body()
            return v

        return None
Example #35
0
    def _analyze_result(self, mutant, response):
        """
        Analyze results of the _send_mutant method.
        Try to find the local file inclusions.
        """
        #
        #   I will only report the vulnerability once.
        #
        if self._has_bug(mutant):
            return

        #
        #   Identify the vulnerability
        #
        for file_pattern_match in self._find_common_file_fragments(response):
            if file_pattern_match not in mutant.get_original_response_body():
                
                desc = 'Local File Inclusion was found at: %s'
                desc %= mutant.found_at()
                
                v = Vuln.from_mutant('Local file inclusion vulnerability',
                                     desc, severity.MEDIUM, response.id,
                                     self.get_name(), mutant)

                v['file_pattern'] = file_pattern_match
                
                v.add_to_highlight(file_pattern_match)
                self.kb_append_uniq(self, 'lfi', v)
                return

        #
        # If the vulnerability could not be identified by matching strings that
        # commonly appear in "/etc/passwd", then I'll check one more thing...
        # (note that this is run if no vulns were identified)
        #
        # http://host.tld/show_user.php?id=show_user.php
        #
        # The calls to smart_str_ignore fix a UnicodeDecoreError which appears when
        # the token value is a binary string which can't be converted to unicode.
        # This happens, for example, when trying to upload JPG files to a multipart form
        #
        # >>> u'' in '\x80'
        # ...
        # UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: ordinal not in range(128)
        #
        filename = smart_str_ignore(mutant.get_url().get_file_name())
        token_value = smart_str_ignore(mutant.get_token_value())

        if filename in token_value:
            match, lang = contains_source_code(response)
            if match:
                # We were able to read the source code of the file that is
                # vulnerable to local file read
                desc = ('An arbitrary local file read vulnerability was'
                        ' found at: %s')
                desc %= mutant.found_at()
                
                v = Vuln.from_mutant('Local file inclusion vulnerability',
                                     desc, severity.MEDIUM, response.id,
                                     self.get_name(), mutant)

                #
                #    Set which part of the source code to match
                #
                match_source_code = match.group(0)
                v['file_pattern'] = match_source_code

                self.kb_append_uniq(self, 'lfi', v)
                return

        #
        #   Check for interesting errors (note that this is run if no vulns were
        #   identified)
        #
        body = response.get_body()
        for _, error_str, _ in self.file_read_error_multi_re.query(body):
            if error_str not in mutant.get_original_response_body():
                desc = 'A file read error was found at: %s'
                desc %= mutant.found_at()
                
                i = Info.from_mutant('File read error', desc, response.id,
                                     self.get_name(), mutant)
                i.add_to_highlight(error_str)
                
                self.kb_append_uniq(self, 'error', i)
    def inspect_data_to_log(self, pool, inspect_data):
        """
        Print the inspect_threads data to the log files

        Each item in `inspect_data` is the state of one pool worker, as
        returned by the worker's get_state():

            def get_state(self):
                return {'func_name': self.func_name,
                        'args': self.args,
                        'kwargs': self.kwargs,
                        'start_time': self.start_time,
                        'idle': self.is_idle(),
                        'job': self.job,
                        'worker_id': self.id}

        Besides the keys shown above, this method reads the 'name' key and,
        when present, the 'trace' key of each state.

        Workers which have been running a job for at least 10 seconds are
        logged in detail, idle workers are logged together after them and
        the last line contains the percentage of idle workers.

        :param pool: The pool which owns the workers, only the worker_names
                     attribute is read
        :param inspect_data: A sized iterable with one state dict per worker
        :return: None
        """
        name = pool.worker_names

        if not len(inspect_data):
            self.write_to_log('No pool workers at %s.' % (name,))
            return

        def short_repr(obj):
            # String representation of obj, cut at 80 characters to keep
            # the log lines short
            try:
                obj_repr = repr(obj)
            except UnicodeEncodeError:
                obj_str = smart_str_ignore(obj)
            else:
                obj_str = smart_str_ignore(obj_repr)

            if len(obj_str) > 80:
                obj_str = obj_str[:80] + "...'"

            return obj_str

        #
        #   Write the detailed information
        #
        idle_workers = []

        for worker_state in inspect_data:
            if worker_state['idle']:
                idle_workers.append(worker_state)
                continue

            if worker_state['start_time'] is None:
                continue

            spent = time.time() - worker_state['start_time']

            # Save us some disk space and sanity, only log worker state if it has
            # been running for at least 10 seconds
            if spent < 10:
                continue

            args_str = ', '.join([short_repr(arg) for arg in worker_state['args']])

            # Iterate over (key, value) pairs. Iterating the kwargs dict
            # directly yields only the keys, which can't be unpacked into
            # key and value. dict() accepts a mapping and also a sequence
            # of pairs, so both are supported.
            short_kwargs = {}
            for key, value in dict(worker_state['kwargs']).items():
                short_kwargs[key] = short_repr(value)

            kwargs_str = smart_str_ignore(short_kwargs)

            func_name = smart_str_ignore(worker_state['func_name'])
            func_name = self.clean_function_name(func_name)

            message = ('Worker with ID %s(%s) has been running job %s for %.2f seconds.'
                       ' The job is: %s(%s, kwargs=%s)')
            message %= (worker_state['name'],
                        worker_state['worker_id'],
                        worker_state['job'],
                        spent,
                        func_name,
                        args_str,
                        kwargs_str)

            trace = worker_state.get('trace', None)
            if trace is not None:
                message += '. Function call tree: %s' % trace

            self.write_to_log(message)

        #
        #   Write the idle workers all together at the end, this makes
        #   the log easier to read
        #
        for worker_state in idle_workers:
            message = 'Worker with ID %s(%s) is idle.'
            message %= (worker_state['name'], worker_state['worker_id'])
            self.write_to_log(message)

        #
        #   Write some stats
        #
        #   All the idle workers were collected in the loop above. float()
        #   prevents Python 2 from doing an integer division.
        #
        total_workers = len(inspect_data)
        idle_perc = (float(len(idle_workers)) / total_workers) * 100
        self.write_to_log('%i%% of %s workers are idle.' % (idle_perc, name))
Example #37
0
    def to_string(self):
        """
        :return: The HTTP request / response as an xml node (string), which
                 is rendered using self.TEMPLATE:

        <http-transaction id="...">
            <http-request>
                <status></status>
                <headers>
                    <header>
                        <field></field>
                        <content></content>
                    </header>
                </headers>
                <body content-encoding="base64"></body>
            </http-request>

            <http-response>
                <status></status>
                <headers>
                    <header>
                        <field></field>
                        <content></content>
                    </header>
                </headers>
                <body content-encoding="base64"></body>
            </http-response>
        </http-transaction>

        The bodies are always base64 encoded, no matter the content-type,
        which is one of the differences with the previous implementation
        and helps prevent encoding issues.
        """
        # A previously rendered node is returned when it is in the cache
        cached_node = self.get_node_from_cache()
        if cached_node is not None:
            return cached_node

        # The request and response are read using a HistoryItem
        #
        # In some cases (which still need to be identified and fixed) this
        # might raise a DBException, which the caller needs to handle by
        # ignoring this part of the HTTP transaction
        request, response = HistoryItem().load_from_file(self._id)

        # Empty bodies are encoded as an empty string
        encoded_request_body = base64.encodestring(
            smart_str_ignore(request.get_data() or ''))
        encoded_response_body = base64.encodestring(
            smart_str_ignore(response.get_body() or ''))

        request_context = {'status': request.get_request_line().strip(),
                           'headers': request.get_headers(),
                           'body': encoded_request_body}

        response_context = {'status': response.get_status_line().strip(),
                            'headers': response.get_headers(),
                            'body': encoded_response_body}

        context = dotdict({'id': self._id,
                           'request': request_context,
                           'response': response_context})

        transaction = self.get_template(self.TEMPLATE).render(context)
        self.save_node_to_cache(transaction)

        return transaction
    def _find_bsql(self, mutant, statement_tuple, statement_type):
        """
        Is the main algorithm for finding blind SQL injections.

        :return: A vulnerability object or None if nothing is found
        """
        # shortcuts
        true_statement = statement_tuple[0]
        false_statement = statement_tuple[1]
        send_clean = self._uri_opener.send_clean
        debugging_id = self.get_debugging_id()

        mutant.set_token_value(true_statement)
        true_response, body_true_response = send_clean(mutant,
                                                       debugging_id=debugging_id,
                                                       grep=True)

        mutant.set_token_value(false_statement)
        false_response, body_false_response = send_clean(mutant,
                                                         debugging_id=debugging_id,
                                                         grep=False)

        if body_true_response == body_false_response:
            msg = ('There is NO CHANGE between the true and false responses.'
                   ' NO WAY w3af is going to detect a blind SQL injection'
                   ' using response diffs in this case.')
            self.debug(msg, mutant=mutant)
            return None

        compare_diff = False

        msg = 'Comparing body_true_response and body_false_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=false_response)

        if self.equal_with_limit(body_true_response,
                                 body_false_response,
                                 compare_diff):
            #
            # They might be equal because of various reasons, in the best
            # case scenario there IS a blind SQL injection but the % of the
            # HTTP response body controlled by it is so small that the equal
            # ratio is not catching it.
            #
            self.debug('Setting compare_diff to True', mutant=mutant)
            compare_diff = True

        mutant.set_token_value(self.SYNTAX_ERROR)
        syntax_error_response, body_syntax_error_response = send_clean(mutant,
                                                                       debugging_id=debugging_id,
                                                                       grep=False)

        msg = 'Comparing body_true_response and body_syntax_error_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=syntax_error_response)

        if self.equal_with_limit(body_true_response,
                                 body_syntax_error_response,
                                 compare_diff):
            return None

        # Check if its a search engine before we dig any deeper...
        search_disambiguator = self._remove_all_special_chars(true_statement)
        mutant.set_token_value(search_disambiguator)
        search_response, body_search_response = send_clean(mutant,
                                                           grep=False,
                                                           debugging_id=debugging_id)

        # If they are equal then we have a search engine
        msg = 'Comparing body_true_response and body_search_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=search_response)

        if self.equal_with_limit(body_true_response,
                                 body_search_response,
                                 compare_diff):
            return None

        # Now a nice trick from real-life. In some search engines when
        # searching for `46" OR "46"="46" OR "46"="46` we get only a
        # couple of results, which I assume is because the search
        # engine is trying to search for more terms.
        #
        # Removing the special characters will make w3af search for
        # `46  OR  46   46  OR  46   46`, which yields many results in
        # the application's search engine, which I assume is because the
        # search engine just needs to match objects with 46 / OR.
        #
        # So, this means that the responses ARE different, but they came
        # from a search engine. The check above is NOT going to catch that
        # and will yield a false positive.
        #
        # If this is not a search engine, or is a search engine with a blind
        # sql injection, the result with `46" OR "46"="46" OR "46"="46` should
        # be have a larger HTTP response body: "all results" should be there.
        #
        # If it is a search engine, then the result for the search string
        # without special characters will be larger.
        if len(body_search_response) * 0.8 > len(body_true_response):
            msg = 'Search engine detected using response length, stop.'
            self.debug(msg,
                       statement_type=statement_type,
                       mutant=mutant,
                       response_1=true_response,
                       response_2=search_response)
            return None

        # Verify the injection!
        statements = self._get_statements(mutant)
        second_true_stm = statements[statement_type][0]
        second_false_stm = statements[statement_type][1]

        mutant.set_token_value(second_true_stm)
        second_true_response, body_second_true_response = send_clean(mutant,
                                                                     grep=False,
                                                                     debugging_id=debugging_id)

        mutant.set_token_value(second_false_stm)
        second_false_response, body_second_false_response = send_clean(mutant,
                                                                       grep=False,
                                                                       debugging_id=debugging_id)

        msg = 'Comparing body_second_true_response and body_true_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=true_response,
                   response_2=second_true_response)

        if not self.equal_with_limit(body_second_true_response,
                                     body_true_response,
                                     compare_diff):
            return None

        msg = 'Comparing body_second_false_response and body_false_response.'
        self.debug(msg,
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=false_response,
                   response_2=second_false_response)

        if not self.equal_with_limit(body_second_false_response,
                                     body_false_response,
                                     compare_diff):
            return None
            
        response_ids = [second_false_response.id,
                        second_true_response.id]

        desc = ('Blind SQL injection was found at: "%s", using'
                ' HTTP method %s. The injectable parameter is: "%s"')
        desc %= (smart_str_ignore(mutant.get_url()),
                 smart_str_ignore(mutant.get_method()),
                 smart_str_ignore(mutant.get_token_name()))

        v = Vuln.from_mutant('Blind SQL injection vulnerability', desc,
                             severity.HIGH, response_ids, 'blind_sqli',
                             mutant)

        om.out.debug(v.get_desc())
        self.debug(v.get_desc(),
                   statement_type=statement_type,
                   mutant=mutant,
                   response_1=false_response,
                   response_2=second_false_response)

        v['type'] = statement_type
        v['true_html'] = second_true_response.get_body()
        v['false_html'] = second_false_response.get_body()
        v['error_html'] = syntax_error_response.get_body()
        return v
Example #39
0
    def inspect_data_to_log(self, pool, inspect_data):
        """
        Print the inspect_threads data to the log files

        Each item in `inspect_data` is the state of one pool worker, as
        returned by the worker's get_state():

            def get_state(self):
                return {'func_name': self.func_name,
                        'args': self.args,
                        'kwargs': self.kwargs,
                        'start_time': self.start_time,
                        'idle': self.is_idle(),
                        'job': self.job,
                        'worker_id': self.id}

        Besides the keys shown above, this method reads the 'name' key and,
        when present, the 'trace' key of each state.

        Workers which have been running a job for at least 10 seconds are
        logged in detail, idle workers are logged together after them and
        the last line contains the percentage of idle workers.

        :param pool: The pool which owns the workers, only the worker_names
                     attribute is read
        :param inspect_data: A sized iterable with one state dict per worker
        :return: None
        """
        name = pool.worker_names

        if not len(inspect_data):
            self.write_to_log('No pool workers at %s.' % (name, ))
            return

        #
        #   Write the detailed information
        #
        idle_workers = []

        for worker_state in inspect_data:
            if worker_state['idle']:
                idle_workers.append(worker_state)
                continue

            # A worker which is not idle but has no start time can't be
            # measured, subtracting None from time.time() raises TypeError
            if worker_state['start_time'] is None:
                continue

            spent = time.time() - worker_state['start_time']

            # Save us some disk space and sanity, only log worker state if it has
            # been running for at least 10 seconds
            if spent < 10:
                continue

            parts = []
            for arg in worker_state['args']:
                # In Python 2 repr() raises UnicodeEncodeError when the
                # object's __repr__ returns non-ascii unicode, use the
                # object itself in those cases
                try:
                    arg_repr = repr(arg)
                except UnicodeEncodeError:
                    parts.append(smart_str_ignore(arg))
                else:
                    parts.append(smart_str_ignore(arg_repr))

            args_str = ', '.join(parts)
            kwargs_str = smart_str_ignore(worker_state['kwargs'])

            func_name = smart_str_ignore(worker_state['func_name'])
            func_name = self.clean_function_name(func_name)

            message = (
                'Worker with ID %s(%s) has been running job %s for %.2f seconds.'
                ' The job is: %s(%s, kwargs=%s)')
            message %= (worker_state['name'], worker_state['worker_id'],
                        worker_state['job'], spent, func_name, args_str,
                        kwargs_str)

            trace = worker_state.get('trace', None)
            if trace is not None:
                message += '. Function call tree: %s' % trace

            self.write_to_log(message)

        #
        #   Write the idle workers all together at the end, this makes
        #   the log easier to read
        #
        for worker_state in idle_workers:
            message = 'Worker with ID %s(%s) is idle.'
            message %= (worker_state['name'], worker_state['worker_id'])
            self.write_to_log(message)

        #
        #   Write some stats
        #
        #   All the idle workers were collected in the loop above. float()
        #   prevents Python 2 from doing an integer division.
        #
        total_workers = len(inspect_data)
        idle_perc = (float(len(idle_workers)) / total_workers) * 100
        self.write_to_log('%i%% of %s workers are idle.' % (idle_perc, name))
Example #40
0
    def sent(self, needle):
        """
        Tell whether `needle`, or a close variation of it, is part of this
        request.

        Grep plugins which are prone to false positives use this method to
        make sure that a "strange" string they found was not generated by
        an audit plugin. Only those plugins should call it.

        Several encodings of both the needle and the request are compared,
        for example having sent d'z"0 is enough for d\'z"0 to be recognised
        as sent too.

        Note on performance:

            All the needle variations are calculated up-front. Calculating
            them lazily (stopping as soon as one of them matches) was
            considered in order to save calls to self.make_comp(...), but
            in most scenarios this method returns False, which means that
            every comparison has to be performed anyways and the real life
            gain is really small.

        :param needle: The string to look for
        :return: True if something similar was sent
        """
        needle = smart_str_ignore(needle)
        decoded_needle = unquote(needle)

        variants = {
            needle,
            decoded_needle,
            quote(needle),
            quote_plus(needle),
            self.make_comp(needle),
            self.make_comp(decoded_needle),
        }

        # Variations which end up being shorter than three characters
        # (for example after being made comparable) are dropped, keeping
        # them would lead to false negatives in the calling plugin
        variants = {variant for variant in variants if len(variant) >= 3}

        uri = self.get_uri()
        data = smart_str_ignore(self.get_data())
        headers = smart_str_ignore(self.get_all_headers())
        decoded_data = unquote(data)

        candidates = {
            smart_str_ignore(uri),
            smart_str_ignore(uri.url_decode()),
            self.make_comp(smart_str_ignore(uri.url_decode())),
            data,
            decoded_data,
            self.make_comp(data),
            self.make_comp(decoded_data),
            headers,
            unquote(headers),
        }

        # The same length restriction applies to the places we search in
        candidates = {candidate for candidate in candidates
                      if len(candidate) >= 3}

        # False means that the needle was not sent in any way
        return any(variant in candidate
                   for variant in variants
                   for candidate in candidates)
Example #41
0
    def should_grep(self, request, response):
        """
        Decide if the request / response pair needs to be analyzed by the
        grep plugins.

        This logic used to live in grep_plugin.py, it was moved here because
        the filters need a central location to be stored.

        The result of each decision is counted in self._should_grep_stats.

        :param request: The HTTP request (not inspected by this method)
        :param response: The HTTP response
        :return: True if I should grep this request/response pair
        """
        def reject(reason):
            # Count the reason why the pair was discarded
            self._should_grep_stats[reason] += 1
            return False

        if not self._consumer_plugins:
            return False

        self._print_should_grep_stats()

        # The target domains are kept in an attribute to avoid querying
        # the cf each time a request goes to a grep plugin. Given that in
        # the future the cf will be a sqlite database, this is an important
        # improvement.
        if self._target_domains is None:
            self._target_domains = cf.cf.get('target_domains')

        response_domain = response.get_url().get_domain()
        if response_domain not in self._target_domains:
            return reject('reject-out-of-scope')

        #
        # First filter: the same URI is not analyzed twice
        #
        # HTTP responses sometimes vary in just one byte, and that is
        # enough to completely break the second filter (which hashes the
        # response headers and body).
        #
        # The URI filter is less effective during the audit phase, where
        # plugins are heavily changing the query-string, but it still
        # prevents some HTTP requests and responses from reaching the grep
        # plugins
        #
        if not self._already_analyzed_url.add(response.get_uri()):
            return reject('reject-seen-url')

        #
        # Second filter: the same HTTP response is not analyzed twice
        #
        # Most grep plugins analyze HTTP response bodies, some analyze HTTP
        # response headers and a very small subset analyzes HTTP requests,
        # which is what makes this filter possible.
        #
        # Using only the body for the key was discarded because it had more
        # chances of "hiding" HTTP responses from the grep plugins:
        #
        #   * HTTP response A contains header set X and body Y. It will be
        #     processed because it is the first time body Y is seen.
        #
        #   * HTTP response A contains header set Z and body Y. It will be
        #     ignored because Y was already seen.
        #
        # That is why both headers and body are used. Headers holding dates
        # or other values which change a lot degrade the filter, the impact
        # is reduced with EXCLUDE_HEADERS_FOR_HASH
        #
        hashable_headers = smart_str_ignore(
            response.dump_headers(
                exclude_headers=self.EXCLUDE_HEADERS_FOR_HASH))

        #
        # The cached response cache key gives a performance boost here, in
        # initial tests it made the `test_should_grep_speed` unittest go
        # from 26 to 9 seconds.
        #
        key_cache = self._response_cache_key_cache
        response_key = key_cache.get_response_cache_key(
            response, headers=hashable_headers)

        if not self._already_analyzed_body.add(response_key):
            return reject('reject-seen-body')

        self._should_grep_stats['accept'] += 1
        return True
    def _find_bsql(self, mutant, statement_tuple, statement_type):
        """
        Core response-diff algorithm used to find blind SQL injections.

        The mutant is sent with a true statement, a false statement, a
        syntax error and a version of the true statement without special
        characters. The HTTP response bodies are compared in order to
        discard the cases which are not an injection, then a second pair of
        true / false statements is sent to verify the finding.

        :param mutant: The mutant which receives each statement as token
                       value before being sent
        :param statement_tuple: Index 0 holds the true statement and index
                                1 holds the false statement
        :param statement_type: Key used to get the verification statements
                               from self._get_statements(), also stored in
                               the vulnerability as 'type'
        :return: A vulnerability object or None if nothing is found
        """
        true_statement = statement_tuple[0]
        false_statement = statement_tuple[1]
        send_clean = self._uri_opener.send_clean
        debugging_id = self.get_debugging_id()

        def probe(token_value, grep=False):
            # Set the token value and send the mutant, only the very first
            # request of the algorithm is sent with grep=True
            mutant.set_token_value(token_value)
            return send_clean(mutant, debugging_id=debugging_id, grep=grep)

        def log_comparison(message, first_response, second_response):
            self.debug(message,
                       statement_type=statement_type,
                       mutant=mutant,
                       response_1=first_response,
                       response_2=second_response)

        true_response, body_true = probe(true_statement, grep=True)
        false_response, body_false = probe(false_statement)

        if body_true == body_false:
            self.debug('There is NO CHANGE between the true and false'
                       ' responses. NO WAY w3af is going to detect a blind'
                       ' SQL injection using response diffs in this case.',
                       mutant=mutant)
            return None

        compare_diff = False

        log_comparison('Comparing body_true_response and'
                       ' body_false_response.',
                       true_response, false_response)

        if self.equal_with_limit(body_true, body_false, compare_diff):
            #
            # The bodies might be equal for various reasons, in the best
            # case scenario there IS a blind SQL injection but the % of
            # the HTTP response body it controls is so small that the
            # equal ratio does not catch it.
            #
            self.debug('Setting compare_diff to True', mutant=mutant)
            compare_diff = True

        syntax_error_response, body_syntax_error = probe(self.SYNTAX_ERROR)

        log_comparison('Comparing body_true_response and'
                       ' body_syntax_error_response.',
                       true_response, syntax_error_response)

        if self.equal_with_limit(body_true, body_syntax_error, compare_diff):
            return None

        # Find out if this is a search engine before digging any deeper
        search_disambiguator = self._remove_all_special_chars(true_statement)
        search_response, body_search = probe(search_disambiguator)

        # Equal bodies mean that we are talking to a search engine
        log_comparison('Comparing body_true_response and'
                       ' body_search_response.',
                       true_response, search_response)

        if self.equal_with_limit(body_true, body_search, compare_diff):
            return None

        # A trick learnt from real-life. Some search engines return only a
        # couple of results for `46" OR "46"="46" OR "46"="46`, which I
        # assume is because the search engine is trying to search for more
        # terms.
        #
        # Without the special characters w3af searches for
        # `46  OR  46   46  OR  46   46`, which yields many results in the
        # application's search engine, which I assume is because the search
        # engine just needs to match objects with 46 / OR.
        #
        # In that case the responses ARE different, but they came from a
        # search engine: the comparison above does NOT catch it and would
        # yield a false positive.
        #
        # When this is not a search engine, or it is a search engine with a
        # blind sql injection, `46" OR "46"="46" OR "46"="46` should have
        # the larger HTTP response body: "all results" should be there.
        #
        # When it is a search engine the response for the search string
        # without special characters will be the larger one.
        if len(body_search) * 0.8 > len(body_true):
            log_comparison('Search engine detected using response length,'
                           ' stop.',
                           true_response, search_response)
            return None

        # Verify the injection using a second pair of statements
        statements = self._get_statements(mutant)
        second_true_stm = statements[statement_type][0]
        second_false_stm = statements[statement_type][1]

        second_true_response, body_second_true = probe(second_true_stm)
        second_false_response, body_second_false = probe(second_false_stm)

        log_comparison('Comparing body_second_true_response and'
                       ' body_true_response.',
                       true_response, second_true_response)

        if not self.equal_with_limit(body_second_true, body_true,
                                     compare_diff):
            return None

        log_comparison('Comparing body_second_false_response and'
                       ' body_false_response.',
                       false_response, second_false_response)

        if not self.equal_with_limit(body_second_false, body_false,
                                     compare_diff):
            return None

        # All the checks passed, report the vulnerability
        desc = ('Blind SQL injection was found at: "%s", using'
                ' HTTP method %s. The injectable parameter is: "%s"')
        desc %= (smart_str_ignore(mutant.get_url()),
                 smart_str_ignore(mutant.get_method()),
                 smart_str_ignore(mutant.get_token_name()))

        response_ids = [second_false_response.id, second_true_response.id]

        v = Vuln.from_mutant('Blind SQL injection vulnerability', desc,
                             severity.HIGH, response_ids, 'blind_sqli',
                             mutant)

        om.out.debug(v.get_desc())
        log_comparison(v.get_desc(), false_response, second_false_response)

        v['type'] = statement_type
        v['true_html'] = second_true_response.get_body()
        v['false_html'] = second_false_response.get_body()
        v['error_html'] = syntax_error_response.get_body()
        return v
Example #43
0
    def _analyze_result(self, mutant, response):
        """
        Analyze the HTTP response obtained after sending a mutant with
        _send_mutant and try to find local file inclusions.

        Three checks are run, the first two store a vulnerability in the
        'lfi' location of the KB and stop the analysis:

            * Fragments of common files found in the response
            * Source code found in the response when the token value
              contains the name of the file being requested

        The last one stores an informational finding in 'error' for each
        file read error found in the response.

        :param mutant: The mutant that was sent
        :param response: The HTTP response for the mutant
        """
        # The vulnerability is reported only once
        if self._has_bug(mutant):
            return

        #
        #   Identify the vulnerability using file fragments
        #
        for fragment in self._find_common_file_fragments(response):
            # Ignore anything which was already in the original response
            if fragment in mutant.get_original_response_body():
                continue

            desc = 'Local File Inclusion was found at: %s' % mutant.found_at()

            v = Vuln.from_mutant('Local file inclusion vulnerability',
                                 desc, severity.MEDIUM, response.id,
                                 self.get_name(), mutant)
            v['file_pattern'] = fragment
            v.add_to_highlight(fragment)

            self.kb_append_uniq(self, 'lfi', v)
            return

        #
        # The strings that commonly appear in "/etc/passwd" did not match
        # (this only runs when no vulns were identified), check one more
        # thing...
        #
        # http://host.tld/show_user.php?id=show_user.php
        #
        # smart_str_ignore is used to avoid a UnicodeDecodeError which
        # appears when the token value is a binary string which can't be
        # converted to unicode, for example when trying to upload JPG files
        # to a multipart form:
        #
        # >>> u'' in '\x80'
        # ...
        # UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: ordinal not in range(128)
        #
        filename = smart_str_ignore(mutant.get_url().get_file_name())
        token_value = smart_str_ignore(mutant.get_token_value())

        if filename in token_value:
            match, _ = contains_source_code(response)

            if match:
                # The source code of the file that is vulnerable to local
                # file read was returned in the response
                desc = ('An arbitrary local file read vulnerability was'
                        ' found at: %s') % mutant.found_at()

                v = Vuln.from_mutant('Local file inclusion vulnerability',
                                     desc, severity.MEDIUM, response.id,
                                     self.get_name(), mutant)

                # The part of the source code that matched
                v['file_pattern'] = match.group(0)

                self.kb_append_uniq(self, 'lfi', v)
                return

        #
        #   Look for interesting errors (this only runs when no vulns were
        #   identified)
        #
        response_body = response.get_body()
        error_matches = self.file_read_error_multi_re.query(response_body)

        for _, error_str, _ in error_matches:
            if error_str in mutant.get_original_response_body():
                continue

            desc = 'A file read error was found at: %s' % mutant.found_at()

            i = Info.from_mutant('File read error', desc, response.id,
                                 self.get_name(), mutant)
            i.add_to_highlight(error_str)

            self.kb_append_uniq(self, 'error', i)
Example #44
0
 def get_body_hash(self):
     """
     :return: The result of applying self._quick_hash() to the body, which
              is first converted using smart_str_ignore
     """
     return self._quick_hash(smart_str_ignore(self.get_body()))
Example #45
0
    def to_string(self):
        """
        Render the HTTP request / response identified by self._id using
        the template in self.TEMPLATE. The rendered node is saved to the
        cache, when the node is already there it is returned without doing
        any other work.

        The generated node looks like:

        <http-transaction id="...">
            <http-request>
                <status></status>
                <headers>
                    <header>
                        <field></field>
                        <content></content>
                    </header>
                </headers>
                <body content-encoding="base64"></body>
            </http-request>

            <http-response>
                <status></status>
                <headers>
                    <header>
                        <field></field>
                        <content></content>
                    </header>
                </headers>
                <body content-encoding="base64"></body>
            </http-response>
        </http-transaction>

        Unlike the previous implementation, the bodies are always base64
        encoded, no matter the content-type. This helps prevent encoding
        issues.

        :return: An xml node (as a string) representing the HTTP request /
                 response.
        """
        # Try the cache first
        cached_node = self.get_node_from_cache()
        if cached_node is not None:
            return cached_node

        # The request and response are read using a HistoryItem
        #
        # This might raise a DBException in some cases (which I still
        # need to identify and fix). The caller needs to handle that
        # exception by ignoring this part of the HTTP transaction
        history = HistoryItem()
        request, response = history.load_from_file(self._id)

        # An empty string is encoded when there is no data / body
        request_data = request.get_data() or ''
        encoded_request_body = base64.encodestring(
            smart_str_ignore(request_data))

        response_body = response.get_body() or ''
        encoded_response_body = base64.encodestring(
            smart_str_ignore(response_body))

        request_context = {
            'status': request.get_request_line().strip(),
            'headers': request.get_headers(),
            'body': encoded_request_body
        }

        response_context = {
            'status': response.get_status_line().strip(),
            'headers': response.get_headers(),
            'body': encoded_response_body
        }

        context = dotdict({
            'id': self._id,
            'request': request_context,
            'response': response_context
        })

        rendered = self.get_template(self.TEMPLATE).render(context)
        self.save_node_to_cache(rendered)

        return rendered
Example #46
0
def quick_hash(text):
    """
    Calculate a cheap fingerprint for `text`.

    :param text: The data to fingerprint, it is converted with
                 smart_str_ignore before being hashed
    :return: A string with the builtin hash() of the text immediately
             followed by its zlib.adler32 checksum
    """
    raw = smart_str_ignore(text)
    fingerprint_parts = (hash(raw), zlib.adler32(raw))
    return '%s%s' % fingerprint_parts
Example #47
0
    def _find_bsql(self, mutant, statement_tuple, statement_type):
        """
        Core response-diff algorithm used to find blind SQL injections.

        The mutant is sent with a true statement, a false statement, a
        syntax error and a version of the true statement without special
        characters. The HTTP response bodies are compared in order to
        discard the cases which are not an injection, then a second pair of
        true / false statements is sent to verify the finding.

        :param mutant: The mutant which receives each statement as token
                       value before being sent
        :param statement_tuple: Index 0 holds the true statement and index
                                1 holds the false statement
        :param statement_type: Key used to get the verification statements
                               from self._get_statements(), also stored in
                               the vulnerability as 'type'
        :return: A vulnerability object or None if nothing is found
        """
        true_statement = statement_tuple[0]
        false_statement = statement_tuple[1]
        send_clean = self._uri_opener.send_clean
        debugging_id = self.get_debugging_id()

        def probe(token_value, grep=False):
            # Set the token value and send the mutant, only the very first
            # request of the algorithm is sent with grep=True
            mutant.set_token_value(token_value)
            return send_clean(mutant, debugging_id=debugging_id, grep=grep)

        def trace(template):
            # The only placeholder in the template is the statement type
            self.debug(template % statement_type, mutant)

        _, body_true = probe(true_statement, grep=True)
        _, body_false = probe(false_statement)

        if body_true == body_false:
            self.debug('There is NO CHANGE between the true and false'
                       ' responses. NO WAY w3af is going to detect a blind'
                       ' SQL injection using response diffs in this case.',
                       mutant)
            return None

        compare_diff = False

        trace('[%s] Comparing body_true_response and'
              ' body_false_response.')

        if self.equal_with_limit(body_true, body_false, compare_diff):
            #
            # The bodies might be equal for various reasons, in the best
            # case scenario there IS a blind SQL injection but the % of
            # the HTTP response body it controls is so small that the
            # equal ratio does not catch it.
            #
            self.debug('Setting compare_diff to True', mutant)
            compare_diff = True

        syntax_error_response, body_syntax_error = probe(self.SYNTAX_ERROR)

        trace('[%s] Comparing body_true_response and'
              ' body_syntax_error_response.')

        if self.equal_with_limit(body_true, body_syntax_error, compare_diff):
            return None

        # Find out if this is a search engine before digging any deeper
        search_disambiguator = self._remove_all_special_chars(true_statement)
        _, body_search = probe(search_disambiguator)

        # Equal bodies mean that we are talking to a search engine
        trace('[%s] Comparing body_true_response and'
              ' body_search_response.')

        if self.equal_with_limit(body_true, body_search, compare_diff):
            return None

        # Verify the injection using a second pair of statements
        statements = self._get_statements(mutant)
        second_true_stm = statements[statement_type][0]
        second_false_stm = statements[statement_type][1]

        second_true_response, body_second_true = probe(second_true_stm)
        second_false_response, body_second_false = probe(second_false_stm)

        trace('[%s] Comparing body_second_true_response and'
              ' body_true_response.')

        if not self.equal_with_limit(body_second_true, body_true,
                                     compare_diff):
            return None

        trace('[%s] Comparing body_second_false_response and'
              ' body_false_response.')

        if not self.equal_with_limit(body_second_false, body_false,
                                     compare_diff):
            return None

        # All the checks passed, report the vulnerability
        response_ids = [second_false_response.id, second_true_response.id]

        desc = ('Blind SQL injection was found at: "%s", using'
                ' HTTP method %s. The injectable parameter is: "%s"')
        desc %= (smart_str_ignore(mutant.get_url()),
                 smart_str_ignore(mutant.get_method()),
                 smart_str_ignore(mutant.get_token_name()))

        v = Vuln.from_mutant('Blind SQL injection vulnerability', desc,
                             severity.HIGH, response_ids, 'blind_sqli',
                             mutant)

        om.out.debug(v.get_desc())

        v['type'] = statement_type
        v['true_html'] = second_true_response.get_body()
        v['false_html'] = second_false_response.get_body()
        v['error_html'] = syntax_error_response.get_body()
        return v