Ejemplo n.º 1
0
    def get_desc(self, with_id=False):
        """
        Return the description for this information set.

        Without a TEMPLATE the call is delegated to the first info object;
        otherwise the (dedented) TEMPLATE is rendered with jinja2.

        :param with_id: Only used when there is no TEMPLATE, in which case it
                        is forwarded to the first info's get_desc()
        :return: The description
        :raise UnicodeDecodeError: Re-raised after logging the template and
                                   the context when rendering fails
        """
        if self.TEMPLATE is None:
            return self.first_info.get_desc(with_id=with_id)

        # Values made available to the template, later extended (and
        # potentially overridden) with the items of the first info
        context = dict()
        context['urls'] = [smart_unicode(url) for url in self.get_urls()]
        context['uris'] = [smart_unicode(uri) for uri in self.get_uris()]
        context['severity'] = self.get_severity()
        context['name'] = self.get_name()
        context['id'] = self.get_id()
        context['method'] = smart_unicode(self.get_method())
        context['plugin'] = self.get_plugin_name()
        context.update(self.first_info.items())

        source = textwrap.dedent(self.TEMPLATE)
        compiled = self.JINJA2_ENV.from_string(source)

        try:
            return compiled.render(context)
        except UnicodeDecodeError:
            # Log enough information to reproduce the problem, then let the
            # exception propagate
            pretty_context = pprint.pformat(context, indent=4)
            msg = ('UnicodeDecodeError found while rendering:\n\n%s\n\n'
                   'Using the following context:\n\n%r\n\n')
            args = (smart_str(source), smart_str(pretty_context))
            om.out.debug(msg % args)
            raise
Ejemplo n.º 2
0
    def get_desc(self, with_id=False):
        """
        Build the description of this information set.

        :param with_id: Passed to the first info's get_desc() when this set
                        has no TEMPLATE; not used otherwise
        :return: The first info's description when TEMPLATE is None, else the
                 TEMPLATE rendered by jinja2
        :raise UnicodeDecodeError: When rendering fails; the template and
                                   context are sent to the debug log first
        """
        if self.TEMPLATE is None:
            return self.first_info.get_desc(with_id=with_id)

        # Collect the information set data used to render the template
        urls = [smart_unicode(url) for url in self.get_urls()]
        uris = [smart_unicode(uri) for uri in self.get_uris()]
        context = {'urls': urls,
                   'uris': uris,
                   'severity': self.get_severity(),
                   'name': self.get_name(),
                   'id': self.get_id(),
                   'method': smart_unicode(self.get_method()),
                   'plugin': self.get_plugin_name()}
        context.update(self.first_info.items())

        template_source = textwrap.dedent(self.TEMPLATE)
        jinja_template = self.JINJA2_ENV.from_string(template_source)

        try:
            description = jinja_template.render(context)
        except UnicodeDecodeError:
            msg = ('UnicodeDecodeError found while rendering:\n\n%s\n\n'
                   'Using the following context:\n\n%r\n\n')
            formatted_context = pprint.pformat(context, indent=4)
            om.out.debug(msg % (smart_str(template_source),
                                smart_str(formatted_context)))
            raise
        else:
            return description
Ejemplo n.º 3
0
    def __setitem__(self, k, v):
        """
        Store a header after converting its name and value to unicode using
        self.encoding.

        :param k: Header name, must be a string
        :param v: Header value, a string or a DataToken (whose value is
                  converted in place)
        :raise ValueError: When the name or the value have any other type
        """
        if not isinstance(k, basestring):
            raise ValueError('Header name must be a string.')

        k = smart_unicode(k, encoding=self.encoding)

        if isinstance(v, basestring):
            v = smart_unicode(v, encoding=self.encoding)
        elif isinstance(v, DataToken):
            # The token object is kept, only its value is replaced
            v.set_value(smart_unicode(v.get_value(), encoding=self.encoding))
        else:
            raise ValueError('Header value must be a string.')

        super(Headers, self).__setitem__(k, v)
Ejemplo n.º 4
0
    def __setitem__(self, k, v):
        """
        Set a header, making sure both the name and the value are unicode.

        :param k: The header name (string)
        :param v: The header value (string or DataToken)
        :raise ValueError: If `k` is not a string, or `v` is neither a string
                           nor a DataToken
        """
        encoding = self.encoding

        if not isinstance(k, basestring):
            raise ValueError('Header name must be a string.')
        k = smart_unicode(k, encoding=encoding)

        value_is_string = isinstance(v, basestring)
        if not value_is_string and not isinstance(v, DataToken):
            raise ValueError('Header value must be a string.')

        if value_is_string:
            v = smart_unicode(v, encoding=encoding)
        else:
            # DataToken: update the wrapped value and store the same object
            unicode_value = smart_unicode(v.get_value(), encoding=encoding)
            v.set_value(unicode_value)

        super(Headers, self).__setitem__(k, v)
Ejemplo n.º 5
0
def headers_url_generator(resp, fuzzable_req):
    """
    Extract URLs from the HTTP response headers (such as "Location").

    For each parser in URL_HEADERS, every header name associated with it is
    looked up in the response; when present, its value is decoded using the
    response charset and handed to the parser.

    Yields tuples containing:
        * Newly found URL
        * The FuzzableRequest instance passed as parameter
        * The HTTPResponse generated by the FuzzableRequest
        * Boolean indicating if we trust this reference or not (always False)

    :param resp: HTTP response object
    :param fuzzable_req: The HTTP request that generated the response
    """
    headers = resp.get_headers()

    for parser, header_names in URL_HEADERS.iteritems():
        for header_name in header_names:

            raw_value, _ = headers.iget(header_name, None)
            if raw_value is None:
                # Header not present in this response
                continue

            decoded_value = smart_unicode(raw_value, encoding=resp.charset)

            for ref in parser(resp, header_name, decoded_value):
                yield ref, fuzzable_req, resp, False
Ejemplo n.º 6
0
    def end(self):
        """
        This method is called when the scan has finished, we perform these
        main tasks:
            * Get the target URLs
            * Get the enabled plugins
            * Get the vulnerabilities and infos from the KB
            * Get the debug data
            * Send all the data to jinja2 for rendering the template

        The template and output files are closed before returning, also when
        rendering raises an exception.
        """
        target_urls = [t.url_string for t in cf.cf.get('targets')]
        target_domain = cf.cf.get('target_domains')[0]
        enabled_plugins = self._enabled_plugins
        findings = kb.kb.get_all_findings()
        # Lazily converts each message to unicode while it is consumed
        debug_log = ((t, l, smart_unicode(m))
                     for (t, l, m) in self._additional_info)
        known_urls = kb.kb.get_all_known_urls()

        context = {
            'target_urls': target_urls,
            'target_domain': target_domain,
            'enabled_plugins': enabled_plugins,
            'findings': findings,
            'debug_log': debug_log,
            'known_urls': known_urls
        }

        # The file was verified to exist when setting the plugin configuration
        with open(os.path.expanduser(self._template), 'r') as template_fh:
            output_path = os.path.expanduser(self._output_file_name)
            with open(output_path, 'w') as output_fh:
                self._render_html_file(template_fh, context, output_fh)
 def test_parse_response_with_no_charset_in_header(self):
     """
     No charset is sent in the Content-Type header: the response body must
     match smart_unicode() applied to the raw body with DEFAULT_CHARSET,
     ESCAPED_CHAR and on_error_guess=False.
     """
     for body, charset in TEST_RESPONSES.values():
         raw_body = body.encode(charset)
         headers = Headers([("Content-Type", "text/xml")])
         resp = self.create_resp(headers, raw_body)

         expected = smart_unicode(raw_body, DEFAULT_CHARSET, ESCAPED_CHAR,
                                  on_error_guess=False)
         self.assertEquals(expected, resp.body)
Ejemplo n.º 8
0
def headers_url_generator(resp, fuzzable_req):
    """
    Generator of URLs found in HTTP response headers such as "Location".

    Yields tuples containing:
        * Newly found URL
        * The FuzzableRequest instance passed as parameter
        * The HTTPResponse generated by the FuzzableRequest
        * Boolean indicating if we trust this reference or not

    :param resp: HTTP response object
    :param fuzzable_req: The HTTP request that generated the response
    """
    response_headers = resp.get_headers()

    for parser, names in URL_HEADERS.iteritems():
        for name in names:
            value, _ = response_headers.iget(name, None)

            if value is not None:
                # Decode using the charset of the response before parsing
                value = smart_unicode(value, encoding=resp.charset)

                for reference in parser(resp, name, value):
                    yield reference, fuzzable_req, resp, False
Ejemplo n.º 9
0
    def _clean_filenames(self, filenames):
        """
        Filter some characters from filenames.

        :return: A clear list of filenames.
        """
        resources = set()

        for filename in filenames:

            # Sometimes we get random bytes from the .git/index because of
            # git versions we don't fully support, so we ignore any encoding
            # errors
            filename = smart_unicode(filename, errors='ignore')

            if filename.startswith('/'):
                filename = filename[1:]
            if filename.startswith('./'):
                filename = filename[2:]
            if filename.endswith('/'):
                filename = filename[:-1]

            resources.add(filename)

        return resources
Ejemplo n.º 10
0
    def end(self):
        """
        This method is called when the scan has finished, we perform these
        main tasks:
            * Get the target URLs
            * Get the enabled plugins
            * Get the vulnerabilities and infos from the KB
            * Get the debug data
            * Send all the data to jinja2 for rendering the template

        Both the template and the output file handles are closed when the
        method returns or raises.
        """
        target_urls = [t.url_string for t in cf.cf.get('targets')]
        target_domain = cf.cf.get('target_domains')[0]
        enabled_plugins = self._enabled_plugins
        findings = kb.kb.get_all_findings()
        # Generator: messages are converted to unicode as they are consumed
        debug_log = ((t, l, smart_unicode(m))
                     for (t, l, m) in self._additional_info)
        known_urls = kb.kb.get_all_known_urls()

        context = {'target_urls': target_urls,
                   'target_domain': target_domain,
                   'enabled_plugins': enabled_plugins,
                   'findings': findings,
                   'debug_log': debug_log,
                   'known_urls': known_urls}

        # The file was verified to exist when setting the plugin configuration
        template_fh = open(os.path.expanduser(self._template), 'r')
        try:
            output_fh = open(os.path.expanduser(self._output_file_name), 'w')
            try:
                self._render_html_file(template_fh, context, output_fh)
            finally:
                output_fh.close()
        finally:
            template_fh.close()
Ejemplo n.º 11
0
    def _headers_url_generator(self, resp, fuzzable_req):
        """
        Extract URLs from the HTTP response headers listed in URL_HEADERS
        (such as "Location").

        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        Header values which can not be joined with the response URL are
        written to the debug log and skipped.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        headers = resp.get_headers()

        for header_name in URL_HEADERS:
            raw_value, _ = headers.iget(header_name, '')
            if not raw_value:
                continue

            url = smart_unicode(raw_value, encoding=resp.charset)

            try:
                ref = resp.get_url().url_join(url)
            except ValueError:
                msg = ('The application sent a "%s" redirect that w3af'
                       ' failed to correctly parse as an URL, the header'
                       ' value was: "%s"')
                om.out.debug(msg % (header_name, url))
                continue

            yield ref, fuzzable_req, resp, False
Ejemplo n.º 12
0
    def _headers_url_generator(self, resp, fuzzable_req):
        """
        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        The URLs come from the response headers named in URL_HEADERS, such
        as "Location", joined with the URL of the response.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        parse_error = ('The application sent a "%s" redirect that w3af'
                       ' failed to correctly parse as an URL, the header'
                       ' value was: "%s"')
        response_headers = resp.get_headers()

        for name in URL_HEADERS:
            value, _ = response_headers.iget(name, '')

            if value:
                url = smart_unicode(value, encoding=resp.charset)
                base_url = resp.get_url()

                try:
                    joined = base_url.url_join(url)
                except ValueError:
                    # Invalid URL in the header: log it and move on
                    om.out.debug(parse_error % (name, url))
                else:
                    yield joined, fuzzable_req, resp, False
Ejemplo n.º 13
0
    def clean_values(self, init_val):
        """
        Convert the (key, value) pairs received from the wire to unicode.

        :param init_val: A DataContainer or dict (returned untouched), or an
                         iterable of (key, value) pairs
        :return: `init_val` itself, or a list of (unicode key, value) tuples
                 where string values were converted to unicode
        """
        if isinstance(init_val, (DataContainer, dict)):
            return init_val

        unicode_pairs = []

        # I can do this key, value thing because the headers do NOT
        # have multiple header values like query strings and post-data
        for key, value in init_val:
            is_string = isinstance(value, basestring)
            clean_value = smart_unicode(value) if is_string else value
            unicode_pairs.append((smart_unicode(key), clean_value))

        return unicode_pairs
Ejemplo n.º 14
0
    def comment(self, elem):
        """
        Store the text of a comment element, as unicode, in
        self._comments_in_doc.

        Nothing is stored while inside a script tag, which handles the case
        where we have <script><!-- code(); --></script>, nor when the
        element has no text.

        :param elem: The comment element
        """
        if self._inside_script or elem.text is None:
            return

        self._comments_in_doc.append(smart_unicode(elem.text))
Ejemplo n.º 15
0
    def test_invalid_utf8(self):
        """
        A token holding the byte '\\xf3' can't be converted with unicode(),
        but smart_unicode() is expected to return u'\\xf3' for it.
        """
        token = DataToken(self.NAME, '\xf3', self.PATH)

        # The built-in conversion fails...
        self.assertRaises(UnicodeDecodeError, unicode, token)

        # ...while smart_unicode handles the token
        self.assertEqual(smart_unicode(token), u'\xf3')
Ejemplo n.º 16
0
    def comment(self, elem):
        """
        Collect the text of a comment element.

        :param elem: The comment element; its text is appended (as unicode)
                     to self._comments_in_doc when it is not None and we are
                     not inside a script tag
        """
        # This handles the case where we have:
        # <script><!-- code(); --></script>
        if not self._inside_script:
            text = elem.text
            if text is not None:
                self._comments_in_doc.append(smart_unicode(elem.text))
Ejemplo n.º 17
0
    def clean_values(self, init_val):
        """
        Cleanup whatever came from the wire into unicode strings.

        :param init_val: Either a NonRepeatKeyValueContainer / dict, which is
                         returned as-is, or an iterable of (key, value) pairs
        :return: The same object for containers and dicts, otherwise a list
                 of (key, value) tuples with unicode keys and with string
                 values converted to unicode
        """
        already_clean = (NonRepeatKeyValueContainer, dict)
        if isinstance(init_val, already_clean):
            return init_val

        result = []

        # I can do this (key, value) thing because the headers do NOT
        # have multiple header values like query strings and post-data
        for key, value in init_val:
            if isinstance(value, basestring):
                value = smart_unicode(value)

            pair = (smart_unicode(key), value)
            result.append(pair)

        return result
Ejemplo n.º 18
0
    def test_invalid_utf8(self):
        """
        unicode() must raise UnicodeDecodeError for a token created with the
        "\\xf3" byte, while smart_unicode() must return u"\\xf3".
        """
        raw_value = "\xf3"
        data_token = DataToken(self.NAME, raw_value, self.PATH)

        self.assertRaises(UnicodeDecodeError, unicode, data_token)

        converted = smart_unicode(data_token)
        expected = u"\xf3"
        self.assertEqual(converted, expected)
Ejemplo n.º 19
0
def get_clean_body(mutant, response):
    """
    @see: Very similar to fingerprint_404.py get_clean_body() but not quite
          the same maybe in the future I can merge both?

    Definition of clean in this method:
        - input:
            - response.get_url() == http://host.tld/aaaaaaa/?id=1 OR 23=23
            - response.get_body() == '...<x>1 OR 23=23</x>...'

        - output:
            - self._clean_body(response) == '...<x></x>...'

    All injected values are removed encoded and 'as is'.

    :param mutant: The mutant where I can get the value from.
    :param response: The HTTPResponse object to clean
    :return: A string that represents the 'cleaned' response body; the body
             is returned untouched when the response is not text or HTML.
    """
    if not response.is_text_or_html():
        return response.body

    body = response.body
    mod_value_1 = mutant.get_token_value()

    # Since the body is already in unicode, when we call body.replace() all
    # arguments are converted to unicode by python. If there are special
    # chars in the mod_value then we end up with an UnicodeDecodeError, so
    # I convert it myself with some error handling
    #
    # https://github.com/andresriancho/w3af/issues/8953
    mod_value_1 = smart_unicode(mod_value_1, errors=PERCENT_ENCODE)

    # unquote, just in case the plugin did an extra encoding of some type.
    # what we want to do here is get the original version of the string
    mod_value_2 = urllib.unquote_plus(mod_value_1)

    payloads_to_replace = set()
    payloads_to_replace.add(mod_value_1)
    payloads_to_replace.add(mod_value_2)

    # Every escaped variant of each payload, without duplicates
    encoded_payloads = set()

    for payload in payloads_to_replace:
        encoded_payloads.update(apply_multi_escape_table(payload,
                                                         EXTENDED_TABLE))

    # Longest first, so that a payload is removed before any shorter
    # payload which might be contained in it. The sort is stable, just like
    # the cmp-based version it replaces.
    encoded_payloads = sorted(encoded_payloads, key=len, reverse=True)

    empty = u''
    replace = unicode.replace
    for to_replace in encoded_payloads:
        body = replace(body, to_replace, empty)

    return body
    def test_parse_response_with_wrong_charset(self):
        """
        A wrong or non-existent charset was set; the body is expected to be
        decoded using the default charset and handling scheme.

        The charset sent in the header is picked at random from "XXX" and
        "utf-8" on each iteration.
        """
        from random import choice

        for body, charset in TEST_RESPONSES.values():
            raw_body = body.encode(charset)

            declared_charset = choice(("XXX", "utf-8"))
            content_type = "text/xml; charset=%s" % declared_charset
            headers = Headers([("Content-Type", content_type)])

            resp = self.create_resp(headers, raw_body)

            expected = smart_unicode(raw_body, DEFAULT_CHARSET, ESCAPED_CHAR,
                                     on_error_guess=False)
            self.assertEquals(expected, resp.body)
Ejemplo n.º 21
0
Archivo: url.py Proyecto: zsdlove/w3af
 def get_path_qs(self):
     """
     Build the path of the URL including, when present, the params
     (prefixed with ";") and the query string (prefixed with "?").

     :return: Returns the path for the url containing the QS
     """
     path_qs = self.path

     has_params = self.params != u''
     if has_params:
         path_qs += u';' + self.params

     if self.has_query_string():
         query_string = smart_unicode(self.querystring)
         path_qs += u'?' + query_string

     return path_qs
Ejemplo n.º 22
0
 def test_parse_response_with_no_charset_in_header(self):
     """
     No charset was specified in the header: the default charset and the
     default error handling scheme are expected to be used for the body.
     """
     content_type_header = ('Content-Type', 'text/xml')

     for body, charset in TEST_RESPONSES.values():
         encoded_body = body.encode(charset)
         resp = self.create_resp(Headers([content_type_header]),
                                 encoded_body)

         expected_body = smart_unicode(encoded_body,
                                       DEFAULT_CHARSET,
                                       ESCAPED_CHAR,
                                       on_error_guess=False)
         self.assertEquals(expected_body, resp.body)
Ejemplo n.º 23
0
    def __setattr__(self, key, value):
        """
        Store attributes as items of this object, converting string values
        to unicode first.

        :param key: The attribute name to set
        :param value: The value (string, unicode or anything else)
        :return: None
        """
        is_string = isinstance(value, basestring)
        self[key] = smart_unicode(value) if is_string else value
Ejemplo n.º 24
0
    def _to_str_with_separators(self, key_val_sep, pair_sep):
        """
        :return: Join all the values stored in this data container using the
                 specified separators.
        """
        lst = []

        for k, v in self.items():
            to_app = u"%s%s%s" % (k, key_val_sep, smart_unicode(v, encoding=UTF8))
            lst.append(to_app)

        return pair_sep.join(lst)
Ejemplo n.º 25
0
    def __setattr__(self, key, value):
        """
        Overridden so that setting an attribute stores it as an item, with
        every string value translated to an unicode object.

        :param key: The attribute name to set
        :param value: The value (string, unicode or anything else)
        :return: None
        """
        if not isinstance(value, basestring):
            # Non-string values are stored without any conversion
            self[key] = value
            return

        self[key] = smart_unicode(value)
Ejemplo n.º 26
0
    def _to_str_with_separators(self, key_val_sep, pair_sep):
        """
        :return: Join all the values stored in this data container using the
                 specified separators.
        """
        lst = []

        for k, v in self.items():
            to_app = u'%s%s%s' % (k, key_val_sep,
                                  smart_unicode(v, encoding=UTF8))
            lst.append(to_app)

        return pair_sep.join(lst)
Ejemplo n.º 27
0
def response_dump(_id):
    """
    Read an HTTP response from the history and dump it.

    :param _id: The ID to query in the database
    :return: The response as unicode, or None when reading the ID from the
             history raises DBException
    """
    history = HistoryItem()

    try:
        details = history.read(_id)
    except DBException:
        return None
    else:
        dumped = details.response.dump()
        return smart_unicode(dumped.strip())
    def test_from_dict_encodings(self):
        """
        A response serialized with to_dict() + msgpack and loaded back with
        HTTPResponse.from_dict() must keep the expected unicode body.
        """
        for body, charset in TEST_RESPONSES.values():
            raw_body = body.encode(charset)
            headers = Headers([("Content-Type", "text/xml")])
            resp = self.create_resp(headers, raw_body)

            # Round trip through msgpack
            serialized = msgpack.dumps(resp.to_dict())
            loaded_resp = HTTPResponse.from_dict(msgpack.loads(serialized))

            expected = smart_unicode(raw_body, DEFAULT_CHARSET, ESCAPED_CHAR,
                                     on_error_guess=False)
            self.assertEquals(expected, loaded_resp.body)
Ejemplo n.º 29
0
 def test_parse_response_with_wrong_charset(self):
     """
     A wrong or non-existent charset was set; the response is expected to be
     decoded using the default charset and handling scheme.
     """
     for body, charset in TEST_RESPONSES.values():
         raw_body = body.encode(charset)

         # The charset sent in the header is chosen at random
         header_charset = choice(('XXX', 'utf-8'))
         content_type = 'text/xml; charset=%s' % header_charset
         headers = Headers([('Content-Type', content_type)])

         resp = self.create_resp(headers, raw_body)

         expected = smart_unicode(raw_body, DEFAULT_CHARSET,
                                  ESCAPED_CHAR, on_error_guess=False)
         self.assertEquals(expected, resp.body)
Ejemplo n.º 30
0
def response_dump(_id):
    """
    :param _id: The ID to query in the database
    :return: The stripped dump of the stored response as unicode; None if
             the history raised DBException while reading `_id`
    """
    history_item = HistoryItem()

    try:
        details = history_item.read(_id)
    except DBException:
        return None

    response = details.response
    stripped_dump = response.dump().strip()

    return smart_unicode(stripped_dump)
Ejemplo n.º 31
0
    def _to_str_with_separators(self, key_val_sep, pair_sep, errors='strict'):
        """
        :return: Join all the values stored in this data container using the
                 specified separators.
        """
        lst = []

        for key, value_list in self.items():
            for value in value_list:
                value = smart_unicode(value, encoding=UTF8, errors=errors)
                to_app = u'%s%s%s' % (key, key_val_sep, value)
                lst.append(to_app)

        return pair_sep.join(lst)
Ejemplo n.º 32
0
    def _to_str_with_separators(self, key_val_sep, pair_sep, errors='strict'):
        """
        :return: Join all the values stored in this data container using the
                 specified separators.
        """
        lst = []

        for key, value_list in self.items():
            for value in value_list:
                value = smart_unicode(value, encoding=UTF8, errors=errors)
                to_app = u'%s%s%s' % (key, key_val_sep, value)
                lst.append(to_app)

        return pair_sep.join(lst)
    def test_dump_case03(self):
        """
        Dump a request with a header value made of all 256 byte values and
        post-data; the header value is expected in its smart_unicode() form.
        """
        all_bytes = ''.join(map(chr, xrange(256)))

        expected_lines = [u'GET http://w3af.com/a/b/c.php HTTP/1.1',
                          u'Hola: %s' % smart_unicode(all_bytes),
                          u'',
                          u'a=b']
        expected = u'\r\n'.join(expected_lines)

        headers = Headers([(u'Hola', all_bytes)])
        post_data = KeyValueContainer(init_val=[('a', ['b'])])
        request = FuzzableRequest(self.url, method='GET',
                                  post_data=post_data, headers=headers)

        self.assertEqual(request.dump(), expected)
Ejemplo n.º 34
0
    def test_dump_case03(self):
        """
        The dump of a request must contain the request line, the "Hola"
        header with all 256 byte values converted by smart_unicode(), an
        empty line and the post-data.
        """
        header_value = ''.join([chr(code) for code in xrange(256)])

        request_line = u'GET http://w3af.com/a/b/c.php HTTP/1.1'
        header_line = u'Hola: %s' % smart_unicode(header_value)
        expected = u'\r\n'.join([request_line, header_line, u'', u'a=b'])

        fr = FuzzableRequest(
            self.url,
            method='GET',
            post_data=KeyValueContainer(init_val=[('a', ['b'])]),
            headers=Headers([(u'Hola', header_value)]))

        self.assertEqual(fr.dump(), expected)
Ejemplo n.º 35
0
    def test_dump_case03(self):
        """
        Dump a GET request created with a `dc` and a header value made of
        all 256 byte values; the expected dump has an empty body.
        """
        all_bytes = ''.join(map(chr, xrange(256)))

        expected_lines = [u'GET http://w3af.com/a/b/c.php HTTP/1.1',
                          u'Hola: %s' % smart_unicode(all_bytes),
                          u'',
                          u'']
        expected = u'\r\n'.join(expected_lines)

        headers = Headers([(u'Hola', all_bytes)])

        # TODO: Note that I'm passing a dc to the FuzzableRequest and it's
        # not appearing in the dump. It might be a bug...
        request = FuzzableRequest(self.url, method='GET', dc={u'a': ['b']},
                                  headers=headers)
        self.assertEqual(request.dump(), expected)
Ejemplo n.º 36
0
    def url_string(self):
        """
        Join the URL components (scheme, netloc, path, params, query string
        and fragment) into a single string.

        :return: A <unicode> representation of the URL
        """
        components = (self.scheme, self.netloc, self.path, self.params,
                      self.querystring, self.fragment)
        unicode_components = [smart_unicode(part) for part in components]

        url = urlparse.urlunparse(unicode_components)

        if isinstance(url, unicode):
            return url

        # ensuring this is actually unicode
        return unicode(url, self.encoding, 'replace')
Ejemplo n.º 37
0
    def test_from_dict_encodings(self):
        """
        For each test response, serialize it with to_dict() and msgpack,
        load it again with HTTPResponse.from_dict() and verify the body.
        """
        content_type_header = ('Content-Type', 'text/xml')

        for body, charset in TEST_RESPONSES.values():
            encoded_body = body.encode(charset)
            resp = self.create_resp(Headers([content_type_header]),
                                    encoded_body)

            packed = msgpack.dumps(resp.to_dict())
            unpacked = msgpack.loads(packed)
            loaded_resp = HTTPResponse.from_dict(unpacked)

            expected_body = smart_unicode(encoded_body,
                                          DEFAULT_CHARSET,
                                          ESCAPED_CHAR,
                                          on_error_guess=False)
            self.assertEquals(expected_body, loaded_resp.body)
Ejemplo n.º 38
0
    def test_dump_case03(self):
        """
        The dump of a GET request built with a `dc` is expected to contain
        only the request line, the "Hola" header and an empty body.
        """
        header_value = ''.join([chr(code) for code in xrange(256)])

        request_line = u'GET http://w3af.com/a/b/c.php HTTP/1.1'
        header_line = u'Hola: %s' % smart_unicode(header_value)
        expected = u'\r\n'.join([request_line, header_line, u'', u''])

        headers = Headers([(u'Hola', header_value)])

        # TODO: Note that I'm passing a dc to the FuzzableRequest and it's
        # not appearing in the dump. It might be a bug...
        kwargs = {'method': 'GET', 'dc': {u'a': ['b']}, 'headers': headers}
        fr = FuzzableRequest(self.url, **kwargs)

        self.assertEqual(fr.dump(), expected)
Ejemplo n.º 39
0
def jinja2_attr_value_escape_filter(value):
    """
    Jinja2 filter used to escape attribute values:

        <tag attribute="value">

    All the special characters which can not be printed in that context are
    escaped using ATTR_VALUE_ESCAPES; characters in
    ATTR_VALUE_ESCAPES_IGNORE are never escaped.

    XML invalid characters get a very specific treatment, they are replaced
    with:

        <character code="%04x"/>

    The parser should handle that and replace these tags with the real char
    (if it can be handled by the reader).

    Note that the `character` tag is printed in its HTML-encoded form (<
    replaced by &lt; and so on) because it is invalid to print < inside an
    attribute value.

    :param value: The value to escape
    :return: The escaped string wrapped in jinja2.Markup, or `value` itself
             when it is not a string
    """
    if not isinstance(value, basestring):
        return value

    # Fix some encoding errors which are triggered when the value is not an
    # unicode string
    value = smart_unicode(value)
    escaped = u''

    for char in value:
        if char in ATTR_VALUE_ESCAPES_IGNORE:
            replacement = None
        else:
            replacement = ATTR_VALUE_ESCAPES.get(char, None)

        # Characters without an escape are copied verbatim
        escaped += char if replacement is None else replacement

    return jinja2.Markup(escaped)
Ejemplo n.º 40
0
    def url_string(self):
        """
        :return: A <unicode> representation of the URL, built by joining its
                 scheme, netloc, path, params, query string and fragment
        """
        data = []
        for component in (self.scheme,
                          self.netloc,
                          self.path,
                          self.params,
                          self.querystring,
                          self.fragment):
            data.append(smart_unicode(component))

        calc = urlparse.urlunparse(data)

        # ensuring this is actually unicode
        needs_decoding = not isinstance(calc, unicode)
        if needs_decoding:
            calc = unicode(calc, self.encoding, 'replace')

        return calc
Ejemplo n.º 41
0
def get_clean_body(mutant, response):
    """
    @see: Very similar to fingerprint_404.py get_clean_body() bug not quite
          the same maybe in the future I can merge both?

    Definition of clean in this method:
        - input:
            - response.get_url() == http://host.tld/aaaaaaa/?id=1 OR 23=23
            - response.get_body() == '...<x>1 OR 23=23</x>...'

        - output:
            - self._clean_body( response ) == '...<x></x>...'

    The injected value is removed "as is", URL-unquoted, and in the
    cgi.escape()d form of both.

    :param mutant: The mutant where I can get the value from.
    :param response: The HTTPResponse object to clean
    :return: A string that represents the "cleaned" response body; the body
             is returned untouched when the response is not text or HTML.
    """
    body = response.body

    if not response.is_text_or_html():
        return body

    # Since the body is already in unicode, when we call body.replace() all
    # arguments are converted to unicode by python. If there are special
    # chars in the mod_value then we end up with an UnicodeDecodeError, so
    # I convert it myself with some error handling
    #
    # https://github.com/andresriancho/w3af/issues/8953
    mod_value = smart_unicode(mutant.get_token_value(),
                              errors=PERCENT_ENCODE)
    unquoted = urllib.unquote_plus(mod_value)

    # The order of the replacements is the same as always: raw values
    # first, then their escaped versions
    to_remove = (mod_value,
                 unquoted,
                 cgi.escape(mod_value),
                 cgi.escape(unquoted))

    for needle in to_remove:
        body = body.replace(needle, u'')

    return body
Ejemplo n.º 42
0
def jinja2_text_value_escape_filter(value):
    """
    Jinja2 filter used to escape text values:

        <tag>text</tag>

    Escapes, using TEXT_VALUE_ESCAPES, all the special characters which can
    not be printed in that context, and the special characters which might
    be in the input and we want to escape to avoid "xml injection".
    Characters in TEXT_VALUE_ESCAPES_IGNORE are never escaped.

    XML invalid characters get a very specific treatment, they are replaced
    with:

        <character code="%04x"/>

    The parser should handle that and replace these tags with the real char
    (if it can be handled by the reader).

    :param value: The value to escape
    :return: The escaped string wrapped in jinja2.Markup, or `value` itself
             when it is not a string
    """
    if not isinstance(value, basestring):
        return value

    # Fix some encoding errors which are triggered when the value is not an
    # unicode string
    value = smart_unicode(value)
    result = u''

    for char in value:
        if char not in TEXT_VALUE_ESCAPES_IGNORE:
            escape = TEXT_VALUE_ESCAPES.get(char, None)
            if escape is not None:
                result += escape
                continue

        # Ignored characters and characters without an escape
        result += char

    return jinja2.Markup(result)
Ejemplo n.º 43
0
def jinja2_attr_value_escape_filter(value):
    """
    Escape a string using the ATTR_VALUE_ESCAPES table.

    Characters found in ATTR_VALUE_ESCAPES_IGNORE, and characters without
    an entry in ATTR_VALUE_ESCAPES, are copied without changes.

    :param value: The value to escape
    :return: `value` itself when it is not a string, otherwise the escaped
             string wrapped in jinja2.Markup
    """
    if not isinstance(value, basestring):
        return value

    # Fix some encoding errors which are triggered when the value is not an
    # unicode string
    value = smart_unicode(value)
    output = u''

    for char in value:
        if char not in ATTR_VALUE_ESCAPES_IGNORE:
            escape = ATTR_VALUE_ESCAPES.get(char, None)
            if escape is not None:
                output += escape
                continue

        output += char

    return jinja2.Markup(output)
Ejemplo n.º 44
0
def jinja2_attr_value_escape_filter(value):
    """
    Jinja2 filter which escapes a string one character at a time.

    Characters listed in ATTR_VALUE_ESCAPES_IGNORE are never modified, the
    ones which have an entry in ATTR_VALUE_ESCAPES are replaced with that
    entry and everything else is kept as-is.

    NOTE(review): the name suggests the output is used as an XML attribute
    value -- confirm with the templates that reference this filter.

    :param value: The value to escape
    :return: jinja2.Markup holding the escaped string. Values which are not
             strings are returned without any change.
    """
    if not isinstance(value, basestring):
        return value

    pieces = []

    # smart_unicode() prevents the encoding errors which appear when the
    # value is not an unicode string
    for char in smart_unicode(value):
        if char in ATTR_VALUE_ESCAPES_IGNORE:
            pieces.append(char)
            continue

        replacement = ATTR_VALUE_ESCAPES.get(char, None)

        if replacement is None:
            pieces.append(char)
        else:
            pieces.append(replacement)

    return jinja2.Markup(u''.join(pieces))
Ejemplo n.º 45
0
    def __init__(self, code, read, headers, geturl, original_url,
                 msg='OK', _id=None, time=DEFAULT_WAIT_TIME, alias=None,
                 charset=None):
        """
        Validate the constructor arguments and initialize the response.

        :param code: HTTP code
        :param read: HTTP body text; typically a string
        :param headers: HTTP headers, must be a Headers instance
        :param geturl: URL object instance
        :param original_url: URL object instance
        :param msg: HTTP message
        :param _id: Optional response identifier
        :param time: The time between the request and the response
        :param alias: Alias for the response, this contains a hash that helps
                      the backend sqlite find http_responses faster by indexing
                      by this attr.
        :param charset: Response's encoding; obligatory when `read` is unicode
        :raises TypeError: When geturl or original_url are not URL instances,
                           headers is not a Headers instance or read is not
                           a string.
        """
        # Note: the error templates use their own local name in order to
        # avoid re-binding the `msg` parameter (the HTTP message)
        if not isinstance(geturl, URL):
            error = 'Invalid type %s for HTTPResponse ctor param geturl.'
            raise TypeError(error % type(geturl))

        if not isinstance(original_url, URL):
            error = 'Invalid type %s for HTTPResponse ctor param original_url.'
            raise TypeError(error % type(original_url))

        if not isinstance(headers, Headers):
            error = 'Invalid type %s for HTTPResponse ctor param headers.'
            raise TypeError(error % type(headers))

        if not isinstance(read, basestring):
            raise TypeError('Invalid type %s for HTTPResponse ctor param read.'
                            % type(read))

        self._charset = charset
        self._headers = None
        # The decoded body starts empty; the raw body is kept as received
        self._body = None
        self._raw_body = read
        self._content_type = None
        self._dom = None
        # A unique id identifier for the response
        self.id = _id
        # From cache defaults to False
        self._from_cache = False
        # Set the info
        self._info = headers
        # Set code
        self.set_code(code)

        # Set the URL variables
        # The URL that we really GET'ed
        self._realurl = original_url.uri2url()
        self._uri = original_url
        # The URL where we were redirected to (equal to original_url
        # when no redirect).
        #
        # Same convention as the pair above: the "*_url" attribute holds the
        # result of uri2url() and the "*_uri" attribute the object as it was
        # received. These two used to be assigned the other way around.
        self._redirected_url = geturl.uri2url()
        self._redirected_uri = geturl

        # Set the rest
        self._msg = smart_unicode(msg)
        self._time = time
        self._alias = alias
        self._doc_type = None

        # Internal lock
        self._body_lock = threading.RLock()
Ejemplo n.º 46
0
 def set_protocol(self, protocol):
     """
     Store the protocol (scheme) of the url.

     :param protocol: The new protocol, it is converted using
                      smart_unicode() before being saved in self._scheme.
     """
     scheme = smart_unicode(protocol)
     self._scheme = scheme
Ejemplo n.º 47
0
 def set_path(self, path):
     """
     Store the path of the url.

     :param path: The new path, it is converted using smart_unicode(). When
                  the converted value is empty (falsy) u'/' is stored.
     """
     new_path = smart_unicode(path)
     self._path = new_path if new_path else u'/'
Ejemplo n.º 48
0
def _get_clean_body_impl(response, strings_to_replace_list, multi_encode=True):
    """
    Low level function which performs the replacements for both
    helpers.get_clean_body() and fingerprint_404.get_clean_body().

    Those two functions receive different parameters, do some preparation
    work, and then call this one to really remove the strings from the body.

    :param response: HTTP response object
    :param strings_to_replace_list: A list of strings to replace. These can be
                                    byte strings or unicode, both are handled
                                    internally.
    :param multi_encode: Apply the multiple encodings before replacing, setting
                         this to True with many strings to replace in the list
                         will consume considerable CPU time.
    :return: The body as a unicode with all strings to replace removed.
    """
    body = response.body
    originals = set()

    for raw_string in strings_to_replace_list:
        # The body is already unicode, thus all the arguments passed to
        # replace() are converted to unicode by python. Special chars in the
        # string would then trigger an UnicodeDecodeError, so the conversion
        # is done here with explicit error handling.
        #
        # https://github.com/andresriancho/w3af/issues/8953
        as_unicode = smart_unicode(raw_string, errors=PERCENT_ENCODE)

        # Also keep the unquoted version, just in case the plugin did an extra
        # encoding of some type: the goal is to get the original string
        unquoted = urllib.unquote_plus(as_unicode)

        originals.update((as_unicode, unquoted))

    if multi_encode:
        # Generate multiple encoded versions of each string, these are used
        # to find the payloads in different responses
        payloads = set()

        for original in originals:
            payloads.update(apply_multi_escape_table(original, EXTENDED_TABLE))
    else:
        # Just use the strings we already have
        payloads = originals

    # Unique payloads, longest first
    longest_first = sorted(payloads, key=len, reverse=True)

    for payload in longest_first:
        body = unicode.replace(body, payload, u'')

    return body
Ejemplo n.º 49
0
def get_clean_body_impl(response,
                        strings_to_replace_list,
                        multi_encode=True,
                        max_escape_count=None):
    """
    Low level function which performs the replacements for both
    helpers.get_clean_body() and fingerprint_404.get_clean_body().

    Those two functions receive different parameters, do some preparation
    work, and then call this one to really remove the strings from the body.

    :param response: HTTP response object
    :param strings_to_replace_list: A list of strings to replace. These can be
                                    byte strings or unicode, both are handled
                                    internally.
    :param multi_encode: Apply the multiple encodings before replacing, setting
                         this to True with many strings to replace in the list
                         will consume considerable CPU time.
    :param max_escape_count: The max number of escapes to try to replace. The
                             value is forwarded as `max_count` to
                             apply_multi_escape_table(); the default in this
                             signature is None.

                             NOTE(review): an earlier note mentioned a default
                             of 500 (a bit more than the ~350 escapes which
                             were generated by the worst case in
                             test_apply_multi_escape_table_count); presumably
                             that limit is applied by
                             apply_multi_escape_table() when it receives
                             None -- confirm there.
    :return: The body as a unicode with all strings to replace removed.
    """
    body = response.body
    body_lower = body.lower()
    body_len = len(body)
    originals = set()

    for raw_string in strings_to_replace_list:
        # The body is already unicode, thus all the arguments passed to
        # replace() are converted to unicode by python. Special chars in the
        # string would then trigger an UnicodeDecodeError, so the conversion
        # is done here with explicit error handling.
        #
        # https://github.com/andresriancho/w3af/issues/8953
        as_unicode = smart_unicode(raw_string, errors=PERCENT_ENCODE)

        # Also keep the unquoted version, just in case the plugin did an extra
        # encoding of some type: the goal is to get the original string
        unquoted = urllib.unquote_plus(as_unicode)

        originals.update((as_unicode, unquoted))

    if multi_encode:
        # Generate multiple encoded versions of each string, these are used
        # to find the payloads in different responses
        payloads = set()

        for original in originals:
            # A string which is longer than the body can never be found in
            # it, and apply_multi_escape_table() is expected to yield strings
            # which are equal or longer than the input. Skipping these saves
            # the CPU time required to generate payloads that will never be
            # replaced.
            if len(original) > body_len:
                continue

            # The max_len=body_len parameter applies a similar restriction
            # to the generated strings
            payloads.update(apply_multi_escape_table(original,
                                                     max_len=body_len,
                                                     max_count=max_escape_count))
    else:
        # Just use the strings we already have
        payloads = originals

    # Unique payloads, longest first
    longest_first = sorted(payloads, key=len, reverse=True)

    for payload in longest_first:
        body, body_lower = remove_using_lower_case(body, body_lower, payload)

    return body
Ejemplo n.º 50
0
 def set_net_location(self, netloc):
     """
     Store the network location of the url.

     :param netloc: The new network location, it is converted using
                    smart_unicode() before being saved in self._netloc.
     """
     unicode_netloc = smart_unicode(netloc)
     self._netloc = unicode_netloc
Ejemplo n.º 51
0
    def _charset_handling(self):
        """
        Decode the body based on the header (or metadata) encoding.
        The algorithm follows the encoding detection logic used by FF:

            1) Find the charset to use:
                a) The charset received in the constructor, if any
                b) Otherwise the one returned by guess_charset(), which is
                   meant to look at the CONTENT_TYPE HTTP header, e.g.:
                       content-type: text/html; charset=iso-8859-1
                   and at the 'meta' HTML header, e.g.:
                       <meta .* content="text/html; charset=utf-8" />
                c) Determine the charset using the chardet module (TODO)
                d) Use the DEFAULT_CHARSET

            2) Try to decode the body using the found charset. If the charset
            is unknown (LookupError) force the use of DEFAULT_CHARSET

        The raw body is returned without decoding when it is already unicode,
        when the response has no CONTENT_TYPE header or when the response is
        not text or html.

        :return: A tuple containing the body and the charset which was used
        """
        charset = self._charset
        raw_body = self._raw_body
        headers = self.get_headers()
        content_type, _ = headers.iget(CONTENT_TYPE, None)

        # Only <str> strings are decoded, <unicode> strings are returned as-is
        if type(raw_body) is unicode:
            assert charset is not None, (
                "HTTPResponse objects containing "
                "unicode body must have an associated "
                "charset")
            return raw_body, charset

        if content_type is None:
            if raw_body:
                msg = ('The remote web server failed to send the CONTENT_TYPE'
                       ' header in HTTP response with id %s')
                om.out.debug(msg % self.id)

            return raw_body, DEFAULT_CHARSET

        if not self.is_text_or_html():
            # Not text, the body is kept as it is
            return raw_body, charset or DEFAULT_CHARSET

        # Figure out which charset to work with
        if not charset:
            charset = self.guess_charset(raw_body, headers)

        # Decode using the charset, the result is a unicode string
        try:
            decoded_body = smart_unicode(raw_body,
                                         charset,
                                         errors=ESCAPED_CHAR,
                                         on_error_guess=False)
        except LookupError:
            # Warn about a buggy charset
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, DEFAULT_CHARSET))
            om.out.debug(msg)

            # Force the default charset
            charset = DEFAULT_CHARSET
            decoded_body = smart_unicode(raw_body,
                                         charset,
                                         errors=ESCAPED_CHAR,
                                         on_error_guess=False)

        return decoded_body, charset
Ejemplo n.º 52
0
 def set_param(self, param_string):
     """
     Store the params of the url.

     :param param_string: The param to set (e.g. "foo=aaa"), it is converted
                          using smart_unicode() before being saved in
                          self._params.
     """
     unicode_params = smart_unicode(param_string)
     self._params = unicode_params
Ejemplo n.º 53
0
    def _charset_handling(self):
        """
        Decode the body based on the header (or metadata) encoding.
        The algorithm follows the encoding detection logic used by FF:

            1) Find the charset to use:
                a) The charset received in the constructor, if any
                b) Otherwise the one returned by guess_charset(), which is
                   meant to look at the CONTENT_TYPE HTTP header, e.g.:
                       content-type: text/html; charset=iso-8859-1
                   and at the 'meta' HTML header, e.g.:
                       <meta .* content="text/html; charset=utf-8" />
                c) Determine the charset using the chardet module (TODO)
                d) Use the DEFAULT_CHARSET

            2) Try to decode the body using the found charset. If the charset
            is unknown (LookupError) force the use of DEFAULT_CHARSET

        The raw body is returned without decoding when it is already unicode,
        when the response has no CONTENT_TYPE header or when the response is
        not text or html.

        :return: A tuple containing the body and the charset which was used
        """
        headers = self.get_headers()
        content_type, _ = headers.iget(CONTENT_TYPE, None)
        charset = self._charset
        rawbody = self._raw_body

        def decode_with(encoding):
            # The return value of smart_unicode() is a unicode string
            return smart_unicode(rawbody,
                                 encoding,
                                 errors=ESCAPED_CHAR,
                                 on_error_guess=False)

        # Only <str> strings are decoded, <unicode> strings are kept as-is
        if type(rawbody) is unicode:
            assert charset is not None, ("HTTPResponse objects containing "
                                         "unicode body must have an associated "
                                         "charset")
            result = (rawbody, charset)

        elif content_type is None:
            if len(rawbody) != 0:
                msg = ('The remote web server failed to send the CONTENT_TYPE'
                       ' header in HTTP response with id %s')
                om.out.debug(msg % self.id)

            result = (rawbody, DEFAULT_CHARSET)

        elif not self.is_text_or_html():
            # Not text, the body is kept as it is
            result = (rawbody, charset or DEFAULT_CHARSET)

        else:
            # Figure out which charset to work with
            if not charset:
                charset = self.guess_charset(rawbody, headers)

            try:
                decoded = decode_with(charset)
            except LookupError:
                # Warn about a buggy charset
                msg = ('Charset LookupError: unknown charset: %s; '
                       'ignored and set to default: %s' %
                       (charset, DEFAULT_CHARSET))
                om.out.debug(msg)

                # Force the default charset
                charset = DEFAULT_CHARSET
                decoded = decode_with(charset)

            result = (decoded, charset)

        return result
Ejemplo n.º 54
0
Archivo: url.py Proyecto: zsdlove/w3af
 def __contains__(self, s):
     """
     Implement the `in` operator for the url.

     :param s: The value to search for, it is converted using
               smart_unicode() before the comparison.
     :return: True if "s" in url_string
     """
     return smart_unicode(s) in self.url_string
Ejemplo n.º 55
0
    def __init__(self,
                 code,
                 read,
                 headers,
                 geturl,
                 original_url,
                 msg='OK',
                 _id=None,
                 time=DEFAULT_WAIT_TIME,
                 alias=None,
                 charset=None,
                 binary_response=False,
                 set_body=False,
                 debugging_id=None):
        """
        Validate the constructor arguments and initialize the response.

        :param code: HTTP code
        :param read: HTTP body text; typically a string
        :param headers: HTTP headers, must be a Headers instance
        :param geturl: URL object instance
        :param original_url: URL object instance
        :param msg: HTTP message
        :param _id: Optional response identifier
        :param time: The time between the request and the response
        :param alias: Alias for the response, this contains a hash that helps
                      the backend sqlite find http_responses faster by indexing
                      by this attr.
        :param charset: Response's encoding; obligatory when `read` is unicode
        :param binary_response: Saved as-is in self._binary_response
        :param set_body: When True and `read` is unicode, `read` is stored as
                         the already decoded body (used by from_dict())
        :param debugging_id: Saved as-is in self._debugging_id
        :raises TypeError: When geturl or original_url are not URL instances,
                           headers is not a Headers instance or read is not
                           a string.
        """
        if not isinstance(geturl, URL):
            raise TypeError('Invalid type %s for HTTPResponse ctor param'
                            ' geturl.' % type(geturl))

        if not isinstance(original_url, URL):
            raise TypeError('Invalid type %s for HTTPResponse ctor param'
                            ' original_url.' % type(original_url))

        if not isinstance(headers, Headers):
            raise TypeError('Invalid type %s for HTTPResponse ctor param'
                            ' headers.' % type(headers))

        if not isinstance(read, basestring):
            raise TypeError('Invalid type %s for HTTPResponse ctor param'
                            ' read.' % type(read))

        self._charset = charset
        self._headers = None

        # The set_body + unicode case is used for deserialization via
        # from_dict()
        #
        # The goal is to prevent the body from being analyzed for charset data
        # once again, since that was already done during to_dict() in the
        # get_body() call. In any other case the decoded body starts empty.
        body_is_decoded = set_body and isinstance(read, unicode)
        self._body = read if body_is_decoded else None
        self._raw_body = read

        self._binary_response = binary_response
        self._content_type = None
        self._dom = None

        # A unique id identifier for the response
        self.id = _id

        # From cache defaults to False
        self._from_cache = False

        # Set the info
        self._info = headers

        # Set code
        self._code = None
        self.set_code(code)

        # Set the URL variables
        # The URL that we really GET'ed
        self._realurl = original_url.uri2url()
        self._uri = original_url

        # The URL where we were redirected to (equal to original_url
        # when no redirect)
        self._redirected_url = geturl.uri2url()
        self._redirected_uri = geturl

        # Set the rest
        self._msg = smart_unicode(msg)
        self._time = time
        self._alias = alias
        self._doc_type = None
        self._debugging_id = debugging_id

        # Internal lock
        self._body_lock = threading.RLock()
Ejemplo n.º 56
0
 def set_fragment(self, fragment):
     """
     Store the fragment of the url.

     :param fragment: The new fragment, it is converted using
                      smart_unicode() before being saved in self._fragment.
     """
     unicode_fragment = smart_unicode(fragment)
     self._fragment = unicode_fragment