def clean_values(self, init_val):
    if isinstance(init_val, DataContainer) or isinstance(init_val, dict):
        return init_val

    cleaned_vals = []

    # Cleanup whatever came from the wire into a unicode string
    for key, value in init_val:
        # I can do this key, value thing because the headers do NOT
        # have multiple header values like query strings and post-data
        if isinstance(value, basestring):
            value = smart_unicode(value)

        cleaned_vals.append((smart_unicode(key), value))

    return cleaned_vals
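# Minimal usage sketch for clean_values. The raw (key, value) pairs and the
# 'headers_instance' name below are hypothetical illustrations, assuming any
# Headers/DataContainer-like object from the project that exposes this method.
raw_pairs = [('Content-Type', 'text/html'), ('X-Powered-By', 'PHP/5.3')]
cleaned = headers_instance.clean_values(raw_pairs)
# => [(u'Content-Type', u'text/html'), (u'X-Powered-By', u'PHP/5.3')]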
def test_parse_response_with_no_charset_in_header(self):
    # No charset was specified, use the default as well as the default
    # error handling scheme
    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        resp = self.create_resp(Headers([('Content-Type', 'text/xml')]),
                                html)
        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            resp.body)
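# The encoding tests in this excerpt iterate over TEST_RESPONSES.values() and
# unpack (body, charset) pairs. The real fixture is not shown here; a minimal
# sketch of a compatible structure, with made-up keys and sample bodies, is:
TEST_RESPONSES = {
    'utf8': (u'Ver peliculas online en espa\xf1ol', 'utf-8'),
    'latin1': (u'R\xe9sum\xe9 des r\xe9ponses', 'latin-1'),
}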
def test_parse_response_with_wrong_charset(self):
    # A wrong or non-existent charset was set; try to decode the response
    # using the default charset and handling scheme
    from random import choice

    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        headers = Headers([('Content-Type', 'text/xml; charset=%s' %
                            choice(('XXX', 'utf-8')))])
        resp = self.create_resp(headers, html)
        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            resp.body)
def test_dump_case03(self):
    header_value = ''.join(chr(i) for i in xrange(256))

    expected = u'\r\n'.join([u'GET http://w3af.com/a/b/c.php HTTP/1.1',
                             u'Hola: %s' % smart_unicode(header_value),
                             u'',
                             u''])

    headers = Headers([(u'Hola', header_value)])

    # TODO: Note that I'm passing a dc to the FuzzableRequest and it's not
    #       appearing in the dump. It might be a bug...
    fr = FuzzableRequest(self.url, method='GET', dc={u'a': ['b']},
                         headers=headers)

    self.assertEqual(fr.dump(), expected)
def test_from_dict_encodings(self):
    for body, charset in TEST_RESPONSES.values():
        html = body.encode(charset)
        resp = self.create_resp(Headers([('Content-Type', 'text/xml')]),
                                html)

        msg = msgpack.dumps(resp.to_dict())
        loaded_dict = msgpack.loads(msg)
        loaded_resp = HTTPResponse.from_dict(loaded_dict)

        self.assertEquals(
            smart_unicode(html, DEFAULT_CHARSET,
                          ESCAPED_CHAR, on_error_guess=False),
            loaded_resp.body)
def create_fuzzable_requests(resp, request=None, add_self=True):
    '''
    Generates the fuzzable requests based on an HTTP response instance.

    :param resp: An HTTPResponse instance.
    :param request: The HTTP request that generated the resp
    :param add_self: If I should add the current HTTP request
                     (:param request) to the result or not.

    :return: A list of fuzzable requests.
    '''
    res = []

    # Headers for all fuzzable requests created here:
    # And add the fuzzable headers to the dict
    req_headers = dict((h, '') for h in cf.cf.get('fuzzable_headers'))
    req_headers.update(request and request.get_headers() or {})
    req_headers = Headers(req_headers.items())

    # Get the cookie!
    cookieObj = _create_cookie(resp)

    # Create the fuzzable request that represents the request object
    # passed as parameter
    if add_self:
        qsr = HTTPQSRequest(resp.get_uri(),
                            headers=req_headers,
                            cookie=cookieObj)
        res.append(qsr)

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    resp_headers = resp.get_headers()

    for url_header_name in URL_HEADERS:
        url_header_value, _ = resp_headers.iget(url_header_name, '')
        if url_header_value:
            url = smart_unicode(url_header_value, encoding=resp.charset)
            try:
                absolute_location = resp.get_url().url_join(url)
            except ValueError:
                msg = 'The application sent a "%s" redirect that w3af' \
                      ' failed to correctly parse as a URL, the header' \
                      ' value was: "%s"'
                om.out.debug(msg % (url_header_name, url))
            else:
                qsr = HTTPQSRequest(absolute_location,
                                    headers=req_headers,
                                    cookie=cookieObj)
                res.append(qsr)

    # Try to find forms in the document
    try:
        dp = parser_cache.dpc.get_document_parser_for(resp)
    except w3afException:
        # Failed to find a suitable parser for the document
        form_list = []
    else:
        form_list = dp.get_forms()
        same_domain = lambda f: f.get_action().get_domain() == \
                                resp.get_url().get_domain()
        form_list = [f for f in form_list if same_domain(f)]

    if not form_list:
        # Check if it's a wsdl file
        # TODO: Rewrite web service support
        '''
        wsdlp = WSDLParser()
        try:
            wsdlp.set_wsdl(resp.get_body())
        except w3afException:
            pass
        else:
            for rem_meth in wsdlp.get_methods():
                wspdr = WebServiceRequest(rem_meth.get_location(),
                                          rem_meth.get_action(),
                                          rem_meth.get_parameters(),
                                          rem_meth.get_namespace(),
                                          rem_meth.get_methodName(),
                                          req_headers)
                res.append(wspdr)
        '''
    else:
        # Create one HTTPPostDataRequest for each form variant
        mode = cf.cf.get('form_fuzzing_mode')
        for form in form_list:
            for variant in form.get_variants(mode):
                if form.get_method().upper() == 'POST':
                    r = HTTPPostDataRequest(variant.get_action(),
                                            variant.get_method(),
                                            req_headers,
                                            cookieObj,
                                            variant)
                else:
                    # The default is a GET request
                    r = HTTPQSRequest(variant.get_action(),
                                      headers=req_headers,
                                      cookie=cookieObj)
                    r.set_dc(variant)

                res.append(r)

    return res
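# Minimal call sketch for create_fuzzable_requests. 'resp' and 'orig_request'
# are placeholder names assumed to be an existing HTTPResponse and the request
# that produced it; they are not fixtures taken from the project.
fuzzable = create_fuzzable_requests(resp, request=orig_request)

# 'fuzzable' now holds HTTPQSRequest / HTTPPostDataRequest instances built
# from the original URI, any 30X redirect headers and the forms found in the
# parsed document.
for fr in fuzzable:
    om.out.debug('Found fuzzable request: %s' % fr)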
def _charset_handling(self):
    '''
    Decode the body based on the header (or metadata) encoding.
    The implemented algorithm follows the encoding detection logic
    used by Firefox (FF):

        1) First try to find a charset using the following search criteria:
            a) Look in the 'content-type' HTTP header. Example:
                content-type: text/html; charset=iso-8859-1
            b) Look in the 'meta' HTML header. Example:
                <meta .* content="text/html; charset=utf-8" />
            c) Determine the charset using the chardet module (TODO)
            d) Use the DEFAULT_CHARSET

        2) Try to decode the body using the found charset. If it fails,
           then force it to use the DEFAULT_CHARSET

    Finally return the unicode (decoded) body and the used charset.

    Note: If the body is already a unicode string return it as it is.
    '''
    lcase_headers = self.get_lower_case_headers()
    charset = self._charset
    rawbody = self._raw_body

    # Only try to decode <str> strings. Skip <unicode> strings
    if type(rawbody) is unicode:
        _body = rawbody
        assert charset is not None, ("HTTPResponse objects containing "
                                     "unicode body must have an associated "
                                     "charset")
    elif 'content-type' not in lcase_headers:
        _body = rawbody
        charset = DEFAULT_CHARSET
        if len(_body):
            msg = "The remote web server failed to send the 'content-type'" \
                  " header in HTTP response with id %s" % self.id
            om.out.debug(msg)
    elif not self.is_text_or_html():
        # Not text, save as it is.
        _body = rawbody
        charset = charset or DEFAULT_CHARSET
    else:
        # Figure out charset to work with
        if not charset:
            # Start with the headers. Case-insensitive matching is expected
            # to come from the compiled CHARSET_* patterns themselves:
            # passing re.I to a compiled pattern's .search() would be
            # interpreted as a start position, not a flag.
            charset_mo = CHARSET_EXTRACT_RE.search(
                lcase_headers['content-type'])
            if charset_mo:
                # Seems like the response's headers contain a charset
                charset = charset_mo.groups()[0].lower().strip()
            else:
                # Continue with the body's meta tag
                charset_mo = CHARSET_META_RE.search(rawbody)
                if charset_mo:
                    charset = charset_mo.groups()[0].lower().strip()
                else:
                    charset = DEFAULT_CHARSET

        # Now that we have the charset, we use it!
        # The return value of the decode function is a unicode string.
        try:
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)
        except LookupError:
            # Warn about a buggy charset
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, self._charset))
            om.out.debug(msg)

            # Forcing it to use the default
            charset = DEFAULT_CHARSET
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)

    return _body, charset
def _charset_handling(self):
    '''
    Decode the body based on the header (or metadata) encoding.
    The implemented algorithm follows the encoding detection logic
    used by Firefox (FF):

        1) First try to find a charset using the following search criteria:
            a) Look in the 'content-type' HTTP header. Example:
                content-type: text/html; charset=iso-8859-1
            b) Look in the 'meta' HTML header. Example:
                <meta .* content="text/html; charset=utf-8" />
            c) Determine the charset using the chardet module (TODO)
            d) Use the DEFAULT_CHARSET

        2) Try to decode the body using the found charset. If it fails,
           then force it to use the DEFAULT_CHARSET

    Finally return the unicode (decoded) body and the used charset.

    Note: If the body is already a unicode string return it as it is.
    '''
    lcase_headers = self.get_lower_case_headers()
    charset = self._charset
    rawbody = self._raw_body

    # Only try to decode <str> strings. Skip <unicode> strings
    if type(rawbody) is unicode:
        _body = rawbody
        assert charset is not None, ("HTTPResponse objects containing "
                                     "unicode body must have an associated "
                                     "charset")
    elif 'content-type' not in lcase_headers:
        _body = rawbody
        charset = DEFAULT_CHARSET
        if len(_body):
            msg = "The remote web server failed to send the 'content-type'" \
                  " header in HTTP response with id %s" % self.id
            om.out.debug(msg)
    elif not self.is_text_or_html():
        # Not text, save as it is.
        _body = rawbody
        charset = charset or DEFAULT_CHARSET
    else:
        # Figure out charset to work with
        if not charset:
            charset = self.guess_charset(rawbody, lcase_headers)

        # Now that we have the charset, we use it!
        # The return value of the decode function is a unicode string.
        try:
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)
        except LookupError:
            # Warn about a buggy charset
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, self._charset))
            om.out.debug(msg)

            # Forcing it to use the default
            charset = DEFAULT_CHARSET
            _body = smart_unicode(rawbody, charset,
                                  errors=ESCAPED_CHAR,
                                  on_error_guess=False)

    return _body, charset
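# The refactored _charset_handling above delegates to self.guess_charset,
# which is not included in this excerpt. A sketch of that helper, reconstructed
# from the inline logic of the previous version (an assumption, not the
# project's actual implementation), could look like this:
def guess_charset(self, rawbody, lcase_headers):
    # Start with the headers, e.g. "text/html; charset=iso-8859-1"
    charset_mo = CHARSET_EXTRACT_RE.search(lcase_headers['content-type'])
    if charset_mo:
        # Seems like the response's headers contain a charset
        return charset_mo.groups()[0].lower().strip()

    # Continue with the body's meta tag, e.g.
    # <meta ... content="text/html; charset=utf-8" />
    charset_mo = CHARSET_META_RE.search(rawbody)
    if charset_mo:
        return charset_mo.groups()[0].lower().strip()

    # Nothing found, fall back to the default
    return DEFAULT_CHARSET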