def test_parse_response_with_no_charset_in_header(self):
     # The 'Content-Type' header carries no charset, so the response body
     # is expected to be decoded with DEFAULT_CHARSET and the default
     # error handling scheme (ESCAPED_CHAR).
     for body, charset in TEST_RESPONSES.values():
         html = body.encode(charset)
         resp = self.create_resp({'Content-Type': 'text/xml'}, html)
         expected = smart_unicode(html, DEFAULT_CHARSET,
                                  ESCAPED_CHAR, on_error_guess=False)
         # assertEqual is the supported name; assertEquals is a deprecated
         # alias.
         self.assertEqual(expected, resp.body)
 def test_parse_response_with_wrong_charset(self):
     # A wrong or nonexistent charset was set; the response is expected to
     # be decoded using the default charset and error handling scheme.
     #
     # Every header value is exercised on every run (instead of picking
     # one at random) so that a failure is always reproducible.
     for body, charset in TEST_RESPONSES.values():
         html = body.encode(charset)
         for header_charset in ('XXX', 'utf-8'):
             headers = {'Content-Type':
                            'text/xml; charset=%s' % header_charset}
             resp = self.create_resp(headers, html)
             expected = smart_unicode(html, DEFAULT_CHARSET,
                                      ESCAPED_CHAR, on_error_guess=False)
             self.assertEqual(expected, resp.body)
Exemple #3
0
def createFuzzableRequests(resp, request=None, add_self=True):
    '''
    Generates the fuzzable requests based on an http response instance.

    The returned list is built in this order:
        1) A query string request for the URI of `resp` (only if add_self).
        2) If `resp` is a 30x response with a "Location" (or "URI") header
           that can be joined to the response URL, a query string request
           for the redirect target.
        3) One request per form variant found by the document parser; if
           no forms are found, one request per method of the WSDL document
           in the body (when the WSDL parser accepts the body).

    @parameter resp: An HTTPResponse instance.
    @parameter request: The HTTP request that generated the resp
    @parameter add_self: If I should add the current HTTP request
        (@parameter request) to the result or not.

    @return: A list of fuzzable requests.
    '''
    res = []

    # Headers for all fuzzable requests created here: every configured
    # fuzzable header is present with an empty value.
    headers = dict.fromkeys(cf.cf.getData('fuzzableHeaders'), '')

    # Requests for the response URI and for the redirect target also carry
    # the headers of the originating request (when there is one).
    req_headers = dict(headers)
    if request:
        req_headers.update(request.getHeaders() or {})

    # Get the cookie!
    cookieObj = _create_cookie(resp)

    # Create the fuzzable request that represents the request object
    # passed as parameter
    if add_self:
        res.append(
            HTTPQSRequest(
                resp.getURI(),
                headers=req_headers,
                cookie=cookieObj
            )
        )

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    if 300 <= resp.getCode() < 400:
        redir_headers = resp.getLowerCaseHeaders()
        location = (redir_headers.get('location') or
                    redir_headers.get('uri', ''))
        if location:
            location = smart_unicode(location, encoding=resp.charset)
            try:
                absolute_location = resp.getURL().urlJoin(location)
            except ValueError:
                # An unparseable redirect target is only logged; the rest
                # of the response is still analyzed.
                msg = ('The application sent a 30x redirect "Location:" that'
                       ' w3af failed to correctly parse as an URL, the header'
                       ' value was: "%s"')
                om.out.debug(msg % location)
            else:
                res.append(
                    HTTPQSRequest(
                        absolute_location,
                        headers=req_headers,
                        cookie=cookieObj
                    )
                )

    # Try to find forms in the document
    try:
        dp = dpCache.dpc.getDocumentParserFor(resp)
    except w3afException:
        # Failed to find a suitable parser for the document
        form_list = []
    else:
        form_list = dp.getForms()

    if form_list:
        # Create one fuzzable request for each form variant
        mode = cf.cf.getData('fuzzFormComboValues')
        for form in form_list:
            for variant in form.getVariants(mode):
                if form.getMethod().upper() == 'POST':
                    r = httpPostDataRequest(
                        variant.getAction(),
                        variant.getMethod(),
                        headers,
                        cookieObj,
                        variant,
                        form.getFileVariables()
                    )
                else:
                    # The default is a GET request
                    r = HTTPQSRequest(
                        variant.getAction(),
                        headers=headers,
                        cookie=cookieObj
                    )
                    r.setDc(variant)

                res.append(r)
    else:
        # No forms: check if it's a wsdl file
        wsdlp = wsdlParser.wsdlParser()
        try:
            wsdlp.setWsdl(resp.getBody())
        except w3afException:
            # The body was rejected by the WSDL parser; nothing to add.
            pass
        else:
            for rem_meth in wsdlp.getMethods():
                res.append(
                    wsPostDataRequest(
                        rem_meth.getLocation(),
                        rem_meth.getAction(),
                        rem_meth.getParameters(),
                        rem_meth.getNamespace(),
                        rem_meth.getMethodName(),
                        headers
                    )
                )

    return res
Exemple #4
0
    def _charset_handling(self):
        '''
        Decode the body based on the header (or metadata) encoding.
        The implemented algorithm follows the encoding detection logic
        used by FF:

            1) First try to find a charset using the following search criteria:
                a) Look in the 'content-type' HTTP header. Example:
                    content-type: text/html; charset=iso-8859-1
                b) Look in the 'meta' HTML header. Example:
                    <meta .* content="text/html; charset=utf-8" />
                c) Determine the charset using the chardet module (TODO)
                d) Use the DEFAULT_CHARSET

            2) Try to decode the body using the found charset. If it fails
            because the charset is unknown (LookupError), then force it to
            use the DEFAULT_CHARSET

        The body is returned exactly as it is (no decoding is attempted)
        when:
            - it is already a unicode string (a charset must then already
              be associated with the response),
            - the response has no 'content-type' header,
            - the response is not text or html.

        @return: A tuple (body, charset) with the resulting body and the
            charset associated with it.
        '''
        lowerCaseHeaders = self.getLowerCaseHeaders()
        charset = self._charset
        rawbody = self._raw_body

        # Only try to decode <str> strings. Skip <unicode> strings
        if isinstance(rawbody, unicode):
            assert charset is not None, ("httpResponse objects containing "
                             "unicode body must have an associated charset")
            return rawbody, charset

        if 'content-type' not in lowerCaseHeaders:
            om.out.debug("hmmm... wtf?! The remote web server failed to "
                         "send the 'content-type' header.")
            return rawbody, DEFAULT_CHARSET

        if not self.is_text_or_html():
            # Not text, save as it is.
            return rawbody, charset or DEFAULT_CHARSET

        # Figure out charset to work with
        if not charset:
            # Start with the headers
            charset_mo = re.search(r'charset=\s*?([\w-]+)',
                                   lowerCaseHeaders['content-type'],
                                   re.I)
            if not charset_mo:
                # Continue with the body's meta tag
                charset_mo = re.search(
                        r'<meta.*?content=".*?charset=\s*?([\w-]+)".*?>',
                        rawbody, re.IGNORECASE)

            if charset_mo:
                charset = charset_mo.group(1).lower().strip()
            else:
                charset = DEFAULT_CHARSET

        # Now that we have the charset, we use it!
        # The return value of the decode function is a unicode string.
        try:
            _body = smart_unicode(
                            rawbody,
                            charset,
                            errors=ESCAPED_CHAR,
                            on_error_guess=False
                        )
        except LookupError:
            # Warn about a buggy charset. The message reports the charset
            # that is actually used as fallback (DEFAULT_CHARSET).
            msg = ('Charset LookupError: unknown charset: %s; '
                   'ignored and set to default: %s' %
                   (charset, DEFAULT_CHARSET))
            om.out.debug(msg)
            # Forcing it to use the default
            charset = DEFAULT_CHARSET
            _body = smart_unicode(
                            rawbody,
                            charset,
                            errors=ESCAPED_CHAR,
                            on_error_guess=False
                        )

        return _body, charset