Beispiel #1
0
    def test_analyze_cookies_https_value_over_http(self):
        '''
        A cookie set with the "secure" flag by an HTTPS response and later
        seen as a query string value of a plain HTTP request is expected to
        produce one 'Secure cookies over insecure channel' KB entry.
        '''
        empty_body = ''

        # First exchange: the cookie is received over HTTPS
        https_url = URL('https://www.w3af.com/')
        https_headers = Headers({
            'content-type': 'text/html',
            'Set-Cookie': 'abc=defjkluio; secure; httponly;'
        }.items())
        https_response = HTTPResponse(200, empty_body, https_headers,
                                      https_url, https_url, _id=1)
        self.plugin.grep(FuzzableRequest(https_url, method='GET'),
                         https_response)

        # Second exchange: the cookie value travels over HTTP as a parameter
        http_url = URL('http://www.w3af.com/?id=defjkluio')
        http_headers = Headers({'content-type': 'text/html'}.items())
        http_response = HTTPResponse(200, empty_body, http_headers,
                                     http_url, http_url, _id=1)
        self.plugin.grep(FuzzableRequest(http_url, method='GET'),
                         http_response)

        security_infos = kb.kb.get('analyze_cookies', 'security')
        stored_cookies = kb.kb.get('analyze_cookies', 'cookies')
        self.assertEqual(len(stored_cookies), 1)
        self.assertEqual(len(security_infos), 1)
        invalid_cookies = kb.kb.get('analyze_cookies', 'invalid-cookies')
        self.assertEqual(len(invalid_cookies), 0)

        self.assertIn('Secure cookies over insecure channel',
                      [info.get_name() for info in security_infos])
Beispiel #2
0
    def test_fuzzable_request(self):
        '''
        Store HTTPQSRequest instances in a DiskList and check index access
        and membership tests against stored and non-stored requests.
        '''
        disk_list = DiskList()

        def build_request(url_string, http_method):
            # Every request carries the same Referer header value
            referer = Headers([('Referer', 'http://w3af.org/')])
            return HTTPQSRequest(URL(url_string),
                                 method=http_method,
                                 headers=referer)

        stored_first = build_request('http://w3af.org/?id=2', 'GET')
        stored_second = build_request('http://w3af.org/?id=3', 'OPTIONS')
        # This one is never appended to the list
        never_stored = build_request('http://w3af.org/?id=7', 'FOO')

        for request in (stored_first, stored_second):
            disk_list.append(request)

        self.assertEqual(disk_list[0], stored_first)
        self.assertEqual(disk_list[1], stored_second)
        self.assertFalse(never_stored in disk_list)
        self.assertTrue(stored_second in disk_list)
Beispiel #3
0
    def from_httplib_resp(cls, httplibresp, original_url=None):
        '''
        Alternative constructor: create a HTTPResponse using the data held
        by a httplib.HTTPResponse instance.

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance. When it is not
                             provided the URL reported by the response is
                             used as the original URL too.

        :return: A HTTPResponse instance
        '''
        status = httplibresp.code
        reason = httplibresp.msg
        raw_headers = httplibresp.info()
        payload = httplibresp.read()
        header_inst = Headers(raw_headers.items())

        if not original_url:
            final_url = original_url = URL(httplibresp.geturl())
        else:
            # Reuse the encoding of the URL that was originally requested
            final_url = URL(httplibresp.geturl(), original_url.encoding)
            final_url = final_url.url_decode()

        return cls(status, payload, header_inst, final_url, original_url,
                   reason, charset=getattr(httplibresp, 'encoding', None))
Beispiel #4
0
    def test_doc_type(self):
        '''
        Check that responses built with different Content-Type header values
        answer True to the matching is_text_or_html(), is_pdf(), is_swf() and
        is_image() method.
        '''
        # Shared failure message for the MIME type loops
        error_fmt = "MIME type '%s' wasn't recognized as a valid '%s' type"

        # Text or HTML
        text_or_html_mime_types = ('application/javascript', 'text/html',
                                   'text/xml', 'text/cmd', 'text/css',
                                   'text/csv', 'text/javascript', 'text/plain')
        for mimetype in text_or_html_mime_types:
            resp = self.create_resp(Headers([('Content-Type', mimetype)]))
            self.assertEqual(
                True, resp.is_text_or_html(),
                error_fmt % (mimetype, HTTPResponse.DOC_TYPE_TEXT_OR_HTML))

        # PDF
        resp = self.create_resp(Headers([('Content-Type', 'application/pdf')]))
        self.assertEqual(True, resp.is_pdf())

        # SWF
        resp = self.create_resp(
            Headers([('Content-Type', 'application/x-shockwave-flash')]))
        self.assertEqual(True, resp.is_swf())

        # Image
        image_mime_types = ('image/gif', 'image/jpeg', 'image/pjpeg',
                            'image/png', 'image/tiff', 'image/svg+xml',
                            'image/vnd.microsoft.icon')
        for mimetype in image_mime_types:
            resp = self.create_resp(Headers([('Content-Type', mimetype)]))
            self.assertEqual(
                True, resp.is_image(),
                error_fmt % (mimetype, HTTPResponse.DOC_TYPE_IMAGE))
Beispiel #5
0
    def test_analyze_cookies_collect_uniq(self):
        '''
        Feed the plugin three responses for the same URL, each one with a
        different Set-Cookie value, and check that two cookie entries and no
        invalid cookies end up in the KB.
        '''
        body = ''
        url = URL('http://www.w3af.com/')

        for set_cookie_value in ('abc=def', '123=456', 'abc=456'):
            headers = Headers({
                'content-type': 'text/html',
                'Set-Cookie': set_cookie_value
            }.items())
            response = HTTPResponse(200, body, headers, url, url, _id=1)
            request = FuzzableRequest(url, method='GET')
            self.plugin.grep(request, response)

        collected = kb.kb.get('analyze_cookies', 'cookies')
        self.assertEqual(len(collected), 2)

        invalid = kb.kb.get('analyze_cookies', 'invalid-cookies')
        self.assertEqual(len(invalid), 0)
Beispiel #6
0
    def test_mutant_creation(self):
        '''
        Create HeadersMutant instances for a request that has a Referer
        header and check the data container, variable name, variable index,
        original value and type of the resulting mutants.
        '''
        freq = HTTPQSRequest(URL('http://moth/?a=1&b=2'),
                             headers=Headers([('Referer', 'http://moth/')]))

        mutants = HeadersMutant.create_mutants(freq, self.payloads, [],
                                               False,
                                               self.fuzzer_config)

        # One data container per payload is expected
        expected_dcs = [Headers([('Referer', payload)])
                        for payload in ('abc', 'def')]
        created_dcs = [mutant.get_dc() for mutant in mutants]
        self.assertEqual(created_dcs, expected_dcs)

        for index in (0, 1):
            mutant = mutants[index]
            self.assertEqual(mutant.get_var(), 'Referer')
            self.assertEqual(mutant.get_var_index(), 0)
            self.assertEqual(mutant.get_original_value(), '')

        self.assertTrue(
            all(isinstance(mutant, HeadersMutant) for mutant in mutants))
Beispiel #7
0
 def from_httplib_resp(cls, httplibresp, original_url=None):
     '''
     Alternative constructor: create a HTTPResponse using the data held by
     a httplib.HTTPResponse instance.

     :param httplibresp: httplib.HTTPResponse instance
     :param original_url: Optional 'url_object' instance. When it is not
                          provided the URL reported by the response is used
                          as the original URL too.

     :return: A HTTPResponse instance
     '''
     status = httplibresp.code
     reason = httplibresp.msg
     raw_headers = httplibresp.info()
     payload = httplibresp.read()
     header_inst = Headers(raw_headers.items())

     if not original_url:
         final_url = original_url = URL(httplibresp.geturl())
     else:
         # Reuse the encoding of the URL that was originally requested
         final_url = URL(httplibresp.geturl(), original_url.encoding)
         final_url = final_url.url_decode()

     if not isinstance(httplibresp, urllib2.HTTPError):
         # The encoding attribute is only set on CachedResponse instances
         encoding_source = httplibresp
     else:
         # This is possible because in errors.py the error is built as:
         # urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
         encoding_source = httplibresp.fp

     return cls(status, payload, header_inst, final_url, original_url,
                reason, charset=getattr(encoding_source, 'encoding', None))
Beispiel #8
0
    def test_fuzz_headers_no_headers(self):
        '''
        With 'Referer' configured as a fuzzable header and a request created
        without any headers, check the URLs, headers and types of the
        mutants returned by create_mutants().
        '''
        settings = (
            ('fuzzable_headers', ['Referer']),  # This one changed
            ('fuzz_cookies', False),
            ('fuzz_url_filenames', False),
            ('fuzzed_files_extension', 'gif'),
            ('fuzz_form_files', False),
            ('fuzz_url_parts', False),
        )
        for setting_name, setting_value in settings:
            cf_singleton.save(setting_name, setting_value)

        # No headers in the original request
        freq = HTTPQSRequest(URL('http://moth/?id=1'))
        generated_mutants = create_mutants(freq, self.payloads)

        expected_urls = ['http://moth/?id=abc',
                         'http://moth/?id=def',
                         'http://moth/?id=1',
                         'http://moth/?id=1']
        generated_urls = [mutant.get_uri().url_string
                          for mutant in generated_mutants]
        self.assertEqual(generated_urls, expected_urls)

        expected_headers = [Headers(),
                            Headers(),
                            Headers([('Referer', 'abc')]),
                            Headers([('Referer', 'def')])]
        generated_headers = [mutant.get_headers()
                             for mutant in generated_mutants]
        self.assertEqual(expected_headers, generated_headers)

        allowed_types = (QSMutant, HeadersMutant)
        self.assertTrue(all(isinstance(mutant, allowed_types)
                            for mutant in generated_mutants))
Beispiel #9
0
    def from_httplib_resp(cls, httplibresp, original_url=None):
        '''
        Alternative constructor: create a HTTPResponse using the data held
        by a httplib.HTTPResponse instance.

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance. When it is not
                             provided the URL reported by the response is
                             used as the original URL too.

        :return: A HTTPResponse instance
        '''
        status = httplibresp.code
        reason = httplibresp.msg
        raw_headers = httplibresp.info()
        payload = httplibresp.read()
        header_inst = Headers(raw_headers.items())

        if not original_url:
            final_url = original_url = URL(httplibresp.geturl())
        else:
            # Reuse the encoding of the URL that was originally requested
            final_url = URL(httplibresp.geturl(), original_url.encoding)
            final_url = final_url.url_decode()

        if not isinstance(httplibresp, urllib2.HTTPError):
            # The encoding attribute is only set on CachedResponse instances
            encoding_source = httplibresp
        else:
            # This is possible because in errors.py the error is built as:
            # urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
            encoding_source = httplibresp.fp

        return cls(status, payload, header_inst, final_url, original_url,
                   reason, charset=getattr(encoding_source, 'encoding', None))
Beispiel #10
0
    def test_get_lower_case_headers(self):
        '''
        get_lower_case_headers() is expected to return headers that compare
        equal to a Headers instance with a lower case name, and that contain
        the lower case name.
        '''
        expected = Headers([('content-type', 'text/html')])
        resp = self.create_resp(Headers([('Content-Type', 'text/html')]),
                                "<html/>")

        self.assertEqual(resp.get_lower_case_headers(), expected)
        self.assertIn('content-type', resp.get_lower_case_headers())
Beispiel #11
0
 def test_http_auth_detect_simple(self):
     '''
     A 401 response with a www-authenticate header is expected to add one
     'auth' entry and no 'userPassUri' entries to the KB.
     '''
     headers = Headers({'content-type': 'text/html',
                        'www-authenticate': 'realm-w3af'}.items())
     response = HTTPResponse(401, '', headers, self.url, self.url, _id=1)

     self.plugin.grep(self.request, response)

     auth_infos = kb.kb.get('http_auth_detect', 'auth')
     self.assertEqual(len(auth_infos), 1)
     user_pass_infos = kb.kb.get('http_auth_detect', 'userPassUri')
     self.assertEqual(len(user_pass_infos), 0)
 def test_http_auth_detect_simple(self):
     '''
     A 401 response with a www-authenticate header is expected to add one
     'auth' entry and no 'userPassUri' entries to the KB.
     '''
     headers = Headers({'content-type': 'text/html',
                        'www-authenticate': 'realm-w3af'}.items())
     response = HTTPResponse(401, '', headers, self.url, self.url, _id=1)

     self.plugin.grep(self.request, response)

     auth_infos = kb.kb.get('http_auth_detect', 'auth')
     self.assertEqual(len(auth_infos), 1)
     user_pass_infos = kb.kb.get('http_auth_detect', 'userPassUri')
     self.assertEqual(len(user_pass_infos), 0)
    def test_analyze_cookies_with_httponly_case_sensitive_expires(self):
        '''
        A cookie received over HTTPS with mixed case "Expires", "Secure" and
        "HttpOnly" attributes is expected to be stored once without any
        security findings.
        '''
        url = URL('https://www.w3af.com/')
        raw_headers = {'content-type': 'text/html',
                       'Set-Cookie': 'name2=value2; Expires=Wed, 09-Jun-2021 10:18:14 GMT;Secure;HttpOnly'}
        response = HTTPResponse(200, '', Headers(raw_headers.items()),
                                url, url, _id=1)
        request = FuzzableRequest(url, method='GET')

        self.plugin.grep(request, response)

        stored_cookies = kb.kb.get('analyze_cookies', 'cookies')
        self.assertEqual(len(stored_cookies), 1)
        security_infos = kb.kb.get('analyze_cookies', 'security')
        self.assertEqual(len(security_infos), 0)
Beispiel #14
0
    def __init__(self, uri, method='GET', headers=None, cookie=None, dc=None):
        '''
        :param uri: Value handed to set_uri().
        :param method: HTTP method name, stored as is.
        :param headers: Optional headers, wrapped in a new Headers instance.
        :param cookie: Optional cookie; a new Cookie is created when falsy.
        :param dc: Optional data container; a new DataContainer is created
                   when falsy.
        '''
        super(FuzzableRequest, self).__init__()

        # Internal state, falsy arguments are replaced by empty containers
        self._dc = dc if dc else DataContainer()
        self._method = method
        self._headers = Headers(headers if headers else ())
        self._cookie = cookie if cookie else Cookie()
        self._data = None
        self.set_uri(uri)

        # Initialized after the URI was set
        self._sent_info_comp = None
Beispiel #15
0
    def _do_GET(self, url, with_rand_ua=True):
        '''
        Send a GET request through self._uri_opener with a custom User-Agent
        header.

        :param url: URL instance to request.
        :param with_rand_ua: When true the User-Agent value comes from
                             get_random_user_agent(), otherwise an empty
                             User-Agent value is sent.
        :return: Whatever self._uri_opener.GET() returns.
        :raise ValueError: When url is not a URL instance.
        '''
        if not isinstance(url, URL):
            raise ValueError('The url parameter of a _do_GET  must'
                             ' be of url.URL type.')

        # Please note that some tests show that the empty User-Agent is
        # useful for the mobile search.
        user_agent = get_random_user_agent() if with_rand_ua else ''
        headers = Headers([('User-Agent', user_agent)])

        return self._uri_opener.GET(url, headers=headers)
Beispiel #16
0
    def test_provides_cors_features_false(self):
        '''
        provides_cors_features() is expected to send a GET with an Origin
        header and to return a false value when the response has no headers.
        '''
        url = URL('http://moth/')
        fuzzable_request = FuzzableRequest(url)

        # The mocked opener always answers with a response without headers
        headerless_response = HTTPResponse(200, '', Headers(), url, url)
        opener = Mock()
        opener.GET = MagicMock(return_value=headerless_response)

        result = provides_cors_features(fuzzable_request, opener)

        expected_headers = Headers({'Origin': 'www.w3af.org'}.items())
        opener.GET.assert_called_with(url, headers=expected_headers)
        self.assertFalse(result)
Beispiel #17
0
    def test_analyze_cookies_with_httponly_case_sensitive_expires(self):
        """
        A cookie received over HTTPS with mixed case "Expires", "Secure" and
        "HttpOnly" attributes is expected to be stored once without any
        security findings.
        """
        url = URL("https://www.w3af.com/")
        raw_headers = {
            "content-type": "text/html",
            "Set-Cookie": "name2=value2; Expires=Wed, 09-Jun-2021 10:18:14 GMT;Secure;HttpOnly",
        }
        response = HTTPResponse(200, "", Headers(raw_headers.items()),
                                url, url, _id=1)
        request = FuzzableRequest(url, method="GET")

        self.plugin.grep(request, response)

        stored_cookies = kb.kb.get("analyze_cookies", "cookies")
        self.assertEqual(len(stored_cookies), 1)
        security_infos = kb.kb.get("analyze_cookies", "security")
        self.assertEqual(len(security_infos), 0)
Beispiel #18
0
    def crawl(self, fuzzable_request):
        '''
        Look for new URLs by adding and subtracting numbers to the URL and
        the parameters.

        :param fuzzable_request: A fuzzable_request instance that contains
                                 (among other things) the URL to test.
        '''
        referer = fuzzable_request.get_url().url_string
        headers = Headers([('Referer', referer)])

        original_response = self._uri_opener.GET(fuzzable_request.get_uri(),
                                                 cache=True, headers=headers)

        # Only continue for text/HTML responses, unless image fuzzing is on
        if not original_response.is_text_or_html() and not self._fuzz_images:
            return

        # Each mangled request is paired with the same original response
        # and the same headers
        args = izip(self._mangle_digits(fuzzable_request),
                    repeat(original_response),
                    repeat(headers))

        self.worker_pool.map_multi_args(self._do_request, args)

        # I add myself so the next call to this plugin won't find me ...
        # Example: index1.html ---> index2.html --!!--> index1.html
        self._already_visited.add(fuzzable_request.get_uri())
Beispiel #19
0
    def test_strange_http_codes(self):
        '''
        Responses with the 200 and 404 codes are expected to create no KB
        entries, while responses with the 999, 123, 567, 666 and 777 codes
        are expected to create one 'strange_http_codes' entry each.
        '''
        body = ''
        url = URL('http://www.w3af.com/')
        headers = Headers([('content-type', 'text/html')])
        request = FuzzableRequest(url, method='GET')

        def build_response(code):
            # All responses are identical except for the HTTP code
            return HTTPResponse(code, body, headers, url, url, _id=1)

        known_good = [build_response(code) for code in (200, 404)]
        known_bad = [build_response(code)
                     for code in (999, 123, 567, 666, 777)]

        for responses, expected_findings in ((known_good, 0), (known_bad, 1)):
            for resp in responses:
                # Start from an empty KB so that each response is checked
                # in isolation
                kb.kb.cleanup()
                self.plugin.grep(request, resp)

                findings = kb.kb.get('strange_http_codes',
                                     'strange_http_codes')
                self.assertEqual(len(findings), expected_findings)
Beispiel #20
0
    def mangle_response(self, response):
        '''
        Apply the configured response manglers to the body and headers.

        :param response: This is the response to mangle, it is modified in
                         place.
        :return: A mangled version of the response.
        '''
        response_manglers = self._manglers['s']

        # Body: apply every (regex, replacement) pair in order
        mangled_body = response.get_body()
        for body_regex, replacement in response_manglers['b']:
            mangled_body = body_regex.sub(replacement, mangled_body)
        response.set_body(mangled_body)

        # Headers: mangled as one string and then parsed back
        headers_as_str = str(response.get_headers())
        for header_regex, replacement in response_manglers['h']:
            headers_as_str = header_regex.sub(replacement, headers_as_str)

        try:
            parsed_headers = Headers.from_string(headers_as_str)
        except ValueError:
            # The original headers are kept when the result is not parseable
            om.out.error('Your header modifications created an invalid'
                         ' header string that could NOT be parsed back to a'
                         ' Header object.')
        else:
            response.set_headers(parsed_headers)

        if not self._user_option_fix_content_len:
            return response

        return self._fix_content_len(response)
Beispiel #21
0
    def test_headers(self):
        '''
        A fuzzable request created with add_headers is expected to keep the
        URL and the headers, and to use the GET method.
        '''
        extra_headers = Headers([('foo', 'bar')])
        fuzzable_request = create_fuzzable_request_from_parts(
            self.url, add_headers=extra_headers)

        self.assertEqual(fuzzable_request.get_url(), self.url)
        self.assertEqual(fuzzable_request.get_headers(), extra_headers)
        self.assertEqual(fuzzable_request.get_method(), 'GET')
Beispiel #22
0
 def test_blank_body_code(self):
     '''
     A 401 response with an empty body is expected to create no
     'blank_body' KB entries.
     '''
     response = HTTPResponse(401, '',
                             Headers([('content-type', 'text/html')]),
                             self.url, self.url, _id=1)
     request = FuzzableRequest(self.url, method='GET')

     self.plugin.grep(request, response)

     findings = kb.kb.get('blank_body', 'blank_body')
     self.assertEqual(len(findings), 0)
Beispiel #23
0
        def profile_me():
            '''
            To be profiled: run every plugin in self._plugins against the
            test-1.html ... test-4.html files and then call end() on each
            plugin.
            '''
            for _ in xrange(1):
                for counter in xrange(1, 5):

                    file_name = 'test-' + str(counter) + '.html'
                    file_path = os.path.join('plugins', 'tests', 'grep',
                                             'data', file_name)

                    # The context manager closes the file handle right after
                    # reading instead of leaving that to the garbage
                    # collector
                    with open(file_path) as html_file:
                        body = html_file.read()

                    hdrs = Headers({'Content-Type': 'text/html'}.items())
                    response = HTTPResponse(200,
                                            body,
                                            hdrs,
                                            URL(self.url_str + str(counter)),
                                            URL(self.url_str + str(counter)),
                                            _id=random.randint(1, 5000))

                    request = FuzzableRequest(self.url_inst)
                    for pinst in self._plugins:
                        pinst.grep(request, response)

            for pinst in self._plugins:
                pinst.end()
Beispiel #24
0
    def test_str_strange(self):
        '''
        str() on Headers that hold a value with the characters 0 to 255 is
        expected to contain the header name followed by the first
        characters of that value.
        '''
        all_chars = ''.join(map(chr, xrange(256)))
        strange_headers = Headers([(u'Hola', all_chars)])

        # I don't assert in a stricter way because the output depends on
        # smart_unicode which might change in the future
        self.assertIn('Hola: \x00\x01\x02', str(strange_headers))
Beispiel #25
0
def _build_http_response(body_content, content_type):
    '''
    Create a 200 HTTPResponse for http://w3af.com with the utf-8 charset.

    :param body_content: The response body.
    :param content_type: Value for the content-type header.
    :return: The new HTTPResponse instance.
    '''
    target = URL('http://w3af.com')

    response_headers = Headers()
    response_headers[u'content-type'] = content_type

    return HTTPResponse(200, body_content, response_headers, target, target,
                        charset='utf-8')
Beispiel #26
0
    def test_add_HTTPPostDataRequest(self):
        '''
        Add HTTPPostDataRequest instances (with repetitions) to a DiskSet
        and check index access, membership and length.
        '''
        disk_set = DiskSet()
        referer = Headers([('Referer', 'http://w3af.org/')])

        pdr1 = HTTPPostDataRequest(URL('http://w3af.org/?id=2'),
                                   method='GET', headers=referer)
        pdr2 = HTTPPostDataRequest(URL('http://w3af.org/?id=3'),
                                   method='GET', headers=referer)
        # This one is never added to the set
        pdr3 = HTTPPostDataRequest(URL('http://w3af.org/?id=7'),
                                   method='FOO', headers=referer)

        # Both requests are added twice
        for request in (pdr1, pdr2, pdr2, pdr1):
            disk_set.add(request)

        self.assertEqual(disk_set[0], pdr1)
        self.assertEqual(disk_set[1], pdr2)
        self.assertFalse(pdr3 in disk_set)
        self.assertTrue(pdr2 in disk_set)
        self.assertEqual(len(disk_set), 2)

        # This forces an internal change in the URL object
        pdr2.get_url().url_string
        self.assertTrue(pdr2 in disk_set)
Beispiel #27
0
 def test_check_case09(self):
     '''
     IsVulnerableHelper.check() is expected to return a true value for a
     301 response with the 'hello world abc def' body.
     '''
     target = URL('http://moth/')
     helper = IsVulnerableHelper(200, 301, re.compile('def'),
                                 re.compile('xyz'), re.compile('spam'))
     response = HTTPResponse(301, 'hello world abc def', Headers(),
                             target, target)

     self.assertTrue(helper.check(response))
Beispiel #28
0
            def __call__(self,
                         uri,
                         data=None,
                         headers=None,
                         cache=False,
                         grep=True,
                         cookies=True):
                '''
                Send a request to uri using the HTTP method stored in
                self._method.

                :param uri: URL instance to send the request to.
                :param data: Optional request data, handed to HTTPRequest.
                :param headers: Headers instance with the headers to add;
                                None (the default) means no extra headers.
                :param cache: Handed to HTTPRequest.
                :param grep: Handed to the _send() call.
                :param cookies: Handed to HTTPRequest.
                :return: An HTTPResponse object that's the result of
                    sending the request with a method different from
                    "GET" or "POST".
                :raise TypeError: When uri is not a URL instance or headers
                                  is not a Headers instance.
                '''
                # Use a fresh Headers per call instead of a default instance
                # created once at definition time and shared by every call.
                if headers is None:
                    headers = Headers()

                if not isinstance(uri, URL):
                    raise TypeError('The uri parameter of AnyMethod.'
                                    '__call__() must be of url.URL type.')

                if not isinstance(headers, Headers):
                    raise TypeError('The headers parameter of AnyMethod.'
                                    '__call__() must be of Headers type.')

                self._xurllib._init()

                req = HTTPRequest(uri,
                                  data,
                                  cookies=cookies,
                                  cache=cache,
                                  method=self._method)
                req = self._xurllib._add_headers(req, headers or {})
                return self._xurllib._send(req, grep=grep)
Beispiel #29
0
    def from_dict(cls, unserialized_dict):
        '''
        Rebuild an instance from its dict representation.

        * msgpack is MUCH faster than cPickle,
        * msgpack can't serialize python objects,
        * so a dict representation of HTTPRequest is created to serialize it,
        * and this method is used to get the object back

        :param unserialized_dict: A dict just as returned by to_dict()
        '''
        method = unserialized_dict['method']
        uri = unserialized_dict['uri']
        raw_headers = unserialized_dict['headers']
        data = unserialized_dict['data']
        cookies = unserialized_dict['cookies']
        cache = unserialized_dict['cache']

        # The headers and URI are stored as plain types and wrapped here
        headers_inst = Headers(raw_headers.items())
        url_inst = URL(uri)

        return cls(url_inst, data=data, headers=headers_inst,
                   cookies=cookies, cache=cache, method=method)
Beispiel #30
0
    def _create_fuzzable_request(self):
        '''
        Build a fuzzable request object using the attributes of this
        handler.

        Important variables used here:
            - self.headers : Stores the headers for the request
            - self.rfile : A file like object that stores the post_data
            - self.path : Stores the URL that was requested by the browser
            - self.command : Used as the request method
        '''
        path = self.path

        # See HTTPWrapperClass
        if hasattr(self.server, 'chainedHandler'):
            path = "https://" + self.server.chainedHandler.path + path

        request_headers = Headers(self.headers.dict.items())
        fuzzable_request = FuzzableRequest(URL(path), self.command,
                                           request_headers)

        # The data is only set when there is something to send
        post_data = self._get_post_data()
        if post_data:
            fuzzable_request.set_data(post_data)

        return fuzzable_request
Beispiel #31
0
    def from_dict(cls, unserialized_dict):
        '''
        Rebuild an instance from its dict representation.

        * msgpack is MUCH faster than cPickle,
        * msgpack can't serialize python objects,
        * so a dict representation of HTTPResponse is created to serialize
          it,
        * and this method is used to get the object back

        :param unserialized_dict: A dict just as returned by to_dict()
        '''
        code = unserialized_dict['code']
        msg = unserialized_dict['msg']
        raw_headers = unserialized_dict['headers']
        body = unserialized_dict['body']
        elapsed = unserialized_dict['time']
        response_id = unserialized_dict['id']

        headers_inst = Headers(raw_headers.items())
        # The same URL instance is used as the URL and the original URL
        url_inst = URL(unserialized_dict['uri'])

        return cls(code, body, headers_inst, url_inst, url_inst,
                   msg=msg, _id=response_id, time=elapsed)
Beispiel #32
0
    def setUp(self):
        '''
        Clean the KB and create the plugin, URL, headers and request shared
        by the tests.
        '''
        kb.kb.cleanup()

        self.plugin = path_disclosure()

        target = URL('http://www.w3af.com/foo/bar.py')
        self.url = target
        self.header = Headers([('content-type', 'text/html')])
        self.request = FuzzableRequest(target, method='GET')
Beispiel #33
0
 def test_blank_body_method(self):
     '''
     A 200 response with an empty body that answers a request sent with
     the 'ARGENTINA' method is expected to create no 'blank_body' KB
     entries.
     '''
     body = ''
     headers = Headers([('content-type', 'text/html')])
     response = HTTPResponse(200, body, headers, self.url, self.url, _id=1)
     request = FuzzableRequest(self.url, method='ARGENTINA')
     self.plugin.grep(request, response)

     # NOTE(review): this used to query ('ssn', 'ssn'), which looks like a
     # copy/paste slip that made the assertion vacuous; confirm that
     # self.plugin is the blank_body plugin.
     self.assertEqual(len(kb.kb.get('blank_body', 'blank_body')), 0)
Beispiel #34
0
    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 follow_redir=True, cookies=True, cache=False, method=None):
        '''
        This is a simple wrapper around a urllib2 request object which helps
        with some common tasks like serialization, cache, etc.

        :param url: URL object; its url_encode() result is handed to
                    urllib2.Request.
        :param data: Optional request data.
        :param headers: Optional headers (anything dict() accepts); None
                        (the default) means no headers.
        :param origin_req_host: Handed to urllib2.Request.
        :param unverifiable: Handed to urllib2.Request.
        :param follow_redir: Stored in self.follow_redir.
        :param cookies: Stored in self.cookies.
        :param cache: Stored in self.get_from_cache.
        :param method: None means "choose the method in the default way":
                            if self.has_data():
                                return "POST"
                            else:
                                return "GET"
        '''
        #
        # Save some information for later access in an easier way
        #
        self.url_object = url
        self.follow_redir = follow_redir
        self.cookies = cookies
        self.get_from_cache = cache

        self.method = method
        if self.method is None:
            self.method = 'POST' if data else 'GET'

        # urllib2.Request receives a plain dict copy. A None sentinel is
        # used instead of a Headers() default, which would be created once
        # at definition time and shared by every call.
        headers = {} if headers is None else dict(headers)

        # Call the base class constructor
        urllib2.Request.__init__(self, url.url_encode(), data,
                                 headers, origin_req_host, unverifiable)
        RequestMixIn.__init__(self)
Beispiel #35
0
def create_fuzzable_request_from_request(request, add_headers=None):
    '''
    :param request: The HTTPRequest to take the information from.
    :param add_headers: Optional headers, applied after the request headers
                        and the unredirected headers.
    :return: A fuzzable request with the same info as request
    :raise TypeError: When request is not an HTTPRequest instance.
    '''
    if not isinstance(request, HTTPRequest):
        raise TypeError('Requires HTTPRequest to create FuzzableRequest.')

    request_url = request.url_object
    request_data = str(request.get_data() or '')
    request_method = request.get_method()

    # Later updates overwrite the values set by earlier ones
    merged_headers = Headers(request.headers.items())
    merged_headers.update(request.unredirected_hdrs.items())
    merged_headers.update(add_headers or Headers())

    return create_fuzzable_request_from_parts(request_url,
                                              method=request_method,
                                              post_data=request_data,
                                              add_headers=merged_headers)
Beispiel #36
0
    def __init__(self, uri, method='GET',
                 headers=None, cookie=None, dc=None):
        '''
        :param uri: Value handed to set_uri().
        :param method: HTTP method name, stored as is.
        :param headers: Optional headers, wrapped in a new Headers instance.
        :param cookie: Optional cookie; a new Cookie is created when falsy.
        :param dc: Optional data container; a new DataContainer is created
                   when falsy.
        '''
        super(FuzzableRequest, self).__init__()

        # Internal state, falsy arguments are replaced by empty containers
        self._dc = dc if dc else DataContainer()
        self._method = method
        self._headers = Headers(headers if headers else ())
        self._cookie = cookie if cookie else Cookie()
        self._data = None
        self.set_uri(uri)

        # Initialized after the URI was set
        self._sent_info_comp = None
Beispiel #37
0
 def from_httplib_resp(cls, httplibresp, original_url=None):
     '''
     Alternative constructor: create a HTTPResponse using the data held by
     a httplib.HTTPResponse instance.

     :param httplibresp: httplib.HTTPResponse instance
     :param original_url: Optional 'url_object' instance. When it is not
                          provided the URL reported by the response is used
                          as the original URL too.

     :return: A HTTPResponse instance
     '''
     status = httplibresp.code
     reason = httplibresp.msg
     raw_headers = httplibresp.info()
     payload = httplibresp.read()
     header_inst = Headers(raw_headers.items())

     if not original_url:
         final_url = original_url = URL(httplibresp.geturl())
     else:
         # Reuse the encoding of the URL that was originally requested
         final_url = URL(httplibresp.geturl(), original_url.encoding)
         final_url = final_url.url_decode()

     return cls(status, payload, header_inst, final_url, original_url,
                reason, charset=getattr(httplibresp, 'encoding', None))
Beispiel #38
0
    def set_headers(self, headers):
        '''
        Sets the headers and also analyzes them in order to get the response
        mime type (text/html , application/pdf, etc).

        The document type is reset to DOC_TYPE_OTHER and only changed when
        the content-type header value matches one of the known types.

        :param headers: The headers; either a httplib.HTTPMessage, which is
                        converted to a Headers instance, or an object that
                        is stored as is.
        '''
        # Fix lowercase in header names from HTTPMessage
        if isinstance(headers, httplib.HTTPMessage):
            self._headers = Headers()
            for header in headers.headers:
                key, value = header.split(':', 1)
                self._headers[key.strip()] = value.strip()
        else:
            self._headers = headers

        # Set the type, for easy access.
        self._doc_type = HTTPResponse.DOC_TYPE_OTHER

        content_type_hvalue, _ = self._headers.iget('content-type', None)
        if content_type_hvalue is None:
            return

        # we need exactly content type but not charset
        try:
            self._content_type = content_type_hvalue.split(';', 1)[0]
        except Exception:
            # Best effort: a value that can't be split is only logged.
            # "except Exception" lets KeyboardInterrupt and SystemExit
            # propagate, which the previous bare except did not.
            msg = 'Invalid Content-Type value "%s" sent in HTTP response.'
            om.out.debug(msg % (content_type_hvalue,))
            return

        content_type = self._content_type.lower()
        text_words = ('text', 'html', 'xml', 'txt', 'javascript')

        # Set the doc_type, the first match wins
        if 'image' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_IMAGE

        elif 'pdf' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_PDF

        elif 'x-shockwave-flash' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_SWF

        elif any(word in content_type for word in text_words):
            self._doc_type = HTTPResponse.DOC_TYPE_TEXT_OR_HTML
Beispiel #39
0
    def mangle_request(self, request):
        '''
        Apply the configured request manglers to the data and headers.

        :param request: This is the request to mangle.
        :return: A mangled version of the request, created with
                 create_fuzzable_request_from_parts().
        '''
        request_manglers = self._manglers['q']

        # Data: apply every (regex, replacement) pair in order
        mangled_data = request.get_data()
        for data_regex, replacement in request_manglers['b']:
            mangled_data = data_regex.sub(replacement, mangled_data)

        # Headers: mangled as one string and then parsed back
        headers_as_str = str(request.get_headers())
        for header_regex, replacement in request_manglers['h']:
            headers_as_str = header_regex.sub(replacement, headers_as_str)

        mangled_headers = Headers.from_string(headers_as_str)

        return create_fuzzable_request_from_parts(request.get_uri(),
                                                  request.get_method(),
                                                  mangled_data,
                                                  mangled_headers)
Beispiel #40
0
class HTTPResponse(object):
    '''
    An HTTP response: status code, message, headers and body, together with
    the URL that was requested and the URL that finally answered.

    Headers and body are processed lazily: the content type / doc type are
    calculated the first time the headers are read, and the body is decoded
    the first time the body (or the charset) is read.
    '''

    DOC_TYPE_TEXT_OR_HTML = 'DOC_TYPE_TEXT_OR_HTML'
    DOC_TYPE_SWF = 'DOC_TYPE_SWF'
    DOC_TYPE_PDF = 'DOC_TYPE_PDF'
    DOC_TYPE_IMAGE = 'DOC_TYPE_IMAGE'
    DOC_TYPE_OTHER = 'DOC_TYPE_OTHER'

    def __init__(self, code, read, headers, geturl, original_url,
                 msg='OK', _id=None, time=0.2, alias=None, charset=None):
        '''
        :param code: HTTP code
        :param read: HTTP body text; must be a string (str or unicode)
        :param headers: HTTP headers, must be a Headers instance
        :param geturl: URL object instance
        :param original_url: URL object instance
        :param msg: HTTP message
        :param _id: Optional response identifier
        :param time: The time between the request and the response
        :param alias: Alias for the response, this contains a hash that helps
                      the backend sqlite find http_responses faster by indexing
                      by this attr.
        :param charset: Response's encoding; obligatory when `read` is unicode

        :raise TypeError: If geturl, original_url, headers or read have an
                          unexpected type.
        '''
        if not isinstance(geturl, URL):
            raise TypeError('Invalid type %s for HTTPResponse ctor param geturl.'
                            % type(geturl))

        if not isinstance(original_url, URL):
            raise TypeError('Invalid type %s for HTTPResponse ctor param original_url.'
                            % type(original_url))

        if not isinstance(headers, Headers):
            raise TypeError('Invalid type %s for HTTPResponse ctor param headers.'
                            % type(headers))

        if not isinstance(read, basestring):
            raise TypeError('Invalid type %s for HTTPResponse ctor param read.'
                            % type(read))

        self._charset = charset
        # Parsed headers; filled in lazily by get_headers() from self._info
        self._headers = None
        # Decoded body; filled in lazily by get_body() from self._raw_body
        self._body = None
        self._raw_body = read
        self._content_type = None
        self._dom = None
        self._clear_text_body = None
        # A unique id identifier for the response
        self.id = _id
        # From cache defaults to False
        self._from_cache = False
        # Set the info
        self._info = headers
        # Set code
        self.set_code(code)

        # Set the URL variables
        # The URL that we really GET'ed
        self._realurl = original_url.uri2url()
        self._uri = original_url
        # The URL where we were redirected to (equal to original_url
        # when no redirect)
        self._redirected_url = geturl
        self._redirected_uri = geturl.uri2url()

        # Set the rest
        self._msg = msg
        self._time = time
        self._alias = alias
        self._doc_type = None

        # Internal lock, protects the lazy decoding of the body
        self._body_lock = threading.RLock()

    @classmethod
    def from_httplib_resp(cls, httplibresp, original_url=None):
        '''
        Factory function. Build a HTTPResponse object from a httplib.HTTPResponse
        instance

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance. When missing,
                             the URL reported by the response is used both
                             as the original and as the final URL.

        :return: A HTTPResponse instance
        '''
        resp = httplibresp
        code, msg, hdrs, body = (resp.code, resp.msg, resp.info(), resp.read())
        hdrs = Headers(hdrs.items())

        if original_url:
            url_inst = URL(resp.geturl(), original_url.encoding)
            url_inst = url_inst.url_decode()
        else:
            url_inst = original_url = URL(resp.geturl())

        charset = getattr(resp, 'encoding', None)
        return cls(code, body, hdrs, url_inst, original_url,
                   msg, charset=charset)

    @classmethod
    def from_dict(cls, unserialized_dict):
        '''
        * msgpack is MUCH faster than cPickle,
        * msgpack can't serialize python objects,
        * I have to create a dict representation of HTTPResponse to serialize it,
        * and a from_dict to have the object back

        :param unserialized_dict: A dict just as returned by to_dict()
        :return: A HTTPResponse instance
        '''
        udict = unserialized_dict

        code, msg, hdrs = udict['code'], udict['msg'], udict['headers']
        body, _time, _id = udict['body'], udict['time'], udict['id']

        headers_inst = Headers(hdrs.items())
        url = URL(udict['uri'])

        # to_dict() stores the decoded body, and a unicode body is only
        # accepted by _charset_handling() when a charset comes with it.
        # .get() keeps dicts serialized without the 'charset' key loadable.
        charset = udict.get('charset')

        return cls(code, body, headers_inst, url, url, msg=msg, _id=_id,
                   time=_time, charset=charset)

    def to_dict(self):
        '''
        :return: A dict that represents the current object and is serializable
                 by the json or msgpack modules.
        '''
        # Note: The Headers() object can be serialized by msgpack because it
        #       inherits from dict() and doesn't mangle it too much
        serializable_dict = {
            'code': self.get_code(),
            'msg': self.get_msg(),
            'headers': self.get_headers(),
            'body': self.get_body(),
            'time': self.get_wait_time(),
            'id': self.get_id(),
            'uri': self.get_uri().url_string,
            # Needed by from_dict() to rebuild a response with unicode body
            'charset': self.get_charset(),
        }
        return serializable_dict

    def __contains__(self, string_to_test):
        '''
        Determine if the `string_to_test` is contained by the HTTP response
        body.

        :param string_to_test: String to look for in the body
        '''
        return string_to_test in self.body

    def __eq__(self, other):
        '''
        Two responses are equal when id, code, headers, body and URI match.

        :return: True / False for HTTPResponse instances, NotImplemented for
                 any other type (so Python can try the reflected comparison
                 instead of failing with AttributeError).
        '''
        if not isinstance(other, HTTPResponse):
            return NotImplemented

        return (self.id == other.id and
                self._code == other._code and
                self.headers == other.headers and
                self.body == other.body and
                self._uri == other._uri)

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this method `a != b`
        # would compare object identity.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        vals = {
            'code': self.get_code(),
            'url': str(self.get_url()),
            'id': (' | id:%s' % self.id) if self.id else '',
            'fcache': ' | fromCache:True' if self._from_cache else ''
        }
        return '<HTTPResponse | %(code)s | %(url)s%(id)s%(fcache)s>' % vals

    def set_id(self, _id):
        self.id = _id

    def get_id(self):
        return self.id

    def set_code(self, code):
        self._code = code

    def get_code(self):
        return self._code

    def get_body(self):
        '''
        :return: The response body. It is decoded by _charset_handling() on
                 the first call and the result is kept for later calls.
        '''
        with self._body_lock:
            if self._body is None:
                self._body, self._charset = self._charset_handling()
                # Free 'raw_body'
                self._raw_body = None
            return self._body

    def set_body(self, body):
        '''
        Setter for body.

        :param body: A string that represents the body of the HTTP response
        :raise TypeError: If body is not a string.
        '''
        if not isinstance(body, basestring):
            msg = 'Invalid type %s for set_body parameter body.'
            raise TypeError(msg % type(body))

        self._body = None
        self._raw_body = body
        # Both are calculated from the body, drop them so they are not
        # served for a body that no longer exists
        self._dom = None
        self._clear_text_body = None

    body = property(get_body, set_body)

    def get_clear_text_body(self):
        '''
        :return: A clear text representation of the HTTP response body. The
                 result is kept for later calls.
        '''
        clear_text_body = self._clear_text_body

        if clear_text_body is None:

            # Calculate the clear text body
            dom = self.get_dom()
            if dom is not None:
                clear_text_body = ''.join(dom.itertext())
            else:
                # No DOM available, fall back to removing the tags by regex
                clear_text_body = ANY_TAG_MATCH.sub('', self.get_body())

            self._clear_text_body = clear_text_body

        return clear_text_body

    def set_dom(self, dom_inst):
        '''
        This setter is part of a performance improvement I'm talking about in
        get_dom() and sgmlParser._parse().

        Without this set_dom() which is called from sgmlParser._parse() when the
        code runs:
            sgmlParser( http_response )
            ...
            http_response.get_dom()

        The DOM is calculated twice.

        We still need to figure out how to solve the other issue which should
        aim to avoid the double DOM generation when:
            http_response.get_dom()
            ...
            sgmlParser( http_response )

        :return: None
        '''
        self._dom = dom_inst

    def get_dom(self):
        '''
        I don't want to calculate the DOM for all responses, only for those
        which are needed. This method will first calculate the DOM, and then
        save it for upcoming calls.

        @see: TODO: Potential performance improvement in sgmlParser._parse()
                    for ideas on how to reduce CPU usage.

        :return: The DOM, or None if the HTML normalization failed.
        '''
        if self._dom is None:
            try:
                parser = etree.HTMLParser(recover=True)
                self._dom = etree.fromstring(self.body, parser)
            except Exception:
                # Best effort: callers handle a None DOM
                msg = ('The HTTP body for "%s" could NOT be parsed by lxml.'
                       % self.get_url())
                om.out.debug(msg)
        return self._dom

    def get_charset(self):
        '''
        :return: The charset of the response. When it is not known yet the
                 body is decoded in order to find it.
        '''
        # This is the same lazy decoding done by get_body(); the same
        # (re-entrant) lock is taken so the two getters never decode and
        # free the raw body at the same time.
        with self._body_lock:
            if not self._charset:
                self._body, self._charset = self._charset_handling()
                # Free 'raw_body'
                self._raw_body = None
            return self._charset

    def set_charset(self, charset):
        self._charset = charset

    charset = property(get_charset, set_charset)

    def set_redir_url(self, ru):
        self._redirected_url = ru

    def get_redir_url(self):
        return self._redirected_url

    def set_redir_uri(self, ru):
        self._redirected_uri = ru

    def get_redir_uri(self):
        return self._redirected_uri

    def get_headers(self):
        '''
        :return: The response headers; they are analyzed by set_headers() on
                 the first call.
        '''
        if self._headers is None:
            self.headers = self._info
            assert self._headers is not None
        return self._headers

    def set_headers(self, headers):
        '''
        Sets the headers and also analyzes them in order to get the response
        mime type (text/html , application/pdf, etc).

        :param headers: The headers dict.
        '''
        # Fix lowercase in header names from HTTPMessage
        if isinstance(headers, httplib.HTTPMessage):
            self._headers = Headers()
            for header in headers.headers:
                key, value = header.split(':', 1)
                self._headers[key.strip()] = value.strip()
        else:
            self._headers = headers

        # Set the type, for easy access. Both values are reset so nothing
        # from a previous set of headers survives.
        self._doc_type = HTTPResponse.DOC_TYPE_OTHER
        self._content_type = None

        content_type_hvalue, _ = self._headers.iget('content-type', None)
        if content_type_hvalue is None:
            return

        # we need exactly content type but not charset
        try:
            self._content_type = content_type_hvalue.split(';', 1)[0]
        except Exception:
            # Best effort: a header value that can't be split leaves the
            # response as DOC_TYPE_OTHER
            msg = 'Invalid Content-Type value "%s" sent in HTTP response.'
            om.out.debug(msg % (content_type_hvalue,))
            return

        content_type = self._content_type.lower()
        text_words = ('text', 'html', 'xml', 'txt', 'javascript')

        # Set the doc_type, the order of the checks matters
        if 'image' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_IMAGE

        elif 'pdf' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_PDF

        elif 'x-shockwave-flash' in content_type:
            self._doc_type = HTTPResponse.DOC_TYPE_SWF

        elif any(word in content_type for word in text_words):
            self._doc_type = HTTPResponse.DOC_TYPE_TEXT_OR_HTML

    headers = property(get_headers, set_headers)

    def get_lower_case_headers(self):
        '''
        If the original headers were:
            {'Abc-Def': 'F00N3s'}
        This will return:
            {'abc-def': 'F00N3s'}

        The only thing that changes is the header name.
        '''
        lcase_headers = dict(
            (k.lower(), v) for k, v in self.headers.iteritems())
        return Headers(lcase_headers.items())

    def set_url(self, url):
        '''
        >>> url = URL('http://www.google.com')
        >>> r = HTTPResponse(200, '' , Headers(), url, url)
        >>> r.set_url('http://www.google.com/')
        Traceback (most recent call last):
          ...
        TypeError: The URL of a HTTPResponse object must be of url.URL type.
        >>> r.set_url(url)
        >>> r.get_url() == url
        True
        '''
        if not isinstance(url, URL):
            raise TypeError('The URL of a HTTPResponse object must be of '
                            'url.URL type.')

        self._realurl = url.uri2url()

    def get_url(self):
        return self._realurl

    def set_uri(self, uri):
        '''
        >>> uri = URL('http://www.google.com/')
        >>> r = HTTPResponse(200, '' , Headers(), uri, uri)
        >>> r.set_uri('http://www.google.com/')
        Traceback (most recent call last):
          ...
        TypeError: The URI of a HTTPResponse object must be of url.URL type.
        >>> r.set_uri(uri)
        >>> r.get_uri() == uri
        True

        '''
        if not isinstance(uri, URL):
            raise TypeError('The URI of a HTTPResponse object must be of '
                            'url.URL type.')

        self._uri = uri
        self._realurl = uri.uri2url()

    def get_uri(self):
        return self._uri

    def was_redirected(self):
        # NOTE(review): self._uri is the original URL as received, while
        # self._redirected_uri is geturl.uri2url(). If uri2url() removes
        # the query string this is True for any URL that has one - confirm.
        return self._uri != self._redirected_uri

    def set_from_cache(self, fcache):
        '''
        :param fcache: True if this response was obtained from the
        local cache.
        '''
        self._from_cache = fcache

    def get_from_cache(self):
        '''
        :return: True if this response was obtained from the local cache.
        '''
        return self._from_cache

    def set_wait_time(self, t):
        self._time = t

    def get_wait_time(self):
        return self._time

    def set_alias(self, alias):
        self._alias = alias

    def get_alias(self):
        return self._alias

    def info(self):
        '''
        :return: The headers object that was passed to the constructor.
        '''
        return self._info

    def get_status_line(self):
        '''Return status-line of response.'''
        return 'HTTP/1.1' + SP + str(self._code) + SP + self._msg + CRLF

    def get_msg(self):
        return self._msg

    def _charset_handling(self):
        '''
        Decode the body based on the header (or metadata) encoding.
        The implemented algorithm follows the encoding detection logic
        used by FF:

            1) First try to find a charset using the following search criteria:
                a) Look in the 'content-type' HTTP header. Example:
                    content-type: text/html; charset=iso-8859-1
                b) Look in the 'meta' HTML header. Example:
                    <meta .* content="text/html; charset=utf-8" />
                c) Determine the charset using the chardet module (TODO)
                d) Use the DEFAULT_CHARSET

            2) Try to decode the body using the found charset. If it fails,
            then force it to use the DEFAULT_CHARSET

        Finally return the unicode (decoded) body and the used charset.

        Note: If the body is already a unicode string return it as it is.

        :return: A (body, charset) tuple.
        '''
        lcase_headers = self.get_lower_case_headers()
        charset = self._charset
        rawbody = self._raw_body

        # Only try to decode <str> strings. Skip <unicode> strings
        if type(rawbody) is unicode:
            _body = rawbody
            assert charset is not None, ("HTTPResponse objects containing "
                                         "unicode body must have an associated "
                                         "charset")
        elif 'content-type' not in lcase_headers:
            _body = rawbody
            charset = DEFAULT_CHARSET

            if len(_body):
                msg = "The remote web server failed to send the 'content-type'"\
                      " header in HTTP response with id %s" % self.id
                om.out.debug(msg)

        elif not self.is_text_or_html():
            # Not text, save as it is.
            _body = rawbody
            charset = charset or DEFAULT_CHARSET
        else:
            # Figure out charset to work with
            if not charset:
                charset = self.guess_charset(rawbody, lcase_headers)

            # Now that we have the charset, we use it!
            # The return value of the decode function is a unicode string.
            try:
                _body = smart_unicode(
                    rawbody,
                    charset,
                    errors=ESCAPED_CHAR,
                    on_error_guess=False
                )
            except LookupError:
                # Warn about a buggy charset, reporting the default that is
                # really going to be used
                msg = ('Charset LookupError: unknown charset: %s; '
                       'ignored and set to default: %s' %
                      (charset, DEFAULT_CHARSET))
                om.out.debug(msg)
                # Forcing it to use the default
                charset = DEFAULT_CHARSET
                _body = smart_unicode(
                    rawbody,
                    charset,
                    errors=ESCAPED_CHAR,
                    on_error_guess=False
                )

        return _body, charset

    def guess_charset(self, rawbody, headers):
        '''
        Find the charset of the response: first in the content-type header,
        then in the meta tag of the body, finally use DEFAULT_CHARSET.

        :param rawbody: The body, as received.
        :param headers: The headers, must contain a 'content-type' key.
        :return: The charset name.
        '''
        # The second positional argument of a compiled pattern's search()
        # is the start position, not the flags; passing re.I there skipped
        # the first two characters of the searched string.
        # NOTE(review): if case insensitive matching is wanted, the flag has
        # to be set where the two patterns are compiled.

        # Start with the headers
        charset_mo = CHARSET_EXTRACT_RE.search(headers['content-type'])
        if charset_mo:
            # Seems like the response's headers contain a charset
            charset = charset_mo.groups()[0].lower().strip()
        else:
            # Continue with the body's meta tag
            charset_mo = CHARSET_META_RE.search(rawbody)
            if charset_mo:
                charset = charset_mo.groups()[0].lower().strip()
            else:
                charset = DEFAULT_CHARSET

        return charset

    @property
    def content_type(self):
        '''
        The content type of the response, '' when it is unknown.
        '''
        if self._content_type is None:
            self.headers = self._info
        return self._content_type or ''

    @property
    def doc_type(self):
        '''
        One of the DOC_TYPE_* constants, calculated from the headers.
        '''
        if self._doc_type is None:
            self.headers = self._info
            assert self._doc_type is not None
        return self._doc_type

    def is_text_or_html(self):
        '''
        :return: True if this response is text or html
        '''
        return self.doc_type == HTTPResponse.DOC_TYPE_TEXT_OR_HTML

    def is_pdf(self):
        '''
        :return: True if this response is a PDF file
        '''
        return self.doc_type == HTTPResponse.DOC_TYPE_PDF

    def is_swf(self):
        '''
        :return: True if this response is a SWF file
        '''
        return self.doc_type == HTTPResponse.DOC_TYPE_SWF

    def is_image(self):
        '''
        :return: True if this response is an image file
        '''
        return self.doc_type == HTTPResponse.DOC_TYPE_IMAGE

    def dump_response_head(self):
        '''
        :return: A string with the status line followed by the headers:
            HTTP/1.1 200 OK
            Header1: Value1
            Header2: Value2
        '''
        dump_head = "%s%s" % (self.get_status_line(), self.dump_headers())
        if type(dump_head) is unicode:
            dump_head = dump_head.encode(self.charset)
        return dump_head

    def dump(self):
        '''
        Return a DETAILED str representation of this HTTP response object.
        '''
        body = self.body
        # Images, pdf and binary responses in general are never decoded
        # to unicode
        if isinstance(body, unicode):
            body = body.encode(DEFAULT_CHARSET, 'replace')
        return "%s%s%s" % (self.dump_response_head(), CRLF, body)

    def dump_headers(self):
        '''
        :return: a str representation of the headers.
        '''
        if self.headers:
            return CRLF.join(h + ': ' + hv for h, hv in self.headers.items()) + CRLF
        else:
            return ''

    def copy(self):
        return copy.deepcopy(self)

    def __getstate__(self):
        # The lock is removed from the state, it is created again in
        # __setstate__
        state = self.__dict__.copy()
        state.pop('_body_lock')
        return state

    def __setstate__(self, state):
        self.__dict__ = state
        self._body_lock = threading.RLock()

        
Beispiel #41
0
    def test_clone_with_list_values(self):
        # Every value of the clone must be the original value inside a list
        original = Headers([('a', 'b'), ('c', 'd')])
        list_valued = original.clone_with_list_values()

        for name, expected in (('a', ['b']), ('c', ['d'])):
            self.assertEqual(list_valued[name], expected)
Beispiel #42
0
 def test_from_string(self):
     # Parsing a raw header line must give the same object as building it
     # from (name, value) pairs
     expected = Headers([('a', 'b')])
     parsed = Headers.from_string('a: b\r\n')
     self.assertEqual(parsed, expected)
Beispiel #43
0
class FuzzableRequest(RequestMixIn, DiskItem):
    '''
    This class represents a fuzzable request. Fuzzable requests were created
    to allow w3af plugins to be much simpler and don't really care if the
    vulnerability is in the postdata, querystring, header, cookie or any other
    variable.

    Other classes should inherit from this one and change the behaviour of
    get_uri() and get_data(). For example: the class HTTPQSRequest should return
    the _dc in the querystring (get_uri) and HTTPPostDataRequest should return
    the _dc in the POSTDATA (get_data()).

    :author: Andres Riancho ([email protected])
    '''

    def __init__(self, uri, method='GET',
                 headers=None, cookie=None, dc=None):
        '''
        :param uri: URL object instance
        :param method: HTTP method
        :param headers: Used to build the Headers of the request, no headers
                        when missing
        :param cookie: Cookie instance, an empty Cookie when missing
        :param dc: DataContainer instance, an empty one when missing

        :raise TypeError: If uri is not a URL instance.
        '''
        super(FuzzableRequest, self).__init__()

        # Internal variables
        self._dc = dc or DataContainer()
        self._method = method
        self._headers = Headers(headers or ())
        self._cookie = cookie or Cookie()
        self._data = None
        self.set_uri(uri)

        # Cache used by sent(), filled in on demand
        self._sent_info_comp = None

    def export(self):
        '''
        Generic version of how they are exported:
            METHOD,URL,DC

        Example:
            GET,http://localhost/index.php?abc=123&def=789,
            POST,http://localhost/index.php,abc=123&def=789

        :return: a csv str representation of the request
        '''
        #
        # FIXME: What if a comma is inside the URL or DC?
        # TODO: Why don't we export headers and cookies?
        #
        meth = self._method
        str_res = [meth, ',', str(self._url)]

        if meth == 'GET':
            # The parameters go in the URL column, the DC column is empty
            if self._dc:
                str_res.extend(('?', str(self._dc)))
            str_res.append(',')
        else:
            str_res.append(',')
            if self._dc:
                str_res.append(str(self._dc))

        return ''.join(str_res)

    def sent(self, smth_instng):
        '''
        Checks if something similar to `smth_instng` was sent in the request.
        This is used to remove false positives, e.g. if a grep plugin finds a "strange"
        string and wants to be sure it was not generated by an audit plugin.

        This method should only be used by grep plugins which often have false
        positives.

        The following example shows that we sent d'z"0 but d\'z"0 will
        as well be recognised as sent

        TODO: This function is called MANY times, and under some circumstances it's
        performance REALLY matters. We need to review this function.

        >>> f = FuzzableRequest(URL("""http://example.com/a?p=d'z"0&paged=2"""))
        >>> f.sent('d%5C%27z%5C%220')
        True

        >>> f._data = 'p=<SCrIPT>alert("bsMs")</SCrIPT>'
        >>> f.sent('<SCrIPT>alert(\"bsMs\")</SCrIPT>')
        True

        >>> f = FuzzableRequest(URL('http://example.com/?p=<ScRIPT>a=/PlaO/%0Afake_alert(a.source)</SCRiPT>'))
        >>> f.sent('<ScRIPT>a=/PlaO/fake_alert(a.source)</SCRiPT>')
        True

        :param smth_instng: The string
        :return: True if something similar was sent
        '''
        def make_comp(heterogen_string):
            '''
            This basically removes characters that are hard to compare
            '''
            heterogen_characters = ('\\', '\'', '"', '+', ' ',
                                    '\x00', '\r', '\n')

            for hetero_char in heterogen_characters:
                heterogen_string = heterogen_string.replace(hetero_char, '')
            return heterogen_string

        data = self._data or ''
        # This is the easy part. If it was exactly like this in the request
        if (data and smth_instng in data) or \
                smth_instng in self.get_uri() or \
                smth_instng in unquote(data) or \
                smth_instng in unicode(self._uri.url_decode()):
            return True

        # Ok, it's not in it but maybe something similar
        # Let's set up something we can compare
        if self._sent_info_comp is None:
            dc = self._dc
            dec_dc = unquote(str(dc)).decode(dc.encoding)
            data = '%s%s%s' % (unicode(self._uri), data, dec_dc)

            self._sent_info_comp = make_comp(data + unquote(data))

        min_len = 3
        # make the smth_instng comparable
        smth_instng_comps = (make_comp(smth_instng),
                             make_comp(unquote(smth_instng)))
        for smth_instng_comp in smth_instng_comps:
            # We don't want false positives just because the string is
            # short after making comparable
            if len(smth_instng_comp) >= min_len and \
                    smth_instng_comp in self._sent_info_comp:
                return True
        # I didn't sent the smth_instng in any way
        return False

    def __hash__(self):
        return hash(str(self._uri))

    def __str__(self):
        '''
        :return: A string representation of this fuzzable request.

        >>> fr = FuzzableRequest(URL("http://www.w3af.com/"))
        >>> str(fr)
        'http://www.w3af.com/ | Method: GET'

        >>> repr( fr )
        '<fuzzable request | GET | http://www.w3af.com/>'

        >>> fr.set_method('TRACE')
        >>> str(fr)
        'http://www.w3af.com/ | Method: TRACE'

        '''
        strelems = [unicode(self._url), u' | Method: ' + self._method]

        if self._dc:
            params = []

            # Mangle the value for printing
            for pname, values in self._dc.items():
                # Because of repeated parameter names, we need to add this:
                for the_value in values:
                    # the_value is always a string
                    if len(the_value) > 10:
                        the_value = the_value[:10] + '...'
                    params.append(pname + '="' + the_value + '"')

            # Joining (instead of trimming the last ', ') keeps the
            # ' | Parameters: (' prefix intact when there are no values
            strelems.append(u' | Parameters: (')
            strelems.append(u', '.join(params))
            strelems.append(u')')

        return u''.join(strelems).encode(DEFAULT_ENCODING)

    def __repr__(self):
        return '<fuzzable request | %s | %s>' % \
            (self.get_method(), self.get_uri())

    def __eq__(self, other):
        '''
        Two requests are equal if:
            - They have the same URL
            - They have the same method
            - They have the same parameters
            - The values for each parameter is equal

        :return: True if the requests are equal, NotImplemented when `other`
                 is not a FuzzableRequest.
        '''
        if isinstance(other, FuzzableRequest):
            return (self._method == other._method and
                    self._uri == other._uri and
                    self._dc == other._dc)
        else:
            return NotImplemented

    def get_eq_attrs(self):
        return ['_method', '_uri', '_dc']

    def __ne__(self, other):
        # NotImplemented has to be returned as it is: negating it gave
        # False, which made `request != <any other type>` False.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def is_variant_of(self, other):
        '''
        Two requests are loosely equal (or variants) if:
            - They have the same URL
            - They have the same HTTP method
            - They have the same parameter names
            - The values for each parameter have the same type (int / string)

        :return: True if self and other are variants.
        '''
        dc = self._dc
        odc = other._dc

        if (self._method == other._method and
            self._url == other._url and
                dc.keys() == odc.keys()):
            # Compare the values pairwise; None is the fill value, it shows
            # up when one request has more values than the other
            for vself, vother in izip_longest(
                chain(*dc.values()),
                chain(*odc.values()),
                fillvalue=None
            ):
                if None in (vself, vother) or \
                        vself.isdigit() != vother.isdigit():
                    return False
            return True
        return False

    def set_url(self, url):
        '''
        :param url: URL object instance, spaces are replaced by %20
        :raise TypeError: If url is not a URL instance.
        '''
        if not isinstance(url, URL):
            raise TypeError('The "url" parameter of a %s must be of '
                            'url.URL type.' % type(self).__name__)

        self._url = URL(url.url_string.replace(' ', '%20'))
        self._uri = self._url
        # The cache used by sent() was built with the old URI
        self._sent_info_comp = None

    def set_uri(self, uri):
        '''
        :param uri: URL object instance
        :raise TypeError: If uri is not a URL instance.
        '''
        if not isinstance(uri, URL):
            raise TypeError('The "uri" parameter of a %s must be of '
                            'url.URL type.' % type(self).__name__)
        self._uri = uri
        self._url = uri.uri2url()
        # The cache used by sent() was built with the old URI
        self._sent_info_comp = None

    def set_method(self, method):
        self._method = method

    def set_dc(self, dataCont):
        '''
        :param dataCont: DataContainer instance
        :raise TypeError: If dataCont is not a DataContainer instance.
        '''
        if not isinstance(dataCont, DataContainer):
            raise TypeError('Invalid call to fuzzable_request.set_dc(), the '
                            'argument must be a DataContainer instance.')
        self._dc = dataCont
        # The cache used by sent() was built with the old data container
        self._sent_info_comp = None

    def set_headers(self, headers):
        self._headers = Headers(headers)

    def set_referer(self, referer):
        self._headers['Referer'] = str(referer)

    def set_cookie(self, c):
        '''
        :param c: A Cookie object as defined in core.data.dc.cookie,
            a string, or None (which sets an empty Cookie).
        :raise w3afException: If c has any other type.
        '''
        if isinstance(c, Cookie):
            self._cookie = c
        elif isinstance(c, basestring):
            self._cookie = Cookie(c)
        elif c is None:
            self._cookie = Cookie()
        else:
            fmt = '[FuzzableRequest error] set_cookie received: "%s": "%s".'
            error_str = fmt % (type(c), repr(c))
            om.out.error(error_str)
            raise w3afException(error_str)

    def get_url(self):
        return self._url

    def get_uri(self):
        return self._uri

    def set_data(self, d):
        '''
        The data is the string representation of the DataContainer, in most
        cases it wont be set.
        '''
        self._data = d
        # The cache used by sent() was built with the old data
        self._sent_info_comp = None

    def get_data(self):
        '''
        The data is the string representation of the DataContainer, in most
        cases it will be used as the POSTDATA for requests. Sometimes it is
        also used as the query string data.
        '''
        return self._data

    def get_method(self):
        return self._method

    def get_dc(self):
        return self._dc

    def get_headers(self):
        return self._headers

    def get_referer(self):
        return self._headers.get('Referer', None)

    def get_cookie(self):
        return self._cookie

    def get_file_vars(self):
        return []

    def copy(self):
        return copy.deepcopy(self)
Beispiel #44
0
 def set_headers(self, headers):
     '''
     Store a new Headers instance built from `headers`.
     '''
     headers_inst = Headers(headers)
     self._headers = headers_inst
Beispiel #45
0
 def test_to_str_from_string(self):
     # str() followed by from_string() must give back an equal object
     original = Headers([('a', 'b')])
     serialized = str(original)
     round_tripped = Headers.from_string(serialized)

     self.assertEqual(round_tripped, original)
Beispiel #46
0
 def get_headers(self):
     '''
     :return: A new Headers instance built from self.headers and then
              updated with self.unredirected_hdrs.
     '''
     merged = Headers(self.headers.items())
     unredirected_items = self.unredirected_hdrs.items()
     merged.update(unredirected_items)
     return merged
Beispiel #47
0
def create_fuzzable_requests(resp, request=None, add_self=True):
    '''
    Generates the fuzzable requests based on an HTTP response instance.

    The result is collected from up to three sources, in this order:
        * the URI of the response itself (only when add_self is set),
        * the URLs found in the response headers listed in URL_HEADERS,
        * the forms found in the response body whose action points to the
          same domain as the response URL.

    :param resp: An HTTPResponse instance.
    :param request: The HTTP request that generated the resp. When given,
                    its headers are added to every fuzzable request created
                    here.
    :param add_self: If I should add the current HTTP request
                     (:param request) to the result or not.

    :return: A list of fuzzable requests.
    '''
    fuzzable_requests = []

    # All fuzzable requests created here share the same headers: the
    # configured fuzzable headers (with an empty value) plus the headers of
    # the original request, which replace entries that have the same key.
    shared_headers = {}
    for header_name in cf.cf.get('fuzzable_headers'):
        shared_headers[header_name] = ''

    if request:
        request_headers = request.get_headers()
        if request_headers:
            shared_headers.update(request_headers)

    shared_headers = Headers(shared_headers.items())

    # Get the cookie!
    cookie = _create_cookie(resp)

    # Create the fuzzable request that represents the request object
    # passed as parameter
    if add_self:
        fuzzable_requests.append(
            HTTPQSRequest(
                resp.get_uri(),
                headers=shared_headers,
                cookie=cookie
            )
        )

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    resp_headers = resp.get_headers()

    for url_header_name in URL_HEADERS:
        raw_value, _ = resp_headers.iget(url_header_name, '')
        if not raw_value:
            continue

        location = smart_unicode(raw_value, encoding=resp.charset)
        try:
            absolute_location = resp.get_url().url_join(location)
        except ValueError:
            # An unparseable header value is only logged; the other sources
            # are still processed.
            msg = ('The application sent a "%s" redirect that w3af'
                   ' failed to correctly parse as an URL, the header'
                   ' value was: "%s"')
            om.out.debug(msg % (url_header_name, location))
            continue

        fuzzable_requests.append(
            HTTPQSRequest(
                absolute_location,
                headers=shared_headers,
                cookie=cookie
            )
        )

    # Try to find forms in the document
    try:
        doc_parser = parser_cache.dpc.get_document_parser_for(resp)
    except w3afException:
        # Failed to find a suitable parser for the document
        forms = []
    else:
        # Only keep the forms that are submitted to the response's domain
        forms = [
            form for form in doc_parser.get_forms()
            if form.get_action().get_domain() == resp.get_url().get_domain()
        ]

    # TODO: Rewrite web service support. When no forms are found the
    #       response could still be a WSDL file; that check is disabled
    #       until the web service support is rewritten.
    if forms:
        # Create one HTTPPostDataRequest for each form variant
        mode = cf.cf.get('form_fuzzing_mode')
        for form in forms:
            for variant in form.get_variants(mode):
                if form.get_method().upper() == 'POST':
                    variant_request = HTTPPostDataRequest(
                        variant.get_action(),
                        variant.get_method(),
                        shared_headers,
                        cookie,
                        variant)
                else:
                    # The default is a GET request
                    variant_request = HTTPQSRequest(
                        variant.get_action(),
                        headers=shared_headers,
                        cookie=cookie
                    )
                    variant_request.set_dc(variant)

                fuzzable_requests.append(variant_request)

    return fuzzable_requests
Beispiel #48
0
def HTTPRequestParser(head, postdata):
    '''
    This function parses HTTP Requests from a string to a FuzzableRequest.

    :param head: The head of the request.
    :param postdata: The post data of the request
    :return: A FuzzableRequest object with all the corresponding information
        that was sent in head and postdata

    :author: Andres Riancho ([email protected])

    '''
    # Parse the request head, the strip() helps us deal with the \r (if any)
    splitted_head = head.split('\n')
    splitted_head = [h.strip() for h in splitted_head if h]

    if not splitted_head:
        msg = 'The HTTP request is invalid.'
        raise w3afException(msg)

    # Get method, uri, version
    method_uri_version = splitted_head[0]
    first_line = method_uri_version.split(' ')
    if len(first_line) == 3:
        # Ok, we have something like "GET /foo HTTP/1.0". This is the best case for us!
        method, uri, version = first_line
    elif len(first_line) < 3:
        msg = 'The HTTP request has an invalid <method> <uri> <version> token: "'
        msg += method_uri_version + '".'
        raise w3afException(msg)
    elif len(first_line) > 3:
        # GET /hello world.html HTTP/1.0
        # Mostly because we are permissive... we are going to try to parse
        # the request...
        method = first_line[0]
        version = first_line[-1]
        uri = ' '.join(first_line[1:-1])

    check_version_syntax(version)

    # If we got here, we have a nice method, uri, version first line
    # Now we parse the headers (easy!) and finally we send the request
    headers_str = splitted_head[1:]
    headers_inst = Headers()
    for header in headers_str:
        one_splitted_header = header.split(':', 1)
        if len(one_splitted_header) == 1:
            msg = 'The HTTP request has an invalid header: "%s".'
            raise w3afException(msg % header)

        header_name = one_splitted_header[0].strip()
        header_value = one_splitted_header[1].strip()
        if header_name in headers_inst:
            headers_inst[header_name] += ', ' + header_value
        else:
            headers_inst[header_name] = header_value

    host, _ = headers_inst.iget('host', None)
    
    try:
        uri = URL(check_uri_syntax(uri, host))
    except ValueError, ve:
        raise w3afException(str(ve))