Example #1
0
    def test_headers_iget(self):
        # The header is stored as 'Abc' and queried as 'abc': iget() is
        # expected to hand back the value plus the name as it was stored
        stored = Headers([('Abc', 'b')])

        found_value, found_name = stored.iget('abc')

        self.assertEqual(found_value, 'b')
        self.assertEqual(found_name, 'Abc')
Example #2
0
    def from_httplib_resp(cls, httplibresp, original_url=None):
        """
        Factory function. Build an instance of this class from a
        httplib.HTTPResponse instance.

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance. When it is not
                             given, the URL reported by the response is used
                             as the original URL too.

        :return: A HTTPResponse instance
        """
        code = httplibresp.code
        msg = httplibresp.msg
        raw_headers = httplibresp.info()
        body = httplibresp.read()
        headers = Headers(raw_headers.items())

        if original_url:
            # Build the final URL with the encoding of the original one
            final_url = URL(httplibresp.geturl(), original_url.encoding)
            final_url = final_url.url_decode()
        else:
            final_url = URL(httplibresp.geturl())
            original_url = final_url

        # get_wait_time() is defined in the keep alive http response object,
        # any other response gets the default wait time
        wait_time = DEFAULT_WAIT_TIME
        if hasattr(httplibresp, 'get_wait_time'):
            wait_time = httplibresp.get_wait_time()

        # For urllib2.HTTPError the encoding is read from .fp, this is
        # possible because errors.py does:
        #   err = urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
        #
        # In the other case the encoding attribute is only set on
        # CachedResponse instances
        if isinstance(httplibresp, urllib2.HTTPError):
            encoding_source = httplibresp.fp
        else:
            encoding_source = httplibresp
        charset = getattr(encoding_source, 'encoding', None)

        return cls(code, body, headers, final_url, original_url,
                   msg, charset=charset, time=wait_time)
Example #3
0
    def get_all_headers(self):
        """
        Merge the user specified headers (which live in self._headers) with
        the DataContainer headers returned by get_post_data_headers().

        :return: A Headers instance which will be sent to the wire. When the
                 same header name (compared case insensitively) shows up more
                 than once, the last value wins and the first stored name is
                 kept.
        """
        wire_headers = Headers()
        all_items = chain(self._headers.items(),
                          self.get_post_data_headers().items())

        for name, value in all_items:
            # Please note that here we're overwriting the headers from the
            # fuzzable request with the headers from the data container,
            # the overwriting is done in this order due to the order in the
            # chain() items above
            #
            # There was a bug where a request loaded from spider_man, saved
            # using dump() and then loaded again failed because this
            # overwriting was not being done (the multipart boundary was
            # incorrect).
            #
            # Keep that in mind in case you want to change this overwriting!
            _, stored_name = wire_headers.iget(name, None)

            # Overwrite the existing one, case insensitive style
            target_name = name if stored_name is None else stored_name
            wire_headers[target_name] = value

        return wire_headers
Example #4
0
    def get_headers(self):
        """
        Query the spec / operation and return the headers, including the
        content-type, which will be used later to know how to serialize the
        body.

        :return: A Headers instance built from the bravado request headers
        """
        bravado_request = self._bravado_construct_request()
        headers = Headers(bravado_request['headers'].items())

        # First choice: the content type from the operation's 'consumes',
        # if there is one
        consumes_type = self.get_consuming_content_type()
        if consumes_type is not None:
            headers['Content-Type'] = consumes_type

        current_type, _ = headers.iget('content-type', None)
        if current_type is not None or not self.parameters:
            return headers

        # Content-Type is not set yet.
        #
        # There are some specification documents where the consumes
        # section might be empty. This is because the operation doesn't
        # receive anything or because the specification is wrong.
        #
        # If there are parameters then we opt for serializing them as
        # JSON, which is a safe default
        headers['Content-Type'] = self.DEFAULT_CONTENT_TYPE
        return headers
Example #5
0
File: factory.py Project: EnDe/w3af
def dc_from_hdrs_post(headers, post_data):
    """
    Pick the data container that can hold the given post-data.

    Each class in POST_DATA_CONTAINERS is tried in order, the first one whose
    from_postdata() does not raise ValueError or TypeError wins. When all of
    them reject the input a debug message is written and PlainContainer is
    used instead.

    :param headers: HTTP request headers, most importantly containing the
                    content-type info. None is replaced by empty Headers.
    :param post_data: The HTTP request post-data as a string
    :return: The best-match from POST_DATA_CONTAINERS to hold the information
             in self._post_data @ FuzzableRequest
    """
    if headers is None:
        headers = Headers()

    for container_klass in POST_DATA_CONTAINERS:
        try:
            return container_klass.from_postdata(headers, post_data)
        except (ValueError, TypeError):
            # This container can't handle the input, try the next one
            continue

    # The loop above only ends by returning or by running out of containers,
    # so this point is reached when every container rejected the post-data
    content_type, _ = headers.iget("content-type", "None")
    msg = 'Unknown post-data. Content-type: "%s" and/or post-data "%s"'
    om.out.debug(msg % (content_type, post_data[:50]))

    return PlainContainer.from_postdata(headers, post_data)
Example #6
0
 def test_http_auth_detect_simple(self):
     # A 401 response carrying a www-authenticate header is expected to be
     # stored once under 'auth' and never under 'userPassUri'
     header_dict = {'content-type': 'text/html',
                    'www-authenticate': 'realm-w3af'}
     headers = Headers(header_dict.items())
     response = HTTPResponse(401, '', headers, self.url, self.url, _id=1)

     self.plugin.grep(self.request, response)

     auth_infos = kb.kb.get('http_auth_detect', 'auth')
     self.assertEqual(len(auth_infos), 1)

     user_pass_infos = kb.kb.get('http_auth_detect', 'userPassUri')
     self.assertEqual(len(user_pass_infos), 0)
Example #7
0
    def test_headers_update_overlap(self):
        # Both instances define 'def': after update() the value from the
        # argument ('1') is expected to replace the original one ('2')
        target = Headers([('Abc', 'b'), ('def', '2')])
        overlapping = Headers([('def', '1')])

        target.update(overlapping)

        expected = Headers([('Abc', 'b'), ('def', '1')])
        self.assertEqual(expected, target)
Example #8
0
    def test_tokens_to_value(self):
        # A header whose value is a DataToken must end up holding the plain
        # string value once tokens_to_value() was called
        data_token = DataToken('a', 'b', ('a',))
        headers = Headers([('a', data_token)])

        headers.tokens_to_value()

        self.assertIn('a', headers)

        stored_value = headers['a']
        self.assertEqual(stored_value, 'b')
        self.assertIsInstance(headers['a'], basestring)
Example #9
0
    def test_copy_with_token(self):
        # The token set on the original must survive a deepcopy
        original = Headers([('Abc', 'b')])
        original.set_token(('Abc',))

        duplicate = copy.deepcopy(original)

        self.assertEqual(original.get_token(), duplicate.get_token())
        self.assertIsNotNone(original.get_token())
        self.assertIsNotNone(duplicate.get_token())

        duplicate_token = duplicate.get_token()
        self.assertEqual(duplicate_token.get_name(), 'Abc')
Example #10
0
    def test_analyze_cookies_with_httponly_case_sensitive_expires(self):
        # The cookie is sent over HTTPS with Secure and HttpOnly set, and the
        # Expires attribute is written with a capital E: one cookie and no
        # security finding are expected
        url = URL('https://www.w3af.com/')
        header_dict = {'content-type': 'text/html',
                       'Set-Cookie': 'name2=value2; Expires=Wed, 09-Jun-2021 10:18:14 GMT;Secure;HttpOnly'}
        headers = Headers(header_dict.items())
        response = HTTPResponse(200, '', headers, url, url, _id=1)
        request = FuzzableRequest(url, method='GET')

        self.plugin.grep(request, response)

        cookie_infos = kb.kb.get('analyze_cookies', 'cookies')
        self.assertEqual(len(cookie_infos), 1)

        security_infos = kb.kb.get('analyze_cookies', 'security')
        self.assertEqual(len(security_infos), 0)
Example #11
0
    def from_http_request(cls, request):
        """
        Build an instance from a urllib2 HTTP request, merging the regular
        and the unredirected headers of the request.

        :param request: The instance we'll use as base. It is not modified.
        :return: An instance of FuzzableRequest based on a urllib2 HTTP request
                 instance.
        """
        # Merge on a copy: calling request.headers.update() would write the
        # unredirected headers into the request's own header dict as a side
        # effect of building the fuzzable request
        merged_headers = dict(request.headers)
        merged_headers.update(request.unredirected_hdrs)
        headers = Headers(merged_headers.items())

        # A request without a body is represented with an empty string
        post_data = request.get_data() or ""

        return cls.from_parts(request.url_object,
                              method=request.get_method(),
                              headers=headers,
                              post_data=post_data)
Example #12
0
    def mangle_response(self, response):
        """
        Apply the configured body and header substitutions to the response.

        :param response: This is the response to mangle, it is modified in
                         place.
        :return: A mangled version of the response.
        """
        # Body: apply every (regex, replacement) pair in order
        mangled_body = response.get_body()
        for pattern, replacement in self._manglers['s']['b']:
            mangled_body = pattern.sub(replacement, mangled_body)
        response.set_body(mangled_body)

        # Headers: serialize, apply the substitutions and parse back
        raw_headers = str(response.get_headers())
        for pattern, replacement in self._manglers['s']['h']:
            raw_headers = pattern.sub(replacement, raw_headers)

        try:
            new_headers = Headers.from_string(raw_headers)
        except ValueError:
            # The original headers are kept when the result can't be parsed
            om.out.error('Your header modifications created an invalid header'
                         ' string that could NOT be parsed back to a Header object.')
        else:
            response.set_headers(new_headers)

        if not self._user_option_fix_content_len:
            return response

        return self._fix_content_len(response)
Example #13
0
    def _urllib_request_to_fr(self, request):
        """
        Convert a urllib2 request object to a FuzzableRequest.
        Used in http_request.

        :param request: A urllib2 request obj. It is not modified.
        :return: A FuzzableRequest.
        """
        # Merge on a copy: calling request.headers.update() would write the
        # unredirected headers into the request's own header dict as a side
        # effect of this conversion
        merged_headers = dict(request.headers)
        merged_headers.update(request.unredirected_hdrs)
        headers = Headers(merged_headers.items())

        fr = FuzzableRequest(request.url_object,
                             request.get_method(),
                             headers)

        # A request without a body is represented with an empty string
        fr.set_data(request.get_data() or '')
        return fr
Example #14
0
def create_fuzzable_request_from_request(request, add_headers=None):
    """
    Build a fuzzable request out of an HTTPRequest.

    The headers of the result are the request headers, updated with the
    unredirected headers and finally with add_headers (when given).

    :param request: The HTTPRequest to copy the information from
    :param add_headers: Optional extra headers, applied last
    :return: A fuzzable request with the same info as request
    :raise TypeError: When request is not an HTTPRequest
    """
    if not isinstance(request, HTTPRequest):
        raise TypeError('Requires HTTPRequest to create FuzzableRequest.')

    target_url = request.url_object
    body = str(request.get_data() or '')
    http_method = request.get_method()

    merged_headers = Headers(request.headers.items())
    merged_headers.update(request.unredirected_hdrs.items())

    extra_headers = add_headers if add_headers else Headers()
    merged_headers.update(extra_headers)

    return create_fuzzable_request_from_parts(target_url,
                                              method=http_method,
                                              post_data=body,
                                              add_headers=merged_headers)
Example #15
0
    def http_response(self, request, response):
        """
        Run every configured plugin's mangle_response() on the response.

        :param request: The request that generated the response, only its
                        url_object is used.
        :param response: The response to mangle.
        :return: The response rebuilt from the mangled HTTPResponse, or the
                 untouched input when there are no plugins or the response
                 connection has no socket.
        """
        # Nothing to do without plugins or without an open socket
        if not len(self._plugin_list) or response._connection.sock is None:
            return response

        # Create the HTTPResponse object
        code = response.code
        msg = response.msg
        headers = Headers(response.info().items())
        final_url = URL(response.geturl())
        body = response.read()

        # Id is not here, the mangle is done BEFORE logging
        mangled = HTTPResponse(code, body, headers, final_url,
                               request.url_object, msg=msg)

        for mangle_plugin in self._plugin_list:
            mangle_plugin.mangle_response(mangled)

        return self._HTTPResponse2httplib(response, mangled)
Example #16
0
    def set_headers(self, headers):
        """
        Sets the headers and also analyzes them in order to get the response
        mime type (text/html , application/pdf, etc).

        The content type (without the charset) is stored in
        self._content_type and self._doc_type is set to one of the
        HTTPResponse.DOC_TYPE_* constants.

        :param headers: The headers dict. An httplib.HTTPMessage is converted
                        to a Headers instance, anything else is stored as is.
        """
        # Fix lowercase in header names from HTTPMessage
        if isinstance(headers, httplib.HTTPMessage):
            self._headers = Headers()
            last_name = None

            for raw_line in headers.headers:
                # The raw line list also holds folded (continuation) lines,
                # which start with whitespace and belong to the previous
                # header. Splitting them on ':' would either fail or create
                # a header which was never sent.
                if last_name is not None and raw_line[:1] in (' ', '\t'):
                    folded_value = '%s %s' % (self._headers[last_name],
                                              raw_line.strip())
                    self._headers[last_name] = folded_value
                    continue

                name, value = raw_line.split(':', 1)
                last_name = name.strip()
                self._headers[last_name] = value.strip()
        else:
            self._headers = headers

        content_type_hvalue, _ = self._headers.iget(CONTENT_TYPE, None)

        # we need exactly content type but not charset
        if content_type_hvalue is not None:
            try:
                self._content_type = content_type_hvalue.split(';', 1)[0].strip().lower()
            except Exception:
                # Best effort: an unusable value is only logged. Exception
                # (and not a bare except) so that KeyboardInterrupt and
                # SystemExit are not swallowed here.
                msg = 'Invalid Content-Type value "%s" sent in HTTP response.'
                om.out.debug(msg % (content_type_hvalue,))
            else:
                content_type = self._content_type
                text_words = ('text', 'html', 'xml', 'txt', 'javascript')

                # Set the doc_type
                if 'image' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_IMAGE

                elif 'pdf' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_PDF

                elif 'x-shockwave-flash' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_SWF

                elif any(word in content_type for word in text_words):
                    self._doc_type = HTTPResponse.DOC_TYPE_TEXT_OR_HTML

        # Check if the doc type is still None, that would mean that none of the
        # previous if statements matched.
        #
        # Note that this is done here and not before the other if statements
        # because that triggered a race condition with threads asking if the
        # _doc_type was != None (which it was because it was being set to
        # DOC_TYPE_OTHER) and that raised all types of errors.
        if self._doc_type is None:
            self._doc_type = HTTPResponse.DOC_TYPE_OTHER
    def get_all_headers(self):
        """
        Merge the user specified headers (which live in self._headers) with
        the DataContainer headers returned by get_post_data_headers().

        :return: A Headers instance which will be sent to the wire. A header
                 name (compared case insensitively) which already has a value
                 is never overwritten, so the user specified headers win.
        """
        wire_headers = Headers()
        all_items = chain(self._headers.items(),
                          self.get_post_data_headers().items())

        for name, value in all_items:
            # Only add the names which were not defined by an earlier item,
            # the user-specified headers come first in the chain
            existing_value, _ = wire_headers.iget(name, None)
            if existing_value is None:
                wire_headers[name] = value

        return wire_headers
Example #18
0
    def __init__(self, uri, method='GET', headers=None, cookie=None, dc=None):
        """
        :param uri: The request URI, stored through set_uri()
        :param method: The HTTP method
        :param headers: Optional headers, wrapped in a new Headers instance
        :param cookie: Optional cookie, a new Cookie is used when falsy
        :param dc: Optional data container, a new DataContainer is used when
                   falsy
        """
        super(FuzzableRequest, self).__init__()

        # Internal variables, falsy arguments are replaced by empty objects
        self._dc = dc if dc else DataContainer()
        self._method = method
        self._headers = Headers(headers if headers else ())
        self._cookie = cookie if cookie else Cookie()
        self._data = None
        self.set_uri(uri)

        # Set the internal variables
        self._sent_info_comp = None
Example #19
0
    def _grep(self, request, response):
        """
        Queue the (fuzzable request, response) pair for the grep plugins.

        Nothing is queued when there is no grep queue or when the request
        domain is not one of the configured 'target_domains'.

        :param request: The urllib2 request object
        :param response: The response which will be queued with the request
        """
        url_instance = request.url_object
        domain = url_instance.get_domain()

        if self._grep_queue_put is not None and\
        domain in cf.cf.get('target_domains'):

            # Create a fuzzable request based on the urllib2 request object
            headers_inst = Headers(request.header_items())

            # A request without a body must be sent as an empty string,
            # str(None) would turn it into the literal post-data 'None'
            post_data = str(request.get_data() or '')

            fr = FuzzableRequest.from_parts(url_instance,
                                            request.get_method(),
                                            post_data,
                                            headers_inst)

            self._grep_queue_put((fr, response))
Example #20
0
    def test_provides_cors_features_true(self):
        # The mocked opener answers with an Access-Control-Allow-Origin
        # header, so the CORS features must be detected
        url = URL('http://moth/')
        fuzzable_request = FuzzableRequest(url)

        header_items = {'Access-Control-Allow-Origin': 'http://www.w3af.org/'}.items()
        http_response = HTTPResponse(200, '', Headers(header_items), url, url)

        opener = Mock()
        opener.GET = MagicMock(return_value=http_response)

        has_cors = provides_cors_features(fuzzable_request, opener)

        opener.GET.assert_called_with(url)
        self.assertTrue(has_cors)
Example #21
0
    def test_unsafe_inline_enabled_no_case01(self):
        """
        Test case in which site do not provides "unsafe-inline" related CSP
        (no directive value "unsafe-inline").
        """
        firefox_policy = CSP_DIRECTIVE_SCRIPT + " 'self'"
        report_only_policy = (CSP_DIRECTIVE_DEFAULT + " 'self';" +
                              CSP_DIRECTIVE_REPORT_URI + " http://example.com")
        w3c_policy = (CSP_DIRECTIVE_SCRIPT + " 'self';" +
                      CSP_DIRECTIVE_REPORT_URI + " /myrelativeuri")

        header_dict = {}
        header_dict[CSP_HEADER_FIREFOX] = firefox_policy
        header_dict[CSP_HEADER_W3C_REPORT_ONLY] = report_only_policy
        header_dict[CSP_HEADER_W3C] = w3c_policy

        csp_headers = Headers(header_dict.items())
        http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)

        self.assertFalse(unsafe_inline_enabled(http_response))
Example #22
0
    def test_find_vulns_case04(self):
        """
        Test case in which we configure correctly policies for all directives.
        """
        policy = ("default-src 'self';script-src 'self';object-src 'self';"
                  "style-src 'self';img-src 'self';media-src 'self';"
                  "frame-src 'self';font-src 'self';sandbox;"
                  "form-action '/myCtx/act';connect-src 'self';"
                  "plugin-types application/pdf;reflected-xss filter;"
                  "script-nonce AABBCCDDEE;")
        header_items = {CSP_HEADER_W3C: policy}.items()
        csp_headers = Headers(header_items)
        http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)

        found = find_vulns(http_response)

        # No directive is misconfigured, so nothing must be reported
        self.assertEqual(len(found), 0)
Example #23
0
    def POST(self, uri, data='', headers=Headers(), grep=True, cache=False,
             cookies=True, ignore_errors=False):
        """
        POST's data to a uri using a proxy, user agents, and other settings
        that where set previously.

        :param uri: This is the url where to post.
        :param data: A string with the data for the POST.
        :param headers: The Headers to add to the request.
        :param grep: Forwarded to _send().
        :param cache: Ignored, POST requests are always sent with cache=False.
        :param cookies: Forwarded to the HTTPRequest.
        :param ignore_errors: Forwarded to the HTTPRequest.
        :return: An HTTPResponse object.
        :raise TypeError: When uri is not a URL or headers is not a Headers
                          instance.
        """
        # NOTE(review): the Headers() default is created once, when the
        # method is defined, and shared by every call which omits headers.
        # Confirm that _add_headers() never modifies it.
        if not isinstance(uri, URL):
            raise TypeError('The uri parameter of ExtendedUrllib.POST() must'
                            ' be of url.URL type.')

        if not isinstance(headers, Headers):
            raise TypeError(
                'The header parameter of ExtendedUrllib.POST() must'
                ' be of Headers type.')

        #    Validate what I'm sending, init the library (if needed)
        self._init()

        #    Please note that cache=False overrides the user setting
        #    since we *never* want to return cached responses for POST
        #    requests.
        post_body = str(data)
        max_retries = self.settings.get_max_retrys()

        request = HTTPRequest(uri, data=post_body, cookies=cookies,
                              cache=False, ignore_errors=ignore_errors,
                              method='POST', retries=max_retries)
        request = self._add_headers(request, headers)

        return self._send(request, grep=grep)
Example #24
0
        def any_method(uri_opener,
                       method,
                       uri,
                       data=None,
                       headers=Headers(),
                       cache=False,
                       grep=True,
                       cookies=True,
                       error_handling=True,
                       timeout=None,
                       use_basic_auth=True,
                       use_proxy=True,
                       follow_redirects=False):
            """
            Send a request using an arbitrary HTTP method.

            :param uri_opener: The opener used to build and send the request.
            :param method: The HTTP method to use.
            :param uri: The URL instance to send the request to.
            :param data: The request body, if any.
            :param headers: The Headers to add to the request.
            :param timeout: Timeout for this request. When None the timeout
                            the opener keeps for the host is used; a custom
                            value also forces a new connection.
            :param use_proxy: Forwarded to the HTTPRequest.
            :return: An HTTPResponse object that's the result of sending
                     the request with a method different from GET or POST.
            :raise TypeError: When uri is not a URL or headers is not a
                              Headers instance.
            """
            if not isinstance(uri, URL):
                raise TypeError('The uri parameter of any_method must be'
                                ' of url.URL type.')

            if not isinstance(headers, Headers):
                raise TypeError('The headers parameter of any_method must be'
                                ' of Headers type.')

            uri_opener.setup()

            max_retries = uri_opener.settings.get_max_retrys()

            # Must be calculated before the default timeout is filled in
            new_connection = timeout is not None
            host = uri.get_domain()
            if timeout is None:
                timeout = uri_opener.get_timeout(host)

            # use_proxy is forwarded as received: it used to be hard-coded to
            # True, which silently ignored callers asking for no proxy
            req = HTTPRequest(uri,
                              data,
                              cookies=cookies,
                              cache=cache,
                              method=method,
                              error_handling=error_handling,
                              retries=max_retries,
                              timeout=timeout,
                              new_connection=new_connection,
                              use_basic_auth=use_basic_auth,
                              follow_redirects=follow_redirects,
                              use_proxy=use_proxy)
            req = uri_opener.add_headers(req, headers or {})
            return uri_opener.send(req, grep=grep)
Example #25
0
    def test_object(self):
        # An HTML body with one OBJECT tag must create exactly one 'object'
        # info whose description mentions "object"
        body = """header
        <OBJECT
          classid="clsid:8AD9C840-044E-11D1-B3E9-00805F499D93"
          width="200" height="200">
          <PARAM name="code" value="Applet1.class">
        </OBJECT>
        footer"""
        url = URL('http://www.w3af.com/')
        headers = Headers([('content-type', 'text/html')])
        response = HTTPResponse(200, body, headers, url, url, _id=1)
        request = FuzzableRequest(url, method='GET')
        self.plugin.grep(request, response)

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(len(kb.kb.get('objects', 'object')), 1)
        i = kb.kb.get('objects', 'object')[0]

        # assertIn shows both operands when the check fails
        self.assertIn('"object"', i.get_desc())
Example #26
0
    def test_get_clear_text_issue_4402(self):
        """
        :see: https://github.com/andresriancho/w3af/issues/4402
        """
        test_file_path = 'core/data/url/tests/data/encoding_4402.php'
        test_file = os.path.join(ROOT_PATH, test_file_path)

        # Read through a context manager so the file handle is always closed
        with open(test_file, 'rb') as body_file:
            body = body_file.read()

        sample_encodings = [encoding for _, (_, encoding) in TEST_RESPONSES.iteritems()]
        sample_encodings.extend(['', 'utf-8'])

        # The test passes when get_clear_text_body() doesn't raise for any of
        # the charsets announced in the Content-Type header
        for encoding in sample_encodings:
            encoding_header = 'text/html; charset=%s' % encoding
            headers = Headers([('Content-Type', encoding_header)])

            resp = self.create_resp(headers, body)
            resp.get_clear_text_body()
Example #27
0
    def test_applet(self):
        # An HTML body with one APPLET tag must create exactly one 'applet'
        # info whose description mentions "applet"
        body = """header
        <APPLET code="XYZApp.class" codebase="html/" align="baseline"
            width="200" height="200">
            <PARAM name="model" value="models/HyaluronicAcid.xyz">
            No Java 2 SDK, Standard Edition v 1.4.2 support for APPLET!!
        </APPLET>
        footer"""
        url = URL('http://www.w3af.com/')
        headers = Headers([('content-type', 'text/html')])
        response = HTTPResponse(200, body, headers, url, url, _id=1)
        request = FuzzableRequest(url, method='GET')
        self.plugin.grep(request, response)

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(len(kb.kb.get('objects', 'applet')), 1)
        i = kb.kb.get('objects', 'applet')[0]

        # assertIn shows both operands when the check fails
        self.assertIn('"applet"', i.get_desc())
Example #28
0
    def test_dump_case03(self):
        # The header value holds every byte from 0 to 255, dump() is expected
        # to return it as unicode
        all_bytes = ''.join(map(chr, xrange(256)))

        request_line = u'GET http://w3af.com/a/b/c.php HTTP/1.1'
        header_line = u'Hola: %s' % smart_unicode(all_bytes)
        expected_dump = u'\r\n'.join([request_line, header_line, u'', u''])

        headers = Headers([(u'Hola', all_bytes)])

        #TODO: Note that a dc is passed to the FuzzableRequest and it's not
        # appearing in the dump. It might be a bug...
        fuzzable_request = FuzzableRequest(self.url,
                                           method='GET',
                                           dc={u'a': ['b']},
                                           headers=headers)

        self.assertEqual(fuzzable_request.dump(), expected_dump)
Example #29
0
    def test_raises_other_exceptions(self):
        # An exception raised by is_404 must go through grep_wrapper()
        # without being swallowed
        url = URL('http://www.w3af.com/')
        html = '<meta test="user/pass"></script>'
        headers = Headers([('content-type', 'text/html')])
        request = FuzzableRequest(url, method='GET')
        response = HTTPResponse(200, html, headers, url, url, _id=1)

        expected_msg = 'Foos and bars'

        with patch('w3af.plugins.grep.meta_tags.is_404') as is_404_mock:
            is_404_mock.side_effect = Exception(expected_msg)

            try:
                self.plugin.grep_wrapper(request, response)
            except Exception as raised:
                self.assertEqual(str(raised), expected_msg)
            else:
                self.assertTrue(False, 'Expected exception, success found!')
    def test_xmlrpc_post(self):
        # An XML-RPC method call sent as post-data must be stored in an
        # XmlRpcContainer
        xmlrpc_body = """<methodCall>
            <methodName>system.listMethods</methodName>
            <params></params>
        </methodCall>"""

        body_length = str(len(xmlrpc_body))
        headers = Headers([('content-length', body_length)])

        fuzzable_request = FuzzableRequest.from_parts(self.url,
                                                      headers=headers,
                                                      post_data=xmlrpc_body,
                                                      method='POST')

        self.assertEqual(fuzzable_request.get_url(), self.url)
        self.assertEqual(fuzzable_request.get_headers(), headers)
        self.assertEqual(fuzzable_request.get_method(), 'POST')
        self.assertIsInstance(fuzzable_request.get_raw_data(), XmlRpcContainer)
Example #31
0
    def POST(self, uri, data='', headers=Headers(), grep=True, cache=False,
             cookies=True, error_handling=True, timeout=None,
             follow_redirects=None, use_basic_auth=True, use_proxy=True):
        """
        POST's data to a uri using a proxy, user agents, and other settings
        that where set previously.

        :param uri: This is the url where to post.
        :param data: A string with the data for the POST.
        :param cache: Ignored, POST requests are always sent with cache=False.
        :param timeout: Timeout for this request. When None the timeout
                        returned by get_timeout() for the host is used; a
                        custom value also forces a new connection.
        :param follow_redirects: Ignored, see the comment in the body.
        :see: The GET() for documentation on the other parameters
        :return: An HTTPResponse object.
        :raise TypeError: When uri is not a URL or headers is not a Headers
                          instance.
        """
        # NOTE(review): the Headers() default is created once, when the
        # method is defined, and shared by every call which omits headers.
        # Confirm that add_headers() never modifies it.
        if not isinstance(uri, URL):
            raise TypeError('The uri parameter of ExtendedUrllib.POST() must'
                            ' be of url.URL type.')

        if not isinstance(headers, Headers):
            raise TypeError('The header parameter of ExtendedUrllib.POST() must'
                            ' be of Headers type.')

        #    Validate what I'm sending, init the library (if needed)
        self.setup()

        # follow_redirects is ignored because according to the RFC browsers
        # should not follow 30x redirects on POST

        post_body = str(data)
        host = uri.get_domain()

        # Must be calculated before the default timeout is filled in
        new_connection = timeout is not None
        if timeout is None:
            timeout = self.get_timeout(host)

        #    Please note that cache=False overrides the user setting
        #    since we *never* want to return cached responses for POST
        #    requests.
        request = HTTPRequest(uri,
                              data=post_body,
                              cookies=cookies,
                              cache=False,
                              error_handling=error_handling,
                              method='POST',
                              retries=self.settings.get_max_retrys(),
                              timeout=timeout,
                              new_connection=new_connection,
                              use_basic_auth=use_basic_auth,
                              use_proxy=use_proxy)
        request = self.add_headers(request, headers)

        return self.send(request, grep=grep)
Example #32
0
    def test_from_dict(self):
        # A response serialized with to_dict() + msgpack and loaded back with
        # from_dict() must be equal to the original one
        body = 'header <b>ABC</b>-<b>DEF</b>-<b>XYZ</b> footer'
        headers = Headers([('Content-Type', 'text/html')])
        original = self.create_resp(headers, body)

        packed = msgpack.dumps(original.to_dict())
        unpacked = msgpack.loads(packed)
        restored = HTTPResponse.from_dict(unpacked)

        self.assertEqual(original, restored)

        # The body lock is removed from both instances before comparing the
        # rest of their attributes
        for resp in (original, restored):
            resp.__dict__.pop('_body_lock')

        self.assertEqual(original.__dict__.values(),
                         restored.__dict__.values())
    def test_POST_repeated(self):
        # The request carries the Foo header twice: the parsed headers are
        # expected to hold both values joined with ', '
        request_head = 'POST http://www.w3af.org/ HTTP/1.1\n' \
                       'Host: www.w3af.org\n' \
                       'Content-Length: 7\n' \
                       'Content-Type: application/x-www-form-urlencoded\n' \
                       'Foo: spam\n' \
                       'Foo: eggs\n'
        post_data = 'a=1&a=2'
        fr = http_request_parser(request_head, post_data)

        exp_headers = Headers([('Host', 'www.w3af.org'),
                               ('Content-Type',
                                'application/x-www-form-urlencoded'),
                               ('Foo', 'spam, eggs')])

        self.assertEqual(fr.get_headers(), exp_headers)

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(fr.get_data(), post_data)
Example #34
0
def new_no_content_resp(uri, add_id=False):
    """
    Return a new NO_CONTENT HTTPResponse object with an empty body and no
    headers.

    :param uri: URI string or request object, used both as the response URL
                and as the original URL
    :param add_id: When true, the response id is set to the next value of
                   consecutive_number_generator
    :return: The new HTTPResponse
    """
    response = HTTPResponse(NO_CONTENT, '', Headers(), uri, uri,
                            msg='No Content')

    if not add_id:
        return response

    response.id = consecutive_number_generator.inc()
    return response
Example #35
0
    def test_cache_control_correct_headers(self):
        """
        Sensitive content with cache control headers so NO BUG is stored in KB.
        """
        body = 'abc'
        url = URL('https://www.w3af.com/')
        headers = Headers([('content-type', 'text/html'),
                           ('Pragma', 'No-cache'),
                           ('Cache-Control', 'No-store')])
        request = FuzzableRequest(url, method='GET')
        resp = HTTPResponse(200, body, headers, url, url, _id=1)

        self.plugin.grep(request, resp)
        self.plugin.end()

        infos = kb.kb.get('cache_control', 'cache_control')

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(len(infos), 0)
Example #36
0
def build_cors_request(url, origin_header_value):
    """
    Create the "GET" HTTP request used to probe CORS behaviour.

    :param url: a URL object.
    :param origin_header_value: Value for the "Origin" HTTP request header,
                                surrounding whitespace is stripped. When it
                                is None the header is not added at all.
    :return: A fuzzable request that will be sent to @url and carries
             @origin_header_value in the Origin header (if one was given).
    """
    headers = Headers()

    send_origin = origin_header_value is not None
    if send_origin:
        headers["Origin"] = origin_header_value.strip()

    return FuzzableRequest(url, 'GET', headers=headers)
 def test_m1(self, *args):
     """
     Vulnerable to MITM since login form was submitted over HTTP

     The page is served over plain HTTP and contains a password input, so
     exactly one finding with the expected name must be stored in the KB.
     """
     body = 'header <form action="https://www.w3af.com/">' \
            '<input type="password" name="passwd" /></form>footer'
     url = URL('http://www.w3af.com/')
     headers = Headers([('content-type', 'text/html')])
     response = HTTPResponse(200, body, headers, url, url, _id=1)
     request = FuzzableRequest(url, method='GET')
     self.plugin.grep(request, response)

     findings = kb.kb.get('form_cleartext_password',
                          'form_cleartext_password')
     self.assertEqual(len(findings), 1)
     # Compare the name itself (not a boolean against 1) so a failure shows
     # the unexpected name instead of "False != 1"
     self.assertEqual(findings[0].get_name(),
                      'Insecure password form access over HTTP')
Example #38
0
 def test_retrieve_csp_policies_with_special_policies_case01(self):
     """
     Two policies are sent using special directives which have an empty
     value; each of them must be retrieved with a single "" value.
     """
     header_value = "sandbox ; script-nonce "
     csp_headers = Headers([(CSP_HEADER_W3C, header_value)])

     http_response = HTTPResponse(200, '', csp_headers, self.url, self.url)
     policies = retrieve_csp_policies(http_response)

     self.assertEqual(len(policies), 2)

     # Same checks for both directives, in the original order
     for directive in (CSP_DIRECTIVE_SANDBOX, CSP_DIRECTIVE_SCRIPT_NONCE):
         self.assertEqual(len(policies[directive]), 1)
         self.assertEqual(policies[directive][0], "")
Example #39
0
    def _bruteforce(self):
        """
        Use some common words to bruteforce file names and find new resources.
        This process is done only once for every new directory.

        Every fuzzable request created from the results is put in
        self.output_queue; nothing is returned.
        """
        url_generator = self._wordlist_url_generator()
        # The very same Headers instance is paired with every generated URL
        call_args = izip(url_generator, repeat(Headers()))

        # Send the requests using threads:
        results = self.worker_pool.map_multi_args(
            self._request_and_get_alternates,
            call_args,
            chunksize=10)

        for base_url, alternates in results:
            new_requests = self._create_new_fuzzable_requests(base_url,
                                                              alternates)
            for fuzzable_request in new_requests:
                self.output_queue.put(fuzzable_request)
 def test_sl_2(self, *args):
     """
     Static link 2: the page opens two web sockets which point to different
     URLs, so two different InfoSets must be reported.
     """
     body = ('header<script>'
             'ws1 = new WebSocket("ws://www.example.com/socketserver");'
             'ws2 = new WebSocket("wss://SECURESOCKETSERVER:8080");'
             '</script>')
     url = URL('https://www.w3af.com/')
     response = HTTPResponse(200, body,
                             Headers([('content-type', 'text/html')]),
                             url, url, _id=1)
     request = FuzzableRequest(url, method='GET')

     self.plugin.grep(request, response)

     info_sets = kb.kb.get('websockets_links', 'websockets_links')
     self.assertEqual(len(info_sets), 2)
Example #41
0
    def _send_end(self):
        """
        Reply to the browser with a small HTML page which states that the
        w3af spider_man plugin has finished its execution.
        """
        html = '<html>spider_man plugin finished its execution.</html>'
        content_length = ('Content-Length', str(len(html)))

        # TERMINATE_URL is used both as the response URL and original URL
        resp = HTTPResponse.HTTPResponse(200, html, Headers([content_length]),
                                         TERMINATE_URL, TERMINATE_URL)
        self._send_to_browser(resp)
    def test_xmlrpc_post(self):
        """
        A POST whose body is an XML-RPC methodCall must be turned into an
        XMLRPCRequest that keeps the URL, headers and method it was given.
        """
        post_data = """<methodCall>
            <methodName>system.listMethods</methodName>
            <params></params>
        </methodCall>"""

        content_length = str(len(post_data))
        headers = Headers([('content-length', content_length)])

        freq = create_fuzzable_request_from_parts(self.url,
                                                  method='POST',
                                                  post_data=post_data,
                                                  add_headers=headers)

        self.assertEqual(freq.get_url(), self.url)
        self.assertEqual(freq.get_headers(), headers)
        self.assertEqual(freq.get_method(), 'POST')
        self.assertIsInstance(freq, XMLRPCRequest)
Example #43
0
    def test_clamav_empty(self, *args):
        """
        An empty response body must not create any 'malware' finding in the
        'clamav' KB location.
        """
        url = URL('http://www.w3af.com/')
        response = HTTPResponse(200, '',
                                Headers([('content-type', 'text/html')]),
                                url, url, _id=1)
        request = FuzzableRequest(url, method='GET')

        self.plugin.grep(request, response)

        # Let the worker pool wait for the clamd response, this is done by
        # the core when run in a real scan
        pool = self.plugin.worker_pool
        pool.close()
        pool.join()

        findings = kb.kb.get('clamav', 'malware')

        # The findings are passed as the failure message to ease debugging
        self.assertEqual(len(findings), 0, findings)
Example #44
0
    def test_pdf_parser(self):
        """
        Parse the PDF stored at self.LINKS_SAMPLE and check that no reference
        is reported as parsed, while the regular expression based references
        contain only http://moth/pdf/.
        """
        # Read the sample in binary mode and close the handle right away; the
        # previous file(...).read() call left the file open and used text mode
        with open(self.LINKS_SAMPLE, 'rb') as pdf_file:
            body = pdf_file.read()

        hdrs = Headers({'Content-Type': 'application/pdf'}.items())
        response = HTTPResponse(200,
                                body,
                                hdrs,
                                URL('http://moth/'),
                                URL('http://moth/'),
                                _id=1)

        parser = PDFParser(response)
        parsed, re_refs = parser.get_references()

        self.assertEqual(parsed, [])
        self.assertEqual(re_refs, [
            URL('http://moth/pdf/'),
        ])
Example #45
0
    def test_get_clean_body_double_encoded(self):
        """
        The payload is echoed in the response body after being URL encoded
        twice; get_clean_body() must still remove it and return unicode.
        """
        payload = 'hello/world'
        double_encoded = urllib.quote_plus(urllib.quote_plus(payload))

        url = URL('http://w3af.com')
        response = HTTPResponse(200,
                                'abc %s def' % double_encoded,
                                Headers([('Content-Type', 'text/html')]),
                                url,
                                url)

        freq = FuzzableRequest(URL('http://w3af.com/?a=1'))
        mutant = FakeMutant.create_mutants(freq, [payload], [],
                                           False, {})[0]

        clean_body = get_clean_body(mutant, response)

        self.assertEqual(clean_body, 'abc  def')
        self.assertIsInstance(clean_body, unicode)
Example #46
0
    def test_get_clear_text_body_encodings(self):
        """
        Currently skipped: SkipTest is raised before anything else runs.

        The remaining code encodes every TEST_RESPONSES body with its own
        encoding and expects the parser's clear text body to be equal to the
        original text.
        """
        raise SkipTest('Not sure why this one is failing :S')

        # Everything below is unreachable while the SkipTest above is raised
        for _description, (text, charset) in TEST_RESPONSES.iteritems():
            content_type = 'text/html; charset=%s' % charset
            headers = Headers([('Content-Type', content_type)])

            response = _build_http_response(URL_INST, text.encode(charset),
                                            headers)

            parser = _SGMLParser(response)
            parser._parse(response)

            # These test strings don't really have tags, so they should be eq
            self.assertEqual(parser.get_clear_text_body(), text)
Example #47
0
 def test_retrieve_csp_report_uri_yes(self):
     """
     The site sends three CSP headers, two of them with a report-uri
     directive: both report URIs must be retrieved.
     """
     report_only_value = CSP_DIRECTIVE_DEFAULT + " 'self';" + \
         CSP_DIRECTIVE_REPORT_URI + " http://example.com"
     w3c_value = CSP_DIRECTIVE_SCRIPT + " 'self';" + \
         CSP_DIRECTIVE_REPORT_URI + " /myrelativeuri"

     # Keys are added in the same order as before
     raw_headers = {}
     raw_headers[CSP_HEADER_FIREFOX] = CSP_DIRECTIVE_OBJECT + " 'self'"
     raw_headers[CSP_HEADER_W3C_REPORT_ONLY] = report_only_value
     raw_headers[CSP_HEADER_W3C] = w3c_value

     http_response = HTTPResponse(200, '', Headers(raw_headers.items()),
                                  self.url, self.url)
     uri_set = retrieve_csp_report_uri(http_response)

     self.assertEqual(len(uri_set), 2)
     self.assertIn("http://example.com", uri_set)
     self.assertIn("/myrelativeuri", uri_set)
Example #48
0
    def test_eval_xpath_in_dom(self):
        """
        Parse a small document which has two <input> tags and check that an
        xpath query run on the resulting DOM finds both of them.
        """
        html = """
        <html>
          <head>
            <title>THE TITLE</title>
          </head>
          <body>
            <input name="user" type="text">
            <input name="pass" type="password">
          </body>
        </html>"""
        headers = Headers([('Content-Type', 'text/xml')])
        r = _build_http_response(URL_INST, html, headers)

        p = _SGMLParser(r)
        p._parse(r)

        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(2, len(p.get_dom().xpath('.//input')))
Example #49
0
    def mangle_request(self, request):
        """
        Apply the request manglers to the body and headers of the request.

        The body expressions (self._manglers['q']['b']) are applied to the
        request data and the header expressions (self._manglers['q']['h']) to
        the string representation of the headers. The request received as
        parameter is modified in place.

        :param request: This is the request to mangle.
        :return: The same request instance, after being mangled.
        """
        mangled_data = request.get_data()
        for body_re, replacement in self._manglers['q']['b']:
            mangled_data = body_re.sub(replacement, mangled_data)

        mangled_headers = str(request.get_headers())
        for header_re, replacement in self._manglers['q']['h']:
            mangled_headers = header_re.sub(replacement, mangled_headers)

        request.set_headers(Headers.from_string(mangled_headers))
        request.add_data(mangled_data)
        return request
Example #50
0
File: sed.py Project: ElAleyo/w3af
    def mangle_request(self, request):
        """
        Apply the request manglers to the body and headers of the request.

        The body expressions (self._manglers['q']['b']) are applied to the
        request data and the header expressions (self._manglers['q']['h']) to
        the string representation of the headers.

        :param request: This is the request to mangle.
        :return: A request created with FuzzableRequest.from_parts() using
                 the URI and method of the original one plus the mangled
                 data and headers.
        """
        mangled_data = request.get_data()
        for body_re, replacement in self._manglers['q']['b']:
            mangled_data = body_re.sub(replacement, mangled_data)

        mangled_headers = str(request.get_headers())
        for header_re, replacement in self._manglers['q']['h']:
            mangled_headers = header_re.sub(replacement, mangled_headers)

        headers_inst = Headers.from_string(mangled_headers)

        return FuzzableRequest.from_parts(request.get_uri(),
                                          method=request.get_method(),
                                          post_data=mangled_data,
                                          headers=headers_inst)
Example #51
0
def http_request_parser(head, postdata):
    """
    This function parses HTTP Requests from a string to a FuzzableRequest.

    :param head: The head of the request: the "<method> <uri> <version>"
                 line followed by one "name: value" header per line. Lines
                 may end in LF or CRLF.
    :param postdata: The post data of the request
    :return: A FuzzableRequest object with all the corresponding information
             that was sent in head and postdata
    :raises BaseFrameworkException: When the head is empty, the first line
                                    has less than three parts, a header has
                                    no ":" or the URI can not be parsed.

    :author: Andres Riancho ([email protected])
    """
    # Parse the request head, the strip() helps us deal with the \r (if any).
    # Lines are stripped *before* the empty ones are dropped, otherwise the
    # blank "\r" line which ends a CRLF head would later be handled as an
    # (invalid) empty header.
    stripped_lines = [line.strip() for line in head.split("\n")]
    split_head = [line for line in stripped_lines if line]

    if not split_head:
        msg = "The HTTP request is invalid."
        raise BaseFrameworkException(msg)

    # Get method, uri, version
    method_uri_version = split_head[0]
    first_line = method_uri_version.split(" ")

    if len(first_line) < 3:
        msg = 'The HTTP request has an invalid <method> <uri> <version>: "%s"'
        raise BaseFrameworkException(msg % method_uri_version)

    # "GET /foo HTTP/1.0" is the best case for us. Mostly because we are
    # permissive we also accept "GET /hello world.html HTTP/1.0": everything
    # between the first and the last token is the URI.
    method = first_line[0]
    version = first_line[-1]
    uri = " ".join(first_line[1:-1])

    check_version_syntax(version)

    # If we got here, we have a nice method, uri, version first line
    # Now we parse the headers
    headers_inst = Headers()
    for header in split_head[1:]:
        one_split_header = header.split(":", 1)
        if len(one_split_header) == 1:
            msg = 'The HTTP request has an invalid header: "%s".'
            raise BaseFrameworkException(msg % header)

        header_name = one_split_header[0].strip()
        header_value = one_split_header[1].strip()

        # Repeated headers are merged in a comma separated value
        if header_name in headers_inst:
            headers_inst[header_name] += ", " + header_value
        else:
            headers_inst[header_name] = header_value

    # The host header (if any) is handed to check_uri_syntax with the URI
    host, _ = headers_inst.iget("host", None)

    try:
        uri = URL(check_uri_syntax(uri, host))
    except ValueError as ve:
        raise BaseFrameworkException(str(ve))

    # NOTE(review): the visible body stops here without a return statement
    # and never uses method, postdata or headers_inst to build a request;
    # confirm that the FuzzableRequest creation is not missing.
Example #52
0
    def test_clone_with_list_values(self):
        """
        clone_with_list_values() must wrap every header value in a list.
        """
        original = Headers([('a', 'b'), ('c', 'd')])

        cloned = original.clone_with_list_values()

        for name, expected in (('a', ['b']), ('c', ['d'])):
            self.assertEqual(cloned[name], expected)
Example #53
0
 def test_from_string(self):
     """
     Headers parsed from a raw 'name: value' line ending in CRLF must be
     equal to the ones created from a list of tuples.
     """
     expected = Headers([('a', 'b')])
     parsed = Headers.from_string('a: b\r\n')

     self.assertEqual(parsed, expected)
Example #54
0
def create_fuzzable_requests(resp, request=None, add_self=True):
    """
    Build the fuzzable requests which can be derived from an HTTP response.

    The result may contain a request for the response URI itself, one for
    each URL found in the headers listed in URL_HEADERS and one for each
    variant of the forms (from the same domain) found in the document.

    :param resp: An HTTPResponse instance.
    :param request: The HTTP request that generated the resp. When given, its
                    headers are merged over the configured fuzzable headers.
    :param add_self: If a request for the URI of resp should be added to the
                     result or not.

    :return: A list of fuzzable requests.
    """
    res = []

    # Headers for all fuzzable requests created here: the configured fuzzable
    # headers (with an empty value) updated with the ones from the request
    header_dict = {}
    for header_name in cf.cf.get('fuzzable_headers'):
        header_dict[header_name] = ''

    request_headers = request.get_headers() if request else None
    header_dict.update(request_headers or {})
    req_headers = Headers(header_dict.items())

    # Get the cookie!
    cookie_obj = _create_cookie(resp)

    # Create the fuzzable request that represents the response URI
    if add_self:
        res.append(HTTPQSRequest(resp.get_uri(),
                                 headers=req_headers,
                                 cookie=cookie_obj))

    # If response was a 30X (i.e. a redirect) then include the
    # corresponding fuzzable request.
    resp_headers = resp.get_headers()

    for url_header_name in URL_HEADERS:
        url_header_value, _ = resp_headers.iget(url_header_name, '')
        if not url_header_value:
            continue

        url = smart_unicode(url_header_value, encoding=resp.charset)
        try:
            absolute_location = resp.get_url().url_join(url)
        except ValueError:
            msg = 'The application sent a "%s" redirect that w3af' \
                  ' failed to correctly parse as an URL, the header' \
                  ' value was: "%s"'
            om.out.debug(msg % (url_header_name, url))
            continue

        res.append(HTTPQSRequest(absolute_location,
                                 headers=req_headers,
                                 cookie=cookie_obj))

    # Try to find forms in the document, only the ones which are sent to the
    # same domain as the response are kept
    try:
        dp = parser_cache.dpc.get_document_parser_for(resp)
    except BaseFrameworkException:
        # Failed to find a suitable parser for the document
        form_list = []
    else:
        form_list = [f for f in dp.get_forms()
                     if f.get_action().get_domain() ==
                     resp.get_url().get_domain()]

    # TODO: Rewrite web service support. When no forms are found the
    #       response might be a wsdl file; the old WSDLParser based code which
    #       created one WebServiceRequest for each remote method is disabled.
    if form_list:
        # Create one HTTPPostDataRequest for each form variant
        mode = cf.cf.get('form_fuzzing_mode')
        for form in form_list:
            for variant in form.get_variants(mode):
                if form.get_method().upper() != 'POST':
                    # The default is a GET request
                    r = HTTPQSRequest(variant.get_action(),
                                      headers=req_headers,
                                      cookie=cookie_obj)
                    r.set_dc(variant)
                else:
                    r = HTTPPostDataRequest(variant.get_action(),
                                            variant.get_method(),
                                            req_headers,
                                            cookie_obj,
                                            variant)

                res.append(r)

    return res
Example #55
0
 def test_to_str_from_string(self):
     """
     Round trip: parsing the string representation of a Headers instance
     must give an object which is equal to the original one.
     """
     original = Headers([('a', 'b')])
     serialized = str(original)

     self.assertEqual(Headers.from_string(serialized), original)
Example #56
0
 def get_headers(self):
     """
     :return: A new Headers instance created with the items of self.headers
              and then updated with the items of self.unredirected_hdrs.
     """
     merged = Headers(self.headers.items())
     unredirected = self.unredirected_hdrs.items()
     merged.update(unredirected)
     return merged
Example #57
0
 def set_headers(self, headers):
     """
     Replace the stored headers with a new Headers instance.

     :param headers: Value which is passed as-is to the Headers constructor.
     """
     new_headers = Headers(headers)
     self._headers = new_headers
Example #58
0
class FuzzableRequest(RequestMixIn, DiskItem):
    """
    This class represents a fuzzable request. Fuzzable requests were created
    to allow w3af plugins to be much simpler and don't really care if the
    vulnerability is in the postdata, querystring, header, cookie or any other
    variable.

    Other classes should inherit from this one and change the behaviour of
    get_uri() and get_data(). For example: the class HTTPQSRequest should return
    the _dc in the querystring (get_uri) and HTTPPostDataRequest should return
    the _dc in the POSTDATA (get_data()).

    :author: Andres Riancho ([email protected])
    """

    def __init__(self, uri, method='GET',
                 headers=None, cookie=None, dc=None):
        """
        :param uri: URL instance, set_uri() raises TypeError for anything else
        :param method: HTTP method name
        :param headers: Value used to build the Headers instance, when it is
                        None (or empty) the request has no headers
        :param cookie: Cookie instance, a new empty Cookie is used by default
        :param dc: DataContainer instance, a new empty one is used by default
        """
        super(FuzzableRequest, self).__init__()

        # Internal variables
        self._dc = dc or DataContainer()
        self._method = method
        self._headers = Headers(headers or ())
        self._cookie = cookie or Cookie()
        self._data = None
        self.set_uri(uri)

        # Comparable version of the sent data, it's lazily built (and then
        # cached) by sent()
        self._sent_info_comp = None

    def export(self):
        """
        Generic version of how they are exported:
            METHOD,URL,DC

        Example:
            GET,http://localhost/index.php?abc=123&def=789,
            POST,http://localhost/index.php,abc=123&def=789

        :return: a csv str representation of the request
        """
        #
        # FIXME: What if a comma is inside the URL or DC?
        # TODO: Why don't we export headers and cookies?
        #
        meth = self._method
        str_res = [meth, ',', str(self._url)]

        if meth == 'GET':
            # The data container goes in the query string
            if self._dc:
                str_res.extend(('?', str(self._dc)))
            str_res.append(',')
        else:
            # The data container goes in the third (last) column
            str_res.append(',')
            if self._dc:
                str_res.append(str(self._dc))

        return ''.join(str_res)

    def sent(self, smth_instng):
        """
        Checks if something similar to `smth_instng` was sent in the request.
        This is used to remove false positives, e.g. if a grep plugin finds a "strange"
        string and wants to be sure it was not generated by an audit plugin.

        This method should only be used by grep plugins which often have false
        positives.

        The following example shows that we sent d'z"0 but d\'z"0 will
        as well be recognised as sent

        TODO: This function is called MANY times, and under some circumstances it's
        performance REALLY matters. We need to review this function.

        >>> f = FuzzableRequest(URL('''http://example.com/a?p=d'z"0&paged=2'''))
        >>> f.sent('d%5C%27z%5C%220')
        True

        >>> f._data = 'p=<SCrIPT>alert("bsMs")</SCrIPT>'
        >>> f.sent('<SCrIPT>alert(\"bsMs\")</SCrIPT>')
        True

        >>> f = FuzzableRequest(URL('http://example.com/?p=<ScRIPT>a=/PlaO/%0Afake_alert(a.source)</SCRiPT>'))
        >>> f.sent('<ScRIPT>a=/PlaO/fake_alert(a.source)</SCRiPT>')
        True

        :param smth_instng: The string
        :return: True if something similar was sent
        """
        def make_comp(heterogen_string):
            """
            This basically removes characters that are hard to compare:
            backslash, quotes, plus, space, null byte, CR and LF.
            """
            heterogen_characters = ('\\', '\'', '"', '+', ' ', chr(0),
                                    chr(int("0D", 16)), chr(int("0A", 16)))

            for hetero_char in heterogen_characters:
                heterogen_string = heterogen_string.replace(hetero_char, '')
            return heterogen_string

        data = self._data or ''
        # This is the easy part. If it was exactly like this in the request
        if data and smth_instng in data or \
        smth_instng in self.get_uri() or \
        smth_instng in unquote(data) or \
        smth_instng in unicode(self._uri.url_decode()):
            return True

        # Ok, it's not in it but maybe something similar
        # Let's set up something we can compare. This is done only once, the
        # result is cached in self._sent_info_comp
        if self._sent_info_comp is None:
            dc = self._dc
            dec_dc = unquote(str(dc)).decode(dc.encoding)
            data = '%s%s%s' % (unicode(self._uri), data, dec_dc)

            self._sent_info_comp = make_comp(data + unquote(data))

        min_len = 3
        # make the smth_instng comparable
        smth_instng_comps = (make_comp(smth_instng),
                             make_comp(unquote(smth_instng)))
        for smth_intstng_comp in smth_instng_comps:
            # Strings which are too short after being made comparable are
            # ignored, they would match by chance
            if smth_intstng_comp in self._sent_info_comp and \
                    len(smth_intstng_comp) >= min_len:
                return True
        # I didn't sent the smth_instng in any way
        return False

    def __hash__(self):
        # Only the URI is used: requests which are equal (see __eq__) always
        # have the same URI and therefore the same hash
        return hash(str(self._uri))

    def __str__(self):
        """
        :return: A string representation of this fuzzable request.

        >>> fr = FuzzableRequest(URL("http://www.w3af.com/"))
        >>> str(fr)
        'http://www.w3af.com/ | Method: GET'

        >>> repr( fr )
        '<fuzzable request | GET | http://www.w3af.com/>'

        >>> fr.set_method('TRACE')
        >>> str(fr)
        'http://www.w3af.com/ | Method: TRACE'

        """
        strelems = [unicode(self._url)]
        strelems.append(u' | Method: ' + self._method)

        if self._dc:
            strelems.append(u' | Parameters: (')

            # Mangle the value for printing
            params = []
            for pname, values in self._dc.items():
                # Because of repeated parameter names, we need to add this:
                for the_value in values:
                    # the_value is always a string
                    if len(the_value) > 10:
                        the_value = the_value[:10] + '...'
                    params.append(pname + '=' + '"' + the_value + '"')

            # Joining (instead of removing the last ', ') keeps the opening
            # parenthesis intact when no parameter value was rendered
            strelems.append(', '.join(params))
            strelems.append(u')')

        return u''.join(strelems).encode(DEFAULT_ENCODING)

    def __repr__(self):
        return '<fuzzable request | %s | %s>' % \
            (self.get_method(), self.get_uri())

    def __eq__(self, other):
        """
        Two requests are equal if:
            - They have the same URL
            - They have the same method
            - They have the same parameters
            - The values for each parameter is equal

        :return: True if the requests are equal, NotImplemented when other is
                 not a FuzzableRequest.
        """
        if isinstance(other, FuzzableRequest):
            return (self._method == other._method and
                    self._uri == other._uri and
                    self._dc == other._dc)
        else:
            return NotImplemented

    def get_eq_attrs(self):
        """
        :return: The names of the attributes which are compared in __eq__
        """
        return ['_method', '_uri', '_dc']

    def __ne__(self, other):
        """
        :return: The negation of __eq__; NotImplemented is propagated as-is
                 because it's truthy and negating it would report that this
                 request is equal to any object of a different type.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def is_variant_of(self, other):
        """
        Two requests are loosely equal (or variants) if:
            - They have the same URL
            - They have the same HTTP method
            - They have the same parameter names
            - The values for each parameter have the same type (int / string)

        :return: True if self and other are variants.
        """
        dc = self._dc
        odc = other._dc

        if (self._method == other._method and
            self._url == other._url and
                dc.keys() == odc.keys()):
            # Compare all the values in pairs, a None shows up when one of
            # the requests has more values than the other
            for vself, vother in izip_longest(
                chain(*dc.values()),
                chain(*odc.values()),
                fillvalue=None
            ):
                if None in (vself, vother) or \
                        vself.isdigit() != vother.isdigit():
                    return False
            return True
        return False

    def set_url(self, url):
        """
        Set both the URL and the URI to a copy of url in which spaces are
        replaced by %20.

        :raises TypeError: When url is not a URL instance.
        """
        if not isinstance(url, URL):
            raise TypeError('The "url" parameter of a %s must be of '
                            'url.URL type.' % type(self).__name__)

        self._url = URL(url.url_string.replace(' ', '%20'))
        self._uri = self._url

    def set_uri(self, uri):
        """
        Store the URI and the URL which uri.uri2url() returns for it.

        :raises TypeError: When uri is not a URL instance.
        """
        if not isinstance(uri, URL):
            raise TypeError('The "uri" parameter of a %s must be of '
                            'url.URL type.' % type(self).__name__)
        self._uri = uri
        self._url = uri.uri2url()

    def set_method(self, method):
        self._method = method

    def set_dc(self, dataCont):
        """
        :raises TypeError: When dataCont is not a DataContainer instance.
        """
        if not isinstance(dataCont, DataContainer):
            raise TypeError('Invalid call to fuzzable_request.set_dc(), the '
                            'argument must be a DataContainer instance.')
        self._dc = dataCont

    def set_headers(self, headers):
        self._headers = Headers(headers)

    def set_referer(self, referer):
        self._headers['Referer'] = str(referer)

    def set_cookie(self, c):
        """
        :param c: A Cookie object as defined in core.data.dc.cookie, a string
                  (used to create a new Cookie) or None (an empty Cookie is
                  stored).
        :raises BaseFrameworkException: For any other type, the error is
                                        also sent to the output manager.
        """
        if isinstance(c, Cookie):
            self._cookie = c
        elif isinstance(c, basestring):
            self._cookie = Cookie(c)
        elif c is None:
            self._cookie = Cookie()
        else:
            fmt = '[FuzzableRequest error] set_cookie received: "%s": "%s".'
            error_str = fmt % (type(c), repr(c))
            om.out.error(error_str)
            raise BaseFrameworkException(error_str)

    def get_url(self):
        return self._url

    def get_uri(self):
        return self._uri

    def set_data(self, d):
        """
        The data is the string representation of the DataContainer, in most
        cases it wont be set.
        """
        self._data = d

    def get_data(self):
        """
        The data is the string representation of the DataContainer, in most
        cases it will be used as the POSTDATA for requests. Sometimes it is
        also used as the query string data.
        """
        return self._data

    def get_method(self):
        return self._method

    def get_dc(self):
        return self._dc

    def get_headers(self):
        return self._headers

    def get_referer(self):
        """
        :return: The value stored with the 'Referer' key, None if not set.
        """
        return self._headers.get('Referer', None)

    def get_cookie(self):
        return self._cookie

    def get_file_vars(self):
        return []

    def copy(self):
        """
        :return: A deep copy of this request.
        """
        return copy.deepcopy(self)
Example #59
0
    def test_headers_idel(self):
        upper_headers = Headers([('Abc', 'b')])

        upper_headers.idel('abc')

        self.assertNotIn('Abc', upper_headers)
Example #60
0
class HTTPResponse(object):
    """
    An HTTP response: status code and message, headers, body and the URLs
    involved in the request.

    The raw body handed to the constructor is decoded lazily, the first time
    the body or the charset is requested. The content type and the document
    type are derived from the headers the first time they are needed.
    """

    # Document type categories derived from the Content-Type header
    DOC_TYPE_TEXT_OR_HTML = 'DOC_TYPE_TEXT_OR_HTML'
    DOC_TYPE_SWF = 'DOC_TYPE_SWF'
    DOC_TYPE_PDF = 'DOC_TYPE_PDF'
    DOC_TYPE_IMAGE = 'DOC_TYPE_IMAGE'
    DOC_TYPE_OTHER = 'DOC_TYPE_OTHER'

    __slots__ = ('_code',
                 '_charset',
                 '_headers',
                 '_body',
                 '_raw_body',
                 '_content_type',
                 '_dom',
                 'id',
                 '_from_cache',
                 '_info',
                 '_realurl',
                 '_uri',
                 '_redirected_url',
                 '_redirected_uri',
                 '_msg',
                 '_time',
                 '_alias',
                 '_doc_type',
                 '_body_lock')

    def __init__(self, code, read, headers, geturl, original_url,
                 msg='OK', _id=None, time=DEFAULT_WAIT_TIME, alias=None,
                 charset=None):
        """
        :param code: HTTP code
        :param read: HTTP body text; a str or unicode instance
        :param headers: HTTP headers, a Headers instance
        :param geturl: URL object instance
        :param original_url: URL object instance
        :param msg: HTTP message
        :param _id: Optional response identifier
        :param time: The time between the request and the response
        :param alias: Alias for the response, this contains a hash that helps
                      the backend sqlite find http_responses faster by indexing
                      by this attr.
        :param charset: Response's encoding; obligatory when `read` is unicode
        :raise TypeError: When geturl, original_url, headers or read are not
                          of the expected type.
        """
        # The error text lives in its own local so that the `msg` parameter
        # is never shadowed by the validation code.
        if not isinstance(geturl, URL):
            error = 'Invalid type %s for HTTPResponse ctor param geturl.'
            raise TypeError(error % type(geturl))

        if not isinstance(original_url, URL):
            error = 'Invalid type %s for HTTPResponse ctor param original_url.'
            raise TypeError(error % type(original_url))

        if not isinstance(headers, Headers):
            error = 'Invalid type %s for HTTPResponse ctor param headers.'
            raise TypeError(error % type(headers))

        if not isinstance(read, basestring):
            raise TypeError('Invalid type %s for HTTPResponse ctor param read.'
                            % type(read))

        self._charset = charset
        # Parsed lazily from self._info, see get_headers()
        self._headers = None
        # Decoded lazily from self._raw_body, see get_body()
        self._body = None
        self._raw_body = read
        self._content_type = None
        self._dom = None
        # A unique id identifier for the response
        self.id = _id
        # From cache defaults to False
        self._from_cache = False
        # Set the info
        self._info = headers
        # Set code
        self.set_code(code)

        # Set the URL variables
        # The URL that we really GET'ed
        self._realurl = original_url.uri2url()
        self._uri = original_url
        # The URL where we were redirected to (equal to original_url
        # when no redirect)
        # NOTE(review): _redirected_url holds geturl itself while
        # _redirected_uri holds geturl.uri2url(), which is the opposite of
        # the _uri / _realurl pairing above -- confirm this is intended.
        self._redirected_url = geturl
        self._redirected_uri = geturl.uri2url()

        # Set the rest
        self._msg = smart_unicode(msg)
        self._time = time
        self._alias = alias
        self._doc_type = None

        # Internal lock, protects the lazy decoding of the body
        self._body_lock = threading.RLock()

    @classmethod
    def from_httplib_resp(cls, httplibresp, original_url=None):
        """
        Factory function. Build a HTTPResponse object from a
        httplib.HTTPResponse instance

        :param httplibresp: httplib.HTTPResponse instance
        :param original_url: Optional 'url_object' instance.

        :return: A HTTPResponse instance
        """
        resp = httplibresp
        code, msg, hdrs, body = (resp.code, resp.msg, resp.info(), resp.read())
        hdrs = Headers(hdrs.items())

        if original_url:
            url_inst = URL(resp.geturl(), original_url.encoding)
            url_inst = url_inst.url_decode()
        else:
            url_inst = original_url = URL(resp.geturl())

        httplib_time = DEFAULT_WAIT_TIME
        if hasattr(httplibresp, 'get_wait_time'):
            # This is defined in the keep alive http response object
            httplib_time = httplibresp.get_wait_time()

        if isinstance(resp, urllib2.HTTPError):
            # This is possible because in errors.py I do:
            # err = urllib2.HTTPError(req.get_full_url(), code, msg, hdrs, resp)
            charset = getattr(resp.fp, 'encoding', None)
        else:
            # The encoding attribute is only set on CachedResponse instances
            charset = getattr(resp, 'encoding', None)

        return cls(code, body, hdrs, url_inst, original_url,
                   msg, charset=charset, time=httplib_time)

    @classmethod
    def from_dict(cls, unserialized_dict):
        """
        * msgpack is MUCH faster than cPickle,
        * msgpack can't serialize python objects,
        * I have to create a dict representation of HTTPResponse to serialize it
        * and a from_dict to have the object back

        :param unserialized_dict: A dict just as returned by to_dict(). The
                                  'charset' key is optional so that dicts
                                  created before it was added to to_dict()
                                  can still be loaded.
        :return: A HTTPResponse instance
        """
        udict = unserialized_dict

        code, msg, hdrs = udict['code'], udict['msg'], udict['headers']
        body, _time, _id = udict['body'], udict['time'], udict['id']
        # Without the charset a unicode body can not be handled by
        # _charset_handling(), which requires one for unicode bodies.
        charset = udict.get('charset')

        headers_inst = Headers(hdrs.items())
        url = URL(udict['uri'])

        return cls(code, body, headers_inst, url, url, msg=msg, _id=_id,
                   time=_time, charset=charset)

    def to_dict(self):
        """
        :return: A dict that represents the current object and is serializable
                 by the json or msgpack modules.
        """
        # Note: The Headers() object can be serialized by msgpack because it
        #       inherits from dict() and doesn't mangle it too much
        #
        # The body is read before the charset: get_body() is what decodes
        # the raw body and settles the charset that was used for it.
        return {
            'code': self.get_code(),
            'msg': self.get_msg(),
            'headers': dict(self.get_headers()),
            'body': self.get_body(),
            'charset': self.get_charset(),
            'time': self.get_wait_time(),
            'id': self.get_id(),
            'uri': self.get_uri().url_string,
        }

    def __contains__(self, string_to_test):
        """
        Determine if the `string_to_test` is contained by the HTTP response
        body.

        :param string_to_test: String to look for in the body
        """
        return string_to_test in self.body

    def __eq__(self, other):
        """
        Two responses are equal when their id, code, headers, body and URI
        are equal. Comparing with any other type is delegated to the other
        operand instead of failing with AttributeError.
        """
        if not isinstance(other, HTTPResponse):
            return NotImplemented

        return (self.id == other.id and
                self._code == other._code and
                self.headers == other.headers and
                self.body == other.body and
                self._uri == other._uri)

    def __ne__(self, other):
        """
        Python 2 does not derive != from ==, keep both consistent.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __repr__(self):
        id_part = ' | id:%s' % self.id if self.id else ''
        cache_part = ' | fcache:True' if self._from_cache else ''

        vals = {
            'code': self.get_code(),
            'url': str(self.get_url()),
            'id': id_part,
            'fcache': cache_part
        }
        return '<HTTPResponse | %(code)s | %(url)s%(id)s%(fcache)s>' % vals

    def set_id(self, _id):
        self.id = _id

    def get_id(self):
        return self.id

    def set_code(self, code):
        self._code = code

    def get_code(self):
        return self._code

    def get_body(self):
        """
        :return: The response body. The raw body is decoded on the first
                 call (see _charset_handling) and the result is kept.
        """
        with self._body_lock:
            if self._body is None:
                self._body, self._charset = self._charset_handling()
                # Free 'raw_body'
                self._raw_body = None
            return self._body

    def set_body(self, body):
        """
        Setter for body.

        :param body: A string that represents the body of the HTTP response
        :raise TypeError: When `body` is not a str or unicode instance
        """
        if not isinstance(body, basestring):
            msg = 'Invalid type %s for set_body parameter body.'
            raise TypeError(msg % type(body))

        # Drop the decoded body so it is decoded again from the new raw body
        self._body = None
        self._raw_body = body

    body = property(get_body, set_body)

    def get_clear_text_body(self):
        """
        Just a shortcut to get the clear text body
        :return: A unicode string
        """
        parser = self.get_parser()
        if parser is None:
            return u''

        return parser.get_clear_text_body()

    def get_parser(self):
        """
        Just a shortcut to get the parser for this response, we get this from
        the document parser cache.

        :return: A DocumentParser instance or None
        """
        try:
            return parser_cache.dpc.get_document_parser_for(self)
        except BaseFrameworkException:
            # Failed to find a suitable parser for the document
            return None

    def get_charset(self):
        """
        :return: The charset of the response. When none is set yet the body
                 is decoded, which also determines the charset.
        """
        # Same lazy decoding as get_body(), so it is done under the same
        # (re-entrant) lock.
        with self._body_lock:
            if not self._charset:
                self._body, self._charset = self._charset_handling()
                # Free 'raw_body'
                self._raw_body = None
            return self._charset

    def set_charset(self, charset):
        self._charset = charset

    charset = property(get_charset, set_charset)

    def set_redir_url(self, ru):
        self._redirected_url = ru

    def get_redir_url(self):
        return self._redirected_url

    def set_redir_uri(self, ru):
        self._redirected_uri = ru

    def get_redir_uri(self):
        return self._redirected_uri

    def get_headers(self):
        """
        :return: The response headers; on first use they are taken from the
                 headers received in the constructor.
        """
        if self._headers is None:
            self.headers = self._info
            assert self._headers is not None
        return self._headers

    def set_headers(self, headers):
        """
        Sets the headers and also analyzes them in order to get the response
        mime type (text/html , application/pdf, etc).

        :param headers: The headers dict.
        """
        # Fix lowercase in header names from HTTPMessage
        if isinstance(headers, httplib.HTTPMessage):
            self._headers = Headers()
            for header in headers.headers:
                # NOTE(review): a raw line without ':' makes this unpacking
                # raise ValueError -- confirm such lines can not reach here.
                key, value = header.split(':', 1)
                self._headers[key.strip()] = value.strip()
        else:
            self._headers = headers

        content_type_hvalue, _ = self._headers.iget(CONTENT_TYPE, None)

        # we need exactly content type but not charset
        if content_type_hvalue is not None:
            try:
                content_type = content_type_hvalue.split(';', 1)[0]
                content_type = content_type.strip().lower()
            except (AttributeError, TypeError):
                # The header value is not a string
                msg = 'Invalid Content-Type value "%s" sent in HTTP response.'
                om.out.debug(msg % (content_type_hvalue,))
            else:
                self._content_type = content_type

                # Set the doc_type
                if 'image' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_IMAGE

                elif 'pdf' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_PDF

                elif 'x-shockwave-flash' in content_type:
                    self._doc_type = HTTPResponse.DOC_TYPE_SWF

                elif any(word in content_type for word in
                         ('text', 'html', 'xml', 'txt', 'javascript')):
                    self._doc_type = HTTPResponse.DOC_TYPE_TEXT_OR_HTML

        # Check if the doc type is still None, that would mean that none of the
        # previous if statements matched.
        #
        # Note that I'm doing this here and not before the other if statements
        # because that triggered a race condition with threads asking if the
        # _doc_type was != None (which it was because I was setting it to
        # DOC_TYPE_OTHER) and that raised all types of errors.
        if self._doc_type is None:
            self._doc_type = HTTPResponse.DOC_TYPE_OTHER

    headers = property(get_headers, set_headers)

    @memoized
    def get_lower_case_headers(self):
        """
        If the original headers were:
            {'Abc-Def': 'F00N3s'}
        This will return:
            {'abc-def': 'F00N3s'}

        The only thing that changes is the header name.
        """
        return Headers([(k.lower(), v) for k, v in self.headers.iteritems()])

    def set_url(self, url):
        """
        >>> url = URL('http://www.google.com')
        >>> r = HTTPResponse(200, '' , Headers(), url, url)
        >>> r.set_url('http://www.google.com/')
        Traceback (most recent call last):
          ...
        TypeError: The URL of a HTTPResponse object must be of url.URL type.
        >>> r.set_url(url)
        >>> r.get_url() == url
        True
        """
        if not isinstance(url, URL):
            raise TypeError('The URL of a HTTPResponse object must be of '
                            'url.URL type.')

        self._realurl = url.uri2url()

    def get_url(self):
        return self._realurl

    def get_host(self):
        return self.get_url().get_domain()

    def set_uri(self, uri):
        """
        >>> uri = URL('http://www.google.com/')
        >>> r = HTTPResponse(200, '' , Headers(), uri, uri)
        >>> r.set_uri('http://www.google.com/')
        Traceback (most recent call last):
          ...
        TypeError: The URI of a HTTPResponse object must be of url.URL type.
        >>> r.set_uri(uri)
        >>> r.get_uri() == uri
        True

        """
        if not isinstance(uri, URL):
            raise TypeError('The URI of a HTTPResponse object must be of '
                            'url.URL type.')

        self._uri = uri
        self._realurl = uri.uri2url()

    def get_uri(self):
        return self._uri

    def was_redirected(self):
        return self._uri != self._redirected_uri

    def set_from_cache(self, fcache):
        """
        :param fcache: True if this response was obtained from the
        local cache.
        """
        self._from_cache = fcache

    def get_from_cache(self):
        """
        :return: True if this response was obtained from the local cache.
        """
        return self._from_cache

    def set_wait_time(self, t):
        self._time = t

    def get_wait_time(self):
        return self._time

    def set_alias(self, alias):
        self._alias = alias

    def get_alias(self):
        return self._alias

    def info(self):
        return self._info

    def get_status_line(self):
        """
        Return status-line of response.
        """
        return STATUS_LINE % (self._code, self._msg)

    def get_msg(self):
        return self._msg

    def _charset_handling(self):
        """
        Decode the body based on the header (or metadata) encoding.
        The implemented algorithm follows the encoding detection logic
        used by FF:

            1) First try to find a charset using the following search criteria:
                a) Look in the CONTENT_TYPE HTTP header. Example:
                    content-type: text/html; charset=iso-8859-1
                b) Look in the 'meta' HTML header. Example:
                    <meta .* content="text/html; charset=utf-8" />
                c) Determine the charset using the chardet module (TODO)
                d) Use the DEFAULT_CHARSET

            2) Try to decode the body using the found charset. If it fails,
            then force it to use the DEFAULT_CHARSET

        Finally return the unicode (decoded) body and the used charset.

        Note: If the body is already a unicode string return it as it is.

        :return: A (body, charset) tuple
        """
        headers = self.get_headers()
        content_type, _ = headers.iget(CONTENT_TYPE, None)
        charset = self._charset
        rawbody = self._raw_body

        # Only try to decode <str> strings. Skip <unicode> strings
        if isinstance(rawbody, unicode):
            _body = rawbody
            assert charset is not None, ("HTTPResponse objects containing "
                                         "unicode body must have an associated "
                                         "charset")
        elif content_type is None:
            _body = rawbody
            charset = DEFAULT_CHARSET

            if len(_body):
                msg = ('The remote web server failed to send the CONTENT_TYPE'
                       ' header in HTTP response with id %s')
                om.out.debug(msg % self.id)

        elif not self.is_text_or_html():
            # Not text, save as it is.
            _body = rawbody
            charset = charset or DEFAULT_CHARSET
        else:
            # Figure out charset to work with
            if not charset:
                charset = self.guess_charset(rawbody, headers)

            # Now that we have the charset, we use it!
            # The return value of the decode function is a unicode string.
            try:
                _body = smart_unicode(rawbody,
                                      charset,
                                      errors=ESCAPED_CHAR,
                                      on_error_guess=False)
            except LookupError:
                # Warn about a buggy charset
                msg = ('Charset LookupError: unknown charset: %s; '
                       'ignored and set to default: %s' %
                       (charset, DEFAULT_CHARSET))
                om.out.debug(msg)

                # Forcing it to use the default
                charset = DEFAULT_CHARSET
                _body = smart_unicode(rawbody,
                                      charset,
                                      errors=ESCAPED_CHAR,
                                      on_error_guess=False)

        return _body, charset

    @staticmethod
    def _search_ignore_case(compiled_re, text):
        """
        Search `text` with the pattern of `compiled_re`, ignoring case.

        The second positional argument of a compiled pattern's search() is
        the start position, not the flags, so the flag has to be applied by
        compiling the pattern again (the re module caches the result).

        :param compiled_re: A compiled regular expression object
        :param text: The string to search in
        :return: A match object or None
        """
        insensitive_re = re.compile(compiled_re.pattern,
                                    compiled_re.flags | re.IGNORECASE)
        return insensitive_re.search(text)

    def guess_charset(self, rawbody, headers):
        """
        Find the charset declared by the response.

        :param rawbody: The response body, searched for a meta tag charset
        :param headers: The response headers, searched first
        :return: The lower-cased charset found in the content type header,
                 else the one found in the body's meta tag, else
                 DEFAULT_CHARSET.
        """
        charset_mo = None

        # Start with the headers
        content_type, _ = headers.iget(CONTENT_TYPE, None)
        if content_type is not None:
            charset_mo = self._search_ignore_case(CHARSET_EXTRACT_RE,
                                                  content_type)

        if charset_mo is None:
            # Continue with the body's meta tag
            charset_mo = self._search_ignore_case(CHARSET_META_RE, rawbody)

        if charset_mo is None:
            return DEFAULT_CHARSET

        return charset_mo.groups()[0].lower().strip()

    @property
    def content_type(self):
        """
        The content type of the response
        """
        if self._content_type is None:
            self.headers = self._info
        return self._content_type or ''

    @property
    def doc_type(self):
        """
        One of the DOC_TYPE_* constants, computed from the headers on first
        use.
        """
        if self._doc_type is None:
            self.headers = self._info
            assert self._doc_type is not None
        return self._doc_type

    def is_text_or_html(self):
        """
        :return: True if this response is text or html
        """
        return self.doc_type == HTTPResponse.DOC_TYPE_TEXT_OR_HTML

    def is_pdf(self):
        """
        :return: True if this response is a PDF file
        """
        return self.doc_type == HTTPResponse.DOC_TYPE_PDF

    def is_swf(self):
        """
        :return: True if this response is a SWF file
        """
        return self.doc_type == HTTPResponse.DOC_TYPE_SWF

    def is_image(self):
        """
        :return: True if this response is an image file
        """
        return self.doc_type == HTTPResponse.DOC_TYPE_IMAGE

    def dump_response_head(self):
        """
        :return: A byte-string, as we would send to the wire, containing the
                 status line (see get_status_line) followed by the headers
                 (see dump_headers).
        """
        status_line = self.get_status_line()
        dumped_headers = self.dump_headers()

        dump_head = '%s%s' % (status_line, dumped_headers)

        if isinstance(dump_head, unicode):
            dump_head = dump_head.encode(self.charset, 'replace')

        return dump_head

    def dump(self):
        """
        Return a DETAILED str representation of this HTTP response object.
        """
        body = self.body

        # Images, pdf and binary responses in general are never decoded
        # to unicode
        if isinstance(body, unicode):
            body = body.encode(self.charset, 'replace')

        return "%s%s%s" % (self.dump_response_head(), CRLF, body)

    def dump_headers(self):
        """
        :return: a str representation of the headers.
        """
        headers = self.headers
        if not headers:
            return ''

        return CRLF.join(h + ': ' + hv for h, hv in headers.items()) + CRLF

    def copy(self):
        """
        :return: A deep copy of this response.
        """
        return copy.deepcopy(self)

    def __getstate__(self):
        """
        :return: The slot values as a dict, without the lock, which is
                 created again in __setstate__.
        """
        state = {k: getattr(self, k) for k in self.__slots__}
        state.pop('_body_lock')
        return state

    def __setstate__(self, state):
        """
        Restore the slot values from `state` and create a new lock.
        """
        for name, value in state.iteritems():
            setattr(self, name, value)
        self._body_lock = threading.RLock()