Exemple #1
0
    def _verify_reference(self, reference, original_request,
                          original_response, possibly_broken,
                          be_recursive=True):
        """
        Request a newly discovered URL and decide what to do with it.

        A response which is not a 404 is wrapped in a FuzzableRequest and put
        in the output queue. A 404 response is never sent to the output
        queue: it is only parsed for more links (one level deep) and, when
        applicable, stored as a broken link.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by
                                  original_request
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not; when it is true a 404 result is not
                                stored as a broken link
        :param be_recursive: When true, the links found in a 404 response are
                             verified too, with be_recursive=False
        :return: None
        """
        # The Referer sent is the base URL of the page where the link was
        # found. The original notes state that sending the complete original
        # URL "breaks" cache=True in most cases while this one does not, and
        # that it is friendlier than simply ignoring the referer.
        referer = original_response.get_url().base_url().url_string
        request_headers = Headers([('Referer', referer)])

        # Request budget: every call is counted, and once the counter goes
        # above the maximum no HTTP request is sent for the reference
        self._requests_count += 1
        if self._requests_count > self._max_requests_count:
            return

        # grep=False because this request has a high probability of being a
        # 404, and the grep plugins already got enough 404 responses to
        # analyze (from is_404 for example). Per the original note, a link
        # which is not a 404 is pushed to the core and comes back to this
        # plugin's crawl(), where it is requested with grep=True.
        http_response = self._uri_opener.GET(reference,
                                             headers=request_headers,
                                             grep=False,
                                             cache=True)

        if not is_404(http_response):
            debug_fmt = '[web_spider] Sending link to w3af core: "%s"'
            om.out.debug(debug_fmt % reference)

            new_request = FuzzableRequest(reference, headers=request_headers)

            # Setting the referer and the cookie on the FuzzableRequest is
            # what later allows the fuzzer to create CookieMutant and
            # HeadersMutant instances. Without the cookie the CookieMutant
            # would have no data to modify, since cookies are set by the
            # urllib2 cookie handler once the request already exited the
            # framework.
            session_cookie = Cookie.from_http_response(original_response)
            new_request.set_referer(referer)
            new_request.set_cookie(session_cookie)

            self.output_queue.put(new_request)
            return

        # From here on the response is a 404: follow the links it contains,
        # but never return the 404 itself to the core.
        if be_recursive:
            # Only one level of links is followed in 404 pages (the nested
            # calls use be_recursive=False). This avoids endless requests
            # such as:
            #
            #   http://foo.com/abc/ => 404
            #   Body: <a href="def/">link</a>
            #
            #   * http://foo.com/abc/
            #   * http://foo.com/abc/def/
            #   * http://foo.com/abc/def/def/
            #   * ...
            #
            # Threads are not used here on purpose: it dead-locks (for
            # unknown reasons), see the TestDeadLock unittest.
            link_generator = self._urls_to_verify_generator(http_response,
                                                            original_request)
            for verify_args in link_generator:
                self._verify_reference(*verify_args, be_recursive=False)

        # Store the broken links, skipping the references we do not trust and
        # the response codes listed in UNAUTH_FORBID
        if possibly_broken:
            return

        if http_response.get_code() in self.UNAUTH_FORBID:
            return

        self._broken_links.add((http_response.get_url(),
                                original_request.get_uri()))
Exemple #2
0
    def _verify_reference(self,
                          reference,
                          original_request,
                          original_response,
                          possibly_broken,
                          be_recursive=True):
        """
        GET a newly found link and either send it to the output queue or, if
        the response is a 404, parse it in order to get new links.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by
                                  original_request
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not; when it is true a 404 result is not
                                stored as a broken link
        :param be_recursive: When true, the links found in a 404 response are
                             verified too, with be_recursive=False
        :return: None
        """
        #
        # Remember that this "breaks" the cache=True in most cases!
        #     headers = { 'Referer': original_url }
        #
        # But this does not, and it is friendlier than simply ignoring the
        # referer
        #
        referer = original_response.get_url().base_url().url_string
        headers = Headers([('Referer', referer)])

        resp = self._uri_opener.GET(reference, cache=True, headers=headers)

        if is_404(resp):
            # Note: I WANT to follow links that are in the 404 page, but
            # DO NOT return the 404 itself to the core.
            #
            # This will parse the 404 response and add the 404-links in the
            # output queue, so that the core can get them
            #
            if be_recursive:
                #
                # Only follow one level of links in 404 pages, this limits the
                # potential issue when this is found:
                #
                #   http://foo.com/abc/ => 404
                #   Body: <a href="def/">link</a>
                #
                # Which would lead to this function to perform requests to:
                #   * http://foo.com/abc/
                #   * http://foo.com/abc/def/
                #   * http://foo.com/abc/def/def/
                #   * http://foo.com/abc/def/def/def/
                #   * ...
                #

                # Verify the links sequentially, in the calling thread.
                # Dispatching these nested calls to self.worker_pool was
                # observed to dead-lock; presumably this method can itself be
                # running in a pool worker, which then blocks waiting for
                # tasks that no free worker can pick up (TODO confirm).
                for args in self._urls_to_verify_generator(
                        resp, original_request):
                    self._verify_reference(*args, be_recursive=False)

            # Store the broken links, skipping the references we do not trust
            # and the response codes listed in UNAUTH_FORBID
            if not possibly_broken and resp.get_code(
            ) not in self.UNAUTH_FORBID:
                t = (resp.get_url(), original_request.get_uri())
                self._broken_links.add(t)
        else:
            msg = 'Adding reference "%s" to the result.'
            om.out.debug(msg % reference)

            fuzz_req = FuzzableRequest(reference, headers=headers)

            # These next steps are simple, but actually allows me to set the
            # referer and cookie for the FuzzableRequest instances I'm sending
            # to the core, which will then allow the fuzzer to create
            # CookieMutant and HeadersMutant instances.
            #
            # Without setting the Cookie, the CookieMutant would never have any
            # data to modify; remember that cookies are actually set by the
            # urllib2 cookie handler when the request already exited the
            # framework.
            cookie = Cookie.from_http_response(original_response)

            fuzz_req.set_referer(referer)
            fuzz_req.set_cookie(cookie)

            self.output_queue.put(fuzz_req)
    def _verify_reference(self,
                          reference,
                          original_request,
                          original_response,
                          possibly_broken,
                          be_recursive=True):
        """
        Fetch a newly found URL and route the outcome.

        Responses which are not a 404 are turned into a FuzzableRequest and
        put in the output queue. A 404 response is not sent to the output
        queue; it is parsed for more links (a single level) and may be stored
        as a broken link.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by
                                  original_request
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not; when it is true a 404 result is not
                                stored as a broken link
        :param be_recursive: When true, the links found in a 404 response are
                             verified too, with be_recursive=False
        :return: None
        """
        # Use the base URL of the page where the link was found as Referer.
        # According to the original notes the complete original URL "breaks"
        # cache=True in most cases, this one does not, and it is friendlier
        # than simply ignoring the referer.
        base_url = original_response.get_url().base_url()
        referer = base_url.url_string
        referer_headers = Headers([('Referer', referer)])

        # The request is sent with grep=False: it has a high probability of
        # being a 404, and the grep plugins already got enough 404 responses
        # to analyze (from is_404 for example). Per the original note, when
        # it is not a 404 the link is pushed to the core and comes back to
        # this plugin's crawl(), where it is requested with grep=True.
        response = self._uri_opener.GET(reference,
                                        grep=False,
                                        headers=referer_headers,
                                        cache=True)

        if not is_404(response):
            log_template = '[web_spider] Sending link to w3af core: "%s"'
            om.out.debug(log_template % reference)

            request_for_core = FuzzableRequest(reference,
                                               headers=referer_headers)

            # The referer and cookie set below are what allow the fuzzer to
            # create CookieMutant and HeadersMutant instances later on.
            # Without the cookie the CookieMutant would never have any data
            # to modify, because cookies are set by the urllib2 cookie
            # handler when the request already exited the framework.
            response_cookie = Cookie.from_http_response(original_response)
            request_for_core.set_referer(referer)
            request_for_core.set_cookie(response_cookie)

            self.output_queue.put(request_for_core)
            return

        # 404 handling: the links inside the 404 page are followed, the 404
        # itself is never returned to the core.
        if be_recursive:
            # The nested calls are made with be_recursive=False so only one
            # level of links is followed. Otherwise a page like:
            #
            #   http://foo.com/abc/ => 404
            #   Body: <a href="def/">link</a>
            #
            # would trigger requests to /abc/, /abc/def/, /abc/def/def/, ...
            #
            # Do not use threads here, it will dead-lock (for unknown
            # reasons). This is tested in TestDeadLock unittest.
            pending = self._urls_to_verify_generator(response,
                                                     original_request)
            for call_args in pending:
                self._verify_reference(*call_args, be_recursive=False)

        # Store the broken links: untrusted references and the response codes
        # in UNAUTH_FORBID are not recorded
        if possibly_broken:
            return

        if response.get_code() in self.UNAUTH_FORBID:
            return

        broken_link = (response.get_url(), original_request.get_uri())
        self._broken_links.add(broken_link)
Exemple #4
0
    def _verify_reference(self, reference, original_request,
                          original_response, possibly_broken,
                          be_recursive=True):
        """
        GET a newly found link and either send it to the output queue or, if
        the response is a 404, parse it in order to get new links.

        :param reference: Newly found URL
        :param original_request: The FuzzableRequest instance which generated
                                 the response where the new URL was found
        :param original_response: The HTTPResponse generated by
                                  original_request
        :param possibly_broken: Boolean indicating if we trust this reference
                                or not; when it is true a 404 result is not
                                stored as a broken link
        :param be_recursive: When true, the links found in a 404 response are
                             verified too, with be_recursive=False
        :return: None
        """
        #
        # Remember that this "breaks" the cache=True in most cases!
        #     headers = { 'Referer': original_url }
        #
        # But this does not, and it is friendlier than simply ignoring the
        # referer
        #
        referer = original_response.get_url().base_url().url_string
        headers = Headers([('Referer', referer)])

        resp = self._uri_opener.GET(reference, cache=True, headers=headers)

        if is_404(resp):
            # Note: I WANT to follow links that are in the 404 page, but
            # DO NOT return the 404 itself to the core.
            #
            # This will parse the 404 response and add the 404-links in the
            # output queue, so that the core can get them
            #
            if be_recursive:
                #
                # Only follow one level of links in 404 pages, this limits the
                # potential issue when this is found:
                #
                #   http://foo.com/abc/ => 404
                #   Body: <a href="def/">link</a>
                #
                # Which would lead to this function to perform requests to:
                #   * http://foo.com/abc/
                #   * http://foo.com/abc/def/
                #   * http://foo.com/abc/def/def/
                #   * http://foo.com/abc/def/def/def/
                #   * ...
                #

                # Verify the links sequentially, in the calling thread.
                # Dispatching these nested calls to self.worker_pool was
                # observed to dead-lock; presumably this method can itself be
                # running in a pool worker, which then blocks waiting for
                # tasks that no free worker can pick up (TODO confirm).
                for args in self._urls_to_verify_generator(resp,
                                                           original_request):
                    self._verify_reference(*args, be_recursive=False)

            # Store the broken links, skipping the references we do not trust
            # and the response codes listed in UNAUTH_FORBID
            if not possibly_broken and resp.get_code() not in self.UNAUTH_FORBID:
                t = (resp.get_url(), original_request.get_uri())
                self._broken_links.add(t)
        else:
            msg = 'Adding reference "%s" to the result.'
            om.out.debug(msg % reference)

            fuzz_req = FuzzableRequest(reference, headers=headers)

            # These next steps are simple, but actually allows me to set the
            # referer and cookie for the FuzzableRequest instances I'm sending
            # to the core, which will then allow the fuzzer to create
            # CookieMutant and HeadersMutant instances.
            #
            # Without setting the Cookie, the CookieMutant would never have any
            # data to modify; remember that cookies are actually set by the
            # urllib2 cookie handler when the request already exited the
            # framework.
            cookie = Cookie.from_http_response(original_response)

            fuzz_req.set_referer(referer)
            fuzz_req.set_cookie(cookie)

            self.output_queue.put(fuzz_req)