Example #1
    def _setup_404_detection(self):
        #
        #    NOTE: I need to perform this test here in order to avoid some weird
        #    thread locking that happens when the webspider calls is_404, and
        #    because I want to initialize the is_404 database in a controlled
        #    try/except block.
        #
        from w3af.core.controllers.core_helpers.fingerprint_404 import is_404

        for url in cf.cf.get('targets'):
            try:
                response = self._w3af_core.uri_opener.GET(url, cache=True)
            except ScanMustStopByUserRequest:
                raise
            except Exception, e:
                msg = ('Failed to send HTTP request to the configured target'
                       ' URL "%s", the original exception was: "%s" (%s).')
                args = (url, e, e.__class__.__name__)
                raise ScanMustStopException(msg % args)

            try:
                is_404(response)
            except ScanMustStopByUserRequest:
                raise
            except Exception, e:
                msg = ('Failed to initialize the 404 detection using HTTP'
                       ' response from "%s", the original exception was: "%s"'
                       ' (%s).')
                args = (url, e, e.__class__.__name__)
                raise ScanMustStopException(msg % args)
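
The pattern in this example repeats throughout the framework: a user-initiated stop must propagate untouched, while any other failure is wrapped into a scan-stopping exception that carries the target URL and the original error. Below is a minimal, standalone sketch of that pattern; the exception classes and the send_request callable are local stand-ins, not imports from w3af.

class ScanMustStopException(Exception):
    """Stand-in: raised when the scan can not continue."""


class ScanMustStopByUserRequest(ScanMustStopException):
    """Stand-in: raised when the user explicitly stopped the scan."""


def check_targets(targets, send_request):
    # send_request is any callable that performs the HTTP GET, it plays
    # the role of uri_opener.GET in the example above
    for url in targets:
        try:
            send_request(url)
        except ScanMustStopByUserRequest:
            # Not a real error: the user asked to stop, re-raise untouched
            raise
        except Exception as e:
            msg = ('Failed to send HTTP request to the configured target'
                   ' URL "%s", the original exception was: "%s" (%s).')
            raise ScanMustStopException(msg % (url, e, e.__class__.__name__))
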
Example #2
    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser, http_response, self.DEBUG)

        # Push the task to the workers
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args, ),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError, rte:
            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)
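
The docstring's first feature, killing the worker when the parser takes too long, depends on pebble's timeout support. A rough standard-library approximation of the same calling pattern is sketched below with concurrent.futures; note that, unlike pebble, ProcessPoolExecutor does not terminate the worker when the timeout expires, it only stops waiting for the result. PARSER_TIMEOUT and parse_document are invented for the sketch.

import concurrent.futures

PARSER_TIMEOUT = 10  # seconds, arbitrary value for the sketch


def parse_document(body):
    # Stand-in for the real document parser
    return body.split()


def parse_with_timeout(pool, body):
    # Push the parse to a worker process and bound the time we wait for it
    future = pool.submit(parse_document, body)
    try:
        return future.result(timeout=PARSER_TIMEOUT)
    except concurrent.futures.TimeoutError:
        # The worker keeps running here; pebble would terminate it instead
        return None


if __name__ == '__main__':
    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as pool:
        print(parse_with_timeout(pool, 'hello world'))
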
Example #3
        def analyze_state():
            # There might be errors that make us stop the process
            if self._error_stopped:
                msg = 'Multiple exceptions found while sending HTTP requests.'
                raise ScanMustStopException(msg)

            if self._user_stopped:
                msg = 'The user stopped the scan.'
                raise ScanMustStopByUserRequest(msg)
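
How such a check is consumed is not shown above, so here is a minimal sketch of the usual arrangement: the sending loop calls the state check before each request, so that either flag turns into an exception at a well-defined point. The class, flag and exception names below are local stand-ins.

class ScanMustStopException(Exception):
    pass


class ScanMustStopByUserRequest(ScanMustStopException):
    pass


class RequestSender(object):
    def __init__(self):
        self._error_stopped = False
        self._user_stopped = False

    def _analyze_state(self):
        if self._error_stopped:
            raise ScanMustStopException('Multiple exceptions found while'
                                        ' sending HTTP requests.')
        if self._user_stopped:
            raise ScanMustStopByUserRequest('The user stopped the scan.')

    def send_all(self, requests, send):
        # Check the shared state before every request so a stop condition
        # set by another thread is honored as soon as possible
        for request in requests:
            self._analyze_state()
            send(request)
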
Example #4
    def get_tags_by_filter(self, http_response, tags, yield_text=False):
        """
        Return Tag instances for the tags which match the `tags` filter.
        Parsing and all the lxml work is done in another process, and the Tag
        instances are sent to the main process (the one calling this method)
        through a pipe.

        Some things to note:
            * Not all responses can be parsed, so I need to call DocumentParser
              and handle exceptions

            * The parser selected by DocumentParser might not have tags, and
              it might not have get_tags_by_filter. In this case just return an
              empty list

            * Just like get_document_parser_for, we have a timeout in place.
              When we hit the timeout we just return an empty list; this is
              not the best thing to do, but it makes the plugin code easier
              to write (plugins would ignore this anyway)

        :param tags: The filter
        :param yield_text: Should we yield the tag text?
        :return: A list of Tag instances as defined in sgml.py

        :see: SGMLParser.get_tags_by_filter
        """
        # Start the worker processes if needed
        self.start_workers()

        filename = write_http_response_to_temp_file(http_response)

        apply_args = (process_get_tags_by_filter,
                      filename,
                      tags,
                      yield_text,
                      self.DEBUG)

        #
        # Push the task to the workers
        #
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args,),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError, rte:
            # Remove the temp file used to send data to the process
            remove_file_if_exists(filename)

            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)
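
The detail that sets this variant apart is the temporary file: the HTTP response is written to disk so the worker process can read it, and the file must be removed on every failure path. A small standalone sketch of that bookkeeping follows; the helpers are local re-implementations written for the sketch, mirroring the names used above.

import os
import tempfile


def write_payload_to_temp_file(data):
    # Write the payload (bytes) to a temp file and return its path
    fd, filename = tempfile.mkstemp(suffix='.http')
    with os.fdopen(fd, 'wb') as temp_file:
        temp_file.write(data)
    return filename


def remove_file_if_exists(filename):
    try:
        os.remove(filename)
    except OSError:
        pass


def schedule_parse(schedule, data):
    # Hand the payload to a worker, cleaning up the file if scheduling fails
    filename = write_payload_to_temp_file(data)
    try:
        return schedule(filename)
    except RuntimeError:
        # Scheduling never reached the worker, so nobody else will delete
        # the temp file; remove it here before escalating the error
        remove_file_if_exists(filename)
        raise
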
Example #5
    def verify_target_server_up(self):
        """
        Well, it is more common than expected that the user configures a target
        which is offline, is not a web server, etc. So we're going to verify
        all that before even starting our work, and provide a nice error message
        so that users can change their config if needed.
        
        Note that we send MAX_ERROR_COUNT test requests to the remote end in
        order to trigger any errors there and have the Extended URL Library
        error handler return them.

        :raises: A friendly exception with lots of details of what could have
                 happened.
        """
        sent_requests = 0

        msg = (
            'The remote web server is not answering our HTTP requests,'
            ' multiple errors have been found while trying to GET a response'
            ' from the server.\n\n'
            'In most cases this means that the configured target is'
            ' incorrect, the port is closed, there is a firewall blocking'
            ' our packets or there is no HTTP daemon listening on that'
            ' port.\n\n'
            'Please verify your target configuration and try again. The'
            ' tested targets were:\n\n'
            ' %s\n')

        targets = cf.cf.get('targets')

        while sent_requests < MAX_ERROR_COUNT * 1.5:
            for url in targets:
                try:
                    self._w3af_core.uri_opener.GET(url, cache=False)
                except ScanMustStopByUserRequest:
                    # Not a real error, the user stopped the scan
                    raise
                except Exception, e:
                    dbg = 'Exception found during verify_target_server_up: "%s"'
                    om.out.debug(dbg % e)

                    target_list = '\n'.join(' - %s\n' % url for url in targets)

                    raise ScanMustStopException(msg % target_list)
                else:
                    sent_requests += 1
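
Reduced to its core, this method is a probe loop with an error budget: keep issuing requests until roughly 1.5 times MAX_ERROR_COUNT have been sent successfully, and turn the first unexpected failure into a single friendly error that lists every tested target. A standalone sketch, with MAX_ERROR_COUNT and the exception class as local stand-ins:

MAX_ERROR_COUNT = 10  # stand-in for the framework constant


class ScanMustStopException(Exception):
    pass


def verify_targets_up(targets, send_request):
    sent_requests = 0

    # Send more requests than the error budget so that the underlying URL
    # library has a chance to trip its own error handling first
    while sent_requests < MAX_ERROR_COUNT * 1.5:
        for url in targets:
            try:
                send_request(url)
            except Exception as e:
                target_list = '\n'.join(' - %s' % u for u in targets)
                msg = ('The remote web server is not answering our HTTP'
                       ' requests ("%s"). The tested targets were:\n%s')
                raise ScanMustStopException(msg % (e, target_list))
            else:
                sent_requests += 1
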
Example #6
    def _setup_404_detection(self):
        #
        #    NOTE: I need to perform this test here in order to avoid some weird
        #    thread locking that happens when the webspider calls is_404, and
        #    because I want to initialize the is_404 database in a controlled
        #    try/except block.
        #
        from w3af.core.controllers.core_helpers.fingerprint_404 import is_404

        for url in cf.cf.get('targets'):
            try:
                response = self._w3af_core.uri_opener.GET(url, cache=True)
                is_404(response)
            except ScanMustStopByUserRequest:
                raise
            except Exception, e:
                msg = 'Failed to initialize the 404 detection, original' \
                      ' exception was: "%s".'
                raise ScanMustStopException(msg % e)
Example #7
    def test_teardown_with_must_stop_exception(self):
        w3af_core = w3afCore()

        xss_instance = xss()
        xss_instance.set_url_opener(w3af_core.uri_opener)
        xss_instance.set_worker_pool(w3af_core.worker_pool)

        audit_plugins = [xss_instance]

        audit_consumer = audit(audit_plugins, w3af_core)
        audit_consumer.start()

        url = 'http://w3af.org/?id=1'

        httpretty.register_uri(httpretty.GET,
                               url,
                               body='hello world',
                               content_type='application/html')

        url = URL(url)
        fr = FuzzableRequest(url)

        # This will trigger a few HTTP requests to the target URL which will
        # also initialize all the xss plugin internals to be able to run end()
        # later.
        audit_consumer.in_queue_put(fr)
        kb.kb.add_fuzzable_request(fr)

        # Now that xss.audit() was called, we want to simulate network errors
        # that will put the uri opener in a state where it always answers with
        # ScanMustStopException
        w3af_core.uri_opener._stop_exception = ScanMustStopException('mock')

        # And now we just call terminate() which injects the poison pill and will
        # call teardown, which should call xss.end(), which should try to send HTTP
        # requests, which will raise a ScanMustStopException
        with patch('w3af.core.controllers.core_helpers.consumers.audit.om.out'
                   ) as om_mock:
            audit_consumer.terminate()

            msg = ('Spent 0.00 seconds running xss.end() until a scan must'
                   ' stop exception was raised.')
            self.assertIn(call.debug(msg), om_mock.mock_calls)
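
The test follows a pattern that is easy to reproduce outside of w3af: force a collaborator into a state where every call raises the must-stop exception, run the teardown path, and assert on the side effect (here, a debug log call). Below is a self-contained miniature of that pattern using unittest.mock; every class and method name in it is a stand-in invented for the sketch, not a w3af API.

import unittest
from unittest.mock import MagicMock, call


class ScanMustStopException(Exception):
    pass


class Plugin(object):
    def __init__(self, opener, log):
        self.opener = opener
        self.log = log

    def end(self):
        # Cleanup requires HTTP requests; a broken opener ends the attempt
        try:
            self.opener.send('GET /cleanup')
        except ScanMustStopException:
            self.log.debug('Scan must stop exception raised during end().')


class TestTeardown(unittest.TestCase):
    def test_end_logs_when_opener_is_broken(self):
        opener = MagicMock()
        opener.send.side_effect = ScanMustStopException('mock')
        log = MagicMock()

        Plugin(opener, log).end()

        expected = call.debug('Scan must stop exception raised during end().')
        self.assertIn(expected, log.mock_calls)


if __name__ == '__main__':
    unittest.main()
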
Example #8
    def store_in_cache(request, response):
        # Create the http response object
        resp = HTTPResponse.from_httplib_resp(response,
                                              original_url=request.url_object)
        resp.set_id(response.id)
        resp.set_alias(gen_hash(request))

        hi = HistoryItem()
        hi.request = request
        hi.response = resp

        # Now save them
        try:
            hi.save()
        except sqlite3.Error, e:
            msg = 'A sqlite3 error was raised: "%s".' % e

            if 'disk' in str(e).lower():
                msg += ' Please check if your disk is full.'

            raise ScanMustStopException(msg)
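
The same idea, detect a low-level storage failure and turn it into a scan-stopping error with a hint about the likely cause, can be reproduced with nothing but the sqlite3 module from the standard library. The table, helper and exception class below are invented for the sketch.

import sqlite3


class ScanMustStopException(Exception):
    pass


def save_history_item(connection, request_body, response_body):
    # Persist one request/response pair, escalating storage failures
    try:
        connection.execute('INSERT INTO history (request, response)'
                           ' VALUES (?, ?)', (request_body, response_body))
        connection.commit()
    except sqlite3.Error as e:
        msg = 'A sqlite3 error was raised: "%s".' % e

        # "database or disk is full" is the usual message when the disk
        # fills up, so add a hint that points the user at the real cause
        if 'disk' in str(e).lower():
            msg += ' Please check if your disk is full.'

        raise ScanMustStopException(msg)


if __name__ == '__main__':
    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE history (request TEXT, response TEXT)')
    save_history_item(conn, 'GET / HTTP/1.1', 'HTTP/1.1 200 OK')
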
Example #9
    def get_tags_by_filter(self, http_response, tags, yield_text=False, cache=True):
        """
        Get specific tags from http_response using the cache if possible

        :param http_response: The http response instance
        :param tags: List of tags to get, or None if all tags should be returned
        :param yield_text: Include the tag text (<a>text</a>)
        :param cache: True if the document parser should be saved to the cache
        :return: A list of Tag instances which match the filter
        """
        #
        # This is a performance hack that should reduce the time consumed by
        # this method without impacting its results. Note that in HTML this is
        # valid:
        #
        #   <script
        #
        # And this is invalid:
        #
        #   < script
        #
        # We use that in order to speed up this function
        #
        if tags is not None:
            body_lower = http_response.get_body().lower()

            for tag in tags:
                lt_tag = '<%s' % tag
                if lt_tag in body_lower:
                    break
            else:
                # No tag was found in the HTML
                return []

        #
        # Before doing anything too complex like caching, sending the HTTP
        # response to a different process for parsing, checking events, etc.
        # check if we can parse this HTTP response.
        #
        # This is a performance improvement that works *only if* the
        # DocumentParser.can_parse call is *fast*, which means that the
        # `can_parse` implementation of each parser needs to be fast
        #
        # It doesn't matter if we say "yes" here and then parsing exceptions
        # appear later; that should happen about once in 10000 calls and we would still
        # be gaining a lot of performance
        #
        if not self.can_parse(http_response):
            self._log_return_empty(http_response, 'No parser available')
            return []

        args = '%r%r' % (tags, yield_text)
        hash_string = get_body_unique_id(http_response, prepend=args)

        if hash_string in self._parser_blacklist:
            self._log_return_empty(http_response, 'HTTP response is blacklisted')
            return []

        #
        # We know that we can parse this document, let's work!
        #
        parser_finished = self._parser_finished_events.get(hash_string, None)
        if parser_finished is not None:
            # There is one subprocess already processing this http response
            # body, the best thing to do here is to make this thread wait
            # until that process has finished
            wait_result = parser_finished.wait(timeout=mp_doc_parser.PARSER_TIMEOUT)
            if not wait_result:
                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for response')
                return []

        # metric increase
        self.inc_query_count()

        parser = self._cache.get(hash_string, None)
        if parser is not None:
            self._handle_cache_hit(hash_string)
            return parser
        else:
            # Not in cache, have to work.
            self._handle_cache_miss(hash_string)

            # Create a new instance of DocumentParser, add it to the cache
            event = threading.Event()
            self._parser_finished_events[hash_string] = event

            try:
                tags = mp_doc_parser.get_tags_by_filter(http_response,
                                                        tags,
                                                        yield_text=yield_text)
            except TimeoutError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles trying
                # to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for get_tags_by_filter()')
                return []
            except MemoryError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles or
                # memory trying to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Reached memory usage limit')
                return []
            except ScanMustStopException, e:
                msg = 'The document parser is in an invalid state! %s'
                raise ScanMustStopException(msg % e)
            except Exception, e:
                # Act just like when there is no parser
                msg = 'Unhandled exception running get_tags_by_filter("%s"): %s'
                args = (http_response.get_url(), e)
                raise BaseFrameworkException(msg % args)
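
The for/else block at the top of the method is worth isolating: because valid HTML never puts a space between `<` and the tag name, a cheap substring test on the lowercased body can rule out an expensive parse. A standalone sketch of just that pre-filter:

def body_may_contain_tags(body, tags):
    # Cheap pre-filter: false positives are fine (the parser will simply
    # find nothing), false negatives are not, which is why the test relies
    # on '<script' being the only valid way to open a script tag in HTML
    body_lower = body.lower()

    for tag in tags:
        if '<%s' % tag in body_lower:
            return True

    return False


assert body_may_contain_tags('<html><SCRIPT>alert(1)</SCRIPT>', ['script'])
assert not body_may_contain_tags('plain text, no markup here', ['script', 'a'])
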
Example #10
    def get_document_parser_for(self, http_response, cache=True):
        """
        Get a document parser for http_response using the cache if possible

        :param http_response: The http response instance
        :param cache: True if the document parser should be saved to the cache
        :return: An instance of DocumentParser
        """
        #
        # Before doing anything too complex like caching, sending the HTTP
        # response to a different process for parsing, checking events, etc.
        # check if we can parse this HTTP response.
        #
        # This is a performance improvement that works *only if* the
        # DocumentParser.can_parse call is *fast*, which means that the
        # `can_parse` implementation of each parser needs to be fast
        #
        # It doesn't matter if we say "yes" here and then parsing exceptions
        # appear later; that should happen about once in 10000 calls and we would still
        # be gaining a lot of performance
        #
        if not self.can_parse(http_response):
            msg = 'There is no parser for "%s".'
            raise BaseFrameworkException(msg % http_response.get_url())

        hash_string = get_response_unique_id(http_response)

        if hash_string in self._parser_blacklist:
            msg = 'Exceeded timeout while parsing "%s" in the past. Not trying again.'
            raise BaseFrameworkException(msg % http_response.get_url())

        #
        # We know that we can parse this document, let's work!
        #
        parser_finished = self._parser_finished_events.get(hash_string, None)
        if parser_finished is not None:
            # There is one subprocess already processing this http response
            # body, the best thing to do here is to make this thread wait
            # until that process has finished
            wait_result = parser_finished.wait(timeout=mp_doc_parser.PARSER_TIMEOUT)
            if not wait_result:
                # Act just like when there is no parser
                msg = 'There is no parser for "%s". Waited more than %s sec.'
                args = (http_response.get_url(), mp_doc_parser.PARSER_TIMEOUT)
                raise BaseFrameworkException(msg % args)

        # metric increase
        self.inc_query_count()

        parser = self._cache.get(hash_string, None)
        if parser is not None:
            self._handle_cache_hit(hash_string)
            return parser
        else:
            # Not in cache, have to work.
            self._handle_cache_miss(hash_string)

            # Create a new instance of DocumentParser, add it to the cache
            event = threading.Event()
            self._parser_finished_events[hash_string] = event

            try:
                parser = mp_doc_parser.get_document_parser_for(http_response)
            except TimeoutError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles trying
                # to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                msg = 'Reached timeout parsing "%s".' % http_response.get_url()
                raise BaseFrameworkException(msg)
            except MemoryError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles or
                # memory trying to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                msg = 'Reached memory usage limit parsing "%s".' % http_response.get_url()
                raise BaseFrameworkException(msg)
            except ScanMustStopException, e:
                msg = 'The document parser is in an invalid state! %s'
                raise ScanMustStopException(msg % e)
            except:
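
The blacklist used in this method boils down to: remember the hash of any response that already blew the time or memory budget, and refuse to parse it again. A minimal standalone version of that bookkeeping follows; get_body_unique_id is re-implemented locally with hashlib and the exception class is a stand-in.

import hashlib


class ParseTimeout(Exception):
    pass


def get_body_unique_id(body):
    # Local stand-in: hash the body so equal responses share one id
    return hashlib.md5(body.encode('utf-8')).hexdigest()


class ParserCache(object):
    def __init__(self, parse):
        self._parse = parse
        self._blacklist = set()

    def get_parser_for(self, body):
        hash_string = get_body_unique_id(body)

        if hash_string in self._blacklist:
            # This response already timed out once, don't waste CPU again
            return None

        try:
            return self._parse(body)
        except ParseTimeout:
            self._blacklist.add(hash_string)
            return None
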
Example #11
    def alert_if_target_is_301_all(self):
        """
        Alert the user when the configured target is set to a site which will
        301 redirect all requests to https://

        :see: https://github.com/andresriancho/w3af/issues/14976
        :return: True if the site returns 301 for all resources. Also an Info
                 instance is saved to the KB in order to alert the user.
        """
        site_does_redirect = False
        msg = ('The configured target domain redirects all HTTP requests to a'
               ' different location. The most common scenarios are:\n\n'
               ''
               '    * HTTP redirect to HTTPS\n'
               '    * domain.com redirect to www.domain.com\n\n'
               ''
               'While the scan engine can identify URLs and vulnerabilities'
               ' using the current configuration it might be wise to start'
               ' a new scan setting the target URL to the redirect target.')

        targets = cf.cf.get('targets')

        for url in targets:
            # We test if the target URLs are redirecting to a different protocol
            # or domain.
            try:
                http_response = self._w3af_core.uri_opener.GET(url,
                                                               cache=False)
            except ScanMustStopByUserRequest:
                # Not a real error, the user stopped the scan
                raise
            except Exception, e:
                emsg = 'Exception found during alert_if_target_is_301_all(): "%s"'
                emsg %= e

                om.out.debug(emsg)
                raise ScanMustStopException(emsg)
            else:
                if 300 <= http_response.get_code() <= 399:

                    # Get the redirect target
                    lower_headers = http_response.get_lower_case_headers()
                    redirect_url = None

                    for header_name in ('location', 'uri'):
                        if header_name in lower_headers:
                            header_value = lower_headers[header_name]
                            header_value = header_value.strip()
                            try:
                                redirect_url = URL(header_value)
                            except ValueError:
                                # No special invalid URL handling required
                                continue

                    if not redirect_url:
                        continue

                    # Check if the protocol was changed:
                    target_proto = url.get_protocol()
                    redirect_proto = redirect_url.get_protocol()

                    if target_proto != redirect_proto:
                        site_does_redirect = True
                        break

                    # Check if the domain was changed:
                    target_domain = url.get_domain()
                    redirect_domain = redirect_url.get_domain()

                    if target_domain != redirect_domain:
                        site_does_redirect = True
                        break
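
Stripped of the framework plumbing, the redirect check is: take the Location (or URI) header from a 3xx response, parse it, and flag the target when either the scheme or the host differs from the configured URL. A standalone sketch using the standard library URL parser; the function name and the plain-dict headers argument are assumptions made for the sketch.

try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2


def redirects_away(target_url, status_code, headers):
    # `headers` is a plain dict with lower-case header names
    if not 300 <= status_code <= 399:
        return False

    location = headers.get('location') or headers.get('uri')
    if not location:
        return False

    target = urlparse(target_url)
    redirect = urlparse(location.strip())

    # A different scheme (http -> https) or a different host
    # (domain.com -> www.domain.com) means the site redirects away
    return (target.scheme != redirect.scheme or
            target.netloc != redirect.netloc)


assert redirects_away('http://domain.com/', 301,
                      {'location': 'https://domain.com/'})
assert not redirects_away('http://domain.com/', 200, {})
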