Example #1
0
    def _url_path_url_generator(self, resp, fuzzable_req):
        """
        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        # Analyze all directories, if the URL w3af just found is:
        #
        #   http://localhost/a/b/c/f00.php
        #
        # I want to GET:
        #
        #   http://localhost/a/b/c/
        #   http://localhost/a/b/
        #   http://localhost/a/
        #   http://localhost/
        #
        # And analyze the responses...
        dirs = resp.get_url().get_directories()

        for ref in unique_justseen(dirs):
            yield ref, fuzzable_req, resp, False
Example #2
0
    def end(self):
        """
        Called when the process ends, prints out the list of broken links.
        """
        if len(self._broken_links):

            om.out.information('The following is a list of broken links that '
                               'were found by the web_spider plugin:')
            for broken, where in unique_justseen(self._broken_links.ordered_iter()):
                om.out.information('- %s [ referenced from: %s ]' %
                                   (broken, where))
        
        self._broken_links.cleanup()
Example #3
0
    def end(self):
        """
        Called when the process ends, prints out the list of broken links.
        """
        if len(self._broken_links):

            om.out.information('The following is a list of broken links that'
                               ' were found by the web_spider plugin:')
            for broken, where in unique_justseen(self._broken_links.ordered_iter()):
                om.out.information('- %s [ referenced from: %s ]' %
                                   (broken, where))
        
        self._broken_links.cleanup()
Example #4
0
    def _body_url_generator(self, resp, fuzzable_req):
        """
        Yields tuples containing:
            * Newly found URL
            * The FuzzableRequest instance passed as parameter
            * The HTTPResponse generated by the FuzzableRequest
            * Boolean indicating if we trust this reference or not

        The newly found URLs are extracted from the http response body using
        one of the framework's parsers.

        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #
        try:
            doc_parser = parser_cache.dpc.get_document_parser_for(resp)
        except BaseFrameworkException as w3:
            om.out.debug('Failed to find a suitable document parser. '
                         'Exception "%s"' % w3)
        else:
            # Note:
            #
            # - With parsed_refs I'm 100% that it's really
            #   something in the HTML that the developer intended to add.
            #
            # - The re_refs are the result of regular expressions,
            #   which in some cases are just false positives.
            parsed_refs, re_refs = doc_parser.get_references()

            dirs = resp.get_url().get_directories()
            only_re_refs = set(re_refs) - set(dirs + parsed_refs)

            all_refs = itertools.chain(parsed_refs, re_refs)
            resp_is_404 = is_404(resp)

            for ref in unique_justseen(sorted(all_refs)):
                possibly_broken = resp_is_404 or (ref in only_re_refs)
                yield ref, fuzzable_req, resp, possibly_broken
Example #5
0
            # I also want to analyze all directories, if the URL I just
            # fetched is:
            # http://localhost/a/b/c/f00.php I want to GET:
            # http://localhost/a/b/c/
            # http://localhost/a/b/
            # http://localhost/a/
            # http://localhost/
            # And analyze the responses...
            dirs = resp.get_url().get_directories()
            only_re_refs = set(re_refs) - set(dirs + parsed_refs)

            all_refs = itertools.chain(dirs, parsed_refs, re_refs)
            resp_is_404 = is_404(resp)

            for ref in unique_justseen(sorted(all_refs)):
                possibly_broken = resp_is_404 or (ref in only_re_refs)
                yield ref, fuzzable_req, resp, possibly_broken

    def _should_verify_extracted_url(self, ref, resp):
        """
        :param ref: A newly found URL
        :param resp: The HTTP response where the URL was found

        :return: Boolean indicating if I should send this new reference to the
                 core.
        """
        # Ignore myself
        if ref == resp.get_uri():
            return False
Example #6
0
            # I also want to analyze all directories, if the URL I just
            # fetched is:
            # http://localhost/a/b/c/f00.php I want to GET:
            # http://localhost/a/b/c/
            # http://localhost/a/b/
            # http://localhost/a/
            # http://localhost/
            # And analyze the responses...
            dirs = resp.get_url().get_directories()
            only_re_refs = set(re_refs) - set(dirs + parsed_refs)

            all_refs = itertools.chain(dirs, parsed_refs, re_refs)
            resp_is_404 = is_404(resp)

            for ref in unique_justseen(sorted(all_refs)):
                possibly_broken = resp_is_404 or (ref in only_re_refs)
                yield ref, fuzzable_req, resp, possibly_broken

    def _should_verify_extracted_url(self, ref, resp):
        """
        :param ref: A newly found URL
        :param resp: The HTTP response where the URL was found

        :return: Boolean indicating if I should send this new reference to the
                 core.
        """
        # Ignore myself
        if ref == resp.get_uri():
            return False
Example #7
0
    def _urls_to_verify_generator(self, resp, fuzzable_req):
        """
        :param resp: HTTP response object
        :param fuzzable_req: The HTTP request that generated the response
        """
        #
        # Note: I WANT to follow links that are in the 404 page.
        #

        # Modified when I added the PDFParser
        # I had to add this x OR y stuff, just because I don't want
        # the SGML parser to analyze a image file, its useless and
        # consumes CPU power.
        if resp.is_text_or_html() or resp.is_pdf() or resp.is_swf():
            original_url = resp.get_redir_uri()
            try:
                doc_parser = parser_cache.dpc.get_document_parser_for(resp)
            except BaseFrameworkException, w3:
                om.out.debug('Failed to find a suitable document parser. '
                             'Exception "%s"' % w3)
            else:
                # Note:
                # - With parsed_refs I'm 100% that it's really
                # something in the HTML that the developer intended to add.
                #
                # - The re_refs are the result of regular expressions,
                # which in some cases are just false positives.

                parsed_refs, re_refs = doc_parser.get_references()

                # I also want to analyze all directories, if the URL I just
                # fetched is:
                # http://localhost/a/b/c/f00.php I want to GET:
                # http://localhost/a/b/c/
                # http://localhost/a/b/
                # http://localhost/a/
                # http://localhost/
                # And analyze the responses...
                dirs = resp.get_url().get_directories()
                only_re_refs = set(re_refs) - set(dirs + parsed_refs)

                all_refs = itertools.chain(dirs, parsed_refs, re_refs)

                for ref in unique_justseen(sorted(all_refs)):

                    # Ignore myself
                    if ref == resp.get_uri():
                        continue

                    # I don't want w3af sending requests to 3rd parties!
                    if ref.get_domain() != self._target_domain:
                        continue

                    # Filter the URL's according to the configured regexs
                    urlstr = ref.url_string
                    if not self._compiled_follow_re.match(urlstr) or \
                    self._compiled_ignore_re.match(urlstr):
                        continue

                    if self._only_forward:
                        if not self._is_forward(ref):
                            continue

                    # Work with the parsed references and report broken
                    # links. Then work with the regex references and DO NOT
                    # report broken links
                    if self._need_more_variants(ref):
                        self._known_variants.append(ref)
                        possibly_broken = ref in only_re_refs
                        yield ref, fuzzable_req, original_url, possibly_broken