Beispiel #1
0
    def _analyze_result(self, mutant, response):
        """
        Analyze results of the _send_mutant method.

        Reports a phishing vector vulnerability for every tag whose src
        attribute starts with one of the injected test URLs.
        """
        if not response.is_text_or_html():
            return

        if self._has_bug(mutant):
            return

        for tag in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            src_attr = tag.attrib.get('src', None)
            if src_attr is None:
                continue

            # Only report when the src attribute points at one of our
            # injected test URLs
            if not any(src_attr.startswith(test_url)
                       for test_url in self._test_urls):
                continue

            desc = 'A phishing vector was found at: %s' % mutant.found_at()

            vuln = Vuln.from_mutant('Phishing vector', desc, severity.LOW,
                                    response.id, self.get_name(), mutant)
            vuln.add_to_highlight(src_attr)

            self.kb_append_uniq(self, 'phishing_vector', vuln)
Beispiel #2
0
    def grep(self, request, response):
        """
        Plugin entry point: report forms that allow file uploads, detected
        by the presence of an <input type="file"> tag in the HTML.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        url = response.get_url()

        for input_tag in mp_doc_parser.get_tags_by_filter(response, ('input', )):
            type_attr = input_tag.attrib.get('type', None)

            # Skip inputs without a type, or with a type other than "file"
            if type_attr is None or type_attr.lower() != 'file':
                continue

            msg = 'A form which allows file uploads was found at "%s"' % url

            info = Info('File upload form', msg, response.id,
                        self.get_name())
            info.set_url(url)

            self.kb_append_uniq(self, 'file_upload', info, 'URL')
Beispiel #3
0
    def grep(self, request, response):
        """
        Plugin entry point: inspect every <script src="..."> in the HTML and
        hand the resolved URL over to the domain analysis.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        for script_tag in mp_doc_parser.get_tags_by_filter(response, ('script',)):
            script_src = script_tag.attrib.get('src', None)

            if script_src is None:
                continue

            try:
                script_full_url = response.get_url().url_join(script_src)
            except ValueError:
                # src is not something url_join can resolve; log and move on
                om.out.debug('Invalid URL found by cross_domain_js: "%s"'
                             % script_src)
            else:
                # More analysis methods might be added here later
                self._analyze_domain(response, script_full_url, script_tag)
Beispiel #4
0
    def _analyze_result(self, mutant, response):
        """
        Analyze results of the _send_mutant method.
        """
        # Nothing to analyze for non-HTML responses, and avoid duplicate
        # reports for mutants that already have a known bug
        if not response.is_text_or_html() or self._has_bug(mutant):
            return

        for tag in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            src_attr = tag.attrib.get('src', None)
            if src_attr is None:
                continue

            matching_urls = [test_url for test_url in self._test_urls
                             if src_attr.startswith(test_url)]
            if not matching_urls:
                continue

            desc = 'A phishing vector was found at: %s'
            desc %= mutant.found_at()

            vuln = Vuln.from_mutant('Phishing vector', desc, severity.LOW,
                                    response.id, self.get_name(), mutant)
            vuln.add_to_highlight(src_attr)

            self.kb_append_uniq(self, 'phishing_vector', vuln)
Beispiel #5
0
    def grep(self, request, response):
        """
        Plugin entry point: detect content feeds (RSS/Atom/etc. tags) and
        record them in the knowledge base.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        uri = response.get_uri()

        for feed in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            # Map the tag name to a human readable feed type
            feed_type = self._feed_types[feed.name.lower()]
            feed_version = feed.attrib.get('version', 'unknown')

            desc = ('The URL "%s" is a %s version %s feed.'
                    % (uri, feed_type, feed_version))

            info = Info('Content feed resource', desc, response.id,
                        self.get_name())
            info.set_uri(uri)
            info.add_to_highlight(feed_type)

            self.kb_append_uniq(self, 'feeds', info, 'URL')
Beispiel #6
0
    def grep(self, request, response):
        """
        Plugin entry point, verify if the HTML has a form with file uploads.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        url = response.get_url()

        for tag in mp_doc_parser.get_tags_by_filter(response, ('input',)):
            input_type = tag.attrib.get('type', None)

            # Report only <input type="file"> tags
            if input_type is not None and input_type.lower() == 'file':
                msg = ('A form which allows file uploads was found at "%s"'
                       % url)

                info = Info('File upload form', msg, response.id,
                            self.get_name())
                info.set_url(url)

                self.kb_append_uniq(self, 'file_upload', info, 'URL')
Beispiel #7
0
    def get_words(self, response):
        """
        Get words from the body, this is a modified "strings" that filters out
        HTML tags.

        :param response: In most common cases, an html. Could be almost anything
        :return: A map of strings:repetitions.
        """
        if not response.is_text_or_html():
            return {}

        data = {}

        # Hoist the bound method lookup out of the loop
        split = words_split_re.split

        for tag in mp_doc_parser.get_tags_by_filter(response, None,
                                                    yield_text=True):
            text = tag.text

            if text is None:
                continue

            # Words inside <title> weigh more.
            # (was the obsolete `cond and 5 or 1` hack)
            inc = 5 if tag.name == 'title' else 1

            # Filter by length of the word (> 3)
            # (was a lambda assigned to a name, PEP8 E731)
            for word in split(text):
                if len(word) <= 3:
                    continue
                data[word] = data.get(word, 0) + inc

        return data
Beispiel #8
0
    def grep(self, request, response):
        """
        Plugin entry point, verify if the HTML has <script> tags with src
        pointing to external+insecure domains.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        for tag in mp_doc_parser.get_tags_by_filter(response, ('script', )):
            script_src = tag.attrib.get('src', None)

            if script_src is None:
                continue

            script_full_url = None
            try:
                script_full_url = response.get_url().url_join(script_src)
            except ValueError:
                msg = 'Invalid URL found by cross_domain_js: "%s"'
                om.out.debug(msg % script_src)

            if script_full_url is not None:
                # More analysis methods might be added here later
                self._analyze_domain(response, script_full_url, tag)
Beispiel #9
0
    def _is_attr_value(self, path_disclosure_string, response):
        """
        Tell whether path_disclosure_string appears as (part of) an attribute
        value in any tag of the parsed response. Created to remove false
        positives where the path shows up in tag *text* instead.

        NOTE(perf): this method consumes ~99% of the plugin's CPU time.
        Known improvement ideas:

            * Run the attribute-value check in the subprocess for a slight
              performance gain.

            * Before invoking the document parser, pre-filter with a regular
              expression such as:

                </?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[\^'">\s]+))?)+\s*|\s*)/?>

              (with path_disclosure_string embedded in it). A similar
              approach was used before [0] but was reportedly slow — although
              parsing with lxml is unlikely to be faster. A middle ground:
              find each occurrence of path_disclosure_string in the body,
              build snippets of +/- 500 characters around it, and apply the
              regex to those snippets only.

        [0] https://github.com/andresriancho/w3af/commit/f1029328fcaf7e790cc317701b63954c55a3f4c8
        [1] https://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/

        :return: True if path_disclosure_string is the value of an attribute
                 inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while checking /home/image.png</b>...'
            return: False
        """
        for tag in mp_doc_parser.get_tags_by_filter(response, None):
            if any(path_disclosure_string in attr_value
                   for attr_value in tag.attrib.itervalues()):
                return True

        return False
Beispiel #10
0
    def has_csrf_token(self, response):
        """
        :return: True if there is CSRF protection enabled in this symfony app
        """
        input_tags = mp_doc_parser.get_tags_by_filter(response, ('input',))

        # The CSRF token (if any) lives in an <input> whose id matches the
        # pre-compiled token regular expression
        return any(self._csrf_token_re.search(input_tag.attrib.get('id', ''))
                   for input_tag in input_tags)
Beispiel #11
0
    def _analyze_result(self, mutant, response):
        """
        Analyze results of the _send_mutant method.
        """
        if not response.is_text_or_html():
            return

        if self._has_bug(mutant):
            return

        # Performance improvement to prevent calling the CPU-expensive
        # get_tags_by_filter when the payload is not in the response body
        if not self._contains_payload(response):
            return

        for tag in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            src_attr = tag.attrib.get('src', None)
            if src_attr is None:
                continue

            # Report the tag once if its src starts with any test URL
            if any(src_attr.startswith(test_url)
                   for test_url in self.TEST_URLS):
                desc = 'A phishing vector was found at: %s' % mutant.found_at()

                vuln = Vuln.from_mutant('Phishing vector', desc, severity.LOW,
                                        response.id, self.get_name(), mutant)
                vuln.add_to_highlight(src_attr)

                self.kb_append_uniq(self, 'phishing_vector', vuln)

        args = (response.get_uri(), response.id)
        om.out.debug('Performed HTTP response analysis at audit.phishing_vector'
                     ' URL %s, HTTP response ID %s.' % args)
Beispiel #12
0
    def _is_attr_value(self, path_disclosure_string, response):
        """
        Tell whether path_disclosure_string shows up inside an attribute
        value of any tag in the response; created to remove false positives
        where the path only appears in tag text.

        :return: True if path_disclosure_string is the value of an attribute
                 inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while checking /home/image.png</b>...'
            return: False
        """
        all_tags = mp_doc_parser.get_tags_by_filter(response, None)
        return any(path_disclosure_string in attr_value
                   for tag in all_tags
                   for attr_value in tag.attrib.itervalues())
Beispiel #13
0
    def _is_attr_value(self, path_disclosure_string, response):
        """
        This method was created to remove some false positives.

        :return: True if path_disclosure_string is the value of an attribute
                 inside a tag.

        Examples:
            path_disclosure_string = '/home/image.png'
            response_body = '....<img src="/home/image.png">...'
            return: True

            path_disclosure_string = '/home/image.png'
            response_body = '...<b>Error while checking /home/image.png</b>...'
            return: False
        """
        match_found = False

        for tag in mp_doc_parser.get_tags_by_filter(response, None):
            attr_values = tag.attrib.itervalues()
            if any(path_disclosure_string in value for value in attr_values):
                match_found = True
                break

        return match_found
Beispiel #14
0
    def grep(self, request, response):
        """
        Plugin entry point. Parse the object tags.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        if not response.is_text_or_html():
            return

        url = response.get_url()

        for tag in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            desc = ('The URL: "%s" has an "%s" tag. We recommend you download'
                    ' the client side code and analyze it manually.'
                    % (response.get_uri(), tag.name))

            info = Info('Browser plugin content', desc, response.id,
                        self.get_name())
            info.set_url(url)
            # Highlight the opening of the tag itself, e.g. "<object"
            info.add_to_highlight('<%s' % tag.name)

            self.kb_append_uniq(self, tag.name, info, 'URL')
Beispiel #15
0
    def grep(self, request, response):
        """
        Plugin entry point, find feeds.

        :param request: The HTTP request object.
        :param response: The HTTP response object
        :return: None
        """
        uri = response.get_uri()

        for tag in mp_doc_parser.get_tags_by_filter(response, self.TAGS):
            # Translate the tag name into a human readable feed type
            feed_type = self._feed_types[tag.name.lower()]
            version = tag.attrib.get('version', 'unknown')

            desc = 'The URL "%s" is a %s version %s feed.' % (uri,
                                                              feed_type,
                                                              version)

            info = Info('Content feed resource', desc, response.id,
                        self.get_name())
            info.set_uri(uri)
            info.add_to_highlight(feed_type)

            self.kb_append_uniq(self, 'feeds', info, 'URL')
Beispiel #16
0
    def get_tags_by_filter(self, http_response, tags, yield_text=False, cache=True):
        """
        Get specific tags from http_response using the cache if possible

        :param http_response: The http response instance
        :param tags: List of tags to get, or None if all tags should be returned
        :param yield_text: Include the tag text (<a>text</a>)
        :param cache: True if the document parser should be saved to the cache
        :return: An instance of DocumentParser
        """
        #
        # This is a performance hack that should reduce the time consumed by
        # this method without impacting its results. Note that in HTML this is
        # valid:
        #
        #   <script
        #
        # And this is invalid:
        #
        #   < script
        #
        # We use that in order to speed-up this function
        #
        if tags is not None:
            body_lower = http_response.get_body().lower()

            for tag in tags:
                lt_tag = '<%s' % tag
                if lt_tag in body_lower:
                    break
            else:
                # No tag was found in the HTML
                return []

        #
        # Before doing anything too complex like caching, sending the HTTP
        # response to a different process for parsing, checking events, etc.
        # check if we can parse this HTTP response.
        #
        # This is a performance improvement that works *only if* the
        # DocumentParser.can_parse call is *fast*, which means that the
        # `can_parse` implementations of each parser needs to be fast
        #
        # It doesn't matter if we say "yes" here and then parsing exceptions
        # appear later, that should be a 1 / 10000 calls and we would still
        # be gaining a lot of performance
        #
        if not self.can_parse(http_response):
            self._log_return_empty(http_response, 'No parser available')
            return []

        args = '%r%r' % (tags, yield_text)
        hash_string = get_body_unique_id(http_response, prepend=args)

        if hash_string in self._parser_blacklist:
            self._log_return_empty(http_response, 'HTTP response is blacklisted')
            return []

        #
        # We know that we can parse this document, lets work!
        #
        parser_finished = self._parser_finished_events.get(hash_string, None)
        if parser_finished is not None:
            # There is one subprocess already processing this http response
            # body, the best thing to do here is to make this thread wait
            # until that process has finished
            wait_result = parser_finished.wait(timeout=mp_doc_parser.PARSER_TIMEOUT)
            if not wait_result:
                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for response')
                return []

        # metric increase
        self.inc_query_count()

        parser = self._cache.get(hash_string, None)
        if parser is not None:
            self._handle_cache_hit(hash_string)
            return parser
        else:
            # Not in cache, have to work.
            self._handle_cache_miss(hash_string)

            # Create a new instance of DocumentParser, add it to the cache
            event = threading.Event()
            self._parser_finished_events[hash_string] = event

            try:
                tags = mp_doc_parser.get_tags_by_filter(http_response,
                                                        tags,
                                                        yield_text=yield_text)
            except TimeoutError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles trying
                # to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for get_tags_by_filter()')
                return []
            except MemoryError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles or
                # memory trying to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Reached memory usage limit')
                return []
            except ScanMustStopException, e:
                msg = 'The document parser is in an invalid state! %s'
                raise ScanMustStopException(msg % e)
            except Exception, e:
                # Act just like when there is no parser
                msg = 'Unhandled exception running get_tags_by_filter("%s"): %s'
                args = (http_response.get_url(), e)
                raise BaseFrameworkException(msg % args)
Beispiel #17
0
    def get_tags_by_filter(self, http_response, tags, yield_text=False, cache=True):
        """
        Get specific tags from http_response using the cache if possible

        :param http_response: The http response instance
        :param tags: List of tags to get
        :param yield_text: Include the tag text (<a>text</a>)
        :param cache: True if the document parser should be saved to the cache
        :return: An instance of DocumentParser
        """
        #
        # Before doing anything too complex like caching, sending the HTTP
        # response to a different process for parsing, checking events, etc.
        # check if we can parse this HTTP response.
        #
        # This is a performance improvement that works *only if* the
        # DocumentParser.can_parse call is *fast*, which means that the
        # `can_parse` implementations of each parser needs to be fast
        #
        # It doesn't matter if we say "yes" here and then parsing exceptions
        # appear later, that should be a 1 / 10000 calls and we would still
        # be gaining a lot of performance
        #
        if not self.can_parse(http_response):
            self._log_return_empty(http_response, 'No parser available')
            return []

        args = '%r%r' % (tags, yield_text)
        hash_string = get_body_unique_id(http_response, prepend=args)

        if hash_string in self._parser_blacklist:
            self._log_return_empty(http_response, 'HTTP response is blacklisted')
            return []

        #
        # We know that we can parse this document, lets work!
        #
        parser_finished = self._parser_finished_events.get(hash_string, None)
        if parser_finished is not None:
            # There is one subprocess already processing this http response
            # body, the best thing to do here is to make this thread wait
            # until that process has finished
            try:
                parser_finished.wait(timeout=mp_doc_parser.PARSER_TIMEOUT)
            except:
                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for response')
                return []

        # metric increase
        self.inc_query_count()

        parser = self._cache.get(hash_string, None)
        if parser is not None:
            self._handle_cache_hit(hash_string)
            return parser
        else:
            # Not in cache, have to work.
            self._handle_cache_miss(hash_string)

            # Create a new instance of DocumentParser, add it to the cache
            event = threading.Event()
            self._parser_finished_events[hash_string] = event

            try:
                tags = mp_doc_parser.get_tags_by_filter(http_response,
                                                        tags,
                                                        yield_text=yield_text)
            except TimeoutError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles trying
                # to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Timeout waiting for get_tags_by_filter()')
                return []
            except MemoryError:
                # We failed to get a parser for this HTTP response, we better
                # ban this HTTP response so we don't waste more CPU cycles or
                # memory trying to parse it over and over.
                self.add_to_blacklist(hash_string)

                # Act just like when there is no parser
                self._log_return_empty(http_response, 'Reached memory usage limit')
                return []
            except ScanMustStopException, e:
                msg = 'The document parser is in an invalid state! %s'
                raise ScanMustStopException(msg % e)
            except Exception, e:
                # Act just like when there is no parser
                msg = 'Unhandled exception running get_tags_by_filter("%s"): %s'
                args = (http_response.get_url(), e)
                raise BaseFrameworkException(msg % args)