def generic_fuzzy_string_diff_runner_against_200(self, fuzzy_func, ratio):
        """
        Generic runner for fuzzy string diff: take the first MAX_TESTS
        entries of self.not_exists_data as base responses and compare each
        of them against every entry. None should match, so every pair for
        which fuzzy_func returns a true value is recorded as a failure.

        Prints the fail rate, the elapsed time and the number of analyzed
        samples, and writes the failed domain pairs to
        /tmp/<fuzzy_func.__name__>.txt

        :param fuzzy_func: Callable invoked as
                           fuzzy_func(clean_body_a, clean_body_b, ratio)
        :param ratio: Passed unchanged as the third argument of fuzzy_func
        :return: None
        """
        MAX_TESTS = 5
        failed_domains = set()
        total = 0
        run_tests = 0
        start = time.time()

        # items() yields the same pairs as iteritems() and exists on both
        # Python 2 and Python 3
        for domain_base, (base, _) in self.not_exists_data.items():
            # Check the limit before touching the body, there is no point
            # in decompressing a response which is never going to be used
            if run_tests == MAX_TESTS:
                break

            run_tests += 1

            base = self._gunzip(base)
            base_resp = self._create_http_response(domain_base, base, False)
            clean_body_base = get_clean_body(base_resp)

            for domain_test, (ok, _) in self.not_exists_data.items():
                # Note that total also counts the comparison skipped below
                total += 1
                ok = self._gunzip(ok)

                # Skip myself (detected by an identical decompressed body)
                if base == ok:
                    continue

                ok_resp = self._create_http_response(domain_test, ok, False)
                clean_body_ok = get_clean_body(ok_resp)

                if fuzzy_func(clean_body_base, clean_body_ok, ratio):
                    failed_domains.add((domain_base, domain_test))

        end = time.time()

        # float() forces true division (int / int truncates to 0 in Python 2)
        # and the guard avoids a ZeroDivisionError when there is no data
        perc_fail = float(len(failed_domains)) / total if total else 0.0
        func_name = fuzzy_func.__name__

        print('%s fail rate: %s' % (func_name, perc_fail))
        print('Total time: %ss' % (end - start))
        print('Analyzed samples: %s' % total)

        output = '/tmp/%s.txt' % func_name

        # The context manager guarantees the report is flushed and closed
        with open(output, 'w') as output_fh:
            for domain_a, domain_b in sorted(failed_domains):
                output_fh.write('%s - %s\n' % (domain_a, domain_b))

        print('Failed domains stored at %s' % output)
    def generic_fuzzy_string_diff_runner_against_200(self, fuzzy_func, ratio):
        """
        Generic runner for fuzzy string diff: the first MAX_TESTS entries of
        self.not_exists_data are used as base responses and each one is
        compared against every entry. None should match, so each pair for
        which fuzzy_func returns a true value is stored as a failure.

        The fail rate, elapsed time and number of analyzed samples are
        printed, and the failed domain pairs are written to
        /tmp/<fuzzy_func.__name__>.txt

        :param fuzzy_func: Callable invoked as
                           fuzzy_func(clean_body_a, clean_body_b, ratio)
        :param ratio: Passed unchanged as the third argument of fuzzy_func
        :return: None
        """
        MAX_TESTS = 5
        failed_domains = set()
        total = 0
        run_tests = 0
        start = time.time()

        # items() is available in Python 2 and 3, iteritems() only in 2
        for domain_base, (base, _) in self.not_exists_data.items():
            # Stop before decompressing a body that would be thrown away
            if run_tests == MAX_TESTS:
                break

            run_tests += 1

            base = self._gunzip(base)
            base_resp = self._create_http_response(domain_base, base, False)
            clean_body_base = get_clean_body(base_resp)

            for domain_test, (ok, _) in self.not_exists_data.items():
                # The counter includes the self-comparison skipped below
                total += 1
                ok = self._gunzip(ok)

                # Skip myself (detected by an identical decompressed body)
                if base == ok:
                    continue

                ok_resp = self._create_http_response(domain_test, ok, False)
                clean_body_ok = get_clean_body(ok_resp)

                if fuzzy_func(clean_body_base, clean_body_ok, ratio):
                    failed_domains.add((domain_base, domain_test))

        end = time.time()

        # Use true division: int / int is always truncated in Python 2, which
        # made the rate print as 0. Also handle the empty data set case.
        if total:
            perc_fail = float(len(failed_domains)) / total
        else:
            perc_fail = 0.0

        func_name = fuzzy_func.__name__

        print('%s fail rate: %s' % (func_name, perc_fail))
        print('Total time: %ss' % (end - start))
        print('Analyzed samples: %s' % total)

        output = '/tmp/%s.txt' % func_name

        # Close the report deterministically instead of leaking the handle
        with open(output, 'w') as output_fh:
            for domain_a, domain_b in sorted(failed_domains):
                output_fh.write('%s - %s\n' % (domain_a, domain_b))

        print('Failed domains stored at %s' % output)
Exemple #3
0
    def test_get_clean_body_14956(self):
        """
        Regression test for issue 14956
        https://github.com/andresriancho/w3af/issues/14956

        The link in the body points to the request URL but uses https
        instead of http; the expected clean body has an empty HREF.
        """
        request_url = URL('http://w3af.org/install.php?mode=phpinfo')
        response_headers = Headers([('Content-Type', 'text/html')])

        # Everything before the anchor tag is the same in the raw body and in
        # the expected clean body
        common_prefix = ('<head><title>Document Moved</title></head>'
                         '<body><h1>Object Moved</h1>This document may be found ')

        # The protocol change in the link is probably why get_clean_body was
        # not removing the URL from the body
        raw_anchor = ('<a HREF="https://w3af.org/install.php?mode=phpinfo">'
                      'here</a></body>')
        clean_anchor = '<a HREF="">here</a></body>'

        response = HTTPResponse(200,
                                common_prefix + raw_anchor,
                                response_headers,
                                request_url,
                                request_url)

        self.assertEqual(get_clean_body(response), common_prefix + clean_anchor)
    def generic_fuzzy_string_diff_runner_noise(self, fuzzy_func, ratio):
        """
        Generic runner for fuzzy string diff which adds noise to the cmp.

        For each entry of self.not_exists_data the body is compared against
        a copy of itself modified by self._add_noise_to_str. The pair is
        expected to match, so every domain for which fuzzy_func returns a
        false value is recorded as a failure.

        Prints the fail rate, the elapsed time and the number of analyzed
        samples, and writes the failed domains to
        /tmp/<fuzzy_func.__name__>.txt

        :param fuzzy_func: Callable invoked as
                           fuzzy_func(clean_body_noise, clean_body_ok, ratio)
        :param ratio: Passed unchanged as the third argument of fuzzy_func
        :return: None
        """
        failed_domains = set()
        total = 0
        start = time.time()

        # items() yields the same pairs as iteritems() and exists on both
        # Python 2 and Python 3
        for domain, (ok, _) in self.not_exists_data.items():
            total += 1

            ok = self._gunzip(ok)

            ok_resp = self._create_http_response(domain, ok, False)
            ok_with_noise = self._add_noise_to_str(ok, 10, 12)
            ok_noise_resp = self._create_http_response(domain, ok_with_noise,
                                                       True)

            clean_body_ok = get_clean_body(ok_resp)
            clean_body_noise = get_clean_body(ok_noise_resp)

            if not fuzzy_func(clean_body_noise, clean_body_ok, ratio):
                failed_domains.add(domain)

        end = time.time()

        # float() forces true division (int / int truncates to 0 in Python 2)
        # and the guard avoids a ZeroDivisionError when there is no data
        perc_fail = float(len(failed_domains)) / total if total else 0.0
        func_name = fuzzy_func.__name__

        print('%s fail rate: %s' % (func_name, perc_fail))
        print('Total time: %ss' % (end - start))
        print('Analyzed samples: %s' % total)

        output = '/tmp/%s.txt' % func_name

        # The context manager guarantees the report is flushed and closed
        with open(output, 'w') as output_fh:
            for domain in sorted(failed_domains):
                output_fh.write('%s\n' % domain)

        print('Failed domains stored at %s' % output)
    def generic_fuzzy_string_diff_runner_noise(self, fuzzy_func, ratio):
        """
        Generic runner for fuzzy string diff which adds noise to the cmp.

        Every body in self.not_exists_data is compared against a copy of
        itself that was modified by self._add_noise_to_str. Both versions
        should still match, so each domain for which fuzzy_func returns a
        false value is stored as a failure.

        The fail rate, elapsed time and number of analyzed samples are
        printed, and the failed domains are written to
        /tmp/<fuzzy_func.__name__>.txt

        :param fuzzy_func: Callable invoked as
                           fuzzy_func(clean_body_noise, clean_body_ok, ratio)
        :param ratio: Passed unchanged as the third argument of fuzzy_func
        :return: None
        """
        failed_domains = set()
        total = 0
        start = time.time()

        # items() is available in Python 2 and 3, iteritems() only in 2
        for domain, (ok, _) in self.not_exists_data.items():
            total += 1

            ok = self._gunzip(ok)

            ok_resp = self._create_http_response(domain, ok, False)
            ok_with_noise = self._add_noise_to_str(ok, 10, 12)
            ok_noise_resp = self._create_http_response(domain, ok_with_noise,
                                                       True)

            clean_body_ok = get_clean_body(ok_resp)
            clean_body_noise = get_clean_body(ok_noise_resp)

            if not fuzzy_func(clean_body_noise, clean_body_ok, ratio):
                failed_domains.add(domain)

        end = time.time()

        # Use true division: int / int is always truncated in Python 2, which
        # made the rate print as 0. Also handle the empty data set case.
        if total:
            perc_fail = float(len(failed_domains)) / total
        else:
            perc_fail = 0.0

        func_name = fuzzy_func.__name__

        print('%s fail rate: %s' % (func_name, perc_fail))
        print('Total time: %ss' % (end - start))
        print('Analyzed samples: %s' % total)

        output = '/tmp/%s.txt' % func_name

        # Close the report deterministically instead of leaking the handle
        with open(output, 'w') as output_fh:
            for domain in sorted(failed_domains):
                output_fh.write('%s\n' % domain)

        print('Failed domains stored at %s' % output)
    def test_get_clean_body_14955(self):
        """
        Regression test for issue 14955
        https://github.com/andresriancho/w3af/issues/14955

        The link in the body is the request URL followed by a slash; the
        expected clean body keeps only that trailing slash in the HREF.
        """
        request_url = URL('http://w3af.org/.git/.git/.git/index')
        response_headers = Headers([('Content-Type', 'text/html')])

        # Everything before the anchor tag is the same in the raw body and in
        # the expected clean body
        common_prefix = ('<head><title>Document Moved</title></head>'
                         '<body><h1>Object Moved</h1>This document may be found ')

        raw_anchor = ('<a HREF="http://w3af.org/.git/.git/.git/index/">'
                      'here</a></body>')
        clean_anchor = '<a HREF="/">here</a></body>'

        response = HTTPResponse(200,
                                common_prefix + raw_anchor,
                                response_headers,
                                request_url,
                                request_url)

        self.assertEqual(get_clean_body(response), common_prefix + clean_anchor)
Exemple #7
0
    def test_get_clean_body_14955(self):
        """
        Check the clean body computed for the response reported in issue 14955
        https://github.com/andresriancho/w3af/issues/14955

        The body links to the requested URL with an extra trailing slash,
        and only that slash is expected to remain in the HREF attribute.
        """
        template = ('<head><title>Document Moved</title></head>'
                    '<body><h1>Object Moved</h1>This document may be found '
                    '<a HREF="%s">here</a></body>')

        # Same template, filled in with the link as received and with what is
        # expected to be left of it after cleaning
        received_body = template % 'http://w3af.org/.git/.git/.git/index/'
        expected_body = template % '/'

        target = URL('http://w3af.org/.git/.git/.git/index')
        content_type = Headers([('Content-Type', 'text/html')])
        http_response = HTTPResponse(200, received_body, content_type,
                                     target, target)

        cleaned = get_clean_body(http_response)

        self.assertEqual(cleaned, expected_body)
    def test_get_clean_body_14956(self):
        """
        Check the clean body computed for the response reported in issue 14956
        https://github.com/andresriancho/w3af/issues/14956

        The body links to the requested URL using https while the request was
        sent using http; the HREF attribute is expected to end up empty.
        """
        template = ('<head><title>Document Moved</title></head>'
                    '<body><h1>Object Moved</h1>This document may be found '
                    '<a HREF="%s">here</a></body>')

        # Note that the redirect changes the protocol, which is probably why
        # get_clean_body wasn't removing the URL from the body
        received_body = template % 'https://w3af.org/install.php?mode=phpinfo'
        expected_body = template % ''

        target = URL('http://w3af.org/install.php?mode=phpinfo')
        content_type = Headers([('Content-Type', 'text/html')])
        http_response = HTTPResponse(200, received_body, content_type,
                                     target, target)

        cleaned = get_clean_body(http_response)

        self.assertEqual(cleaned, expected_body)