Ejemplo n.º 1
0
    def matches(self, query):
        """
        Decide whether the `query` HTTP response body looks like the failed
        login page represented by this instance.

        :param query: An HTTP response body
        :return: True when `query` equals one of the stored failed login
                 bodies, when it only differs from `body_a` by a small amount,
                 or when that difference resembles the difference between
                 `body_a` and `body_b`. False otherwise.
        """
        # Cheap exact comparisons against both known bodies come first
        if self.body_a == query or self.body_b == query:
            return True

        # Bodies which are really different do not deserve a diff() call
        if not fuzzy_equal(self.body_a, query, 0.60):
            return False

        # The diff between the two known bodies is calculated only once and
        # then kept in the instance
        if self.diff_a_b is None:
            known_bodies_diff, _ = diff(self.body_a, self.body_b)
            self.diff_a_b = known_bodies_diff

        _, query_delta = diff(self.body_a, query)

        # CSRF tokens might be part of the HTTP response body, they are random
        # (not removed by clean_body) and "break" the diff, so a very short
        # difference is accepted as a match
        is_small_delta = len(query_delta) < 64

        return is_small_delta or bool(fuzzy_equal(self.diff_a_b,
                                                  query_delta,
                                                  0.9))
Ejemplo n.º 2
0
    def test_xml(self):
        """
        Run diff() against the two XML fixtures stored in self.DATA.

        Comment the @SkipTest and then run:

        nosetests --with-timer -s -v -x w3af/core/controllers/misc/tests/test_diff.py
        """
        # open() inside a with-block makes sure the file handles are closed,
        # the previous file(...).read() calls never closed them
        with open(os.path.join(self.DATA, 'source.xml')) as source_fh:
            a = source_fh.read()

        with open(os.path.join(self.DATA, 'target.xml')) as target_fh:
            b = target_fh.read()

        # This takes ~2.5 seconds on my workstation
        diff(a, b)
Ejemplo n.º 3
0
    def test_diff_large_different_responses(self):
        """
        Diff two large bodies which only differ in one line and require the
        call to finish in less than one second.

        Original note: this test took 8 seconds to run, and now it takes
        0.4704s!
        """
        _max = 10000
        changed_line = _max - 3

        # Line number i holds i repetitions of the same character
        large_file_1 = ''.join('A' * i + '\n' for i in xrange(_max))

        # Same content, but one line is made of 'B' instead of 'A'
        large_file_2 = ''.join(('B' if i == changed_line else 'A') * i + '\n'
                               for i in xrange(_max))

        start = time.time()
        body1, body2 = diff(large_file_1, large_file_2)
        spent = time.time() - start

        self.assertGreater(1.0, spent)

        self.assertEqual(body1, 'A' * changed_line)
        self.assertEqual(body2, 'B' * changed_line)
Ejemplo n.º 4
0
    def test_xml(self):
        """
        Run diff() against the two XML fixtures stored in self.DATA and
        require the call to finish in less than one second.

        Before using https://pypi.org/project/diff-match-patch/ this test took
        around 2 seconds to run. Now it only takes 0.0056 sec!

        nosetests --with-timer -s -v -x w3af/core/controllers/misc/tests/test_diff.py
        """
        # open() inside a with-block makes sure the file handles are closed,
        # the previous file(...).read() calls never closed them
        with open(os.path.join(self.DATA, 'source.xml')) as source_fh:
            a = source_fh.read()

        with open(os.path.join(self.DATA, 'target.xml')) as target_fh:
            b = target_fh.read()

        # Only the diff() call is timed, reading the fixtures is not
        start = time.time()

        diff(a, b)

        spent = time.time() - start
        self.assertGreater(1.0, spent)
Ejemplo n.º 5
0
    def test_diff_large_different_responses(self):
        """
        Build two large multi-line strings where a single line differs, diff
        them, and check both the returned differences and the time spent.

        Original note: this test took 8 seconds to run, and now it takes
        0.4704s!
        """
        _max = 10000
        different_index = _max - 3

        def build(char_for_line):
            # One line per index, line i has i characters
            return ''.join(char_for_line(i) * i + '\n' for i in xrange(_max))

        large_file_1 = build(lambda i: 'A')
        large_file_2 = build(lambda i: 'B' if i == different_index else 'A')

        start = time.time()
        body1, body2 = diff(large_file_1, large_file_2)
        spent = time.time() - start

        # The whole diff needs to run in less than one second
        self.assertGreater(1.0, spent)

        self.assertEqual(body1, 'A' * different_index)
        self.assertEqual(body2, 'B' * different_index)
Ejemplo n.º 6
0
    def test_xml(self):
        """
        Run diff() against the two XML fixtures stored in self.DATA and
        require the call to finish in less than 15 seconds.

        Before using https://pypi.org/project/diff-match-patch/ this test took
        around 2 seconds to run. Now it only takes 0.0056 sec!

        nosetests --with-timer -s -v -x w3af/core/controllers/misc/tests/test_diff.py
        """
        # open() inside a with-block makes sure the file handles are closed,
        # the previous file(...).read() calls never closed them
        with open(os.path.join(self.DATA, 'source.xml')) as source_fh:
            a = source_fh.read()

        with open(os.path.join(self.DATA, 'target.xml')) as target_fh:
            b = target_fh.read()

        # Only the diff() call is timed, reading the fixtures is not
        start = time.time()

        diff(a, b)

        spent = time.time() - start

        # NOTE(review): the 15 second ceiling is far above the timing quoted
        # in the docstring; presumably it tolerates slow machines -- confirm
        self.assertGreater(15.0, spent)
Ejemplo n.º 7
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Check if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, only the parts of the response bodies
                             which are different (as returned by diff()) are
                             compared
        :return: The result of fuzzy_equal() using self._eq_limit as ratio
        """
        left, right = diff(body1, body2) if compare_diff else (body1, body2)
        return fuzzy_equal(left, right, self._eq_limit)
Ejemplo n.º 8
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Check if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, the bodies are replaced by the output
                             of diff() before being compared
        :return: The result of relative_distance_boolean() using
                 self._eq_limit, which is also sent to self.debug()
        """
        if compare_diff:
            first, second = diff(body1, body2)
        else:
            first, second = body1, body2

        outcome = relative_distance_boolean(first, second, self._eq_limit)
        self.debug('Result: %s' % outcome)

        return outcome
Ejemplo n.º 9
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Tell if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, diff() is applied to the bodies and
                             its two results are compared instead
        :return: The result of relative_distance_boolean() using
                 self._eq_limit; the same value is written with self.debug()
        """
        to_compare = diff(body1, body2) if compare_diff else (body1, body2)
        one, other = to_compare

        are_equal = relative_distance_boolean(one, other, self._eq_limit)

        debug_msg = "Result: %s" % are_equal
        self.debug(debug_msg)

        return are_equal
Ejemplo n.º 10
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Check if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, the bodies are replaced by the output
                             of diff() before being compared
        :return: The result of relative_distance_boolean() using
                 self._eq_limit, which is also reported via self.debug()
        """
        if compare_diff:
            first, second = diff(body1, body2)
        else:
            first, second = body1, body2

        outcome = relative_distance_boolean(first, second, self._eq_limit)

        message = 'Strings are similar enough with limit %s? %s' % (
            self._eq_limit, outcome)
        self.debug(message, None)

        return outcome
Ejemplo n.º 11
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Tell if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, diff() is applied to the bodies and
                             its two results are compared instead
        :return: The result of relative_distance_boolean() using
                 self._eq_limit; limit and result are sent to self.debug()
        """
        one, other = diff(body1, body2) if compare_diff else (body1, body2)

        similar = relative_distance_boolean(one, other, self._eq_limit)

        debug_fmt = 'Strings are similar enough with limit %s? %s'
        debug_args = (self._eq_limit, similar)
        self.debug(debug_fmt % debug_args, None)

        return similar
Ejemplo n.º 12
0
    def test_large_equal_responses(self):
        """
        Diff a large multi-line string against itself: both differences need
        to be empty and the whole check needs to take less than one second.
        """
        # Line number i holds i repetitions of 'A'
        large_file = ''.join('A' * i + '\n' for i in xrange(10000))

        start = time.time()

        body1, body2 = diff(large_file, large_file)

        for body in (body1, body2):
            self.assertEqual(body, '')

        # The measured time includes the assertions above
        spent = time.time() - start
        self.assertGreater(1.0, spent)
Ejemplo n.º 13
0
    def test_large_equal_responses(self):
        """
        Check that diff() returns two empty strings when a large body is
        compared with itself, and that it does so in less than one second.
        """
        lines = ['A' * i for i in xrange(10000)]

        # Every line, including the last one, ends with a new line
        large_file = ''.join(line + '\n' for line in lines)

        start = time.time()

        body1, body2 = diff(large_file, large_file)

        self.assertEqual(body1, '')
        self.assertEqual(body2, '')

        # The time spent is measured after the equality assertions
        elapsed = time.time() - start
        self.assertGreater(1.0, elapsed)
Ejemplo n.º 14
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Check if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, the bodies are replaced by the output
                             of diff() before being compared
        :return: The result of relative_distance_boolean() using
                 self._eq_limit. The result and the time spent are reported
                 via self.debug()
        """
        started_at = time.time()

        if compare_diff:
            first, second = diff(body1, body2)
        else:
            first, second = body1, body2

        outcome = relative_distance_boolean(first, second, self._eq_limit)

        if outcome:
            are = 'ARE'
        else:
            are = 'ARE NOT'

        self.debug('Strings %s similar enough (limit: %s)' % (are,
                                                              self._eq_limit))

        elapsed = time.time() - started_at
        self.debug('Took %.2f seconds to run equal_with_limit' % elapsed)

        return outcome
Ejemplo n.º 15
0
    def equal_with_limit(self, body1, body2, compare_diff=False):
        """
        Tell if two pages are equal using a ratio.

        :param body1: The first response body
        :param body2: The second response body
        :param compare_diff: When set, diff() is applied to the bodies and
                             its two results are compared instead
        :return: The result of relative_distance_boolean() using
                 self._eq_limit. Two debug lines are written with
                 self.debug(): the outcome and the time it took to get it
        """
        t0 = time.time()

        one, other = diff(body1, body2) if compare_diff else (body1, body2)

        similar = relative_distance_boolean(one, other, self._eq_limit)

        verb = 'ARE' if similar else 'ARE NOT'
        outcome_msg = 'Strings %s similar enough (limit: %s)'
        self.debug(outcome_msg % (verb, self._eq_limit))

        timing_msg = 'Took %.2f seconds to run equal_with_limit'
        self.debug(timing_msg % (time.time() - t0))

        return similar
Ejemplo n.º 16
0
 def test_special_chars(self):
     """The tab and '<' characters are kept in the returned differences."""
     result = diff('X\tB\nC', 'A<B\nC')
     self.assertEqual(result, ('X\t', 'A<'))
Ejemplo n.º 17
0
 def test_start(self):
     """The inputs differ only in the first character of the first line."""
     expected = ('X', 'A')
     self.assertEqual(diff('X\nB\nC', 'A\nB\nC'), expected)
Ejemplo n.º 18
0
 def test_start(self):
     """The inputs differ only in their leading word."""
     a = 'yes 123abc'
     b = 'no 123abc'
     self.assertEqual(diff(a, b), ('yes', 'no'))
Ejemplo n.º 19
0
 def test_all_no_sep(self):
     """Single line inputs with one different character in the middle."""
     result = diff('ABC', 'AXC')
     self.assertEqual(result, ('B', 'X'))
Ejemplo n.º 20
0
 def test_nono(self):
     """The second input has different text both before and after '123abc'."""
     a = '123abc yes'
     b = 'no 123abc no'
     expected = ('yes', 'no no')
     self.assertEqual(diff(a, b), expected)
Ejemplo n.º 21
0
    def _handle_large_http_responses(self, http_response, query, known_404, debugging_id):
        """
        When HTTP response bodies are large the fuzzy_equal() will generate
        404 false positives. This is explained in a comment above,
        (search for "{header-4000bytes}").

        This method handles that case with three HTTP responses instead of
        two (which is the most common case):

            * known_404: The forced 404 generated by this class
            * query:  The HTTP response we want to know if it is a 404
            * Another forced 404 generated by this method

        The two 404 responses are diffed, then one 404 response is diffed
        with the query response, and both diffs are compared using
        fuzzy_equal_for_diff() to decide if the query is a 404.

        :param http_response: The HTTP response being analyzed, used to
                              generate the second forced 404 and for logging
        :param query: Object with the `body` and `normalized_path` of the
                      response being analyzed
        :param known_404: The forced 404 response to compare against
        :param debugging_id: Identifier included in the debug message
        :return: True if the query response is a 404!
        """
        # Make the algorithm easier to read
        known_404_1 = known_404

        if known_404_1.diff is None:
            # There is no previous knowledge that we can use: send the second
            # request and calculate the diff.
            #
            # Send exclude=[known_404_1.url] to prevent the function from sending
            # an HTTP request to the same forced 404 URL
            known_404_2 = send_request_generate_404(self._uri_opener,
                                                    http_response,
                                                    debugging_id,
                                                    exclude=[known_404_1.url])

            known_404_1.diff, _ = diff(known_404_1.body, known_404_2.body)
            known_404_1.diff_with_id = known_404_2.id
            self._404_responses[query.normalized_path] = known_404_1

        # When known_404_1.diff was already set, the diff calculated at some
        # earlier point of the scan is reused. It was saved as an attribute to
        # prevent more HTTP requests from being sent to the server and to
        # reduce CPU usage.
        diff_x = known_404_1.diff
        _, diff_y = diff(known_404_1.body, query.body)

        is_404 = bool(fuzzy_equal_for_diff(diff_x, diff_y, IS_EQUAL_RATIO))

        if is_404:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_ratio > %s with diff of 404]'
                   ' [Request IDs: %s]')
        else:
            msg = ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                   ' [similarity_ratio < %s with diff of 404]'
                   ' [Request IDs: %s]')

        # Both outcomes log exactly the same values
        args = (http_response.get_url(),
                http_response.id,
                http_response.get_code(),
                len(http_response.get_body()),
                debugging_id,
                IS_EQUAL_RATIO,
                ', '.join([str(http_response.id),
                           str(known_404_1.id),
                           str(known_404_1.diff_with_id)]))
        om.out.debug(msg % args)

        return is_404
Ejemplo n.º 22
0
 def test_middle(self):
     """Only the middle line differs between the two inputs."""
     expected = ('B', 'X')
     self.assertEqual(diff('A\nB\nC', 'A\nX\nC'), expected)
Ejemplo n.º 23
0
 def test_empty(self):
     """Diffing two empty strings returns a pair of empty strings."""
     empty = ''
     self.assertEqual(diff(empty, empty), (empty, empty))
Ejemplo n.º 24
0
    def _handle_large_http_responses(self, http_response, query, known_404,
                                     debugging_id):
        """
        When HTTP response bodies are large the fuzzy_equal() will generate
        404 false positives. This is explained in a comment above,
        (search for "{header-4000bytes}").

        To handle that case three HTTP responses are used instead of two
        (which is the most common case):

            * known_404: The forced 404 generated by this class
            * query:  The HTTP response we want to know if it is a 404
            * Another forced 404 generated by this method

        The diff between the two 404 responses is compared, using
        fuzzy_equal_for_diff(), with the diff between one 404 response and
        the query response.

        :param http_response: The HTTP response being analyzed, used to
                              generate the second forced 404 and for logging
        :param query: Object with the `body` and `normalized_path` of the
                      response being analyzed
        :param known_404: The forced 404 response to compare against
        :param debugging_id: Identifier included in the debug message
        :return: True if the query response is a 404!
        """
        # Debug message templates, indexed by "is the query a 404?"
        verdict_templates = {
            False: ('"%s" (id:%s, code:%s, len:%s, did:%s) is NOT a 404'
                    ' [similarity_ratio < %s with diff of 404]'
                    ' [Request IDs: %s]'),
            True: ('"%s" (id:%s, code:%s, len:%s, did:%s) is a 404'
                   ' [similarity_ratio > %s with diff of 404]'
                   ' [Request IDs: %s]'),
        }

        # Make the algorithm easier to read
        known_404_1 = known_404

        # The diff between two forced 404 is saved as an attribute the first
        # time it is calculated. That prevents more HTTP requests from being
        # sent to the server and reduces CPU usage.
        needs_second_404 = known_404_1.diff is None

        if needs_second_404:
            # Send exclude=[known_404_1.url] to prevent the function from sending
            # an HTTP request to the same forced 404 URL
            known_404_2 = send_request_generate_404(self._uri_opener,
                                                    http_response,
                                                    debugging_id,
                                                    exclude=[known_404_1.url])

            known_404_1.diff, _ = diff(known_404_1.body, known_404_2.body)
            known_404_1.diff_with_id = known_404_2.id
            self._404_responses[query.normalized_path] = known_404_1

        diff_x = known_404_1.diff
        _, diff_y = diff(known_404_1.body, query.body)

        is_404 = bool(fuzzy_equal_for_diff(diff_x, diff_y, IS_EQUAL_RATIO))

        details = (http_response.get_url(), http_response.id,
                   http_response.get_code(), len(http_response.get_body()),
                   debugging_id, IS_EQUAL_RATIO,
                   ', '.join(map(str, (http_response.id,
                                       known_404_1.id,
                                       known_404_1.diff_with_id))))

        om.out.debug(verdict_templates[is_404] % details)
        return is_404
Ejemplo n.º 25
0
 def test_start(self):
     """A different first line is returned as the difference."""
     result = diff('X\nB\nC', 'A\nB\nC')
     self.assertEqual(result, ('X', 'A'))
Ejemplo n.º 26
0
 def test_nono(self):
     """Text around the shared '123abc' part is returned as the difference."""
     result = diff('123abc yes', 'no 123abc no')
     self.assertEqual(result, ('yes', 'no no'))
Ejemplo n.º 27
0
 def test_middle(self):
     """A single different character in the middle of one line."""
     a = '123456'
     b = '123a56'
     self.assertEqual(diff(a, b), ('4', 'a'))
Ejemplo n.º 28
0
 def test_start(self):
     """A different leading word is returned as the difference."""
     expected = ('yes', 'no')
     result = diff('yes 123abc', 'no 123abc')
     self.assertEqual(result, expected)
Ejemplo n.º 29
0
 def test_special_chars(self):
     """Differences containing a tab or a '<' are returned unchanged."""
     expected = ('X\t', 'A<')
     self.assertEqual(diff('X\tB\nC', 'A<B\nC'), expected)
Ejemplo n.º 30
0
 def test_middle_not_aligned(self):
     """The middle line differs and has a different length in each input."""
     result = diff('A\nB\nC', 'A\nXY\nC')
     self.assertEqual(result, ('B', 'XY'))
Ejemplo n.º 31
0
 def test_middle_not_aligned(self):
     """One middle line is replaced by a longer one."""
     expected = ('B', 'XY')
     self.assertEqual(diff('A\nB\nC', 'A\nXY\nC'), expected)
Ejemplo n.º 32
0
 def test_middle(self):
     """The second of three lines is the only difference."""
     result = diff('A\nB\nC', 'A\nX\nC')
     self.assertEqual(result, ('B', 'X'))
Ejemplo n.º 33
0
 def test_empty(self):
     """Two empty inputs produce two empty differences."""
     result = diff('', '')
     self.assertEqual(result, ('', ''))
Ejemplo n.º 34
0
 def test_all_no_sep(self):
     """Inputs without any new line, differing in the middle character."""
     expected = ('B', 'X')
     self.assertEqual(diff('ABC', 'AXC'), expected)
Ejemplo n.º 35
0
 def test_middle(self):
     """One character in the middle of a single line is different."""
     expected = ('4', 'a')
     result = diff('123456', '123a56')
     self.assertEqual(result, expected)