예제 #1
0
    def test_content_use_encoding_in_mimetype_if_present(self):
        """Check that `content` equals the body decoded with the header's charset.

        The response carries a content-type header naming iso-8859-1 and a
        body encoded with that same charset.
        """
        charset = "iso-8859-1"
        encoded_body = "abcdé".encode(charset)
        headers = {"content-type": "text/html; charset=iso-8859-1"}

        response = Response(200, headers)
        response.set_content(encoded_body, True)

        self.assertEqual(response.content, encoded_body.decode(charset))
예제 #2
0
    def test_partial_content_raise_unicode_decode_error_if_error_not_caused_by_truncated_content(
            self):
        """Check that reading `partial_content` raises for bytes that are not UTF-8.

        The content is set with the second argument False, and the payload is
        invalid from its very first byte rather than merely cut short.
        """
        invalid_utf8 = b"\x80" * 10  # lone continuation bytes: never valid UTF-8
        response = Response(200, {})
        response.set_content(invalid_utf8, False)

        with self.assertRaises(UnicodeDecodeError):
            # Only the attribute access matters; the value is discarded.
            _ = response.partial_content
예제 #3
0
    def test_content_find_content_encoding_if_utf8_fails(self):
        """Check that `content` is decoded with the charset named in the HTML meta tag.

        No headers are supplied, and the iso-8859-1 encoded 'é' makes the body
        invalid UTF-8; the only charset hint is the document's own
        http-equiv meta element.
        """
        markup = (
            '<html><head>'
            '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">'
            '</head><body>abcdé</body></html>'
        )
        encoded_body = markup.encode("iso-8859-1")

        response = Response(200, {})
        response.set_content(encoded_body, True)

        self.assertEqual(response.content, encoded_body.decode("iso-8859-1"))
예제 #4
0
    def test_partial_content_ignore_truncated_utf8_characters_when_decoding(
            self):
        """Check that `partial_content` drops a trailing, incomplete UTF-8 character."""
        complete = "abcdé".encode("utf-8")
        truncated = complete[:-1]  # cuts off the second byte of the two-byte 'é'

        response = Response(200, {})
        response.set_content(truncated, False)

        self.assertEqual(response.partial_content, "abcd")
예제 #5
0
    async def test_add_hash_of_raw_content_if_response_content_of_sample_is_not_text(self):
        """Check that the MD5 digest of a non-text sample body lands in `kb.query_samples`.

        The mocked engine's `perform_high_priority` returns a sample response
        whose body is not valid UTF-8. After the filter handles an entry for
        `/?wsdl`, the `example.com/` sample is expected to be
        `{"md5": <digest of the raw body>}`.
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        sample_response = Response(200, {})
        sample_response.set_content(raw, True)
        self.engine.mock.perform_high_priority.return_value = sample_response

        entry = Entry.create("http://example.com/?wsdl",
                             response=StaticResponse(200, {}, "123"))
        await self.filter.after_response(entry)

        expected = {"md5": hashlib.md5(raw).digest()}
        self.assertEqual(self.kb.query_samples["example.com/"], expected)
    async def test_add_hash_of_raw_content_if_response_content_of_sample_is_not_text(self):
        """Check that the MD5 digest of a non-text sample body lands in `kb.query_samples`.

        The mocked engine's `perform_high_priority` returns a sample response
        whose body is not valid UTF-8. After the filter handles an entry for
        `/?wsdl`, the `example.com/` sample is expected to be
        `{"md5": <digest of the raw body>}`.
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        sample_response = Response(200, {})
        sample_response.set_content(raw, True)
        self.engine.mock.perform_high_priority.return_value = sample_response

        entry = Entry.create("http://example.com/?wsdl",
                             response=StaticResponse(200, {}, "123"))
        await self.filter.after_response(entry)

        expected = {"md5": hashlib.md5(raw).digest()}
        self.assertEqual(self.kb.query_samples["example.com/"], expected)
예제 #7
0
    async def test_content_that_is_not_text_never_match_content_simhash_of_sample(self):
        """Check that `Simhash` is never called for a response whose body is not text.

        A simhash sample is seeded in the knowledge base for `example.com/`,
        then a response with an invalid UTF-8 body is passed through the
        filter while `Simhash` is patched in the filter's module.
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        response = Response(200, {})
        response.set_content(raw, True)
        sample_hash = self.filter._hash_response(StaticResponse(200, {}, "content"))
        self.kb.query_samples["example.com/"] = {"simhash": sample_hash}

        with patch("tachyon.heuristics.rejectignoredquery.Simhash") as simhash_mock:
            entry = Entry.create("http://example.com/?wsdl", response=response)
            await self.filter.after_response(entry)

            simhash_mock.assert_not_called()
    def test_dont_match_if_simhash_in_knowledge_base_but_response_content_is_not_text(
            self):
        """Check that `_match` is falsy for a binary body against a simhash-only signature."""
        binary_body = b'x\x80Z"\x1a\x98\x8ey\xef?B\xd7\xc5\xbf\xd4\x18'
        response = Response(200, {})
        response.set_content(binary_body, True)
        signature = {"code": 200, "content_simhash": 12345}

        matched = self.rule._match(response, signature)

        self.assertFalse(matched)
예제 #9
0
    def test_content_dont_decode_twice_if_utf8_is_invalid(self):
        """Check that a failed UTF-8 decode is attempted only once.

        The body is a mock whose `decode` always raises `UnicodeDecodeError`,
        and the header already declares utf-8. Reading `content` must raise,
        and `decode` must have been called exactly once, with "utf-8".
        """
        fake_body = MagicMock()
        decode_failure = UnicodeDecodeError("utf-8", b"abc", 1, 2, "reason")
        fake_body.decode.side_effect = decode_failure
        headers = {"content-type": "text/html; charset=utf-8"}

        response = Response(200, headers)
        response.set_content(fake_body, at_eof=True)

        with self.assertRaises(UnicodeDecodeError):
            # Only the attribute access matters; the value is discarded.
            _ = response.content

        fake_body.decode.assert_called_once_with("utf-8")
예제 #10
0
    async def test_content_that_is_not_text_never_match_content_simhash_of_sample(self):
        """Check that `Simhash` is never called for a response whose body is not text.

        A simhash sample is seeded in the knowledge base for `example.com/`,
        then a response with an invalid UTF-8 body is passed through the
        filter while `Simhash` is patched in the filter's module.
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        response = Response(200, {})
        response.set_content(raw, True)
        sample_hash = self.filter._hash_response(StaticResponse(200, {}, "content"))
        self.kb.query_samples["example.com/"] = {"simhash": sample_hash}

        with patch("tachyon.heuristics.rejectignoredquery.Simhash") as simhash_mock:
            entry = Entry.create("http://example.com/?wsdl", response=response)
            await self.filter.after_response(entry)

            simhash_mock.assert_not_called()
    async def test_compare_hash_of_raw_content_if_raw_content_hash_in_knowledge_base(
            self):
        """Check that an entry is flagged soft-404 when its raw body hash matches the sample.

        The knowledge base is seeded with a `ContentSignature` holding the MD5
        digest of a non-text body, and the entry's response carries those same
        bytes.
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        _hash = hashlib.md5(raw).digest()
        # The backslash is escaped explicitly: "\l" is not a recognised escape
        # sequence, so the key is the three characters '/', '\' and 'l'.
        pattern = "/\\l"
        self.kb.soft_404_responses["http://example.com/"][pattern] = ContentSignature(
            code=200, content_hash=_hash)
        self.rule.performed["http://example.com/"] = {pattern: None}
        response = Response(200, {})
        response.set_content(raw, True)
        entry = Entry.create("http://example.com/test", response=response)

        await self.runner.perform_ok(entry)

        self.assertTrue(entry.result.soft404)
    async def test_compare_hash_of_raw_content_if_no_simhash_in_knowledge_base(
            self):
        """Check that `after_response` raises `RejectRequest` on a raw-hash match.

        The knowledge base sample holds only a `raw_content_hash` (no
        simhash), and the entry's response carries the bytes that hash was
        computed from.
        """
        raw = b'x\x80Z"\x1a\x98\x8ey\xef?B\xd7\xc5\xbf\xd4\x18'
        _hash = hashlib.md5(raw).digest()
        # The backslash is escaped explicitly: "\l" is not a recognised escape
        # sequence, so the key is the three characters '/', '\' and 'l'.
        pattern = "/\\l"
        self.kb.soft_404_responses["http://example.com/"][pattern] = {
            "code": 200,
            "raw_content_hash": _hash,
        }
        self.rule.performed["http://example.com/"] = {pattern: None}
        response = Response(200, {})
        response.set_content(raw, True)

        with self.assertRaises(RejectRequest):
            await self.rule.after_response(
                Entry.create("http://example.com/test", response=response))
    async def test_add_hash_of_raw_content_if_response_content_of_sample_is_not_text(
            self):
        """Check that a non-text sample body is recorded by its MD5 digest.

        The engine is set to answer with a body that is not valid UTF-8. After
        the runner processes an entry, the soft-404 samples for
        `http://example.com/` are expected to hold one `ContentSignature`
        with code 200 and the digest of the raw bytes; `content_sample` is
        not constrained (`ANY`).
        """
        raw = b'Invalid UTF8 x\x80Z"'  # the lone \x80 byte makes this invalid UTF-8
        sample_response = Response(200, {})
        sample_response.set_content(raw, True)
        self.engine.response = sample_response

        await self.runner.perform_ok(
            self.create_entry("http://example.com/test",
                              response_content="response"))

        # The backslash is escaped explicitly: "\l" is not a recognised escape
        # sequence, so the key is the three characters '/', '\' and 'l'.
        expected = {
            "/\\l": [
                ContentSignature(code=200,
                                 content_hash=hashlib.md5(raw).digest(),
                                 content_sample=ANY),
            ],
        }
        self.assertEqual(self.kb.soft_404_responses["http://example.com/"], expected)
    async def test_mutliple_values_for_sample_url(self):
        """Check that an entry is flagged soft-404 when the engine has two responses.

        The engine is handed a 404 response followed by a 200 response whose
        body equals the entry's own content, and the rule's
        `confirmation_factor` is set to 5 before the runner is invoked.
        """
        not_found = Response(404, {})
        not_found.set_content(b"a", True)
        same_body = Response(200, {})
        same_body.set_content(b"response", True)
        self.engine.response = [not_found, same_body]

        entry = self.create_entry("http://example.com/test", response_content="response")
        self.rule.confirmation_factor = 5

        await self.runner.perform_ok(entry)

        self.assertTrue(entry.result.soft404)
    async def test_add_hash_of_raw_content_if_response_content_is_not_text(
            self):
        """Check that a non-text sample body is stored as a `raw_content_hash`.

        The engine answers with a binary body. After `after_response` handles
        an entry, the soft-404 samples for `http://example.com/` are expected
        to contain code 200 and the MD5 digest of those bytes.
        """
        # Bound once so the body and the expected digest cannot drift apart.
        raw = b'x\x80Z"\x1a\x98\x8ey\xef?B\xd7\xc5\xbf\xd4\x18'
        response = Response(200, {})
        response.set_content(raw, True)
        self.engine.response = response

        await self.rule.after_response(
            self.create_entry("http://example.com/test",
                              response_content="response"))

        # The backslash is escaped explicitly: "\l" is not a recognised escape
        # sequence, so the key is the three characters '/', '\' and 'l'.
        expected = {
            "/\\l": {
                "code": 200,
                "raw_content_hash": hashlib.md5(raw).digest(),
            },
        }
        self.assertEqual(self.kb.soft_404_responses["http://example.com/"], expected)
예제 #16
0
    def test_partial_content_return_decoded_truncated_raw_bytes(self):
        """Check that `partial_content` is the decoded text of a plain ASCII body.

        The content is set with the second argument False.
        """
        ascii_body = b"abcdefg"

        response = Response(200, {})
        response.set_content(ascii_body, False)

        decoded = response.partial_content
        self.assertEqual(decoded, "abcdefg")