def test_expand_redirects(self):
    """convert_entries emits one HAR entry per redirect hop, in hop order."""
    entry = Entry.create(
        "http://example.com/",
        response=StaticResponse(code=200, content="hello", headers={}))

    # Redirect chain stored on the result: / -> /a/ -> /b/.
    first_hop = Entry.create(
        "http://example.com/",
        response=StaticResponse(code=302, headers={"Location": "http://example.com/a/"}))
    second_hop = Entry.create(
        "http://example.com/a/",
        response=StaticResponse(code=302, headers={"Location": "http://example.com/b/"}))
    last_hop = Entry.create(
        "http://example.com/b/",
        response=StaticResponse(code=200, content="hello", headers={}))
    entry.result.redirects = [first_hop, second_hop, last_hop]

    har = self.conv.convert_entries([entry])

    converted_urls = [har_entry.request.url for har_entry in har.entries]
    expected_urls = [
        "http://example.com/",
        "http://example.com/a/",
        "http://example.com/b/",
    ]
    self.assertEqual(expected_urls, converted_urls)
async def test_on_request_successful_set_final_response_as_entry_response(self):
    """After on_request_successful, the entry's response is the one served by the engine."""
    served = Response(code=200, headers={})
    served.set_content(b"data", at_eof=True)
    self.engine.response = served

    await self.rule.on_request_successful(self.entry)

    self.assertEqual(self.entry.response, served)
async def test_reject_nothing(self):
    """A RejectStatusCode built without ranges lets 200 and 302 responses through.

    There is no explicit assertion: the test passes as long as neither
    after_headers call raises.
    """
    rule = RejectStatusCode()
    for status in (200, 302):
        entry = Entry.create("http://example.om/test", response=StaticResponse(status, {}))
        await rule.after_headers(entry)
async def test_on_request_successful_increment_stats_for_each_redirect(self):
    """Following one redirect leaves the engine stats at 2 requested and 1 completed."""
    served = Response(code=200, headers={})
    served.set_content(b"response content", at_eof=True)
    self.engine.response = served

    await self.rule.on_request_successful(self.entry)

    stats = self.engine.stats
    self.assertEqual(stats.requested, 2)
    self.assertEqual(stats.completed, 1)
async def test_after_response_reject_request_if_simhash_of_response_content_equals_sample_simhash(self):
    """Both the sampled content and a slightly different variant are rejected by after_response."""
    sample = {"simhash": Simhash("response content").value}
    self.kb.query_samples["example.com/"] = sample

    url = "http://example.com/?wsdl"
    same_content = Entry.create(url, response=StaticResponse(200, {}, "response content"))
    near_content = Entry.create(url, response=StaticResponse(200, {}, "response-content"))

    for candidate in (same_content, near_content):
        with self.assertRaises(RejectRequest):
            await self.filter.after_response(candidate)
async def test_after_response_take_a_sample_with_a_junk_query(self):
    """after_response requests a sample whose query string is the patched uuid4 value."""
    sample_response = StaticResponse(200, {}, "content")
    entry = Entry.create(
        "http://example.com/?wsdl",
        response=StaticResponse(200, {}, "not same content"))
    high_priority = self.engine.mock.perform_high_priority
    high_priority.return_value = sample_response

    # Pin the generated query so the expected sample URL is predictable.
    fixed_uuid4 = MagicMock(return_value="random-uuid-abc123")
    with patch("tachyon.heuristics.rejectignoredquery.uuid4", fixed_uuid4):
        await self.filter.after_response(entry)

    expected_sample_entry = Entry.create(
        "http://example.com/?random-uuid-abc123", response=sample_response)
    high_priority.assert_called_once_with(expected_sample_entry, self.filter.child_heuristics)
async def test_match_bytes_with_binary_response(self):
    """string_match is truthy when the hex-encoded match_bytes occur in a binary raw body."""
    files = create_json_data(["file"], match_bytes="0102030405060708090a0b0c0d0e0f10")
    target_file = files[0]

    binary_response = StaticResponse(200, {})
    binary_response.raw = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15'
    entry = Entry.create(
        "http://example.com/file",
        arguments={"file": target_file},
        response=binary_response)

    heuristic = MatchString()
    await heuristic.after_response(entry)

    self.assertTrue(entry.result.string_match)
async def test_on_request_successful_store_intermediate_entry_in_result(self):
    """result.redirects holds the original redirect entry followed by the final entry."""
    # Copy taken before the rule runs, to compare against afterwards.
    redirect_response = copy(self.response)
    served = Response(code=200, headers={})
    served.set_content(b"response content", at_eof=True)
    self.engine.response = served

    await self.rule.on_request_successful(self.entry)

    request = self.entry.request
    initial_hop = Entry.create(
        request.url,
        method=request.method,
        headers=request.headers,
        response=redirect_response)
    final_hop = Entry.create(
        "https://www.example.com/", method='GET', headers={}, response=served)
    self.assertEqual(self.entry.result.redirects, [initial_hop, final_hop])
async def test_after_response_store_samples_in_kb(self):
    """Each distinct path gets its own sample in kb.query_samples, keyed by host + path."""
    # (knowledge-base key, requested URL, content of the sample returned by the engine)
    cases = [
        ("example.com/", "http://example.com/?wsdl", "homepage data"),
        ("example.com/admin/", "http://example.com/admin/?wsdl", "admin data"),
        ("example.com/images/", "http://example.com/images/?wsdl", "images..."),
        ("example.com/login.php", "http://example.com/login.php?login", "login page"),
    ]
    entries = [
        Entry.create(url, response=StaticResponse(200, {}, "123"))
        for _key, url, _content in cases
    ]
    samples = [StaticResponse(200, {}, content) for _key, _url, content in cases]
    # One sample response is handed out per high-priority request, in order.
    self.engine.mock.perform_high_priority.side_effect = list(samples)

    with patch("tachyon.heuristics.rejectignoredquery.Simhash", FakeSimhash):
        for entry in entries:
            await self.filter.after_response(entry)

        # NOTE(review): assertions are kept inside the patch so self.hash runs under
        # the same Simhash replacement as the filter did - confirm against self.hash.
        for (key, _url, _content), sample in zip(cases, samples):
            self.assertEqual(self.kb.query_samples[key], self.hash(sample))
async def test_do_not_reject_outside_specified_ranges(self):
    """Status codes outside [400, 410) and [500, 700) pass through after_headers.

    There is no explicit assertion: the test passes as long as no call raises.
    """
    rule = RejectStatusCode(range(400, 410), range(500, 700))
    for status in (200, 302, 410, 460):
        entry = Entry.create("http://example.om/test", response=StaticResponse(status, {}))
        await rule.after_headers(entry)
def setUp(self):
    """Build a FollowRedirects rule wired to a fake engine, plus an entry holding a 302 response."""
    engine = FakeEngine()
    rule = FollowRedirects(max_redirect=10)
    rule.set_engine(engine)

    redirect = Response(code=302, headers={"location": "https://www.example.com/"})
    redirect.set_content(b"", at_eof=True)

    self.engine = engine
    self.rule = rule
    self.response = redirect
    self.entry = Entry.create("http://example.com", response=redirect)
async def test_do_not_reject_reject_within_the_specified_ranges(self):
    """Status codes inside [400, 410) or [500, 700) make after_headers raise RejectRequest."""
    rule = RejectStatusCode(range(400, 410), range(500, 700))
    for status in (400, 409, 503):
        entry = Entry.create("http://example.om/test", response=StaticResponse(status, {}))
        with self.assertRaises(RejectRequest):
            await rule.after_headers(entry)
def test_convert_response_with_non_standard_code(self):
    """A 599 response is converted with its numeric status and the status text "Unknown"."""
    unusual = StaticResponse(code=599, content="hello", headers={})
    entry = Entry.create("http://example.com/", response=unusual)

    converted = self.conv.convert_entry(entry)

    self.assertEqual(converted.response.status, 599)
    self.assertEqual(converted.response.status_text, "Unknown")
async def test_content_that_is_text_never_match_sample_that_contains_md5(self):
    """With an md5-only sample and a text response, after_response never calls hashlib.md5."""
    self.kb.query_samples["example.com/"] = {"md5": "12345"}
    text_response = StaticResponse(200, {}, "content")
    entry = Entry.create("http://example.com/?wsdl", response=text_response)

    with patch("tachyon.heuristics.rejectignoredquery.hashlib") as patched_hashlib:
        await self.filter.after_response(entry)

    patched_hashlib.md5.assert_not_called()
async def test_add_alternate_url_response_to_knowledge_base(self):
    """Each requested URL stores one soft-404 signature under its own URL pattern.

    Four URLs of different shapes are run through the pipeline while the
    engine always serves the same response. The knowledge base is then
    expected to hold one signature list per pattern, each carrying the
    content hash of that response.
    """
    response = StaticResponse(200, {})
    response.content = "response content"
    self.engine.response = response

    requested_urls = (
        "http://example.com/test",
        "http://example.com/123/",
        "http://example.com/.test",
        "http://example.com/123/test.js",
    )
    for url in requested_urls:
        await self.runner.perform_ok(self.create_entry(url, response_content="response"))

    raw = ContentHashSampling()._hash(response)

    # Raw strings: "\l" and "\d" are not valid escape sequences, so plain literals
    # trigger DeprecationWarning/SyntaxWarning. The runtime values are unchanged.
    expected_patterns = (r"/\l", r"/\d/", r"/.\l", r"/123/\l.js")
    expected = {
        pattern: [
            ContentSignature(code=200, content_simhash=ANY, content_hash=raw, content_sample=ANY)
        ]
        for pattern in expected_patterns
    }
    self.assertEqual(self.kb.soft_404_responses["http://example.com/"], expected)
async def perform(self, entry, heuristics):
    """Fake a request: run the heuristic hooks around a canned 200 "text/junk" response.

    The response content is set to the request URL between the
    after_headers and after_response hooks. Returns the same entry.
    """
    await heuristics.before_request(entry)

    canned_headers = {"Content-Type": "text/junk"}
    entry.response = StaticResponse(200, canned_headers)
    await heuristics.after_headers(entry)

    # Written through entry.response on purpose: the content goes on whatever
    # response the entry holds once the header hooks have run.
    requested_url = entry.request.url
    entry.response.content = requested_url
    await heuristics.after_response(entry)

    return entry
async def perform(self, entry, heuristics):
    """Record the call on self.mock, then run the heuristic hooks around a canned 200 response.

    The response content is set to b"data" (with at_eof=False) between the
    after_headers and after_response hooks. Returns the same entry.
    """
    # Recorded first so tests can assert on the arguments.
    self.mock.perform(entry, heuristics)

    await heuristics.before_request(entry)

    no_headers = {}
    entry.response = StaticResponse(200, headers=no_headers)
    await heuristics.after_headers(entry)

    # Written through entry.response on purpose: the content goes on whatever
    # response the entry holds once the header hooks have run.
    entry.response.set_content(b"data", at_eof=False)
    await heuristics.after_response(entry)

    return entry
def setUp(self):
    """Build a pipeline with simhash sampling and behavior-change detection, plus a sample entry."""
    sample_response = StaticResponse(200, {}, content="data")
    self.entry = Entry.create("http://example.com/", response=sample_response)

    pipeline = Pipeline()
    detector = DetectBehaviorChange()
    self.runner = pipeline
    self.behavior_detection = detector
    self.kb = pipeline.kb

    # Sampling is registered before detection, as in the original fixture.
    pipeline.add(ContentSimhashSampling())
    pipeline.add(detector)
async def test_set_string_match_to_false_if_no_match_string_in_file(self):
    """string_match is falsy when the file definition carries no match string."""
    target_file = create_json_data(["file"])[0]
    entry = Entry.create(
        "http://example.com/file",
        arguments={"file": target_file},
        response=StaticResponse(200, {}, content="content"))

    heuristic = MatchString()
    await heuristic.after_response(entry)

    self.assertFalse(entry.result.string_match)
async def test_set_string_match_flag_in_entry_result_to_false_if_string_to_match_found_in_response_content(self):
    """string_match is falsy when the response content does not contain the file's match string."""
    target_file = create_json_data(["file"], match_string="abc123")[0]
    entry = Entry.create(
        "http://example.com/file",
        arguments={"file": target_file},
        response=StaticResponse(200, {}, content="Content is not matching"))

    heuristic = MatchString()
    await heuristic.after_response(entry)

    self.assertFalse(entry.result.string_match)
async def test_after_response_use_existing_sample(self):
    """With a sample already stored for the path, no new sample is requested or written."""
    stored_sample = "hash of response content"
    self.kb.query_samples["example.com/"] = stored_sample
    entry = Entry.create(
        "http://example.com/?wsdl",
        response=StaticResponse(200, {}, "content 123"))

    await self.filter.after_response(entry)

    self.engine.mock.perform_high_priority.assert_not_called()
    self.assertEqual(self.kb.query_samples["example.com/"], stored_sample)
async def test_after_headers_doesnt_request_random_filename_if_response_is_not_redirect(self):
    """after_headers on a 404 response does not call the engine's perform_high_priority."""
    high_priority = make_mocked_coro()
    self.fake_engine.perform_high_priority = high_priority
    not_found = StaticResponse(404, {}, b"Not found")
    entry = Entry.create("http://example.com/junkpath", response=not_found)

    await self.heuristic.after_headers(entry)

    self.fake_engine.perform_high_priority.assert_not_called()
async def test_calculated_limit_is_always_integer(self):
    """After many responses of varied Content-Length, calculated_limit equals its int() value."""
    one_round = [43, 26, 79, 32, 97, 54, 81, 33, 29, 103]
    observed_sizes = one_round * 100

    for content_length in observed_sizes:
        headers = {'Content-Length': content_length}
        entry = Entry.create("http://example.com/", response=StaticResponse(200, headers))
        await self.r.after_headers(entry)

    self.assertEqual(self.r.data.calculated_limit, int(self.r.data.calculated_limit))
async def test_only_add_string_match_flag_for_file(self):
    """An entry carrying a "path" argument (not "file") gets no string_match attribute."""
    target_path = create_json_data(["/path/"])[0]
    entry = Entry.create(
        "http://example.com/file",
        arguments={"path": target_path},
        response=StaticResponse(200, {}, content=""))

    heuristic = MatchString()
    await heuristic.after_response(entry)

    has_flag = hasattr(entry.result, "string_match")
    self.assertFalse(has_flag)
async def test_on_request_successful(self):
    """A 404 response is not sampled for soft-404 detection and soft404 stays falsy."""
    not_found = StaticResponse(404, {}, "Not Found")
    entry = Entry.create("http://example.com/", response=not_found)
    # Replace the sampler so any call to it can be detected.
    self.rule.get_soft_404_sample = make_mocked_coro()

    await self.runner.perform_ok(entry)

    self.rule.get_soft_404_sample.assert_not_called()
    self.assertFalse(entry.result.soft404)
async def test_simple_request(self):
    """after_response leaves the status code of a plain 200 response at 200."""
    entry = Entry.create(
        "http://example.com/test",
        response=StaticResponse(200, content="Hello world.", headers={}))

    await self.rule.after_response(entry)

    # assertEqual, not assertTrue: assertTrue(code, 200) treats 200 as the failure
    # message and passes for any truthy status code.
    self.assertEqual(entry.response.code, 200)
def test_convert_response(self):
    """A 405 response converts to HAR with its text content, size -1, no MIME type and the standard status text."""
    source = StaticResponse(code=405, content="hello", headers={})
    entry = Entry.create("http://example.com/", response=source)

    har = self.conv.convert_entry(entry)

    self.assertEqual(har.response.content.text, "hello")
    self.assertEqual(har.response.content.size, -1)
    self.assertEqual(har.response.content.mime_type, None)
    self.assertEqual(har.response.status, 405)
    self.assertEqual(har.response.status_text, "Method Not Allowed")
async def test_no_not_read_full_once_statistics_are_obtained(self):
    """After 1000 sampled responses, the entry's read_length is set and is below initial_limit."""
    # Prime the rule with random Content-Length values.
    samples_needed = 1000
    for _ in range(samples_needed):
        headers = {'Content-Length': random.randint(10000, 20000)}
        primer = Entry.create("http://example.om/test", response=StaticResponse(200, headers))
        await self.r.after_headers(primer)

    await self.r.after_headers(self.entry)

    self.assertLess(self.entry.result.read_length, self.r.initial_limit)
    self.assertNotEqual(self.entry.result.read_length, -1)
async def test_match_bytes_with_string(self):
    """string_match is truthy when match_bytes is the hex encoding of text found in the content."""
    # Same lowercase hex text as hexlify(...).decode("utf-8") produces.
    hex_encoded = b"abc123".hex()
    target_file = create_json_data(["file"], match_bytes=hex_encoded)[0]
    entry = Entry.create(
        "http://example.com/file",
        arguments={"file": target_file},
        response=StaticResponse(200, {}, content="abc123"))

    heuristic = MatchString()
    await heuristic.after_response(entry)

    self.assertTrue(entry.result.string_match)
async def test_add_hash_of_raw_content_if_response_content_of_sample_is_not_text(self):
    """When the sample body is not valid UTF-8, the stored sample is the MD5 digest of the raw bytes."""
    undecodable = b'Invalid UTF8 x\x80Z"'
    binary_sample = Response(200, {})
    binary_sample.set_content(undecodable, True)
    self.engine.mock.perform_high_priority.return_value = binary_sample
    entry = Entry.create("http://example.com/?wsdl", response=StaticResponse(200, {}, "123"))

    await self.filter.after_response(entry)

    expected_sample = {"md5": hashlib.md5(undecodable).digest()}
    self.assertEqual(self.kb.query_samples["example.com/"], expected_sample)