async def test_content_that_is_not_text_never_match_content_simhash_of_sample(self):
    """Check that no Simhash is built for a response whose body is not valid text."""
    # 0x80 cannot start a UTF-8 sequence, so this payload is not decodable text.
    raw = b'Invalid UTF8 x\x80Z"'
    response = Response(200, {})
    response.set_content(raw, True)

    # The sample hash is computed before Simhash gets patched below.
    # Named `sample_hash` to avoid shadowing the builtin `hash`.
    sample_hash = self.filter._hash_response(StaticResponse(200, {}, "content"))
    self.kb.query_samples["example.com/"] = {"simhash": sample_hash}

    # Distinct alias so the mock does not shadow the imported Simhash name.
    with patch("tachyon.heuristics.rejectignoredquery.Simhash") as simhash_mock:
        await self.filter.after_response(Entry.create("http://example.com/?wsdl", response=response))
        simhash_mock.assert_not_called()
async def on_request_successful(self, entry):
    """Set ``entry.result.error_behavior`` from the history of response simhashes.

    The flag is False for responses whose status code is in
    ``self.safe_status_codes`` or that carry no content simhash, and True
    when the simhash is within ``self.match_threshold`` of any value in
    ``self.known_bad_behavior``. Otherwise the simhash value is appended to
    ``self.behavior_buffer`` and the flag mirrors ``self.error_behavior``,
    which is re-evaluated only once the buffer has reached
    ``self.max_buffer_size``.
    """
    result = entry.result

    if entry.response.code in self.safe_status_codes:
        result.error_behavior = False
        return

    response_hash = result.content_simhash
    if response_hash is None:
        result.error_behavior = False
        return

    # A response close to an already recorded bad signature is flagged
    # immediately and is not added to the rolling buffer.
    for known_value in self.known_bad_behavior:
        if response_hash.distance(Simhash(known_value)) < self.match_threshold:
            result.error_behavior = True
            return

    if len(self.behavior_buffer) >= self.max_buffer_size:
        self.error_behavior = self._is_error_behavior(response_hash)
        # Drop the oldest sample to make room for the one appended below.
        self.behavior_buffer.pop(0)
        if self.error_behavior:
            # Promote every remaining buffered sample to a known bad signature.
            for buffered_value in self.behavior_buffer:
                self.known_bad_behavior.add(buffered_value)

    self.behavior_buffer.append(response_hash.value)
    result.error_behavior = self.error_behavior
async def is_valid(self, entry):
    """Re-check ``entry`` and report whether it should still be considered valid.

    Returns False when ``entry.result.error_simhash`` is set and lies within a
    distance of 5 of any signature in ``self.hammertime.kb.bad_behavior_response``.
    Otherwise the request is issued again through ``self.hammertime.request``:
    success or a ``RejectRedirection`` yields True, any other exception
    yields False.
    """
    value = getattr(entry.result, 'error_simhash', None)
    if value is not None:
        # Revalidate the responses with the known bad behavior signatures.
        current = Simhash(value)
        if any(current.distance(Simhash(k)) < 5 for k in self.hammertime.kb.bad_behavior_response):
            return False

    try:
        await self.hammertime.request(entry.request.url, arguments=entry.arguments)
    except RejectRedirection:
        # This is most likely the home path check as the result would never reach revalidation otherwise
        return True
    except Exception:
        # Deliberate best-effort: any other failure of the repeated request
        # means the entry is not confirmed.
        return False
    return True
async def test_on_request_successful_pop_first_result_when_buffer_is_full(self):
    """A full buffer drops its oldest value when a new response is processed."""
    # Pre-fill the buffer with the ten values 0..9.
    self.behavior_detection.behavior_buffer.extend(range(10))

    await self.runner.perform_ok(self.entry)

    # 0 has been evicted and the new hash value sits at the end.
    expected_buffer = [*range(1, 10), Simhash("data").value]
    self.assertEqual(self.behavior_detection.behavior_buffer, expected_buffer)
async def test_after_response_reject_request_if_simhash_of_response_content_equals_sample_simhash(self):
    """Responses equal or nearly equal to the stored sample are rejected."""
    url = "http://example.com/?wsdl"
    self.kb.query_samples["example.com/"] = {"simhash": Simhash("response content").value}

    # First body is identical to the sample, second differs by one character.
    candidates = [
        Entry.create(url, response=StaticResponse(200, {}, body))
        for body in ("response content", "response-content")
    ]

    for candidate in candidates:
        with self.assertRaises(RejectRequest):
            await self.filter.after_response(candidate)
async def test_homepage_do_not_count_as_soft_404(self):
    """The site root must not be rejected even when soft-404 samples exist."""
    base_url = "http://example.com/"
    sample_hash = Simhash("response content").value

    for pattern in self.patterns:
        signature = {"code": 200, "content_simhash": sample_hash}
        self.kb.soft_404_responses[base_url][pattern] = signature
        # Replaces the whole mapping on every pass, so only the last
        # pattern remains recorded as performed.
        self.rule.performed[base_url] = {pattern: None}

    try:
        home_entry = self.create_entry(base_url, response_content="home page")
        await self.rule.after_response(home_entry)
    except RejectRequest:
        self.fail("Request rejected.")
async def test_homepage_do_not_count_as_soft_404(self):
    """The site root must not be flagged as a soft 404 even when samples exist."""
    base_url = "http://example.com/"
    simhash = Simhash("response content")

    # Raw strings: \l, \d, \L and \i are not Python escape sequences, and
    # non-raw literals containing them trigger invalid-escape warnings.
    patterns = [r"/\l/\d.html", r"/\d-\l.js", r"/\L/", r"/\i", r"/.\l.js"]
    for pattern in patterns:
        self.kb.soft_404_responses[base_url][pattern] = {
            "code": 200,
            "content_simhash": simhash,
        }
        # Replaces the whole mapping on every pass, so only the last
        # pattern remains recorded as performed.
        self.rule.performed[base_url] = {pattern: None}

    entry = self.create_entry(base_url, response_content="home page")
    await self.runner.perform_ok(entry)

    self.assertFalse(entry.result.soft404)
async def test_reject_request_if_pattern_and_response_match_request_in_knowledge_base(self):
    """Requests matching a stored pattern and its sample response are rejected."""
    base_url = "http://example.com/"

    for pattern in self.patterns:
        sample_hash = Simhash("response content").value
        self.kb.soft_404_responses[base_url][pattern] = {
            "code": 200,
            "content_simhash": sample_hash,
        }
        self.rule.performed[base_url][pattern] = None

    paths = ["/test/123.html", "/123-test.js", "/TEST/", "/TesT", "/.test.js"]
    targets = [urljoin("http://example.com/", path) for path in paths]

    for target in targets:
        with self.assertRaises(RejectRequest):
            await self.rule.after_response(self.create_entry(target))
async def test_dont_mark_as_soft404_if_no_match_in_knowledge_base(self):
    """Responses that differ from the stored samples are not flagged as soft 404."""
    base_url = "http://example.com/"
    simhash = Simhash("response content")

    # Raw strings: \l is not a Python escape sequence, and non-raw literals
    # containing it trigger invalid-escape warnings.
    for pattern in [r"/\l.html", r"/\l", r"/.\l", r"/\l.php"]:
        self.kb.soft_404_responses[base_url][pattern] = ContentSignature(
            code=200, content_simhash=simhash)
        self.rule.performed[base_url][pattern] = None

    urls = (
        "http://example.com/test.html",
        "http://example.com/test",
        "http://example.com/.test",
        "http://example.com/test.php",
    )
    entries = [self.create_entry(url, response_content="test") for url in urls]

    for entry in entries:
        await self.runner.perform_ok(entry)

    self.assertFalse(any(entry.result.soft404 for entry in entries))
async def test_mark_request_has_soft404_if_pattern_and_response_match_request_in_knowledge_base(self):
    """Requests matching a stored pattern and its sample are flagged as soft 404."""
    base_url = "http://example.com/"

    # Raw strings: \d, \l, \L and \i are not Python escape sequences, and
    # non-raw literals containing them trigger invalid-escape warnings.
    patterns = [r"/test/\d.html", r"/\d-\l.js", r"/\L/", r"/\i", r"/abc/.\l.js"]
    for pattern in patterns:
        simhash = Simhash("response content")
        self.kb.soft_404_responses[base_url][pattern] = ContentSignature(
            code=200, content_simhash=simhash)
        self.rule.performed[base_url][pattern] = None

    paths = ["/test/123.html", "/123-test.js", "/TEST/", "/TesT", "/abc/.test.js"]
    urls = [urljoin("http://example.com/", path) for path in paths]
    entries = [self.create_entry(url) for url in urls]

    for entry in entries:
        await self.runner.perform_ok(entry)

    self.assertTrue(all(entry.result.soft404 for entry in entries))
async def test_add_alternate_url_response_to_knowledge_base(self):
    """Each processed URL stores the engine's response under its path pattern."""
    response = StaticResponse(200, {})
    response.content = "response content"
    self.engine.response = response

    urls = (
        "http://example.com/test",
        "http://example.com/123/",
        "http://example.com/.test",
        "http://example.com/123/test.js",
    )
    for url in urls:
        await self.rule.after_response(
            self.create_entry(url, response_content="response"))

    simhash = Simhash(response.content).value
    # Raw strings: \l and \d are not Python escape sequences, and non-raw
    # literals containing them trigger invalid-escape warnings.
    expected = {
        pattern: {"code": 200, "content_simhash": simhash}
        for pattern in (r"/\l", r"/\d/", r"/.\l", r"/\d/\l.js")
    }
    self.assertEqual(self.kb.soft_404_responses["http://example.com/"], expected)
async def test_dont_reject_request_if_no_match_in_knowledge_base(self):
    """Responses that differ from the stored samples must not be rejected."""
    base_url = "http://example.com/"
    simhash = Simhash("response content").value

    # Raw strings: \l is not a Python escape sequence, and non-raw literals
    # containing it trigger invalid-escape warnings.
    for pattern in [r"/\l.html", r"/\l", r"/.\l", r"/\l.php"]:
        self.kb.soft_404_responses[base_url][pattern] = {
            "code": 200,
            "content_simhash": simhash,
        }
        self.rule.performed[base_url][pattern] = None

    urls = (
        "http://example.com/test.html",
        "http://example.com/test",
        "http://example.com/.test",
        "http://example.com/test.php",
    )
    try:
        for url in urls:
            await self.rule.after_response(
                self.create_entry(url, response_content="test"))
    except RejectRequest:
        self.fail("Request rejected.")
def _responses_match(self, resp_simhash): for simhash_value in self.behavior_buffer: simhash = Simhash(simhash_value) yield resp_simhash.distance(simhash) < self.match_threshold
async def test_on_request_successful_store_simhash_of_response_content_in_knowledge_base(self):
    """After one processed response the buffer holds exactly that content's hash value."""
    await self.runner.perform_ok(self.entry)

    expected_buffer = [Simhash(self.entry.response.content).value]
    self.assertEqual(self.behavior_detection.behavior_buffer, expected_buffer)
def _create_simhash(self, content_response):
    """Return a Simhash of ``content_response``.

    ``self.match_filter`` and ``self.token_size`` are passed as the second
    and third positional arguments of ``Simhash``.
    """
    match_filter = self.match_filter
    token_size = self.token_size
    return Simhash(content_response, match_filter, token_size)