def scan_entropy(self, chunk: types.Chunk) -> List[Issue]:
    """Report high-entropy strings found in a chunk of data.

    The chunk contents are split on newlines and each line on whitespace.
    For every resulting word, candidate strings are extracted once for the
    base64 character set and once for the hex character set. A candidate
    that is not excluded by ``signature_is_excluded`` is reported when its
    entropy is above 4.5 (base64) or above 3 (hex). The two checks are
    independent, so a string that trips both is reported once per check.

    :param chunk: The chunk of data to be scanned
    :returns: One ``Entropy`` issue per offending string, in scan order
    """
    findings: List[Issue] = []
    tokens = (
        piece
        for text_line in chunk.contents.split("\n")
        for piece in text_line.split()
    )
    for token in tokens:
        # Both extractions happen up front, before any candidate is
        # checked; base64 candidates are then examined before hex ones.
        checks = (
            (util.get_strings_of_set(token, BASE64_CHARS), BASE64_CHARS, 4.5),
            (util.get_strings_of_set(token, HEX_CHARS), HEX_CHARS, 3),
        )
        for candidates, charset, threshold in checks:
            for candidate in candidates:
                if self.signature_is_excluded(candidate, chunk.file_path):
                    continue
                if self.calculate_entropy(candidate, charset) > threshold:
                    findings.append(
                        Issue(types.IssueType.Entropy, candidate, chunk)
                    )
    return findings
def test_get_strings_of_set_will_not_return_strings_below_threshold_length(
    self,
):
    """Runs shorter than the threshold are left out of the result.

    In "w.asdf.q" the "." is outside the charset, and the single-character
    pieces "w" and "q" must not be returned when 3 is passed as the third
    argument; only "asdf" is expected.
    """
    expected = ["asdf"]
    found = util.get_strings_of_set("w.asdf.q", "asdfqwer", 3)
    self.assertEqual(found, expected)
def test_get_strings_of_set_splits_string_by_chars_outside_charset(self):
    """A character outside the charset separates the returned strings.

    The "." in "asdf.qwer" is not part of the charset "asdfqwer", so the
    input is expected to come back as the two pieces on either side of it.
    """
    expected = ["asdf", "qwer"]
    found = util.get_strings_of_set("asdf.qwer", "asdfqwer", 1)
    self.assertEqual(found, expected)