def test_equal(self):
    """PolyglotLevel equality holds for identical field values."""
    # Two default-constructed levels compare equal.
    self.assertEqual(PolyglotLevel(), PolyglotLevel())

    def make():
        # Build a fresh, fully-populated instance each time so the two
        # operands share no mutable state.
        return PolyglotLevel(is_valid=False,
                             suspicious_chunks=[(0, 100), (105, 5)],
                             embedded={'docx', 'jar'})

    self.assertEqual(make(), make())
def scan(self) -> PolyglotLevel:
    """Scan an SWF (Flash) stream and flag bytes beyond the declared payload.

    Dispatches on the 3-byte signature: ``FWS`` (uncompressed), ``CWS``
    (zlib-compressed) or ``ZWS`` (LZMA-compressed) and records any data
    past the expected end of the file as a suspicious chunk.
    """
    level = PolyglotLevel()
    magic = self.must_read(3)
    # Version byte is read only to advance the stream; its value is unused.
    version = self.read_i8(1)
    # Declared size of the fully uncompressed file, header included.
    uncompressed_size = self.read_i32()
    if magic == b'FWS':
        # Uncompressed body: any difference between the real file size and
        # the declared size is trailing extra data.
        # NOTE(review): if file_size < uncompressed_size (truncated file)
        # this records a negative-length chunk — confirm add_chunk handles it.
        if self.file_size != uncompressed_size:
            level.add_chunk(uncompressed_size,
                            self.file_size - uncompressed_size)
    elif magic == b'CWS':
        # FIXME Find a way to not read all the file into memory (e.g. by buffering)
        to_decompress = self.buf.read()
        decompressor = zlib.decompressobj()
        # Decompress at most uncompressed_size - 8 bytes (the declared size
        # presumably counts the 8-byte header already consumed — TODO confirm);
        # any compressed input left over ends up in `unused_data`.
        decompressor.decompress(to_decompress, uncompressed_size - 8)
        if decompressor.unused_data:
            # The unused tail sits at the very end of the file.
            level.add_chunk(self.file_size - len(decompressor.unused_data),
                            len(decompressor.unused_data))
    elif magic == b'ZWS':
        # TODO Check if works correctly!!
        compressed_size = self.read_i32()
        # LZMA payload plus its 5-byte properties and 4-byte size fields.
        lzma_size = compressed_size + 5 + 4
        # The 8 accounts for the signature/version/size header bytes.
        if lzma_size + 8 != self.file_size:
            level.add_chunk(lzma_size + 8,
                            self.file_size - (lzma_size + 8))
    return level
def test_str(self):
    """str() of a PolyglotLevel renders only its non-default fields."""
    # (level, expected rendering) pairs, from empty to fully populated.
    cases = [
        (PolyglotLevel(), 'PolyglotLevel()'),
        (PolyglotLevel(is_valid=False), 'PolyglotLevel(is_valid=False)'),
        (PolyglotLevel(suspicious_chunks=[(0, 100), (5, 200)]),
         'PolyglotLevel(suspicious_chunks=[(0x0, 100), (0x5, 200)])'),
        (PolyglotLevel(embedded={'docx'}),
         "PolyglotLevel(embedded={'docx'})"),
        (PolyglotLevel(is_valid=False,
                       suspicious_chunks=[(0, 100), (5, 200)],
                       embedded={'jar'}),
         "PolyglotLevel(is_valid=False, "
         "suspicious_chunks=[(0x0, 100), (0x5, 200)], embedded={'jar'})"),
    ]
    for level, expected in cases:
        self.assertEqual(expected, str(level))
def test_garbage_at_beginning_length(self):
    """Leading garbage is reported only below the 30000-byte threshold."""
    below_threshold = flac.check(
        'tests/samples/flac/29999_bytes_of_garbage_at_beginning.flac')
    self.assertEqual(PolyglotLevel(suspicious_chunks=[(0, 29999)]),
                     below_threshold)
    # At exactly 30000 bytes the file is no longer recognised as FLAC.
    at_threshold = flac.check(
        'tests/samples/flac/30000_bytes_of_garbage_at_beginning.flac')
    self.assertIsNone(at_threshold)
def check_with_matches(filename, matches):
    """Check a PE file for data outside its expected structures.

    :param filename: path of the file to inspect
    :param matches: type matches already computed for the file
    :return: a PolyglotLevel describing suspicious chunks, or None when
        the file is not a PE
    """
    if 'IsPE' not in matches:
        return None
    level = PolyglotLevel()
    file_size = os.stat(filename).st_size
    pe = pefile.PE(filename)
    try:
        overlay_offset = pe.get_overlay_data_start_offset()
        if overlay_offset is not None:
            entry_id = pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']
            data_directory = pe.OPTIONAL_HEADER.DATA_DIRECTORY
            if len(data_directory) > entry_id \
                    and data_directory[entry_id].VirtualAddress != 0:
                # The Authenticode certificate table legitimately lives in
                # the overlay; exclude its range from the suspicious chunks.
                certificate_table = data_directory[entry_id]
                certificate_table_begin = certificate_table.VirtualAddress
                certificate_table_end = \
                    certificate_table_begin + certificate_table.Size
            else:
                # No certificate table: an empty range at the overlay start.
                certificate_table_begin = overlay_offset
                certificate_table_end = certificate_table_begin
            # Check if there is some data before the certificate table
            if overlay_offset < certificate_table_begin:
                level.add_chunk(overlay_offset,
                                certificate_table_begin - overlay_offset)
            # Check if there is some data after the certificate table
            if file_size > certificate_table_end:
                level.add_chunk(certificate_table_end,
                                file_size - certificate_table_end)
        return level
    finally:
        # Fix: pefile memory-maps the file; close it so the handle/mapping
        # is not leaked on every call.
        pe.close()
def test_builder(self):
    """Builder methods are equivalent to constructor keyword arguments."""
    built_invalid = PolyglotLevel().invalid()
    self.assertEqual(PolyglotLevel(is_valid=False), built_invalid)

    built_chunks = PolyglotLevel().add_chunk(0, 100).add_chunk(105, 5)
    self.assertEqual(PolyglotLevel(suspicious_chunks=[(0, 100), (105, 5)]),
                     built_chunks)

    built_embed = PolyglotLevel().embed('jar')
    self.assertEqual(PolyglotLevel(embedded={'jar'}), built_embed)
def check_with_matches(filename, matches):
    """Check a TAR archive for garbage hidden inside or after its blocks.

    :param filename: path of the file to inspect
    :param matches: type matches already computed for the file
    :return: a PolyglotLevel describing suspicious chunks, or None when
        the file is not a TAR archive
    """
    if 'IsTAR' not in matches:
        return None
    level = PolyglotLevel()
    with open(filename, 'rb') as fd:
        offset = 0
        while True:
            # Read header
            header = fd.read(__BLOCK_SIZE)
            if len(header) != __BLOCK_SIZE or all(b == 0 for b in header):
                # End of file header
                break
            # Detect garbage hidden after the NUL terminator of the name
            # field. Fix: a name filling the whole 100-byte field has no
            # terminator (find() returns -1) and no slack to inspect — the
            # old code then scanned the entire field and flagged legitimate
            # full-length names as garbage.
            filename_field = header[:100]
            null = filename_field.find(b'\x00')
            if null != -1:
                after_null = filename_field[null + 1:]
                for i, b in enumerate(after_null):
                    if b != 0:
                        level.add_chunk(offset + (100 - len(after_null) + i),
                                        len(after_null) - i)
                        break
            # The size field is octal, NUL-padded.
            try:
                file_size = int(header[124:124 + 12].strip(b'\x00'), base=8)
            except ValueError:
                return level.invalid()
            # Entry data occupies whole blocks: skip ceil(size / block) of
            # them (ceil division, replacing the manual counting loop).
            data_block_nb = -(-file_size // __BLOCK_SIZE)
            fd.seek(data_block_nb * __BLOCK_SIZE, io.SEEK_CUR)
            offset += __BLOCK_SIZE + data_block_nb * __BLOCK_SIZE
        # Test for non-null byte at the end
        block = fd.read(__BLOCK_SIZE)
        while len(block) != 0 and all(b == 0 for b in block):
            block = fd.read(__BLOCK_SIZE)
        offset = fd.tell() - len(block)
        fd.seek(0, io.SEEK_END)
        file_size = fd.tell()
        if len(block) != 0:
            level.add_chunk(offset, file_size - offset)
    return level
def __init__(self, filename, path):
    """Run all scanners on *path* and collect per-type analysis entries.

    :param filename: display name of the analysed file
    :param path: path of the file on disk
    """
    self.filename = filename
    start_time = time.time()
    self.scan_results = scan(path, use_magic=True)
    self.elapsed_time = time.time() - start_time
    self.results = []
    self.magic_scan_results = magic_scan(path)
    self.trid_scan_results = trid_scan(path)
    # Fix: the loop variable was named `type`, shadowing the builtin.
    for file_type, level in self.scan_results.items():
        # Embedded types get their own entry carrying the host level's
        # validity and chunks, but without the embedded set itself.
        level_without_embedded = PolyglotLevel(
            is_valid=level.is_valid,
            suspicious_chunks=level.suspicious_chunks)
        for embedded_type in level.embedded:
            self.results.append(
                AnalysisEntry(embedded_type, level_without_embedded))
        self.results.append(AnalysisEntry(file_type, level))
    self.results.sort(key=lambda entry: entry.ext)
def test_garbage_at_end(self):
    """Trailing garbage in a PNG is reported as one suspicious chunk."""
    result = png.check('tests/samples/png/garbage_at_end.png')
    expected = PolyglotLevel(suspicious_chunks=[(0x4CA0, 0x343)])
    self.assertEqual(expected, result)
def test_regular_png(self):
    """A well-formed PNG yields a clean, empty level."""
    result = png.check('tests/samples/png/regular.png')
    self.assertEqual(PolyglotLevel(), result)
def is_suspicious(self):
    """Return True when at least one entry deviates from a clean level."""
    clean = PolyglotLevel()
    return any(entry.level != clean for entry in self.results)
def test_garbage_at_end(self):
    """Trailing garbage in a WebM is reported as one suspicious chunk."""
    result = webm.check('tests/samples/webm/garbage_at_end.webm')
    expected = PolyglotLevel(suspicious_chunks=[(0x3804F, 0x15)])
    self.assertEqual(expected, result)
def test_regular_file(self):
    """A well-formed WebM yields a clean, empty level."""
    result = webm.check('tests/samples/webm/regular.webm')
    self.assertEqual(PolyglotLevel(), result)
def test_be_garbage_at_end(self):
    """Trailing garbage in a big-endian TIFF is reported."""
    result = tiff.check('tests/samples/tiff/garbage_at_end-be.tiff')
    expected = PolyglotLevel(suspicious_chunks=[(0x41FEC, 0x343)])
    self.assertEqual(expected, result)
def test_rar5_garbage_at_beginning(self):
    """Leading garbage in a RAR5 archive is reported."""
    result = rar.check('tests/samples/rar/garbage-at-beginning.rar5')
    expected = PolyglotLevel(suspicious_chunks=[(0, 0x1F8)])
    self.assertEqual(expected, result)
def test_garbage_at_beginning(self):
    """Leading garbage in an Ogg stream is reported."""
    result = ogg.check('tests/samples/ogg/garbage_at_beginning.ogg')
    expected = PolyglotLevel(suspicious_chunks=[(0, 0x11)])
    self.assertEqual(expected, result)
def test_garbage_at_end(self):
    """Trailing garbage in an Ogg stream is reported."""
    result = ogg.check('tests/samples/ogg/garbage_at_end.ogg')
    expected = PolyglotLevel(suspicious_chunks=[(0x83AA, 0xE)])
    self.assertEqual(expected, result)
def test_rar5_unexpected_eof(self):
    """A truncated RAR5 archive is flagged as invalid."""
    result = rar.check('tests/samples/rar/unexpected_eof.rar5')
    self.assertEqual(PolyglotLevel(is_valid=False), result)
def test_rar5_no_endarc(self):
    """A RAR5 archive lacking an ENDARC block is still clean."""
    result = rar.check('tests/samples/rar/no-endarc.rar5')
    self.assertEqual(PolyglotLevel(), result)
def test_rar5_garbage_at_end(self):
    """Trailing garbage in a RAR5 archive is reported."""
    result = rar.check('tests/samples/rar/garbage-at-end.rar5')
    expected = PolyglotLevel(suspicious_chunks=[(0x56, 0x343)])
    self.assertEqual(expected, result)
def test_rar3_garbage_everywhere(self):
    """Garbage both before and after a RAR3 archive is reported."""
    result = rar.check('tests/samples/rar/garbage-at-beginning-end.rar')
    expected = PolyglotLevel(
        suspicious_chunks=[(0, 0x2060), (0x20B9, 0x343)])
    self.assertEqual(expected, result)
def test_rar3_garbage_at_end_no_endarc_size_0(self):
    """Trailing garbage with no ENDARC makes a RAR3 invalid but chunked."""
    result = rar.check('tests/samples/rar/garbage_at_end-no_endarc.rar')
    expected = PolyglotLevel(is_valid=False,
                             suspicious_chunks=[(0x52, 0xC1)])
    self.assertEqual(expected, result)
def test_rar3_regular(self):
    """A well-formed RAR3 archive yields a clean, empty level."""
    result = rar.check('tests/samples/rar/regular.rar')
    self.assertEqual(PolyglotLevel(), result)
def test_rar5_garbage_at_end_no_endarc(self):
    """Trailing garbage with no ENDARC makes a RAR5 invalid but chunked."""
    # FIXME Should be (0x4E, 0xC1), but the current reader can't discard all the invalid blocks
    # when there is no ENDARC
    result = rar.check('tests/samples/rar/garbage_at_end-no_endarc.rar5')
    expected = PolyglotLevel(is_valid=False,
                             suspicious_chunks=[(0x62, 0xAD)])
    self.assertEqual(expected, result)
def test_garbage_in_middle(self):
    """Garbage between Ogg pages is reported."""
    result = ogg.check('tests/samples/ogg/garbage_in_middle.ogg')
    expected = PolyglotLevel(suspicious_chunks=[(0xA39, 0x26)])
    self.assertEqual(expected, result)
def test_garbage_at_beginning(self):
    """Leading garbage in a WAV file is reported."""
    result = wav.check('tests/samples/wav/garbage_at_beginning.wav')
    expected = PolyglotLevel(suspicious_chunks=[(0, 0xF)])
    self.assertEqual(expected, result)
def test_regular_file(self):
    """A well-formed Ogg stream yields a clean, empty level."""
    result = ogg.check('tests/samples/ogg/regular.ogg')
    self.assertEqual(PolyglotLevel(), result)
def test_garbage_at_beginning_and_end(self):
    """Garbage both before and after a WAV file is reported."""
    result = wav.check('tests/samples/wav/garbage_at_beginning_and_end.wav')
    expected = PolyglotLevel(
        suspicious_chunks=[(0, 0x15), (0x56429, 0x2E8C)])
    self.assertEqual(expected, result)
def test_le_regular_file(self):
    """A well-formed little-endian TIFF yields a clean, empty level."""
    result = tiff.check('tests/samples/tiff/regular-le.tiff')
    self.assertEqual(PolyglotLevel(), result)
def test_regular_file(self):
    """A well-formed WAV file yields a clean, empty level."""
    result = wav.check('tests/samples/wav/regular.wav')
    self.assertEqual(PolyglotLevel(), result)