def test_different(self):
    """Clean every dirty sample in one mat2 invocation and check the results.

    ``./tests/data/`` is copied to ``./tests/data/parallel``, the mat2 binary
    is run once on every ``dirty.*`` file of that copy, and each resulting
    ``dirty.cleaned.*`` file must be recognised by a parser and expose no
    metadata.
    """
    src = './tests/data/'
    dst = './tests/data/parallel'
    # Registered before the copy so the scratch tree is removed even when the
    # copy itself or a later assertion fails.
    self.addCleanup(shutil.rmtree, dst, ignore_errors=True)

    if sys.version_info >= (3, 8):
        # `dst` lives inside `src`: the listing is snapshotted before `dst`
        # exists, presumably so the copy does not descend into its own output.
        with os.scandir(src) as itr:
            entries = list(itr)
        shutil._copytree(entries=entries, src=src, dst=dst, symlinks=False,
                         ignore=None, copy_function=shutil.copy2,
                         ignore_dangling_symlinks=False)
    else:
        shutil.copytree(src, dst)

    proc = subprocess.Popen(
        mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
        stdout=subprocess.PIPE)
    proc.communicate()

    # The pattern must live under './tests/' (with an "s"); a pattern that
    # matches nothing would silently skip every check, hence the explicit
    # non-empty assertion.
    cleaned_files = glob.glob('./tests/data/parallel/dirty.cleaned.*')
    self.assertTrue(cleaned_files)
    for cleaned in cleaned_files:
        # get_parser() returns a (parser, mimetype) pair.
        p, mime = parser_factory.get_parser(cleaned)
        self.assertIsNotNone(mime)
        self.assertIsNotNone(p)
        # NOTE(review): assumes every cleaned sample reports no metadata at
        # all; relax this if some format legitimately keeps harmless fields.
        self.assertEqual(p.get_meta(), {})
def test_different(self):
    """Clean every dirty sample in one mat2 invocation and check the results.

    ``./tests/data/`` is copied to ``./tests/data/parallel``, the mat2 binary
    is run once on every ``dirty.*`` file of that copy, and each resulting
    ``dirty.cleaned.*`` file must be recognised by a parser and expose no
    metadata.
    """
    dst = './tests/data/parallel'
    # Registered before the copy so the scratch tree is removed even when the
    # copy itself or a later assertion fails.
    self.addCleanup(shutil.rmtree, dst, ignore_errors=True)
    shutil.copytree('./tests/data/', dst)

    proc = subprocess.Popen(
        mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
        stdout=subprocess.PIPE)
    proc.communicate()

    # The pattern must live under './tests/' (with an "s"); a pattern that
    # matches nothing would silently skip every check, hence the explicit
    # non-empty assertion.
    cleaned_files = glob.glob('./tests/data/parallel/dirty.cleaned.*')
    self.assertTrue(cleaned_files)
    for cleaned in cleaned_files:
        # get_parser() returns a (parser, mimetype) pair.
        p, mime = parser_factory.get_parser(cleaned)
        self.assertIsNotNone(mime)
        self.assertIsNotNone(p)
        # NOTE(review): assumes every cleaned sample reports no metadata at
        # all; relax this if some format legitimately keeps harmless fields.
        self.assertEqual(p.get_meta(), {})
def test_odt(self):
    """Cleaning the ``embedded_corrupted.odt`` sample must not succeed.

    ``remove_all()`` has to return a falsy value, while ``get_meta()`` must
    still return something truthy.
    """
    target = './tests/data/clean.odt'
    shutil.copy('./tests/data/embedded_corrupted.odt', target)
    # Registered right away so the copy is removed even if an assertion fails.
    self.addCleanup(os.remove, target)

    parser, _ = parser_factory.get_parser(target)
    self.assertFalse(parser.remove_all())
    self.assertTrue(parser.get_meta())
def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
               policy: UnknownMemberPolicy) -> bool:
    """Remove the metadata of `filename` and report whether it worked.

    :param filename: path of the file to clean.
    :param is_lightweight: stored on the parser as `lightweight_cleaning`.
    :param inplace: when true, the file must be writable and the cleaned
        output is renamed over the original.
    :param sandbox: stored on the parser as `sandbox`.
    :param policy: stored on the parser as `unknown_member_policy`.
    :return: the value of the parser's `remove_all()`, or `False` when the
        file cannot be accessed, parsed, or cleaned.
    """
    if inplace:
        access_mode = os.R_OK | os.W_OK
    else:
        access_mode = os.R_OK
    if not __check_file(filename, access_mode):
        return False

    try:
        parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
    except ValueError as err:
        print("[-] something went wrong when cleaning %s: %s" % (filename, err))
        return False

    if parser is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return False

    parser.unknown_member_policy = policy
    parser.lightweight_cleaning = is_lightweight
    parser.sandbox = sandbox

    try:
        logging.debug('Cleaning %s…', filename)
        outcome = parser.remove_all()
        if outcome is not True:
            return outcome
        # Keep the permission bits of the original on the cleaned copy.
        shutil.copymode(filename, parser.output_filename)
        if inplace is True:
            os.rename(parser.output_filename, filename)
        return outcome
    except RuntimeError as err:
        print("[-] %s can't be cleaned: %s" % (filename, err))
        return False
def test_css(self):
    """The dirty CSS sample is detected as text/css and exposes its metadata."""
    parser, mime = parser_factory.get_parser('./tests/data/dirty.css')
    self.assertEqual(mime, 'text/css')

    extracted = parser.get_meta()
    expectations = (
        ('author', 'jvoisin'),
        ('version', '1.0'),
        ('harmful data', 'underline is cool'),
    )
    for field, value in expectations:
        self.assertEqual(extracted[field], value)
def test_tar(self):
    """Check a tar archive holding a corrupted member, then a fake tar.

    The archive is built from several dirty samples plus
    ``embedded_corrupted.docx``: its metadata must be readable, but
    ``remove_all()`` must return a falsy value. Afterwards a PNG copied to
    the same ``.tar`` path must make ``archive.TarParser`` raise
    ``ValueError``.
    """
    tar_path = './tests/data/clean.tar'
    try:
        with tarfile.TarFile.open(tar_path, 'w') as zout:
            zout.add('./tests/data/dirty.flac')
            zout.add('./tests/data/dirty.docx')
            zout.add('./tests/data/dirty.jpg')
            zout.add('./tests/data/embedded_corrupted.docx')
            # Member added by hand so that ownership fields can be set.
            tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png')
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 1338
            tarinfo.size = os.stat('./tests/data/dirty.png').st_size
            with open('./tests/data/dirty.png', 'rb') as f:
                zout.addfile(tarinfo, f)

        p, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        meta = p.get_meta()
        self.assertEqual(meta['./tests/data/dirty.flac']['comments'],
                         'Thank you for using MAT !')
        self.assertEqual(
            meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'],
            'This is a comment, be careful!')
        self.assertFalse(p.remove_all())

        # Second scenario: a PNG stored under a .tar name.
        os.remove(tar_path)
        shutil.copy('./tests/data/dirty.png', tar_path)
        with self.assertRaises(ValueError):
            archive.TarParser(tar_path)
    finally:
        # Runs on failure too, so a broken run leaves no stale archive behind.
        if os.path.exists(tar_path):
            os.remove(tar_path)
def test_zip(self):
    """Check metadata and per-member compression of a freshly built zip.

    Each sample is stored with a different compression method; the archive
    must be detected as ``application/zip``, expose the metadata of its
    members, and report the compression type each member was written with.
    """
    zip_path = './tests/data/dirty.zip'
    # Archive member name -> compression method used when writing it.
    members = {
        'tests/data/dirty.flac': zipfile.ZIP_STORED,
        'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
        'tests/data/dirty.jpg': zipfile.ZIP_BZIP2,
        'tests/data/dirty.txt': zipfile.ZIP_LZMA,
    }
    with zipfile.ZipFile(zip_path, 'w') as zout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, zip_path)
        for name, compression in members.items():
            zout.write('./' + name, compress_type=compression)

    p, mimetype = parser_factory.get_parser(zip_path)
    self.assertEqual(mimetype, 'application/zip')
    meta = p.get_meta()
    self.assertEqual(meta['tests/data/dirty.flac']['comments'],
                     'Thank you for using MAT !')
    self.assertEqual(
        meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'],
        'This is a comment, be careful!')

    with zipfile.ZipFile(zip_path) as zipin:
        for name, compression in members.items():
            self.assertEqual(zipin.getinfo(name).compress_type, compression)
def test_tar(self):
    """A tar with a corrupted member must raise, and so must a fake tar.

    The archive is built from several dirty samples plus
    ``embedded_corrupted.docx``: both ``get_meta()`` and ``remove_all()``
    must raise ``ValueError``. Afterwards a PNG copied to the same ``.tar``
    path must make ``archive.TarParser`` raise ``ValueError`` as well.
    """
    tar_path = './tests/data/clean.tar'
    try:
        with tarfile.TarFile.open(tar_path, 'w') as zout:
            zout.add('./tests/data/dirty.flac')
            zout.add('./tests/data/dirty.docx')
            zout.add('./tests/data/dirty.jpg')
            zout.add('./tests/data/embedded_corrupted.docx')
            # Member added by hand so that ownership fields can be set.
            tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png')
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 1338
            tarinfo.size = os.stat('./tests/data/dirty.png').st_size
            with open('./tests/data/dirty.png', 'rb') as f:
                zout.addfile(tarinfo, f)

        p, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        with self.assertRaises(ValueError):
            p.get_meta()
        # Only the exception matters here: an assertion wrapped around the
        # call would never be evaluated once the call raises.
        with self.assertRaises(ValueError):
            p.remove_all()

        # Second scenario: a PNG stored under a .tar name.
        os.remove(tar_path)
        shutil.copy('./tests/data/dirty.png', tar_path)
        with self.assertRaises(ValueError):
            archive.TarParser(tar_path)
    finally:
        # Runs on failure too, so a broken run leaves no stale archive behind.
        if os.path.exists(tar_path):
            os.remove(tar_path)
def test_docx(self):
    """Cleaning the ``embedded_corrupted.docx`` sample must not succeed.

    ``remove_all()`` has to return a falsy value, while ``get_meta()`` must
    still return something other than ``None``.
    """
    target = './tests/data/clean.docx'
    shutil.copy('./tests/data/embedded_corrupted.docx', target)
    # Registered right away so the copy is removed even if an assertion fails.
    self.addCleanup(os.remove, target)

    parser, _ = parser_factory.get_parser(target)
    self.assertFalse(parser.remove_all())
    self.assertIsNotNone(parser.get_meta())
def _remove_metadata(fpath) -> Tuple[bool, Optional[str]]:
    """Thin convenience wrapper around libmat2.

    Looks up a parser for `fpath` and, when one exists, asks it to remove
    all metadata.

    :return: a `(success, mimetype)` pair; `success` is `False` when no
        parser is available for the file, otherwise it is whatever the
        parser's `remove_all()` returned.
    """
    parser, mtype = parser_factory.get_parser(fpath)
    success = False if parser is None else parser.remove_all()
    return success, mtype
def test_brokensymlink(self):
    """A dangling symlink yields neither a parser nor a mimetype."""
    target = './tests/clean.py'
    link = './tests/SYMLINK'

    shutil.copy('./tests/test_libmat2.py', target)
    os.symlink(target, link)
    # Registered right away: a leftover link would make os.symlink() fail on
    # the next run of this test.
    self.addCleanup(os.unlink, link)
    # Removing the target is what turns the link into a dangling one.
    os.remove(target)

    parser, mimetype = parser_factory.get_parser(link)
    self.assertIsNone(mimetype)
    self.assertIsNone(parser)
def test_tarfile_double_extension_handling(self):
    """A ``.tar.bz2`` archive is detected as ``application/x-tar+bz2``."""
    archive_path = './tests/data/dirty.tar.bz2'
    with tarfile.TarFile.open(archive_path, 'w:bz2') as zout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, archive_path)
        zout.add('./tests/data/dirty.jpg')

    _, mimetype = parser_factory.get_parser(archive_path)
    self.assertEqual(mimetype, 'application/x-tar+bz2')
def test_docx(self):
    """Both cleaning and reading ``embedded_corrupted.docx`` must raise.

    ``remove_all()`` and ``get_meta()`` are each expected to raise
    ``ValueError`` on the corrupted sample.
    """
    target = './tests/data/clean.docx'
    shutil.copy('./tests/data/embedded_corrupted.docx', target)
    # Registered right away so the copy is removed even if an assertion fails.
    self.addCleanup(os.remove, target)

    parser, _ = parser_factory.get_parser(target)
    with self.assertRaises(ValueError):
        parser.remove_all()
    # Only the exception matters here: an assertion wrapped around the call
    # would never be evaluated once the call raises.
    with self.assertRaises(ValueError):
        parser.get_meta()
def test_epub(self):
    """An epub whose ``OEBPS/content.opf`` is really a JPEG cannot be cleaned.

    The bogus member must be reported as ``'harmful content'`` by
    ``get_meta()`` and ``remove_all()`` must return a falsy value.
    """
    epub_path = './tests/data/clean.epub'
    with zipfile.ZipFile(epub_path, 'w') as zout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, epub_path)
        zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')

    p, mimetype = parser_factory.get_parser(epub_path)
    self.assertEqual(mimetype, 'application/epub+zip')
    meta = p.get_meta()
    self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
                     'harmful content')
    self.assertFalse(p.remove_all())
def TestOneInput(data):
    """Fuzzing entry point: feed fuzzer-provided bytes to a mat2 parser.

    The input selects one of the known `extensions`; the remaining bytes are
    written to `/tmp/mat2_fuzz<extension>`. When a parser is available for
    that file, its metadata is read, the file is cleaned, and the metadata
    is read once more through a fresh parser. `ValueError` is treated as an
    expected rejection of malformed input; any other exception propagates.
    """
    fdp = atheris.FuzzedDataProvider(data)
    extension = fdp.PickValueInList(extensions)
    payload = fdp.ConsumeBytes(sys.maxsize)

    fname = '/tmp/mat2_fuzz' + extension
    with open(fname, 'wb') as f:
        f.write(payload)

    try:
        p, _ = parser_factory.get_parser(fname)
        if p:
            p.sandbox = False
            p.get_meta()
            p.remove_all()
            p, _ = parser_factory.get_parser(fname)
            p.get_meta()
    except ValueError:
        # Malformed input rejected by the parser: not a finding.
        pass
    finally:
        # Remove the scratch file even when an unexpected exception escapes.
        os.remove(fname)
def test_zip(self):
    """A zip holding a corrupted member must raise on read and on clean.

    The archive is built from several dirty samples plus
    ``embedded_corrupted.docx``; both ``get_meta()`` and ``remove_all()``
    are expected to raise ``ValueError``.
    """
    zip_path = './tests/data/clean.zip'
    with zipfile.ZipFile(zip_path, 'w') as zout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, zip_path)
        zout.write('./tests/data/dirty.flac')
        zout.write('./tests/data/dirty.docx')
        zout.write('./tests/data/dirty.jpg')
        zout.write('./tests/data/embedded_corrupted.docx')

    p, mimetype = parser_factory.get_parser(zip_path)
    self.assertEqual(mimetype, 'application/zip')
    with self.assertRaises(ValueError):
        p.get_meta()
    # Only the exception matters here: an assertion wrapped around the call
    # would never be evaluated once the call raises.
    with self.assertRaises(ValueError):
        p.remove_all()
def test_zip(self):
    """A zip holding a corrupted member is readable but cannot be cleaned.

    The archive is built from several dirty samples plus
    ``embedded_corrupted.docx``; the metadata of its members must be
    reported and ``remove_all()`` must return a falsy value.
    """
    zip_path = './tests/data/clean.zip'
    with zipfile.ZipFile(zip_path, 'w') as zout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, zip_path)
        zout.write('./tests/data/dirty.flac')
        zout.write('./tests/data/dirty.docx')
        zout.write('./tests/data/dirty.jpg')
        zout.write('./tests/data/embedded_corrupted.docx')

    p, mimetype = parser_factory.get_parser(zip_path)
    self.assertEqual(mimetype, 'application/zip')
    meta = p.get_meta()
    self.assertEqual(meta['tests/data/dirty.flac']['comments'],
                     'Thank you for using MAT !')
    self.assertEqual(
        meta['tests/data/dirty.docx']['word/media/image1.png']['Comment'],
        'This is a comment, be careful!')
    self.assertFalse(p.remove_all())
def __check_deep_meta(self, p):
    """Assert that no file inside the zip archive `p.filename` has metadata.

    The archive is extracted to a temporary directory; every extracted file
    for which a parser exists must return an empty dict from `get_meta()`.
    Files without a parser are skipped.

    :param p: object whose `filename` attribute is the path of a zip archive.
    """
    # Both context managers release their resource even when an assertion
    # fails: the archive handle is closed and the directory is deleted.
    with tempfile.TemporaryDirectory() as tempdir:
        with zipfile.ZipFile(p.filename) as zipin:
            zipin.extractall(tempdir)

        for subdir, _, files in os.walk(tempdir):
            for f in files:
                complete_path = os.path.join(subdir, f)
                inside_p, _ = parser_factory.get_parser(complete_path)
                if inside_p is None:
                    continue
                self.assertEqual(inside_p.get_meta(), {})
def test_tar(self):
    """A tar built from dirty samples exposes the metadata of its members."""
    tar_path = './tests/data/dirty.tar'
    with tarfile.TarFile(tar_path, 'w') as tout:
        # The file exists as soon as it is opened for writing; register its
        # removal now so it is deleted even if a later step fails.
        self.addCleanup(os.remove, tar_path)
        tout.add('./tests/data/dirty.flac')
        tout.add('./tests/data/dirty.docx')
        tout.add('./tests/data/dirty.jpg')

    p, mimetype = parser_factory.get_parser(tar_path)
    self.assertEqual(mimetype, 'application/x-tar')
    meta = p.get_meta()
    self.assertEqual(meta['./tests/data/dirty.flac']['comments'],
                     'Thank you for using MAT !')
    self.assertEqual(
        meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'],
        'This is a comment, be careful!')
def show_meta(filename: str, sandbox: bool):
    """Print the metadata of `filename`.

    Nothing is printed by this function when the file check fails; a
    diagnostic line is printed instead of the metadata when the parser
    lookup raises `ValueError` or when no parser exists for the file.

    :param filename: path of the file to inspect.
    :param sandbox: stored on the parser as `sandbox` before reading.
    """
    if not __check_file(filename):
        return

    try:
        parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
    except ValueError as err:
        print("[-] something went wrong when processing %s: %s" % (filename, err))
        return

    if parser is not None:
        parser.sandbox = sandbox
        __print_meta(filename, parser.get_meta())
        return

    print("[-] %s's format (%s) is not supported" % (filename, mimetype))
def test_epub(self):
    """The dirty epub sample exposes the metadata of its inner members."""
    parser, mime = parser_factory.get_parser('./tests/data/dirty.epub')
    self.assertEqual(mime, 'application/epub+zip')
    extracted = parser.get_meta()

    # NOTE(review): the '<*****@*****.**>' and '[email protected]' fragments
    # look like the residue of an e-mail obfuscation pass over this file --
    # verify these literals against the sample before relying on them.
    expectations = (
        ('OEBPS/content.opf', 'dc:creator', 'Dorothy L. Sayers'),
        ('OEBPS/toc.ncx', 'dtb:generator',
         'Ebookmaker 0.4.0a5 by Marcello Perathoner <*****@*****.**>'),
        ('OEBPS/@public@vhost@g@gutenberg@html@files@58820@58820-h@[email protected]',
         'CreatorTool', 'Adobe Photoshop CS5 Macintosh'),
        ('OEBPS/@public@vhost@g@gutenberg@html@files@58820@[email protected]',
         'generator',
         'Ebookmaker 0.4.0a5 by Marcello Perathoner <*****@*****.**>'),
    )
    for member, field, value in expectations:
        self.assertEqual(extracted[member][field], value)
def test_onlymember_tar(self):
    """Metadata carried only by a tar member header is reported and removed.

    One member is added with a hand-built ``TarInfo`` (custom uid, gid and
    mode); its uid must show up in ``get_meta()``, cleaning must succeed,
    and the cleaned archive must expose no metadata.
    """
    tar_path = './tests/data/clean.tar'
    cleaned_path = './tests/data/clean.cleaned.tar'
    try:
        with tarfile.open(tar_path, 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 0
            tarinfo.mode = 0o000
            tarinfo.size = os.stat('./tests/data/dirty.jpg').st_size
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)

        p, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        meta = p.get_meta()
        self.assertEqual(meta['./tests/data/dirty.jpg']['uid'], '1337')
        self.assertTrue(p.remove_all())

        p = archive.TarParser(cleaned_path)
        self.assertEqual(p.get_meta(), {})
    finally:
        # Runs on failure too, so a broken run leaves no stale archive behind.
        for path in (tar_path, cleaned_path):
            if os.path.exists(path):
                os.remove(path)
def test_tarbz2(self):
    """A bzip2-compressed tar can be cleaned, and so can its members.

    The archive's metadata is read, the archive is cleaned, and the cleaned
    archive must expose no metadata and be cleanable again. The cleaned
    archive is then extracted: exactly three files are expected, each with
    a parser and no metadata.
    """
    dirty_path = './tests/data/dirty.tar.bz2'
    cleaned_path = './tests/data/dirty.cleaned.tar.bz2'
    recleaned_path = './tests/data/dirty.cleaned.cleaned.tar.bz2'
    try:
        with tarfile.TarFile.open(dirty_path, 'w:bz2') as zout:
            zout.add('./tests/data/dirty.flac')
            zout.add('./tests/data/dirty.docx')
            zout.add('./tests/data/dirty.jpg')

        p = archive.TarParser(dirty_path)
        meta = p.get_meta()
        self.assertEqual(
            meta['./tests/data/dirty.docx']['word/media/image1.png']['Comment'],
            'This is a comment, be careful!')
        self.assertTrue(p.remove_all())

        p = archive.TarParser(cleaned_path)
        self.assertEqual(p.get_meta(), {})
        self.assertTrue(p.remove_all())

        # The extraction directory is deleted on exit, even on failure.
        with tempfile.TemporaryDirectory() as tmp_dir:
            with tarfile.open(cleaned_path) as zout:
                zout.extractall(path=tmp_dir)

            number_of_files = 0
            for root, _, fnames in os.walk(tmp_dir):
                for f in fnames:
                    complete_path = os.path.join(root, f)
                    p, _ = parser_factory.get_parser(complete_path)
                    self.assertIsNotNone(p)
                    self.assertEqual(p.get_meta(), {})
                    number_of_files += 1
            self.assertEqual(number_of_files, 3)
    finally:
        # Runs on failure too, so a broken run leaves no stale archive behind.
        for path in (dirty_path, cleaned_path, recleaned_path):
            if os.path.exists(path):
                os.remove(path)
def test_subsubcalss(self):
    """Parser auto-detection must also find sub-sub-classes.

    The dirty MP3 sample has to be detected as ``audio/mpeg`` and handled
    by ``audio.MP3Parser``.
    """
    sample = './tests/data/dirty.mp3'
    found_parser, found_mime = parser_factory.get_parser(sample)
    self.assertEqual(found_mime, 'audio/mpeg')
    self.assertEqual(found_parser.__class__, audio.MP3Parser)
def test_gif(self):
    """The dirty GIF sample is detected as image/gif and exposes its comment."""
    sample = './tests/data/dirty.gif'
    parser, mime = parser_factory.get_parser(sample)
    self.assertEqual(mime, 'image/gif')

    extracted = parser.get_meta()
    self.assertEqual(extracted['Comment'], 'this is a test comment')
def test_wmv(self):
    """The dirty WMV sample is detected as video/x-ms-wmv and exposes its
    encoding settings."""
    sample = './tests/data/dirty.wmv'
    parser, mime = parser_factory.get_parser(sample)
    self.assertEqual(mime, 'video/x-ms-wmv')

    extracted = parser.get_meta()
    self.assertEqual(extracted['EncodingSettings'], 'Lavf52.103.0')
def test_txt(self):
    """The plain-text sample is detected as text/plain and has no metadata."""
    sample = './tests/data/dirty.txt'
    parser, mime = parser_factory.get_parser(sample)
    self.assertEqual(mime, 'text/plain')

    extracted = parser.get_meta()
    self.assertEqual(extracted, {})
def test_inexistingfile(self):
    """A path that does not exist yields neither a parser nor a mimetype."""
    missing = './tests/NONEXISTING_FILE'
    found_parser, found_mime = parser_factory.get_parser(missing)
    self.assertEqual(found_mime, None)
    self.assertEqual(found_parser, None)
def test_pdf(self):
    """A Python source file is recognised but has no parser.

    Despite the test's name, the sample is a copy of ``test_libmat2.py``: it
    must be detected as ``text/x-python`` and yield no parser.
    """
    target = './tests/data/clean.py'
    shutil.copy('./tests/test_libmat2.py', target)
    # Registered right away so the copy is removed even if an assertion fails.
    self.addCleanup(os.remove, target)

    parser, mimetype = parser_factory.get_parser(target)
    self.assertEqual(mimetype, 'text/x-python')
    self.assertIsNone(parser)
def test_chardevice(self):
    """``/dev/zero`` yields neither a parser nor a mimetype."""
    device = '/dev/zero'
    found_parser, found_mime = parser_factory.get_parser(device)
    self.assertEqual(found_mime, None)
    self.assertEqual(found_parser, None)