Ejemplo n.º 1
0
    def test_different(self):
        """ Run the mat2 binary on a copy of every `dirty.*` sample at once
        and check that a parser is still returned for each cleaned output.
        """
        src = './tests/data/'
        dst = './tests/data/parallel'
        if sys.version_info >= (3, 8):
            # NOTE(review): this calls the private `shutil._copytree` with a
            # pre-computed listing of `src`; presumably because `dst` lives
            # inside `src` -- confirm before swapping in `shutil.copytree`.
            with os.scandir(src) as itr:
                entries = list(itr)
            shutil._copytree(entries=entries,
                             src=src,
                             dst=dst,
                             symlinks=False,
                             ignore=None,
                             copy_function=shutil.copy2,
                             ignore_dangling_symlinks=False)
        else:
            shutil.copytree(src, dst)

        # Remove the scratch copy even when an assertion fails: a leftover
        # `dst` would make the copy above fail on the next run.
        try:
            proc = subprocess.Popen(
                mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
                stdout=subprocess.PIPE)
            proc.communicate()

            # The pattern previously started with './test/' (missing 's'),
            # so it matched nothing and the checks below never ran.
            for cleaned in glob.glob('./tests/data/parallel/dirty.cleaned.*'):
                p, mime = parser_factory.get_parser(cleaned)
                self.assertIsNotNone(mime)
                self.assertIsNotNone(p)
                # NOTE(review): the previous body then re-parsed
                # `p.output_filename`, bound the (parser, mimetype) tuple to
                # `p` and called `get_meta()` on it, which could not have
                # worked. Add a metadata assertion here once the expected
                # per-format result is confirmed.
        finally:
            shutil.rmtree(dst)
Ejemplo n.º 2
0
    def test_different(self):
        """ Run the mat2 binary on a copy of every `dirty.*` sample at once
        and check that a parser is still returned for each cleaned output.
        """
        dst = './tests/data/parallel'
        shutil.copytree('./tests/data/', dst)

        # Remove the scratch copy even when an assertion fails: a leftover
        # `dst` would make `copytree` fail on the next run.
        try:
            proc = subprocess.Popen(
                mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
                stdout=subprocess.PIPE)
            proc.communicate()

            # The pattern previously started with './test/' (missing 's'),
            # so it matched nothing and the checks below never ran.
            for cleaned in glob.glob('./tests/data/parallel/dirty.cleaned.*'):
                p, mime = parser_factory.get_parser(cleaned)
                self.assertIsNotNone(mime)
                self.assertIsNotNone(p)
                # NOTE(review): the previous body then re-parsed
                # `p.output_filename`, bound the (parser, mimetype) tuple to
                # `p` and called `get_meta()` on it, which could not have
                # worked. Add a metadata assertion here once the expected
                # per-format result is confirmed.
        finally:
            shutil.rmtree(dst)
Ejemplo n.º 3
0
 def test_odt(self):
     """ On a copy of embedded_corrupted.odt, `remove_all()` must return a
     falsy value and `get_meta()` a truthy one.
     """
     scratch = './tests/data/clean.odt'
     shutil.copy('./tests/data/embedded_corrupted.odt', scratch)
     odt_parser, _mime = parser_factory.get_parser(scratch)
     cleaned = odt_parser.remove_all()
     self.assertFalse(cleaned)
     remaining = odt_parser.get_meta()
     self.assertTrue(remaining)
     os.remove(scratch)
Ejemplo n.º 4
0
def clean_meta(filename: str, is_lightweight: bool, inplace: bool,
               sandbox: bool, policy: UnknownMemberPolicy) -> bool:
    """ Clean the metadata of `filename` and report whether it worked.

    The parser obtained for the file is configured with `policy`,
    `is_lightweight` and `sandbox` before `remove_all()` is called. When
    cleaning succeeds, the permission bits of the original are copied to the
    output file, and with `inplace` the output then replaces the original.

    Returns False when the file check fails, when no parser can be obtained
    or when a RuntimeError is raised; otherwise returns what `remove_all()`
    returned.
    """
    # In-place cleaning overwrites the original, so it must be writable too.
    mode = os.R_OK
    if inplace:
        mode |= os.W_OK
    if not __check_file(filename, mode):
        return False

    try:
        parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
    except ValueError as err:
        print("[-] something went wrong when cleaning %s: %s" % (filename, err))
        return False

    if parser is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return False

    parser.unknown_member_policy = policy
    parser.lightweight_cleaning = is_lightweight
    parser.sandbox = sandbox

    try:
        logging.debug('Cleaning %s…', filename)
        cleaned = parser.remove_all()
        if cleaned is True:
            shutil.copymode(filename, parser.output_filename)
            if inplace is True:
                os.rename(parser.output_filename, filename)
    except RuntimeError as err:
        print("[-] %s can't be cleaned: %s" % (filename, err))
        return False
    return cleaned
Ejemplo n.º 5
0
 def test_css(self):
     """ Check the mimetype and three metadata fields read from dirty.css. """
     css_parser, mimetype = parser_factory.get_parser('./tests/data/dirty.css')
     self.assertEqual(mimetype, 'text/css')
     css_meta = css_parser.get_meta()
     expected = {
         'author': 'jvoisin',
         'version': '1.0',
         'harmful data': 'underline is cool',
     }
     for field, value in expected.items():
         self.assertEqual(css_meta[field], value)
Ejemplo n.º 6
0
    def test_tar(self):
        """ Build a tar holding dirty samples plus a corrupted docx, check
        the metadata that is reported and that `remove_all()` returns a falsy
        value; then check that a PNG renamed to `.tar` is rejected by
        `TarParser` with a ValueError.
        """
        tar_path = './tests/data/clean.tar'
        png_path = './tests/data/dirty.png'
        plain_members = (
            './tests/data/dirty.flac',
            './tests/data/dirty.docx',
            './tests/data/dirty.jpg',
            './tests/data/embedded_corrupted.docx',
        )
        with tarfile.TarFile.open(tar_path, 'w') as tar_out:
            for member in plain_members:
                tar_out.add(member)
            # The PNG is added through a hand-built TarInfo with explicit
            # mtime, uid and gid.
            png_info = tarfile.TarInfo(name=png_path)
            png_info.mtime = time.time()
            png_info.uid = 1337
            png_info.gid = 1338
            png_info.size = os.stat(png_path).st_size
            with open(png_path, 'rb') as png_file:
                tar_out.addfile(png_info, png_file)

        tar_parser, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        meta = tar_parser.get_meta()
        flac_meta = meta['./tests/data/dirty.flac']
        self.assertEqual(flac_meta['comments'], 'Thank you for using MAT !')
        docx_image_meta = meta['./tests/data/dirty.docx']['word/media/image1.png']
        self.assertEqual(docx_image_meta['Comment'],
                         'This is a comment, be careful!')
        self.assertFalse(tar_parser.remove_all())
        os.remove(tar_path)

        shutil.copy(png_path, tar_path)
        with self.assertRaises(ValueError):
            archive.TarParser(tar_path)
        os.remove(tar_path)
Ejemplo n.º 7
0
    def test_zip(self):
        """ Build a zip whose members each use a different compression
        method, check the metadata reported for it, then check the
        compression type recorded for every member of the archive.
        """
        zip_path = './tests/data/dirty.zip'
        sources = (
            ('./tests/data/dirty.flac', zipfile.ZIP_STORED),
            ('./tests/data/dirty.docx', zipfile.ZIP_DEFLATED),
            ('./tests/data/dirty.jpg', zipfile.ZIP_BZIP2),
            ('./tests/data/dirty.txt', zipfile.ZIP_LZMA),
        )
        with zipfile.ZipFile(zip_path, 'w') as zout:
            for source, compression in sources:
                zout.write(source, compress_type=compression)

        zip_parser, mimetype = parser_factory.get_parser(zip_path)
        self.assertEqual(mimetype, 'application/zip')
        meta = zip_parser.get_meta()
        flac_meta = meta['tests/data/dirty.flac']
        self.assertEqual(flac_meta['comments'], 'Thank you for using MAT !')
        docx_image_meta = meta['tests/data/dirty.docx']['word/media/image1.png']
        self.assertEqual(docx_image_meta['Comment'],
                         'This is a comment, be careful!')

        expected_compression = {
            'tests/data/dirty.flac': zipfile.ZIP_STORED,
            'tests/data/dirty.docx': zipfile.ZIP_DEFLATED,
            'tests/data/dirty.jpg': zipfile.ZIP_BZIP2,
            'tests/data/dirty.txt': zipfile.ZIP_LZMA,
        }
        with zipfile.ZipFile(zip_path) as zipin:
            for member, compression in expected_compression.items():
                self.assertEqual(zipin.getinfo(member).compress_type,
                                 compression)

        os.remove(zip_path)
Ejemplo n.º 8
0
    def test_tar(self):
        """ Build a tar holding dirty samples plus a corrupted docx and check
        that both `get_meta()` and `remove_all()` raise ValueError on it;
        then check that a PNG renamed to `.tar` is rejected by `TarParser`.
        """
        tar_path = './tests/data/clean.tar'
        png_path = './tests/data/dirty.png'
        with tarfile.TarFile.open(tar_path, 'w') as zout:
            zout.add('./tests/data/dirty.flac')
            zout.add('./tests/data/dirty.docx')
            zout.add('./tests/data/dirty.jpg')
            zout.add('./tests/data/embedded_corrupted.docx')
            tarinfo = tarfile.TarInfo(name=png_path)
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 1338
            tarinfo.size = os.stat(png_path).st_size
            with open(png_path, 'rb') as f:
                zout.addfile(tarinfo, f)

        # Remove the scratch archive even when an assertion fails.
        try:
            p, mimetype = parser_factory.get_parser(tar_path)
            self.assertEqual(mimetype, 'application/x-tar')
            with self.assertRaises(ValueError):
                p.get_meta()
            # The call used to be wrapped in assertFalse(); that assertion
            # could never be evaluated once the expected ValueError is
            # raised, so only the call itself is kept.
            with self.assertRaises(ValueError):
                p.remove_all()
        finally:
            os.remove(tar_path)

        shutil.copy(png_path, tar_path)
        try:
            with self.assertRaises(ValueError):
                archive.TarParser(tar_path)
        finally:
            os.remove(tar_path)
Ejemplo n.º 9
0
 def test_docx(self):
     """ On a copy of embedded_corrupted.docx, `remove_all()` must return a
     falsy value and `get_meta()` must not return None.
     """
     scratch = './tests/data/clean.docx'
     shutil.copy('./tests/data/embedded_corrupted.docx', scratch)
     docx_parser, _mime = parser_factory.get_parser(scratch)
     cleaned = docx_parser.remove_all()
     self.assertFalse(cleaned)
     remaining = docx_parser.get_meta()
     self.assertIsNotNone(remaining)
     os.remove(scratch)
Ejemplo n.º 10
0
def _remove_metadata(fpath) -> Tuple[bool, Optional[str]]:
    """ Thin wrapper around libmat2 that cleans `fpath`.

    Returns a `(success, mimetype)` pair: `success` is False when no parser
    is returned for the file, otherwise it is the result of `remove_all()`.
    """
    parser, mtype = parser_factory.get_parser(fpath)
    success = False if parser is None else parser.remove_all()
    return success, mtype
Ejemplo n.º 11
0
 def test_brokensymlink(self):
     """ `get_parser()` must return `(None, None)` for a symlink whose
     target has been removed.
     """
     target = './tests/clean.py'
     link = './tests/SYMLINK'
     shutil.copy('./tests/test_libmat2.py', target)
     try:
         os.symlink(target, link)
     finally:
         # The target is only needed while the link is created; remove it
         # even if `os.symlink` fails.
         os.remove(target)

     # Remove the link even when an assertion fails: a leftover link would
     # make `os.symlink` fail on the next run.
     try:
         parser, mimetype = parser_factory.get_parser(link)
         self.assertIsNone(mimetype)
         self.assertIsNone(parser)
     finally:
         os.unlink(link)
Ejemplo n.º 12
0
 def test_tarfile_double_extension_handling(self):
     """ Test that a `.tar.bz2` archive is given the tar+bz2 mimetype """
     archive_path = './tests/data/dirty.tar.bz2'
     with tarfile.TarFile.open(archive_path, 'w:bz2') as tar_out:
         tar_out.add('./tests/data/dirty.jpg')
     _parser, detected = parser_factory.get_parser(archive_path)
     self.assertEqual(detected, 'application/x-tar+bz2')
     os.remove(archive_path)
Ejemplo n.º 13
0
 def test_docx(self):
     """ On a copy of embedded_corrupted.docx, both `remove_all()` and
     `get_meta()` must raise ValueError.
     """
     scratch = './tests/data/clean.docx'
     shutil.copy('./tests/data/embedded_corrupted.docx', scratch)
     # Remove the scratch copy even when an assertion fails.
     try:
         parser, _ = parser_factory.get_parser(scratch)
         with self.assertRaises(ValueError):
             parser.remove_all()
         # The call used to be wrapped in assertIsNotNone(); that assertion
         # could never be evaluated once the expected ValueError is raised,
         # so only the call itself is kept.
         with self.assertRaises(ValueError):
             parser.get_meta()
     finally:
         os.remove(scratch)
Ejemplo n.º 14
0
    def test_epub(self):
        """ Build an epub whose `OEBPS/content.opf` member is really a JPEG,
        then check the reported metadata and that `remove_all()` returns a
        falsy value.
        """
        epub_path = './tests/data/clean.epub'
        with zipfile.ZipFile(epub_path, 'w') as epub_out:
            epub_out.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')

        epub_parser, mimetype = parser_factory.get_parser(epub_path)
        self.assertEqual(mimetype, 'application/epub+zip')
        opf_meta = epub_parser.get_meta()['OEBPS/content.opf']
        self.assertEqual(opf_meta['OEBPS/content.opf'], 'harmful content')

        cleaned = epub_parser.remove_all()
        self.assertFalse(cleaned)
        os.remove(epub_path)
Ejemplo n.º 15
0
def TestOneInput(data):
    """ Fuzzing entry point.

    Picks one of `extensions`, writes the remaining fuzzed bytes to
    `/tmp/mat2_fuzz<extension>`, and, when a parser is returned for that
    file, reads its metadata, cleans it, then parses the same file again and
    reads its metadata once more. ValueError is ignored; the file is removed
    afterwards.
    """
    provider = atheris.FuzzedDataProvider(data)
    suffix = provider.PickValueInList(extensions)
    payload = provider.ConsumeBytes(sys.maxsize)

    fname = '/tmp/mat2_fuzz' + suffix
    with open(fname, 'wb') as out:
        out.write(payload)

    try:
        parser, _ = parser_factory.get_parser(fname)
        if parser:
            # Sandboxing is switched off for the fuzzing run.
            parser.sandbox = False
            parser.get_meta()
            parser.remove_all()
            reparsed, _ = parser_factory.get_parser(fname)
            reparsed.get_meta()
    except ValueError:
        pass
    os.remove(fname)
Ejemplo n.º 16
0
 def test_zip(self):
     """ Build a zip holding dirty samples plus a corrupted docx and check
     that both `get_meta()` and `remove_all()` raise ValueError on it.
     """
     zip_path = './tests/data/clean.zip'
     with zipfile.ZipFile(zip_path, 'w') as zout:
         zout.write('./tests/data/dirty.flac')
         zout.write('./tests/data/dirty.docx')
         zout.write('./tests/data/dirty.jpg')
         zout.write('./tests/data/embedded_corrupted.docx')

     # Remove the scratch archive even when an assertion fails.
     try:
         p, mimetype = parser_factory.get_parser(zip_path)
         self.assertEqual(mimetype, 'application/zip')
         with self.assertRaises(ValueError):
             p.get_meta()
         # The call used to be wrapped in assertFalse(); that assertion
         # could never be evaluated once the expected ValueError is raised,
         # so only the call itself is kept.
         with self.assertRaises(ValueError):
             p.remove_all()
     finally:
         os.remove(zip_path)
Ejemplo n.º 17
0
 def test_zip(self):
     """ Build a zip holding dirty samples plus a corrupted docx, check the
     metadata that is reported and that `remove_all()` returns a falsy value.
     """
     zip_path = './tests/data/clean.zip'
     members = (
         './tests/data/dirty.flac',
         './tests/data/dirty.docx',
         './tests/data/dirty.jpg',
         './tests/data/embedded_corrupted.docx',
     )
     with zipfile.ZipFile(zip_path, 'w') as zip_out:
         for member in members:
             zip_out.write(member)

     zip_parser, mimetype = parser_factory.get_parser(zip_path)
     self.assertEqual(mimetype, 'application/zip')
     meta = zip_parser.get_meta()
     flac_meta = meta['tests/data/dirty.flac']
     self.assertEqual(flac_meta['comments'], 'Thank you for using MAT !')
     docx_image_meta = meta['tests/data/dirty.docx']['word/media/image1.png']
     self.assertEqual(docx_image_meta['Comment'],
                      'This is a comment, be careful!')
     self.assertFalse(zip_parser.remove_all())
     os.remove(zip_path)
Ejemplo n.º 18
0
    def __check_deep_meta(self, p):
        """ Extract the zip archive at `p.filename` into a temporary
        directory and assert that every extracted file for which a parser is
        returned reports empty metadata. Files without a parser are skipped.
        """
        # The context managers close the archive and delete the temporary
        # directory even when an assertion below fails.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})
Ejemplo n.º 19
0
 def test_tar(self):
     """ Build a tar from three dirty samples and check its mimetype and the
     metadata reported for the flac and docx members.
     """
     tar_path = './tests/data/dirty.tar'
     members = (
         './tests/data/dirty.flac',
         './tests/data/dirty.docx',
         './tests/data/dirty.jpg',
     )
     with tarfile.TarFile(tar_path, 'w') as tar_out:
         for member in members:
             tar_out.add(member)

     tar_parser, mimetype = parser_factory.get_parser(tar_path)
     self.assertEqual(mimetype, 'application/x-tar')
     meta = tar_parser.get_meta()
     flac_meta = meta['./tests/data/dirty.flac']
     self.assertEqual(flac_meta['comments'], 'Thank you for using MAT !')
     docx_image_meta = meta['./tests/data/dirty.docx']['word/media/image1.png']
     self.assertEqual(docx_image_meta['Comment'],
                      'This is a comment, be careful!')
     os.remove(tar_path)
Ejemplo n.º 20
0
def show_meta(filename: str, sandbox: bool):
    """ Print the metadata of `filename`.

    Returns early, after printing a message where applicable, when the file
    check fails, when obtaining a parser raises ValueError or when no parser
    is returned. Otherwise the parser's `sandbox` attribute is set and its
    metadata is handed to `__print_meta`.
    """
    if not __check_file(filename):
        return

    try:
        parser, mimetype = parser_factory.get_parser(filename)  # type: ignore
    except ValueError as err:
        print("[-] something went wrong when processing %s: %s" %
              (filename, err))
        return

    if parser is None:
        print("[-] %s's format (%s) is not supported" % (filename, mimetype))
        return

    parser.sandbox = sandbox
    metadata = parser.get_meta()
    __print_meta(filename, metadata)
Ejemplo n.º 21
0
 def test_epub(self):
     """ Check the mimetype of dirty.epub and the metadata reported for four
     of its members.
     """
     epub_parser, mimetype = parser_factory.get_parser(
         './tests/data/dirty.epub')
     self.assertEqual(mimetype, 'application/epub+zip')
     meta = epub_parser.get_meta()

     opf_meta = meta['OEBPS/content.opf']
     self.assertEqual(opf_meta['dc:creator'], 'Dorothy L. Sayers')

     ncx_meta = meta['OEBPS/toc.ncx']
     self.assertEqual(
         ncx_meta['dtb:generator'],
         'Ebookmaker 0.4.0a5 by Marcello Perathoner <*****@*****.**>')

     first_member = meta[
         'OEBPS/@public@vhost@g@gutenberg@html@files@58820@58820-h@[email protected]']
     self.assertEqual(first_member['CreatorTool'],
                      'Adobe Photoshop CS5 Macintosh')

     second_member = meta[
         'OEBPS/@public@vhost@g@gutenberg@html@files@58820@[email protected]']
     self.assertEqual(
         second_member['generator'],
         'Ebookmaker 0.4.0a5 by Marcello Perathoner <*****@*****.**>')
Ejemplo n.º 22
0
    def test_onlymember_tar(self):
        """ Build a tar in which dirty.jpg is added through a hand-built
        TarInfo (uid 1337, mode 0), check that the uid is reported, that
        cleaning succeeds and that the cleaned archive has empty metadata.
        """
        tar_path = './tests/data/clean.tar'
        cleaned_path = './tests/data/clean.cleaned.tar'
        jpg_path = './tests/data/dirty.jpg'

        with tarfile.open(tar_path, 'w') as tar_out:
            tar_out.add('./tests/data/dirty.png')
            jpg_info = tarfile.TarInfo(jpg_path)
            jpg_info.mtime = time.time()
            jpg_info.uid = 1337
            jpg_info.gid = 0
            jpg_info.mode = 0o000
            jpg_info.size = os.stat(jpg_path).st_size
            with open(jpg_path, 'rb') as jpg_file:
                tar_out.addfile(tarinfo=jpg_info, fileobj=jpg_file)

        tar_parser, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        jpg_meta = tar_parser.get_meta()['./tests/data/dirty.jpg']
        self.assertEqual(jpg_meta['uid'], '1337')
        self.assertTrue(tar_parser.remove_all())

        cleaned_parser = archive.TarParser(cleaned_path)
        self.assertEqual(cleaned_parser.get_meta(), {})
        for leftover in (tar_path, cleaned_path):
            os.remove(leftover)
Ejemplo n.º 23
0
    def test_tarbz2(self):
        """ Build a bzip2-compressed tar from three dirty samples, clean it,
        and check that the cleaned archive and each of the three files
        extracted from it report empty metadata.
        """
        with tarfile.TarFile.open('./tests/data/dirty.tar.bz2',
                                  'w:bz2') as zout:
            zout.add('./tests/data/dirty.flac')
            zout.add('./tests/data/dirty.docx')
            zout.add('./tests/data/dirty.jpg')
        p = archive.TarParser('./tests/data/dirty.tar.bz2')
        meta = p.get_meta()
        self.assertEqual(
            meta['./tests/data/dirty.docx']['word/media/image1.png']
            ['Comment'], 'This is a comment, be careful!')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = archive.TarParser('./tests/data/dirty.cleaned.tar.bz2')
        self.assertEqual(p.get_meta(), {})
        self.assertTrue(p.remove_all())

        # The extraction directory used to come from mkdtemp() and was never
        # deleted; the context manager removes it, also on failure.
        with tempfile.TemporaryDirectory() as tmp_dir:
            # The `with` block closes the archive; no explicit close() needed.
            with tarfile.open('./tests/data/dirty.cleaned.tar.bz2') as zout:
                zout.extractall(path=tmp_dir)

            number_of_files = 0
            for root, _, fnames in os.walk(tmp_dir):
                for f in fnames:
                    complete_path = os.path.join(root, f)
                    p, _ = parser_factory.get_parser(complete_path)
                    self.assertIsNotNone(p)
                    self.assertEqual(p.get_meta(), {})
                    number_of_files += 1
            self.assertEqual(number_of_files, 3)

        os.remove('./tests/data/dirty.tar.bz2')
        os.remove('./tests/data/dirty.cleaned.tar.bz2')
        os.remove('./tests/data/dirty.cleaned.cleaned.tar.bz2')
Ejemplo n.º 24
0
 def test_subsubcalss(self):
     """ Check that dirty.mp3 is detected as audio/mpeg and handled by
     `audio.MP3Parser`.
     """
     mp3_parser, detected = parser_factory.get_parser('./tests/data/dirty.mp3')
     self.assertEqual(detected, 'audio/mpeg')
     parser_class = mp3_parser.__class__
     self.assertEqual(parser_class, audio.MP3Parser)
Ejemplo n.º 25
0
 def test_gif(self):
     """ Check the mimetype and the `Comment` field read from dirty.gif. """
     gif_parser, detected = parser_factory.get_parser('./tests/data/dirty.gif')
     self.assertEqual(detected, 'image/gif')
     comment = gif_parser.get_meta()['Comment']
     self.assertEqual(comment, 'this is a test comment')
Ejemplo n.º 26
0
 def test_wmv(self):
     """ Check the mimetype and the `EncodingSettings` field read from
     dirty.wmv.
     """
     wmv_parser, detected = parser_factory.get_parser('./tests/data/dirty.wmv')
     self.assertEqual(detected, 'video/x-ms-wmv')
     encoding_settings = wmv_parser.get_meta()['EncodingSettings']
     self.assertEqual(encoding_settings, 'Lavf52.103.0')
Ejemplo n.º 27
0
 def test_txt(self):
     """ Check that dirty.txt is detected as text/plain and reports empty
     metadata.
     """
     txt_parser, detected = parser_factory.get_parser('./tests/data/dirty.txt')
     self.assertEqual(detected, 'text/plain')
     found = txt_parser.get_meta()
     self.assertEqual(found, {})
Ejemplo n.º 28
0
 def test_inexistingfile(self):
     """ `get_parser()` must return `(None, None)` for
     ./tests/NONEXISTING_FILE.
     """
     missing_path = './tests/NONEXISTING_FILE'
     found_parser, detected = parser_factory.get_parser(missing_path)
     self.assertEqual(detected, None)
     self.assertEqual(found_parser, None)
Ejemplo n.º 29
0
 def test_pdf(self):
     """ A copy of a Python source file must be detected as text/x-python
     and get no parser.
     """
     scratch = './tests/data/clean.py'
     shutil.copy('./tests/test_libmat2.py', scratch)
     found_parser, detected = parser_factory.get_parser(scratch)
     self.assertEqual(detected, 'text/x-python')
     self.assertEqual(found_parser, None)
     os.remove(scratch)
Ejemplo n.º 30
0
 def test_chardevice(self):
     """ `get_parser()` must return `(None, None)` for /dev/zero. """
     device_path = '/dev/zero'
     found_parser, detected = parser_factory.get_parser(device_path)
     self.assertEqual(detected, None)
     self.assertEqual(found_parser, None)