Esempio n. 1
0
 def test_policy_unknown(self):
     """Assigning a policy built from an unrecognised name raises ValueError."""
     working_copy = './tests/data/clean.docx'
     shutil.copy('./tests/data/embedded.docx', working_copy)
     parser = office.MSOfficeParser(working_copy)
     with self.assertRaises(ValueError):
         parser.unknown_member_policy = UnknownMemberPolicy(
             'unknown_policy_name_totally_invalid')
     # Only the working copy exists at this point; no cleaning was attempted.
     os.remove(working_copy)
Esempio n. 2
0
    def test_office(self):
        """Check that cleaning removes every ``w:rsid`` occurrence.

        Before cleaning, the ``.xml`` members of the working copy must
        contain 11 case-insensitive occurrences of ``w:rsid`` in total.
        After ``remove_all()``, no ``.xml`` member of the cleaned archive
        may contain any.
        """
        source = './tests/data/clean.docx'
        cleaned = './tests/data/clean.cleaned.docx'
        shutil.copy('./tests/data/office_revision_session_ids.docx', source)
        p = office.MSOfficeParser(source)

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Start from the integer 0 rather than a bool: the value is a count,
        # and an int keeps failure messages meaningful if no member matches.
        how_many_rsid = 0
        with zipfile.ZipFile(source) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 11)

        ret = p.remove_all()
        self.assertTrue(ret)

        with zipfile.ZipFile(cleaned) as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                # Checked per member so a failure points at one archive entry.
                self.assertEqual(num, 0)

        os.remove(source)
        os.remove(cleaned)
Esempio n. 3
0
 def test_office_incomplete(self):
     """The malformed-content-types fixture parses, but remove_all() is falsy."""
     working_copy = './tests/data/clean.docx'
     shutil.copy('./tests/data/malformed_content_types.docx', working_copy)
     parser = office.MSOfficeParser(working_copy)
     self.assertIsNotNone(parser)
     outcome = parser.remove_all()
     self.assertFalse(outcome)
     os.remove(working_copy)
Esempio n. 4
0
 def test_policy_keep(self):
     """With the KEEP policy set, remove_all() is truthy for embedded.docx."""
     working_copy = './tests/data/clean.docx'
     cleaned_copy = './tests/data/clean.cleaned.docx'
     shutil.copy('./tests/data/embedded.docx', working_copy)
     parser = office.MSOfficeParser(working_copy)
     parser.unknown_member_policy = UnknownMemberPolicy.KEEP
     outcome = parser.remove_all()
     self.assertTrue(outcome)
     # Remove the working copy first, then its ``.cleaned`` counterpart.
     os.remove(working_copy)
     os.remove(cleaned_copy)
Esempio n. 5
0
 def test_policy_keep(self):
     """With the KEEP policy set, remove_all() is truthy for embedded.docx.

     Works on ``self.target`` and deletes the paths the parser exposes as
     ``filename`` and ``output_filename`` afterwards.
     """
     shutil.copy('./tests/data/embedded.docx', self.target)
     parser = office.MSOfficeParser(self.target)
     parser.unknown_member_policy = UnknownMemberPolicy.KEEP
     succeeded = parser.remove_all()
     self.assertTrue(succeeded)
     os.remove(parser.filename)
     os.remove(parser.output_filename)
Esempio n. 6
0
    def test_office(self):
        """Clean dirty.docx, then check the result has no metadata and re-cleans.

        The cleaned file must report an empty metadata dict, and calling
        ``remove_all()`` on it a second time must still be truthy.
        """
        original = './tests/data/clean.docx'
        once_cleaned = './tests/data/clean.cleaned.docx'
        twice_cleaned = './tests/data/clean.cleaned.cleaned.docx'
        shutil.copy('./tests/data/dirty.docx', original)

        parser = office.MSOfficeParser(original)
        self.assertIsNotNone(parser.get_meta())
        self.assertTrue(parser.remove_all())

        # Second pass: run the parser on the already-cleaned file.
        parser = office.MSOfficeParser(once_cleaned)
        self.assertEqual(parser.get_meta(), {})
        self.assertTrue(parser.remove_all())

        for leftover in (original, once_cleaned, twice_cleaned):
            os.remove(leftover)
Esempio n. 7
0
    def test_complex_pptx(self):
        """remove_all() is truthy for the narrated PowerPoint fixture."""
        working_copy = './tests/data/clean.pptx'
        shutil.copy(
            './tests/data/narrated_powerpoint_presentation.pptx',
            working_copy,
        )
        parser = office.MSOfficeParser(working_copy)
        succeeded = parser.remove_all()
        self.assertTrue(succeeded)

        # Delete the working copy, then the path the parser reports as output.
        os.remove(working_copy)
        os.remove(parser.output_filename)
Esempio n. 8
0
    def test_office(self):
        """Clean dirty.docx and verify the cleaned archive via the class helpers.

        Before cleaning, the embedded ``word/media/image1.png`` must expose
        the expected ``Comment`` value. Afterwards the cleaned file must
        report empty metadata and pass the class's zip and deep checks.
        """
        source = './tests/data/clean.docx'
        cleaned = './tests/data/clean.cleaned.docx'
        shutil.copy('./tests/data/dirty.docx', source)
        parser = office.MSOfficeParser(source)

        metadata = parser.get_meta()
        self.assertIsNotNone(metadata)
        image_comment = metadata['word/media/image1.png']['Comment']
        self.assertEqual(image_comment, 'This is a comment, be careful!')

        succeeded = parser.remove_all()
        self.assertTrue(succeeded)

        parser = office.MSOfficeParser(cleaned)
        self.assertEqual(parser.get_meta(), {})

        self.__check_zip_meta(parser)
        self.__check_deep_meta(parser)

        os.remove(source)
        os.remove(cleaned)
Esempio n. 9
0
 def test_docx(self):
     """get_meta() on dirty.docx exposes the expected core and app properties."""
     parser = office.MSOfficeParser('./tests/data/dirty.docx')
     meta = parser.get_meta()

     core_props = meta['docProps/core.xml']
     self.assertEqual(core_props['cp:lastModifiedBy'], 'Julien Voisin')
     self.assertEqual(core_props['dc:creator'], 'julien voisin')

     application = meta['docProps/app.xml']['Application']
     expected_application = (
         'LibreOffice/5.4.5.1$Linux_X86_64 LibreOffice_project/40m0$Build-1'
     )
     self.assertEqual(application, expected_application)
Esempio n. 10
0
    def test_msoffice(self):
        """Check that cleaning removes the ``<w:ins ...>`` marker from revision.docx.

        The marker (carrying ``w:author`` and ``w:date`` attributes) must be
        present in ``word/document.xml`` of the fixture and absent from the
        same member of the cleaned output.
        """
        # Single definition of the byte pattern checked before and after.
        r = b'<w:ins w:id="1" w:author="Unknown Author" w:date="2018-06-28T23:48:00Z">'

        with zipfile.ZipFile('./tests/data/revision.docx') as zipin:
            # ZipFile.read() returns the member's bytes without leaving a
            # member file object open, unlike an unclosed zipin.open().
            content = zipin.read('word/document.xml')
        self.assertIn(r, content)

        shutil.copy('./tests/data/revision.docx',
                    './tests/data/revision_clean.docx')
        p = office.MSOfficeParser('./tests/data/revision_clean.docx')
        self.assertTrue(p.remove_all())

        with zipfile.ZipFile(
                './tests/data/revision_clean.cleaned.docx') as zipin:
            # No handle on the cleaned archive may outlive this block, since
            # the file is deleted just below.
            content = zipin.read('word/document.xml')
        self.assertNotIn(r, content)

        os.remove('./tests/data/revision_clean.docx')
        os.remove('./tests/data/revision_clean.cleaned.docx')
Esempio n. 11
0
 def test_docx_with_py(self):
     """remove_all() is falsy for embedded.docx when no parser setting is changed."""
     working_copy = './tests/data/clean.docx'
     shutil.copy('./tests/data/embedded.docx', working_copy)
     parser = office.MSOfficeParser(working_copy)
     outcome = parser.remove_all()
     self.assertFalse(outcome)
     os.remove(working_copy)
Esempio n. 12
0
 def test_office_broken(self):
     """Constructing a parser on the broken-XML content-types fixture raises ValueError."""
     working_copy = './tests/data/clean.docx'
     shutil.copy('./tests/data/broken_xml_content_types.docx', working_copy)
     # Callable form of assertRaises: invokes the constructor with the path.
     self.assertRaises(ValueError, office.MSOfficeParser, working_copy)
     os.remove(working_copy)
Esempio n. 13
0
 def test_docx(self):
     """A PNG file copied to a ``.docx`` name makes the parser constructor raise ValueError."""
     disguised_png = './tests/data/clean.docx'
     shutil.copy('./tests/data/dirty.png', disguised_png)
     # Callable form of assertRaises: invokes the constructor with the path.
     self.assertRaises(ValueError, office.MSOfficeParser, disguised_png)
     os.remove(disguised_png)
Esempio n. 14
0
 def test_office_incomplete(self):
     """Constructing a parser on the malformed-content-types fixture raises ValueError."""
     working_copy = './tests/data/clean.docx'
     shutil.copy('./tests/data/malformed_content_types.docx', working_copy)
     # Callable form of assertRaises: invokes the constructor with the path.
     self.assertRaises(ValueError, office.MSOfficeParser, working_copy)
     os.remove(working_copy)