Beispiel #1
    def test_lt(self):
        """Round-trip a string containing a literal ``<`` through the handler.

        Parses ``with_lt.docx``, compiles it with an upper-cased translation
        and parses the compiled output again to confirm the translation is
        what comes back.
        """
        # Parse original file
        content = self.get_content('with_lt.docx')
        template, stringset = self.handler.parse(content)

        # Make sure extracted data is OK
        self.assertEqual(len(stringset), 1)
        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(openstring.string, u'This is a < lessthan')
        self.assertEqual(openstring.string, openstring.key)

        # Compile with altered translation
        translation = u'THIS IS A < LESSTHAN'
        stringset = [OpenString(openstring.key, translation, order=0)]
        content = self.handler.compile(template, stringset)

        # Make sure compiled file has altered data.  The document is fetched
        # once; assertIn/assertNotIn report both operands on failure.
        document = DocxFile(content).get_document()
        self.assertNotIn("This is a", document)
        self.assertNotIn("lessthan", document)
        self.assertIn("THIS IS A", document)
        self.assertIn("LESSTHAN", document)

        # Parse compiled file
        template, stringset = self.handler.parse(content)

        # Make sure compiled file has the correct translation
        self.assertEqual(len(stringset), 1)
        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(openstring.string, translation)
        self.assertEqual(openstring.string, openstring.key)
Beispiel #2
    def test_rtl_missing_ppr(self):
        """Compile with ``is_rtl=True`` and check the RTL markup in the output.

        Uses ``hello_world_no_ppr.docx`` (presumably a fixture whose
        paragraphs have no ``w:pPr`` element -- confirm against the file) and
        asserts that the compiled XML carries ``w:bidi`` and ``w:rtl``
        elements whose ``w:val`` is ``"1"``.
        """
        path = '{}/hello_world_no_ppr.docx'.format(self.TESTFILE_BASE)
        with open(path, 'rb') as f:
            # The assignment had no right-hand side (a syntax error); the
            # file is opened only to be read here.
            content = f.read()
        handler = DocxHandler()
        template, stringset = handler.parse(content)
        openstring = stringset[0]

        # Compile with altered translation
        translation = u'<tx>Καλημέρα κόσμε </tx><tx href="">αυτός είναι ένας κρίκος</tx>'  # noqa
        stringset = [OpenString(openstring.key, translation, order=1)]

        content = handler.compile(template, stringset, is_rtl=True)
        docx = DocxFile(content)
        soup = BeautifulSoup(docx.get_document(), 'xml')
        self.assertEqual(len(stringset), 1)
        self.assertEqual(len(soup.find_all("w:bidi")), 1)
        for pPr in soup.find_all("w:pPr"):
            bidi_children = pPr.findChildren("w:bidi")
            self.assertEqual(len(bidi_children), 1)
            for bidi in bidi_children:
                self.assertEqual(bidi["w:val"], "1")

        # This used to be ``assertTrue(len(...), 1)``, where the ``1`` was
        # silently taken as the failure message.  The effective check was
        # "at least one w:rtl exists", which is what is asserted here.
        self.assertGreater(len(soup.find_all("w:rtl")), 0)
        for rPr in soup.find_all("w:rPr"):
            rtl_children = rPr.findChildren("w:rtl")
            self.assertEqual(len(rtl_children), 1)
            for rtl in rtl_children:
                self.assertEqual(rtl["w:val"], "1")
Beispiel #3
    def test_ampersand(self):
        """Round-trip a string containing a literal ``&`` through the handler.

        Parses ``with_ampersand.docx``, compiles it with an upper-cased
        translation and parses the compiled output again to confirm the
        translation is what comes back.
        """
        content = self.get_content('with_ampersand.docx')
        template, stringset = self.handler.parse(content)

        # Make sure extracted data is OK
        self.assertEqual(len(stringset), 1)
        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(openstring.string, u'This is an & ampersand')
        self.assertEqual(openstring.string, openstring.key)

        # Compile with altered translation
        translation = u'THIS IS AN & AMPERSAND'
        stringset = [OpenString(openstring.key, translation, order=0)]
        content = self.handler.compile(template, stringset)

        # Make sure compiled file has altered data.  The document is fetched
        # once; assertIn/assertNotIn report both operands on failure.
        document = DocxFile(content).get_document()
        self.assertNotIn("This is an", document)
        self.assertNotIn("ampersand", document)
        self.assertIn("THIS IS AN", document)
        self.assertIn("AMPERSAND", document)

        # Parse compiled file
        template, stringset = self.handler.parse(content)

        # Make sure compiled file has the correct translation
        self.assertEqual(len(stringset), 1)
        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(openstring.string, translation)
        self.assertEqual(openstring.string, openstring.key)
Beispiel #4
    def test_simple_file(self):
        """Parse ``hello_world.docx``, compile a translation and re-parse it.

        Checks the extracted source string, then that the compiled document
        contains the translated text instead of the source text, and finally
        that re-parsing the compiled file yields the translation.
        """
        path = '{}/hello_world.docx'.format(self.TESTFILE_BASE)
        with open(path, 'rb') as f:
            # The assignment had no right-hand side (a syntax error); the
            # file is opened only to be read here.
            content = f.read()

        handler = DocxHandler()
        template, stringset = handler.parse(content)

        self.assertEqual(len(stringset), 1)

        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        # The expected-string literal was left dangling without its
        # enclosing assertion; wrapped to match the checks further down.
        self.assertEqual(
            openstring.string,
            u'<tx>Hello world </tx><tx href="">this is a link</tx>'  # noqa
        )
        self.assertEqual(openstring.string, openstring.key)

        translation = u'<tx>Καλημέρα κόσμε </tx><tx href="">αυτός είναι ένας κρίκος</tx>'  # noqa
        stringset = [
            OpenString(openstring.key, translation, order=1)
        ]

        content = handler.compile(template, stringset)
        template, stringset = handler.parse(content)

        self.assertEqual(len(stringset), 1)

        docx = DocxFile(content)
        document = docx.get_document()
        document_rels = docx.get_document_rels()

        for text in [u'Hello world ', u'this is a link']:
            self.assertNotIn(text, document)

        # NOTE(review): the URL literals below are empty in this copy of the
        # test.  An empty string is contained in every string, so the
        # "not in" check cannot pass and the "in" check cannot fail --
        # presumably the real URLs were stripped; confirm and restore them.
        for url in [u'']:
            self.assertNotIn(url, document_rels)

        for text in [u'Καλημέρα κόσμε ', u'αυτός είναι ένας κρίκος']:
            self.assertIn(text, document)

        for url in [u'']:
            self.assertIn(url, document_rels)

        docx.set_document(u'Modified Document')
        docx.set_document_rels(u'Modified Document Rels')

        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(openstring.string, translation)
        self.assertEqual(openstring.string, openstring.key)
Beispiel #5
    def test_broken_file(self):
        """Parse and compile ``missing_wr_parent.docx`` without errors.

        Checks that one string is extracted from the fixture, compiles a
        translation for it and verifies that a fresh handler parses the
        translation back out of the compiled file.
        """
        path = '{}/missing_wr_parent.docx'.format(self.TESTFILE_BASE)
        with open(path, 'rb') as f:
            # The assignment had no right-hand side (a syntax error); the
            # file is opened only to be read here.
            content = f.read()

        # The instance is not used afterwards; the construction is kept as
        # it was in case building a DocxFile from this fixture is itself
        # part of what the test exercises.
        docx = DocxFile(content)

        handler = DocxHandler()
        template, stringset = handler.parse(content)

        self.assertEqual(len(stringset), 1)

        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        # The expected-string literal was left dangling without its
        # enclosing assertion.
        self.assertEqual(
            openstring.string,
            u'Foo bar baz'
        )
        self.assertEqual(openstring.string, openstring.key)

        translation = u'Φου βαρ βαζ'
        stringset = [
            OpenString(openstring.key, translation, order=1)
        ]

        content = handler.compile(template, stringset)

        handler = DocxHandler()
        template, stringset = handler.parse(content)

        self.assertEqual(len(stringset), 1)

        openstring = stringset[0]
        self.assertEqual(openstring.order, 0)
        self.assertEqual(
            openstring.string,
            u'Φου βαρ βαζ'
        )
Beispiel #6
    def test_hyperlink_reorder(self):
        """Move the hyperlink to another run and check the run formatting.

        After compiling a translation whose ``href`` sits on a different
        ``<tx>`` than in the source, the second ``w:t`` element's run
        properties must have neither ``color`` nor ``u`` children.
        """
        content = self.get_content('special_cases_2.docx')

        template, source_stringset = self.handler.parse(content)
        content = self.handler.compile(template, source_stringset)

        docx = DocxFile(template)
        soup = BeautifulSoup(docx.get_document(), 'xml')
        paragraph = soup.find_all('w:p')[0]
        # Not used by any assertion below; kept as in the original.
        text_elements_bf_reorder = paragraph.find_all('w:t')
        # reorder href rPr is swapped
        # The closing brackets were missing.  Each entry is a list of
        # fragments (hence the ``u''.join`` below), so the fragments are
        # wrapped in an inner list -- NOTE(review): confirm the nesting.
        translated_strings = [
            [
                u'ένα δύο ',
                u'<tx>τρία </tx>',
                u'<tx href=""> τέσσερα </tx>',
            ],
        ]

        # The loop header and ``.append(`` call were truncated; rebuilt as a
        # comprehension with ``order`` counting from 1 as before.
        translated_stringset = [
            OpenString(extracted.key, u''.join(translation), order=order)
            for order, (extracted, translation) in enumerate(
                zip(source_stringset, translated_strings), start=1)
        ]
        content = self.handler.compile(template, translated_stringset)
        _, stringset = self.handler.parse(content)

        docx = DocxFile(content)
        soup = BeautifulSoup(docx.get_document(), 'xml')
        paragraph = soup.find_all('w:p')[0]
        text_elements = paragraph.find_all('w:t')

        self.assertEqual(text_elements[1].parent.rPr.color, None)
        self.assertEqual(text_elements[1].parent.rPr.u, None)
Beispiel #7
    def test_docx_file(self):
        """Read, modify and re-compress a DOCX through ``DocxFile``.

        Checks the document text of ``hello_world.docx``, replaces the
        document and its rels, compresses, and verifies that a ``DocxFile``
        built from the compressed content returns the replaced values.
        """
        content = self.get_content('hello_world.docx')

        docx = DocxFile(content)

        document = docx.get_document()
        for text in [u'Hello world ', u'this is a link']:
            self.assertIn(text, document)

        # NOTE(review): the URL literal is empty in this copy of the test.
        # An empty string is contained in every string, so this check cannot
        # fail -- presumably the real URL was stripped; confirm and restore.
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        docx.set_document(u'Modified Document')
        docx.set_document_rels(u'Modified Document Rels')

        content = docx.compress()

        docx = DocxFile(content)
        self.assertEqual(docx.get_document(), u'Modified Document')
        self.assertEqual(docx.get_document_rels(), u'Modified Document Rels')
Beispiel #8
    def test_two_text_elements_file(self):
        """Round-trip ``two_text_elements.docx`` with Greek translations.

        Verifies the extracted source strings and the template text, then
        compiles translations and checks that the re-parsed strings and the
        re-parsed template contain the translated text.
        """
        content = self.get_content('two_text_elements.docx')
        template, source_stringset = self.handler.parse(content)

        # The closing bracket of every list literal in this test was
        # missing (a syntax error); each list is closed again below.
        expected_strings = [
            u'<tx>Hello</tx><tx> world</tx>',
            u'<tx>Goodbye </tx><tx>world</tx>',
            u'<tx>This is a </tx><tx href="">link</tx>',
            u'<tx>This is my picture </tx><tx> (rest of text goes here).</tx>',
        ]

        for extracted, expected in zip(source_stringset, expected_strings):
            self.assertEqual(extracted.string, u''.join(expected))

        docx = DocxFile(template)
        document = docx.get_document()
        expected_docx_source_text = [
            u' world',
            u'Goodbye ',
            u'This is a ',
            u'This is my picture ',
            u' (rest of text goes here).',
        ]
        for text in expected_docx_source_text:
            self.assertIn(text, document)

        # NOTE(review): the URL literals in this test are empty.  An empty
        # string is contained in every string, so these checks cannot fail
        # -- presumably the real URLs were stripped; confirm and restore.
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        translated_strings = [
            u'<tx>Γεία</tx><tx> κόσμε</tx>',
            u'<tx>Αντίο </tx><tx>κόσμε</tx>',
            u'<tx>Αυτό είναι ένα </tx><tx href="">λίνκ</tx>',  # noqa
            u'<tx>Και αυτή η εικόνα μου </tx><tx> (υπόλοιπο κείμενο).</tx>',
        ]

        # The loop header and ``.append(`` call were truncated; rebuilt as a
        # comprehension with ``order`` counting from 1 as before.
        translated_stringset = [
            OpenString(extracted.key, u''.join(translation), order=order)
            for order, (extracted, translation) in enumerate(
                zip(source_stringset, translated_strings), start=1)
        ]

        fixed_stringset = [
            u'<tx>Γεία</tx><tx> κόσμε</tx>',
            u'<tx>Αντίο </tx><tx>κόσμε</tx>',
            u'<tx>Αυτό είναι ένα </tx><tx href="">λίνκ</tx>',  # noqa
            u'<tx>Και αυτή η εικόνα μου </tx><tx> (υπόλοιπο κείμενο).</tx>',
        ]

        content = self.handler.compile(template, translated_stringset)
        template, stringset = self.handler.parse(content)

        for extracted, expected in zip(stringset, fixed_stringset):
            self.assertEqual(extracted.string, u''.join(expected))

        docx = DocxFile(template)
        document = docx.get_document()
        expected_docx_source_text = [
            u' κόσμε',
            u'Αντίο ',
            u'Αυτό είναι ένα ',
            u'Και αυτή η εικόνα μου ',
            u' (υπόλοιπο κείμενο).',
        ]
        for text in expected_docx_source_text:
            self.assertIn(text, document)

        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)
Beispiel #9
    def test_special_cases_file(self):
        """Exercise tag edge cases when compiling ``special_cases.docx``.

        After checking the extracted source, four translation variants are
        compiled against the same template and re-parsed: a removed href, an
        href moved to another run, fewer tags than the source, and more tags
        than the source.

        NOTE(review): the bracket lines of every list literal and the
        ``zip(...)`` / ``.append(`` lines of every build loop were missing.
        Each entry is joined with ``u''.join``, so the fragments are wrapped
        in an inner list (one entry per extracted string) -- confirm.
        """
        content = self.get_content('special_cases.docx')
        template, source_stringset = self.handler.parse(content)

        def compile_and_parse(translated_strings):
            # Build one OpenString per source string (order counts from 1),
            # compile against the unchanged template and re-parse.
            translated_stringset = [
                OpenString(extracted.key, u''.join(translation), order=order)
                for order, (extracted, translation) in enumerate(
                    zip(source_stringset, translated_strings), start=1)
            ]
            compiled = self.handler.compile(template, translated_stringset)
            _, parsed = self.handler.parse(compiled)
            return compiled, parsed

        def assert_strings(stringset, expected_strings):
            # Compare each parsed string with its joined expected fragments.
            for extracted, expected in zip(stringset, expected_strings):
                self.assertEqual(extracted.string, u''.join(expected))

        expected_strings = [
            [
                u'one two ', u'<tx href="">three ',
                u'<tx>four</tx>', u'<tx> five </tx>', u'<tx>six</tx>',
                u'<tx> seven eight</tx></tx>', u'<tx> nine </tx>',
                u'<tx>ten </tx>', u'<tx>eleven</tx>', u' twelve'
            ],
        ]
        assert_strings(source_stringset, expected_strings)

        docx = DocxFile(template)
        document = docx.get_document()
        expected_docx_source_text = [
            u'one two', u'three ', u'four', u' five ', u'six', u' seven eight',
            u' nine ', u'ten ', u'eleven', u' twelve'
        ]
        for text in expected_docx_source_text:
            self.assertIn(text, document)

        # NOTE(review): the URL literals in this test are empty.  An empty
        # string is contained in every string, so these checks cannot fail
        # -- presumably the real URLs were stripped; confirm and restore.
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        # missing href is removed
        translated_strings = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u'<tx> εννεά </tx>', u'<tx>δέκα </tx>', u'<tx>έντεκα</tx>',
                u' δώδεκα'
            ],
        ]
        fixed_stringset = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u'<tx> εννεά </tx>', u'<tx>δέκα </tx>', u'<tx>έντεκα</tx>',
                u' δώδεκα'
            ],
        ]
        content, stringset = compile_and_parse(translated_strings)
        assert_strings(stringset, fixed_stringset)

        # reorder href is added
        translated_strings = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u'<tx> εννεά </tx>', u'<tx>δέκα </tx>',
                u'<tx href="">έντεκα</tx>', u' δώδεκα'
            ],
        ]
        fixed_stringset = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u'<tx> εννεά </tx>', u'<tx>δέκα </tx>',
                u'<tx href="">έντεκα</tx>', u' δώδεκα'
            ],
        ]
        content, stringset = compile_and_parse(translated_strings)
        assert_strings(stringset, fixed_stringset)

        docx = DocxFile(content)
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        # missing tags removes elements from docx
        translated_strings = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u' εννεά ', u'δέκα ', u'έντεκα', u' δώδεκα'
            ],
        ]
        fixed_stringset = [
            [
                u'ένα δύο ', u'<tx>τρία </tx>', u'<tx>τέσσερα</tx>',
                u'<tx> πέντε </tx>', u'<tx>έξι</tx>', u'<tx> επτά οχτώ</tx>',
                u' εννεά ', u'δέκα ', u'έντεκα', u' δώδεκα'
            ],
        ]
        content, stringset = compile_and_parse(translated_strings)
        assert_strings(stringset, fixed_stringset)

        # More tags merge text into last element
        translated_strings = [
            [
                u'ένα δύο ', u'<tx href="">τρία ',
                u'<tx>τέσσερα</tx>', u'<tx> πέντε </tx>', u'<tx>έξι</tx>',
                u'<tx> επτά οχτώ</tx></tx>', u'<tx> εννεά </tx>',
                u'<tx>δέκα </tx>', u'<tx>έντεκα</tx>', u'<tx> δώδεκα</tx>',
                u'<tx> δεκατρία</tx>', u'<tx> δεκατέσσερα</tx>', u' δεκαπέντε'
            ],
        ]
        fixed_stringset = [
            [
                u'ένα δύο ', u'<tx href="">τρία ',
                u'<tx>τέσσερα</tx>', u'<tx> πέντε </tx>', u'<tx>έξι</tx>',
                u'<tx> επτά οχτώ</tx></tx>', u'<tx> εννεά </tx>',
                u'<tx>δέκα </tx>', u'<tx>έντεκα</tx>',
                u' δώδεκα δεκατρία δεκατέσσερα δεκαπέντε'
            ],
        ]
        content, stringset = compile_and_parse(translated_strings)
        assert_strings(stringset, fixed_stringset)
Beispiel #10
    def test_complex_file(self):
        """Round-trip ``complex.docx`` with Greek translations.

        Checks the extracted strings and the template text, compiles the
        translations, and verifies that the compiled document contains the
        translated text and none of the source text.

        NOTE(review): the bracket lines of the list literals and the
        ``.append(`` line of the build loop were missing and are restored
        here.  Some fragment lines also look dropped (the template text
        lists ``u'and'`` / ``u'και'`` / ``u'σύνδεσμο'`` with no matching
        ``<tx>`` fragment); they are not re-invented -- confirm against the
        original test.
        """
        content = self.get_content('complex.docx')
        template, stringset = self.handler.parse(content)

        expected_strings = [
            u'a Title', u'a Subtitle', u'Heading 1', u'Heading 2',
            u'Heading 3', u'Internal <tx>styled</tx> link',
            [
                u'This '
                u'<tx>complex text</tx>', u'<tx> that</tx>'
                u'<tx> surrounds </tx>'
                u'<tx href="">a '
                u'<tx>external hyperlink</tx>'
                u'<tx> that </tx>'
                u'<tx>includes a</tx>'
                u'<tx> mix</tx></tx>'
                u'<tx> of </tx>'
                u'<tx>styles </tx>'
                u'<tx> it gets</tx>'
                u'<tx> parsed</tx>', u' as expected'
            ], u'Unordered item 1', u'Unordered item 2', u'Unordered item 3',
            u'Ordered item 1', u'Ordered item 2', u'Ordered item 3',
            u'Table 1.1', u'Table 1.2', u'Table 2.1', u'Table 2.2', u'↧↨↩'
        ]

        for extracted, expected in zip(stringset, expected_strings):
            self.assertEqual(extracted.string, u''.join(expected))

        docx = DocxFile(template)
        document = docx.get_document()
        expected_docx_source_text = [
            u'a Title', u'a Subtitle', u'Heading 1', u'Heading 2',
            u'Heading 3', u'Internal ', u'styled', u' link', u'This ',
            u'complex text', u'that', u'surrounds ', u'a ',
            u'external hyperlink', u' that ', u'includes a', u' mix', u' of ',
            u'styles ', u'and', u' it gets', u'parsed', u' as expected',
            u'Unordered item 1', u'Unordered item 2', u'Unordered item 3',
            u'Ordered item 1', u'Ordered item 2', u'Ordered item 3',
            u'Table 1.1', u'Table 1.2', u'Table 2.1', u'Table 2.2', u'↧↨↩'
        ]
        for text in expected_docx_source_text:
            self.assertIn(text, document)

        # NOTE(review): the URL literals in this test are empty.  An empty
        # string is contained in every string, so the "in" checks cannot
        # fail and the "not in" check cannot pass -- presumably the real
        # URLs were stripped; confirm and restore.
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        translated_strings = [
            u'Τίτλος', u'Υπότιτλος', u'Επικεφαλίδα 1', u'Επικεφαλίδα 2',
            u'Επικεφαλίδα 3', u'Eσωτερικός <tx>με στύλ</tx> σύνδεσμος',
            [
                u'Αυτό '
                u'<tx>σύνθετο κείμενο</tx>', u'<tx> το οποίο</tx>'
                u'<tx> περιβάλλει </tx>'
                u'<tx href="">έναν '
                u'<tx> που </tx>'
                u'<tx>περιέχει ένα</tx>'
                u'<tx> μείγμα</tx></tx>'
                u'<tx> από </tx>'
                u'<tx>στύλ </tx>'
                u'<tx> καταφέρνει να</tx>'
                u'<tx> αναλυθεί</tx>', u' όπως αναμένεται'
            ], u'Μη ταξινομημένο στοιχείο 1', u'Μη ταξινομημένο στοιχείο 2',
            u'Μη ταξινομημένο στοιχείο 3', u'Ταξινομημένο στοιχείο 1',
            u'Ταξινομημένο στοιχείο 2', u'Ταξινομημένο στοιχείο 3',
            u'Πίνακας 1.1', u'Πίνακας 1.2', u'Πίνακας 2.1', u'Πίνακας 2.2',
            u'Ειδικοί χαρακτήρες'
        ]

        # ``order`` counts from 1, as in the manual counter this replaces.
        translated_stringset = [
            OpenString(extracted.key, u''.join(translation), order=order)
            for order, (extracted, translation) in enumerate(
                zip(stringset, translated_strings), start=1)
        ]

        content = self.handler.compile(template, translated_stringset)
        template, stringset = self.handler.parse(content)

        docx = DocxFile(content)
        document = docx.get_document()
        document_rels = docx.get_document_rels()

        for extracted, expected in zip(stringset, translated_strings):
            self.assertEqual(extracted.string, u''.join(expected))

        for text in expected_docx_source_text:
            self.assertNotIn(text, document)

        for url in [u'']:
            self.assertNotIn(url, document_rels)

        translated_text = [
            u'Τίτλος', u'Υπότιτλος', u'Επικεφαλίδα 1', u'Επικεφαλίδα 2',
            u'Επικεφαλίδα 3', u'Eσωτερικός ', u'με στύλ', u' σύνδεσμος',
            u'Αυτό ', u'σύνθετο κείμενο', u'το οποίο', u' περιβάλλει ',
            u'έναν ', u'σύνδεσμο', u' που ', u'περιέχει ένα', u' μείγμα',
            u' από ', u'στύλ ', u'και', u' καταφέρνει να', u'αναλυθεί',
            u' όπως αναμένεται', u'Μη ταξινομημένο στοιχείο 1',
            u'Μη ταξινομημένο στοιχείο 2', u'Μη ταξινομημένο στοιχείο 3',
            u'Ταξινομημένο στοιχείο 1', u'Ταξινομημένο στοιχείο 2',
            u'Ταξινομημένο στοιχείο 3', u'Πίνακας 1.1', u'Πίνακας 1.2',
            u'Πίνακας 2.1', u'Πίνακας 2.2', u'Ειδικοί χαρακτήρες'
        ]

        for text in translated_text:
            self.assertIn(text, document)

        for url in [u'']:
            self.assertIn(url, document_rels)
Beispiel #11
    def test_docx_file(self):
        """Read, modify and re-compress a DOCX through ``DocxFile``.

        Loads ``hello_world.docx`` from ``TESTFILE_BASE``, checks its
        document text, replaces the document and its rels, compresses, and
        verifies that a ``DocxFile`` built from the compressed content
        returns the replaced values.
        """
        path = '{}/hello_world.docx'.format(self.TESTFILE_BASE)
        with open(path, 'rb') as f:
            # The assignment had no right-hand side (a syntax error); the
            # file is opened only to be read here.
            content = f.read()

        docx = DocxFile(content)

        document = docx.get_document()
        for text in [u'Hello world ', u'this is a link']:
            self.assertIn(text, document)

        # NOTE(review): the URL literal is empty in this copy of the test.
        # An empty string is contained in every string, so this check cannot
        # fail -- presumably the real URL was stripped; confirm and restore.
        document_rels = docx.get_document_rels()
        for url in [u'']:
            self.assertIn(url, document_rels)

        docx.set_document(u'Modified Document')
        docx.set_document_rels(u'Modified Document Rels')

        content = docx.compress()

        docx = DocxFile(content)
        self.assertEqual(docx.get_document(), u'Modified Document')
        self.assertEqual(docx.get_document_rels(), u'Modified Document Rels')