Example #1
0
    def import_file(self,
                    file_path,
                    options=None,
                    **kwargs):
        """Import a .docx file into the book or into a single chapter.

        The file is read with ``ooxml.read_from_file``. When
        ``self.is_chapter_mode`` is true the whole document is serialized
        and handed to ``_import_single_chapter`` for ``self.chapter``;
        otherwise it is split with ``importer.get_chapters`` and handed to
        ``_import_chapters``. Attachments and styles are imported next, the
        document is closed, ``_check_for_elements`` runs, and finally
        ``chapter_imported`` or ``book_imported`` is sent.

        Exceptions derived from ``Exception`` are not propagated: they are
        reported through ``self.notifier.error`` (and logged, except for
        ``zipfile.BadZipfile``).

        Args:
            file_path: Location of the document, passed to
                ``ooxml.read_from_file``.
            options: Passed to ``importer.get_chapters`` (unused in chapter
                mode). Defaults to ``{'scale_font_size': True}``.
            **kwargs: Only ``process_mode`` is read (default
                ``'overwrite'``); it is forwarded to
                ``_import_single_chapter``.
        """
        # Build the default per call: a dict literal in the signature would
        # be one shared object that a callee could mutate for later calls.
        if options is None:
            options = {'scale_font_size': True}

        self.delegate.notifier = self.notifier
        self.broken_images = []
        self.converted_images = []

        book = self.book
        process_mode = kwargs.get('process_mode', 'overwrite')

        try:
            self.dfile = ooxml.read_from_file(file_path)
            try:
                if self.is_chapter_mode:
                    chapter_content = serialize.serialize(
                        self.dfile.document, self._serialize_options)
                    self._import_single_chapter(
                        self.chapter, chapter_content, process_mode)
                else:
                    chapters = importer.get_chapters(
                        self.dfile.document,
                        options=options,
                        serialize_options=self._serialize_options)
                    self._import_chapters(book, chapters)

                # save attachments and styles
                self._import_attachments(book, self.dfile.document)
                self._import_styles(book)
            finally:
                # Close the document even when a step above raised, so a
                # failed import does not leave it open.
                self.dfile.close()

            self._check_for_elements()

            # trigger signal depending on the import mode
            # TODO: allow attaching user as sender on `book_imported` signal
            if self.is_chapter_mode:
                chapter_imported.send(sender=(self.user or self),
                                      chapter=self.chapter)
            else:
                book_imported.send(sender=self, book=book)

        except zipfile.BadZipfile:
            notif_msg = _(
                "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."
            )  # noqa
            self.notifier.error(notif_msg)
        except Exception as err:
            err_msg = _(
                "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."
            )  # noqa
            self.notifier.error(err_msg)
            # Let logging do the interpolation instead of formatting eagerly.
            logger.exception("Error trying to import docx file. Msg: %s", err)
Example #2
0
import six
import logging

import ooxml
from ooxml import parse, serialize, importer

# Send log records (INFO and above) to a local ooxml.log file.
logging.basicConfig(filename='ooxml.log', level=logging.INFO)

# Read the sample document and split it into (title, content) chapters.
file_name = '../files/02_split.docx'
dfile = ooxml.read_from_file(file_name)
chapters = importer.get_chapters(dfile.document)

# Each chapter is printed as a ruled title followed by its content.
rule = '===================================================================='
for title, content in chapters:
    for line in (rule, title, rule, content):
        six.print_(line)

Example #3
0
import six
import logging

import ooxml
from ooxml import parse, serialize, importer

# Log to ooxml.log at INFO level.
logging.basicConfig(level=logging.INFO, filename='ooxml.log')

# Sample document that gets split into chapters below.
file_name = '../files/02_split.docx'

dfile = ooxml.read_from_file(file_name)
chapters = importer.get_chapters(dfile.document)

# Horizontal rule printed above and below every chapter title.
banner = '===================================================================='

for title, content in chapters:
    six.print_(banner)
    six.print_(title)
    six.print_(banner)
    six.print_(content)
Example #4
0
    def import_file(self, file_path, book, options=None):
        """Import the chapters, attachments and styles of a .docx into ``book``.

        The file is read with ``ooxml.read_from_file`` and split into
        chapters with ``importer.get_chapters``, using custom serializers
        for math, footnote and endnote elements. Attachments, chapters and
        styles are then imported, the document is closed and
        ``_check_for_elements`` runs.

        Exceptions derived from ``Exception`` are not propagated: they are
        reported through ``self.notifier.error`` (and logged, except for
        ``zipfile.BadZipfile``).

        Args:
            file_path: Location of the document, passed to
                ``ooxml.read_from_file``.
            book: Target passed to the ``_import_*`` helpers.
            options: Passed to ``importer.get_chapters``. Any falsy value
                is replaced by ``{'scale_font_size': True}``.
        """
        self.delegate.notifier = self.notifier
        self.broken_images = []
        self.converted_images = []

        def serialize_empty(ctx, document, elem, root):
            # Emit nothing for this element; hand back the parent untouched.
            return root

        def append_note_marker(registry, el, root):
            # Appends e.g. <sup class="endnote" data-id="1454855960556">1</sup>
            # The same ``el.rid`` always maps to the same generated id.
            if el.rid not in registry:
                registry[el.rid] = uuid.uuid1().hex

            note = lxml.etree.SubElement(
                root, 'sup', {'class': 'endnote', 'data-id': registry[el.rid]})
            note.text = '1'

            return root

        def serialize_endnote(ctx, document, el, root):
            return append_note_marker(self.endnotes, el, root)

        def serialize_footnote(ctx, document, el, root):
            # NOTE(review): footnotes get the same 'endnote' class as
            # endnotes; kept as found, confirm this is intended.
            return append_note_marker(self.footnotes, el, root)

        if not options:
            options = {'scale_font_size': True}

        try:
            self.dfile = ooxml.read_from_file(file_path)
            try:
                serialize_options = {
                    'embed_styles': True,
                    'embed_fontsize': True,
                    # 'empty_paragraph_as_nbsp': True,
                    'serializers': {
                        doc.Math: serialize_empty,
                        doc.Footnote: serialize_footnote,
                        doc.Endnote: serialize_endnote
                    }
                }

                chapters = importer.get_chapters(
                    self.dfile.document, options=options,
                    serialize_options=serialize_options)

                self._import_attachments(book, self.dfile.document)
                self._import_chapters(book, chapters)

                # get the styles
                self._import_styles(book)
            finally:
                # Close the document even when a step above raised, so a
                # failed import does not leave it open.
                self.dfile.close()

            self._check_for_elements()
        except zipfile.BadZipfile:
            notif_msg = _("The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx.")  # noqa
            self.notifier.error(notif_msg)
        except Exception as err:
            err_msg = _("The docx file you uploaded contains errors and cannot be converted. Please contact customer support.")  # noqa
            self.notifier.error(err_msg)
            # Let logging do the interpolation instead of formatting eagerly.
            logger.exception("Error trying to import docx file. Msg: %s", err)
Example #5
0
    def import_file(self, file_path, book, options=None):
        """Import the chapters, attachments and styles of a .docx into ``book``.

        The file is read with ``ooxml.read_from_file`` and split into
        chapters with ``importer.get_chapters``. The serialize options set
        a header context and hooks from ``docutils`` and custom serializers
        for math, footnote and endnote elements. Attachments, chapters and
        styles are then imported, the document is closed and
        ``_check_for_elements`` runs.

        Exceptions derived from ``Exception`` are not propagated: they are
        reported through ``self.notifier.error`` (and logged, except for
        ``zipfile.BadZipfile``).

        Args:
            file_path: Location of the document, passed to
                ``ooxml.read_from_file``.
            book: Target passed to the ``_import_*`` helpers.
            options: Passed to ``importer.get_chapters``. Any falsy value
                is replaced by ``{'scale_font_size': True}``.
        """
        self.delegate.notifier = self.notifier
        self.broken_images = []
        self.converted_images = []

        def serialize_empty(ctx, document, elem, root):
            # Emit nothing for this element; hand back the parent untouched.
            return root

        def add_note_reference(known_ids, el, root):
            # Appends e.g. <sup class="endnote" data-id="1454855960556">1</sup>
            # The same ``el.rid`` always maps to the same generated id.
            if el.rid not in known_ids:
                known_ids[el.rid] = uuid.uuid1().hex

            note = lxml.etree.SubElement(root, 'sup', {
                'class': 'endnote',
                'data-id': known_ids[el.rid]
            })
            note.text = '1'

            return root

        def serialize_endnote(ctx, document, el, root):
            return add_note_reference(self.endnotes, el, root)

        def serialize_footnote(ctx, document, el, root):
            # NOTE(review): footnotes get the same 'endnote' class as
            # endnotes; kept as found, confirm this is intended.
            return add_note_reference(self.footnotes, el, root)

        if not options:
            options = {'scale_font_size': True}

        try:
            self.dfile = ooxml.read_from_file(file_path)
            try:
                # TODO: move this into a more customisable place.
                serialize_options = {
                    'header': docutils.DocHeaderContext,
                    'embed_styles': True,
                    'embed_fontsize': True,
                    # 'empty_paragraph_as_nbsp': True,
                    'serializers': {
                        doc.Math: serialize_empty,
                        doc.Footnote: serialize_footnote,
                        doc.Endnote: serialize_endnote
                    },
                    'hooks': {
                        'p': [docutils.hook_p],
                        'h': [docutils.check_h_tags_hook],
                        'table': [docutils.hook_infobox_table]
                    }
                }

                chapters = importer.get_chapters(
                    self.dfile.document,
                    options=options,
                    serialize_options=serialize_options)

                self._import_attachments(book, self.dfile.document)
                self._import_chapters(book, chapters)

                # get the styles
                self._import_styles(book)
            finally:
                # Close the document even when a step above raised, so a
                # failed import does not leave it open.
                self.dfile.close()

            self._check_for_elements()
        except zipfile.BadZipfile:
            notif_msg = _(
                "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."
            )  # noqa
            self.notifier.error(notif_msg)
        except Exception as err:
            err_msg = _(
                "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."
            )  # noqa
            self.notifier.error(err_msg)
            # Let logging do the interpolation instead of formatting eagerly.
            logger.exception("Error trying to import docx file. Msg: %s", err)