def render_to_pdf(self, temp_file):
    """Render the docx file at ``temp_file`` to PDF and return the PDF path.

    The document is read with ``ooxml``, serialized to HTML, wrapped in a
    minimal HTML page, written to a temporary ``.html`` file and converted
    with ``pdfkit``. Both temporary paths are obtained from
    ``self.generate_temp_file`` inside a directory created with
    ``tempfile.mkdtemp()``.

    :param temp_file: path of the docx file to convert.
    :returns: path of the generated PDF file. The PDF and the temporary
        directory are not removed here; the caller is responsible for them.
    """
    tempname = tempfile.mkdtemp()
    temp_out_file_html = self.generate_temp_file(tempname, suffix='html')
    temp_out_file_pdf = self.generate_temp_file(tempname, suffix='pdf')

    # Always release the docx handle, even if serialization fails.
    ofile = ooxml.read_from_file(temp_file)
    try:
        body = unicode(serialize.serialize(ofile.document), 'utf-8')
    finally:
        ofile.close()

    html = """<html style="height: 100%"> <head> <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/> <meta http-equiv="content-type" content="text/html; charset=utf-8"/> </head> <body> """  # noqa
    html += body
    html += "</body></html>"

    try:
        with codecs.open(temp_out_file_html, 'w', 'utf-8') as f:
            f.write(html)

        pdfkit.from_file(temp_out_file_html, temp_out_file_pdf)
    finally:
        # The intermediate HTML is only needed for the conversion; remove it
        # whether or not pdfkit succeeded so failed runs do not leave it
        # behind.
        if os.path.exists(temp_out_file_html):
            os.remove(temp_out_file_html)

    return temp_out_file_pdf
def import_file(self, file_path, options=None, **kwargs):
    """Import the docx file at ``file_path`` into the current book/chapter.

    In chapter mode (``self.is_chapter_mode``) the whole document is
    serialized and passed to ``_import_single_chapter``; otherwise the
    document is split with ``importer.get_chapters`` and passed to
    ``_import_chapters``. Attachments and styles are imported afterwards and
    ``chapter_imported`` or ``book_imported`` is sent.

    Failures are not propagated to the caller: they are reported through
    ``self.notifier.error`` (and logged for unexpected errors).

    :param file_path: path of the docx file to import.
    :param options: options forwarded to ``importer.get_chapters``
        (book mode only). Defaults to ``{'scale_font_size': True}``.
    :param kwargs: ``process_mode`` (default ``'overwrite'``) is forwarded
        to ``_import_single_chapter`` (chapter mode only).
    """
    # Build the default per call instead of sharing one mutable dict
    # between all invocations.
    if options is None:
        options = {'scale_font_size': True}

    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    book = self.book
    process_mode = kwargs.get('process_mode', 'overwrite')

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the docx handle even when one of the import steps fails.
        try:
            if self.is_chapter_mode:
                chapter_content = serialize.serialize(
                    self.dfile.document, self._serialize_options)
                self._import_single_chapter(
                    self.chapter, chapter_content, process_mode)
            else:
                chapters = importer.get_chapters(
                    self.dfile.document, options=options,
                    serialize_options=self._serialize_options)
                self._import_chapters(book, chapters)

            # save attachments and styles
            self._import_attachments(book, self.dfile.document)
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()

        # trigger signal depending on the import mode
        # TODO: allow attaching user as sender on `book_imported` signal
        if self.is_chapter_mode:
            chapter_imported.send(
                sender=(self.user or self), chapter=self.chapter)
        else:
            book_imported.send(sender=self, book=book)

    except zipfile.BadZipfile:
        notif_msg = _(
            "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."
        )  # noqa
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _(
            "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."
        )  # noqa
        self.notifier.error(err_msg)
        # Lazy formatting: the logging module renders the message itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)
import six
import logging

import ooxml
from ooxml import parse, serialize, importer

# Log records at INFO level and above go to ooxml.log.
logging.basicConfig(filename='ooxml.log', level=logging.INFO)

# Sample document that gets split into chapters.
file_name = '../files/02_split.docx'
separator = '===================================================================='

dfile = ooxml.read_from_file(file_name)
chapters = importer.get_chapters(dfile.document)

# Print each chapter as a banner-framed title followed by its content.
for title, content in chapters:
    for text in (separator, title, separator, content):
        six.print_(text)
import six
import logging

import ooxml
from ooxml import parse, serialize, importer

# Write INFO-and-above log records to ooxml.log.
logging.basicConfig(filename='ooxml.log', level=logging.INFO)


def _print_chapter(heading, body):
    # Show the heading between two rule lines, then the chapter body.
    rule = '===================================================================='
    six.print_(rule)
    six.print_(heading)
    six.print_(rule)
    six.print_(body)


file_name = '../files/02_split.docx'
dfile = ooxml.read_from_file(file_name)
chapters = importer.get_chapters(dfile.document)

# Dump every (title, content) pair returned by the importer.
for title, content in chapters:
    _print_chapter(title, content)
def import_file(self, file_path, book, options=None):
    """Import the docx file at ``file_path`` into ``book``.

    The document is split into chapters with ``importer.get_chapters``
    using custom serializers for math, footnote and endnote elements; then
    attachments, chapters and styles are imported into ``book``.

    Failures are not propagated to the caller: they are reported through
    ``self.notifier.error`` (and logged for unexpected errors).

    :param file_path: path of the docx file to import.
    :param book: book object handed to the ``_import_*`` helpers.
    :param options: options forwarded to ``importer.get_chapters``; a falsy
        value is replaced by ``{'scale_font_size': True}``.
    """
    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    def serialize_empty(ctx, document, elem, root):
        # Emit nothing for this element; the output tree is left untouched.
        return root

    def make_note_serializer(registry_name):
        # Build a serializer that adds a <sup> marker for a note and
        # remembers the generated id per ``el.rid`` in the mapping stored
        # on ``self`` under ``registry_name``.
        def serialize_note(ctx, document, el, root):
            # <sup class="endnote" data-id="1454855960556">1</sup>
            # Looked up at call time so a re-assigned mapping is honoured.
            registry = getattr(self, registry_name)
            if el.rid not in registry:
                registry[el.rid] = str(uuid.uuid1()).replace('-', '')
            data_id = registry[el.rid]

            # NOTE(review): footnotes also get class "endnote" and every
            # marker has the text "1", exactly as in the original code.
            note = lxml.etree.SubElement(
                root, 'sup', {'class': 'endnote', 'data-id': data_id})
            note.text = '1'
            return root

        return serialize_note

    if not options:
        options = {'scale_font_size': True}

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the docx handle even when one of the import steps fails.
        try:
            serialize_options = {
                'embed_styles': True,
                'embed_fontsize': True,
                # 'empty_paragraph_as_nbsp': True,
                'serializers': {
                    doc.Math: serialize_empty,
                    doc.Footnote: make_note_serializer('footnotes'),
                    doc.Endnote: make_note_serializer('endnotes')
                }
            }

            chapters = importer.get_chapters(
                self.dfile.document, options=options,
                serialize_options=serialize_options)

            self._import_attachments(book, self.dfile.document)
            self._import_chapters(book, chapters)

            # get the styles
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()

    except zipfile.BadZipfile:
        notif_msg = _("The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx.")  # noqa
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _("The docx file you uploaded contains errors and cannot be converted. Please contact customer support.")  # noqa
        self.notifier.error(err_msg)
        # Lazy formatting: the logging module renders the message itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)
def import_file(self, file_path, book, options=None):
    """Import the docx file at ``file_path`` into ``book``.

    The document is split into chapters with ``importer.get_chapters``
    using a custom header context, custom serializers for math, footnote
    and endnote elements, and the ``docutils`` hooks for paragraphs,
    headings and tables; then attachments, chapters and styles are imported
    into ``book``.

    Failures are not propagated to the caller: they are reported through
    ``self.notifier.error`` (and logged for unexpected errors).

    :param file_path: path of the docx file to import.
    :param book: book object handed to the ``_import_*`` helpers.
    :param options: options forwarded to ``importer.get_chapters``; a falsy
        value is replaced by ``{'scale_font_size': True}``.
    """
    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    def serialize_empty(ctx, document, elem, root):
        # Emit nothing for this element; the output tree is left untouched.
        return root

    def make_note_serializer(registry_name):
        # Build a serializer that adds a <sup> marker for a note and
        # remembers the generated id per ``el.rid`` in the mapping stored
        # on ``self`` under ``registry_name``.
        def serialize_note(ctx, document, el, root):
            # <sup class="endnote" data-id="1454855960556">1</sup>
            # Looked up at call time so a re-assigned mapping is honoured.
            registry = getattr(self, registry_name)
            if el.rid not in registry:
                registry[el.rid] = str(uuid.uuid1()).replace('-', '')
            data_id = registry[el.rid]

            # NOTE(review): footnotes also get class "endnote" and every
            # marker has the text "1", exactly as in the original code.
            note = lxml.etree.SubElement(root, 'sup', {
                'class': 'endnote',
                'data-id': data_id
            })
            note.text = '1'
            return root

        return serialize_note

    if not options:
        options = {'scale_font_size': True}

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the docx handle even when one of the import steps fails.
        try:
            # TODO: move this into a more customisable place.
            serialize_options = {
                'header': docutils.DocHeaderContext,
                'embed_styles': True,
                'embed_fontsize': True,
                # 'empty_paragraph_as_nbsp': True,
                'serializers': {
                    doc.Math: serialize_empty,
                    doc.Footnote: make_note_serializer('footnotes'),
                    doc.Endnote: make_note_serializer('endnotes')
                },
                'hooks': {
                    'p': [docutils.hook_p],
                    'h': [docutils.check_h_tags_hook],
                    'table': [docutils.hook_infobox_table]
                }
            }

            chapters = importer.get_chapters(
                self.dfile.document,
                options=options,
                serialize_options=serialize_options)

            self._import_attachments(book, self.dfile.document)
            self._import_chapters(book, chapters)

            # get the styles
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()

    except zipfile.BadZipfile:
        notif_msg = _(
            "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."
        )  # noqa
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _(
            "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."
        )  # noqa
        self.notifier.error(err_msg)
        # Lazy formatting: the logging module renders the message itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)