def __init__(self, opts, log, cover_data=None, toc=None):
    """Set up the Qt web view, the podofo document and the work queues.

    :param opts: options object; stored on the instance and passed to Page
    :param log: logger; exposed as both ``self.log`` and ``self.logger``
    :param cover_data: stored unchanged on the instance (optional)
    :param toc: stored unchanged on the instance (optional)
    """
    from calibre.gui2 import must_use_qt
    from calibre.utils.podofo import get_podofo

    # Qt has to be usable before any QObject is constructed.
    must_use_qt()
    QObject.__init__(self)

    # Plain state that does not depend on Qt or podofo.
    self.log = log
    self.logger = log
    self.opts = opts
    self.cover_data = cover_data
    self.toc = toc
    self.paged_js = None
    self.render_queue = []
    self.combine_queue = []

    # podofo module handle and the PDF document object created from it.
    self.podofo = get_podofo()
    self.doc = self.podofo.PDFDoc()

    # Event loop plus the web view and its page.
    self.loop = QEventLoop()
    self.view = QWebView()
    self.page = Page(opts, self.log)
    self.view.setPage(self.page)
    hints = QPainter.Antialiasing
    hints |= QPainter.TextAntialiasing
    hints |= QPainter.SmoothPixmapTransform
    self.view.setRenderHints(hints)
    # Queued connection: _render_html is invoked through the event loop
    # rather than directly from the loadFinished emission.
    self.view.loadFinished.connect(self._render_html,
                                   type=Qt.QueuedConnection)

    # Turn off scroll bars in both directions on the main frame.
    main_frame = self.view.page().mainFrame()
    for orientation in (Qt.Horizontal, Qt.Vertical):
        main_frame.setScrollBarPolicy(orientation, Qt.ScrollBarAlwaysOff)

    self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')
def print_finished(self, pdf_data):
    """Write the PDF bytes to OUTPUT, request application exit, then load
    the same bytes into a podofo document.

    :param pdf_data: raw PDF data, written in binary mode and passed to
        ``PDFDoc.load``
    """
    with open(OUTPUT, 'wb') as out_file:
        out_file.write(pdf_data)

    QApplication.instance().exit(0)

    # NOTE(review): the loaded document is not used further in the code
    # visible here; presumably load() serves as a parse check - confirm.
    pdf_doc = get_podofo().PDFDoc()
    pdf_doc.load(pdf_data)
def __init__(self, opts, log, cover_data=None, toc=None):
    """Create the rendering machinery: Qt view/page, podofo doc, queues.

    :param opts: options object; kept as ``self.opts`` and handed to Page
    :param log: logger; available as ``self.log`` and ``self.logger``
    :param cover_data: kept unchanged as ``self.cover_data`` (optional)
    :param toc: kept unchanged as ``self.toc`` (optional)
    """
    from calibre.gui2 import must_use_qt
    from calibre.utils.podofo import get_podofo

    # Must run before the QObject base class is initialised.
    must_use_qt()
    QObject.__init__(self)

    # Simple attributes first; none of them touch Qt or podofo.
    self.log = log
    self.logger = log
    self.opts = opts
    self.cover_data = cover_data
    self.toc = toc
    self.paged_js = None
    self.render_queue = []
    self.combine_queue = []

    # podofo module and a new PDF document object.
    self.podofo = get_podofo()
    self.doc = self.podofo.PDFDoc()

    # Qt side: local event loop, web view and the page it displays.
    self.loop = QEventLoop()
    self.view = QWebView()
    self.page = Page(opts, self.log)
    self.view.setPage(self.page)
    render_hints = QPainter.Antialiasing
    render_hints |= QPainter.TextAntialiasing
    render_hints |= QPainter.SmoothPixmapTransform
    self.view.setRenderHints(render_hints)
    # Connected as a queued connection, so _render_html runs via the
    # event loop instead of synchronously inside the signal emission.
    self.view.loadFinished.connect(self._render_html,
                                   type=Qt.QueuedConnection)

    # No scroll bars, horizontally or vertically, on the main frame.
    frame = self.view.page().mainFrame()
    for direction in (Qt.Horizontal, Qt.Vertical):
        frame.setScrollBarPolicy(direction, Qt.ScrollBarAlwaysOff)

    self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')
def test_merge_fonts():
    """Manual check: merge the fonts of the PDF named by the last CLI argument.

    Opens ``sys.argv[-1]`` with podofo, runs ``merge_fonts`` on the document
    and saves the result next to the input as ``<stem>-merged.pdf``, printing
    the output path.
    """
    import os

    path = sys.argv[-1]
    podofo = get_podofo()
    pdf_doc = podofo.PDFDoc()
    pdf_doc.open(path)
    merge_fonts(pdf_doc)
    # splitext only strips an extension of the final path component, so a
    # path without an extension (or with a dot in a directory name) still
    # produces an output file beside the input.
    out = os.path.splitext(path)[0] + '-merged.pdf'
    pdf_doc.save(out)
    print('Merged PDF written to', out)
def is_rasterbook(path, basic_return=True):
    """
    Decide whether the PDF at ``path`` looks like a scan (raster document)
    rather than a digitally authored text+graphics document.

    The document counts as raster when its number of image objects is
    within 5 of its number of pages (scanners such as Google Books add
    pure-text preamble pages, hence the tolerance). A document with no
    pages is never raster. When podofo cannot count the images, the
    document is reported as raster with an image count of None.

    If basic_return is True:
        return: the bool verdict
    otherwise:
        return: (bool verdict, number of pages, number of images)
    """
    def shape_result(verdict, page_total, image_total):
        # Callers choose between the bare verdict and the full triple.
        return verdict if basic_return else (verdict, page_total, image_total)

    printsd('enter is_rasterbook: {}'.format(path))
    pdf = get_podofo().PDFDoc()
    printsd('opens file')
    pdf.open(path)
    printsd('\n starts counting pages')
    pages = pdf.page_count()
    printsd('\n number of pages: {}'.format(pages))

    try:
        # Many PDFs make image_count fail with
        #   podofo.Error: A NULL handle was passed, but initialized data
        #   was expected.
        # Either a bug in calibre's podofo image_count method
        # https://github.com/kovidgoyal/calibre/blob/master/src/calibre/utils/podofo/doc.cpp#L146
        # or a malformed PDF. Not a big concern: it is raised mostly for
        # image-heavy PDFs.
        images = pdf.image_count()
    except:
        # Deliberately bare: the C++ Error class could not be imported, so
        # the exception type is recognised by its printed name below and
        # anything else is re-raised untouched.
        # TODO: error type in except statement
        import inspect
        error_info = sys.exc_info()
        prints("Unexpected error: {}".format(error_info))
        prints("from module: {}".format(inspect.getmodule(error_info[0])))
        if object.__str__(error_info[0]) == "<class 'podofo.Error'>":
            # TODO: WARN or ASK user what to do, image count is unknown
            return shape_result(True, pages, None)
        raise

    prints("pages(%s) : images(%s) > %s" % (pages, images, path))
    if pages > 0:
        return shape_result(abs(pages - images) <= 5, pages, images)
    return shape_result(False, pages, images)
def test_merge_fonts():
    """Manual check: merge the fonts of the PDF named by the last CLI argument.

    Opens ``sys.argv[-1]`` with podofo, runs ``merge_fonts`` on the document
    (logging through calibre's ``default_log``) and saves the result next to
    the input as ``<stem>-merged.pdf``, printing the output path.
    """
    import os

    from calibre.utils.logging import default_log

    path = sys.argv[-1]
    podofo = get_podofo()
    pdf_doc = podofo.PDFDoc()
    pdf_doc.open(path)
    merge_fonts(pdf_doc, default_log)
    # splitext only strips an extension of the final path component, so a
    # path without an extension (or with a dot in a directory name) still
    # produces an output file beside the input.
    out = os.path.splitext(path)[0] + '-merged.pdf'
    pdf_doc.save(out)
    print('Merged PDF written to', out)
def is_rasterbook(path, basic_return=True):
    """
    Tell whether the PDF at ``path`` is mostly raster images (a scan) as
    opposed to a digitally authored text+graphics document.

    The test: the number of image objects must be within 5 of the number
    of pages (scanners such as Google Books prepend pure-text preamble
    pages, hence the tolerance). A document with no pages is never raster.
    When podofo fails to count the images, the document is reported as
    raster with an image count of None.

    If basic_return is True:
        return: the bool verdict
    otherwise:
        return: (bool verdict, number of pages, number of images)
    """
    def pack(verdict, page_total, image_total):
        # Either just the verdict or the full (verdict, pages, images).
        return verdict if basic_return else (verdict, page_total, image_total)

    printsd('enter is_rasterbook: {}'.format(path))
    pdf = get_podofo().PDFDoc()
    printsd('opens file')
    pdf.open(path)
    printsd('\n starts counting pages')
    pages = pdf.page_count()
    printsd('\n number of pages: {}'.format(pages))

    try:
        # Without this guard many PDFs fail with
        #   podofo.Error: A NULL handle was passed, but initialized data
        #   was expected.
        # Probably a bug in calibre's podofo image_count method:
        # https://github.com/kovidgoyal/calibre/blob/master/src/calibre/utils/podofo/doc.cpp#L146
        images = pdf.image_count()
    except:
        # Deliberately bare: the C++ Error class could not be imported, so
        # the exception type is recognised by its printed name below and
        # anything else is re-raised untouched.
        # TODO: error type in except statement
        import inspect
        error_info = sys.exc_info()
        prints("Unexpected error: {}".format(error_info))
        prints("from module: {}".format(inspect.getmodule(error_info[0])))
        if object.__str__(error_info[0]) == "<class 'podofo.Error'>":
            # TODO: WARN or ASK user what to do, image count is unknown
            return pack(True, pages, None)
        raise

    prints("pages(%s) : images(%s) > %s" % (pages, images, path))
    if pages > 0:
        return pack(abs(pages - images) <= 5, pages, images)
    return pack(False, pages, images)
def dump(self, items, out_stream, pdf_metadata):
    """Render ``items`` into a PDF, stamp it with metadata and write it out.

    The images are first rendered to a temporary PDF file, which is then
    loaded with podofo so creator, title, author and (when present)
    keywords can be set before the final bytes are written.

    :param items: passed through to ``self.render_images``
    :param out_stream: object with a ``write`` method receiving the PDF bytes
    :param pdf_metadata: object providing ``title``, ``author`` and ``tags``;
        also stored as ``self.metadata``
    """
    from calibre.utils.podofo import get_podofo

    # Only the file name is needed; the handle is closed immediately so the
    # renderer can write to that path.
    f = PersistentTemporaryFile('_comic2pdf.pdf')
    f.close()
    self.metadata = pdf_metadata
    try:
        self.render_images(f.name, pdf_metadata, items)
        with open(f.name, 'rb') as x:
            raw = x.read()
        doc = get_podofo().PDFDoc()
        doc.load(raw)
        doc.creator = u'%s %s [http://calibre-ebook.com]' % (
            __appname__, __version__)
        doc.title = self.metadata.title
        doc.author = self.metadata.author
        if self.metadata.tags:
            doc.keywords = self.metadata.tags
        raw = doc.write()
        out_stream.write(raw)
    finally:
        # Best-effort cleanup: a file that cannot be removed is ignored, but
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        try:
            os.remove(f.name)
        except OSError:
            pass
def dump(self, items, out_stream, pdf_metadata):
    """Render ``items`` into a PDF, stamp it with metadata and write it out.

    The images are first rendered to a temporary PDF file, which is then
    loaded with podofo so creator, title, author and (when present)
    keywords can be set before the final bytes are written.

    :param items: passed through to ``self.render_images``
    :param out_stream: object with a ``write`` method receiving the PDF bytes
    :param pdf_metadata: object providing ``title``, ``author`` and ``tags``;
        also stored as ``self.metadata``
    """
    from calibre.utils.podofo import get_podofo

    # Only the file name is needed; the handle is closed immediately so the
    # renderer can write to that path.
    f = PersistentTemporaryFile('_comic2pdf.pdf')
    f.close()
    self.metadata = pdf_metadata
    try:
        self.render_images(f.name, pdf_metadata, items)
        with open(f.name, 'rb') as x:
            raw = x.read()
        doc = get_podofo().PDFDoc()
        doc.load(raw)
        doc.creator = u'%s %s [http://calibre-ebook.com]' % (
            __appname__, __version__)
        doc.title = self.metadata.title
        doc.author = self.metadata.author
        if self.metadata.tags:
            doc.keywords = self.metadata.tags
        raw = doc.write()
        out_stream.write(raw)
    finally:
        # Best-effort cleanup: a file that cannot be removed is ignored, but
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        try:
            os.remove(f.name)
        except OSError:
            pass
def data_as_pdf_doc(data):
    """Return a new podofo ``PDFDoc`` with ``data`` loaded into it.

    :param data: PDF content handed to ``PDFDoc.load``
    """
    pdf_doc = get_podofo().PDFDoc()
    pdf_doc.load(data)
    return pdf_doc