def render_html(path_to_html, width=590, height=750, as_xhtml=True):
    """Load an HTML file into an off-screen QWebPage and return its renderer.

    The page is given a white background, a fixed viewport and no scroll
    bars, then loaded while a local QEventLoop runs. An ``HTMLRenderer`` is
    connected to the page's ``loadFinished`` signal.

    :param path_to_html: Path to the HTML file; it is made absolute and the
        load happens with the file's directory as the current directory.
    :param width: Viewport width handed to ``QWebPage.setViewportSize``.
    :param height: Viewport height handed to ``QWebPage.setViewportSize``.
    :param as_xhtml: If True, feed the raw file bytes to the frame as
        ``application/xhtml+xml``; otherwise let the frame load the file URL.
    :return: The ``HTMLRenderer`` instance, or ``None`` when
        ``is_ok_to_use_qt()`` reports that Qt cannot be used.
    """
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
    from calibre.gui2 import is_ok_to_use_qt
    if not is_ok_to_use_qt():
        return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
        pal = page.palette()
        pal.setBrush(QPalette.Background, Qt.white)
        page.setPalette(pal)
        page.setViewportSize(QSize(width, height))
        frame = page.mainFrame()
        frame.setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        frame.setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        loop = QEventLoop()
        renderer = HTMLRenderer(page, loop)
        page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
        url = QUrl.fromLocalFile(path_to_html)
        if as_xhtml:
            # Read through a context manager so the file handle is closed
            # deterministically instead of being left to the garbage
            # collector.
            with open(path_to_html, 'rb') as f:
                raw = f.read()
            frame.setContent(raw, 'application/xhtml+xml', url)
        else:
            frame.load(url)
        # NOTE(review): presumably HTMLRenderer quits this loop once the
        # load has finished -- confirm against its definition.
        loop.exec_()
    # Break the references between the renderer and the Qt objects before
    # dropping our own.
    renderer.loop = renderer.page = None
    page.loadFinished.disconnect()
    del page
    del loop
    if isinstance(renderer.exception, ParserError) and as_xhtml:
        # Strict XHTML parsing failed; retry once as ordinary HTML.
        return render_html(path_to_html, width=width, height=height,
                           as_xhtml=False)
    return renderer
class StatsCollector(object):
    """Gather font statistics by loading every spine item into a QWebView.

    All of the work is driven from the constructor: it queues the
    container's spine items, starts a local QEventLoop and loads the items
    one after another. Once the constructor returns, the results are
    available as attributes:

    * ``font_stats``: font resource name -> set of characters that matched
      one of that font's @font-face rules.
    * ``font_rule_map``: item name -> list of accepted @font-face rules.
    * ``all_font_rules``: font resource name -> its @font-face rule.
    * ``font_usage_map``: item name -> dict of per-font usage records
      (records are only added when ``do_embed`` is true).
    * ``font_spec_map``: item name -> set of font family names (only
      filled when ``do_embed`` is true).
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for *container*.

        :param container: Book container; must provide ``log``,
            ``spine_items``, ``relpath``, ``abspath_to_name`` and
            ``has_name``.
        :param do_embed: Also record per-font usage and font family names.
        :raises Exception: If the event loop finishes with exit code 1,
            i.e. rendering or collection failed for some item.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL,
                                log=logging.getLogger('calibre.css'))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                                       type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Kick off the first load from inside the event loop.
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def render_book(self):
        """Load the next queued item, or stop the event loop when done."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: whatever went wrong, the event loop has to
            # be told to stop with the failure code.
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item and start loading it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: harvest stats, then move on.

        :param ok: Success flag delivered by the ``loadFinished`` signal.
        """
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Map a ``file://`` URL to a name in the container.

        :param href: URL as reported by the rendered page.
        :param warn_name: Description of where the URL came from, used only
            in warning messages.
        :return: The container name, or ``None`` (after logging a warning)
            if the URL is not a ``file://`` URL or the container has no
            such name.
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        # On Windows the path looks like /C:/...; drop the leading slash.
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name, 'ignoring')
            return None
        return name

    @staticmethod
    def _quote_css_url(src):
        """Return *src* with a bare ``url(...)`` argument wrapped in quotes.

        Values that are not of the form ``url(...)`` or whose argument is
        already quoted are returned unchanged.
        """
        if src.startswith('url(') and src.endswith(')') and src[4] not in {'"', "'"}:
            # Quote the url otherwise cssutils fails to parse it if it has
            # ' or " in it
            return "url('" + src[4:-1].replace("'", "\\'") + "')"
        return src

    def collect_font_stats(self):
        """Record font rules and font usage for the currently loaded item.

        :raises Exception: If the page's JavaScript bridge does not return
            the expected list/dict for any of the three queries.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            src = self._quote_css_url(src)
            # Parse the src as a background-image value to extract the URI.
            style = self.parser.parseStyle('background-image:%s'%src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(src, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        # The item's container name is needed several times below; look it
        # up once.
        item_name = self.container.abspath_to_name(self.current_item)

        self.font_rule_map[item_name] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule
            self.font_stats.setdefault(rule['src'], set())

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[item_name] = fu = defaultdict(dict)
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Every distinct character drawn with this font.
            text = set()
            for t in font['text']:
                text.update(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # Usage records are keyed on the first family name plus
                    # the weight/style/stretch values.
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        # Store a plain dict so later lookups cannot create entries.
        self.font_usage_map[item_name] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[item_name] = fs = set()
            for raw in font_families:
                style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
class PDFWriter(QObject):
    """Render a list of HTML files into a PDF by painting a QWebView.

    Each file is loaded into the view, laid out by the ``paged_display``
    JavaScript and painted screen by screen onto a ``PdfDevice``. The
    instance is also exposed to the page's JavaScript as ``py_bridge``.
    """

    def _pass_json_value_getter(self):
        """Return ``bridge_value`` serialized as JSON in a QString."""
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        """Store the decoded JSON *value* in ``bridge_value``."""
        self.bridge_value = json.loads(unicode(value))

    # Property through which JavaScript exchanges JSON values with Python.
    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    @pyqtSlot(result=unicode)
    def title(self):
        """Return the document title taken from the PDF metadata."""
        return self.doc_title

    @pyqtSlot(result=unicode)
    def author(self):
        """Return the document author taken from the PDF metadata."""
        return self.doc_author

    @pyqtSlot(result=unicode)
    def section(self):
        """Return the title of the section currently being rendered."""
        return self.current_section

    def __init__(self, opts, log, cover_data=None, toc=None):
        """Set up the web view used for rendering.

        :param opts: Conversion options; stored and read in ``dump()``.
        :param log: Logger used for all messages.
        :param cover_data: Raw image data for a cover page, or ``None``.
        :param toc: Table of contents used for the outline and section
            titles, or ``None``.
        :raises Exception: If ``is_ok_to_use_qt()`` returns false.
        """
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing |
                                 QPainter.TextAntialiasing |
                                 QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        # Default progress callback does nothing useful; callers may
        # replace it.
        self.report_progress = lambda x, y: x
        self.current_section = ''

    def dump(self, items, out_stream, pdf_metadata):
        """Render *items* to a PDF written to *out_stream*.

        :param items: List of HTML file paths, rendered in order. The list
            is consumed (items are popped from it).
        :param out_stream: Stream handed to ``PdfDevice``.
        :param pdf_metadata: Object with ``title``, ``author`` and ``tags``.
        :raises Exception: If the render loop exits with code 1 or the
            device reports errors.
        """
        opts = self.opts
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        # We cannot set the side margins in the webview as there is no right
        # margin for the last page (the margins are implemented with
        # -webkit-column-gap)
        ml, mr = opts.margin_left, opts.margin_right
        self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml,
                             top_margin=0, right_margin=mr, bottom_margin=0,
                             xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
                             debug=self.log.debug,
                             compress=not opts.uncompressed_pdf,
                             mark_links=opts.pdf_mark_links)
        self.footer = opts.pdf_footer_template
        if self.footer:
            self.footer = self.footer.strip()
        if not self.footer and opts.pdf_page_numbers:
            self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>'
        self.header = opts.pdf_header_template
        if self.header:
            self.header = self.header.strip()
        # Headers and footers need at least this much margin to fit in.
        min_margin = 36
        if self.footer and opts.margin_bottom < min_margin:
            self.log.warn('Bottom margin is too small for footer, increasing it.')
            opts.margin_bottom = min_margin
        if self.header and opts.margin_top < min_margin:
            self.log.warn('Top margin is too small for header, increasing it.')
            opts.margin_top = min_margin

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        self.margin_top, self.margin_bottom = map(lambda x: int(floor(x)), (mt, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)
        self.doc_title = pdf_metadata.title
        self.doc_author = pdf_metadata.author
        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    self.doc.init_page()
                    draw_image_page(QRect(*self.doc.full_page_rect),
                                    self.painter, p,
                                    preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        # Start rendering from inside the event loop.
        QTimer.singleShot(0, self.render_book)
        if self.loop.exec_() == 1:
            raise Exception('PDF Output failed, see log for details')

        if self.toc is not None and len(self.toc) > 0:
            self.doc.add_outline(self.toc)

        self.painter.end()

        if self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        """Load the next queued item, or stop the event loop when done."""
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Whatever went wrong, the event loop must be stopped with the
            # failure code.
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item and start loading it into the view."""
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        """Slot for ``loadFinished``: paint the loaded item, then move on.

        :param ok: Success flag delivered by the ``loadFinished`` signal.
        """
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        # float() keeps the fraction correct even without true division.
        # The file name is interpolated after the gettext lookup so the
        # message can match a catalogue entry.
        self.report_progress(
            float(done) / self.total_items,
            _('Rendered %s') % os.path.basename(self.current_item))
        self.render_book()

    @property
    def current_page_num(self):
        """Current page number as tracked by the PDF device."""
        return self.doc.current_page_num

    def load_mathjax(self):
        """Load MathJax into the page if the page reports math content."""
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        mjpath = P(u'viewer/mathjax').replace(os.sep, '/')
        if iswindows:
            mjpath = u'/' + mjpath
        if evaljs(''' window.mathjax.base = %s; mathjax.check_for_math(); mathjax.math_present ''' % (json.dumps(mjpath, ensure_ascii=False))).toBool():
            self.log.debug('Math present, loading MathJax')
            # Pump the event loop until the page says MathJax has loaded.
            while not evaljs('mathjax.math_loaded').toBool():
                self.loop.processEvents(self.loop.ExcludeUserInputEvents)
            evaljs('document.getElementById("MathJax_Message").style.display="none";')

    def get_sections(self, anchor_map):
        """Map column numbers of the current item to section titles.

        :param anchor_map: Dict of anchor name -> dict with a ``'column'``
            entry, as returned by the page's JavaScript.
        :return: Dict of column number -> title of the first TOC entry
            that points at that column of the current item.
        """
        sections = {}
        ci = os.path.abspath(os.path.normcase(self.current_item))
        if self.toc is not None:
            for entry in self.toc.flat():
                path = entry.abspath or None
                frag = entry.fragment or None
                if path is None:
                    continue
                path = os.path.abspath(os.path.normcase(path))
                if path == ci:
                    # Entries without a known anchor start at column 0.
                    col = 0
                    if frag and frag in anchor_map:
                        col = anchor_map[frag]['column']
                    if col not in sections:
                        sections[col] = entry.text or _('Untitled')

        return sections

    def do_paged_render(self):
        """Lay out the loaded document and paint it to the PDF device."""
        if self.paged_js is None:
            # Compile the support scripts once and reuse them for every item.
            import uuid
            from calibre.utils.resources import compiled_coffeescript as cc
            self.paged_js = cc('ebooks.oeb.display.utils')
            self.paged_js += cc('ebooks.oeb.display.indexing')
            self.paged_js += cc('ebooks.oeb.display.paged')
            self.paged_js += cc('ebooks.oeb.display.mathjax')
            self.hf_uuid = str(uuid.uuid4()).replace('-', '')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        self.view.page().longjs_counter = 0
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.load_mathjax()

        evaljs(''' py_bridge.__defineGetter__('value', function() { return JSON.parse(this._pass_json_value); }); py_bridge.__defineSetter__('value', function(val) { this._pass_json_value = JSON.stringify(val); }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, %d, %d, %d); paged_display.layout(); paged_display.fit_images(); py_bridge.value = book_indexing.all_links_and_anchors(); ''' % (self.margin_top, 0, self.margin_bottom))

        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {'links': [], 'anchors': {}}  # Some javascript error occurred
        sections = self.get_sections(amap['anchors'])
        col = 0

        if self.header:
            self.bridge_value = self.header
            evaljs('paged_display.header_template = py_bridge.value')
        if self.footer:
            self.bridge_value = self.footer
            evaljs('paged_display.footer_template = py_bridge.value')
        if self.header or self.footer:
            evaljs('paged_display.create_header_footer("%s");' % self.hf_uuid)

        start_page = self.current_page_num

        mf = self.view.page().mainFrame()
        # Paint one screen per PDF page until the page reports no further
        # screen location.
        while True:
            if col in sections:
                self.current_section = sections[col]
            self.doc.init_page()
            if self.header or self.footer:
                evaljs('paged_display.update_header_footer(%d)' % self.current_page_num)
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs('window.scrollTo(%d, 0); paged_display.position_header_footer();' % nsl[0])
            if self.doc.errors_occurred:
                break
            col += 1

        if not self.doc.errors_occurred:
            self.doc.add_links(self.current_item, start_page, amap['links'],
                               amap['anchors'])
class PDFWriter(QObject):  # {{{
    """Render a list of HTML files to PDF with a Qt printer and podofo.

    The HTML files are painted onto a PDF printer that writes into a
    temporary directory; the resulting file (and an optional cover PDF) is
    appended to a podofo document, which is then given metadata and an
    outline and saved to the output stream.
    """

    def __init__(self, opts, log, cover_data=None, toc=None):
        """Set up the web view and the podofo document.

        :param opts: Conversion options, passed to ``Page`` and
            ``get_pdf_printer``.
        :param log: Logger used for all messages.
        :param cover_data: Cover image as ``bytes``; anything else means
            no cover page.
        :param toc: Table of contents handed to ``Outline``.
        :raises Exception: If ``is_ok_to_use_qt()`` returns false.
        """
        from calibre.gui2 import is_ok_to_use_qt
        from calibre.utils.podofo import get_podofo
        if not is_ok_to_use_qt():
            raise Exception("Not OK to use Qt")
        QObject.__init__(self)

        self.logger = self.log = log
        self.podofo = get_podofo()
        self.doc = self.podofo.PDFDoc()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing |
                                 QPainter.TextAntialiasing |
                                 QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u"_pdf_output_parts")

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

    def dump(self, items, out_stream, pdf_metadata):
        """Render *items* and write the finished PDF to *out_stream*.

        :param items: List of HTML file paths, rendered in order. The list
            is consumed (items are popped from it).
        :param out_stream: File-like object the PDF is saved to.
        :param pdf_metadata: Object with ``title``, ``author`` and ``tags``.
        :raises Exception: If rendering did not complete successfully.
        """
        self.metadata = pdf_metadata
        self._delete_tmpdir()
        self.outline = Outline(self.toc, items)

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream
        self.insert_cover()

        self.render_succeeded = False
        # Page numbering continues after whatever insert_cover() appended.
        self.current_page_num = self.doc.page_count()
        self.combine_queue.append(
            os.path.join(self.tmp_path, "qprinter_out.pdf"))
        self.first_page = True
        self.setup_printer(self.combine_queue[-1])
        # Start rendering from inside the event loop.
        QTimer.singleShot(0, self._render_book)
        self.loop.exec_()
        # _write() clears painter/printer on the success path; anything
        # still set here is left over from an aborted run.
        if self.painter is not None:
            self.painter.end()
        if self.printer is not None:
            self.printer.abort()

        if not self.render_succeeded:
            raise Exception("Rendering HTML to PDF failed")

    def _render_book(self):
        """Load the next queued item, or write the PDF when none remain."""
        try:
            if len(self.render_queue) == 0:
                self._write()
            else:
                self._render_next()
        except:
            # Whatever went wrong, the event loop must be stopped.
            self.logger.exception("Rendering failed")
            self.loop.exit(1)

    def _render_next(self):
        """Pop the first queued item and start loading it into the view."""
        item = unicode(self.render_queue.pop(0))

        self.logger.debug("Processing %s..." % item)
        self.current_item = item
        load_html(item, self.view)

    def _render_html(self, ok):
        """Slot for ``loadFinished``: paint the loaded item, then move on.

        :param ok: Success flag delivered by the ``loadFinished`` signal.
        """
        if ok:
            try:
                self.do_paged_render()
            except:
                # Mirror _render_book(): log the failure and stop the loop.
                # NOTE(review): an exception escaping this slot would
                # presumably never reach loop.exit(), leaving dump()
                # blocked in loop.exec_() -- confirm PyQt's slot behaviour.
                self.logger.exception("Rendering failed")
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error("Document cannot be rendered.")
            # dump() decides on render_succeeded, not the exit code; 1 is
            # used so the code agrees with the other failure paths.
            self.loop.exit(1)
            return
        self._render_book()

    def _pass_json_value_getter(self):
        """Return ``bridge_value`` serialized as JSON in a QString."""
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        """Store the decoded JSON *value* in ``bridge_value``."""
        self.bridge_value = json.loads(unicode(value))

    # Property through which JavaScript exchanges JSON values with Python.
    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    def setup_printer(self, outpath):
        """Create the PDF printer and painter that write to *outpath*.

        The painter is scaled by the ratio of printer DPI to view DPI and
        the page viewport is sized to the printer's page rectangle divided
        by that ratio.
        """
        self.printer = self.painter = None
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
        zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)
        pr = printer.pageRect()
        # Only publish the printer/painter once both are fully set up.
        self.printer, self.painter = printer, painter
        self.viewport_size = QSize(pr.width() / zoomx, pr.height() / zoomy)
        self.page.setViewportSize(self.viewport_size)

    def do_paged_render(self):
        """Lay out the loaded document, print it and record anchor pages."""
        if self.paged_js is None:
            # Compile the support scripts once and reuse them for every item.
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript("ebooks.oeb.display.utils")
            self.paged_js += compiled_coffeescript("ebooks.oeb.display.indexing")
            self.paged_js += compiled_coffeescript("ebooks.oeb.display.paged")

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs(""" py_bridge.__defineGetter__('value', function() { return JSON.parse(this._pass_json_value); }); py_bridge.__defineSetter__('value', function(val) { this._pass_json_value = JSON.stringify(val); }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, 0, 0, 0); paged_display.layout(); paged_display.fit_images(); """)
        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        if not self.first_page:
            # This item begins on a fresh page after the current one.
            start_page += 1
        # Print one screen per page until the page reports no further
        # screen location.
        while True:
            if not self.first_page:
                if self.printer.newPage():
                    self.current_page_num += 1
            self.first_page = False
            mf.render(self.painter)
            nsl = evaljs("paged_display.next_screen_location()").toInt()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs("window.scrollTo(%d, 0)" % nsl[0])

        # Ask the page where each anchor of interest ended up.
        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs("py_bridge.value = book_indexing.anchor_positions(py_bridge.value)")
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {}  # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor,
                                 start_page + pagenum, ypos)

    def append_doc(self, outpath):
        """Load the PDF at *outpath* and append it to the main document."""
        doc = self.podofo.PDFDoc()
        with open(outpath, "rb") as f:
            raw = f.read()
        doc.load(raw)
        self.doc.append(doc)

    def _delete_tmpdir(self):
        """Remove the temporary directory (if present) and create a new one."""
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory("_pdf_output_parts")

    def insert_cover(self):
        """Print the cover image to its own PDF and append it to the document.

        Does nothing unless ``cover_data`` is a ``bytes`` object.
        """
        if not isinstance(self.cover_data, bytes):
            return
        item_path = os.path.join(self.tmp_path, "cover.pdf")
        printer = get_pdf_printer(self.opts, output_file_name=item_path,
                                  for_comic=True)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(printer, painter, p,
                            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
            self.append_doc(item_path)
        printer.abort()

    def _write(self):
        """Finish printing, assemble the final document and save it.

        Always removes the temporary directory and stops the event loop,
        whether or not saving succeeded.
        """
        self.painter.end()
        self.printer.abort()
        self.painter = self.printer = None
        self.append_doc(self.combine_queue[-1])

        try:
            self.doc.creator = u"%s %s [http://calibre-ebook.com]" % (
                __appname__, __version__)
            self.doc.title = self.metadata.title
            self.doc.author = self.metadata.author
            if self.metadata.tags:
                self.doc.keywords = self.metadata.tags
            self.outline(self.doc)
            self.doc.save_to_fileobj(self.out_stream)
            self.render_succeeded = True
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
class PDFWriter(QObject):
    """Render a list of HTML files into a PDF by painting a QWebView.

    Each file is loaded into the view, laid out by the ``paged_display``
    JavaScript and painted screen by screen onto a ``PdfDevice``. The
    instance is also exposed to the page's JavaScript as ``py_bridge``.
    """

    def _pass_json_value_getter(self):
        """Return ``bridge_value`` serialized as JSON in a QString."""
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        """Store the decoded JSON *value* in ``bridge_value``."""
        self.bridge_value = json.loads(unicode(value))

    # Property through which JavaScript exchanges JSON values with Python.
    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    @pyqtSlot(result=unicode)
    def title(self):
        """Return the document title taken from the PDF metadata."""
        return self.doc_title

    @pyqtSlot(result=unicode)
    def author(self):
        """Return the document author taken from the PDF metadata."""
        return self.doc_author

    @pyqtSlot(result=unicode)
    def section(self):
        """Return the title of the section currently being rendered."""
        return self.current_section

    def __init__(self, opts, log, cover_data=None, toc=None):
        """Set up the web view used for rendering.

        :param opts: Conversion options; stored and read in ``dump()``.
        :param log: Logger used for all messages.
        :param cover_data: Raw image data for a cover page, or ``None``.
        :param toc: Table of contents used for the outline and section
            titles, or ``None``.
        :raises Exception: If ``is_ok_to_use_qt()`` returns false.
        """
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing|
                    QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        # Default progress callback does nothing useful; callers may
        # replace it.
        self.report_progress = lambda x, y: x
        self.current_section = ''

    def dump(self, items, out_stream, pdf_metadata):
        """Render *items* to a PDF written to *out_stream*.

        :param items: List of HTML file paths, rendered in order. The list
            is consumed (items are popped from it).
        :param out_stream: Stream handed to ``PdfDevice``.
        :param pdf_metadata: Object with ``title``, ``author`` and ``tags``.
        :raises Exception: If the render loop exits with code 1 or the
            device reports errors.
        """
        opts = self.opts
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        # We cannot set the side margins in the webview as there is no right
        # margin for the last page (the margins are implemented with
        # -webkit-column-gap)
        ml, mr = opts.margin_left, opts.margin_right
        self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml,
                             top_margin=0, right_margin=mr, bottom_margin=0,
                             xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
                             debug=self.log.debug, compress=not
                             opts.uncompressed_pdf,
                             mark_links=opts.pdf_mark_links)
        self.footer = opts.pdf_footer_template
        if self.footer:
            self.footer = self.footer.strip()
        if not self.footer and opts.pdf_page_numbers:
            self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>'
        self.header = opts.pdf_header_template
        if self.header:
            self.header = self.header.strip()
        # Headers and footers need at least this much margin to fit in.
        min_margin = 36
        if self.footer and opts.margin_bottom < min_margin:
            self.log.warn('Bottom margin is too small for footer, increasing it.')
            opts.margin_bottom = min_margin
        if self.header and opts.margin_top < min_margin:
            self.log.warn('Top margin is too small for header, increasing it.')
            opts.margin_top = min_margin

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        self.margin_top, self.margin_bottom = map(lambda x:int(floor(x)), (mt, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)
        self.doc_title = pdf_metadata.title
        self.doc_author = pdf_metadata.author
        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    self.doc.init_page()
                    draw_image_page(QRect(*self.doc.full_page_rect),
                            self.painter, p,
                            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        # Start rendering from inside the event loop.
        QTimer.singleShot(0, self.render_book)
        if self.loop.exec_() == 1:
            raise Exception('PDF Output failed, see log for details')

        if self.toc is not None and len(self.toc) > 0:
            self.doc.add_outline(self.toc)

        self.painter.end()

        if self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        """Load the next queued item, or stop the event loop when done."""
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Whatever went wrong, the event loop must be stopped with the
            # failure code.
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item and start loading it into the view."""
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        """Slot for ``loadFinished``: paint the loaded item, then move on.

        :param ok: Success flag delivered by the ``loadFinished`` signal.
        """
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        # float() keeps the fraction correct even without true division.
        # The file name is interpolated after the gettext lookup so the
        # message can match a catalogue entry.
        self.report_progress(float(done)/self.total_items,
                        _('Rendered %s')%os.path.basename(self.current_item))
        self.render_book()

    @property
    def current_page_num(self):
        """Current page number as tracked by the PDF device."""
        return self.doc.current_page_num

    def load_mathjax(self):
        """Load MathJax into the page if the page reports math content."""
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        mjpath = P(u'viewer/mathjax').replace(os.sep, '/')
        if iswindows:
            mjpath = u'/' + mjpath
        if evaljs(''' window.mathjax.base = %s; mathjax.check_for_math(); mathjax.math_present '''%(json.dumps(mjpath, ensure_ascii=False))).toBool():
            self.log.debug('Math present, loading MathJax')
            # Pump the event loop until the page says MathJax has loaded.
            while not evaljs('mathjax.math_loaded').toBool():
                self.loop.processEvents(self.loop.ExcludeUserInputEvents)
            evaljs('document.getElementById("MathJax_Message").style.display="none";')

    def get_sections(self, anchor_map):
        """Map column numbers of the current item to section titles.

        :param anchor_map: Dict of anchor name -> dict with a ``'column'``
            entry, as returned by the page's JavaScript.
        :return: Dict of column number -> title of the first TOC entry
            that points at that column of the current item.
        """
        sections = {}
        ci = os.path.abspath(os.path.normcase(self.current_item))
        if self.toc is not None:
            for entry in self.toc.flat():
                path = entry.abspath or None
                frag = entry.fragment or None
                if path is None:
                    continue
                path = os.path.abspath(os.path.normcase(path))
                if path == ci:
                    # Entries without a known anchor start at column 0.
                    col = 0
                    if frag and frag in anchor_map:
                        col = anchor_map[frag]['column']
                    if col not in sections:
                        sections[col] = entry.text or _('Untitled')

        return sections

    def do_paged_render(self):
        """Lay out the loaded document and paint it to the PDF device."""
        if self.paged_js is None:
            # Compile the support scripts once and reuse them for every item.
            import uuid
            from calibre.utils.resources import compiled_coffeescript as cc
            self.paged_js = cc('ebooks.oeb.display.utils')
            self.paged_js += cc('ebooks.oeb.display.indexing')
            self.paged_js += cc('ebooks.oeb.display.paged')
            self.paged_js += cc('ebooks.oeb.display.mathjax')
            self.hf_uuid = str(uuid.uuid4()).replace('-', '')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        self.view.page().longjs_counter = 0
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.load_mathjax()

        evaljs(''' py_bridge.__defineGetter__('value', function() { return JSON.parse(this._pass_json_value); }); py_bridge.__defineSetter__('value', function(val) { this._pass_json_value = JSON.stringify(val); }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, %d, %d, %d); paged_display.layout(); paged_display.fit_images(); py_bridge.value = book_indexing.all_links_and_anchors(); '''%(self.margin_top, 0, self.margin_bottom))

        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {'links':[], 'anchors':{}}  # Some javascript error occurred
        sections = self.get_sections(amap['anchors'])
        col = 0

        if self.header:
            self.bridge_value = self.header
            evaljs('paged_display.header_template = py_bridge.value')
        if self.footer:
            self.bridge_value = self.footer
            evaljs('paged_display.footer_template = py_bridge.value')
        if self.header or self.footer:
            evaljs('paged_display.create_header_footer("%s");'%self.hf_uuid)

        start_page = self.current_page_num

        mf = self.view.page().mainFrame()
        # Paint one screen per PDF page until the page reports no further
        # screen location.
        while True:
            if col in sections:
                self.current_section = sections[col]
            self.doc.init_page()
            if self.header or self.footer:
                evaljs('paged_display.update_header_footer(%d)'%self.current_page_num)
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs('window.scrollTo(%d, 0); paged_display.position_header_footer();'%nsl[0])
            if self.doc.errors_occurred:
                break
            col += 1

        if not self.doc.errors_occurred:
            self.doc.add_links(self.current_item, start_page, amap['links'],
                            amap['anchors'])
class StatsCollector(object):
    """Gather font statistics by loading every spine item into a QWebView.

    All of the work is driven from the constructor: it queues the
    container's spine items, starts a local QEventLoop and loads the items
    one after another. Once the constructor returns, the results are
    available as attributes:

    * ``font_stats``: font resource name -> set of characters that matched
      one of that font's @font-face rules.
    * ``font_rule_map``: item name -> list of accepted @font-face rules.
    * ``all_font_rules``: font resource name -> its @font-face rule.
    * ``font_usage_map``: item name -> dict of per-font usage records
      (records are only added when ``do_embed`` is true).
    * ``font_spec_map``: item name -> set of font family names (only
      filled when ``do_embed`` is true).
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for *container*.

        :param container: Book container; must provide ``log``,
            ``spine_items``, ``relpath``, ``abspath_to_name`` and
            ``has_name``.
        :param do_embed: Also record per-font usage and font family names.
        :raises Exception: If the event loop finishes with exit code 1,
            i.e. rendering or collection failed for some item.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger("calibre.css"))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Kick off the first load from inside the event loop.
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception("Failed to gather statistics from book, see log for details")

    def render_book(self):
        """Load the next queued item, or stop the event loop when done."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: whatever went wrong, the event loop has to
            # be told to stop with the failure code.
            self.logger.exception("Rendering failed")
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item and start loading it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: harvest stats, then move on.

        :param ok: Success flag delivered by the ``loadFinished`` signal.
        """
        if not ok:
            self.log.error("Failed to render document: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception("Failed to collect font stats from: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Map a ``file://`` URL to a name in the container.

        :param href: URL as reported by the rendered page.
        :param warn_name: Description of where the URL came from, used only
            in warning messages.
        :return: The container name, or ``None`` (after logging a warning)
            if the URL is not a ``file://`` URL or the container has no
            such name.
        """
        if not href.startswith("file://"):
            self.log.warn("Non-local URI in", warn_name, ":", href, "ignoring")
            return None
        src = href[len("file://"):]
        # On Windows the path looks like /C:/...; drop the leading slash.
        if iswindows and len(src) > 2 and (src[0], src[2]) == ("/", ":"):
            src = src[1:]
        src = src.replace("/", os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn("Missing resource", href, "in", warn_name, "ignoring")
            return None
        return name

    @staticmethod
    def _quote_css_url(src):
        """Return *src* with a bare ``url(...)`` argument wrapped in quotes.

        Values that are not of the form ``url(...)`` or whose argument is
        already quoted are returned unchanged.
        """
        if src.startswith("url(") and src.endswith(")") and src[4] not in {'"', "'"}:
            # Quote the url otherwise cssutils fails to parse it if it has
            # ' or " in it
            return "url('" + src[4:-1].replace("'", "\\'") + "')"
        return src

    def collect_font_stats(self):
        """Record font rules and font usage for the currently loaded item.

        :raises Exception: If the page's JavaScript bridge does not return
            the expected list/dict for any of the three queries.
        """
        self.page.evaljs("window.font_stats.get_font_face_rules()")
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception("Unknown error occurred while reading font-face rules")

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get("font-family", None)
            if not ff:
                continue
            style = self.parser.parseStyle("font-family:%s" % ff, validate=False)
            ff = [x.value for x in style.getProperty("font-family").propertyValue]
            if not ff or ff[0] == "inherit":
                continue
            rule["font-family"] = frozenset(icu_lower(f) for f in ff)
            src = rule.get("src", None)
            if not src:
                continue
            src = self._quote_css_url(src)
            # Parse the src as a background-image value to extract the URI.
            style = self.parser.parseStyle("background-image:%s" % src, validate=False)
            src = style.getProperty("background-image").propertyValue[0].uri
            name = self.href_to_name(src, "@font-face rule")
            if name is None:
                continue
            rule["src"] = name
            normalize_font_properties(rule)
            rule["width"] = widths[rule["font-stretch"]]
            rule["weight"] = int(rule["font-weight"])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        # The item's container name is needed several times below; look it
        # up once.
        item_name = self.container.abspath_to_name(self.current_item)

        self.font_rule_map[item_name] = rules
        for rule in rules:
            self.all_font_rules[rule["src"]] = rule
            self.font_stats.setdefault(rule["src"], set())

        self.page.evaljs("window.font_stats.get_font_usage()")
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception("Unknown error occurred while reading font usage")
        exclude = {"\n", "\r", "\t"}
        self.font_usage_map[item_name] = fu = defaultdict(dict)
        bad_fonts = {"serif", "sans-serif", "monospace", "cursive", "fantasy", "sansserif", "inherit"}
        for font in font_usage:
            # Every distinct character drawn with this font.
            text = set()
            for t in font["text"]:
                text.update(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule["src"]] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get("font-family", [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {"font-weight", "font-style", "font-stretch", "font-family"}
                    # Usage records are keyed on the first family name plus
                    # the weight/style/stretch values.
                    key = frozenset(((k, ff[0] if k == "font-family" else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        val.update({k: (font[k][0] if k == "font-family" else font[k]) for k in keys})
                        val["text"] = set()
                    val["text"] |= text
        # Store a plain dict so later lookups cannot create entries.
        self.font_usage_map[item_name] = dict(fu)

        if self.do_embed:
            self.page.evaljs("window.font_stats.get_font_families()")
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception("Unknown error occurred while reading font families")
            self.font_spec_map[item_name] = fs = set()
            for raw in font_families:
                style = self.parser.parseStyle("font-family:" + raw, validate=False).getProperty("font-family")
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
class PDFWriter(QObject):  # {{{
    """Render a list of HTML files into one PDF document.

    Each file is loaded into a QWebView, painted page by page onto a PDF
    printer and the result is appended to a podofo document, which is
    then saved (together with metadata and an outline) to the output
    stream passed to :meth:`dump`.
    """

    def __init__(self, opts, log, cover_data=None, toc=None):
        """Create the Qt objects used for rendering.

        :param opts: conversion options, passed on to ``Page`` and
            ``get_pdf_printer``
        :param log: logger used for all messages
        :param cover_data: raw cover image data, used only if it is a
            ``bytes`` instance
        :param toc: table of contents handed to ``Outline``
        :raises Exception: if Qt cannot be used
        """
        from calibre.gui2 import is_ok_to_use_qt
        from calibre.utils.podofo import get_podofo
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.podofo = get_podofo()
        self.doc = self.podofo.PDFDoc()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing |
                                 QPainter.TextAntialiasing |
                                 QPainter.SmoothPixmapTransform)
        # Queued so that rendering starts from the event loop, not from
        # inside the signal emission.
        self.view.loadFinished.connect(self._render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

    def dump(self, items, out_stream, pdf_metadata):
        """Render ``items`` and write the resulting PDF to ``out_stream``.

        Blocks in a local event loop until rendering has finished.

        :raises Exception: if the PDF was not written successfully
        """
        self.metadata = pdf_metadata
        self._delete_tmpdir()
        self.outline = Outline(self.toc, items)

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream
        self.insert_cover()

        self.render_succeeded = False
        self.current_page_num = self.doc.page_count()
        self.combine_queue.append(
            os.path.join(self.tmp_path, 'qprinter_out.pdf'))
        self.first_page = True
        self.setup_printer(self.combine_queue[-1])

        QTimer.singleShot(0, self._render_book)
        self.loop.exec_()

        # _write() resets these to None on success; clean up otherwise.
        if self.painter is not None:
            self.painter.end()
        if self.printer is not None:
            self.printer.abort()

        if not self.render_succeeded:
            raise Exception('Rendering HTML to PDF failed')

    def _render_book(self):
        """Render the next queued item, or write the PDF when none remain."""
        try:
            if len(self.render_queue) == 0:
                self._write()
            else:
                self._render_next()
        except:
            # Any failure must end the event loop, otherwise dump() would
            # block forever.
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def _render_next(self):
        """Pop the next item off the queue and start loading it."""
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def _render_html(self, ok):
        """Slot for ``loadFinished``: paint the loaded document.

        :param ok: whether the document was loaded successfully
        """
        if ok:
            try:
                self.do_paged_render()
            except:
                # This runs as a queued slot: an escaping exception would
                # leave the event loop running and dump() hanging.
                self.logger.exception('Rendering failed')
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(0)
            return
        self._render_book()

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    # Exchanges values with javascript as JSON text, see do_paged_render()
    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    def setup_printer(self, outpath):
        """Create the printer/painter pair writing to ``outpath`` and size
        the page viewport to the printer's page rectangle."""
        self.printer = self.painter = None
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
        zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)
        pr = printer.pageRect()
        # Only publish the pair once both were created successfully
        self.printer, self.painter = printer, painter
        self.viewport_size = QSize(pr.width() / zoomx, pr.height() / zoomy)
        self.page.setViewportSize(self.viewport_size)

    def do_paged_render(self):
        """Paint the currently loaded document screen by screen and record
        the page positions of its anchors in the outline."""
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript(
                'ebooks.oeb.display.indexing')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')

        self.view.page().mainFrame().addToJavaScriptWindowObject(
            "py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, 0, 0, 0);
        paged_display.layout();
        paged_display.fit_images();
        ''')
        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        if not self.first_page:
            # This document starts on the page added by the newPage()
            # call below.
            start_page += 1
        while True:
            if not self.first_page:
                if self.printer.newPage():
                    self.current_page_num += 1
            self.first_page = False
            mf.render(self.painter)
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            # toInt() returns (value, ok)
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs('window.scrollTo(%d, 0)' % nsl[0])

        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs(
            'py_bridge.value = book_indexing.anchor_positions(py_bridge.value)'
        )
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {}  # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor,
                                 start_page + pagenum, ypos)

    def append_doc(self, outpath):
        """Append the PDF file at ``outpath`` to the output document."""
        doc = self.podofo.PDFDoc()
        with open(outpath, 'rb') as f:
            raw = f.read()
        doc.load(raw)
        self.doc.append(doc)

    def _delete_tmpdir(self):
        """Remove the temporary directory (if present) and create a new one."""
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        """Render the cover image, if any, to its own PDF and append it."""
        if not isinstance(self.cover_data, bytes):
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts, output_file_name=item_path,
                                  for_comic=True)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(
                printer, painter, p,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
            self.append_doc(item_path)
        printer.abort()

    def _write(self):
        """Finish printing, then save the combined document with its
        metadata and outline to the output stream."""
        self.painter.end()
        self.printer.abort()
        self.painter = self.printer = None
        self.append_doc(self.combine_queue[-1])

        try:
            self.doc.creator = u'%s %s [http://calibre-ebook.com]' % (
                __appname__, __version__)
            self.doc.title = self.metadata.title
            self.doc.author = self.metadata.author
            if self.metadata.tags:
                self.doc.keywords = self.metadata.tags
            self.outline(self.doc)
            self.doc.save_to_fileobj(self.out_stream)
            self.render_succeeded = True
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
class StatsCollector(object):
    """Render every spine item of a container and record which characters
    are displayed with each font declared in an @font-face rule.

    All the work happens in the constructor, which blocks in a local event
    loop. Afterwards ``font_stats`` maps the container name of each font
    file to the set of characters rendered with it.
    """

    def __init__(self, container):
        """
        :param container: the book container; its ``log`` and
            ``spine_items`` are used
        :raises Exception: if any document fails to render or to be analysed
        """
        self.container = container
        self.log = self.logger = container.log

        must_use_qt()
        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                                       type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}

        QTimer.singleShot(0, self.render_book)

        # The loop is exited with 1 whenever something goes wrong
        if self.loop.exec_() == 1:
            raise Exception(
                'Failed to gather statistics from book, see log for details')

    def render_book(self):
        """Render the next queued item or exit the loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Always leave the event loop, otherwise __init__ blocks forever
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the next item off the queue and start loading it."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats from the loaded document.

        :param ok: whether the document was loaded successfully
        """
        if not ok:
            self.log.error('Failed to render document: %s' %
                           self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s' %
                               self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Convert a ``file://`` URL into a container name.

        :param href: the URL to convert
        :param warn_name: description of where the URL came from, used only
            in warning messages
        :return: the name, or None (after logging a warning) if the URL is
            not local or the resource is not in the container
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href,
                          'ignoring')
            return None
        src = href[len('file://'):]
        # Drop the slash in front of a drive letter: /C:/x -> C:/x
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        """Update ``font_stats`` from the currently loaded document.

        :raises Exception: if the javascript side does not return a list
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = parseStyle('font-family:%s' % ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            # Parse as a background-image value to extract the url()
            style = parseStyle('background-image:%s' % src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(src, '@font-face rule')
            if name is None:
                # href_to_name() has already logged why; without this check
                # the rule would be recorded under the key None.
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules:
            return

        for rule in rules:
            self.font_stats.setdefault(rule['src'], set())

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
class PDFWriter(QObject):  # {{{
    """Render a list of HTML files to individual PDF files and combine
    them (preceded by an optional cover) into one PDF document."""

    def __init__(self, opts, log, cover_data=None):
        """
        :param opts: conversion options, passed on to ``get_pdf_printer``
        :param log: logger used for debug messages
        :param cover_data: raw cover image data or None for no cover
        :raises Exception: if Qt cannot be used
        """
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = log

        self.loop = QEventLoop()
        self.view = QWebView()
        self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None

    def dump(self, items, out_stream, pdf_metadata):
        """Render ``items`` and write the combined PDF to ``out_stream``.

        Blocks in a local event loop until rendering has finished.
        """
        self.metadata = pdf_metadata
        self._delete_tmpdir()

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream

        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()

    @QtCore.pyqtSignature('_render_book()')
    def _render_book(self):
        """Render the next queued item, or combine the parts when done."""
        if len(self.render_queue) == 0:
            self._write()
        else:
            self._render_next()

    def _render_next(self):
        """Pop the next item, reserve its output file name and load it."""
        item = unicode(self.render_queue.pop(0))
        self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))

        self.logger.debug('Processing %s...' % item)
        load_html(item, self.view)

    def _render_html(self, ok):
        """Slot for ``loadFinished``: render the loaded document to the
        part file reserved by :meth:`_render_next`.

        :param ok: whether the document was loaded successfully
        """
        if ok:
            item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
            self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
            try:
                self.do_paged_render(item_path)
            except:
                # This runs as a queued slot: make sure the event loop ends
                # so that dump() does not block forever, then re-raise.
                self.loop.exit(1)
                raise
        else:
            # The document is so corrupt that we can't render the page.
            self.loop.exit(0)
            raise Exception('Document cannot be rendered.')
        self._render_book()

    def do_paged_render(self, outpath):
        """Paint the currently loaded document, one screen per page, into
        the PDF file ``outpath``."""
        from PyQt4.Qt import QSize, QPainter
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        try:
            zoomx = printer.logicalDpiX()/self.view.logicalDpiX()
            zoomy = printer.logicalDpiY()/self.view.logicalDpiY()
            painter.scale(zoomx, zoomy)

            pr = printer.pageRect()
            evaljs = self.view.page().mainFrame().evaluateJavaScript
            evaljs(self.paged_js)
            self.view.page().setViewportSize(QSize(pr.width()/zoomx,
                pr.height()/zoomy))
            evaljs('''
            document.body.style.backgroundColor = "white";
            paged_display.set_geometry(1, 0, 0, 0);
            paged_display.layout();
            paged_display.fit_images();
            ''')
            mf = self.view.page().mainFrame()
            while True:
                mf.render(painter)
                nsl = evaljs('paged_display.next_screen_location()').toInt()
                # toInt() returns (value, ok)
                if not nsl[1] or nsl[0] <= 0:
                    break
                evaljs('window.scrollTo(%d, 0)'%nsl[0])
                printer.newPage()
        finally:
            # Release the painter and printer even if rendering failed
            painter.end()
            printer.abort()

    def _delete_tmpdir(self):
        """Remove the temporary directory (if present) and create a new one."""
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        """Render the cover image, if any, to ``cover.pdf`` and put it at
        the front of the combine queue."""
        if self.cover_data is None:
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts, output_file_name=item_path)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(printer, painter, p,
                    preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
        printer.abort()

    def _write(self):
        """Combine all rendered parts into the output stream, then clean up
        and leave the event loop."""
        self.logger.debug('Combining individual PDF parts...')

        streams = []
        try:
            # Inside the try block so that a failure here still exits the
            # event loop below.
            self.insert_cover()

            outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
            for item in self.combine_queue:
                # The input PDF stream must remain open until the final PDF
                # is written to disk. PyPDF references pages added to the
                # final PDF from the input PDF on disk. It does not store
                # the pages in memory so we can't close the input PDF.
                stream = open(item, 'rb')
                streams.append(stream)
                inputPDF = PdfFileReader(stream)
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            # The output is written (or has failed), so the inputs can be
            # closed now; this must precede deleting the temp directory.
            for stream in streams:
                stream.close()
            self._delete_tmpdir()
            self.loop.exit(0)
class StatsCollector(object):
    """Render each spine item of a container and gather font statistics.

    The constructor does all the work, blocking in a local event loop.
    Results are stored in ``font_stats``, ``font_rule_map`` and
    ``all_font_rules``; when ``do_embed`` is true, ``font_usage_map`` and
    ``font_spec_map`` are filled as well.
    """

    def __init__(self, container, do_embed=False):
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL,
                                log=logging.getLogger('calibre.css'))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))
        self.view.loadFinished.connect(self.collect,
                                       type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        QTimer.singleShot(0, self.render_book)

        exit_code = self.loop.exec_()
        if exit_code == 1:
            raise Exception(
                'Failed to gather statistics from book, see log for details')

    def render_book(self):
        """Continue with the next queued item; leave the loop when done."""
        try:
            if self.render_queue:
                self.render_next()
            else:
                self.loop.exit()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Take the first queued item and load it into the view."""
        self.current_item = unicode(self.render_queue.pop(0))
        load_html(self.current_item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``; ``ok`` tells if loading succeeded."""
        if ok:
            try:
                self.page.load_js()
                self.collect_font_stats()
            except:
                self.log.exception(
                    'Failed to collect font stats from: %s' %
                    self.container.relpath(self.current_item))
                self.loop.exit(1)
                return
            self.render_book()
            return
        self.log.error('Failed to render document: %s' %
                       self.container.relpath(self.current_item))
        self.loop.exit(1)

    def href_to_name(self, href, warn_name):
        """Map a ``file://`` URL to a container name.

        Returns None, after logging a warning that mentions ``warn_name``,
        for non-local URLs and for resources missing from the container.
        """
        prefix = 'file://'
        if not href.startswith(prefix):
            self.log.warn('Non-local URI in', warn_name, ':', href,
                          'ignoring')
            return None
        path = href[len(prefix):]
        has_drive = len(path) > 2 and (path[0], path[2]) == ('/', ':')
        if iswindows and has_drive:
            path = path[1:]
        path = unquote(path.replace('/', os.sep))
        name = self.container.abspath_to_name(path)
        if self.container.has_name(name):
            return name
        self.log.warn('Missing resource', href, 'in', warn_name, 'ignoring')
        return None

    def collect_font_stats(self):
        """Record font rules and font usage of the currently loaded document.

        Raises Exception when the javascript side returns data of an
        unexpected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        raw_rules = self.page.bridge_value
        if not isinstance(raw_rules, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in raw_rules:
            families = rule.get('font-family', None)
            if not families:
                continue
            decl = self.parser.parseStyle('font-family:%s' % families,
                                          validate=False)
            families = [
                v.value for v in decl.getProperty('font-family').propertyValue
            ]
            if not families or families[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in families)
            src = rule.get('src', None)
            if not src:
                continue
            decl = self.parser.parseStyle('background-image:%s' % src,
                                          validate=False)
            uri = decl.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(uri, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not (rules or self.do_embed):
            return

        item_name = self.container.abspath_to_name(self.current_item)
        self.font_rule_map[item_name] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule
            self.font_stats.setdefault(rule['src'], set())

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        whitespace = {'\n', '\r', '\t'}
        usage = defaultdict(dict)
        self.font_usage_map[item_name] = usage
        bad_fonts = {
            'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy',
            'sansserif', 'inherit'
        }
        keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
        for font in font_usage:
            chars = set()
            for chunk in font['text']:
                chars.update(chunk)
            chars -= whitespace
            if not chars:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= chars
            if not self.do_embed:
                continue
            families = [icu_lower(x) for x in font.get('font-family', [])]
            if not families or families[0] in bad_fonts:
                continue
            # Usage is grouped by the first family plus the style properties
            key = frozenset(
                (k, families[0] if k == 'font-family' else v)
                for k, v in font.iteritems() if k in keys)
            entry = usage[key]
            if not entry:
                for k in keys:
                    entry[k] = font[k][0] if k == 'font-family' else font[k]
                entry['text'] = set()
            entry['text'] |= chars
        self.font_usage_map[item_name] = dict(usage)

        if not self.do_embed:
            return

        self.page.evaljs('window.font_stats.get_font_families()')
        font_families = self.page.bridge_value
        if not isinstance(font_families, dict):
            raise Exception(
                'Unknown error occurred while reading font families')
        specs = set()
        self.font_spec_map[item_name] = specs
        for raw in font_families.iterkeys():
            prop = self.parser.parseStyle(
                'font-family:' + raw, validate=False).getProperty('font-family')
            for value in prop.propertyValue:
                family = value.value
                if family and family.lower() not in bad_fonts:
                    specs.add(family)
class StatsCollector(object):
    """Render every spine item of a container and record which characters
    are displayed with each font declared in an @font-face rule.

    All the work happens in the constructor, which blocks in a local event
    loop. Afterwards ``font_stats`` maps the container name of each font
    file to the set of characters rendered with it.
    """

    def __init__(self, container):
        """
        :param container: the book container; its ``log`` and
            ``spine_items`` are used
        :raises Exception: if any document fails to render or to be analysed
        """
        self.container = container
        self.log = self.logger = container.log

        must_use_qt()
        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}

        QTimer.singleShot(0, self.render_book)

        # The loop is exited with 1 whenever something goes wrong
        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def render_book(self):
        """Render the next queued item or exit the loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Always leave the event loop, otherwise __init__ blocks forever
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the next item off the queue and start loading it."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats from the loaded document.

        :param ok: whether the document was loaded successfully
        """
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def collect_font_stats(self):
        """Update ``font_stats`` from the currently loaded document.

        :raises Exception: if the javascript side does not return a list
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            # Parse as a background-image value to extract the url()
            style = parseStyle('background-image:%s'%src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            if not src.startswith('file://'):
                self.log.warn('Unknown URI in @font-face: %r'%src)
                continue
            src = src[len('file://'):]
            # Only drop the slash in front of a drive letter (/C:/x -> C:/x).
            # Stripping every leading slash would damage paths such as
            # //server/share/x.
            if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
                src = src[1:]
            src = src.replace('/', os.sep)
            src = unquote(src)
            name = self.container.abspath_to_name(src)
            if not self.container.has_name(name):
                self.log.warn('Font %r referenced in @font-face rule not found'
                              %name)
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules:
            return

        for rule in rules:
            self.font_stats.setdefault(rule['src'], set())

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
class PDFWriter(QObject):  # {{{
    """Render a list of HTML files to individual PDF files and combine
    them (preceded by an optional cover) into one PDF document."""

    def __init__(self, opts, log, cover_data=None):
        """
        :param opts: conversion options, passed on to ``get_pdf_printer``
        :param log: logger used for debug messages
        :param cover_data: raw cover image data or None for no cover
        :raises Exception: if Qt cannot be used
        """
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = log

        self.loop = QEventLoop()
        self.view = QWebView()
        self.view.setRenderHints(QPainter.Antialiasing |
                                 QPainter.TextAntialiasing |
                                 QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None

    def dump(self, items, out_stream, pdf_metadata):
        """Render ``items`` and write the combined PDF to ``out_stream``.

        Blocks in a local event loop until rendering has finished.
        """
        self.metadata = pdf_metadata
        self._delete_tmpdir()

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream

        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()

    @QtCore.pyqtSignature('_render_book()')
    def _render_book(self):
        """Render the next queued item, or combine the parts when done."""
        if len(self.render_queue) == 0:
            self._write()
        else:
            self._render_next()

    def _render_next(self):
        """Pop the next item, reserve its output file name and load it."""
        item = unicode(self.render_queue.pop(0))
        self.combine_queue.append(
            os.path.join(self.tmp_path,
                         '%i.pdf' % (len(self.combine_queue) + 1)))

        self.logger.debug('Processing %s...' % item)
        load_html(item, self.view)

    def _render_html(self, ok):
        """Slot for ``loadFinished``: render the loaded document to the
        part file reserved by :meth:`_render_next`.

        :param ok: whether the document was loaded successfully
        """
        if ok:
            item_path = os.path.join(self.tmp_path,
                                     '%i.pdf' % len(self.combine_queue))
            self.logger.debug(
                '\tRendering item %s as %i.pdf' %
                (os.path.basename(str(
                    self.view.url().toLocalFile())), len(self.combine_queue)))
            try:
                self.do_paged_render(item_path)
            except:
                # This runs as a queued slot: make sure the event loop ends
                # so that dump() does not block forever, then re-raise.
                self.loop.exit(1)
                raise
        else:
            # The document is so corrupt that we can't render the page.
            self.loop.exit(0)
            raise Exception('Document cannot be rendered.')
        self._render_book()

    def do_paged_render(self, outpath):
        """Paint the currently loaded document, one screen per page, into
        the PDF file ``outpath``."""
        from PyQt4.Qt import QSize, QPainter
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        try:
            zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
            zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
            painter.scale(zoomx, zoomy)

            pr = printer.pageRect()
            evaljs = self.view.page().mainFrame().evaluateJavaScript
            evaljs(self.paged_js)
            self.view.page().setViewportSize(
                QSize(pr.width() / zoomx, pr.height() / zoomy))
            evaljs('''
            document.body.style.backgroundColor = "white";
            paged_display.set_geometry(1, 0, 0, 0);
            paged_display.layout();
            paged_display.fit_images();
            ''')
            mf = self.view.page().mainFrame()
            while True:
                mf.render(painter)
                nsl = evaljs('paged_display.next_screen_location()').toInt()
                # toInt() returns (value, ok)
                if not nsl[1] or nsl[0] <= 0:
                    break
                evaljs('window.scrollTo(%d, 0)' % nsl[0])
                printer.newPage()
        finally:
            # Release the painter and printer even if rendering failed
            painter.end()
            printer.abort()

    def _delete_tmpdir(self):
        """Remove the temporary directory (if present) and create a new one."""
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        """Render the cover image, if any, to ``cover.pdf`` and put it at
        the front of the combine queue."""
        if self.cover_data is None:
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts, output_file_name=item_path)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(
                printer,
                painter,
                p,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
        printer.abort()

    def _write(self):
        """Combine all rendered parts into the output stream, then clean up
        and leave the event loop."""
        self.logger.debug('Combining individual PDF parts...')

        streams = []
        try:
            # Inside the try block so that a failure here still exits the
            # event loop below.
            self.insert_cover()

            outPDF = PdfFileWriter(title=self.metadata.title,
                                   author=self.metadata.author)
            for item in self.combine_queue:
                # The input PDF stream must remain open until the final PDF
                # is written to disk. PyPDF references pages added to the
                # final PDF from the input PDF on disk. It does not store
                # the pages in memory so we can't close the input PDF.
                stream = open(item, 'rb')
                streams.append(stream)
                inputPDF = PdfFileReader(stream)
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            # The output is written (or has failed), so the inputs can be
            # closed now; this must precede deleting the temp directory.
            for stream in streams:
                stream.close()
            self._delete_tmpdir()
            self.loop.exit(0)
class PDFWriter(QObject):
    """Render a list of HTML files directly into a ``PdfDevice``.

    Each file is loaded into a QWebView and painted screen by screen onto
    the device; anchor positions are recorded in an ``Outline``.
    """

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    # Exchanges values with javascript as JSON text, see do_paged_render()
    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
            fset=_pass_json_value_setter)

    def __init__(self, opts, log, cover_data=None, toc=None):
        """
        :param opts: conversion options
        :param log: logger used for all messages
        :param cover_data: raw cover image data or None for no cover
        :param toc: table of contents handed to ``Outline``
        :raises Exception: if Qt cannot be used
        """
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing|
                    QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        # Called as report_progress(fraction, message); no-op by default
        self.report_progress = lambda x, y: x

    def dump(self, items, out_stream, pdf_metadata):
        """Render ``items`` and write the PDF to ``out_stream``.

        Blocks in a local event loop until rendering has finished.

        :raises Exception: if rendering failed or the PDF device reported
            errors
        """
        opts = self.opts
        self.outline = Outline(self.toc, items)
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        ml, mr = opts.margin_left, opts.margin_right
        # The part common to both side margins is given to the javascript
        # layout (margin_size below), only the remainder to the device.
        margin_side = min(ml, mr)
        ml, mr = ml - margin_side, mr - margin_side
        self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml,
                             top_margin=0, right_margin=mr, bottom_margin=0,
                             xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
                             debug=self.log.debug, compress=not
                             opts.uncompressed_pdf)

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        # TODO: Test margins
        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        ms = self.doc.to_px(margin_side, vertical=False)
        self.margin_top, self.margin_size, self.margin_bottom = map(
            lambda x:int(floor(x)), (mt, ms, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)

        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    draw_image_page(QRect(0, 0, self.doc.width(), self.doc.height()),
                            self.painter, p,
                            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        QTimer.singleShot(0, self.render_book)
        # The loop is exited with 1 when rendering fails; that must not be
        # lost, as such failures do not necessarily set doc.errors_occurred.
        failed = self.loop.exec_() == 1

        # TODO: Outline and links
        self.painter.end()

        if failed or self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        """Render the next queued item or exit the loop when none remain."""
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Always leave the event loop, otherwise dump() blocks forever
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the next item off the queue and start loading it."""
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        """Slot for ``loadFinished``: paint the loaded document, report
        progress and continue with the next item.

        :param ok: whether the document was loaded successfully
        """
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
                # Do not go on rendering after a failure
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        # float() so that the fraction is not truncated by integer division.
        # The template is translated before interpolation, otherwise the
        # lookup could never match.
        self.report_progress(float(done)/self.total_items,
                _('Rendered %s')%os.path.basename(self.current_item))
        self.render_book()

    @property
    def current_page_num(self):
        return self.doc.current_page_num

    def do_paged_render(self):
        """Paint the currently loaded document screen by screen and record
        the page positions of its anchors in the outline."""
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, %d, %d, %d);
        paged_display.layout();
        paged_display.fit_images();
        '''%(self.margin_top, self.margin_size, self.margin_bottom))

        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        while True:
            self.doc.init_page()
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            # toInt() returns (value, ok)
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs('window.scrollTo(%d, 0)'%nsl[0])
            if self.doc.errors_occurred:
                break

        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)')
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {}  # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor, start_page + pagenum, ypos)