class StatsCollector(object): def __init__(self, container, do_embed=False): self.container = container self.log = self.logger = container.log self.do_embed = do_embed must_use_qt() self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE) self.loop = QEventLoop() self.view = QWebView() self.page = Page(self.log) self.view.setPage(self.page) self.page.setViewportSize(QSize(1200, 1600)) self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection) self.render_queue = list(container.spine_items) self.font_stats = {} self.font_usage_map = {} self.font_spec_map = {} self.font_rule_map = {} self.all_font_rules = {} QTimer.singleShot(0, self.render_book) if self.loop.exec_() == 1: raise Exception('Failed to gather statistics from book, see log for details') def log_exception(self, *args): orig = self.log.filter_level try: self.log.filter_level = self.log.DEBUG self.log.exception(*args) finally: self.log.filter_level = orig def render_book(self): try: if not self.render_queue: self.loop.exit() else: self.render_next() except: self.log_exception('Rendering failed') self.loop.exit(1) def render_next(self): item = unicode(self.render_queue.pop(0)) self.current_item = item load_html(item, self.view) def collect(self, ok): if not ok: self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item)) self.loop.exit(1) return try: self.page.load_js() self.collect_font_stats() except: self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) self.loop.exit(1) return self.render_book() def href_to_name(self, href, warn_name): if not href.startswith('file://'): self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring') return None src = href[len('file://'):] if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'): src = src[1:] src = src.replace('/', os.sep) src = unquote(src) name = self.container.abspath_to_name(src) if not self.container.has_name(name): self.log.warn('Missing resource', href, 'in', warn_name, 'ignoring') return None return name def collect_font_stats(self): self.page.evaljs('window.font_stats.get_font_face_rules()') font_face_rules = self.page.bridge_value if not isinstance(font_face_rules, list): raise Exception('Unknown error occurred while reading font-face rules') # Weed out invalid font-face rules rules = [] import tinycss parser = tinycss.make_full_parser() for rule in font_face_rules: ff = rule.get('font-family', None) if not ff: continue style = self.parser.parseStyle('font-family:%s'%ff, validate=False) ff = [x.value for x in style.getProperty('font-family').propertyValue] if not ff or ff[0] == 'inherit': continue rule['font-family'] = frozenset(icu_lower(f) for f in ff) src = rule.get('src', None) if not src: continue try: tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value except Exception: self.log.warn('Failed to parse @font-family src: %s' % src) continue for token in tokens: if token.type == 'URI': uv = token.value if uv: sn = self.href_to_name(uv, '@font-face rule') if sn is not None: rule['src'] = sn break else: self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src) continue normalize_font_properties(rule) rule['width'] = widths[rule['font-stretch']] rule['weight'] = int(rule['font-weight']) rules.append(rule) if not rules and not self.do_embed: return self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules for rule in rules: self.all_font_rules[rule['src']] = rule for rule in rules: if rule['src'] not in self.font_stats: self.font_stats[rule['src']] = set() self.page.evaljs('window.font_stats.get_font_usage()') font_usage = self.page.bridge_value if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()') pseudo_element_font_usage = self.page.bridge_value if not isinstance(pseudo_element_font_usage, list): raise Exception('Unknown error occurred while reading pseudo element font usage') font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser) exclude = {'\n', '\r', '\t'} self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} for font in font_usage: text = set() for t in font['text']: text |= frozenset(t) text.difference_update(exclude) if not text: continue normalize_font_properties(font) for rule in get_matching_rules(rules, font): self.font_stats[rule['src']] |= text if self.do_embed: ff = [icu_lower(x) for x in font.get('font-family', [])] if ff and ff[0] not in bad_fonts: keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) val = fu[key] if not val: val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) val['text'] = set() val['text'] |= text self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) if self.do_embed: self.page.evaljs('window.font_stats.get_font_families()') font_families = self.page.bridge_value if not isinstance(font_families, dict): raise Exception('Unknown error occurred while reading font families') self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() for font_dict, text, pseudo in pseudo_element_font_usage: font_families[font_dict['font-family']] = True for raw in font_families.iterkeys(): for x in parse_font_families(self.parser, raw): if x.lower() not in bad_fonts: fs.add(x)
class PDFWriter(QObject): # {{{ def __init__(self, opts, log, cover_data=None, toc=None): from calibre.gui2 import must_use_qt from calibre.utils.podofo import get_podofo must_use_qt() QObject.__init__(self) self.logger = self.log = log self.podofo = get_podofo() self.doc = self.podofo.PDFDoc() self.loop = QEventLoop() self.view = QWebView() self.page = Page(opts, self.log) self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self._render_html, type=Qt.QueuedConnection) for x in (Qt.Horizontal, Qt.Vertical): self.view.page().mainFrame().setScrollBarPolicy(x, Qt.ScrollBarAlwaysOff) self.render_queue = [] self.combine_queue = [] self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts') self.opts = opts self.cover_data = cover_data self.paged_js = None self.toc = toc def dump(self, items, out_stream, pdf_metadata): self.metadata = pdf_metadata self._delete_tmpdir() self.outline = Outline(self.toc, items) self.render_queue = items self.combine_queue = [] self.out_stream = out_stream self.insert_cover() self.render_succeeded = False self.current_page_num = self.doc.page_count() self.combine_queue.append(os.path.join(self.tmp_path, 'qprinter_out.pdf')) self.first_page = True self.setup_printer(self.combine_queue[-1]) QTimer.singleShot(0, self._render_book) self.loop.exec_() if self.painter is not None: self.painter.end() if self.printer is not None: self.printer.abort() if not self.render_succeeded: raise Exception('Rendering HTML to PDF failed') def _render_book(self): try: if len(self.render_queue) == 0: self._write() else: self._render_next() except: self.logger.exception('Rendering failed') self.loop.exit(1) def _render_next(self): item = unicode(self.render_queue.pop(0)) self.logger.debug('Processing %s...' % item) self.current_item = item load_html(item, self.view) def _render_html(self, ok): if ok: self.do_paged_render() else: # The document is so corrupt that we can't render the page. self.logger.error('Document cannot be rendered.') self.loop.exit(0) return self._render_book() def _pass_json_value_getter(self): val = json.dumps(self.bridge_value) return val def _pass_json_value_setter(self, value): self.bridge_value = json.loads(unicode(value)) _pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter, fset=_pass_json_value_setter) def setup_printer(self, outpath): self.printer = self.painter = None printer = get_pdf_printer(self.opts, output_file_name=outpath) painter = QPainter(printer) zoomx = printer.logicalDpiX()/self.view.logicalDpiX() zoomy = printer.logicalDpiY()/self.view.logicalDpiY() painter.scale(zoomx, zoomy) pr = printer.pageRect() self.printer, self.painter = printer, painter self.viewport_size = QSize(pr.width()/zoomx, pr.height()/zoomy) self.page.setViewportSize(self.viewport_size) def do_paged_render(self): if self.paged_js is None: from calibre.utils.resources import compiled_coffeescript self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils') self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing') self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged') self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self) evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) evaljs(''' py_bridge.__defineGetter__('value', function() { return JSON.parse(this._pass_json_value); }); py_bridge.__defineSetter__('value', function(val) { this._pass_json_value = JSON.stringify(val); }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, 0, 0, 0); paged_display.layout(); paged_display.fit_images(); ''') mf = self.view.page().mainFrame() start_page = self.current_page_num if not self.first_page: start_page += 1 while True: if not self.first_page: if self.printer.newPage(): self.current_page_num += 1 self.first_page = False mf.render(self.painter) try: nsl = int(evaljs('paged_display.next_screen_location()')) except (TypeError, ValueError): break if nsl <= 0: break evaljs('window.scrollTo(%d, 0)'%nsl) self.bridge_value = tuple(self.outline.anchor_map[self.current_item]) evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)') amap = self.bridge_value if not isinstance(amap, dict): amap = {} # Some javascript error occurred self.outline.set_pos(self.current_item, None, start_page, 0) for anchor, x in amap.iteritems(): pagenum, ypos = x self.outline.set_pos(self.current_item, anchor, start_page + pagenum, ypos) def append_doc(self, outpath): doc = self.podofo.PDFDoc() with open(outpath, 'rb') as f: raw = f.read() doc.load(raw) self.doc.append(doc) def _delete_tmpdir(self): if os.path.exists(self.tmp_path): shutil.rmtree(self.tmp_path, True) self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts') def insert_cover(self): if not isinstance(self.cover_data, bytes): return item_path = os.path.join(self.tmp_path, 'cover.pdf') printer = get_pdf_printer(self.opts, output_file_name=item_path, for_comic=True) self.combine_queue.insert(0, item_path) p = QPixmap() p.loadFromData(self.cover_data) if not p.isNull(): painter = QPainter(printer) draw_image_page(printer, painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) painter.end() self.append_doc(item_path) printer.abort() def _write(self): self.painter.end() self.printer.abort() self.painter = self.printer = None self.append_doc(self.combine_queue[-1]) try: self.doc.creator = u'%s %s [http://calibre-ebook.com]'%( __appname__, __version__) self.doc.title = self.metadata.title self.doc.author = self.metadata.author if self.metadata.tags: self.doc.keywords = self.metadata.tags self.outline(self.doc) self.doc.save_to_fileobj(self.out_stream) self.render_succeeded = True finally: self._delete_tmpdir() self.loop.exit(0)
class PDFWriter(QObject): def _pass_json_value_getter(self): val = json.dumps(self.bridge_value) return val def _pass_json_value_setter(self, value): self.bridge_value = json.loads(unicode(value)) _pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter, fset=_pass_json_value_setter) @pyqtSlot(result=unicode) def title(self): return self.doc_title @pyqtSlot(result=unicode) def author(self): return self.doc_author @pyqtSlot(result=unicode) def section(self): return self.current_section @pyqtSlot(result=unicode) def tl_section(self): return self.current_tl_section def __init__(self, opts, log, cover_data=None, toc=None): from calibre.gui2 import must_use_qt must_use_qt() QObject.__init__(self) self.logger = self.log = log self.opts = opts self.cover_data = cover_data self.paged_js = None self.toc = toc self.loop = QEventLoop() self.view = QWebView() self.page = Page(opts, self.log) self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing| QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self.render_html, type=Qt.QueuedConnection) for x in (Qt.Horizontal, Qt.Vertical): self.view.page().mainFrame().setScrollBarPolicy(x, Qt.ScrollBarAlwaysOff) self.report_progress = lambda x, y: x self.current_section = '' self.current_tl_section = '' def dump(self, items, out_stream, pdf_metadata): opts = self.opts page_size = get_page_size(self.opts) xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY() # We cannot set the side margins in the webview as there is no right # margin for the last page (the margins are implemented with # -webkit-column-gap) ml, mr = opts.margin_left, opts.margin_right self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml, top_margin=0, right_margin=mr, bottom_margin=0, xdpi=xdpi, ydpi=ydpi, errors=self.log.error, debug=self.log.debug, compress=not opts.uncompressed_pdf, opts=opts, mark_links=opts.pdf_mark_links) self.footer = opts.pdf_footer_template if self.footer: self.footer = self.footer.strip() if not self.footer and opts.pdf_page_numbers: self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>' self.header = opts.pdf_header_template if self.header: self.header = self.header.strip() min_margin = 1.5 * opts._final_base_font_size if self.footer and opts.margin_bottom < min_margin: self.log.warn('Bottom margin is too small for footer, increasing it to %.1fpts' % min_margin) opts.margin_bottom = min_margin if self.header and opts.margin_top < min_margin: self.log.warn('Top margin is too small for header, increasing it to %.1fpts' % min_margin) opts.margin_top = min_margin self.page.setViewportSize(QSize(self.doc.width(), self.doc.height())) self.render_queue = items self.total_items = len(items) mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom)) self.margin_top, self.margin_bottom = map(lambda x:int(floor(x)), (mt, mb)) self.painter = QPainter(self.doc) self.doc.set_metadata(title=pdf_metadata.title, author=pdf_metadata.author, tags=pdf_metadata.tags, mi=pdf_metadata.mi) self.doc_title = pdf_metadata.title self.doc_author = pdf_metadata.author self.painter.save() try: if self.cover_data is not None: p = QPixmap() try: p.loadFromData(self.cover_data) except TypeError: self.log.warn('This ebook does not have a raster cover, cannot generate cover for PDF' '. Cover type: %s' % type(self.cover_data)) if not p.isNull(): self.doc.init_page() draw_image_page(QRect(*self.doc.full_page_rect), self.painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) self.doc.end_page() finally: self.painter.restore() QTimer.singleShot(0, self.render_book) if self.loop.exec_() == 1: raise Exception('PDF Output failed, see log for details') if self.toc is not None and len(self.toc) > 0: self.doc.add_outline(self.toc) self.painter.end() if self.doc.errors_occurred: raise Exception('PDF Output failed, see log for details') def render_inline_toc(self): self.rendered_inline_toc = True from calibre.ebooks.pdf.render.toc import toc_as_html raw = toc_as_html(self.toc, self.doc, self.opts) pt = PersistentTemporaryFile('_pdf_itoc.htm') pt.write(raw) pt.close() self.render_queue.append(pt.name) self.render_next() def render_book(self): if self.doc.errors_occurred: return self.loop.exit(1) try: if not self.render_queue: if self.opts.pdf_add_toc and self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'): return self.render_inline_toc() self.loop.exit() else: self.render_next() except: self.logger.exception('Rendering failed') self.loop.exit(1) def render_next(self): item = unicode(self.render_queue.pop(0)) self.logger.debug('Processing %s...' % item) self.current_item = item load_html(item, self.view) def render_html(self, ok): if ok: try: self.do_paged_render() except: self.log.exception('Rendering failed') self.loop.exit(1) return else: # The document is so corrupt that we can't render the page. self.logger.error('Document cannot be rendered.') self.loop.exit(1) return done = self.total_items - len(self.render_queue) self.report_progress(done/self.total_items, _('Rendered %s'%os.path.basename(self.current_item))) self.render_book() @property def current_page_num(self): return self.doc.current_page_num def load_mathjax(self): evaljs = self.view.page().mainFrame().evaluateJavaScript mjpath = P(u'viewer/mathjax').replace(os.sep, '/') if iswindows: mjpath = u'/' + mjpath if bool(evaljs(''' window.mathjax.base = %s; mathjax.check_for_math(); mathjax.math_present '''%(json.dumps(mjpath, ensure_ascii=False)))): self.log.debug('Math present, loading MathJax') while not bool(evaljs('mathjax.math_loaded')): self.loop.processEvents(self.loop.ExcludeUserInputEvents) evaljs('document.getElementById("MathJax_Message").style.display="none";') def get_sections(self, anchor_map, only_top_level=False): sections = defaultdict(list) ci = os.path.abspath(os.path.normcase(self.current_item)) if self.toc is not None: tocentries = self.toc.top_level_items() if only_top_level else self.toc.flat() for toc in tocentries: path = toc.abspath or None frag = toc.fragment or None if path is None: continue path = os.path.abspath(os.path.normcase(path)) if path == ci: col = 0 if frag and frag in anchor_map: col = anchor_map[frag]['column'] sections[col].append(toc.text or _('Untitled')) return sections def do_paged_render(self): if self.paged_js is None: import uuid from calibre.utils.resources import compiled_coffeescript as cc self.paged_js = cc('ebooks.oeb.display.utils') self.paged_js += cc('ebooks.oeb.display.indexing') self.paged_js += cc('ebooks.oeb.display.paged') self.paged_js += cc('ebooks.oeb.display.mathjax') self.hf_uuid = str(uuid.uuid4()).replace('-', '') self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self) self.view.page().longjs_counter = 0 evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) self.load_mathjax() evaljs(''' Object.defineProperty(py_bridge, 'value', { get : function() { return JSON.parse(this._pass_json_value); }, set : function(val) { this._pass_json_value = JSON.stringify(val); } }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, %d, %d, %d); paged_display.layout(); paged_display.fit_images(); py_bridge.value = book_indexing.all_links_and_anchors(); window.scrollTo(0, 0); // This is needed as getting anchor positions could have caused the viewport to scroll '''%(self.margin_top, 0, self.margin_bottom)) amap = self.bridge_value if not isinstance(amap, dict): amap = {'links':[], 'anchors':{}} # Some javascript error occurred sections = self.get_sections(amap['anchors']) tl_sections = self.get_sections(amap['anchors'], True) col = 0 if self.header: self.bridge_value = self.header evaljs('paged_display.header_template = py_bridge.value') if self.footer: self.bridge_value = self.footer evaljs('paged_display.footer_template = py_bridge.value') if self.header or self.footer: evaljs('paged_display.create_header_footer("%s");'%self.hf_uuid) start_page = self.current_page_num mf = self.view.page().mainFrame() def set_section(col, sections, attr): # If this page has no section, use the section from the previous page idx = col if col in sections else col - 1 if col - 1 in sections else None if idx is not None: setattr(self, attr, sections[idx][0]) while True: set_section(col, sections, 'current_section') set_section(col, tl_sections, 'current_tl_section') self.doc.init_page() if self.header or self.footer: evaljs('paged_display.update_header_footer(%d)'%self.current_page_num) self.painter.save() mf.render(self.painter) self.painter.restore() try: nsl = int(evaljs('paged_display.next_screen_location()')) except (TypeError, ValueError): break self.doc.end_page(nsl <= 0) if nsl <= 0: break evaljs('window.scrollTo(%d, 0); paged_display.position_header_footer();'%nsl) if self.doc.errors_occurred: break col += 1 if not self.doc.errors_occurred: self.doc.add_links(self.current_item, start_page, amap['links'], amap['anchors'])
class StatsCollector(object): def __init__(self, container, do_embed=False): self.container = container self.log = self.logger = container.log self.do_embed = do_embed must_use_qt() self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css')) self.loop = QEventLoop() self.view = QWebView() self.page = Page(self.log) self.view.setPage(self.page) self.page.setViewportSize(QSize(1200, 1600)) self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection) self.render_queue = list(container.spine_items) self.font_stats = {} self.font_usage_map = {} self.font_spec_map = {} self.font_rule_map = {} self.all_font_rules = {} QTimer.singleShot(0, self.render_book) if self.loop.exec_() == 1: raise Exception('Failed to gather statistics from book, see log for details') def render_book(self): try: if not self.render_queue: self.loop.exit() else: self.render_next() except: self.logger.exception('Rendering failed') self.loop.exit(1) def render_next(self): item = unicode(self.render_queue.pop(0)) self.current_item = item load_html(item, self.view) def collect(self, ok): if not ok: self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item)) self.loop.exit(1) return try: self.page.load_js() self.collect_font_stats() except: self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item)) self.loop.exit(1) return self.render_book() def href_to_name(self, href, warn_name): if not href.startswith('file://'): self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring') return None src = href[len('file://'):] if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'): src = src[1:] src = src.replace('/', os.sep) src = unquote(src) name = self.container.abspath_to_name(src) if not self.container.has_name(name): self.log.warn('Missing resource', href, 'in', warn_name, 'ignoring') return None return name def collect_font_stats(self): self.page.evaljs('window.font_stats.get_font_face_rules()') font_face_rules = self.page.bridge_value if not isinstance(font_face_rules, list): raise Exception('Unknown error occurred while reading font-face rules') # Weed out invalid font-face rules rules = [] for rule in font_face_rules: ff = rule.get('font-family', None) if not ff: continue style = self.parser.parseStyle('font-family:%s'%ff, validate=False) ff = [x.value for x in style.getProperty('font-family').propertyValue] if not ff or ff[0] == 'inherit': continue rule['font-family'] = frozenset(icu_lower(f) for f in ff) src = rule.get('src', None) if not src: continue if src.startswith('url(') and src.endswith(')') and src[4] not in {'"', "'"}: # Quote the url otherwise cssutils fails to parse it if it has # ' or " in it src = "url('" + src[4:-1].replace("'", "\\'") + "')" style = self.parser.parseStyle('background-image:%s'%src, validate=False) src = style.getProperty('background-image').propertyValue[0].uri name = self.href_to_name(src, '@font-face rule') if name is None: continue rule['src'] = name normalize_font_properties(rule) rule['width'] = widths[rule['font-stretch']] rule['weight'] = int(rule['font-weight']) rules.append(rule) if not rules and not self.do_embed: return self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules for rule in rules: self.all_font_rules[rule['src']] = rule for rule in rules: if rule['src'] not in self.font_stats: self.font_stats[rule['src']] = set() self.page.evaljs('window.font_stats.get_font_usage()') font_usage = self.page.bridge_value if not isinstance(font_usage, list): raise Exception('Unknown error occurred while reading font usage') exclude = {'\n', '\r', '\t'} self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict) bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'} for font in font_usage: text = set() for t in font['text']: text |= frozenset(t) text.difference_update(exclude) if not text: continue normalize_font_properties(font) for rule in get_matching_rules(rules, font): self.font_stats[rule['src']] |= text if self.do_embed: ff = [icu_lower(x) for x in font.get('font-family', [])] if ff and ff[0] not in bad_fonts: keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys)) val = fu[key] if not val: val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys}) val['text'] = set() val['text'] |= text self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu) if self.do_embed: self.page.evaljs('window.font_stats.get_font_families()') font_families = self.page.bridge_value if not isinstance(font_families, dict): raise Exception('Unknown error occurred while reading font families') self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set() for raw in font_families.iterkeys(): style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family') for x in style.propertyValue: x = x.value if x and x.lower() not in bad_fonts: fs.add(x)
class PDFWriter(QObject): @pyqtSlot(result=unicode) def title(self): return self.doc_title @pyqtSlot(result=unicode) def author(self): return self.doc_author @pyqtSlot(result=unicode) def section(self): return self.current_section @pyqtSlot(result=unicode) def tl_section(self): return self.current_tl_section def __init__(self, opts, log, cover_data=None, toc=None): from calibre.gui2 import must_use_qt must_use_qt() QObject.__init__(self) self.logger = self.log = log current_log(log) self.opts = opts self.cover_data = cover_data self.paged_js = None self.toc = toc self.loop = QEventLoop() self.view = QWebView() self.page = Page(opts, self.log) self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self.render_html, type=Qt.QueuedConnection) for x in (Qt.Horizontal, Qt.Vertical): self.view.page().mainFrame().setScrollBarPolicy(x, Qt.ScrollBarAlwaysOff) self.report_progress = lambda x, y: x self.current_section = '' self.current_tl_section = '' def dump(self, items, out_stream, pdf_metadata): opts = self.opts page_size = get_page_size(self.opts) xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY() def margin(which): val = getattr(opts, 'pdf_page_margin_' + which) if val == 0.0: val = getattr(opts, 'margin_' + which) return val ml, mr, mt, mb = map(margin, 'left right top bottom'.split()) # We cannot set the side margins in the webview as there is no right # margin for the last page (the margins are implemented with # -webkit-column-gap) self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml, top_margin=0, right_margin=mr, bottom_margin=0, xdpi=xdpi, ydpi=ydpi, errors=self.log.error, debug=self.log.debug, compress=not opts.uncompressed_pdf, opts=opts, mark_links=opts.pdf_mark_links, page_margins=(ml, mr, mt, mb)) self.footer = opts.pdf_footer_template if self.footer: self.footer = self.footer.strip() if not self.footer and opts.pdf_page_numbers: self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>' self.header = opts.pdf_header_template if self.header: self.header = self.header.strip() min_margin = 1.5 * opts._final_base_font_size if self.footer and mb < min_margin: self.log.warn('Bottom margin is too small for footer, increasing it to %.1fpts' % min_margin) mb = min_margin if self.header and mt < min_margin: self.log.warn('Top margin is too small for header, increasing it to %.1fpts' % min_margin) mt = min_margin self.page.setViewportSize(QSize(self.doc.width(), self.doc.height())) self.render_queue = items self.total_items = len(items) mt, mb = map(self.doc.to_px, (mt, mb)) self.margin_top, self.margin_bottom = map(lambda x:int(floor(x)), (mt, mb)) self.painter = QPainter(self.doc) try: self.book_language = pdf_metadata.mi.languages[0] except Exception: self.book_language = 'eng' self.doc.set_metadata(title=pdf_metadata.title, author=pdf_metadata.author, tags=pdf_metadata.tags, mi=pdf_metadata.mi) self.doc_title = pdf_metadata.title self.doc_author = pdf_metadata.author self.painter.save() try: if self.cover_data is not None: p = QPixmap() try: p.loadFromData(self.cover_data) except TypeError: self.log.warn('This ebook does not have a raster cover, cannot generate cover for PDF' '. Cover type: %s' % type(self.cover_data)) if not p.isNull(): self.doc.init_page() draw_image_page(QRect(*self.doc.full_page_rect), self.painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio) self.doc.end_page() finally: self.painter.restore() QTimer.singleShot(0, self.render_book) if self.loop.exec_() == 1: raise Exception('PDF Output failed, see log for details') if self.toc is not None and len(self.toc) > 0: self.doc.add_outline(self.toc) self.painter.end() if self.doc.errors_occurred: raise Exception('PDF Output failed, see log for details') def render_inline_toc(self): evaljs = self.view.page().mainFrame().evaluateJavaScript self.rendered_inline_toc = True from calibre.ebooks.pdf.render.toc import toc_as_html raw = toc_as_html(self.toc, self.doc, self.opts, evaljs) pt = PersistentTemporaryFile('_pdf_itoc.htm') pt.write(raw) pt.close() self.render_queue.append(pt.name) self.render_next() def render_book(self): if self.doc.errors_occurred: return self.loop.exit(1) try: if not self.render_queue: if self.opts.pdf_add_toc and self.toc is not None and len(self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'): return self.render_inline_toc() self.loop.exit() else: self.render_next() except: self.logger.exception('Rendering failed') self.loop.exit(1) def render_next(self): item = unicode(self.render_queue.pop(0)) self.logger.debug('Processing %s...' % item) self.current_item = item load_html(item, self.view) def render_html(self, ok): if ok: try: self.do_paged_render() except: self.log.exception('Rendering failed') self.loop.exit(1) return else: # The document is so corrupt that we can't render the page. self.logger.error('Document cannot be rendered.') self.loop.exit(1) return done = self.total_items - len(self.render_queue) self.report_progress(done/self.total_items, _('Rendered %s'%os.path.basename(self.current_item))) self.render_book() @property def current_page_num(self): return self.doc.current_page_num def load_mathjax(self): evaljs = self.view.page().mainFrame().evaluateJavaScript mjpath = P(u'viewer/mathjax').replace(os.sep, '/') if iswindows: mjpath = u'/' + mjpath if bool(evaljs(''' window.mathjax.base = %s; mathjax.check_for_math(); mathjax.math_present '''%(json.dumps(mjpath, ensure_ascii=False)))): self.log.debug('Math present, loading MathJax') while not bool(evaljs('mathjax.math_loaded')): self.loop.processEvents(self.loop.ExcludeUserInputEvents) evaljs('document.getElementById("MathJax_Message").style.display="none";') def load_header_footer_images(self): from calibre.utils.monotonic import monotonic evaljs = self.view.page().mainFrame().evaluateJavaScript st = monotonic() while not evaljs('paged_display.header_footer_images_loaded()'): self.loop.processEvents(self.loop.ExcludeUserInputEvents) if monotonic() - st > 5: self.log.warn('Header and footer images have not loaded in 5 seconds, ignoring') break def get_sections(self, anchor_map, only_top_level=False): sections = defaultdict(list) ci = os.path.abspath(os.path.normcase(self.current_item)) if self.toc is not None: tocentries = self.toc.top_level_items() if only_top_level else self.toc.flat() for toc in tocentries: path = toc.abspath or None frag = toc.fragment or None if path is None: continue path = os.path.abspath(os.path.normcase(path)) if path == ci: col = 0 if frag and frag in anchor_map: col = anchor_map[frag]['column'] sections[col].append(toc.text or _('Untitled')) return sections def hyphenate(self, evaljs): evaljs(u'''\ Hyphenator.config( { 'minwordlength' : 6, // 'hyphenchar' : '|', 'displaytogglebox' : false, 'remoteloading' : false, 'doframes' : true, 'defaultlanguage' : 'en', 'storagetype' : 'session', 'onerrorhandler' : function (e) { console.log(e); } }); Hyphenator.hyphenate(document.body, "%s"); ''' % self.hyphenate_lang ) def convert_page_margins(self, doc_margins): ans = [0, 0, 0, 0] def convert(name, idx, vertical=True): m = doc_margins.get(name) if m is None: ans[idx] = getattr(self.doc.engine, '{}_margin'.format(name)) else: ans[idx] = m convert('left', 0, False), convert('top', 1), convert('right', 2, False), convert('bottom', 3) return ans def do_paged_render(self): if self.paged_js is None: import uuid from calibre.utils.resources import compiled_coffeescript as cc self.paged_js = cc('ebooks.oeb.display.utils') self.paged_js += cc('ebooks.oeb.display.indexing') self.paged_js += cc('ebooks.oeb.display.paged') self.paged_js += cc('ebooks.oeb.display.mathjax') if self.opts.pdf_hyphenate: self.paged_js += P('viewer/hyphenate/Hyphenator.js', data=True).decode('utf-8') hjs, self.hyphenate_lang = load_hyphenator_dicts({}, self.book_language) self.paged_js += hjs self.hf_uuid = str(uuid.uuid4()).replace('-', '') self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self) self.view.page().longjs_counter = 0 evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) self.load_mathjax() if self.opts.pdf_hyphenate: self.hyphenate(evaljs) margin_top, margin_bottom = self.margin_top, self.margin_bottom page_margins = None if self.opts.pdf_use_document_margins: doc_margins = evaljs('document.documentElement.getAttribute("data-calibre-pdf-output-page-margins")') try: doc_margins = json.loads(doc_margins) except Exception: doc_margins = None if doc_margins and isinstance(doc_margins, dict): doc_margins = {k:float(v) for k, v in doc_margins.iteritems() if isinstance(v, (float, int)) and k in {'right', 'top', 'left', 'bottom'}} if doc_margins: margin_top = margin_bottom = 0 page_margins = self.convert_page_margins(doc_margins) amap = json.loads(evaljs(''' document.body.style.backgroundColor = "white"; // Qt WebKit cannot handle opacity with the Pdf backend s = document.createElement('style'); s.textContent = '* {opacity: 1 !important}'; document.documentElement.appendChild(s); paged_display.set_geometry(1, %d, %d, %d); paged_display.layout(); paged_display.fit_images(); ret = book_indexing.all_links_and_anchors(); window.scrollTo(0, 0); // This is needed as getting anchor positions could have caused the viewport to scroll JSON.stringify(ret); '''%(margin_top, 0, margin_bottom))) if not isinstance(amap, dict): amap = {'links':[], 'anchors':{}} # Some javascript error occurred for val in amap['anchors'].itervalues(): if isinstance(val, dict) and 'column' in val: val['column'] = int(val['column']) for href, val in amap['links']: if isinstance(val, dict) and 'column' in val: val['column'] = int(val['column']) sections = self.get_sections(amap['anchors']) tl_sections = self.get_sections(amap['anchors'], True) col = 0 if self.header: evaljs('paged_display.header_template = ' + json.dumps(self.header)) if self.footer: evaljs('paged_display.footer_template = ' + json.dumps(self.footer)) if self.header or self.footer: evaljs('paged_display.create_header_footer("%s");'%self.hf_uuid) start_page = self.current_page_num mf = self.view.page().mainFrame() def set_section(col, sections, attr): # If this page has no section, use the section from the previous page idx = col if col in sections else col - 1 if col - 1 in sections else None if idx is not None: setattr(self, attr, sections[idx][0]) from calibre.ebooks.pdf.render.toc import calculate_page_number while True: set_section(col, sections, 'current_section') set_section(col, tl_sections, 'current_tl_section') self.doc.init_page(page_margins) num = calculate_page_number(self.current_page_num, self.opts.pdf_page_number_map, evaljs) if self.header or self.footer: if evaljs('paged_display.update_header_footer(%d)'%num) is True: self.load_header_footer_images() self.painter.save() mf.render(self.painter, mf.ContentsLayer) self.painter.restore() try: nsl = int(evaljs('paged_display.next_screen_location()')) except (TypeError, ValueError): break self.doc.end_page(nsl <= 0) if nsl <= 0: break evaljs('window.scrollTo(%d, 0); paged_display.position_header_footer();'%nsl) if self.doc.errors_occurred: break col += 1 if not self.doc.errors_occurred and self.doc.current_page_num > 1: self.doc.add_links(self.current_item, start_page, amap['links'], amap['anchors'])
class PDFWriter(QObject): def _pass_json_value_getter(self): val = json.dumps(self.bridge_value) return val def _pass_json_value_setter(self, value): self.bridge_value = json.loads(unicode(value)) _pass_json_value = pyqtProperty(str, fget=_pass_json_value_getter, fset=_pass_json_value_setter) @pyqtSlot(result=unicode) def title(self): return self.doc_title @pyqtSlot(result=unicode) def author(self): return self.doc_author @pyqtSlot(result=unicode) def section(self): return self.current_section def __init__(self, opts, log, cover_data=None, toc=None): from calibre.gui2 import must_use_qt must_use_qt() QObject.__init__(self) self.logger = self.log = log self.opts = opts self.cover_data = cover_data self.paged_js = None self.toc = toc self.loop = QEventLoop() self.view = QWebView() self.page = Page(opts, self.log) self.view.setPage(self.page) self.view.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing | QPainter.SmoothPixmapTransform) self.view.loadFinished.connect(self.render_html, type=Qt.QueuedConnection) for x in (Qt.Horizontal, Qt.Vertical): self.view.page().mainFrame().setScrollBarPolicy( x, Qt.ScrollBarAlwaysOff) self.report_progress = lambda x, y: x self.current_section = '' def dump(self, items, out_stream, pdf_metadata): opts = self.opts page_size = get_page_size(self.opts) xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY() # We cannot set the side margins in the webview as there is no right # margin for the last page (the margins are implemented with # -webkit-column-gap) ml, mr = opts.margin_left, opts.margin_right self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml, top_margin=0, right_margin=mr, bottom_margin=0, xdpi=xdpi, ydpi=ydpi, errors=self.log.error, debug=self.log.debug, compress=not opts.uncompressed_pdf, mark_links=opts.pdf_mark_links) self.footer = opts.pdf_footer_template if self.footer: self.footer = self.footer.strip() if not self.footer and opts.pdf_page_numbers: self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>' self.header = opts.pdf_header_template if self.header: self.header = self.header.strip() min_margin = 36 if self.footer and opts.margin_bottom < min_margin: self.log.warn( 'Bottom margin is too small for footer, increasing it.') opts.margin_bottom = min_margin if self.header and opts.margin_top < min_margin: self.log.warn('Top margin is too small for header, increasing it.') opts.margin_top = min_margin self.page.setViewportSize(QSize(self.doc.width(), self.doc.height())) self.render_queue = items self.total_items = len(items) mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom)) self.margin_top, self.margin_bottom = map(lambda x: int(floor(x)), (mt, mb)) self.painter = QPainter(self.doc) self.doc.set_metadata(title=pdf_metadata.title, author=pdf_metadata.author, tags=pdf_metadata.tags, mi=pdf_metadata.mi) self.doc_title = pdf_metadata.title self.doc_author = pdf_metadata.author self.painter.save() try: if self.cover_data is not None: p = QPixmap() try: p.loadFromData(self.cover_data) except TypeError: self.log.warn( 'This ebook does not have a raster cover, cannot generate cover for PDF' '. Cover type: %s' % type(self.cover_data)) if not p.isNull(): self.doc.init_page() draw_image_page(QRect(*self.doc.full_page_rect), self.painter, p, preserve_aspect_ratio=self.opts. preserve_cover_aspect_ratio) self.doc.end_page() finally: self.painter.restore() QTimer.singleShot(0, self.render_book) if self.loop.exec_() == 1: raise Exception('PDF Output failed, see log for details') if self.toc is not None and len(self.toc) > 0: self.doc.add_outline(self.toc) self.painter.end() if self.doc.errors_occurred: raise Exception('PDF Output failed, see log for details') def render_inline_toc(self): self.rendered_inline_toc = True from calibre.ebooks.pdf.render.toc import toc_as_html raw = toc_as_html(self.toc, self.doc, self.opts) pt = PersistentTemporaryFile('_pdf_itoc.htm') pt.write(raw) pt.close() self.render_queue.append(pt.name) self.render_next() def render_book(self): if self.doc.errors_occurred: return self.loop.exit(1) try: if not self.render_queue: if self.opts.pdf_add_toc and self.toc is not None and len( self.toc) > 0 and not hasattr(self, 'rendered_inline_toc'): return self.render_inline_toc() self.loop.exit() else: self.render_next() except: self.logger.exception('Rendering failed') self.loop.exit(1) def render_next(self): item = unicode(self.render_queue.pop(0)) self.logger.debug('Processing %s...' % item) self.current_item = item load_html(item, self.view) def render_html(self, ok): if ok: try: self.do_paged_render() except: self.log.exception('Rendering failed') self.loop.exit(1) return else: # The document is so corrupt that we can't render the page. self.logger.error('Document cannot be rendered.') self.loop.exit(1) return done = self.total_items - len(self.render_queue) self.report_progress( done / self.total_items, _('Rendered %s' % os.path.basename(self.current_item))) self.render_book() @property def current_page_num(self): return self.doc.current_page_num def load_mathjax(self): evaljs = self.view.page().mainFrame().evaluateJavaScript mjpath = P(u'viewer/mathjax').replace(os.sep, '/') if iswindows: mjpath = u'/' + mjpath if bool( evaljs(''' window.mathjax.base = %s; mathjax.check_for_math(); mathjax.math_present ''' % (json.dumps(mjpath, ensure_ascii=False)))): self.log.debug('Math present, loading MathJax') while not bool(evaljs('mathjax.math_loaded')): self.loop.processEvents(self.loop.ExcludeUserInputEvents) evaljs( 'document.getElementById("MathJax_Message").style.display="none";' ) def get_sections(self, anchor_map): sections = defaultdict(list) ci = os.path.abspath(os.path.normcase(self.current_item)) if self.toc is not None: for toc in self.toc.flat(): path = toc.abspath or None frag = toc.fragment or None if path is None: continue path = os.path.abspath(os.path.normcase(path)) if path == ci: col = 0 if frag and frag in anchor_map: col = anchor_map[frag]['column'] sections[col].append(toc.text or _('Untitled')) return sections def do_paged_render(self): if self.paged_js is None: import uuid from calibre.utils.resources import compiled_coffeescript as cc self.paged_js = cc('ebooks.oeb.display.utils') self.paged_js += cc('ebooks.oeb.display.indexing') self.paged_js += cc('ebooks.oeb.display.paged') self.paged_js += cc('ebooks.oeb.display.mathjax') self.hf_uuid = str(uuid.uuid4()).replace('-', '') self.view.page().mainFrame().addToJavaScriptWindowObject( "py_bridge", self) self.view.page().longjs_counter = 0 evaljs = self.view.page().mainFrame().evaluateJavaScript evaljs(self.paged_js) self.load_mathjax() evaljs(''' Object.defineProperty(py_bridge, 'value', { get : function() { return JSON.parse(this._pass_json_value); }, set : function(val) { this._pass_json_value = JSON.stringify(val); } }); document.body.style.backgroundColor = "white"; paged_display.set_geometry(1, %d, %d, %d); paged_display.layout(); paged_display.fit_images(); py_bridge.value = book_indexing.all_links_and_anchors(); window.scrollTo(0, 0); // This is needed as getting anchor positions could have caused the viewport to scroll ''' % (self.margin_top, 0, self.margin_bottom)) amap = self.bridge_value if not isinstance(amap, dict): amap = { 'links': [], 'anchors': {} } # Some javascript error occurred sections = self.get_sections(amap['anchors']) col = 0 if self.header: self.bridge_value = self.header evaljs('paged_display.header_template = py_bridge.value') if self.footer: self.bridge_value = self.footer evaljs('paged_display.footer_template = py_bridge.value') if self.header or self.footer: evaljs('paged_display.create_header_footer("%s");' % self.hf_uuid) start_page = self.current_page_num mf = self.view.page().mainFrame() while True: if col in sections: self.current_section = sections[col][0] elif col - 1 in sections: # Ensure we are using the last section on the previous page as # the section for this page, since this page has no sections self.current_section = sections[col - 1][-1] self.doc.init_page() if self.header or self.footer: evaljs('paged_display.update_header_footer(%d)' % self.current_page_num) self.painter.save() mf.render(self.painter) self.painter.restore() try: nsl = int(evaljs('paged_display.next_screen_location()')) except (TypeError, ValueError): break self.doc.end_page() if nsl <= 0: break evaljs( 'window.scrollTo(%d, 0); paged_display.position_header_footer();' % nsl) if self.doc.errors_occurred: break col += 1 if not self.doc.errors_occurred: self.doc.add_links(self.current_item, start_page, amap['links'], amap['anchors'])