Beispiel #1
0
class StatsCollector(object):

    def __init__(self, container, do_embed=False):
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            if src.startswith('url(') and src.endswith(')') and src[4] not in {'"', "'"}:
                # Quote the url otherwise cssutils fails to parse it if it has
                # ' or " in it
                src = "url('" + src[4:-1].replace("'", "\\'") + "')"
            style = self.parser.parseStyle('background-image:%s'%src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(src, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            for raw in font_families.iterkeys():
                style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
Beispiel #2
0
class PDFWriter(QObject):
    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    _pass_json_value = pyqtProperty(QString,
                                    fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    @pyqtSlot(result=unicode)
    def title(self):
        return self.doc_title

    @pyqtSlot(result=unicode)
    def author(self):
        return self.doc_author

    @pyqtSlot(result=unicode)
    def section(self):
        return self.current_section

    def __init__(self, opts, log, cover_data=None, toc=None):
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing
                                 | QPainter.TextAntialiasing
                                 | QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.report_progress = lambda x, y: x
        self.current_section = ''

    def dump(self, items, out_stream, pdf_metadata):
        opts = self.opts
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        # We cannot set the side margins in the webview as there is no right
        # margin for the last page (the margins are implemented with
        # -webkit-column-gap)
        ml, mr = opts.margin_left, opts.margin_right
        self.doc = PdfDevice(out_stream,
                             page_size=page_size,
                             left_margin=ml,
                             top_margin=0,
                             right_margin=mr,
                             bottom_margin=0,
                             xdpi=xdpi,
                             ydpi=ydpi,
                             errors=self.log.error,
                             debug=self.log.debug,
                             compress=not opts.uncompressed_pdf,
                             mark_links=opts.pdf_mark_links)
        self.footer = opts.pdf_footer_template
        if self.footer:
            self.footer = self.footer.strip()
        if not self.footer and opts.pdf_page_numbers:
            self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>'
        self.header = opts.pdf_header_template
        if self.header:
            self.header = self.header.strip()
        min_margin = 36
        if self.footer and opts.margin_bottom < min_margin:
            self.log.warn(
                'Bottom margin is too small for footer, increasing it.')
            opts.margin_bottom = min_margin
        if self.header and opts.margin_top < min_margin:
            self.log.warn('Top margin is too small for header, increasing it.')
            opts.margin_top = min_margin

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        self.margin_top, self.margin_bottom = map(lambda x: int(floor(x)),
                                                  (mt, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)
        self.doc_title = pdf_metadata.title
        self.doc_author = pdf_metadata.author
        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    self.doc.init_page()
                    draw_image_page(QRect(*self.doc.full_page_rect),
                                    self.painter,
                                    p,
                                    preserve_aspect_ratio=self.opts.
                                    preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        QTimer.singleShot(0, self.render_book)
        if self.loop.exec_() == 1:
            raise Exception('PDF Output failed, see log for details')

        if self.toc is not None and len(self.toc) > 0:
            self.doc.add_outline(self.toc)

        self.painter.end()

        if self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        self.report_progress(
            done / self.total_items,
            _('Rendered %s' % os.path.basename(self.current_item)))
        self.render_book()

    @property
    def current_page_num(self):
        return self.doc.current_page_num

    def load_mathjax(self):
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        mjpath = P(u'viewer/mathjax').replace(os.sep, '/')
        if iswindows:
            mjpath = u'/' + mjpath
        if evaljs('''
                    window.mathjax.base = %s;
                    mathjax.check_for_math(); mathjax.math_present
                    ''' % (json.dumps(mjpath, ensure_ascii=False))).toBool():
            self.log.debug('Math present, loading MathJax')
            while not evaljs('mathjax.math_loaded').toBool():
                self.loop.processEvents(self.loop.ExcludeUserInputEvents)
            evaljs(
                'document.getElementById("MathJax_Message").style.display="none";'
            )

    def get_sections(self, anchor_map):
        sections = {}
        ci = os.path.abspath(os.path.normcase(self.current_item))
        if self.toc is not None:
            for toc in self.toc.flat():
                path = toc.abspath or None
                frag = toc.fragment or None
                if path is None:
                    continue
                path = os.path.abspath(os.path.normcase(path))
                if path == ci:
                    col = 0
                    if frag and frag in anchor_map:
                        col = anchor_map[frag]['column']
                    if col not in sections:
                        sections[col] = toc.text or _('Untitled')

        return sections

    def do_paged_render(self):
        if self.paged_js is None:
            import uuid
            from calibre.utils.resources import compiled_coffeescript as cc
            self.paged_js = cc('ebooks.oeb.display.utils')
            self.paged_js += cc('ebooks.oeb.display.indexing')
            self.paged_js += cc('ebooks.oeb.display.paged')
            self.paged_js += cc('ebooks.oeb.display.mathjax')
            self.hf_uuid = str(uuid.uuid4()).replace('-', '')

        self.view.page().mainFrame().addToJavaScriptWindowObject(
            "py_bridge", self)
        self.view.page().longjs_counter = 0
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.load_mathjax()

        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, %d, %d, %d);
        paged_display.layout();
        paged_display.fit_images();
        py_bridge.value = book_indexing.all_links_and_anchors();
        ''' % (self.margin_top, 0, self.margin_bottom))

        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {
                'links': [],
                'anchors': {}
            }  # Some javascript error occurred
        sections = self.get_sections(amap['anchors'])
        col = 0

        if self.header:
            self.bridge_value = self.header
            evaljs('paged_display.header_template = py_bridge.value')
        if self.footer:
            self.bridge_value = self.footer
            evaljs('paged_display.footer_template = py_bridge.value')
        if self.header or self.footer:
            evaljs('paged_display.create_header_footer("%s");' % self.hf_uuid)

        start_page = self.current_page_num

        mf = self.view.page().mainFrame()
        while True:
            if col in sections:
                self.current_section = sections[col]
            self.doc.init_page()
            if self.header or self.footer:
                evaljs('paged_display.update_header_footer(%d)' %
                       self.current_page_num)
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs(
                'window.scrollTo(%d, 0); paged_display.position_header_footer();'
                % nsl[0])
            if self.doc.errors_occurred:
                break
            col += 1

        if not self.doc.errors_occurred:
            self.doc.add_links(self.current_item, start_page, amap['links'],
                               amap['anchors'])
Beispiel #3
0
class PDFWriter(QObject):  # {{{
    def __init__(self, opts, log, cover_data=None, toc=None):
        from calibre.gui2 import is_ok_to_use_qt
        from calibre.utils.podofo import get_podofo

        if not is_ok_to_use_qt():
            raise Exception("Not OK to use Qt")
        QObject.__init__(self)

        self.logger = self.log = log
        self.podofo = get_podofo()
        self.doc = self.podofo.PDFDoc()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing | QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html, type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u"_pdf_output_parts")

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

    def dump(self, items, out_stream, pdf_metadata):
        self.metadata = pdf_metadata
        self._delete_tmpdir()
        self.outline = Outline(self.toc, items)

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream
        self.insert_cover()

        self.render_succeeded = False
        self.current_page_num = self.doc.page_count()
        self.combine_queue.append(os.path.join(self.tmp_path, "qprinter_out.pdf"))
        self.first_page = True
        self.setup_printer(self.combine_queue[-1])
        QTimer.singleShot(0, self._render_book)
        self.loop.exec_()
        if self.painter is not None:
            self.painter.end()
        if self.printer is not None:
            self.printer.abort()

        if not self.render_succeeded:
            raise Exception("Rendering HTML to PDF failed")

    def _render_book(self):
        try:
            if len(self.render_queue) == 0:
                self._write()
            else:
                self._render_next()
        except:
            self.logger.exception("Rendering failed")
            self.loop.exit(1)

    def _render_next(self):
        item = unicode(self.render_queue.pop(0))

        self.logger.debug("Processing %s..." % item)
        self.current_item = item
        load_html(item, self.view)

    def _render_html(self, ok):
        if ok:
            self.do_paged_render()
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error("Document cannot be rendered.")
            self.loop.exit(0)
            return
        self._render_book()

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter, fset=_pass_json_value_setter)

    def setup_printer(self, outpath):
        self.printer = self.painter = None
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
        zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)
        pr = printer.pageRect()
        self.printer, self.painter = printer, painter
        self.viewport_size = QSize(pr.width() / zoomx, pr.height() / zoomy)
        self.page.setViewportSize(self.viewport_size)

    def do_paged_render(self):
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript

            self.paged_js = compiled_coffeescript("ebooks.oeb.display.utils")
            self.paged_js += compiled_coffeescript("ebooks.oeb.display.indexing")
            self.paged_js += compiled_coffeescript("ebooks.oeb.display.paged")

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs(
            """
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, 0, 0, 0);
        paged_display.layout();
        paged_display.fit_images();
        """
        )
        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        if not self.first_page:
            start_page += 1
        while True:
            if not self.first_page:
                if self.printer.newPage():
                    self.current_page_num += 1
            self.first_page = False
            mf.render(self.painter)
            nsl = evaljs("paged_display.next_screen_location()").toInt()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs("window.scrollTo(%d, 0)" % nsl[0])

        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs("py_bridge.value = book_indexing.anchor_positions(py_bridge.value)")
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {}  # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor, start_page + pagenum, ypos)

    def append_doc(self, outpath):
        doc = self.podofo.PDFDoc()
        with open(outpath, "rb") as f:
            raw = f.read()
        doc.load(raw)
        self.doc.append(doc)

    def _delete_tmpdir(self):
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory("_pdf_output_parts")

    def insert_cover(self):
        if not isinstance(self.cover_data, bytes):
            return
        item_path = os.path.join(self.tmp_path, "cover.pdf")
        printer = get_pdf_printer(self.opts, output_file_name=item_path, for_comic=True)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(printer, painter, p, preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
            self.append_doc(item_path)
        printer.abort()

    def _write(self):
        self.painter.end()
        self.printer.abort()
        self.painter = self.printer = None
        self.append_doc(self.combine_queue[-1])

        try:
            self.doc.creator = u"%s %s [http://calibre-ebook.com]" % (__appname__, __version__)
            self.doc.title = self.metadata.title
            self.doc.author = self.metadata.author
            if self.metadata.tags:
                self.doc.keywords = self.metadata.tags
            self.outline(self.doc)
            self.doc.save_to_fileobj(self.out_stream)
            self.render_succeeded = True
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
Beispiel #4
0
class PDFWriter(QObject):

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
            fset=_pass_json_value_setter)

    @pyqtSlot(result=unicode)
    def title(self):
        return self.doc_title

    @pyqtSlot(result=unicode)
    def author(self):
        return self.doc_author

    @pyqtSlot(result=unicode)
    def section(self):
        return self.current_section

    def __init__(self, opts, log, cover_data=None, toc=None):
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing|
                    QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        self.report_progress = lambda x, y: x
        self.current_section = ''

    def dump(self, items, out_stream, pdf_metadata):
        opts = self.opts
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        # We cannot set the side margins in the webview as there is no right
        # margin for the last page (the margins are implemented with
        # -webkit-column-gap)
        ml, mr = opts.margin_left, opts.margin_right
        self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml,
                             top_margin=0, right_margin=mr, bottom_margin=0,
                             xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
                             debug=self.log.debug, compress=not
                             opts.uncompressed_pdf,
                             mark_links=opts.pdf_mark_links)
        self.footer = opts.pdf_footer_template
        if self.footer:
            self.footer = self.footer.strip()
        if not self.footer and opts.pdf_page_numbers:
            self.footer = '<p style="text-align:center; text-indent: 0">_PAGENUM_</p>'
        self.header = opts.pdf_header_template
        if self.header:
            self.header = self.header.strip()
        min_margin = 36
        if self.footer and opts.margin_bottom < min_margin:
            self.log.warn('Bottom margin is too small for footer, increasing it.')
            opts.margin_bottom = min_margin
        if self.header and opts.margin_top < min_margin:
            self.log.warn('Top margin is too small for header, increasing it.')
            opts.margin_top = min_margin

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        self.margin_top, self.margin_bottom = map(lambda x:int(floor(x)), (mt, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)
        self.doc_title = pdf_metadata.title
        self.doc_author = pdf_metadata.author
        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    self.doc.init_page()
                    draw_image_page(QRect(*self.doc.full_page_rect),
                            self.painter, p,
                            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        QTimer.singleShot(0, self.render_book)
        if self.loop.exec_() == 1:
            raise Exception('PDF Output failed, see log for details')

        if self.toc is not None and len(self.toc) > 0:
            self.doc.add_outline(self.toc)

        self.painter.end()

        if self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
                return
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        self.report_progress(done/self.total_items,
                        _('Rendered %s'%os.path.basename(self.current_item)))
        self.render_book()

    @property
    def current_page_num(self):
        return self.doc.current_page_num

    def load_mathjax(self):
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        mjpath = P(u'viewer/mathjax').replace(os.sep, '/')
        if iswindows:
            mjpath = u'/' + mjpath
        if evaljs('''
                    window.mathjax.base = %s;
                    mathjax.check_for_math(); mathjax.math_present
                    '''%(json.dumps(mjpath, ensure_ascii=False))).toBool():
            self.log.debug('Math present, loading MathJax')
            while not evaljs('mathjax.math_loaded').toBool():
                self.loop.processEvents(self.loop.ExcludeUserInputEvents)
            evaljs('document.getElementById("MathJax_Message").style.display="none";')

    def get_sections(self, anchor_map):
        sections = {}
        ci = os.path.abspath(os.path.normcase(self.current_item))
        if self.toc is not None:
            for toc in self.toc.flat():
                path = toc.abspath or None
                frag = toc.fragment or None
                if path is None:
                    continue
                path = os.path.abspath(os.path.normcase(path))
                if path == ci:
                    col = 0
                    if frag and frag in anchor_map:
                        col = anchor_map[frag]['column']
                    if col not in sections:
                        sections[col] = toc.text or _('Untitled')

        return sections

    def do_paged_render(self):
        if self.paged_js is None:
            import uuid
            from calibre.utils.resources import compiled_coffeescript as cc
            self.paged_js =  cc('ebooks.oeb.display.utils')
            self.paged_js += cc('ebooks.oeb.display.indexing')
            self.paged_js += cc('ebooks.oeb.display.paged')
            self.paged_js += cc('ebooks.oeb.display.mathjax')
            self.hf_uuid = str(uuid.uuid4()).replace('-', '')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        self.view.page().longjs_counter = 0
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.load_mathjax()

        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, %d, %d, %d);
        paged_display.layout();
        paged_display.fit_images();
        py_bridge.value = book_indexing.all_links_and_anchors();
        '''%(self.margin_top, 0, self.margin_bottom))

        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {'links':[], 'anchors':{}} # Some javascript error occurred
        sections = self.get_sections(amap['anchors'])
        col = 0

        if self.header:
            self.bridge_value = self.header
            evaljs('paged_display.header_template = py_bridge.value')
        if self.footer:
            self.bridge_value = self.footer
            evaljs('paged_display.footer_template = py_bridge.value')
        if self.header or self.footer:
            evaljs('paged_display.create_header_footer("%s");'%self.hf_uuid)

        start_page = self.current_page_num

        mf = self.view.page().mainFrame()
        while True:
            if col in sections:
                self.current_section = sections[col]
            self.doc.init_page()
            if self.header or self.footer:
                evaljs('paged_display.update_header_footer(%d)'%self.current_page_num)
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            if not nsl[1] or nsl[0] <= 0:
                break
            evaljs('window.scrollTo(%d, 0); paged_display.position_header_footer();'%nsl[0])
            if self.doc.errors_occurred:
                break
            col += 1

        if not self.doc.errors_occurred:
            self.doc.add_links(self.current_item, start_page, amap['links'],
                            amap['anchors'])
Beispiel #5
0
class StatsCollector(object):
    def __init__(self, container, do_embed=False):
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger("calibre.css"))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception("Failed to gather statistics from book, see log for details")

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception("Rendering failed")
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error("Failed to render document: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception("Failed to collect font stats from: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        if not href.startswith("file://"):
            self.log.warn("Non-local URI in", warn_name, ":", href, "ignoring")
            return None
        src = href[len("file://") :]
        if iswindows and len(src) > 2 and (src[0], src[2]) == ("/", ":"):
            src = src[1:]
        src = src.replace("/", os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn("Missing resource", href, "in", warn_name, "ignoring")
            return None
        return name

    def collect_font_stats(self):
        self.page.evaljs("window.font_stats.get_font_face_rules()")
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception("Unknown error occurred while reading font-face rules")

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get("font-family", None)
            if not ff:
                continue
            style = self.parser.parseStyle("font-family:%s" % ff, validate=False)
            ff = [x.value for x in style.getProperty("font-family").propertyValue]
            if not ff or ff[0] == "inherit":
                continue
            rule["font-family"] = frozenset(icu_lower(f) for f in ff)
            src = rule.get("src", None)
            if not src:
                continue
            style = self.parser.parseStyle("background-image:%s" % src, validate=False)
            src = style.getProperty("background-image").propertyValue[0].uri
            name = self.href_to_name(src, "@font-face rule")
            if name is None:
                continue
            rule["src"] = name
            normalize_font_properties(rule)
            rule["width"] = widths[rule["font-stretch"]]
            rule["weight"] = int(rule["font-weight"])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule["src"]] = rule

        for rule in rules:
            if rule["src"] not in self.font_stats:
                self.font_stats[rule["src"]] = set()

        self.page.evaljs("window.font_stats.get_font_usage()")
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception("Unknown error occurred while reading font usage")
        exclude = {"\n", "\r", "\t"}
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        bad_fonts = {"serif", "sans-serif", "monospace", "cursive", "fantasy", "sansserif", "inherit"}
        for font in font_usage:
            text = set()
            for t in font["text"]:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule["src"]] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get("font-family", [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {"font-weight", "font-style", "font-stretch", "font-family"}
                    key = frozenset(((k, ff[0] if k == "font-family" else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        val.update({k: (font[k][0] if k == "font-family" else font[k]) for k in keys})
                        val["text"] = set()
                    val["text"] |= text
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs("window.font_stats.get_font_families()")
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception("Unknown error occurred while reading font families")
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            for raw in font_families.iterkeys():
                style = self.parser.parseStyle("font-family:" + raw, validate=False).getProperty("font-family")
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
Beispiel #6
0
class PDFWriter(QObject):  # {{{
    def __init__(self, opts, log, cover_data=None, toc=None):
        from calibre.gui2 import is_ok_to_use_qt
        from calibre.utils.podofo import get_podofo
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.podofo = get_podofo()
        self.doc = self.podofo.PDFDoc()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing
                                 | QPainter.TextAntialiasing
                                 | QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

    def dump(self, items, out_stream, pdf_metadata):
        self.metadata = pdf_metadata
        self._delete_tmpdir()
        self.outline = Outline(self.toc, items)

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream
        self.insert_cover()

        self.render_succeeded = False
        self.current_page_num = self.doc.page_count()
        self.combine_queue.append(
            os.path.join(self.tmp_path, 'qprinter_out.pdf'))
        self.first_page = True
        self.setup_printer(self.combine_queue[-1])
        QTimer.singleShot(0, self._render_book)
        self.loop.exec_()
        if self.painter is not None:
            self.painter.end()
        if self.printer is not None:
            self.printer.abort()

        if not self.render_succeeded:
            raise Exception('Rendering HTML to PDF failed')

    def _render_book(self):
        try:
            if len(self.render_queue) == 0:
                self._write()
            else:
                self._render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def _render_next(self):
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def _render_html(self, ok):
        if ok:
            self.do_paged_render()
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(0)
            return
        self._render_book()

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    _pass_json_value = pyqtProperty(QString,
                                    fget=_pass_json_value_getter,
                                    fset=_pass_json_value_setter)

    def setup_printer(self, outpath):
        self.printer = self.painter = None
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
        zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)
        pr = printer.pageRect()
        self.printer, self.painter = printer, painter
        self.viewport_size = QSize(pr.width() / zoomx, pr.height() / zoomy)
        self.page.setViewportSize(self.viewport_size)

    def do_paged_render(self):
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript(
                'ebooks.oeb.display.indexing')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')

        self.view.page().mainFrame().addToJavaScriptWindowObject(
            "py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, 0, 0, 0);
        paged_display.layout();
        paged_display.fit_images();
        ''')
        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        if not self.first_page:
            start_page += 1
        while True:
            if not self.first_page:
                if self.printer.newPage():
                    self.current_page_num += 1
            self.first_page = False
            mf.render(self.painter)
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            if not nsl[1] or nsl[0] <= 0: break
            evaljs('window.scrollTo(%d, 0)' % nsl[0])

        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs(
            'py_bridge.value = book_indexing.anchor_positions(py_bridge.value)'
        )
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {}  # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor,
                                 start_page + pagenum, ypos)

    def append_doc(self, outpath):
        doc = self.podofo.PDFDoc()
        with open(outpath, 'rb') as f:
            raw = f.read()
        doc.load(raw)
        self.doc.append(doc)

    def _delete_tmpdir(self):
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        if not isinstance(self.cover_data, bytes):
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts,
                                  output_file_name=item_path,
                                  for_comic=True)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(
                printer,
                painter,
                p,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
            self.append_doc(item_path)
        printer.abort()

    def _write(self):
        self.painter.end()
        self.printer.abort()
        self.painter = self.printer = None
        self.append_doc(self.combine_queue[-1])

        try:
            self.doc.creator = u'%s %s [http://calibre-ebook.com]' % (
                __appname__, __version__)
            self.doc.title = self.metadata.title
            self.doc.author = self.metadata.author
            if self.metadata.tags:
                self.doc.keywords = self.metadata.tags
            self.outline(self.doc)
            self.doc.save_to_fileobj(self.out_stream)
            self.render_succeeded = True
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
Beispiel #7
0
class StatsCollector(object):
    def __init__(self, container):
        self.container = container
        self.log = self.logger = container.log
        must_use_qt()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception(
                'Failed to gather statistics from book, see log for details')

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error('Failed to render document: %s' %
                           self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s' %
                               self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff: continue
            style = parseStyle('font-family:%s' % ff, validate=False)
            ff = [
                x.value for x in style.getProperty('font-family').propertyValue
            ]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src: continue
            style = parseStyle('background-image:%s' % src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(src, '@font-face rule')
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules:
            return

        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text: continue
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
Beispiel #8
0
class PDFWriter(QObject): # {{{

    def __init__(self, opts, log, cover_data=None):
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = log

        self.loop = QEventLoop()
        self.view = QWebView()
        self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None

    def dump(self, items, out_stream, pdf_metadata):
        self.metadata = pdf_metadata
        self._delete_tmpdir()

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream

        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()


    @QtCore.pyqtSignature('_render_book()')
    def _render_book(self):
        if len(self.render_queue) == 0:
            self._write()
        else:
            self._render_next()

    def _render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))

        self.logger.debug('Processing %s...' % item)
        load_html(item, self.view)

    def _render_html(self, ok):
        if ok:
            item_path = os.path.join(self.tmp_path, '%i.pdf' % len(self.combine_queue))
            self.logger.debug('\tRendering item %s as %i.pdf' % (os.path.basename(str(self.view.url().toLocalFile())), len(self.combine_queue)))
            self.do_paged_render(item_path)
        else:
            # The document is so corrupt that we can't render the page.
            self.loop.exit(0)
            raise Exception('Document cannot be rendered.')
        self._render_book()

    def do_paged_render(self, outpath):
        from PyQt4.Qt import QSize, QPainter
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX()/self.view.logicalDpiX()
        zoomy = printer.logicalDpiY()/self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)

        pr = printer.pageRect()
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.view.page().setViewportSize(QSize(pr.width()/zoomx,
            pr.height()/zoomy))
        evaljs('''
        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, 0, 0, 0);
        paged_display.layout();
        paged_display.fit_images();
        ''')
        mf = self.view.page().mainFrame()
        while True:
            mf.render(painter)
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            if not nsl[1] or nsl[0] <= 0: break
            evaljs('window.scrollTo(%d, 0)'%nsl[0])
            printer.newPage()

        painter.end()
        printer.abort()

    def _delete_tmpdir(self):
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        if self.cover_data is None:
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts, output_file_name=item_path)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(printer, painter, p,
                    preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
        printer.abort()


    def _write(self):
        self.logger.debug('Combining individual PDF parts...')

        self.insert_cover()

        try:
            outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
            for item in self.combine_queue:
                # The input PDF stream must remain open until the final PDF
                # is written to disk. PyPDF references pages added to the
                # final PDF from the input PDF on disk. It does not store
                # the pages in memory so we can't close the input PDF.
                inputPDF = PdfFileReader(open(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
Beispiel #9
0
class StatsCollector(object):
    def __init__(self, container, do_embed=False):
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL,
                                log=logging.getLogger('calibre.css'))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception(
                'Failed to gather statistics from book, see log for details')

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error('Failed to render document: %s' %
                           self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s' %
                               self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s' % ff,
                                           validate=False)
            ff = [
                x.value for x in style.getProperty('font-family').propertyValue
            ]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            style = self.parser.parseStyle('background-image:%s' % src,
                                           validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            name = self.href_to_name(src, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(
            self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(
            self.current_item)] = fu = defaultdict(dict)
        bad_fonts = {
            'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy',
            'sansserif', 'inherit'
        }
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {
                        'font-weight', 'font-style', 'font-stretch',
                        'font-family'
                    }
                    key = frozenset(((k, ff[0] if k == 'font-family' else v)
                                     for k, v in font.iteritems()
                                     if k in keys))
                    val = fu[key]
                    if not val:
                        val.update({
                            k: (font[k][0] if k == 'font-family' else font[k])
                            for k in keys
                        })
                        val['text'] = set()
                    val['text'] |= text
        self.font_usage_map[self.container.abspath_to_name(
            self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception(
                    'Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(
                self.current_item)] = fs = set()
            for raw in font_families.iterkeys():
                style = self.parser.parseStyle(
                    'font-family:' + raw,
                    validate=False).getProperty('font-family')
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
Beispiel #10
0
class StatsCollector(object):

    def __init__(self, container):
        self.container = container
        self.log = self.logger = container.log
        must_use_qt()

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def collect_font_stats(self):
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff: continue
            style = parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src: continue
            style = parseStyle('background-image:%s'%src, validate=False)
            src = style.getProperty('background-image').propertyValue[0].uri
            if not src.startswith('file://'):
                self.log.warn('Unknown URI in @font-face: %r'%src)
                continue
            src = src[len('file://'):]
            if iswindows and src.startswith('/'):
                src = src[1:]
            src = src.replace('/', os.sep)
            src = unquote(src)
            name = self.container.abspath_to_name(src)
            if not self.container.has_name(name):
                self.log.warn('Font %r referenced in @font-face rule not found'
                              %name)
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules:
            return

        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        exclude = {'\n', '\r', '\t'}
        for font in font_usage:
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text: continue
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
Beispiel #11
0
class PDFWriter(QObject):  # {{{
    def __init__(self, opts, log, cover_data=None):
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = log

        self.loop = QEventLoop()
        self.view = QWebView()
        self.view.setRenderHints(QPainter.Antialiasing
                                 | QPainter.TextAntialiasing
                                 | QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self._render_html,
                                       type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(
                x, Qt.ScrollBarAlwaysOff)
        self.render_queue = []
        self.combine_queue = []
        self.tmp_path = PersistentTemporaryDirectory(u'_pdf_output_parts')

        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None

    def dump(self, items, out_stream, pdf_metadata):
        self.metadata = pdf_metadata
        self._delete_tmpdir()

        self.render_queue = items
        self.combine_queue = []
        self.out_stream = out_stream

        QMetaObject.invokeMethod(self, "_render_book", Qt.QueuedConnection)
        self.loop.exec_()

    @QtCore.pyqtSignature('_render_book()')
    def _render_book(self):
        if len(self.render_queue) == 0:
            self._write()
        else:
            self._render_next()

    def _render_next(self):
        item = unicode(self.render_queue.pop(0))
        self.combine_queue.append(
            os.path.join(self.tmp_path,
                         '%i.pdf' % (len(self.combine_queue) + 1)))

        self.logger.debug('Processing %s...' % item)
        load_html(item, self.view)

    def _render_html(self, ok):
        if ok:
            item_path = os.path.join(self.tmp_path,
                                     '%i.pdf' % len(self.combine_queue))
            self.logger.debug(
                '\tRendering item %s as %i.pdf' %
                (os.path.basename(str(
                    self.view.url().toLocalFile())), len(self.combine_queue)))
            self.do_paged_render(item_path)
        else:
            # The document is so corrupt that we can't render the page.
            self.loop.exit(0)
            raise Exception('Document cannot be rendered.')
        self._render_book()

    def do_paged_render(self, outpath):
        from PyQt4.Qt import QSize, QPainter
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
        printer = get_pdf_printer(self.opts, output_file_name=outpath)
        painter = QPainter(printer)
        zoomx = printer.logicalDpiX() / self.view.logicalDpiX()
        zoomy = printer.logicalDpiY() / self.view.logicalDpiY()
        painter.scale(zoomx, zoomy)

        pr = printer.pageRect()
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        self.view.page().setViewportSize(
            QSize(pr.width() / zoomx,
                  pr.height() / zoomy))
        evaljs('''
        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, 0, 0, 0);
        paged_display.layout();
        paged_display.fit_images();
        ''')
        mf = self.view.page().mainFrame()
        while True:
            mf.render(painter)
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            if not nsl[1] or nsl[0] <= 0: break
            evaljs('window.scrollTo(%d, 0)' % nsl[0])
            printer.newPage()

        painter.end()
        printer.abort()

    def _delete_tmpdir(self):
        if os.path.exists(self.tmp_path):
            shutil.rmtree(self.tmp_path, True)
            self.tmp_path = PersistentTemporaryDirectory('_pdf_output_parts')

    def insert_cover(self):
        if self.cover_data is None:
            return
        item_path = os.path.join(self.tmp_path, 'cover.pdf')
        printer = get_pdf_printer(self.opts, output_file_name=item_path)
        self.combine_queue.insert(0, item_path)
        p = QPixmap()
        p.loadFromData(self.cover_data)
        if not p.isNull():
            painter = QPainter(printer)
            draw_image_page(
                printer,
                painter,
                p,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
            painter.end()
        printer.abort()

    def _write(self):
        self.logger.debug('Combining individual PDF parts...')

        self.insert_cover()

        try:
            outPDF = PdfFileWriter(title=self.metadata.title,
                                   author=self.metadata.author)
            for item in self.combine_queue:
                # The input PDF stream must remain open until the final PDF
                # is written to disk. PyPDF references pages added to the
                # final PDF from the input PDF on disk. It does not store
                # the pages in memory so we can't close the input PDF.
                inputPDF = PdfFileReader(open(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)
Beispiel #12
0
class PDFWriter(QObject):

    def _pass_json_value_getter(self):
        val = json.dumps(self.bridge_value)
        return QString(val)

    def _pass_json_value_setter(self, value):
        self.bridge_value = json.loads(unicode(value))

    _pass_json_value = pyqtProperty(QString, fget=_pass_json_value_getter,
            fset=_pass_json_value_setter)

    def __init__(self, opts, log, cover_data=None, toc=None):
        from calibre.gui2 import is_ok_to_use_qt
        if not is_ok_to_use_qt():
            raise Exception('Not OK to use Qt')
        QObject.__init__(self)

        self.logger = self.log = log
        self.opts = opts
        self.cover_data = cover_data
        self.paged_js = None
        self.toc = toc

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(opts, self.log)
        self.view.setPage(self.page)
        self.view.setRenderHints(QPainter.Antialiasing|
                    QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
        self.view.loadFinished.connect(self.render_html,
                type=Qt.QueuedConnection)
        for x in (Qt.Horizontal, Qt.Vertical):
            self.view.page().mainFrame().setScrollBarPolicy(x,
                    Qt.ScrollBarAlwaysOff)
        self.report_progress = lambda x, y: x

    def dump(self, items, out_stream, pdf_metadata):
        opts = self.opts
        self.outline = Outline(self.toc, items)
        page_size = get_page_size(self.opts)
        xdpi, ydpi = self.view.logicalDpiX(), self.view.logicalDpiY()
        ml, mr = opts.margin_left, opts.margin_right
        margin_side = min(ml, mr)
        ml, mr = ml - margin_side, mr - margin_side
        self.doc = PdfDevice(out_stream, page_size=page_size, left_margin=ml,
                             top_margin=0, right_margin=mr, bottom_margin=0,
                             xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
                             debug=self.log.debug, compress=not
                             opts.uncompressed_pdf)

        self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
        self.render_queue = items
        self.total_items = len(items)

        # TODO: Test margins
        mt, mb = map(self.doc.to_px, (opts.margin_top, opts.margin_bottom))
        ms = self.doc.to_px(margin_side, vertical=False)
        self.margin_top, self.margin_size, self.margin_bottom = map(
            lambda x:int(floor(x)), (mt, ms, mb))

        self.painter = QPainter(self.doc)
        self.doc.set_metadata(title=pdf_metadata.title,
                              author=pdf_metadata.author,
                              tags=pdf_metadata.tags)
        self.painter.save()
        try:
            if self.cover_data is not None:
                p = QPixmap()
                p.loadFromData(self.cover_data)
                if not p.isNull():
                    draw_image_page(QRect(0, 0, self.doc.width(), self.doc.height()),
                            self.painter, p,
                            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
                    self.doc.end_page()
        finally:
            self.painter.restore()

        QTimer.singleShot(0, self.render_book)
        self.loop.exec_()

        # TODO: Outline and links
        self.painter.end()

        if self.doc.errors_occurred:
            raise Exception('PDF Output failed, see log for details')

    def render_book(self):
        if self.doc.errors_occurred:
            return self.loop.exit(1)
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        item = unicode(self.render_queue.pop(0))

        self.logger.debug('Processing %s...' % item)
        self.current_item = item
        load_html(item, self.view)

    def render_html(self, ok):
        if ok:
            try:
                self.do_paged_render()
            except:
                self.log.exception('Rendering failed')
                self.loop.exit(1)
        else:
            # The document is so corrupt that we can't render the page.
            self.logger.error('Document cannot be rendered.')
            self.loop.exit(1)
            return
        done = self.total_items - len(self.render_queue)
        self.report_progress(done/self.total_items,
                        _('Rendered %s'%os.path.basename(self.current_item)))
        self.render_book()

    @property
    def current_page_num(self):
        return self.doc.current_page_num

    def do_paged_render(self):
        if self.paged_js is None:
            from calibre.utils.resources import compiled_coffeescript
            self.paged_js =  compiled_coffeescript('ebooks.oeb.display.utils')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing')
            self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')

        self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
        evaljs = self.view.page().mainFrame().evaluateJavaScript
        evaljs(self.paged_js)
        evaljs('''
        py_bridge.__defineGetter__('value', function() {
            return JSON.parse(this._pass_json_value);
        });
        py_bridge.__defineSetter__('value', function(val) {
            this._pass_json_value = JSON.stringify(val);
        });

        document.body.style.backgroundColor = "white";
        paged_display.set_geometry(1, %d, %d, %d);
        paged_display.layout();
        paged_display.fit_images();
        '''%(self.margin_top, self.margin_size, self.margin_bottom))

        mf = self.view.page().mainFrame()
        start_page = self.current_page_num
        dx = 0
        while True:
            self.doc.init_page()
            self.painter.save()
            mf.render(self.painter)
            self.painter.restore()
            nsl = evaljs('paged_display.next_screen_location()').toInt()
            self.doc.end_page()
            if not nsl[1] or nsl[0] <= 0:
                break
            dx = nsl[0]
            evaljs('window.scrollTo(%d, 0)'%dx)
            if self.doc.errors_occurred:
                break

        self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
        evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)')
        amap = self.bridge_value
        if not isinstance(amap, dict):
            amap = {} # Some javascript error occurred
        self.outline.set_pos(self.current_item, None, start_page, 0)
        for anchor, x in amap.iteritems():
            pagenum, ypos = x
            self.outline.set_pos(self.current_item, anchor, start_page + pagenum, ypos)