Beispiel #1
0
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        self.__connections = []
        Widget.__init__(
            self,
            parent,
            ["margin_top", "margin_left", "margin_right", "margin_bottom", "input_profile", "output_profile"],
        )

        self.db, self.book_id = db, book_id
        self.input_model = ProfileModel(input_profiles())
        self.output_model = ProfileModel(output_profiles())
        self.opt_input_profile.setModel(self.input_model)
        self.opt_output_profile.setModel(self.output_model)
        for g, slot in self.__connections:
            g.selectionModel().currentChanged.connect(slot)
        del self.__connections

        for x in (self.opt_input_profile, self.opt_output_profile):
            x.setMouseTracking(True)
            self.connect(x, SIGNAL("entered(QModelIndex)"), self.show_desc)
        self.initialize_options(get_option, get_help, db, book_id)
        it = unicode(self.opt_input_profile.toolTip())
        self.opt_input_profile.setToolTip("<p>" + it.replace("t.", "t.\n<br>"))
        it = unicode(self.opt_output_profile.toolTip())
        self.opt_output_profile.setToolTip("<p>" + it.replace("t.", "ce.\n<br>"))
Beispiel #2
0
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        self.__connections = []
        Widget.__init__(self, parent, [
            'margin_top', 'margin_left', 'margin_right', 'margin_bottom',
            'input_profile', 'output_profile'
        ])

        self.db, self.book_id = db, book_id
        self.input_model = ProfileModel(input_profiles())
        self.output_model = ProfileModel(output_profiles())
        self.opt_input_profile.setModel(self.input_model)
        self.opt_output_profile.setModel(self.output_model)
        for g, slot in self.__connections:
            g.selectionModel().currentChanged.connect(slot)
        del self.__connections

        for x in (self.opt_input_profile, self.opt_output_profile):
            x.setMouseTracking(True)
            x.entered[(QModelIndex)].connect(self.show_desc)
        self.initialize_options(get_option, get_help, db, book_id)
        it = str(self.opt_input_profile.toolTip())
        self.opt_input_profile.setToolTip('<p>' + it.replace('t.', 't.\n<br>'))
        it = str(self.opt_output_profile.toolTip())
        self.opt_output_profile.setToolTip('<p>' +
                                           it.replace('t.', 'ce.\n<br>'))
Beispiel #3
0
 def get_profile_values(self):
     from calibre.ebooks.conversion.config import load_defaults
     recs = load_defaults('page_setup')
     pfname = recs.get('output_profile', 'default')
     from calibre.customize.ui import output_profiles
     for profile in output_profiles():
         if profile.short_name == pfname:
             break
     dbase = profile.fbase
     fsizes = profile.fkey
     return dbase, fsizes
Beispiel #4
0
 def get_profile_values(self):
     from calibre.ebooks.conversion.config import load_defaults
     recs = load_defaults('page_setup')
     pfname = recs.get('output_profile', 'default')
     from calibre.customize.ui import output_profiles
     for profile in output_profiles():
         if profile.short_name == pfname:
             break
     dbase = profile.fbase
     fsizes = profile.fkey
     return dbase, fsizes
Beispiel #5
0
def profiles():
    ans = getattr(profiles, 'ans', None)
    if ans is None:
        def desc(profile):
            w, h = profile.screen_size
            if w >= 10000:
                ss = _('unlimited')
            else:
                ss = _('%(width)d x %(height)d pixels') % dict(width=w, height=h)
            ss = _('Screen size: %s') % ss
            return {'name': profile.name, 'description': ('%s [%s]' % (profile.description, ss))}

        ans = profiles.ans = {}
        ans['input'] = {p.short_name: desc(p) for p in input_profiles()}
        ans['output'] = {p.short_name: desc(p) for p in output_profiles()}
    return ans
Beispiel #6
0
def render_jacket(container, jacket):
    mi = container.mi
    ps = load_defaults('page_setup')
    op = ps.get('output_profile', 'default')
    opmap = {x.short_name:x for x in output_profiles()}
    output_profile = opmap.get(op, opmap['default'])
    root = render(mi, output_profile)
    for img, path in referenced_images(root):
        container.log('Embedding referenced image: %s into jacket' % path)
        ext = path.rpartition('.')[-1]
        jacket_item = container.generate_item('jacket_image.'+ext, id_prefix='jacket_img')
        name = container.href_to_name(jacket_item.get('href'), container.opf_name)
        with open(path, 'rb') as f:
            container.parsed_cache[name] = f.read()
            container.commit_item(name)
        href = container.name_to_href(name, jacket)
        img.set('src', href)
    return root
Beispiel #7
0
def render_jacket(container, jacket):
    mi = container.mi
    ps = load_defaults('page_setup')
    op = ps.get('output_profile', 'default')
    opmap = {x.short_name:x for x in output_profiles()}
    output_profile = opmap.get(op, opmap['default'])
    root = render(mi, output_profile)
    for img, path in referenced_images(root):
        container.log('Embedding referenced image: %s into jacket' % path)
        ext = path.rpartition('.')[-1]
        jacket_item = container.generate_item('jacket_image.'+ext, id_prefix='jacket_img')
        name = container.href_to_name(jacket_item.get('href'), container.opf_name)
        with open(path, 'rb') as f:
            container.parsed_cache[name] = f.read()
            container.commit_item(name)
        href = container.name_to_href(name, jacket)
        img.set('src', href)
    return root
Beispiel #8
0
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        self.__connections = []
        Widget.__init__(self, parent, OPTIONS['pipe']['page_setup'])

        self.db, self.book_id = db, book_id
        self.input_model = ProfileModel(input_profiles())
        self.output_model = ProfileModel(output_profiles())
        self.opt_input_profile.setModel(self.input_model)
        self.opt_output_profile.setModel(self.output_model)
        for g, slot in self.__connections:
            g.selectionModel().currentChanged.connect(slot)
        del self.__connections

        for x in (self.opt_input_profile, self.opt_output_profile):
            x.setMouseTracking(True)
            x.entered[(QModelIndex)].connect(self.show_desc)
        self.initialize_options(get_option, get_help, db, book_id)
        self.opt_input_profile.setToolTip('')
        self.opt_output_profile.setToolTip('')
Beispiel #9
0
 def set_image_settings(cls):
     from calibre.ebooks.conversion.config import load_defaults, save_defaults
     recs = load_defaults('comic_input')
     
     output_profile = "No output profile found."
     for profile in output_profiles():
         if profile.short_name == cls.output_profile:
             output_profile = profile
     
     if getattr(output_profile, 'colors', 0): # Can also refer to grayscale shades
         recs['colors'] = getattr(output_profile, 'colors')
     
     if getattr(output_profile, 'supports_color', False):
         recs['dont_grayscale'] = True
     
     if getattr(output_profile, 'large_screen', False):
         recs['keep_aspect_ratio'] = True
     
     save_defaults('comic_input', recs)
Beispiel #10
0
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        self.__connections = []
        Widget.__init__(self, parent, OPTIONS['pipe']['page_setup'])

        self.db, self.book_id = db, book_id
        self.input_model = ProfileModel(input_profiles())
        self.output_model = ProfileModel(output_profiles())
        self.opt_input_profile.setModel(self.input_model)
        self.opt_output_profile.setModel(self.output_model)
        for g, slot in self.__connections:
            g.selectionModel().currentChanged.connect(slot)
        del self.__connections

        for x in (self.opt_input_profile, self.opt_output_profile):
            x.setMouseTracking(True)
            x.entered[(QModelIndex)].connect(self.show_desc)
        self.initialize_options(get_option, get_help, db, book_id)
        it = unicode_type(self.opt_input_profile.toolTip())
        self.opt_input_profile.setToolTip('<p>'+it.replace('t.','t.\n<br>'))
        it = unicode_type(self.opt_output_profile.toolTip())
        self.opt_output_profile.setToolTip('<p>'+it.replace('t.','ce.\n<br>'))
Beispiel #11
0
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        self.__connections = []
        Widget.__init__(self, parent,
                ['margin_top', 'margin_left', 'margin_right', 'margin_bottom',
                    'input_profile', 'output_profile']
                )

        self.db, self.book_id = db, book_id
        self.input_model = ProfileModel(input_profiles())
        self.output_model = ProfileModel(output_profiles())
        self.opt_input_profile.setModel(self.input_model)
        self.opt_output_profile.setModel(self.output_model)
        for g, slot in self.__connections:
            g.selectionModel().currentChanged.connect(slot)
        del self.__connections

        for x in (self.opt_input_profile, self.opt_output_profile):
            x.setMouseTracking(True)
            self.connect(x, SIGNAL('entered(QModelIndex)'), self.show_desc)
        self.initialize_options(get_option, get_help, db, book_id)
        it = unicode(self.opt_input_profile.toolTip())
        self.opt_input_profile.setToolTip('<p>'+it.replace('t.','t.\n<br>'))
        it = unicode(self.opt_output_profile.toolTip())
        self.opt_output_profile.setToolTip('<p>'+it.replace('t.','ce.\n<br>'))
Beispiel #12
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        for _, _, cssdict, text, _ in rules:
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            selector = get_css_selector(text)
            matches = selector(tree, self.logger)
            if fl:
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                if not unicodedata.category(text[0]).startswith('P'):
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Beispiel #13
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if not media_ok(rule.media.mediaText):
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (elem.tag == XHTML('link') and elem.get('href') and elem.get(
                    'rel', 'stylesheet').lower() == 'stylesheet' and elem.get(
                    'type', CSS_MIME).lower() in OEB_STYLES and media_ok(elem.get('media'))
                ):
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    if media_ok(rule.media.mediaText):
                        for subrule in rule.cssRules:
                            rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                            index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = x.makeelement('{%s}span' % XHTML_NS)
                                span.text = special_text
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Beispiel #14
0
    def __init__(self, tree, path, oeb, opts, profile=None, extra_css="", user_css=""):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles

            for x in output_profiles():
                if x.short_name == "default":
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + ".css"
        stylesheets = [html_css_stylesheet()]
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile["name"], profile["props"], profile["macros"])

        parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger("calibre.css"))
        self.font_face_rules = []
        for elem in style_tags:
            if elem.tag == XHTML("style") and elem.get("type", CSS_MIME) in OEB_STYLES:
                text = elem.text if elem.text else u""
                for x in elem:
                    t = getattr(x, "text", None)
                    if t:
                        text += u"\n\n" + force_unicode(t, u"utf-8")
                    t = getattr(x, "tail", None)
                    if t:
                        text += u"\n\n" + force_unicode(t, u"utf-8")
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ("utf-8", b""))
                    stylesheet = parser.parseString(text, href=cssname, validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces["h"] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == "amzn-mobi":
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn("Ignoring missing stylesheet in @import rule:", rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn("CSS @import of non-CSS file %r" % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (
                elem.tag == XHTML("link")
                and elem.get("href")
                and elem.get("rel", "stylesheet").lower() == "stylesheet"
                and elem.get("type", CSS_MIME).lower() in OEB_STYLES
            ):
                href = urlnormalize(elem.attrib["href"])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn("Stylesheet %r referenced by file %r not in manifest" % (path, item.href))
                    continue
                if not hasattr(sitem.data, "cssRules"):
                    self.logger.warn("Stylesheet %r referenced by file %r is not CSS" % (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {"extra_css": extra_css, "user_css": user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname, validate=False)
                    stylesheet.namespaces["h"] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception("Failed to parse %s, ignoring." % w)
                    self.logger.debug("Bad css: ")
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {rule.media.item(i) for i in xrange(rule.media.length)}
                    if not media.intersection({"all", "screen", "amzn-kf8"}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index == 0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index == 0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur":(first-letter|first-line|link|hover|visited|active|focus|before|after)", re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), "")
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == "first-letter" and getattr(self.oeb, "plumber_output_format", "").lower() == u"mobi":
                    # Fake first-letter
                    from lxml.builder import ElementMaker

                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {"P", "Z"}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u"".join(punctuation_chars) + (text[0] if text else u"")
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, "//h:*[@style]"):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r"[0-9.]+$")
        for elem in xpath(tree, "//h:img[@width or @height]"):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get("width", "auto") != "auto" or style._style.get("height", "auto") != "auto"
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ("width", "height"):
                    val = elem.get(prop, "").strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += "px"
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        for _, _, cssdict, text, _ in rules:
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            selector = get_css_selector(text)
            matches = selector(tree, self.logger)
            if fl:
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                if not unicodedata.category(
                                        text[0]).startswith('P'):
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Beispiel #16
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Beispiel #17
0
def render_jacket(mi):
    ps = load_defaults('page_setup')
    op = ps.get('output_profile', 'default')
    opmap = {x.short_name: x for x in output_profiles()}
    output_profile = opmap.get(op, opmap['default'])
    return render(mi, output_profile)
Beispiel #18
0
    def __init__(self, input, output, log, report_progress=DummyReporter(),
            dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
            override_input_metadata=False):
        '''
        :param input: Path to input file.
        :param output: Path to output file/directory
        '''
        if isbytestring(input):
            input = input.decode(filesystem_encoding)
        if isbytestring(output):
            output = output.decode(filesystem_encoding)
        self.original_input_arg = input
        self.input = os.path.abspath(input)
        self.output = os.path.abspath(output)
        self.log = log
        self.ui_reporter = report_progress
        self.abort_after_input_dump = abort_after_input_dump
        self.override_input_metadata = override_input_metadata

        # Pipeline options {{{
        # Initialize the conversion options that are independent of input and
        # output formats. The input and output plugins can still disable these
        # options via recommendations.
        self.pipeline_options = [

OptionRecommendation(name='verbose',
            recommended_value=0, level=OptionRecommendation.LOW,
            short_switch='v',
            help=_('Level of verbosity. Specify multiple times for greater '
                   'verbosity.')
        ),

OptionRecommendation(name='debug_pipeline',
            recommended_value=None, level=OptionRecommendation.LOW,
            short_switch='d',
            help=_('Save the output from different stages of the conversion '
                   'pipeline to the specified '
                   'directory. Useful if you are unsure at which stage '
                   'of the conversion process a bug is occurring.')
        ),

OptionRecommendation(name='input_profile',
            recommended_value='default', level=OptionRecommendation.LOW,
            choices=[x.short_name for x in input_profiles()],
            help=_('Specify the input profile. The input profile gives the '
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
                   'pixels). Choices are:')+\
                        ', '.join([x.short_name for x in input_profiles()])
        ),

OptionRecommendation(name='output_profile',
            recommended_value='default', level=OptionRecommendation.LOW,
            choices=[x.short_name for x in output_profiles()],
            help=_('Specify the output profile. The output profile '
                   'tells the conversion system how to optimize the '
                   'created document for the specified device. In some cases, '
                   'an output profile is required to produce documents that '
                   'will work on a device. For example EPUB on the SONY reader. '
                   'Choices are:') + \
                           ', '.join([x.short_name for x in output_profiles()])
        ),

OptionRecommendation(name='base_font_size',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_('The base font size in pts. All font sizes in the produced book '
                   'will be rescaled based on this size. By choosing a larger '
                   'size you can make the fonts in the output bigger and vice '
                   'versa. By default, the base font size is chosen based on '
                   'the output profile you chose.'
                   )
        ),

OptionRecommendation(name='font_size_mapping',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Mapping from CSS font names to font sizes in pts. '
                   'An example setting is 12,12,14,16,18,20,22,24. '
                   'These are the mappings for the sizes xx-small to xx-large, '
                   'with the final size being for huge fonts. The font '
                   'rescaling algorithm uses these sizes to intelligently '
                   'rescale fonts. The default is to use a mapping based on '
                   'the output profile you chose.'
                   )
        ),

OptionRecommendation(name='disable_font_rescaling',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Disable all rescaling of font sizes.'
                   )
        ),

OptionRecommendation(name='minimum_line_height',
            recommended_value=120.0, level=OptionRecommendation.LOW,
            help=_(
            'The minimum line height, as a percentage of the element\'s '
            'calculated font size. calibre will ensure that every element '
            'has a line height of at least this setting, irrespective of '
            'what the input document specifies. Set to zero to disable. '
            'Default is 120%. Use this setting in preference to '
            'the direct line height specification, unless you know what '
            'you are doing. For example, you can achieve "double spaced" '
            'text by setting this to 240.'
            )
        ),


OptionRecommendation(name='line_height',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_(
            'The line height in pts. Controls spacing between consecutive '
            'lines of text. Only applies to elements that do not define '
            'their own line height. In most cases, the minimum line height '
            'option is more useful. '
            'By default no line height manipulation is performed.'
            )
        ),

OptionRecommendation(name='linearize_tables',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Some badly designed documents use tables to control the '
                'layout of text on the page. When converted these documents '
                'often have text that runs off the page and other artifacts. '
                'This option will extract the content from the tables and '
                'present it in a linear fashion.'
                )
        ),

OptionRecommendation(name='level1_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that '
            'should be added to the Table of Contents at level one. If '
            'this is specified, it takes precedence over other forms '
            'of auto-detection.'
            ' See the XPath Tutorial in the calibre User Manual for examples.'
                )
        ),

OptionRecommendation(name='level2_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that should be '
            'added to the Table of Contents at level two. Each entry is added '
            'under the previous level one entry.'
            ' See the XPath Tutorial in the calibre User Manual for examples.'
                )
        ),

OptionRecommendation(name='level3_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that should be '
                'added to the Table of Contents at level three. Each entry '
                'is added under the previous level two entry.'
            ' See the XPath Tutorial in the calibre User Manual for examples.'
                )
        ),

OptionRecommendation(name='use_auto_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally, if the source file already has a Table of '
            'Contents, it is used in preference to the auto-generated one. '
            'With this option, the auto-generated one is always used.'
                )
        ),

OptionRecommendation(name='no_chapters_in_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_("Don't add auto-detected chapters to the Table of "
            'Contents.'
                )
        ),

OptionRecommendation(name='toc_threshold',
            recommended_value=6, level=OptionRecommendation.LOW,
            help=_(
        'If fewer than this number of chapters is detected, then links '
        'are added to the Table of Contents. Default: %default')
        ),

OptionRecommendation(name='max_toc_links',
            recommended_value=50, level=OptionRecommendation.LOW,
            help=_('Maximum number of links to insert into the TOC. Set to 0 '
               'to disable. Default is: %default. Links are only added to the '
            'TOC if less than the threshold number of chapters were detected.'
                )
        ),

OptionRecommendation(name='toc_filter',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Remove entries from the Table of Contents whose titles '
            'match the specified regular expression. Matching entries and all '
            'their children are removed.'
                )
        ),

OptionRecommendation(name='duplicate_links_in_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('When creating a TOC from links in the input document, '
                'allow duplicate entries, i.e. allow more than one entry '
                'with the same text, provided that they point to a '
                'different location.')
        ),


OptionRecommendation(name='chapter',
        recommended_value="//*[((name()='h1' or name()='h2') and "
              r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
              "= 'chapter']", level=OptionRecommendation.LOW,
            help=_('An XPath expression to detect chapter titles. The default '
                'is to consider <h1> or <h2> tags that contain the words '
                '"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
                'well as any tags that have class="chapter". The expression '
                'used must evaluate to a list of elements. To disable chapter '
                'detection, use the expression "/". See the XPath Tutorial '
                'in the calibre User Manual for further help on using this '
                'feature.'
                )
        ),

OptionRecommendation(name='chapter_mark',
            recommended_value='pagebreak', level=OptionRecommendation.LOW,
            choices=['pagebreak', 'rule', 'both', 'none'],
            help=_('Specify how to mark detected chapters. A value of '
                    '"pagebreak" will insert page breaks before chapters. '
                    'A value of "rule" will insert a line before chapters. '
                    'A value of "none" will disable chapter marking and a '
                    'value of "both" will use both page breaks and lines '
                    'to mark chapters.')
        ),

OptionRecommendation(name='extra_css',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Either the path to a CSS stylesheet or raw CSS. '
                'This CSS will be appended to the style rules from '
                'the source file, so it can be used to override those '
                'rules.')
        ),

OptionRecommendation(name='filter_css',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('A comma separated list of CSS properties that '
                'will be removed from all CSS style rules. This is useful '
                'if the presence of some style information prevents it '
                'from being overridden on your device. '
                'For example: '
                'font-family,color,margin-left,margin-right')
        ),

OptionRecommendation(name='page_breaks_before',
            recommended_value="//*[name()='h1' or name()='h2']",
            level=OptionRecommendation.LOW,
            help=_('An XPath expression. Page breaks are inserted '
            'before the specified elements.')
        ),

OptionRecommendation(name='remove_fake_margins',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Some documents specify page margins by '
                'specifying a left and right margin on each individual '
                'paragraph. calibre will try to detect and remove these '
                'margins. Sometimes, this can cause the removal of '
                'margins that should not have been removed. In this '
                'case you can disable the removal.')
        ),


OptionRecommendation(name='margin_top',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the top margin in pts. Default is %default. '
            'Setting this to less than zero will cause no margin to be set. '
            'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_bottom',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the bottom margin in pts. Default is %default. '
            'Setting this to less than zero will cause no margin to be set. '
            'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_left',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the left margin in pts. Default is %default. '
            'Setting this to less than zero will cause no margin to be set. '
            'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='margin_right',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the right margin in pts. Default is %default. '
            'Setting this to less than zero will cause no margin to be set. '
            'Note: 72 pts equals 1 inch')),

OptionRecommendation(name='change_justification',
        recommended_value='original', level=OptionRecommendation.LOW,
        choices=['left','justify','original'],
        help=_('Change text justification. A value of "left" converts all'
            ' justified text in the source to left aligned (i.e. '
            'unjustified) text. A value of "justify" converts all '
            'unjustified text to justified. A value of "original" '
            '(the default) does not change justification in the '
            'source file. Note that only some output formats support '
            'justification.')),

OptionRecommendation(name='remove_paragraph_spacing',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Remove spacing between paragraphs. Also sets an indent on '
        'paragraphs of 1.5em. Spacing removal will not work '
        'if the source file does not use paragraphs (<p> or <div> tags).')
        ),

OptionRecommendation(name='remove_paragraph_spacing_indent_size',
        recommended_value=1.5, level=OptionRecommendation.LOW,
        help=_('When calibre removes blank lines between paragraphs, it automatically '
            'sets a paragraph indent, to ensure that paragraphs can be easily '
            'distinguished. This option controls the width of that indent (in em). '
            'If you set this value negative, then the indent specified in the input '
            'document is used, that is, calibre does not change the indentation.')
        ),

OptionRecommendation(name='prefer_metadata_cover',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Use the cover detected from the source file in preference '
        'to the specified cover.')
        ),

OptionRecommendation(name='insert_blank_line',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Insert a blank line between paragraphs. Will not work '
            'if the source file does not use paragraphs (<p> or <div> tags).'
            )
        ),

OptionRecommendation(name='insert_blank_line_size',
        recommended_value=0.5, level=OptionRecommendation.LOW,
        help=_('Set the height of the inserted blank lines (in em).'
            ' The height of the lines between paragraphs will be twice the value'
            ' set here.')
        ),

OptionRecommendation(name='remove_first_image',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Remove the first image from the input ebook. Useful if the '
        'input document has a cover image that is not identified as a cover. '
        'In this case, if you set a cover in calibre, the output document will '
        'end up with two cover images if you do not specify this option.'
            )
        ),

OptionRecommendation(name='insert_metadata',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Insert the book metadata at the start of '
            'the book. This is useful if your ebook reader does not support '
            'displaying/searching metadata directly.'
            )
        ),

OptionRecommendation(name='smarten_punctuation',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Convert plain quotes, dashes and ellipsis to their '
            'typographically correct equivalents. For details, see '
            'http://daringfireball.net/projects/smartypants'
            )
        ),

OptionRecommendation(name='unsmarten_punctuation',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Convert fancy quotes, dashes and ellipsis to their '
               'plain equivalents.'
            )
        ),

OptionRecommendation(name='read_metadata_from_opf',
            recommended_value=None, level=OptionRecommendation.LOW,
            short_switch='m',
            help=_('Read metadata from the specified OPF file. Metadata read '
                   'from this file will override any metadata in the source '
                   'file.')
        ),

OptionRecommendation(name='asciiize',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=(_('Transliterate unicode characters to an ASCII '
            'representation. Use with care because this will replace '
            'unicode characters with ASCII. For instance it will replace "%s" '
            'with "Mikhail Gorbachiov". Also, note that in '
            'cases where there are multiple representations of a character '
            '(characters shared by Chinese and Japanese for instance) the '
            'representation based on the current calibre interface language will be '
            'used.')%\
            u'\u041c\u0438\u0445\u0430\u0438\u043b '
            u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
)
        ),

OptionRecommendation(name='keep_ligatures',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Preserve ligatures present in the input document. '
                'A ligature is a special rendering of a pair of '
                'characters like ff, fi, fl et cetera. '
                'Most readers do not have support for '
                'ligatures in their default fonts, so they are '
                'unlikely to render correctly. By default, calibre '
                'will turn a ligature into the corresponding pair of normal '
                'characters. This option will preserve them instead.')
        ),

OptionRecommendation(name='title',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the title.')),

OptionRecommendation(name='authors',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the authors. Multiple authors should be separated by '
    'ampersands.')),

OptionRecommendation(name='title_sort',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('The version of the title to be used for sorting. ')),

OptionRecommendation(name='author_sort',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('String to be used when sorting by author. ')),

OptionRecommendation(name='cover',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the cover to the specified file or URL')),

OptionRecommendation(name='comments',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the ebook description.')),

OptionRecommendation(name='publisher',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the ebook publisher.')),

OptionRecommendation(name='series',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the series this ebook belongs to.')),

OptionRecommendation(name='series_index',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the index of the book in this series.')),

OptionRecommendation(name='rating',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the rating. Should be a number between 1 and 5.')),

OptionRecommendation(name='isbn',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the ISBN of the book.')),

OptionRecommendation(name='tags',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the tags for the book. Should be a comma separated list.')),

OptionRecommendation(name='book_producer',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the book producer.')),

OptionRecommendation(name='language',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the language.')),

OptionRecommendation(name='pubdate',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the publication date.')),

OptionRecommendation(name='timestamp',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the book timestamp (no longer used anywhere)')),

OptionRecommendation(name='enable_heuristics',
    recommended_value=False, level=OptionRecommendation.LOW,
    help=_('Enable heuristic processing. This option must be set for any '
           'heuristic processing to take place.')),

OptionRecommendation(name='markup_chapter_headings',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Detect unformatted chapter headings and sub headings. Change '
           'them to h2 and h3 tags.  This setting will not create a TOC, '
           'but can be used in conjunction with structure detection to create '
           'one.')),

OptionRecommendation(name='italicize_common_cases',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Look for common words and patterns that denote '
           'italics and italicize them.')),

OptionRecommendation(name='fix_indents',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Turn indentation created from multiple non-breaking space entities '
           'into CSS indents.')),

OptionRecommendation(name='html_unwrap_factor',
    recommended_value=0.40, level=OptionRecommendation.LOW,
    help=_('Scale used to determine the length at which a line should '
            'be unwrapped. Valid values are a decimal between 0 and 1. The '
            'default is 0.4, just below the median line length.  If only a '
            'few lines in the document require unwrapping this value should '
            'be reduced')),

OptionRecommendation(name='unwrap_lines',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Unwrap lines using punctuation and other formatting clues.')),

OptionRecommendation(name='delete_blank_paragraphs',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Remove empty paragraphs from the document when they exist between '
           'every other paragraph')),

OptionRecommendation(name='format_scene_breaks',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Left aligned scene break markers are center aligned. '
           'Replace soft scene breaks that use multiple blank lines with '
           'horizontal rules.')),

OptionRecommendation(name='replace_scene_breaks',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Replace scene breaks with the specified text. By default, the '
        'text from the input document is used.')),

OptionRecommendation(name='dehyphenate',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Analyze hyphenated words throughout the document.  The '
           'document itself is used as a dictionary to determine whether hyphens '
           'should be retained or removed.')),

OptionRecommendation(name='renumber_headings',
    recommended_value=True, level=OptionRecommendation.LOW,
    help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
           'The tags are renumbered to prevent splitting in the middle '
           'of chapter headings.')),

OptionRecommendation(name='sr1_search',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Search pattern (regular expression) to be replaced with '
           'sr1-replace.')),

OptionRecommendation(name='sr1_replace',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Replacement to replace the text found with sr1-search.')),

OptionRecommendation(name='sr2_search',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Search pattern (regular expression) to be replaced with '
           'sr2-replace.')),

OptionRecommendation(name='sr2_replace',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Replacement to replace the text found with sr2-search.')),

OptionRecommendation(name='sr3_search',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Search pattern (regular expression) to be replaced with '
           'sr3-replace.')),

OptionRecommendation(name='sr3_replace',
    recommended_value='', level=OptionRecommendation.LOW,
    help=_('Replacement to replace the text found with sr3-search.')),

OptionRecommendation(name='search_replace',
    recommended_value=None, level=OptionRecommendation.LOW, help=_(
        'Path to a file containing search and replace regular expressions. '
        'The file must contain alternating lines of regular expression '
        'followed by replacement pattern (which can be an empty line). '
        'The regular expression must be in the python regex syntax and '
        'the file must be UTF-8 encoded.')),
]
        # }}}

        input_fmt = os.path.splitext(self.input)[1]
        if not input_fmt:
            raise ValueError('Input file must have an extension')
        input_fmt = input_fmt[1:].lower().replace('original_', '')
        self.archive_input_tdir = None
        if input_fmt in ARCHIVE_FMTS:
            self.log('Processing archive...')
            tdir = PersistentTemporaryDirectory('_plumber_archive')
            self.input, input_fmt = self.unarchive(self.input, tdir)
            self.archive_input_tdir = tdir
        if os.access(self.input, os.R_OK):
            nfp = run_plugins_on_preprocess(self.input, input_fmt)
            if nfp != self.input:
                self.input = nfp
                input_fmt = os.path.splitext(self.input)[1]
                if not input_fmt:
                    raise ValueError('Input file must have an extension')
                input_fmt = input_fmt[1:].lower()

        if os.path.exists(self.output) and os.path.isdir(self.output):
            output_fmt = 'oeb'
        else:
            output_fmt = os.path.splitext(self.output)[1]
            if not output_fmt:
                output_fmt = '.oeb'
            output_fmt = output_fmt[1:].lower()

        self.input_plugin  = plugin_for_input_format(input_fmt)
        self.output_plugin = plugin_for_output_format(output_fmt)

        if self.input_plugin is None:
            raise ValueError('No plugin to handle input format: '+input_fmt)

        if self.output_plugin is None:
            raise ValueError('No plugin to handle output format: '+output_fmt)

        self.input_fmt = input_fmt
        self.output_fmt = output_fmt


        self.all_format_options = set()
        self.input_options = set()
        self.output_options = set()
        # Build set of all possible options. Two options are equal if their
        # names are the same.
        if not dummy:
            self.input_options  = self.input_plugin.options.union(
                                        self.input_plugin.common_options)
            self.output_options = self.output_plugin.options.union(
                                    self.output_plugin.common_options)
        else:
            for fmt in available_input_formats():
                input_plugin = plugin_for_input_format(fmt)
                if input_plugin:
                    self.all_format_options = self.all_format_options.union(
                        input_plugin.options.union(input_plugin.common_options))
            for fmt in available_output_formats():
                output_plugin = plugin_for_output_format(fmt)
                if output_plugin:
                    self.all_format_options = self.all_format_options.union(
                        output_plugin.options.union(output_plugin.common_options))

        # Remove the options that have been disabled by recommendations from the
        # plugins.
        for w in ('input_options', 'output_options',
                'all_format_options'):
            temp = set([])
            for x in getattr(self, w):
                temp.add(x.clone())
            setattr(self, w, temp)
        if merge_plugin_recs:
            self.merge_plugin_recommendations()
Beispiel #19
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces['h'] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), '')
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() == u'mobi':
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Beispiel #20
0
def render_jacket(mi):
    ps = load_defaults('page_setup')
    op = ps.get('output_profile', 'default')
    opmap = {x.short_name:x for x in output_profiles()}
    output_profile = opmap.get(op, opmap['default'])
    return render(mi, output_profile)