def __init__(self, parent, get_option, get_help, db=None, book_id=None):
    """Set up the widget for the margin and input/output profile options.

    Registers the option names with ``Widget.__init__``, attaches a
    ``ProfileModel`` to each profile view, connects each view's ``entered``
    signal to ``self.show_desc`` and then loads the option values through
    ``self.initialize_options``.

    :param parent: Passed through to ``Widget.__init__``.
    :param get_option: Passed through to ``self.initialize_options``.
    :param get_help: Passed through to ``self.initialize_options``.
    :param db: Stored as ``self.db`` and passed to ``initialize_options``.
    :param book_id: Stored as ``self.book_id`` and passed to
        ``initialize_options``.
    """
    self.__connections = []
    Widget.__init__(
        self,
        parent,
        [
            "margin_top",
            "margin_left",
            "margin_right",
            "margin_bottom",
            "input_profile",
            "output_profile",
        ],
    )
    self.db, self.book_id = db, book_id
    self.input_model = ProfileModel(input_profiles())
    self.output_model = ProfileModel(output_profiles())
    self.opt_input_profile.setModel(self.input_model)
    self.opt_output_profile.setModel(self.output_model)
    # NOTE(review): presumably (view, slot) pairs get queued in this list
    # while Widget.__init__ runs -- confirm; nothing in this method adds any.
    for g, slot in self.__connections:
        g.selectionModel().currentChanged.connect(slot)
    del self.__connections

    for x in (self.opt_input_profile, self.opt_output_profile):
        x.setMouseTracking(True)
        self.connect(x, SIGNAL("entered(QModelIndex)"), self.show_desc)
    self.initialize_options(get_option, get_help, db, book_id)

    # Add a line break to each tooltip after a sentence ending.
    it = unicode(self.opt_input_profile.toolTip())
    self.opt_input_profile.setToolTip("<p>" + it.replace("t.", "t.\n<br>"))
    it = unicode(self.opt_output_profile.toolTip())
    # The searched text must equal the text that is re-inserted; searching
    # for "t." here would rewrite every "t." in the tooltip as "ce.".
    self.opt_output_profile.setToolTip("<p>" + it.replace("ce.", "ce.\n<br>"))
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
    """Set up the widget for the margin and input/output profile options.

    Registers the option names with ``Widget.__init__``, attaches a
    ``ProfileModel`` to each profile view, connects each view's ``entered``
    signal to ``self.show_desc`` and then loads the option values through
    ``self.initialize_options``.

    :param parent: Passed through to ``Widget.__init__``.
    :param get_option: Passed through to ``self.initialize_options``.
    :param get_help: Passed through to ``self.initialize_options``.
    :param db: Stored as ``self.db`` and passed to ``initialize_options``.
    :param book_id: Stored as ``self.book_id`` and passed to
        ``initialize_options``.
    """
    self.__connections = []
    Widget.__init__(self, parent, [
        'margin_top', 'margin_left', 'margin_right', 'margin_bottom',
        'input_profile', 'output_profile'
    ])
    self.db, self.book_id = db, book_id
    self.input_model = ProfileModel(input_profiles())
    self.output_model = ProfileModel(output_profiles())
    self.opt_input_profile.setModel(self.input_model)
    self.opt_output_profile.setModel(self.output_model)
    # NOTE(review): presumably (view, slot) pairs get queued in this list
    # while Widget.__init__ runs -- confirm; nothing in this method adds any.
    for g, slot in self.__connections:
        g.selectionModel().currentChanged.connect(slot)
    del self.__connections

    for x in (self.opt_input_profile, self.opt_output_profile):
        x.setMouseTracking(True)
        x.entered[(QModelIndex)].connect(self.show_desc)
    self.initialize_options(get_option, get_help, db, book_id)

    # Add a line break to each tooltip after a sentence ending.
    it = str(self.opt_input_profile.toolTip())
    self.opt_input_profile.setToolTip('<p>' + it.replace('t.', 't.\n<br>'))
    it = str(self.opt_output_profile.toolTip())
    # The searched text must equal the text that is re-inserted; searching
    # for 't.' here would rewrite every 't.' in the tooltip as 'ce.'.
    self.opt_output_profile.setToolTip('<p>' + it.replace('ce.', 'ce.\n<br>'))
def get_profile_values(self):
    """Return ``(fbase, fkey)`` of the configured output profile.

    The profile name is read from the saved ``page_setup`` conversion
    defaults (``'default'`` when the key is absent). If no output profile
    has that short name, the profile named ``'default'`` is used instead of
    whichever profile happened to be iterated last.

    :return: Tuple ``(profile.fbase, profile.fkey)``.
    :raises ValueError: If neither the configured profile nor a profile
        named ``'default'`` exists.
    """
    from calibre.customize.ui import output_profiles
    from calibre.ebooks.conversion.config import load_defaults

    recs = load_defaults('page_setup')
    pfname = recs.get('output_profile', 'default')

    selected = fallback = None
    for profile in output_profiles():
        if profile.short_name == pfname:
            selected = profile
            break
        if fallback is None and profile.short_name == 'default':
            fallback = profile
    if selected is None:
        # Unknown or stale profile name in the saved defaults.
        selected = fallback
    if selected is None:
        raise ValueError(
            'No output profile named %r and no default profile available' % pfname)
    return selected.fbase, selected.fkey
def profiles():
    """Return name/description data for all input and output profiles.

    The result is a dict with the keys ``'input'`` and ``'output'``, each
    mapping a profile's ``short_name`` to ``{'name': ..., 'description': ...}``.
    It is built once and cached on the function object as ``profiles.ans``.
    """
    cached = getattr(profiles, 'ans', None)
    if cached is not None:
        return cached

    def describe(profile):
        # Append a human readable screen size to the profile description.
        width, height = profile.screen_size
        if width < 10000:
            size_text = _('%(width)d x %(height)d pixels') % dict(width=width, height=height)
        else:
            size_text = _('unlimited')
        size_text = _('Screen size: %s') % size_text
        full_description = '%s [%s]' % (profile.description, size_text)
        return {'name': profile.name, 'description': full_description}

    # The cache attribute is assigned before it is filled in.
    cached = profiles.ans = {}
    cached['input'] = {p.short_name: describe(p) for p in input_profiles()}
    cached['output'] = {p.short_name: describe(p) for p in output_profiles()}
    return cached
def render_jacket(container, jacket):
    """Render the jacket for ``container.mi`` and embed the images it references.

    The output profile comes from the saved ``page_setup`` defaults, with
    the ``'default'`` profile as fallback. Each image reported by
    ``referenced_images`` is read from disk, stored in the container under
    a newly generated item, and the ``src`` of the image element is pointed
    at that item relative to ``jacket``.

    :return: The rendered root.
    """
    metadata = container.mi
    configured = load_defaults('page_setup').get('output_profile', 'default')
    by_short_name = {}
    for candidate in output_profiles():
        by_short_name[candidate.short_name] = candidate
    # The fallback is looked up unconditionally, as a missing 'default'
    # profile is an error either way.
    fallback = by_short_name['default']
    root = render(metadata, by_short_name.get(configured, fallback))

    for image, image_path in referenced_images(root):
        container.log('Embedding referenced image: %s into jacket' % image_path)
        extension = image_path.rpartition('.')[-1]
        new_item = container.generate_item('jacket_image.' + extension, id_prefix='jacket_img')
        item_name = container.href_to_name(new_item.get('href'), container.opf_name)
        with open(image_path, 'rb') as stream:
            container.parsed_cache[item_name] = stream.read()
        container.commit_item(item_name)
        image.set('src', container.name_to_href(item_name, jacket))
    return root
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
    """Set up the page setup options widget.

    Registers ``OPTIONS['pipe']['page_setup']`` with ``Widget.__init__``,
    attaches a ``ProfileModel`` to each profile view, connects each view's
    ``entered`` signal to ``self.show_desc``, loads the option values and
    finally clears the tooltips of both profile views.
    """
    self.__connections = []
    Widget.__init__(self, parent, OPTIONS['pipe']['page_setup'])
    self.db = db
    self.book_id = book_id

    self.input_model = ProfileModel(input_profiles())
    self.output_model = ProfileModel(output_profiles())
    self.opt_input_profile.setModel(self.input_model)
    self.opt_output_profile.setModel(self.output_model)

    # Hook up whatever (view, slot) pairs have been queued, then drop the list.
    for view, handler in self.__connections:
        view.selectionModel().currentChanged.connect(handler)
    del self.__connections

    profile_views = (self.opt_input_profile, self.opt_output_profile)
    for profile_view in profile_views:
        profile_view.setMouseTracking(True)
        profile_view.entered[(QModelIndex)].connect(self.show_desc)

    self.initialize_options(get_option, get_help, db, book_id)

    for profile_view in (self.opt_input_profile, self.opt_output_profile):
        profile_view.setToolTip('')
def set_image_settings(cls):
    """Update the saved ``comic_input`` defaults from ``cls.output_profile``.

    Looks for the output profile whose ``short_name`` equals
    ``cls.output_profile`` and copies its image related capabilities into
    the ``comic_input`` conversion defaults, which are then saved back.
    """
    from calibre.ebooks.conversion.config import load_defaults, save_defaults

    settings = load_defaults('comic_input')

    # Placeholder used when nothing matches; it has none of the attributes
    # probed below, so every getattr falls back to its default.
    matched = "No output profile found."
    for candidate in output_profiles():
        if candidate.short_name == cls.output_profile:
            matched = candidate

    # 'colors' can also refer to grayscale shades
    colors = getattr(matched, 'colors', 0)
    if colors:
        settings['colors'] = colors
    for attribute, setting in (
        ('supports_color', 'dont_grayscale'),
        ('large_screen', 'keep_aspect_ratio'),
    ):
        if getattr(matched, attribute, False):
            settings[setting] = True

    save_defaults('comic_input', settings)
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
    """Set up the page setup options widget.

    Registers ``OPTIONS['pipe']['page_setup']`` with ``Widget.__init__``,
    attaches a ``ProfileModel`` to each profile view, connects each view's
    ``entered`` signal to ``self.show_desc`` and then loads the option
    values through ``self.initialize_options``.

    :param parent: Passed through to ``Widget.__init__``.
    :param get_option: Passed through to ``self.initialize_options``.
    :param get_help: Passed through to ``self.initialize_options``.
    :param db: Stored as ``self.db`` and passed to ``initialize_options``.
    :param book_id: Stored as ``self.book_id`` and passed to
        ``initialize_options``.
    """
    self.__connections = []
    Widget.__init__(self, parent, OPTIONS['pipe']['page_setup'])
    self.db, self.book_id = db, book_id
    self.input_model = ProfileModel(input_profiles())
    self.output_model = ProfileModel(output_profiles())
    self.opt_input_profile.setModel(self.input_model)
    self.opt_output_profile.setModel(self.output_model)
    # NOTE(review): presumably (view, slot) pairs get queued in this list
    # while Widget.__init__ runs -- confirm; nothing in this method adds any.
    for g, slot in self.__connections:
        g.selectionModel().currentChanged.connect(slot)
    del self.__connections

    for x in (self.opt_input_profile, self.opt_output_profile):
        x.setMouseTracking(True)
        x.entered[(QModelIndex)].connect(self.show_desc)
    self.initialize_options(get_option, get_help, db, book_id)

    # Add a line break to each tooltip after a sentence ending.
    it = unicode_type(self.opt_input_profile.toolTip())
    self.opt_input_profile.setToolTip('<p>'+it.replace('t.','t.\n<br>'))
    it = unicode_type(self.opt_output_profile.toolTip())
    # The searched text must equal the text that is re-inserted; searching
    # for 't.' here would rewrite every 't.' in the tooltip as 'ce.'.
    self.opt_output_profile.setToolTip('<p>'+it.replace('ce.','ce.\n<br>'))
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
    """Set up the widget for the margin and input/output profile options.

    Registers the option names with ``Widget.__init__``, attaches a
    ``ProfileModel`` to each profile view, connects each view's ``entered``
    signal to ``self.show_desc`` and then loads the option values through
    ``self.initialize_options``.

    :param parent: Passed through to ``Widget.__init__``.
    :param get_option: Passed through to ``self.initialize_options``.
    :param get_help: Passed through to ``self.initialize_options``.
    :param db: Stored as ``self.db`` and passed to ``initialize_options``.
    :param book_id: Stored as ``self.book_id`` and passed to
        ``initialize_options``.
    """
    self.__connections = []
    Widget.__init__(self, parent,
            ['margin_top', 'margin_left', 'margin_right', 'margin_bottom',
                'input_profile', 'output_profile']
            )
    self.db, self.book_id = db, book_id
    self.input_model = ProfileModel(input_profiles())
    self.output_model = ProfileModel(output_profiles())
    self.opt_input_profile.setModel(self.input_model)
    self.opt_output_profile.setModel(self.output_model)
    # NOTE(review): presumably (view, slot) pairs get queued in this list
    # while Widget.__init__ runs -- confirm; nothing in this method adds any.
    for g, slot in self.__connections:
        g.selectionModel().currentChanged.connect(slot)
    del self.__connections

    for x in (self.opt_input_profile, self.opt_output_profile):
        x.setMouseTracking(True)
        self.connect(x, SIGNAL('entered(QModelIndex)'), self.show_desc)
    self.initialize_options(get_option, get_help, db, book_id)

    # Add a line break to each tooltip after a sentence ending.
    it = unicode(self.opt_input_profile.toolTip())
    self.opt_input_profile.setToolTip('<p>'+it.replace('t.','t.\n<br>'))
    it = unicode(self.opt_output_profile.toolTip())
    # The searched text must equal the text that is re-inserted; searching
    # for 't.' here would rewrite every 't.' in the tooltip as 'ce.'.
    self.opt_output_profile.setToolTip('<p>'+it.replace('ce.','ce.\n<br>'))
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css=''):
    """Collect the stylesheets that apply to ``tree`` and compute element styles.

    Gathers CSS from the ``<style>`` and ``<link>`` children of the
    document head plus ``extra_css`` and ``user_css``, flattens and sorts
    all rules, applies them to the elements their selectors match, then
    applies inline ``style`` attributes and converts ``width``/``height``
    attributes of images into style properties.

    :param tree: Parsed document; queried with ``xpath`` and CSS selectors.
    :param path: Key of the document in ``oeb.manifest.hrefs``.
    :param oeb: Provides ``logger``, ``manifest`` and ``css_preprocessor``.
    :param opts: Conversion options; ``opts.output_profile`` is read.
    :param profile: Profile to use. When None, the output profile named
        'default' is used, falling back to ``opts.output_profile``.
    :param extra_css: Additional CSS text, parsed after the document CSS.
    :param user_css: Additional CSS text, parsed after the document CSS.
    """
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    stylesheets = [html_css_stylesheet()]
    head = xpath(tree, '/h:html/h:head')
    if head:
        head = head[0]
    else:
        head = []

    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'], profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in head:
        if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES):
            # Join the element text with the text and tail of its children.
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = XHTML_CSS_NAMESPACE + text
                text = oeb.css_preprocessor(text)
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path, item.href))
                continue
            stylesheets.append(sitem.data)

    csses = {'extra_css':extra_css, 'user_css':user_css}
    for w, x in csses.items():
        if x:
            try:
                text = XHTML_CSS_NAMESPACE + x
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
            except:
                # Best effort: unparseable extra CSS is logged and skipped.
                self.logger.exception('Failed to parse %s, ignoring.'%w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)

    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for stylesheet in stylesheets:
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            rules.extend(self.flatten_rule(rule, href, index))
            index = index + 1
    rules.sort()
    self.rules = rules
    self._styles = {}
    for _, _, cssdict, text, _ in rules:
        fl = ':first-letter' in text
        if fl:
            text = text.replace(':first-letter', '')
        selector = get_css_selector(text)
        matches = selector(tree, self.logger)
        if fl:
            # Fake :first-letter by wrapping the leading punctuation and
            # the first following character in a span that gets the style.
            from lxml.builder import ElementMaker
            E = ElementMaker(namespace=XHTML_NS)
            for elem in matches:
                # Only real elements: a bare iter() also yields comments
                # and processing instructions, which have .text but cannot
                # take a child span.
                for x in elem.iter('*'):
                    if x.text:
                        punctuation_chars = []
                        text = unicode(x.text)
                        while text:
                            if not unicodedata.category(text[0]).startswith('P'):
                                break
                            punctuation_chars.append(text[0])
                            text = text[1:]

                        special_text = u''.join(punctuation_chars) + \
                                (text[0] if text else u'')
                        span = E.span(special_text)
                        span.tail = text[1:]
                        x.text = None
                        x.insert(0, span)
                        self.style(span)._update_cssdict(cssdict)
                        break
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'\d+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    # Values consisting only of digits get a 'px' unit.
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css='', base_css=''):
    """Collect the stylesheets that apply to ``tree`` and compute element styles.

    Gathers CSS from every ``style`` and ``link`` element in the document
    (subject to ``media_ok``), from stylesheets they ``@import``, and from
    ``base_css``, ``extra_css`` and ``user_css``. All rules are flattened
    and sorted, applied to the elements their selectors match, and then
    inline ``style`` attributes and image ``width``/``height`` attributes
    are folded into the computed styles.

    :param tree: Parsed document; queried with ``xpath`` and ``Select``.
    :param path: Key of the document in ``oeb.manifest.hrefs``.
    :param oeb: Provides ``logger``, ``manifest`` and ``css_preprocessor``.
    :param opts: Conversion options; ``opts.output_profile`` is read.
    :param profile: Profile to use. When None, the output profile named
        'default' is used, falling back to ``opts.output_profile``.
    :param extra_css: Additional CSS text, parsed after the document CSS.
    :param user_css: Additional CSS text, parsed after the document CSS.
    :param base_css: CSS text added right after the first (HTML) stylesheet.
    """
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.body_font_size = self.profile.fbase
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    # The first stylesheet is treated as the user agent sheet further down
    # (sheet_index == 0).
    stylesheets = [html_css_stylesheet()]
    if base_css:
        stylesheets.append(parseString(base_css, validate=False))
    style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'], profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in style_tags:
        if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))):
            # Join the element text with the text and tail of its children.
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = oeb.css_preprocessor(text)
                # We handle @import rules separately
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                parser.setFetcher(self._fetch_css_file)
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        if not media_ok(rule.media.mediaText):
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in OEB_STYLES:
                            self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                            continue
                        # Imported sheets are added before the importing one.
                        stylesheets.append(sitem.data)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif (elem.tag == XHTML('link') and elem.get('href') and elem.get(
                'rel', 'stylesheet').lower() == 'stylesheet' and elem.get(
                'type', CSS_MIME).lower() in OEB_STYLES and media_ok(elem.get('media'))
            ):
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path, item.href))
                continue
            stylesheets.append(sitem.data)

    csses = {'extra_css':extra_css, 'user_css':user_css}
    for w, x in csses.items():
        if x:
            try:
                text = x
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheets.append(stylesheet)
            except:
                # Best effort: unparseable extra CSS is logged and skipped.
                self.logger.exception('Failed to parse %s, ignoring.'%w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)

    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for sheet_index, stylesheet in enumerate(stylesheets):
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            if rule.type == rule.MEDIA_RULE:
                # Rules inside an accepted @media block are flattened
                # individually; other @media blocks are dropped.
                if media_ok(rule.media.mediaText):
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
            else:
                rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                index = index + 1
    rules.sort()
    self.rules = rules
    self._styles = {}
    pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
    select = Select(tree, ignore_inappropriate_pseudo_classes=True)

    for _, _, cssdict, text, _ in rules:
        fl = pseudo_pat.search(text)
        try:
            matches = tuple(select(text))
        except SelectorError as err:
            self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
            continue

        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(self.oeb, 'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                # Fake first-letter
                for elem in matches:
                    for x in elem.iter('*'):
                        if x.text:
                            # Split off leading punctuation/separator
                            # characters plus the first character after them.
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                category = unicodedata.category(text[0])
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = x.makeelement('{%s}span' % XHTML_NS)
                            span.text = special_text
                            span.set('data-fake-first-letter', '1')
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            # Only the first node with text gets the span.
                            break
            else:
                # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'[0-9.]+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    # Bare numbers get a 'px' unit.
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css="", user_css=""): self.oeb, self.opts = oeb, opts self.profile = profile if self.profile is None: # Use the default profile. This should really be using # opts.output_profile, but I don't want to risk changing it, as # doing so might well have hard to debug font size effects. from calibre.customize.ui import output_profiles for x in output_profiles(): if x.short_name == "default": self.profile = x break if self.profile is None: # Just in case the default profile is removed in the future :) self.profile = opts.output_profile self.body_font_size = self.profile.fbase self.logger = oeb.logger item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + ".css" stylesheets = [html_css_stylesheet()] style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]') # Add cssutils parsing profiles from output_profile for profile in self.opts.output_profile.extra_css_modules: cssprofiles.addProfile(profile["name"], profile["props"], profile["macros"]) parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger("calibre.css")) self.font_face_rules = [] for elem in style_tags: if elem.tag == XHTML("style") and elem.get("type", CSS_MIME) in OEB_STYLES: text = elem.text if elem.text else u"" for x in elem: t = getattr(x, "text", None) if t: text += u"\n\n" + force_unicode(t, u"utf-8") t = getattr(x, "tail", None) if t: text += u"\n\n" + force_unicode(t, u"utf-8") if text: text = oeb.css_preprocessor(text, add_namespace=True) # We handle @import rules separately parser.setFetcher(lambda x: ("utf-8", b"")) stylesheet = parser.parseString(text, href=cssname, validate=False) parser.setFetcher(self._fetch_css_file) stylesheet.namespaces["h"] = XHTML_NS for rule in stylesheet.cssRules: if rule.type == rule.IMPORT_RULE: ihref = item.abshref(rule.href) if rule.media.mediaText == "amzn-mobi": continue hrefs = self.oeb.manifest.hrefs if ihref not in hrefs: 
self.logger.warn("Ignoring missing stylesheet in @import rule:", rule.href) continue sitem = hrefs[ihref] if sitem.media_type not in OEB_STYLES: self.logger.warn("CSS @import of non-CSS file %r" % rule.href) continue stylesheets.append(sitem.data) for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)): stylesheet.cssRules.remove(rule) # Make links to resources absolute, since these rules will # be folded into a stylesheet at the root replaceUrls(stylesheet, item.abshref, ignoreImportRules=True) stylesheets.append(stylesheet) elif ( elem.tag == XHTML("link") and elem.get("href") and elem.get("rel", "stylesheet").lower() == "stylesheet" and elem.get("type", CSS_MIME).lower() in OEB_STYLES ): href = urlnormalize(elem.attrib["href"]) path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: self.logger.warn("Stylesheet %r referenced by file %r not in manifest" % (path, item.href)) continue if not hasattr(sitem.data, "cssRules"): self.logger.warn("Stylesheet %r referenced by file %r is not CSS" % (path, item.href)) continue stylesheets.append(sitem.data) csses = {"extra_css": extra_css, "user_css": user_css} for w, x in csses.items(): if x: try: text = XHTML_CSS_NAMESPACE + x stylesheet = parser.parseString(text, href=cssname, validate=False) stylesheet.namespaces["h"] = XHTML_NS stylesheets.append(stylesheet) except: self.logger.exception("Failed to parse %s, ignoring." 
% w) self.logger.debug("Bad css: ") self.logger.debug(x) rules = [] index = 0 self.stylesheets = set() self.page_rule = {} for sheet_index, stylesheet in enumerate(stylesheets): href = stylesheet.href self.stylesheets.add(href) for rule in stylesheet.cssRules: if rule.type == rule.MEDIA_RULE: media = {rule.media.item(i) for i in xrange(rule.media.length)} if not media.intersection({"all", "screen", "amzn-kf8"}): continue for subrule in rule.cssRules: rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index == 0)) index += 1 else: rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index == 0)) index = index + 1 rules.sort() self.rules = rules self._styles = {} pseudo_pat = re.compile(ur":(first-letter|first-line|link|hover|visited|active|focus|before|after)", re.I) for _, _, cssdict, text, _ in rules: fl = pseudo_pat.search(text) if fl is not None: text = text.replace(fl.group(), "") selector = get_css_selector(text, self.oeb.log) matches = selector(tree, self.logger) if fl is not None: fl = fl.group(1) if fl == "first-letter" and getattr(self.oeb, "plumber_output_format", "").lower() == u"mobi": # Fake first-letter from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) for elem in matches: for x in elem.iter(): if x.text: punctuation_chars = [] text = unicode(x.text) while text: category = unicodedata.category(text[0]) if category[0] not in {"P", "Z"}: break punctuation_chars.append(text[0]) text = text[1:] special_text = u"".join(punctuation_chars) + (text[0] if text else u"") span = E.span(special_text) span.tail = text[1:] x.text = None x.insert(0, span) self.style(span)._update_cssdict(cssdict) break else: # Element pseudo-class for elem in matches: self.style(elem)._update_pseudo_class(fl, cssdict) else: for elem in matches: self.style(elem)._update_cssdict(cssdict) for elem in xpath(tree, "//h:*[@style]"): self.style(elem)._apply_style_attr(url_replacer=item.abshref) num_pat = 
re.compile(r"[0-9.]+$") for elem in xpath(tree, "//h:img[@width or @height]"): style = self.style(elem) # Check if either height or width is not default is_styled = style._style.get("width", "auto") != "auto" or style._style.get("height", "auto") != "auto" if not is_styled: # Update img style dimension using width and height upd = {} for prop in ("width", "height"): val = elem.get(prop, "").strip() try: del elem.attrib[prop] except: pass if val: if num_pat.match(val) is not None: val += "px" upd[prop] = val if upd: style._update_cssdict(upd)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css=''):
    """Collect the stylesheets that apply to ``tree`` and compute element styles.

    Gathers CSS from the ``<style>`` and ``<link>`` children of the
    document head plus ``extra_css`` and ``user_css``, flattens and sorts
    all rules, applies them to the elements their selectors match, then
    applies inline ``style`` attributes and converts ``width``/``height``
    attributes of images into style properties.

    :param tree: Parsed document; queried with ``xpath`` and CSS selectors.
    :param path: Key of the document in ``oeb.manifest.hrefs``.
    :param oeb: Provides ``logger``, ``manifest`` and ``css_preprocessor``.
    :param opts: Conversion options; ``opts.output_profile`` is read.
    :param profile: Profile to use. When None, the output profile named
        'default' is used, falling back to ``opts.output_profile``.
    :param extra_css: Additional CSS text, parsed after the document CSS.
    :param user_css: Additional CSS text, parsed after the document CSS.
    """
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    stylesheets = [html_css_stylesheet()]
    head = xpath(tree, '/h:html/h:head')
    if head:
        head = head[0]
    else:
        head = []

    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'], profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in head:
        if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES):
            # Join the element text with the text and tail of its children.
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = XHTML_CSS_NAMESPACE + text
                text = oeb.css_preprocessor(text)
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS' % (path, item.href))
                continue
            stylesheets.append(sitem.data)

    csses = {'extra_css': extra_css, 'user_css': user_css}
    for w, x in csses.items():
        if x:
            try:
                text = XHTML_CSS_NAMESPACE + x
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
            except:
                # Best effort: unparseable extra CSS is logged and skipped.
                self.logger.exception('Failed to parse %s, ignoring.' % w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)

    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for stylesheet in stylesheets:
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            rules.extend(self.flatten_rule(rule, href, index))
            index = index + 1
    rules.sort()
    self.rules = rules
    self._styles = {}
    for _, _, cssdict, text, _ in rules:
        fl = ':first-letter' in text
        if fl:
            text = text.replace(':first-letter', '')
        selector = get_css_selector(text)
        matches = selector(tree, self.logger)
        if fl:
            # Fake :first-letter by wrapping the leading punctuation and
            # the first following character in a span that gets the style.
            from lxml.builder import ElementMaker
            E = ElementMaker(namespace=XHTML_NS)
            for elem in matches:
                # Only real elements: a bare iter() also yields comments
                # and processing instructions, which have .text but cannot
                # take a child span.
                for x in elem.iter('*'):
                    if x.text:
                        punctuation_chars = []
                        text = unicode(x.text)
                        while text:
                            if not unicodedata.category(
                                    text[0]).startswith('P'):
                                break
                            punctuation_chars.append(text[0])
                            text = text[1:]

                        special_text = u''.join(punctuation_chars) + \
                                (text[0] if text else u'')
                        span = E.span(special_text)
                        span.tail = text[1:]
                        x.text = None
                        x.insert(0, span)
                        self.style(span)._update_cssdict(cssdict)
                        break
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'\d+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    # Values consisting only of digits get a 'px' unit.
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css='', base_css=''):
    """Collect the stylesheets that apply to ``tree`` and compute element styles.

    Gathers CSS from every ``style`` and ``link`` element in the document,
    from stylesheets they ``@import``, and from ``base_css``, ``extra_css``
    and ``user_css``. All rules are flattened and sorted, applied to the
    elements their selectors match, and then inline ``style`` attributes
    and image ``width``/``height`` attributes are folded into the computed
    styles.

    :param tree: Parsed document; queried with ``xpath`` and ``Select``.
    :param path: Key of the document in ``oeb.manifest.hrefs``.
    :param oeb: Provides ``logger``, ``manifest`` and ``css_preprocessor``.
    :param opts: Conversion options; ``opts.output_profile`` is read.
    :param profile: Profile to use. When None, the output profile named
        'default' is used, falling back to ``opts.output_profile``.
    :param extra_css: Additional CSS text, parsed after the document CSS.
    :param user_css: Additional CSS text, parsed after the document CSS.
    :param base_css: CSS text added right after the first (HTML) stylesheet.
    """
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.body_font_size = self.profile.fbase
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    # The first stylesheet is treated as the user agent sheet further down
    # (sheet_index == 0).
    stylesheets = [html_css_stylesheet()]
    if base_css:
        stylesheets.append(parseString(base_css, validate=False))
    style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'], profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in style_tags:
        if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES):
            # Join the element text with the text and tail of its children.
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = oeb.css_preprocessor(text)
                # We handle @import rules separately
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                parser.setFetcher(self._fetch_css_file)
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        if rule.media.mediaText == 'amzn-mobi':
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in OEB_STYLES:
                            self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                            continue
                        # Imported sheets are added before the importing one.
                        stylesheets.append(sitem.data)
                # Iterate over a copy, as rules are removed from the list.
                for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                    stylesheet.cssRules.remove(rule)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path, item.href))
                continue
            stylesheets.append(sitem.data)

    csses = {'extra_css':extra_css, 'user_css':user_css}
    for w, x in csses.items():
        if x:
            try:
                text = x
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheets.append(stylesheet)
            except:
                # Best effort: unparseable extra CSS is logged and skipped.
                self.logger.exception('Failed to parse %s, ignoring.'%w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)

    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for sheet_index, stylesheet in enumerate(stylesheets):
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            if rule.type == rule.MEDIA_RULE:
                # Only @media blocks naming one of these media are used;
                # their rules are flattened individually.
                media = {rule.media.item(i) for i in xrange(rule.media.length)}
                if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                    continue
                for subrule in rule.cssRules:
                    rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                    index += 1
            else:
                rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                index = index + 1
    rules.sort()
    self.rules = rules
    self._styles = {}
    pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
    select = Select(tree, ignore_inappropriate_pseudo_classes=True)

    for _, _, cssdict, text, _ in rules:
        fl = pseudo_pat.search(text)
        try:
            matches = tuple(select(text))
        except SelectorError as err:
            self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
            continue

        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(self.oeb, 'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                # Fake first-letter
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter('*'):
                        if x.text:
                            # Split off leading punctuation/separator
                            # characters plus the first character after them.
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                category = unicodedata.category(text[0])
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.set('data-fake-first-letter', '1')
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            # Only the first element with text gets the span.
                            break
            else:
                # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'[0-9.]+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    # Bare numbers get a 'px' unit.
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def render_jacket(mi):
    '''
    Render the jacket for ``mi`` with the output profile named by the
    'output_profile' entry of the 'page_setup' defaults. If that entry is
    absent, or names a profile that is not available, the profile whose
    short name is 'default' is used instead.

    :param mi: Passed through unchanged to ``render()``.
    :return: Whatever ``render()`` returns.
    '''
    page_setup = load_defaults('page_setup')
    wanted = page_setup.get('output_profile', 'default')
    by_name = dict((prof.short_name, prof) for prof in output_profiles())
    # Looked up eagerly: a missing 'default' profile raises KeyError even
    # when the wanted profile exists.
    fallback = by_name['default']
    chosen = by_name.get(wanted, fallback)
    return render(mi, chosen)
def __init__(self, input, output, log, report_progress=DummyReporter(),
        dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
        override_input_metadata=False):
    '''
    Set up a conversion from ``input`` to ``output``: declare the
    format-independent pipeline options, work out the input and output
    formats from the file extensions, look up the matching plugins and
    collect their options.

    :param input: Path to input file.
    :param output: Path to output file/directory
    :param log: Stored as ``self.log``; called with a message when an
                archive input is processed.
    :param report_progress: Stored as ``self.ui_reporter``.
    :param dummy: If true, the options of every available input and output
                  format are gathered into ``self.all_format_options``
                  instead of only those of the selected plugins.
    :param merge_plugin_recs: If true,
                  ``self.merge_plugin_recommendations()`` is called last.
    :param abort_after_input_dump: Stored on the instance unchanged.
    :param override_input_metadata: Stored on the instance unchanged.
    :raises ValueError: If the input file has no extension, or no plugin
                  handles the detected input or output format.
    '''
    # Byte string paths are decoded with the filesystem encoding
    if isbytestring(input):
        input = input.decode(filesystem_encoding)
    if isbytestring(output):
        output = output.decode(filesystem_encoding)
    # Keep the path exactly as passed in; self.input may be replaced below
    self.original_input_arg = input
    self.input = os.path.abspath(input)
    self.output = os.path.abspath(output)
    self.log = log
    self.ui_reporter = report_progress
    self.abort_after_input_dump = abort_after_input_dump
    self.override_input_metadata = override_input_metadata

    # Pipeline options {{{
    # Initialize the conversion options that are independent of input and
    # output formats. The input and output plugins can still disable these
    # options via recommendations.
    self.pipeline_options = [

        OptionRecommendation(name='verbose',
            recommended_value=0, level=OptionRecommendation.LOW,
            short_switch='v',
            help=_('Level of verbosity. Specify multiple times for greater '
                   'verbosity.')
        ),

        OptionRecommendation(name='debug_pipeline',
            recommended_value=None, level=OptionRecommendation.LOW,
            short_switch='d',
            help=_('Save the output from different stages of the conversion '
                   'pipeline to the specified '
                   'directory. Useful if you are unsure at which stage '
                   'of the conversion process a bug is occurring.')
        ),

        OptionRecommendation(name='input_profile',
            recommended_value='default', level=OptionRecommendation.LOW,
            choices=[x.short_name for x in input_profiles()],
            help=_('Specify the input profile. The input profile gives the '
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
                   'pixels). Choices are:')+\
            ', '.join([x.short_name for x in input_profiles()])
        ),

        OptionRecommendation(name='output_profile',
            recommended_value='default', level=OptionRecommendation.LOW,
            choices=[x.short_name for x in output_profiles()],
            help=_('Specify the output profile. The output profile '
                   'tells the conversion system how to optimize the '
                   'created document for the specified device. In some cases, '
                   'an output profile is required to produce documents that '
                   'will work on a device. For example EPUB on the SONY reader. '
                   'Choices are:') + \
            ', '.join([x.short_name for x in output_profiles()])
        ),

        OptionRecommendation(name='base_font_size',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_('The base font size in pts. All font sizes in the produced book '
                   'will be rescaled based on this size. By choosing a larger '
                   'size you can make the fonts in the output bigger and vice '
                   'versa. By default, the base font size is chosen based on '
                   'the output profile you chose.'
                   )
        ),

        OptionRecommendation(name='font_size_mapping',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Mapping from CSS font names to font sizes in pts. '
                   'An example setting is 12,12,14,16,18,20,22,24. '
                   'These are the mappings for the sizes xx-small to xx-large, '
                   'with the final size being for huge fonts. The font '
                   'rescaling algorithm uses these sizes to intelligently '
                   'rescale fonts. The default is to use a mapping based on '
                   'the output profile you chose.'
                   )
        ),

        OptionRecommendation(name='disable_font_rescaling',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Disable all rescaling of font sizes.'
                   )
        ),

        OptionRecommendation(name='minimum_line_height',
            recommended_value=120.0, level=OptionRecommendation.LOW,
            help=_(
                'The minimum line height, as a percentage of the element\'s '
                'calculated font size. calibre will ensure that every element '
                'has a line height of at least this setting, irrespective of '
                'what the input document specifies. Set to zero to disable. '
                'Default is 120%. Use this setting in preference to '
                'the direct line height specification, unless you know what '
                'you are doing. For example, you can achieve "double spaced" '
                'text by setting this to 240.'
                )
        ),

        OptionRecommendation(name='line_height',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_(
                'The line height in pts. Controls spacing between consecutive '
                'lines of text. Only applies to elements that do not define '
                'their own line height. In most cases, the minimum line height '
                'option is more useful. '
                'By default no line height manipulation is performed.'
                )
        ),

        OptionRecommendation(name='linearize_tables',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Some badly designed documents use tables to control the '
                   'layout of text on the page. When converted these documents '
                   'often have text that runs off the page and other artifacts. '
                   'This option will extract the content from the tables and '
                   'present it in a linear fashion.'
                   )
        ),

        OptionRecommendation(name='level1_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that '
                   'should be added to the Table of Contents at level one. If '
                   'this is specified, it takes precedence over other forms '
                   'of auto-detection.'
                   ' See the XPath Tutorial in the calibre User Manual for examples.'
                   )
        ),

        OptionRecommendation(name='level2_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that should be '
                   'added to the Table of Contents at level two. Each entry is added '
                   'under the previous level one entry.'
                   ' See the XPath Tutorial in the calibre User Manual for examples.'
                   )
        ),

        OptionRecommendation(name='level3_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that should be '
                   'added to the Table of Contents at level three. Each entry '
                   'is added under the previous level two entry.'
                   ' See the XPath Tutorial in the calibre User Manual for examples.'
                   )
        ),

        OptionRecommendation(name='use_auto_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally, if the source file already has a Table of '
                   'Contents, it is used in preference to the auto-generated one. '
                   'With this option, the auto-generated one is always used.'
                   )
        ),

        OptionRecommendation(name='no_chapters_in_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_("Don't add auto-detected chapters to the Table of "
                   'Contents.'
                   )
        ),

        OptionRecommendation(name='toc_threshold',
            recommended_value=6, level=OptionRecommendation.LOW,
            help=_(
                'If fewer than this number of chapters is detected, then links '
                'are added to the Table of Contents. Default: %default')
        ),

        OptionRecommendation(name='max_toc_links',
            recommended_value=50, level=OptionRecommendation.LOW,
            help=_('Maximum number of links to insert into the TOC. Set to 0 '
                   'to disable. Default is: %default. Links are only added to the '
                   'TOC if less than the threshold number of chapters were detected.'
                   )
        ),

        OptionRecommendation(name='toc_filter',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Remove entries from the Table of Contents whose titles '
                   'match the specified regular expression. Matching entries and all '
                   'their children are removed.'
                   )
        ),

        OptionRecommendation(name='duplicate_links_in_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('When creating a TOC from links in the input document, '
                   'allow duplicate entries, i.e. allow more than one entry '
                   'with the same text, provided that they point to a '
                   'different location.')
        ),

        OptionRecommendation(name='chapter',
            recommended_value="//*[((name()='h1' or name()='h2') and "
                  r"re:test(., '\s*((chapter|book|section|part)\s+)|((prolog|prologue|epilogue)(\s+|$))', 'i')) or @class "
                  "= 'chapter']", level=OptionRecommendation.LOW,
            help=_('An XPath expression to detect chapter titles. The default '
                   'is to consider <h1> or <h2> tags that contain the words '
                   '"chapter","book","section", "prologue", "epilogue", or "part" as chapter titles as '
                   'well as any tags that have class="chapter". The expression '
                   'used must evaluate to a list of elements. To disable chapter '
                   'detection, use the expression "/". See the XPath Tutorial '
                   'in the calibre User Manual for further help on using this '
                   'feature.'
                   )
        ),

        OptionRecommendation(name='chapter_mark',
            recommended_value='pagebreak', level=OptionRecommendation.LOW,
            choices=['pagebreak', 'rule', 'both', 'none'],
            help=_('Specify how to mark detected chapters. A value of '
                   '"pagebreak" will insert page breaks before chapters. '
                   'A value of "rule" will insert a line before chapters. '
                   'A value of "none" will disable chapter marking and a '
                   'value of "both" will use both page breaks and lines '
                   'to mark chapters.')
        ),

        OptionRecommendation(name='extra_css',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Either the path to a CSS stylesheet or raw CSS. '
                   'This CSS will be appended to the style rules from '
                   'the source file, so it can be used to override those '
                   'rules.')
        ),

        OptionRecommendation(name='filter_css',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('A comma separated list of CSS properties that '
                   'will be removed from all CSS style rules. This is useful '
                   'if the presence of some style information prevents it '
                   'from being overridden on your device. '
                   'For example: '
                   'font-family,color,margin-left,margin-right')
        ),

        OptionRecommendation(name='page_breaks_before',
            recommended_value="//*[name()='h1' or name()='h2']",
            level=OptionRecommendation.LOW,
            help=_('An XPath expression. Page breaks are inserted '
                   'before the specified elements.')
        ),

        OptionRecommendation(name='remove_fake_margins',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Some documents specify page margins by '
                   'specifying a left and right margin on each individual '
                   'paragraph. calibre will try to detect and remove these '
                   'margins. Sometimes, this can cause the removal of '
                   'margins that should not have been removed. In this '
                   'case you can disable the removal.')
        ),

        OptionRecommendation(name='margin_top',
            recommended_value=5.0, level=OptionRecommendation.LOW,
            help=_('Set the top margin in pts. Default is %default. '
                   'Setting this to less than zero will cause no margin to be set. '
                   'Note: 72 pts equals 1 inch')),

        OptionRecommendation(name='margin_bottom',
            recommended_value=5.0, level=OptionRecommendation.LOW,
            help=_('Set the bottom margin in pts. Default is %default. '
                   'Setting this to less than zero will cause no margin to be set. '
                   'Note: 72 pts equals 1 inch')),

        OptionRecommendation(name='margin_left',
            recommended_value=5.0, level=OptionRecommendation.LOW,
            help=_('Set the left margin in pts. Default is %default. '
                   'Setting this to less than zero will cause no margin to be set. '
                   'Note: 72 pts equals 1 inch')),

        OptionRecommendation(name='margin_right',
            recommended_value=5.0, level=OptionRecommendation.LOW,
            help=_('Set the right margin in pts. Default is %default. '
                   'Setting this to less than zero will cause no margin to be set. '
                   'Note: 72 pts equals 1 inch')),

        OptionRecommendation(name='change_justification',
            recommended_value='original', level=OptionRecommendation.LOW,
            choices=['left','justify','original'],
            help=_('Change text justification. A value of "left" converts all'
                   ' justified text in the source to left aligned (i.e. '
                   'unjustified) text. A value of "justify" converts all '
                   'unjustified text to justified. A value of "original" '
                   '(the default) does not change justification in the '
                   'source file. Note that only some output formats support '
                   'justification.')),

        OptionRecommendation(name='remove_paragraph_spacing',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Remove spacing between paragraphs. Also sets an indent on '
                   'paragraphs of 1.5em. Spacing removal will not work '
                   'if the source file does not use paragraphs (<p> or <div> tags).')
        ),

        OptionRecommendation(name='remove_paragraph_spacing_indent_size',
            recommended_value=1.5, level=OptionRecommendation.LOW,
            help=_('When calibre removes blank lines between paragraphs, it automatically '
                   'sets a paragraph indent, to ensure that paragraphs can be easily '
                   'distinguished. This option controls the width of that indent (in em). '
                   'If you set this value negative, then the indent specified in the input '
                   'document is used, that is, calibre does not change the indentation.')
        ),

        OptionRecommendation(name='prefer_metadata_cover',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Use the cover detected from the source file in preference '
                   'to the specified cover.')
        ),

        OptionRecommendation(name='insert_blank_line',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Insert a blank line between paragraphs. Will not work '
                   'if the source file does not use paragraphs (<p> or <div> tags).'
                   )
        ),

        OptionRecommendation(name='insert_blank_line_size',
            recommended_value=0.5, level=OptionRecommendation.LOW,
            help=_('Set the height of the inserted blank lines (in em).'
                   ' The height of the lines between paragraphs will be twice the value'
                   ' set here.')
        ),

        OptionRecommendation(name='remove_first_image',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Remove the first image from the input ebook. Useful if the '
                   'input document has a cover image that is not identified as a cover. '
                   'In this case, if you set a cover in calibre, the output document will '
                   'end up with two cover images if you do not specify this option.'
                   )
        ),

        OptionRecommendation(name='insert_metadata',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Insert the book metadata at the start of '
                   'the book. This is useful if your ebook reader does not support '
                   'displaying/searching metadata directly.'
                   )
        ),

        OptionRecommendation(name='smarten_punctuation',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Convert plain quotes, dashes and ellipsis to their '
                   'typographically correct equivalents. For details, see '
                   'http://daringfireball.net/projects/smartypants'
                   )
        ),

        OptionRecommendation(name='unsmarten_punctuation',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Convert fancy quotes, dashes and ellipsis to their '
                   'plain equivalents.'
                   )
        ),

        OptionRecommendation(name='read_metadata_from_opf',
            recommended_value=None, level=OptionRecommendation.LOW,
            short_switch='m',
            help=_('Read metadata from the specified OPF file. Metadata read '
                   'from this file will override any metadata in the source '
                   'file.')
        ),

        # The translated help text carries a "%s" placeholder that is filled
        # with a Cyrillic sample name after translation.
        OptionRecommendation(name='asciiize',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=(_('Transliterate unicode characters to an ASCII '
                    'representation. Use with care because this will replace '
                    'unicode characters with ASCII. For instance it will replace "%s" '
                    'with "Mikhail Gorbachiov". Also, note that in '
                    'cases where there are multiple representations of a character '
                    '(characters shared by Chinese and Japanese for instance) the '
                    'representation based on the current calibre interface language will be '
                    'used.')%\
                  u'\u041c\u0438\u0445\u0430\u0438\u043b '
                  u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
                  )
        ),

        OptionRecommendation(name='keep_ligatures',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Preserve ligatures present in the input document. '
                   'A ligature is a special rendering of a pair of '
                   'characters like ff, fi, fl et cetera. '
                   'Most readers do not have support for '
                   'ligatures in their default fonts, so they are '
                   'unlikely to render correctly. By default, calibre '
                   'will turn a ligature into the corresponding pair of normal '
                   'characters. This option will preserve them instead.')
        ),

        OptionRecommendation(name='title',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the title.')),

        OptionRecommendation(name='authors',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the authors. Multiple authors should be separated by '
                   'ampersands.')),

        OptionRecommendation(name='title_sort',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('The version of the title to be used for sorting. ')),

        OptionRecommendation(name='author_sort',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('String to be used when sorting by author. ')),

        OptionRecommendation(name='cover',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the cover to the specified file or URL')),

        OptionRecommendation(name='comments',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the ebook description.')),

        OptionRecommendation(name='publisher',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the ebook publisher.')),

        OptionRecommendation(name='series',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the series this ebook belongs to.')),

        OptionRecommendation(name='series_index',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the index of the book in this series.')),

        OptionRecommendation(name='rating',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the rating. Should be a number between 1 and 5.')),

        OptionRecommendation(name='isbn',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the ISBN of the book.')),

        OptionRecommendation(name='tags',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the tags for the book. Should be a comma separated list.')),

        OptionRecommendation(name='book_producer',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the book producer.')),

        OptionRecommendation(name='language',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the language.')),

        OptionRecommendation(name='pubdate',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the publication date.')),

        OptionRecommendation(name='timestamp',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('Set the book timestamp (no longer used anywhere)')),

        OptionRecommendation(name='enable_heuristics',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Enable heuristic processing. This option must be set for any '
                   'heuristic processing to take place.')),

        OptionRecommendation(name='markup_chapter_headings',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Detect unformatted chapter headings and sub headings. Change '
                   'them to h2 and h3 tags. This setting will not create a TOC, '
                   'but can be used in conjunction with structure detection to create '
                   'one.')),

        OptionRecommendation(name='italicize_common_cases',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Look for common words and patterns that denote '
                   'italics and italicize them.')),

        OptionRecommendation(name='fix_indents',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Turn indentation created from multiple non-breaking space entities '
                   'into CSS indents.')),

        OptionRecommendation(name='html_unwrap_factor',
            recommended_value=0.40, level=OptionRecommendation.LOW,
            help=_('Scale used to determine the length at which a line should '
                   'be unwrapped. Valid values are a decimal between 0 and 1. The '
                   'default is 0.4, just below the median line length. If only a '
                   'few lines in the document require unwrapping this value should '
                   'be reduced')),

        OptionRecommendation(name='unwrap_lines',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Unwrap lines using punctuation and other formatting clues.')),

        OptionRecommendation(name='delete_blank_paragraphs',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Remove empty paragraphs from the document when they exist between '
                   'every other paragraph')),

        OptionRecommendation(name='format_scene_breaks',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Left aligned scene break markers are center aligned. '
                   'Replace soft scene breaks that use multiple blank lines with '
                   'horizontal rules.')),

        OptionRecommendation(name='replace_scene_breaks',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Replace scene breaks with the specified text. By default, the '
                   'text from the input document is used.')),

        OptionRecommendation(name='dehyphenate',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Analyze hyphenated words throughout the document. The '
                   'document itself is used as a dictionary to determine whether hyphens '
                   'should be retained or removed.')),

        OptionRecommendation(name='renumber_headings',
            recommended_value=True, level=OptionRecommendation.LOW,
            help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
                   'The tags are renumbered to prevent splitting in the middle '
                   'of chapter headings.')),

        OptionRecommendation(name='sr1_search',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Search pattern (regular expression) to be replaced with '
                   'sr1-replace.')),

        OptionRecommendation(name='sr1_replace',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Replacement to replace the text found with sr1-search.')),

        OptionRecommendation(name='sr2_search',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Search pattern (regular expression) to be replaced with '
                   'sr2-replace.')),

        OptionRecommendation(name='sr2_replace',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Replacement to replace the text found with sr2-search.')),

        OptionRecommendation(name='sr3_search',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Search pattern (regular expression) to be replaced with '
                   'sr3-replace.')),

        OptionRecommendation(name='sr3_replace',
            recommended_value='', level=OptionRecommendation.LOW,
            help=_('Replacement to replace the text found with sr3-search.')),

        OptionRecommendation(name='search_replace',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_(
                'Path to a file containing search and replace regular expressions. '
                'The file must contain alternating lines of regular expression '
                'followed by replacement pattern (which can be an empty line). '
                'The regular expression must be in the python regex syntax and '
                'the file must be UTF-8 encoded.')),

    ]
    # }}}

    # The input format is the lower-cased file extension without the dot,
    # with any 'original_' text removed.
    input_fmt = os.path.splitext(self.input)[1]
    if not input_fmt:
        raise ValueError('Input file must have an extension')
    input_fmt = input_fmt[1:].lower().replace('original_', '')
    self.archive_input_tdir = None
    if input_fmt in ARCHIVE_FMTS:
        # self.unarchive() supplies both the new input path and its format;
        # the temporary directory is remembered on the instance.
        self.log('Processing archive...')
        tdir = PersistentTemporaryDirectory('_plumber_archive')
        self.input, input_fmt = self.unarchive(self.input, tdir)
        self.archive_input_tdir = tdir
    if os.access(self.input, os.R_OK):
        nfp = run_plugins_on_preprocess(self.input, input_fmt)
        if nfp != self.input:
            # The preprocess plugins returned a different path: switch to
            # it and recompute the format from its extension.
            self.input = nfp
            input_fmt = os.path.splitext(self.input)[1]
            if not input_fmt:
                raise ValueError('Input file must have an extension')
            input_fmt = input_fmt[1:].lower()

    # An existing directory, or an output path without an extension, means
    # the 'oeb' output format.
    if os.path.exists(self.output) and os.path.isdir(self.output):
        output_fmt = 'oeb'
    else:
        output_fmt = os.path.splitext(self.output)[1]
        if not output_fmt:
            output_fmt = '.oeb'
        output_fmt = output_fmt[1:].lower()

    self.input_plugin = plugin_for_input_format(input_fmt)
    self.output_plugin = plugin_for_output_format(output_fmt)

    if self.input_plugin is None:
        raise ValueError('No plugin to handle input format: '+input_fmt)
    if self.output_plugin is None:
        raise ValueError('No plugin to handle output format: '+output_fmt)

    self.input_fmt = input_fmt
    self.output_fmt = output_fmt

    self.all_format_options = set()
    self.input_options = set()
    self.output_options = set()
    # Build set of all possible options. Two options are equal if their
    # names are the same.
    if not dummy:
        self.input_options = self.input_plugin.options.union(
                                    self.input_plugin.common_options)
        self.output_options = self.output_plugin.options.union(
                                self.output_plugin.common_options)
    else:
        # Dummy mode: collect the options of every available format that
        # has a plugin, rather than only those of the selected plugins.
        for fmt in available_input_formats():
            input_plugin = plugin_for_input_format(fmt)
            if input_plugin:
                self.all_format_options = self.all_format_options.union(
                    input_plugin.options.union(input_plugin.common_options))
        for fmt in available_output_formats():
            output_plugin = plugin_for_output_format(fmt)
            if output_plugin:
                self.all_format_options = self.all_format_options.union(
                    output_plugin.options.union(output_plugin.common_options))

    # Remove the options that have been disabled by recommendations from the
    # plugins.
    # NOTE(review): the loop below only replaces each option set with a set
    # of clones of its members; no removal is visible here. Confirm whether
    # the comment above still describes this code.
    for w in ('input_options', 'output_options',
            'all_format_options'):
        temp = set([])
        for x in getattr(self, w):
            temp.add(x.clone())
        setattr(self, w, temp)
    if merge_plugin_recs:
        self.merge_plugin_recommendations()
def __init__(self, tree, path, oeb, opts, profile=None,
        extra_css='', user_css=''):
    '''
    Collect every stylesheet that applies to the document ``tree``, flatten
    the rules and apply them to the elements they select.

    Stylesheets are gathered in this order: the built-in HTML stylesheet,
    then ``<style>`` and ``<link>`` elements from the document head (with
    manifest stylesheets pulled in by ``@import``), then ``extra_css`` and
    ``user_css``. After the rules are applied, inline ``style`` attributes
    are applied, and ``width``/``height`` attributes on images that have no
    CSS size are moved into the image's style.

    :param tree: Parsed XHTML document, queried with ``xpath()``.
    :param path: Key of the document in ``oeb.manifest.hrefs``.
    :param oeb: Book container; supplies the manifest, the logger and the
                CSS preprocessor.
    :param opts: Conversion options; ``opts.output_profile`` is read.
    :param profile: Profile supplying ``fbase``. When None, the output
                profile named 'default' is used, or ``opts.output_profile``
                if there is none.
    :param extra_css: Additional CSS text, ignored when empty.
    :param user_css: Additional CSS text, ignored when empty.
    '''
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.body_font_size = self.profile.fbase
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    stylesheets = [html_css_stylesheet()]
    head = xpath(tree, '/h:html/h:head')
    if head:
        head = head[0]
    else:
        head = []

    # Add cssutils parsing profiles from output_profile
    # (loop variable named so that it does not shadow the ``profile``
    # parameter)
    for css_module in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(css_module['name'],
                               css_module['props'],
                               css_module['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file,
            log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in head:
        if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
            # The CSS may be spread over the text and tails of child nodes
            # of the <style> element, so gather all of it.
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = oeb.css_preprocessor(text, add_namespace=True)
                # We handle @import rules separately
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname,
                        validate=False)
                parser.setFetcher(self._fetch_css_file)
                stylesheet.namespaces['h'] = XHTML_NS
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        if rule.media.mediaText == 'amzn-mobi':
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in OEB_STYLES:
                            self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                            continue
                        stylesheets.append(sitem.data)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref,
                        ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            # Named so that it does not shadow the ``path`` parameter
            sheet_path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(sheet_path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (sheet_path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(sheet_path,
                        item.href))
                continue
            stylesheets.append(sitem.data)
    # NOTE(review): dict iteration order decides which of the two sheets is
    # appended first, and therefore their relative rule order.
    csses = {'extra_css':extra_css, 'user_css':user_css}
    for w, x in csses.items():
        if x:
            try:
                text = XHTML_CSS_NAMESPACE + x
                stylesheet = parser.parseString(text, href=cssname,
                        validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
            except Exception:
                # Bad extra/user CSS is logged and skipped. Only Exception
                # is caught so that KeyboardInterrupt and SystemExit still
                # propagate.
                self.logger.exception('Failed to parse %s, ignoring.'%w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)
    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for stylesheet in stylesheets:
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            if rule.type == rule.MEDIA_RULE:
                # Only media rules that target all, screen or amzn-kf8 are
                # used; their sub-rules are flattened individually.
                media = {rule.media.item(i) for i in
                         xrange(rule.media.length)}
                if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                    continue
                for subrule in rule.cssRules:
                    rules.extend(self.flatten_rule(subrule, href, index))
                    index += 1
            else:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
    rules.sort()
    self.rules = rules
    self._styles = {}
    # The pattern has no backslashes, so a plain unicode literal has the
    # same value as the former raw one.
    pseudo_pat = re.compile(u':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
    for _, _, cssdict, text, _ in rules:
        fl = pseudo_pat.search(text)
        if fl is not None:
            # Select on the selector with the pseudo part stripped; the
            # pseudo part is handled separately below.
            text = text.replace(fl.group(), '')
        selector = get_css_selector(text, self.oeb.log)
        matches = selector(tree, self.logger)
        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(self.oeb,
                    'plumber_output_format', '').lower() == u'mobi':
                # Fake first-letter
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    # iter('*') yields elements only. A bare iter() also
                    # yields comments and processing instructions, which
                    # have text but cannot take the <span> child inserted
                    # below.
                    for x in elem.iter('*'):
                        if x.text:
                            # Leading punctuation and separators are kept
                            # together with the first real character.
                            punctuation_chars = []
                            rest = unicode(x.text)
                            while rest:
                                category = unicodedata.category(rest[0])
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(rest[0])
                                rest = rest[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (rest[0] if rest else u'')
                            span = E.span(special_text)
                            span.tail = rest[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'\d+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except KeyError:
                    # The image carries only one of the two attributes
                    pass
                if val:
                    # Bare integers get a px unit
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def render_jacket(mi):
    '''
    Pick the output profile recorded under 'output_profile' in the
    'page_setup' defaults and render the jacket for ``mi`` with it. The
    profile with the short name 'default' is used when the recorded name is
    missing or does not match any available profile.

    :param mi: Handed to ``render()`` as is.
    :return: The result of ``render()``.
    '''
    defaults = load_defaults('page_setup')
    profile_name = defaults.get('output_profile', 'default')

    # Index the available profiles by short name
    profiles = {}
    for candidate in output_profiles():
        profiles[candidate.short_name] = candidate

    # The fallback is resolved first, so a missing 'default' profile raises
    # KeyError regardless of the recorded name.
    default_profile = profiles['default']
    selected = profiles.get(profile_name, default_profile)
    return render(mi, selected)