def create_epub_cover(container, cover_path):
    """Add a cover image and a titlepage that displays it to an EPUB container.

    The image at ``cover_path`` is copied into the container, an XHTML
    titlepage is generated from one of the ``CoverManager`` templates, and the
    OPF spine, guide and metadata are updated to reference the new items.

    :param container: The book container to modify.
    :param cover_path: Filesystem path of the raster cover image.
    :return: ``(raster_cover, titlepage)``, the container names of the copied
        image and of the generated titlepage.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition('.')[-1].lower()
    raster_cover_item = container.generate_item('cover.'+ext, id_prefix='cover')
    raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)
    with open(cover_path, 'rb') as src, container.open(raster_cover, 'wb') as dest:
        shutil.copyfileobj(src, dest)

    opts = load_defaults('epub_output')
    keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
    no_svg = opts.get('no_svg_cover', False)
    if no_svg:
        # The doubled %% survives the %-formatting of the template below
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        # Fallback dimensions, used when the image cannot be identified
        width, height = 600, 800
        try:
            width, height = identify(cover_path)[:2]
        except Exception:
            container.log.exception("Failed to get width and height of cover")
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
        templ = templ.replace('__viewbox__', '0 0 %d %d'%(width, height))
        templ = templ.replace('__width__', str(width))
        templ = templ.replace('__height__', str(height))

    titlepage_item = container.generate_item('titlepage.xhtml', id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'), container.opf_name)
    # NOTE(review): the href is computed without a base although the page that
    # embeds it is `titlepage`; confirm this is correct when the OPF is not in
    # the container root.
    raw = templ % container.name_to_href(raster_cover)
    # Encode the finished page. `templ % href.encode('utf-8')` encoded only the
    # href operand, because attribute access binds tighter than %.
    if not isinstance(raw, bytes):
        raw = raw.encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # The raster cover item must have id="cover" for the Nook firmware, so any
    # other element currently using that id is moved to a fresh id first.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    # Make the titlepage the first item in the spine
    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)

    guide = container.opf_get_or_create('guide')
    container.insert_into_xml(guide, guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name)))

    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def safe_img_data(container, name, mt):
    """Return the ``(width, height)`` of the image stored as ``name``.

    ``(0, 0)`` is returned when the media type ``mt`` contains ``svg`` and
    when identifying the image raises an exception.
    """
    if 'svg' in mt:
        return 0, 0
    try:
        dimensions = identify(container.name_to_abspath(name))
        width, height, _fmt = dimensions
    except Exception:
        return 0, 0
    return width, height
def cleanup_markup(log, root, styles, dest_dir, detect_cover):
    """Simplify the markup under ``root`` in place and optionally find a cover.

    The passes run in this order: move trailing ``<hr>`` elements out of their
    enclosing paragraph or heading, merge runs of mergeable spans, lift a
    span that is the only content of a block into that block, turn spans
    styled only as italic or bold into ``<i>``/``<b>``, and drop spans that
    carry no class, id or style.

    :param log: Logger; only ``debug`` is used.
    :param root: Root element of the document tree to clean up.
    :param styles: Object whose ``classes`` mapping has ``(class name, css)``
        pairs as its values.
    :param dest_dir: Directory against which image ``src`` values are resolved.
    :param detect_cover: If true, check whether the first image looks like a
        cover and, if so, remove it from the document.
    :return: The path of the detected cover image, otherwise ``None``.
    """
    # Move <hr>s outside paragraphs, if possible.
    pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
    for hr in root.xpath('//span/hr'):
        p = pancestor(hr)
        if p:
            p = p[0]
            descendants = tuple(p.iterdescendants())
            # Only move the <hr> when nothing follows it inside the block
            if descendants[-1] is hr:
                parent = p.getparent()
                idx = parent.index(p)
                parent.insert(idx+1, hr)
                hr.tail = '\n\t'

    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath('//span[not(@style)]'):
        if current_run and not mergeable(current_run[-1], span):
            if len(current_run) > 1:
                merge_run(current_run)
            current_run = []
        current_run.append(span)
    # Flush the final run; previously a run that reached the end of the
    # document was never merged.
    if len(current_run) > 1:
        merge_run(current_run)

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
    for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get('class', None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get('class', None)
                if span_class:
                    pclass = (pclass + ' ' + span_class) if pclass else span_class
                    parent.set('class', pclass)
                parent.text = span.text
                parent.remove(span)
                # Iterate over a snapshot, as the children are re-parented
                for child in tuple(span):
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath('//span[@class and not(@style)]'):
        css = class_map.get(span.get('class', None), {})
        if len(css) == 1:
            if css == {'font-style':'italic'}:
                span.tag = 'i'
                del span.attrib['class']
            elif css == {'font-weight':'bold'}:
                span.tag = 'b'
                del span.attrib['class']

    # Get rid of <span>s that have no styling
    for span in root.xpath('//span[not(@class) and not(@id) and not(@style)]'):
        lift(span)

    if detect_cover:
        # Check if the first image in the document is possibly a cover
        img = root.xpath('//img[@src][1]')
        if img:
            img = img[0]
            path = os.path.join(dest_dir, img.get('src'))
            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
                from calibre.utils.magick.draw import identify
                try:
                    width, height = identify(path)[:2]
                except Exception:
                    width, height = 0, 0
                try:
                    # float() forces true division even where "/" on two
                    # ints truncates, so the ratio bounds are honoured.
                    is_cover = 0.8 <= float(height)/width <= 1.8 and height*width >= 160000
                except ZeroDivisionError:
                    is_cover = False
                if is_cover:
                    log.debug('Detected an image that looks like a cover')
                    img.getparent().remove(img)
                    return path
def create_epub_cover(container, cover_path):
    """Add a cover image and a titlepage that displays it to an EPUB container.

    The image at ``cover_path`` is copied into the container, an XHTML
    titlepage is generated from one of the ``CoverManager`` templates, and the
    OPF spine, guide and metadata are updated to reference the new items.

    :param container: The book container to modify.
    :param cover_path: Filesystem path of the raster cover image.
    :return: ``(raster_cover, titlepage)``, the container names of the copied
        image and of the generated titlepage.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition(".")[-1].lower()
    raster_cover_item = container.generate_item("cover." + ext, id_prefix="cover")
    raster_cover = container.href_to_name(raster_cover_item.get("href"), container.opf_name)
    with open(cover_path, "rb") as src, container.open(raster_cover, "wb") as dest:
        shutil.copyfileobj(src, dest)

    opts = load_defaults("epub_output")
    keep_aspect = opts.get("preserve_cover_aspect_ratio", False)
    no_svg = opts.get("no_svg_cover", False)
    if no_svg:
        # The doubled %% survives the %-formatting of the template below
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace("__style__", style)
    else:
        # Fallback dimensions, used when the image cannot be identified
        width, height = 600, 800
        try:
            width, height = identify(cover_path)[:2]
        except Exception:
            container.log.exception("Failed to get width and height of cover")
        ar = "xMidYMid meet" if keep_aspect else "none"
        templ = CoverManager.SVG_TEMPLATE.replace("__ar__", ar)
        templ = templ.replace("__viewbox__", "0 0 %d %d" % (width, height))
        templ = templ.replace("__width__", str(width))
        templ = templ.replace("__height__", str(height))

    titlepage_item = container.generate_item("titlepage.xhtml", id_prefix="titlepage")
    titlepage = container.href_to_name(titlepage_item.get("href"), container.opf_name)
    # NOTE(review): the href is computed without a base although the page that
    # embeds it is `titlepage`; confirm this is correct when the OPF is not in
    # the container root.
    raw = templ % container.name_to_href(raster_cover)
    # Encode the finished page. `templ % href.encode("utf-8")` encoded only the
    # href operand, because attribute access binds tighter than %.
    if not isinstance(raw, bytes):
        raw = raw.encode("utf-8")
    with container.open(titlepage, "wb") as f:
        f.write(raw)

    # The raster cover item must have id="cover" for the Nook firmware, so any
    # other element currently using that id is moved to a fresh id first.
    if raster_cover_item.get("id") != "cover":
        from calibre.ebooks.oeb.base import uuid_id

        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set("id", newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set("idref", newid)
        raster_cover_item.set("id", "cover")

    # Make the titlepage the first item in the spine
    spine = container.opf_xpath("//opf:spine")[0]
    ref = spine.makeelement(OPF("itemref"), idref=titlepage_item.get("id"))
    container.insert_into_xml(spine, ref, index=0)

    guide = container.opf_get_or_create("guide")
    container.insert_into_xml(
        guide,
        guide.makeelement(
            OPF("reference"),
            type="cover",
            title=_("Cover"),
            href=container.name_to_href(titlepage, base=container.opf_name),
        ),
    )

    metadata = container.opf_get_or_create("metadata")
    meta = metadata.makeelement(OPF("meta"), name="cover")
    meta.set("content", raster_cover_item.get("id"))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
    """Simplify the markup under ``root`` in place and optionally find a cover.

    The passes run in this order: move trailing ``<hr>`` elements out of their
    enclosing paragraph or heading, merge runs of mergeable spans, lift a
    span that is the only content of a block into that block, turn spans
    styled only as italic or bold into ``<i>``/``<b>``, drop spans that carry
    no class, id, style or lang, and convert paragraphs that hold only a
    page-breaking ``<br>`` into paragraphs that break the page themselves.

    :param log: Logger; only ``debug`` is used.
    :param root: Root element of the document tree to clean up.
    :param styles: Object whose ``classes`` mapping has ``(class name, css)``
        pairs as its values.
    :param dest_dir: Directory against which image ``src`` values are resolved.
    :param detect_cover: If true, check whether the first image looks like a
        cover and, if so, remove it from the document.
    :param XPath: Callable that compiles an XPath expression into a callable
        which is then applied to an element.
    :return: The path of the detected cover image, otherwise ``None``.
    """
    # Move <hr>s outside paragraphs, if possible.
    pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
    for hr in root.xpath('//span/hr'):
        p = pancestor(hr)
        if p:
            p = p[0]
            descendants = tuple(p.iterdescendants())
            # Only move the <hr> when nothing follows it inside the block
            if descendants[-1] is hr:
                parent = p.getparent()
                idx = parent.index(p)
                parent.insert(idx + 1, hr)
                hr.tail = '\n\t'

    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath('//span[not(@style or @lang)]'):
        if current_run and not mergeable(current_run[-1], span):
            if len(current_run) > 1:
                merge_run(current_run)
            current_run = []
        current_run.append(span)
    # Flush the final run; previously a run that reached the end of the
    # document was never merged.
    if len(current_run) > 1:
        merge_run(current_run)

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
    for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get('class', None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get('class', None)
                if span_class:
                    pclass = (pclass + ' ' + span_class) if pclass else span_class
                    parent.set('class', pclass)
                parent.text = span.text
                parent.remove(span)
                if span.get('lang'):
                    parent.set('lang', span.get('lang'))
                # Iterate over a snapshot, as the children are re-parented
                for child in tuple(span):
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath('//span[@class and not(@style)]'):
        css = class_map.get(span.get('class', None), {})
        if len(css) == 1:
            if css == {'font-style': 'italic'}:
                span.tag = 'i'
                del span.attrib['class']
            elif css == {'font-weight': 'bold'}:
                span.tag = 'b'
                del span.attrib['class']

    # Get rid of <span>s that have no styling
    for span in root.xpath('//span[not(@class or @id or @style or @lang)]'):
        lift(span)

    # Convert <p><br style="page-break-after:always"> </p> style page breaks
    # into something the viewer will render as a page break
    for p in root.xpath('//p[br[@style="page-break-after:always"]]'):
        if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()):
            p.remove(p[0])
            prefix = p.get('style', '')
            if prefix:
                prefix += '; '
            p.set('style', prefix + 'page-break-after:always')
            # Give an otherwise empty paragraph some content
            p.text = NBSP if not p.text else p.text

    if detect_cover:
        # Check if the first image in the document is possibly a cover
        img = root.xpath('//img[@src][1]')
        if img:
            img = img[0]
            path = os.path.join(dest_dir, img.get('src'))
            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
                from calibre.utils.magick.draw import identify
                try:
                    width, height = identify(path)[:2]
                except Exception:
                    width, height = 0, 0
                try:
                    # float() forces true division even where "/" on two
                    # ints truncates, so the ratio bounds are honoured.
                    is_cover = 0.8 <= float(height) / width <= 1.8 and height * width >= 160000
                except ZeroDivisionError:
                    is_cover = False
                if is_cover:
                    log.debug('Detected an image that looks like a cover')
                    img.getparent().remove(img)
                    return path
def create_epub_cover(container, cover_path, existing_image, options=None):
    """Add a titlepage showing a cover image to an EPUB container.

    The cover is either an image already in the container
    (``existing_image``) or a new one, copied from ``cover_path`` or written
    by it when it is a callable. An XHTML titlepage is generated from one of
    the ``CoverManager`` templates, and the OPF spine, guide and metadata are
    updated to reference the cover and the titlepage.

    :param container: The book container to modify.
    :param cover_path: Filesystem path of the cover image, or a callable that
        is invoked as ``cover_path('write_image', dest)`` to write the image
        data to the open file ``dest``.
    :param existing_image: Container name of an image to use as the cover; if
        false, a new image item is created from ``cover_path``.
    :param options: Optional mapping read for the ``keep_aspect`` and
        ``no_svg`` keys. When ``None``, the saved ``epub_output`` conversion
        defaults are used instead.
    :return: ``(raster_cover, titlepage)``, the container names of the cover
        image and of the generated titlepage.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    try:
        ext = cover_path.rpartition('.')[-1].lower()
    except Exception:
        # cover_path may be a callable, which has no file extension
        ext = 'jpeg'
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        # Invert the id -> name map to find the manifest id of the image
        manifest_id = {v:k for k, v in container.manifest_id_map.iteritems()}[existing_image]
        raster_cover_item = container.opf_xpath('//opf:manifest/*[@id="%s"]' % manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(raster_cover_item.get('href'), container.opf_name)
        with container.open(raster_cover, 'wb') as dest:
            if callable(cover_path):
                cover_path('write_image', dest)
            else:
                with lopen(cover_path, 'rb') as src:
                    shutil.copyfileobj(src, dest)

    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)

    if no_svg:
        # The doubled %% survives the %-formatting of the template below
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        if callable(cover_path):
            # The template placeholders are left unfilled in this case
            templ = CoverManager.SVG_TEMPLATE
        else:
            # Fallback dimensions, used when the image cannot be identified
            width, height = 600, 800
            try:
                if existing_image:
                    width, height = identify_data(container.raw_data(existing_image, decode=False))[:2]
                else:
                    width, height = identify(cover_path)[:2]
            except Exception:
                container.log.exception("Failed to get width and height of cover")
            ar = 'xMidYMid meet' if keep_aspect else 'none'
            templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
            templ = templ.replace('__viewbox__', '0 0 %d %d'%(width, height))
            templ = templ.replace('__width__', str(width))
            templ = templ.replace('__height__', str(height))

    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(titlepage_item.get('href'), container.opf_name)
    raw = templ % container.name_to_href(raster_cover, titlepage)
    # Encode the finished page. `templ % href.encode('utf-8')` encoded only the
    # href operand, because attribute access binds tighter than %.
    if not isinstance(raw, bytes):
        raw = raw.encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # The raster cover item must have id="cover" for the Nook firmware, so any
    # other element currently using that id is moved to a fresh id first.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    # Make the titlepage the first item in the spine
    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)

    guide = container.opf_get_or_create('guide')
    container.insert_into_xml(guide, guide.makeelement(
        OPF('reference'), type='cover', title=_('Cover'),
        href=container.name_to_href(titlepage, base=container.opf_name)))

    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def cleanup_markup(log, root, styles, dest_dir, detect_cover):
    """Simplify the markup under ``root`` in place and optionally find a cover.

    The passes run in this order: move trailing ``<hr>`` elements out of their
    enclosing paragraph or heading, merge runs of mergeable spans, lift a
    span that is the only content of a block into that block, turn spans
    styled only as italic or bold into ``<i>``/``<b>``, drop spans that carry
    no class, id, style or lang, and convert paragraphs that hold only a
    page-breaking ``<br>`` into paragraphs that break the page themselves.

    :param log: Logger; only ``debug`` is used.
    :param root: Root element of the document tree to clean up.
    :param styles: Object whose ``classes`` mapping has ``(class name, css)``
        pairs as its values.
    :param dest_dir: Directory against which image ``src`` values are resolved.
    :param detect_cover: If true, check whether the first image looks like a
        cover and, if so, remove it from the document.
    :return: The path of the detected cover image, otherwise ``None``.
    """
    # Move <hr>s outside paragraphs, if possible.
    pancestor = XPath("|".join("ancestor::%s[1]" % x for x in ("p", "h1", "h2", "h3", "h4", "h5", "h6")))
    for hr in root.xpath("//span/hr"):
        p = pancestor(hr)
        if p:
            p = p[0]
            descendants = tuple(p.iterdescendants())
            # Only move the <hr> when nothing follows it inside the block
            if descendants[-1] is hr:
                parent = p.getparent()
                idx = parent.index(p)
                parent.insert(idx + 1, hr)
                hr.tail = "\n\t"

    # Merge consecutive spans that have the same styling
    current_run = []
    for span in root.xpath("//span[not(@style or @lang)]"):
        if current_run and not mergeable(current_run[-1], span):
            if len(current_run) > 1:
                merge_run(current_run)
            current_run = []
        current_run.append(span)
    # Flush the final run; previously a run that reached the end of the
    # document was never merged.
    if len(current_run) > 1:
        merge_run(current_run)

    # Remove unnecessary span tags that are the only child of a parent block
    # element
    class_map = dict(styles.classes.itervalues())
    parents = ("p", "div") + tuple("h%d" % i for i in xrange(1, 7))
    for parent in root.xpath("//*[(%s) and count(span)=1]" % " or ".join('name()="%s"' % t for t in parents)):
        if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get("id", None):
            # We have a block whose contents are entirely enclosed in a <span>
            span = parent[0]
            span_class = span.get("class", None)
            span_css = class_map.get(span_class, {})
            if liftable(span_css):
                pclass = parent.get("class", None)
                if span_class:
                    pclass = (pclass + " " + span_class) if pclass else span_class
                    parent.set("class", pclass)
                parent.text = span.text
                parent.remove(span)
                if span.get("lang"):
                    parent.set("lang", span.get("lang"))
                # Iterate over a snapshot, as the children are re-parented
                for child in tuple(span):
                    parent.append(child)

    # Make spans whose only styling is bold or italic into <b> and <i> tags
    for span in root.xpath("//span[@class and not(@style)]"):
        css = class_map.get(span.get("class", None), {})
        if len(css) == 1:
            if css == {"font-style": "italic"}:
                span.tag = "i"
                del span.attrib["class"]
            elif css == {"font-weight": "bold"}:
                span.tag = "b"
                del span.attrib["class"]

    # Get rid of <span>s that have no styling
    for span in root.xpath("//span[not(@class or @id or @style or @lang)]"):
        lift(span)

    # Convert <p><br style="page-break-after:always"> </p> style page breaks
    # into something the viewer will render as a page break
    for p in root.xpath('//p[br[@style="page-break-after:always"]]'):
        if len(p) == 1 and (not p[0].tail or not p[0].tail.strip()):
            p.remove(p[0])
            prefix = p.get("style", "")
            if prefix:
                prefix += "; "
            p.set("style", prefix + "page-break-after:always")
            # Give an otherwise empty paragraph some content
            p.text = NBSP if not p.text else p.text

    if detect_cover:
        # Check if the first image in the document is possibly a cover
        img = root.xpath("//img[@src][1]")
        if img:
            img = img[0]
            path = os.path.join(dest_dir, img.get("src"))
            if os.path.exists(path) and before_count(root, img, limit=10) < 5:
                from calibre.utils.magick.draw import identify

                try:
                    width, height = identify(path)[:2]
                except Exception:
                    width, height = 0, 0
                try:
                    # float() forces true division even where "/" on two
                    # ints truncates, so the ratio bounds are honoured.
                    is_cover = 0.8 <= float(height) / width <= 1.8 and height * width >= 160000
                except ZeroDivisionError:
                    is_cover = False
                if is_cover:
                    log.debug("Detected an image that looks like a cover")
                    img.getparent().remove(img)
                    return path