def _metadata_from_opf(self, opf):
    """Fill ``self.oeb.metadata`` from the given OPF element tree.

    The tree is serialized and re-read through calibre's ``OPF`` reader, the
    resulting book metadata is copied into the OEB metadata, a fresh uuid
    identifier is added and used as the book uid, and placeholder title and
    author entries are added when none are present.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata

    raw = etree.tostring(opf, xml_declaration=True, encoding='utf-8')
    reader = OPF(cStringIO.StringIO(raw))

    writing_mode = reader.primary_writing_mode
    if writing_mode:
        self.oeb.metadata.primary_writing_mode = writing_mode

    mi = reader.to_book_metadata()
    if not mi.language:
        # Fall back to the interface language, using '-' as the separator
        mi.language = get_lang().replace('_', '-')
    self.oeb.metadata.add('language', mi.language)
    if not mi.book_producer:
        producer_fields = dict(a=__appname__, v=__version__)
        mi.book_producer = (
            '%(a)s (%(v)s) [http://%(a)s-ebook.com]' % producer_fields)
    meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger)

    metadata = self.oeb.metadata
    metadata.add('identifier', str(uuid.uuid4()), id='uuid_id', scheme='uuid')
    self.oeb.uid = self.oeb.metadata.identifier[-1]

    if not metadata.title:
        metadata.add('title', self.oeb.translate(__('Unknown')))

    # A creator with no role, or with the 'aut' role, counts as an author
    author_present = any(
        getattr(creator, 'role', '').lower() in ('', 'aut')
        for creator in metadata.creator)
    if not author_present:
        metadata.add('creator', self.oeb.translate(__('Unknown')), role='aut')
def mark_as_titlepage(container, name, move_to_start=True):
    """Mark the file ``name`` as the cover page in the OPF guide.

    :param container: The book container whose OPF is modified.
    :param name: Name of the file to mark.
    :param move_to_start: If True and ``name`` is in the spine, its spine
        entry is made linear and moved to the start of the spine.

    Existing ``type="cover"`` guide references are removed and a new one
    pointing at ``name`` is inserted into every ``<guide>`` element found.
    """
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name == q:
                break
        else:
            # name is not in the spine (or the spine is empty). Previously
            # this fell through and moved whatever spine item came last, or
            # raised NameError on an empty spine.
            item = None
        if item is not None:
            if not linear:
                item.set('linear', 'yes')
            if item.getparent().index(item) > 0:
                container.insert_into_xml(item.getparent(), item, 0)
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
        ref.getparent().remove(ref)

    for guide in container.opf_xpath('//opf:guide'):
        container.insert_into_xml(
            guide,
            guide.makeelement(
                OPF('reference'), type='cover',
                href=container.name_to_href(name, container.opf_name)))
    container.dirty(container.opf_name)
def convert_text(self, oeb_book):
    """Serialize ``oeb_book`` to a temporary directory and render it as PDF.

    The book is written out with the 'oeb' output plugin, the generated OPF
    is read back, and the spine file paths plus the OPF's ``toc`` attribute
    (``None`` when absent) are handed to ``self.write`` with ``PDFWriter``.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.pdf.render.from_html import PDFWriter

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()
    self.process_fonts()

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(
            oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)

        # The first .opf file found in the output directory is used
        opf_candidates = glob.glob(os.path.join(oeb_dir, '*.opf'))
        opfpath = opf_candidates[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))

        spine_paths = [entry.path for entry in opf.spine]
        toc = getattr(opf, 'toc', None)
        self.write(PDFWriter, spine_paths, toc)
def set_azw3_cover(container, cover_path, report):
    """Set the cover image of an AZW3 book from the file at ``cover_path``.

    All guide references that have an href and whose type contains "cover"
    are removed. The file named by the last such reference is overwritten if
    it exists in the container, otherwise a new ``cover.jpeg`` manifest item
    is generated. A fresh ``type="cover"`` guide reference is then inserted
    and ``report`` is called with a one line status message.
    """
    cover_name = None
    for reference in container.opf_xpath(
            '//opf:guide/opf:reference[@href and contains(@type, "cover")]'):
        cover_name = container.href_to_name(
            reference.get('href'), container.opf_name)
        container.remove_from_xml(reference)

    reuse_existing = cover_name is not None and container.has_name(cover_name)
    if not reuse_existing:
        new_item = container.generate_item(name='cover.jpeg', id_prefix='cover')
        cover_name = container.href_to_name(
            new_item.get('href'), container.opf_name)

    cover_href = container.name_to_href(cover_name, container.opf_name)
    guide = container.opf_xpath('//opf:guide')[0]
    cover_reference = guide.makeelement(
        OPF('reference'), href=cover_href, type='cover')
    container.insert_into_xml(guide, cover_reference)

    # Copy the image bytes into the container
    with open(cover_path, 'rb') as src, container.open(cover_name, 'wb') as dest:
        shutil.copyfileobj(src, dest)
    container.dirty(container.opf_name)

    if reuse_existing:
        report('Cover updated')
    else:
        report('Cover inserted')
def writer(root, prefixes, refines, ival=None):
    """Replace the ``name`` identifier in the OPF metadata with ``ival``.

    ``name`` comes from the enclosing scope. Every ``dc:identifier`` whose
    text starts with ``name:`` or whose ``opf:scheme`` equals ``name`` is
    removed, except the package unique identifier, which is never removed.
    When ``ival`` is truthy a new ``name:ival`` identifier is added, placed
    just before the package identifier if there is one.
    """
    package_uid = root.get('unique-identifier')
    package_identifier = None

    for candidate in XPath('./opf:metadata/dc:identifier')(root):
        if package_uid is not None and package_uid == candidate.get('id'):
            # Remember the package identifier, but leave it in place
            package_identifier = candidate
            continue
        text = (candidate.text or '').strip()
        matches_prefix = text.startswith(name + ':')
        if matches_prefix or candidate.get(OPF('scheme')) == name:
            remove_element(candidate, refines)

    metadata = XPath('./opf:metadata')(root)[0]
    if not ival:
        return

    new_ident = metadata.makeelement(DC('identifier'))
    new_ident.text = '%s:%s' % (name, ival)
    if package_identifier is not None:
        parent = package_identifier.getparent()
        parent.insert(parent.index(package_identifier), new_ident)
    else:
        metadata.append(new_ident)
def generate_item(self, name, id_prefix=None, media_type=None):
    '''Add a manifest item whose href is derived from ``name``.

    The id and href are made unique automatically by appending a counter.
    An empty file is created on disk for the new item. Returns the
    generated ``<item>`` element.'''
    if not id_prefix:
        id_prefix = 'id'
    if not media_type:
        media_type = guess_type(name)

    href = self.name_to_href(name, self.opf_name)
    stem, _sep, ext = href.rpartition('.')

    # Pick an id that no element in the OPF already uses
    used_ids = {elem.get('id') for elem in self.opf_xpath('//*[@id]')}
    item_id = id_prefix
    counter = 0
    while item_id in used_ids:
        counter += 1
        item_id = id_prefix + '%d' % counter

    # Pick an href that is neither in the manifest nor present as a file
    used_hrefs = {
        elem.get('href')
        for elem in self.opf_xpath('//opf:manifest/opf:item[@href]')}
    counter = 0
    while href in used_hrefs or self.exists(
            self.href_to_name(href, self.opf_name)):
        counter += 1
        href = '%s_%d.%s' % (stem, counter, ext)

    manifest = self.opf_xpath('//opf:manifest')[0]
    item = manifest.makeelement(OPF('item'), id=item_id, href=href)
    item.set('media-type', media_type)
    self.insert_into_xml(manifest, item)
    self.dirty(self.opf_name)

    name = self.href_to_name(href, self.opf_name)
    path = self.name_to_abspath(name)
    self.name_path_map[name] = path
    self.mime_map[name] = media_type

    # Ensure that the file corresponding to the newly created item exists
    # otherwise cloned containers will fail when they try to get the number
    # of links to the file
    parent_dir = os.path.dirname(path)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    open(path, 'wb').close()
    return item
def convert_text(self, oeb_book):
    """Prepare ``oeb_book`` and render it to PDF via ``PDFWriter``.

    Stored per-document page margins are attached to the matching documents
    when the ``pdf_use_document_margins`` option is set, script content and
    ``on*`` attribute values are blanked in all spine documents, and the
    book is then serialized with the 'oeb' output plugin and passed to
    ``self.write``.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.ebooks.pdf.render.from_html import PDFWriter

    self.log.debug('Serializing oeb input to disk for processing...')
    self.get_cover_data()
    self.process_fonts()

    if self.opts.pdf_use_document_margins and self.stored_page_margins:
        import json
        for href, margins in iteritems(self.stored_page_margins):
            manifest_item = oeb_book.manifest.hrefs.get(href)
            if manifest_item is None:
                continue
            doc_root = manifest_item.data
            if hasattr(doc_root, 'xpath') and margins:
                doc_root.set(
                    'data-calibre-pdf-output-page-margins',
                    json.dumps(margins))

    # Remove javascript
    for spine_item in self.oeb.spine:
        doc_root = spine_item.data
        if not hasattr(doc_root, 'xpath'):
            continue
        for script in doc_root.xpath('//*[local-name()="script"]'):
            script.text = None
            script.attrib.clear()
        for elem in doc_root.iter('*'):
            # Blank the value of every attribute whose name starts with "on"
            for attr_name in tuple(elem.attrib):
                if attr_name.startswith('on'):
                    elem.set(attr_name, '')

    with TemporaryDirectory('_pdf_out') as oeb_dir:
        from calibre.customize.ui import plugin_for_output_format
        serializer = plugin_for_output_format('oeb')
        serializer.convert(
            oeb_book, oeb_dir, self.input_plugin, self.opts, self.log)

        opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
        opf = OPF(opfpath, os.path.dirname(opfpath))
        spine_paths = [entry.path for entry in opf.spine]
        self.write(PDFWriter, spine_paths, getattr(opf, 'toc', None))
def mark_as_titlepage(container, name, move_to_start=True):
    '''
    Mark the specified HTML file as the titlepage of the EPUB.

    Existing ``type="cover"`` guide references are removed and a new one
    pointing at ``name`` is inserted into every guide returned by
    ``get_guides``.

    :param move_to_start: If True the HTML file is moved to the start of the
        spine. Nothing is moved when ``name`` is not in the spine.
    '''
    if move_to_start:
        for item, q, linear in container.spine_iter:
            if name == q:
                break
        else:
            # name is not in the spine (or the spine is empty). Previously
            # this fell through and moved whatever spine item came last, or
            # raised NameError on an empty spine.
            item = None
        if item is not None:
            if not linear:
                item.set('linear', 'yes')
            if item.getparent().index(item) > 0:
                container.insert_into_xml(item.getparent(), item, 0)
    for ref in container.opf_xpath('//opf:guide/opf:reference[@type="cover"]'):
        ref.getparent().remove(ref)

    for guide in get_guides(container):
        container.insert_into_xml(
            guide,
            guide.makeelement(
                OPF('reference'), type='cover',
                href=container.name_to_href(name, container.opf_name)))
    container.dirty(container.opf_name)
def fix_opf(self, container):
    """Clean up the OPF of the generated EPUB.

    Adds text documents missing from the spine, drops duplicate spine
    entries, points the cover meta tag at the configured cover image, adds a
    description, removes ``search.html`` and unreferenced resources, and
    pretty prints the OPF.
    """
    in_spine = {n for n, l in container.spine_names}
    spine = container.opf_xpath('//opf:spine')[0]
    # Map of file name -> manifest id
    name_to_id = {v: k for k, v in container.manifest_id_map.iteritems()}

    # Add unreferenced text files to the spine
    for fname, mime in container.mime_map.iteritems():
        if mime not in OEB_DOCS or fname in in_spine:
            continue
        in_spine.add(fname)
        itemref = spine.makeelement(OPF('itemref'), idref=name_to_id[fname])
        container.insert_into_xml(spine, itemref)

    # Remove duplicate entries from spine
    already_seen = set()
    for item, fname, linear in container.spine_iter:
        if fname in already_seen:
            container.remove_from_xml(item)
        else:
            already_seen.add(fname)

    # Ensure that the meta cover tag is correct
    cover_id = name_to_id['_static/' + self.config.epub_cover[0]]
    for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        meta.set('content', cover_id)

    # Add description metadata
    metadata = container.opf_xpath('//opf:metadata')[0]
    description = metadata.makeelement(DC('description'))
    container.insert_into_xml(metadata, description)
    metadata[-1].text = 'Comprehensive documentation for calibre'

    # Remove search.html since it is useless in EPUB
    container.remove_item('search.html')

    # Remove unreferenced files
    for error in check_links(container):
        if error.__class__ is UnreferencedResource:
            container.remove_item(error.name)

    # Pretty print the OPF
    opf_root = container.parsed(container.opf_name)
    pretty_opf(opf_root)
    container.dirty(container.opf_name)
def fix_opf(self, container):
    """Clean up the OPF of the generated EPUB 3 book.

    Adds text documents missing from the spine, drops duplicate spine
    entries, removes the ``<guide>`` and the cover ``<meta>`` tag, flags the
    configured cover image with the ``cover-image`` property, removes
    unreferenced resources, and pretty prints the OPF.
    """
    in_spine = {n for n, l in container.spine_names}
    spine = container.opf_xpath('//opf:spine')[0]
    # Map of file name -> manifest id
    name_to_id = {v: k for k, v in iteritems(container.manifest_id_map)}

    # Add unreferenced text files to the spine
    for fname, mime in iteritems(container.mime_map):
        if mime not in OEB_DOCS or fname in in_spine:
            continue
        in_spine.add(fname)
        itemref = spine.makeelement(OPF('itemref'), idref=name_to_id[fname])
        container.insert_into_xml(spine, itemref)

    # Remove duplicate entries from spine
    already_seen = set()
    for item, fname, linear in container.spine_iter:
        if fname in already_seen:
            container.remove_from_xml(item)
        else:
            already_seen.add(fname)

    # Remove the <guide> which is not needed in EPUB 3
    for guide in container.opf_xpath('//*[local-name()="guide"]'):
        guide.getparent().remove(guide)

    # Ensure that the cover-image property is set
    cover_id = name_to_id['_static/' + self.config.epub_cover[0]]
    cover_query = '//opf:item[@id="{}"]'.format(cover_id)
    for cover_item in container.opf_xpath(cover_query):
        cover_item.set('properties', 'cover-image')

    # Remove any <meta cover> tag as it is not needed in epub 3
    for meta in container.opf_xpath('//opf:meta[@name="cover"]'):
        meta.getparent().remove(meta)

    # Remove unreferenced files
    for error in check_links(container):
        if error.__class__ is UnreferencedResource:
            container.remove_item(error.name)

    # Pretty print the OPF
    opf_root = container.parsed(container.opf_name)
    pretty_opf(opf_root)
    container.dirty(container.opf_name)
def add_or_replace_jacket(container):
    """Regenerate the book jacket, creating it if the book has none.

    When no existing jacket is found, a new ``jacket.xhtml`` manifest item
    is generated and inserted into the spine: at index 1 if the first spine
    item is the cover page, otherwise at index 0.

    Returns True if an existing jacket was replaced, False if a new one was
    inserted.
    """
    name = find_existing_jacket(container)
    found = True
    if name is None:
        jacket_item = container.generate_item('jacket.xhtml', id_prefix='jacket')
        name = container.href_to_name(jacket_item.get('href'), container.opf_name)
        found = False
    replace_jacket(container, name)
    if not found:
        # Insert new jacket into spine
        index = 0
        # The next() builtin works on Python 2 and 3, unlike the .next()
        # method, and the default avoids StopIteration on an empty spine.
        first_spine_path = next(iter(container.spine_items), None)
        if first_spine_path is not None:
            sp = container.abspath_to_name(first_spine_path)
            if sp == find_cover_page(container):
                index = 1
        itemref = container.opf.makeelement(
            OPF('itemref'), idref=jacket_item.get('id'))
        container.insert_into_xml(
            container.opf_xpath('//opf:spine')[0], itemref, index=index)
    return found
def convert_metadata(self, oeb):
    """Build the core properties XML for the document from ``oeb`` metadata.

    The OEB metadata is round-tripped through an OPF 2 package to obtain a
    book metadata object, which is stored on ``self.mi`` and used to fill in
    the document properties. Returns the serialized ``coreProperties``
    element as produced by ``xml2str``.
    """
    E = ElementMaker(
        namespace=namespaces['cp'],
        nsmap={x: namespaces[x] for x in 'cp dc dcterms xsi'.split()})
    cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
    # Format the timestamp explicitly to second precision. Slicing the
    # output of isoformat() at the '.' yields an empty string whenever the
    # microsecond field is zero, because isoformat() then omits the
    # fractional part entirely.
    ts = utcnow().strftime(str('%Y-%m-%dT%H:%M:%S')) + 'Z'
    for x in 'created modified'.split():
        x = cp.makeelement(
            '{%s}%s' % (namespaces['dcterms'], x),
            **{'{%s}type' % namespaces['xsi']: 'dcterms:W3CDTF'})
        x.text = ts
        cp.append(x)

    package = etree.Element(
        OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
    oeb.metadata.to_opf2(package)
    self.mi = ReadOPF(
        BytesIO(xml2str(package)), populate_spine=False,
        try_to_guess_cover=False).to_book_metadata()
    update_doc_props(cp, self.mi)
    return xml2str(cp)
def parse_identifier(ident, val, refines):
    """Split an identifier value into a ``(scheme, value)`` pair.

    Returns ``(None, None)`` when no scheme or value can be determined, when
    the scheme is http/https, or when an ISBN fails ``check_isbn``. Schemes
    are lower-cased and anything starting with "isbn" is normalized to
    ``'isbn'``.
    """
    idid = ident.get('id')
    refines = refines[idid]
    lowered = val.lower()
    is_urn = lowered.startswith('urn:')

    def finalize(scheme, val):
        if not (scheme and val):
            return None, None
        scheme = scheme.lower()
        if scheme in ('http', 'https'):
            return None, None
        if scheme.startswith('isbn'):
            # Only the part after the last colon is validated as an ISBN
            scheme = 'isbn'
            val = check_isbn(val.split(':')[-1])
            if val is None:
                return None, None
        return scheme, val

    # Try the OPF 2 style opf:scheme attribute, which will be present, for
    # example, in EPUB 3 files that have had their metadata set by an
    # application that only understands EPUB 2.
    scheme = ident.get(OPF('scheme'))
    if scheme and not is_urn:
        return finalize(scheme, val)

    # Strictly, the scheme should be looked up in the refines, but the
    # examples in the specification itself are inconsistent, so that is not
    # attempted here. See
    # http://www.idpf.org/epub/301/spec/epub-publications-errata/

    # Parse the value for the scheme
    if is_urn:
        val = val[4:]
    prefix, _sep, rest = val.partition(':')
    return finalize(prefix, rest)
def create_cover_page(self, input_fmt):
    """Create a cover image and an HTML titlepage for the book.

    For EPUB input the work is delegated to ``set_epub_cover`` with a blank
    JPEG as the image. Otherwise the existing raster cover is reused, or a
    blank ``cover.jpeg`` is created, and a new ``titlepage.html`` that
    displays it is inserted at the start of the spine.

    Returns the result of ``set_epub_cover`` for EPUB input, otherwise the
    tuple ``(raster_cover_name, titlepage_name)``.
    """
    templ = (
        ' <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">'
        ' <head><style>'
        ' html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }'
        ' img { width: auto; height: auto; margin-left: auto; margin-right: auto; max-width: 100vw; max-height: 100vh }'
        ' </style></head><body><img src="%s"/></body></html> '
    )

    if input_fmt == 'epub':
        def cover_path(action, data):
            if action == 'write_image':
                data.write(BLANK_JPEG)
        return set_epub_cover(
            self, cover_path, (lambda *a: None), options={'template': templ})

    raster_cover_name = find_cover_image(self, strict=True)
    if raster_cover_name is None:
        image_item = self.generate_item(name='cover.jpeg', id_prefix='cover')
        raster_cover_name = self.href_to_name(
            image_item.get('href'), self.opf_name)
    # The raster cover is (over)written with a blank image in both cases
    with self.open(raster_cover_name, 'wb') as dest:
        dest.write(BLANK_JPEG)

    page_item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
    titlepage_name = self.href_to_name(page_item.get('href'), self.opf_name)
    image_href = self.name_to_href(raster_cover_name, titlepage_name)
    raw = templ % prepare_string_for_xml(image_href, True)
    with self.open(titlepage_name, 'wb') as f:
        f.write(raw.encode('utf-8'))

    spine = self.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=page_item.get('id'))
    self.insert_into_xml(spine, itemref, index=0)
    self.dirty(self.opf_name)
    return raster_cover_name, titlepage_name
def create_cover_page(self, input_fmt):
    """Create a cover image and an SVG-based HTML titlepage for the book.

    For EPUB input the work is delegated to ``set_epub_cover`` with a blank
    JPEG as the image. Otherwise the existing raster cover is reused, or a
    blank ``cover.jpeg`` is created, and a new ``titlepage.html`` built from
    ``CoverManager.SVG_TEMPLATE`` is inserted at the start of the spine.

    Returns the result of ``set_epub_cover`` for EPUB input, otherwise the
    tuple ``(raster_cover_name, titlepage_name)``.
    """
    if input_fmt == 'epub':
        def cover_path(action, data):
            if action == 'write_image':
                data.write(BLANK_JPEG)
        return set_epub_cover(self, cover_path, (lambda *a: None))

    from calibre import prepare_string_for_xml
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    raster_cover_name = find_cover_image(self, strict=True)
    if raster_cover_name is None:
        item = self.generate_item(name='cover.jpeg', id_prefix='cover')
        raster_cover_name = self.href_to_name(item.get('href'), self.opf_name)
    with self.open(raster_cover_name, 'wb') as dest:
        dest.write(BLANK_JPEG)

    item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
    titlepage_name = self.href_to_name(item.get('href'), self.opf_name)
    templ = CoverManager.SVG_TEMPLATE
    # The href is substituted into an XML attribute, so it must be escaped;
    # otherwise names containing &, < or quotes produce malformed markup.
    raw = templ % prepare_string_for_xml(
        self.name_to_href(raster_cover_name, titlepage_name), True)
    with self.open(titlepage_name, 'wb') as f:
        f.write(raw.encode('utf-8'))

    spine = self.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
    self.insert_into_xml(spine, ref, index=0)
    self.dirty(self.opf_name)
    return raster_cover_name, titlepage_name
def convert(self, oeb, output_path, input_plugin, opts, log):
    """Write ``oeb`` as an EPUB file at ``output_path``.

    Runs the EPUB specific transforms (inline TOC, filename normalization,
    reader workarounds, image rescaling, splitting, cover handling), makes
    sure the book has a TOC entry and a UUID identifier, serializes the book
    with the 'oeb' output plugin into a temporary directory and zips that
    into the EPUB container. If ``opts.extract_to`` is set, the finished
    EPUB is also extracted to that location, replacing whatever was there.
    """
    self.log, self.opts, self.oeb = log, opts, oeb

    if self.opts.epub_inline_toc:
        from calibre.ebooks.mobi.writer8.toc import TOCAdder
        opts.mobi_toc_at_start = not opts.epub_toc_at_end
        opts.mobi_passthrough = False
        opts.no_inline_toc = False
        TOCAdder(oeb, opts, replace_previous_inline_toc=True,
                 ignore_existing_toc=True)

    if self.opts.epub_flatten:
        from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
        FlatFilenames()(oeb, opts)
    else:
        from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
        UniqueFilenames()(oeb, opts)

    self.workaround_ade_quirks()
    self.workaround_webkit_quirks()
    self.upshift_markup()
    from calibre.ebooks.oeb.transforms.rescale import RescaleImages
    RescaleImages(check_colorspaces=True)(oeb, opts)

    from calibre.ebooks.oeb.transforms.split import Split
    split = Split(not self.opts.dont_split_on_page_breaks,
                  max_flow_size=self.opts.flow_size * 1024)
    split(self.oeb, self.opts)

    from calibre.ebooks.oeb.transforms.cover import CoverManager
    cm = CoverManager(
        no_default_cover=self.opts.no_default_epub_cover,
        no_svg_cover=self.opts.no_svg_cover,
        preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
    cm(self.oeb, self.opts, self.log)

    self.workaround_sony_quirks()

    if self.oeb.toc.count() == 0:
        self.log.warn('This EPUB file has no Table of Contents. '
                      'Creating a default TOC')
        first = next(iter(self.oeb.spine))
        self.oeb.toc.add(_('Start'), first.href)

    from calibre.ebooks.oeb.base import OPF
    identifiers = oeb.metadata['identifier']
    uuid = None
    for x in identifiers:
        # An identifier may have no scheme at all; treat that as an empty
        # scheme instead of calling .lower() on None.
        scheme = x.get(OPF('scheme'), None) or ''
        if scheme.lower() == 'uuid' or str(x).startswith('urn:uuid:'):
            uuid = str(x).split(':')[-1]
            break
    encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

    if uuid is None:
        self.log.warn('No UUID identifier found')
        from uuid import uuid4
        uuid = str(uuid4())
        oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

    if encrypted_fonts and not uuid.startswith('urn:uuid:'):
        # Apparently ADE requires this value to start with urn:uuid:
        # for some absurd reason, or it will throw a hissy fit and refuse
        # to use the obfuscated fonts.
        for x in identifiers:
            if str(x) == uuid:
                x.content = 'urn:uuid:' + uuid

    with TemporaryDirectory('_epub_output') as tdir:
        from calibre.customize.ui import plugin_for_output_format
        metadata_xml = None
        extra_entries = []
        if self.is_periodical:
            if self.opts.output_profile.epub_periodical_format == 'sony':
                from calibre.ebooks.epub.periodical import sony_metadata
                metadata_xml, atom_xml = sony_metadata(oeb)
                extra_entries = [
                    ('atom.xml', 'application/atom+xml', atom_xml)]
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        self.condense_ncx([
            os.path.join(tdir, x) for x in os.listdir(tdir)
            if x.endswith('.ncx')
        ][0])
        encryption = None
        if encrypted_fonts:
            encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

        from calibre.ebooks.epub import initialize_container
        with initialize_container(output_path, os.path.basename(opf),
                                  extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                              metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            from calibre.utils.zipfile import ZipFile
            # Replace any existing file or directory at the target
            if os.path.exists(opts.extract_to):
                if os.path.isdir(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
                else:
                    os.remove(opts.extract_to)
            os.mkdir(opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            self.log.info('EPUB extracted to', opts.extract_to)
def split(container, name, loc_or_xpath, before=True):
    '''
    Split the file specified by name at the position specified by
    loc_or_xpath. Links into the file, from the two halves and from the rest
    of the book, are updated to point at the correct half, and the new file
    is inserted into the spine right after the original.

    Returns the name of the newly created (bottom) file.
    '''
    root = container.parsed(name)
    if isinstance(loc_or_xpath, type('')):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        split_point = node_from_loc(root, loc_or_xpath)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    top_tree, bottom_tree = do_split(split_point, container.log, before=before)
    top_root, bottom_root = top_tree.getroot(), bottom_tree.getroot()
    # The empty anchor (a link to the file itself) belongs to the top half
    anchors_in_top = (
        frozenset(top_root.xpath('//*/@id'))
        | frozenset(top_root.xpath('//*/@name'))
        | {''})
    anchors_in_bottom = (
        frozenset(bottom_root.xpath('//*/@id'))
        | frozenset(bottom_root.xpath('//*/@name')))

    manifest_item = container.generate_item(
        name, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(
        manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees: each half must point at the other half
    # for anchors that ended up there
    cross_links = (
        (top_root, bottom_name, anchors_in_bottom),
        (bottom_root, name, anchors_in_top),
    )
    for tree_root, other_name, other_anchors in cross_links:
        for link in tree_root.xpath('//*[@href]'):
            url = link.get('href')
            if url.startswith('#'):
                target = name
            else:
                target = container.href_to_name(url, name)
            if target != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in other_anchors:
                link.set('href', '%s#%s' % (
                    container.name_to_href(other_name, name), fragment))

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in container.mime_map.iteritems():
        if fname not in {name, bottom_name}:
            replacer = SplitLinkReplacer(
                fname, anchors_in_bottom, name, bottom_name, container)
            container.replace_links(fname, replacer)

    container.replace(name, top_root)
    container.replace(bottom_name, bottom_root)

    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    insert_at = spine.index(spine_item) + 1

    new_itemref = spine.makeelement(
        OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        new_itemref.set('linear', 'no')
    container.insert_into_xml(spine, new_itemref, index=insert_at)
    container.dirty(container.opf_name)
    return bottom_name
def create_cover_page(self, input_fmt):
    """Create a cover image and an HTML titlepage for the book.

    For EPUB input the work is delegated to ``set_epub_cover``, reusing the
    bytes of an existing cover image when the image callback reports one.
    Otherwise the existing raster cover is reused or a new ``cover.jpeg`` is
    written, and a ``titlepage.html`` displaying it is inserted at the start
    of the spine. When no cover image data is available, a cover is
    generated from ``self.book_metadata`` if set, else a blank JPEG is used.

    Returns the tuple ``(raster_cover_name, titlepage_name)``.
    """
    templ = (
        ' <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">'
        ' <head><style>'
        ' html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }'
        ' img { width: 100%%; height: 100%%; object-fit: contain; margin-left: auto; margin-right: auto;'
        ' max-width: 100vw; max-height: 100vh; top: 50vh; transform: translateY(-50%%); position: relative; }'
        ' body.cover-fill img { object-fit: fill; }'
        ' </style></head><body><img src="%s"/></body></html> '
    )

    def generic_cover():
        mi = self.book_metadata
        if mi is None:
            return BLANK_JPEG
        from calibre.ebooks.covers import create_cover
        return create_cover(mi.title, mi.authors, mi.series, mi.series_index)

    if input_fmt == 'epub':
        def image_callback(cover_image, wrapped_image):
            # The cover image takes precedence; the wrapped image is only
            # used when no cover data has been stored on the callback yet.
            if cover_image:
                image_callback.cover_data = self.raw_data(
                    cover_image, decode=False)
            if wrapped_image and not getattr(
                    image_callback, 'cover_data', None):
                image_callback.cover_data = self.raw_data(
                    wrapped_image, decode=False)

        def cover_path(action, data):
            if action != 'write_image':
                return
            stored = getattr(image_callback, 'cover_data', None)
            data.write(stored or generic_cover())

        raster_cover_name, titlepage_name = set_epub_cover(
            self, cover_path, (lambda *a: None),
            options={'template': templ}, image_callback=image_callback)
        return raster_cover_name, titlepage_name

    raster_cover_name = find_cover_image(self, strict=True)
    if raster_cover_name is None:
        image_item = self.generate_item(name='cover.jpeg', id_prefix='cover')
        raster_cover_name = self.href_to_name(
            image_item.get('href'), self.opf_name)
        with self.open(raster_cover_name, 'wb') as dest:
            dest.write(generic_cover())

    page_item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
    titlepage_name = self.href_to_name(page_item.get('href'), self.opf_name)
    image_href = self.name_to_href(raster_cover_name, titlepage_name)
    raw = templ % prepare_string_for_xml(image_href, True)
    with self.open(titlepage_name, 'wb') as f:
        f.write(raw.encode('utf-8'))

    spine = self.opf_xpath('//opf:spine')[0]
    itemref = spine.makeelement(OPF('itemref'), idref=page_item.get('id'))
    self.insert_into_xml(spine, itemref, index=0)
    self.dirty(self.opf_name)
    return raster_cover_name, titlepage_name
def rationalize_cover2(self, opf, log):
    ''' Ensure that the cover information in the guide is correct. That
    means, at most one entry with type="cover" that points to a raster cover
    and at most one entry with type="titlepage" that points to an HTML
    titlepage.

    Returns the href of the HTML cover if it was removed from the spine,
    otherwise None. '''
    from calibre.ebooks.oeb.base import OPF
    removed = None
    from lxml import etree
    guide_cover, guide_elem = None, None
    for guide_elem in opf.iterguide():
        if guide_elem.get('type', '').lower() == 'cover':
            guide_cover = guide_elem.get('href', '').partition('#')[0]
            break
    if not guide_cover:
        # No cover in the guide: add an entry for the raster cover, if any
        raster_cover = opf.raster_cover
        if raster_cover:
            if guide_elem is None:
                g = opf.root.makeelement(OPF('guide'))
                opf.root.append(g)
            else:
                g = guide_elem.getparent()
            g.append(g.makeelement(OPF('reference'), attrib={
                'href': raster_cover, 'type': 'cover'}))
        return
    spine = list(opf.iterspine())
    if not spine:
        return
    # Check if the cover specified in the guide is also
    # the first element in spine
    idref = spine[0].get('idref', '')
    manifest = list(opf.itermanifest())
    if not manifest:
        return
    elem = [x for x in manifest if x.get('id', '') == idref]
    if not elem or elem[0].get('href', None) != guide_cover:
        return
    log('Found HTML cover', guide_cover)

    # Remove from spine as covers must be treated
    # specially
    if not self.for_viewer:
        if len(spine) == 1:
            log.warn(
                'There is only a single spine item and it is marked as the cover. Removing cover marking.'
            )
            for guide_elem in tuple(opf.iterguide()):
                if guide_elem.get('type', '').lower() == 'cover':
                    guide_elem.getparent().remove(guide_elem)
            return
        else:
            spine[0].getparent().remove(spine[0])
            removed = guide_cover
    else:
        # Ensure the cover is displayed as the first item in the book, some
        # epub files have it set with linear='no' which causes the cover to
        # display in the end
        spine[0].attrib.pop('linear', None)
        opf.spine[0].is_linear = True
    # Ensure that the guide has a cover entry pointing to a raster cover
    # and a titlepage entry pointing to the html titlepage. The titlepage
    # entry will be used by the epub output plugin, the raster cover entry
    # by other output plugins.

    # Search for a raster cover identified in the OPF
    raster_cover = opf.raster_cover

    # Set the cover guide entry
    if raster_cover is not None:
        guide_elem.set('href', raster_cover)
    else:
        # Render the titlepage to create a raster cover
        from calibre.ebooks import render_html_svg_workaround
        guide_elem.set('href', 'calibre_raster_cover.jpg')
        t = etree.SubElement(
            elem[0].getparent(), OPF('item'),
            href=guide_elem.get('href'), id='calibre_raster_cover')
        t.set('media-type', 'image/jpeg')
        if os.path.exists(guide_cover):
            renderer = render_html_svg_workaround(guide_cover, log)
            if renderer is not None:
                # Close the file deterministically instead of relying on
                # garbage collection to flush and release the handle
                with open('calibre_raster_cover.jpg', 'wb') as f:
                    f.write(renderer)

    # Set the titlepage guide entry
    self.set_guide_type(opf, 'titlepage', guide_cover, 'Title Page')
    return removed
def convert(self, stream, options, file_ext, log, accelerators):
    """Extract the EPUB in ``stream`` into the current directory and
    normalize its OPF.

    The OPF is located, encryption is processed, manifest and guide hrefs
    are rewritten relative to the current directory, the cover is
    rationalized, and invalid or duplicate spine entries are removed. The
    result is written to ``content.opf``, whose absolute path is returned.

    Raises ValueError if no OPF is found, if the book uses DTBook markup or
    if the spine ends up empty, and DRMError if ``process_encryption``
    reports failure.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except Exception:
        log.exception('EPUB appears to be invalid ZIP file, trying a'
                      ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
    opf = self.find_opf()
    if opf is None:
        # Fall back to the first plausible .opf file anywhere in the tree
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')

    if opf is None:
        raise ValueError(
            '%s is not a valid EPUB file (could not find opf)' % path)

    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris

    if len(parts) > 1 and parts[0]:
        delta = '/'.join(parts[:-1]) + '/'

        def normpath(x):
            # Use the argument; the previous version ignored it and read
            # the enclosing loop variable instead.
            return posixpath.normpath(delta + x)

        for elem in opf.itermanifest():
            href = elem.get('href')
            if href is not None:
                elem.set('href', normpath(href))
        for elem in opf.iterguide():
            href = elem.get('href')
            if href is not None:
                elem.set('href', normpath(href))

    f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
    self.removed_cover = f(opf, log)
    if self.removed_cover:
        self.removed_items_to_ignore = (self.removed_cover, )
    epub3_nav = opf.epub3_nav
    if epub3_nav is not None:
        self.convert_epub3_nav(epub3_nav, opf, log, options)

    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                'EPUB files with DTBook markup are not supported')

    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_:
            mt = y.get('media-type', None)
            if mt in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml',
                    'application/text'
            }:
                not_for_spine.add(id_)
            ext = y.get('href', '').rpartition('.')[-1].lower()
            if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                # some epub authoring software sets font mime types to
                # text/plain
                not_for_spine.add(id_)
                y.set('media-type', 'application/font')

    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError('No valid entries in the spine of this EPUB')

    with lopen('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
               share_not_sync=True, cover_offset=None, thumbnail_offset=None,
               start_offset=None, mobi_doctype=2, num_of_resources=None,
               kf8_unknown_count=0, be_kindlegen2=False,
               kf8_header_index=None, opts=None):
    """Build the EXTH header block for a MOBI file and return it as bytes.

    One record is written for each metadata item whose term is in
    ``EXTH_CODES``, followed by records for the UUID, cdetype, publication
    date, generator identification and the optional offsets and counts
    passed as arguments. When ``opts.book_mode`` is ``'comic'`` the fixed
    layout records are added as well.

    Raises ValueError if the metadata has neither a date nor a timestamp.
    """
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [
                    authors_to_sort_string([unicode(c)]) for c in items
                ]
            else:
                creators = [unicode(c) for c in items]
            items = creators
        elif term == 'rights':
            try:
                rights = utf8_text(unicode(metadata.rights[0]))
            except Exception:
                # Best effort: fall back to a placeholder value
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1
            continue

        for item in items:
            data = unicode(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                # Only ISBN identifiers are written. An identifier with no
                # scheme is skipped rather than raising.
                item_scheme = getattr(item, 'scheme', None) or ''
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif item_scheme.lower() == 'isbn':
                    pass
                else:
                    continue
            if term == 'language':
                d2 = usr_lang_as_iso639_1(data)
                if d2:
                    data = d2
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        # The scheme attribute may be absent; treat it as empty rather than
        # calling .lower() on None
        scheme = x.get(OPF('scheme'), None) or ''
        if scheme.lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
            uuid = unicode(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write UUID as SOURCE
    c_uuid = b'calibre:%s' % uuid
    exth.write(pack(b'>II', 112, len(c_uuid) + 8))
    exth.write(c_uuid)
    nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101: b'NWPR', 0x103: b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry
    datestr = None
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if not datestr:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(
            pack(b'>II', EXTH_CODES['lastupdatetime'], len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        vals = {204: 201, 205: 2, 206: 5, 207: 0}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204: 201, 205: 2, 206: 0, 207: 101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204: 201, 205: 1, 206: 2, 207: 33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12, cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['thumboffset'], 12, thumbnail_offset))
        thumbnail_uri_str = bytes(
            'kindle:embed:%s' %
            (to_base(thumbnail_offset, base=32, min_num_digits=4)))
        exth.write(
            pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'],
                 len(thumbnail_uri_str) + 8))
        exth.write(thumbnail_uri_str)
        nrecs += 2

    if start_offset is not None:
        # Accept either a single offset or a sequence of offsets
        try:
            len(start_offset)
        except TypeError:
            start_offset = [start_offset]
        for so in start_offset:
            if so is not None:
                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12, so))
                nrecs += 1

    if kf8_header_index is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
                 kf8_header_index))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['num_of_resources'], 12,
                 num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(
            pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
                 kf8_unknown_count))
        nrecs += 1

    # Extra metadata for fullscreen comics
    if opts and opts.book_mode == 'comic':
        # added for kindleear [insert0003 2017-09-03]
        # comic_screen_size is something like (1072, 1430)
        original_resolution = b'%dx%d' % opts.dest.comic_screen_size
        comic_records = (
            ('RegionMagnification', b'false'),
            ('book-type', b'comic'),
            ('zero-gutter', b'true'),
            ('zero-margin', b'true'),
            ('primary-writing-mode', b'horizontal-lr'),
            ('fixed-layout', b'true'),
            ('orientation-lock', b'portrait'),
            ('original-resolution', original_resolution),
        )
        for key, value in comic_records:
            # Record length is the payload plus the 8 byte record header,
            # computed rather than hard-coded so it cannot drift
            exth.write(pack(b'>II', EXTH_CODES[key], len(value) + 8))
            exth.write(value)
            nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail)  # Always pad w/ at least 1 byte
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
def create_rating(root, prefixes, val):
    """Append a ``calibre:rating`` ``<meta>`` element to the OPF metadata.

    The ``calibre`` prefix is registered through ``ensure_prefix`` first. The
    new element gets ``val`` as its text and is appended as the last child of
    the first ``opf:metadata`` element found directly under ``root``.
    """
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    metadata = XPath('./opf:metadata')(root)[0]
    rating = metadata.makeelement(
        OPF('meta'), attrib={'property': 'calibre:rating'})
    rating.text = val
    metadata.append(rating)
def fb2_header(self):
    """Return the opening ``<FictionBook>`` tag and ``<description>`` block.

    The values are collected from ``self.oeb_book.metadata`` and
    ``self.opts`` into a dict and substituted into a fixed template. The
    ``author``, ``cover`` and ``sequence`` entries are pre-built XML
    fragments; every other entry is escaped with ``prepare_string_for_xml``
    just before substitution.

    If no identifier carries a UUID, a warning is logged and a random UUID
    is generated for the ``<id>`` element.
    """
    from calibre.ebooks.oeb.base import OPF

    oeb_metadata = self.oeb_book.metadata
    # Sample the clock once so day, month and year come from the same instant.
    today = datetime.now()

    metadata = {}
    metadata['title'] = oeb_metadata.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    metadata['date'] = '%i.%i.%i' % (today.day, today.month, today.year)
    if oeb_metadata.language:
        lc = lang_as_iso639_1(oeb_metadata.language[0].value)
        if not lc:
            lc = oeb_metadata.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = u'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre

    # One <author> element per creator; the name is split on single spaces
    # into first / middle / last parts.
    author_xml = []
    for auth in oeb_metadata.creator:
        author_first = u''
        author_middle = u''
        author_last = u''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first, author_last = author_parts
        else:
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        author_xml.append('<author>')
        author_xml.append(
            '<first-name>%s</first-name>' %
            prepare_string_for_xml(author_first))
        if author_middle:
            author_xml.append(
                '<middle-name>%s</middle-name>' %
                prepare_string_for_xml(author_middle))
        author_xml.append(
            '<last-name>%s</last-name>' %
            prepare_string_for_xml(author_last))
        author_xml.append('</author>')
    metadata['author'] = u''.join(author_xml)
    if not metadata['author']:
        # Placeholder author; <last-name> must be closed properly or the
        # resulting document is not well-formed XML.
        metadata['author'] = (
            u'<author><first-name></first-name>'
            u'<last-name></last-name></author>')

    metadata['sequence'] = u''
    if oeb_metadata.series:
        index = '1'
        if oeb_metadata.series_index:
            index = oeb_metadata.series_index[0]
        metadata['sequence'] = u'<sequence name="%s" number="%s" />' % (
            prepare_string_for_xml(u'%s' % oeb_metadata.series[0]), index)

    for x in oeb_metadata['identifier']:
        # An identifier may have no scheme attribute at all; treat that as
        # an empty scheme so the urn:uuid: test below still gets a chance.
        scheme = x.get(OPF('scheme'), None) or ''
        if scheme.lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
            metadata['id'] = unicode(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = str(uuid.uuid4())

    # Escape the plain-text values; the listed keys already hold XML.
    for key, value in metadata.items():
        if key not in ('author', 'cover', 'sequence'):
            metadata[key] = prepare_string_for_xml(value)

    return (
        u'<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink">'
        '<description>'
        '<title-info>'
        '<genre>%(genre)s</genre>'
        '%(author)s'
        '<book-title>%(title)s</book-title>'
        '%(cover)s'
        '<lang>%(lang)s</lang>'
        '%(sequence)s'
        '</title-info>'
        '<document-info>'
        '%(author)s'
        '<program-used>%(appname)s %(version)s</program-used>'
        '<date>%(date)s</date>'
        '<id>%(id)s</id>'
        '<version>1.0</version>'
        '</document-info>'
        '</description>') % metadata
def check_opf(container):
    """Run the OPF consistency checks and return the list of errors found.

    Checks performed, in order: root element and version attribute, presence
    of the metadata/manifest/spine sections, empty ids and dangling idrefs,
    non-linear spine items, manifest hrefs (missing attribute, missing file,
    duplicates), duplicate spine entries, the spine ``toc`` attribute / NCX
    reference, the EPUB 3 nav document, cover ``<meta>`` entries, the unique
    identifier, empty ``dc:identifier`` elements and spine items whose mime
    type is not XHTML.
    """
    opf_name = container.opf_name
    problems = []
    report = problems.append
    opf_version = container.opf_version_parsed

    def note_occurrence(key, line, first_seen, repeats):
        # Remember the first line a key was seen on; every later sighting is
        # collected (together with that first line) in ``repeats``.
        if key in first_seen:
            repeats.setdefault(key, [first_seen[key]]).append(line)
        else:
            first_seen[key] = line

    # Root element / version attribute
    if container.opf.tag != OPF('package'):
        root_err = BaseError(_('The OPF does not have the correct root element'), opf_name, container.opf.sourceline)
        root_err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        report(root_err)
    elif container.opf.get('version') is None and container.book_type == 'epub':
        version_err = BaseError(_('The OPF does not have a version'), opf_name, container.opf.sourceline)
        version_err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        report(version_err)

    # Mandatory sections
    for section in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + section):
            report(MissingSection(opf_name, section))

    # ids and idrefs
    known_ids = set(container.opf_xpath('//*/@id'))
    if '' in known_ids:
        for blank in container.opf_xpath('//*[@id=""]'):
            report(EmptyID(opf_name, blank.sourceline))
        known_ids.discard('')
    for referrer in container.opf_xpath('//*[@idref]'):
        if elem_idref_missing(referrer, known_ids):
            report(IncorrectIdref(opf_name, referrer.get('idref'), referrer.sourceline))

    non_linear = [ref.sourceline for ref in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if non_linear:
        report(NonLinearItems(opf_name, non_linear))

    # Manifest hrefs
    first_seen, repeats = {}, {}
    for entry in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = entry.get('href', None)
        if href is None:
            report(NoHref(opf_name, entry.get('id', None), entry.sourceline))
            continue
        target = container.href_to_name(href, opf_name)
        if not (target and container.exists(target)):
            report(MissingHref(opf_name, href, entry.sourceline))
        note_occurrence(href, entry.sourceline, first_seen, repeats)
    problems.extend([DuplicateHref(opf_name, key, lines) for key, lines in iteritems(repeats)])

    # Duplicate spine entries
    first_seen, repeats = {}, {}
    for entry in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        note_occurrence(entry.get('idref'), entry.sourceline, first_seen, repeats)
    problems.extend([DuplicateHref(opf_name, key, lines, for_spine=True) for key, lines in iteritems(repeats)])

    # spine/@toc and the NCX
    ncx_mime = guess_type('a.ncx')
    spines_with_toc = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spines_with_toc:
        spine = spines_with_toc[0]
        candidates = [
            it for it in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]')
            if it.get('id') == spine.get('toc')]
        if not candidates:
            report(IncorrectToc(opf_name, spine.sourceline, bad_idref=spine.get('toc')))
        else:
            toc_item = candidates[0]
            if toc_item.get('media-type', '') != ncx_mime:
                report(IncorrectToc(opf_name, toc_item.sourceline, bad_mimetype=toc_item.get('media-type')))
    else:
        spines = container.opf_xpath('/opf:package/opf:spine')
        if spines:
            spine = spines[0]
            ncx_names = container.manifest_type_map.get(ncx_mime)
            if ncx_names:
                name_to_id = {v: k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = name_to_id.get(ncx_names[0])
                if ncx_id:
                    report(MissingNCXRef(opf_name, spine.sourceline, ncx_id))

    # EPUB 3 navigation document
    if opf_version.major > 2:
        nav_name = find_existing_nav_toc(container)
        if nav_name is None:
            report(MissingNav(opf_name, 0))
        else:
            nav_toc = parse_nav(container, nav_name)
            if len(nav_toc) == 0:
                report(EmptyNav(nav_name, 0))

    # Cover <meta> entries
    cover_metas = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if len(cover_metas) > 0:
        if len(cover_metas) > 1:
            report(MultipleCovers(opf_name, [c.sourceline for c in cover_metas]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover_meta in cover_metas:
            if cover_meta.get('content', None) not in manifest_ids:
                report(IncorrectCover(opf_name, cover_meta.sourceline, cover_meta.get('content', '')))
            serialized = etree.tostring(cover_meta)
            try:
                name_pos, content_pos = serialized.index(b'name="'), serialized.index(b'content="')
            except ValueError:
                name_pos = content_pos = -1
            # Flag a name attribute that is serialized after content
            if name_pos > -1 and content_pos > -1 and name_pos > content_pos:
                report(NookCover(opf_name, cover_meta.sourceline))

    # Unique identifier
    uid = container.opf.get('unique-identifier', None)
    if uid is None or not container.opf_xpath('/opf:package/opf:metadata/dc:identifier[@id=%r]' % uid):
        report(NoUID(opf_name))
    for ident in container.opf_xpath('/opf:package/opf:metadata/dc:identifier'):
        if not (ident.text and ident.text.strip()):
            report(EmptyIdentifier(opf_name, ident.sourceline))

    # Spine items that are not XHTML
    for itemref, name, linear in container.spine_iter:
        mime = container.mime_map[name]
        if mime == XHTML_MIME:
            continue
        item_id = itemref.get('idref', None)
        line = None
        if item_id:
            matches = container.opf_xpath('/opf:package/opf:manifest/opf:item[@id=%r]' % item_id)
            if matches:
                line = matches[0].sourceline
        else:
            item_id = None
        report(BadSpineMime(name, item_id, mime, line, opf_name))

    return problems


def elem_idref_missing(elem, known_ids):
    """Return True when the element's ``idref`` is not one of ``known_ids``."""
    return elem.get('idref') not in known_ids
def create_series(root, refines, series, series_index):
    """Append a ``belongs-to-collection`` ``<meta>`` element for a series.

    The element's text is ``series``. After it has been appended to the first
    ``opf:metadata`` element under ``root``, ``set_refines`` is called with
    two refinements: ``collection-type`` = ``series`` and
    ``group-position`` = ``series_index``.
    """
    metadata = XPath('./opf:metadata')(root)[0]
    collection = metadata.makeelement(
        OPF('meta'), attrib={'property': 'belongs-to-collection'})
    collection.text = series
    metadata.append(collection)

    collection_type = refdef('collection-type', 'series')
    position = refdef('group-position', series_index)
    set_refines(collection, refines, collection_type, position)
def fb2_header(self):
    """Return the opening ``<FictionBook>`` tag and ``<description>`` block.

    The values are collected from ``self.oeb_book.metadata`` and
    ``self.opts`` into a dict and substituted into an indented template;
    lines that end up empty are removed from the result. The ``author``,
    ``cover``, ``sequence``, ``keywords``, ``year``, ``publisher``, ``isbn``
    and ``comments`` entries are pre-built XML fragments; every other entry
    is escaped with ``prepare_string_for_xml``.

    If no identifier carries a UUID, a warning is logged and a random UUID
    is generated for the ``<id>`` element.
    """
    from calibre.ebooks.oeb.base import OPF

    def scheme_of(identifier):
        # An identifier may have no scheme attribute at all; treat that as
        # an empty scheme instead of failing on ``None.lower()``.
        return (identifier.get(OPF('scheme'), None) or '').lower()

    oeb_metadata = self.oeb_book.metadata
    # Sample the clock once so day, month and year come from the same instant.
    today = datetime.now()

    metadata = {}
    metadata['title'] = oeb_metadata.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    metadata['date'] = '%i.%i.%i' % (today.day, today.month, today.year)
    if oeb_metadata.language:
        lc = lang_as_iso639_1(oeb_metadata.language[0].value)
        if not lc:
            lc = oeb_metadata.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = u'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre

    # One <author> element per creator; the name is split on single spaces
    # into first / middle / last parts.
    author_xml = []
    for auth in oeb_metadata.creator:
        author_first = ''
        author_middle = ''
        author_last = ''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first, author_last = author_parts
        else:
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        author_xml.append('<author>')
        author_xml.append(
            '<first-name>%s</first-name>' %
            prepare_string_for_xml(author_first))
        if author_middle:
            author_xml.append(
                '<middle-name>%s</middle-name>' %
                prepare_string_for_xml(author_middle))
        author_xml.append(
            '<last-name>%s</last-name>' %
            prepare_string_for_xml(author_last))
        author_xml.append('</author>')
    metadata['author'] = ''.join(author_xml)
    if not metadata['author']:
        metadata['author'] = (
            '<author><first-name></first-name>'
            '<last-name></last-name></author>')

    metadata['keywords'] = ''
    tags = list(map(unicode_type, oeb_metadata.subject))
    if tags:
        tags = ', '.join(prepare_string_for_xml(x) for x in tags)
        metadata['keywords'] = '<keywords>%s</keywords>' % tags

    metadata['sequence'] = ''
    if oeb_metadata.series:
        index = '1'
        if oeb_metadata.series_index:
            index = oeb_metadata.series_index[0]
        metadata['sequence'] = '<sequence name="%s" number="%s"/>' % (
            prepare_string_for_xml('%s' % oeb_metadata.series[0]), index)

    year = publisher = isbn = ''
    identifiers = oeb_metadata['identifier']
    for x in identifiers:
        if scheme_of(x) == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
            metadata['id'] = unicode_type(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = unicode_type(uuid.uuid4())

    try:
        date = oeb_metadata['date'][0]
    except IndexError:
        pass
    else:
        # Only the part before the first '-' of the date value is used.
        year = '<year>%s</year>' % prepare_string_for_xml(
            date.value.partition('-')[0])

    try:
        publisher = oeb_metadata['publisher'][0]
    except IndexError:
        pass
    else:
        publisher = '<publisher>%s</publisher>' % prepare_string_for_xml(
            publisher.value)

    # No break: when several ISBN identifiers exist the last one is used.
    for x in identifiers:
        if scheme_of(x) == 'isbn':
            isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

    metadata['year'], metadata['isbn'], metadata['publisher'] = (
        year, isbn, publisher)

    # Escape the plain-text values; the listed keys already hold XML.
    for key, value in metadata.items():
        if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
                       'publisher', 'isbn'):
            metadata[key] = prepare_string_for_xml(value)

    try:
        comments = oeb_metadata['description'][0]
    except Exception:
        metadata['comments'] = ''
    else:
        from calibre.utils.html2text import html2text
        metadata['comments'] = '<annotation><p>{}</p></annotation>'.format(
            prepare_string_for_xml(html2text(comments.value).strip()))

    # Keep the indentation level of the description the same as the body.
    header = textwrap.dedent('''\
        <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
        <description>
            <title-info>
                <genre>%(genre)s</genre>
                %(author)s
                <book-title>%(title)s</book-title>
                %(cover)s
                <lang>%(lang)s</lang>
                %(keywords)s
                %(sequence)s
                %(comments)s
            </title-info>
            <document-info>
                %(author)s
                <program-used>%(appname)s %(version)s</program-used>
                <date>%(date)s</date>
                <id>%(id)s</id>
                <version>1.0</version>
            </document-info>
            <publish-info>
                %(publisher)s
                %(year)s
                %(isbn)s
            </publish-info>
        </description>''') % metadata

    # Remove empty lines.
    return '\n'.join(filter(unicode_type.strip, header.splitlines()))
def create_epub_cover(container, cover_path, existing_image, options=None):
    """Create a titlepage for the cover image and register it in the OPF.

    :param cover_path: Path of the cover image file. Its extension (the text
        after the last ``.``) names the new image item, and the file is
        copied into the container unless ``existing_image`` is given.
    :param existing_image: Name of an image already in the container to use
        as the cover, or a false value to import ``cover_path``.
    :param options: Optional mapping read for ``keep_aspect`` and ``no_svg``.
        When None, the ``epub_output`` defaults are loaded and
        ``preserve_cover_aspect_ratio`` / ``no_svg_cover`` are used instead.
    :return: ``(raster_cover, titlepage)``, the container names of the cover
        image and of the generated titlepage.

    The titlepage is inserted first in the spine, a ``cover`` guide reference
    and a ``<meta name="cover">`` element are added, and the image's manifest
    item is forced to have ``id="cover"``.
    """
    from calibre.ebooks.conversion.config import load_defaults
    from calibre.ebooks.oeb.transforms.cover import CoverManager

    ext = cover_path.rpartition('.')[-1].lower()
    cname, tname = 'cover.' + ext, 'titlepage.xhtml'
    recommended_folders = get_recommended_folders(container, (cname, tname))

    if existing_image:
        raster_cover = existing_image
        # Reverse lookup: manifest id of the existing image
        manifest_id = {
            v: k for k, v in container.manifest_id_map.iteritems()
        }[existing_image]
        raster_cover_item = container.opf_xpath(
            '//opf:manifest/*[@id="%s"]' % manifest_id)[0]
    else:
        folder = recommended_folders[cname]
        if folder:
            cname = folder + '/' + cname
        raster_cover_item = container.generate_item(cname, id_prefix='cover')
        raster_cover = container.href_to_name(
            raster_cover_item.get('href'), container.opf_name)
        with open(cover_path, 'rb') as src, \
                container.open(raster_cover, 'wb') as dest:
            shutil.copyfileobj(src, dest)

    if options is None:
        opts = load_defaults('epub_output')
        keep_aspect = opts.get('preserve_cover_aspect_ratio', False)
        no_svg = opts.get('no_svg_cover', False)
    else:
        keep_aspect = options.get('keep_aspect', False)
        no_svg = options.get('no_svg', False)

    if no_svg:
        # The doubled %% survives the later ``templ % href`` substitution
        style = 'style="height: 100%%"'
        templ = CoverManager.NONSVG_TEMPLATE.replace('__style__', style)
    else:
        # Fallback size, used when the real dimensions cannot be determined
        width, height = 600, 800
        try:
            if existing_image:
                width, height = identify_data(
                    container.raw_data(existing_image, decode=False))[:2]
            else:
                width, height = identify(cover_path)[:2]
        except Exception:
            # Best effort only: log and keep the fallback size. Catching
            # Exception (not everything) lets KeyboardInterrupt and
            # SystemExit propagate.
            container.log.exception("Failed to get width and height of cover")
        ar = 'xMidYMid meet' if keep_aspect else 'none'
        templ = CoverManager.SVG_TEMPLATE.replace('__ar__', ar)
        templ = templ.replace('__viewbox__', '0 0 %d %d' % (width, height))
        templ = templ.replace('__width__', str(width))
        templ = templ.replace('__height__', str(height))

    folder = recommended_folders[tname]
    if folder:
        tname = folder + '/' + tname
    titlepage_item = container.generate_item(tname, id_prefix='titlepage')
    titlepage = container.href_to_name(
        titlepage_item.get('href'), container.opf_name)
    raw = templ % container.name_to_href(
        raster_cover, titlepage).encode('utf-8')
    with container.open(titlepage, 'wb') as f:
        f.write(raw)

    # The raster cover item must have id="cover" (needed by the Nook
    # firmware). Any other element currently using that id is given a fresh
    # one, and references to it are updated to match.
    if raster_cover_item.get('id') != 'cover':
        from calibre.ebooks.oeb.base import uuid_id
        newid = uuid_id()
        for item in container.opf_xpath('//*[@id="cover"]'):
            item.set('id', newid)
        for item in container.opf_xpath('//*[@idref="cover"]'):
            item.set('idref', newid)
        raster_cover_item.set('id', 'cover')

    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=titlepage_item.get('id'))
    container.insert_into_xml(spine, ref, index=0)

    guide = container.opf_get_or_create('guide')
    container.insert_into_xml(
        guide,
        guide.makeelement(
            OPF('reference'), type='cover', title=_('Cover'),
            href=container.name_to_href(titlepage, base=container.opf_name)))

    metadata = container.opf_get_or_create('metadata')
    meta = metadata.makeelement(OPF('meta'), name='cover')
    meta.set('content', raster_cover_item.get('id'))
    container.insert_into_xml(metadata, meta)

    return raster_cover, titlepage
def check_opf(container):
    """Run the OPF consistency checks and return the list of errors found.

    Checks performed, in order: root element, presence of the
    metadata/manifest/spine sections, dangling idrefs, non-linear spine
    items, manifest hrefs (missing file, duplicates), duplicate spine
    entries, the spine ``toc`` attribute, cover ``<meta>`` entries, the
    unique identifier and spine items whose mime type is not XHTML.
    """
    errors = []

    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'),
                        container.opf_name)
        err.HELP = xml(_(
            'The opf must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">'
        )).format(OPF2_NS)
        errors.append(err)

    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    all_ids = set(container.opf_xpath('//*/@id'))
    for elem in container.opf_xpath('//*[@idref]'):
        if elem.get('idref') not in all_ids:
            errors.append(IncorrectIdref(
                container.opf_name, elem.get('idref'), elem.sourceline))

    nl_items = [
        elem.sourceline for elem in
        container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    # seen: key -> first source line; dups: key -> all lines of repeated keys
    seen, dups = {}, {}
    for item in container.opf_xpath(
            '/opf:package/opf:manifest/opf:item[@href]'):
        href = item.get('href')
        hname = container.href_to_name(href, container.opf_name)
        # An href that does not resolve to a name is reported as missing
        # rather than being handed to container.exists().
        if not hname or not container.exists(hname):
            errors.append(
                MissingHref(container.opf_name, href, item.sourceline))
        if href in seen:
            if href not in dups:
                dups[href] = [seen[href]]
            dups[href].append(item.sourceline)
        else:
            seen[href] = item.sourceline
    errors.extend(
        DuplicateHref(container.opf_name, eid, locs)
        for eid, locs in dups.items())

    seen, dups = {}, {}
    for item in container.opf_xpath(
            '/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            if ref not in dups:
                dups[ref] = [seen[ref]]
            dups[ref].append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(
        DuplicateHref(container.opf_name, eid, locs, for_spine=True)
        for eid, locs in dups.items())

    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [
            x for x in
            container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]')
            if x.get('id') == spine.get('toc')]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(
                    container.opf_name, mitem.sourceline,
                    bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(
                container.opf_name, spine.sourceline,
                bad_idref=spine.get('toc')))

    covers = container.opf_xpath(
        '/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if len(covers) > 0:
        if len(covers) > 1:
            errors.append(MultipleCovers(
                container.opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(
            container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(
                    container.opf_name, cover.sourceline,
                    cover.get('content', '')))
            # etree.tostring() returns bytes, so search with bytes needles
            raw = etree.tostring(cover)
            try:
                n, c = raw.index(b'name="'), raw.index(b'content="')
            except ValueError:
                n = c = -1
            # Flag a name attribute that is serialized after content
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    uid = container.opf.get('unique-identifier', None)
    if uid is None or not container.opf_xpath(
            '/opf:package/opf:metadata/dc:identifier[@id=%r]' % uid):
        errors.append(NoUID(container.opf_name))

    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                mitem = container.opf_xpath(
                    '/opf:package/opf:manifest/opf:item[@id=%r]' % iid)
                if mitem:
                    lnum = mitem[0].sourceline
            else:
                iid = None
            errors.append(
                BadSpineMime(name, iid, mt, lnum, container.opf_name))

    return errors
def convert(self, stream, options, file_ext, log, accelerators):
    """Unpack an EPUB into the current directory and normalise its OPF.

    The archive is extracted with ``ZipFile``; if that fails, the failure is
    logged and a more forgiving ZIP parser is tried. The OPF is then located,
    encrypted fonts are processed, manifest and guide hrefs are made relative
    to the current directory, the cover is rationalised and invalid spine
    entries are dropped. The result is written to ``content.opf``.

    :return: Absolute path of the written ``content.opf``.
    :raises ValueError: if no OPF is found, the book uses DTBook markup, or
        the spine has no valid entries.
    :raises DRMError: if ``process_encryption`` reports failure.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not mistaken for a broken archive.
        log.exception("EPUB appears to be invalid ZIP file, trying a"
                      " more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join("META-INF", "encryption.xml"))
    opf = self.find_opf()
    if opf is None:
        # Fall back to the first .opf in the tree, ignoring __MACOSX
        # entries and hidden (dot) files
        for f in walk(u"."):
            if (f.lower().endswith(".opf") and "__MACOSX" not in f and
                    not os.path.basename(f).startswith(".")):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")

    if opf is None:
        raise ValueError(
            "%s is not a valid EPUB file (could not find opf)" % path)

    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris

    if len(parts) > 1 and parts[0]:
        # The OPF lives in a sub-directory: prefix every href with it, since
        # the rewritten OPF is saved in the current directory.
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))

    self.removed_cover = self.rationalize_cover(opf, log)

    self.optimize_opf_parsing = opf

    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError("EPUB files with DTBook markup are not supported")

    # Manifest ids whose media type means they must not appear in the spine
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in (
                "application/vnd.adobe-page-template+xml",
                "application/text"):
            not_for_spine.add(id_)

    # Drop spine entries with no idref, an excluded idref or a repeated one
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError("No valid entries in the spine of this EPUB")

    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u"content.opf")
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    """Copy the fields set on ``mi`` into the OEB metadata object ``m``.

    For each field that is not null on ``mi`` the existing entries in ``m``
    are replaced. When ``override_input_metadata`` is true, book producer,
    comments, publisher, series, series index, rating, tags and ISBN are
    additionally removed from ``m`` if ``mi`` does not provide them. A
    timestamp is always added if ``m`` ends up without one.
    """
    from calibre.ebooks.oeb.base import OPF

    def is_author(x):
        return x.role.lower() in ['aut', '']

    def is_producer(x):
        return x.role.lower() == 'bkp'

    def is_isbn(x):
        return x.scheme.lower() == 'isbn'

    def sync(mi_field, oeb_field, get_value, clear_on_override):
        # Replace oeb_field from mi_field when it is set; otherwise only
        # clear it if overriding was requested for this field.
        if not mi.is_null(mi_field):
            m.clear(oeb_field)
            m.add(oeb_field, get_value())
        elif clear_on_override and override_input_metadata:
            m.clear(oeb_field)

    sync('title', 'title', lambda: mi.title, False)

    if mi.title_sort:
        if not m.title:
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)

    if not mi.is_null('authors'):
        m.filter('creator', is_author)
        for author in mi.authors:
            creator_attrib = {'role': 'aut'}
            if mi.author_sort:
                creator_attrib[OPF('file-as')] = mi.author_sort
            m.add('creator', author, attrib=creator_attrib)

    if not mi.is_null('book_producer'):
        m.filter('contributor', is_producer)
        m.add('contributor', mi.book_producer, role='bkp')
    elif override_input_metadata:
        m.filter('contributor', is_producer)

    sync('comments', 'description', lambda: mi.comments, True)
    sync('publisher', 'publisher', lambda: mi.publisher, True)
    sync('series', 'series', lambda: mi.series, True)

    found_isbn = False
    for typ, val in mi.get_identifiers().iteritems():
        wanted = typ.lower()
        if wanted == 'isbn':
            found_isbn = True
        updated = False
        for existing in m.identifier:
            if existing.scheme.lower() == wanted:
                existing.content = val
                updated = True
        if not updated:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not found_isbn:
        m.filter('identifier', is_isbn)

    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            if not lang or lang.lower() in ('und', ''):
                continue
            m.add('language', lang)

    sync('series_index', 'series_index', lambda: mi.format_series_index(),
         True)
    sync('rating', 'rating', lambda: '%.2f' % mi.rating, True)

    if not mi.is_null('tags'):
        m.clear('subject')
        for tag in mi.tags:
            m.add('subject', tag)
    elif override_input_metadata:
        m.clear('subject')

    sync('pubdate', 'date', lambda: isoformat(mi.pubdate), False)
    sync('timestamp', 'timestamp', lambda: isoformat(mi.timestamp), False)
    sync('rights', 'rights', lambda: mi.rights, False)
    sync('publication_type', 'publication_type',
         lambda: mi.publication_type, False)

    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
               share_not_sync=True, cover_offset=None, thumbnail_offset=None,
               start_offset=None, mobi_doctype=2, num_of_resources=None,
               kf8_unknown_count=0, be_kindlegen2=False,
               kf8_header_index=None, page_progression_direction=None,
               primary_writing_mode=None):
    """Serialize ``metadata`` and the given options into an EXTH block.

    Each record is written as a big-endian ``(code, length)`` pair followed
    by its payload, where ``length`` includes the 8 header bytes. Integer
    records are written as ``(code, 12, value)``.

    :param metadata: Book metadata; iterated for terms present in
        ``EXTH_CODES`` and indexed for ``identifier``, ``date`` and
        ``timestamp``.
    :param start_offset: A single offset or a sequence of offsets; ``None``
        entries in a sequence are skipped.
    :return: ``b'EXTH'`` + header + records + 1 to 4 NUL padding bytes.
    :raises ValueError: if the metadata has neither a date nor a timestamp.
    """
    exth = BytesIO()
    nrecs = 0

    for term in metadata:
        if term not in EXTH_CODES:
            continue
        code = EXTH_CODES[term]
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
                creators = [authors_to_sort_string([unicode_type(c)])
                            for c in items]
            else:
                creators = [unicode_type(c) for c in items]
            items = creators
        elif term == 'rights':
            # Only the first rights entry is written; fall back to
            # 'Unknown' if it cannot be read or encoded.
            try:
                rights = utf8_text(unicode_type(metadata.rights[0]))
            except Exception:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
            exth.write(rights)
            nrecs += 1
            continue

        for item in items:
            data = unicode_type(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
                # Only ISBN identifiers are written as identifier records
                if data.lower().startswith('urn:isbn:'):
                    data = data[9:]
                elif item.scheme.lower() == 'isbn':
                    pass
                else:
                    continue
            if term == 'language':
                d2 = lang_as_iso639_1(data)
                if d2:
                    data = d2
            data = utf8_text(data)
            exth.write(pack(b'>II', code, len(data) + 8))
            exth.write(data)
            nrecs += 1

    # Write UUID as ASIN
    uuid = None
    from calibre.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        # An identifier may have no scheme attribute at all; treat that as
        # an empty scheme so the urn:uuid: test still gets a chance.
        scheme = x.get(OPF('scheme'), None) or ''
        if scheme.lower() == 'uuid' or \
                unicode_type(x).startswith('urn:uuid:'):
            uuid = unicode_type(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
        uuid = str(uuid4())

    if isinstance(uuid, unicode_type):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
        exth.write(uuid)
        nrecs += 1

    # Write UUID as SOURCE
    c_uuid = b'calibre:%s' % uuid
    exth.write(pack(b'>II', 112, len(c_uuid) + 8))
    exth.write(c_uuid)
    nrecs += 1

    # Write cdetype
    if not is_periodical:
        if not share_not_sync:
            exth.write(pack(b'>II', 501, 12))
            exth.write(b'EBOK')
            nrecs += 1
    else:
        ids = {0x101: b'NWPR', 0x103: b'MAGZ'}.get(mobi_doctype, None)
        if ids:
            exth.write(pack(b'>II', 501, 12))
            exth.write(ids)
            nrecs += 1

    # Add a publication date entry. datestr must start out as None so that
    # the check below raises ValueError (not UnboundLocalError) when the
    # book has neither a date nor a timestamp.
    datestr = None
    if metadata['date']:
        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")

    datestr = bytes(datestr)
    exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
    exth.write(datestr)
    nrecs += 1
    if is_periodical:
        exth.write(pack(b'>II', EXTH_CODES['lastupdatetime'],
                        len(datestr) + 8))
        exth.write(datestr)
        nrecs += 1

    if be_kindlegen2:
        mv = 200 if iswindows else 202 if isosx else 201
        vals = {204: mv, 205: 2, 206: 9, 207: 0}
    elif is_periodical:
        # Pretend to be amazon's super secret periodical generator
        vals = {204: 201, 205: 2, 206: 0, 207: 101}
    else:
        # Pretend to be kindlegen 1.2
        vals = {204: 201, 205: 1, 206: 2, 207: 33307}
    for code, val in vals.iteritems():
        exth.write(pack(b'>III', code, 12, val))
        nrecs += 1
    if be_kindlegen2:
        revnum = b'0730-890adc2'
        exth.write(pack(b'>II', 535, 8 + len(revnum)) + revnum)
        nrecs += 1

    if cover_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12,
                        cover_offset))
        exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0))
        nrecs += 2
    if thumbnail_offset is not None:
        exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
                        thumbnail_offset))
        thumbnail_uri_str = bytes(
            'kindle:embed:%s' % (to_base(thumbnail_offset, base=32,
                                         min_num_digits=4)))
        exth.write(pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'],
                        len(thumbnail_uri_str) + 8))
        exth.write(thumbnail_uri_str)
        nrecs += 2

    if start_offset is not None:
        # Accept a bare offset as well as a sequence of offsets
        try:
            len(start_offset)
        except TypeError:
            start_offset = [start_offset]
        for so in start_offset:
            if so is not None:
                exth.write(pack(b'>III', EXTH_CODES['startreading'], 12,
                                so))
                nrecs += 1

    if kf8_header_index is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_header_index'], 12,
                        kf8_header_index))
        nrecs += 1

    if num_of_resources is not None:
        exth.write(pack(b'>III', EXTH_CODES['num_of_resources'], 12,
                        num_of_resources))
        nrecs += 1

    if kf8_unknown_count is not None:
        exth.write(pack(b'>III', EXTH_CODES['kf8_unknown_count'], 12,
                        kf8_unknown_count))
        nrecs += 1

    if primary_writing_mode:
        pwm = primary_writing_mode.encode('utf-8')
        exth.write(pack(b'>II', EXTH_CODES['primary_writing_mode'],
                        len(pwm) + 8))
        exth.write(pwm)
        nrecs += 1

    if page_progression_direction in {'rtl', 'ltr', 'default'}:
        ppd = bytes(page_progression_direction)
        exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'],
                        len(ppd) + 8))
        exth.write(ppd)
        nrecs += 1

    exth = exth.getvalue()
    trail = len(exth) % 4
    pad = b'\0' * (4 - trail)  # Always pad w/ at least 1 byte
    # The length field covers the records plus the 12 header bytes
    exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
    return b''.join(exth)
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file called ``name`` in two at the position given by
    ``loc_or_xpath``. Links and references to the affected files are
    migrated automatically.

    :param loc_or_xpath: Either an XPath expression, for example
        //h:div[@id="split_here"], or a *loc*, which is used internally to
        implement splitting in the preview panel.
    :param before: If True the split happens before the identified element,
        otherwise after it.
    :param totals: Used internally
    :return: The name of the newly created file holding the bottom half.
    '''
    root = container.parsed(name)
    if not isinstance(loc_or_xpath, type('')):
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser have yielded
            # different node counts, this can happen if the file is valid XML
            # but contains constructs like nested <p> tags. So force parse it
            # with the HTML 5 parser and try again.
            source = container.raw_data(name)
            root = container.parse_xhtml(
                source, fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(
                    _('The file %s has malformed markup. Try running the Fix HTML tool'
                      ' before splitting') % name)
            container.replace(name, root)
    else:
        split_point = root.xpath(loc_or_xpath)[0]

    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')

    top_tree, bottom_tree = do_split(split_point, container.log, before=before)
    top_root = top_tree.getroot()
    bottom_root = bottom_tree.getroot()

    # The empty anchor (a link to the file itself) belongs to the top half
    top_ids = frozenset(top_root.xpath('//*/@id'))
    top_names = frozenset(top_root.xpath('//*/@name'))
    anchors_in_top = top_ids | top_names | {''}
    bottom_ids = frozenset(bottom_root.xpath('//*/@id'))
    bottom_names = frozenset(bottom_root.xpath('//*/@name'))
    anchors_in_bottom = bottom_ids | bottom_names

    # Pick the first unused <base>_split<N>.<ext> name
    stem, _sep, ext = name.rpartition('.')
    stem = re.sub(r'_split\d+$', '', stem)
    counter = 1
    candidate = '%s_split%d.%s' % (stem, counter, ext)
    while container.exists(candidate):
        counter += 1
        candidate = '%s_split%d.%s' % (stem, counter, ext)

    manifest_item = container.generate_item(
        candidate, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(
        manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for tree_root in (top_root, bottom_root):
        for link in tree_root.xpath('//*[@href]'):
            url = link.get('href')
            if url.startswith('#'):
                target = name
            else:
                target = container.href_to_name(url, name)
            if target != name:
                continue
            fragment = urlparse(url).fragment
            if fragment in anchors_in_top:
                if tree_root is bottom_root:
                    new_href = '%s#%s' % (
                        container.name_to_href(name, bottom_name), fragment)
                else:
                    new_href = '#' + fragment
            elif fragment in anchors_in_bottom:
                if tree_root is top_root:
                    new_href = '%s#%s' % (
                        container.name_to_href(bottom_name, name), fragment)
                else:
                    new_href = '#' + fragment
            else:
                continue
            link.set('href', new_href)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in iteritems(container.mime_map):
        if fname in {name, bottom_name}:
            continue
        replacer = SplitLinkReplacer(
            fname, anchors_in_bottom, name, bottom_name, container)
        container.replace_links(fname, replacer)

    container.replace(name, top_root)
    container.replace(bottom_name, bottom_root)

    # Place the new file in the spine right after the one that was split
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    insert_at = spine.index(spine_item) + 1
    new_ref = spine.makeelement(
        OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        new_ref.set('linear', 'no')
    container.insert_into_xml(spine, new_ref, index=insert_at)
    container.dirty(container.opf_name)
    return bottom_name
def convert(self, stream, options, file_ext, log, accelerators):
    """Unpack an EPUB and write a cleaned-up ``content.opf`` for it.

    The archive in ``stream`` is extracted into the current working
    directory, its OPF is located and parsed, encryption and cover
    handling are delegated to helper methods on ``self``, and the spine is
    pruned of unusable entries. The resulting OPF is rendered to
    ``content.opf`` (relative to the current working directory).

    :param stream: File-like object containing the EPUB; it is rewound with
        ``seek(0)`` if the first ZIP parser fails. Its ``name`` attribute,
        when present, is used in error messages.
    :param options: Not used by this method.
    :param file_ext: Not used by this method.
    :param log: Logger; passed on to the helper methods and used to record
        the ZIP parser fallback.
    :param accelerators: Not used by this method.
    :return: Absolute path of the written ``content.opf``.
    :raises ValueError: If no OPF file can be found, if the manifest
        contains DTBook markup, or if the spine has no valid entries left.
    :raises DRMError: If ``META-INF/encryption.xml`` exists and
        ``process_encryption`` returns a false value.

    Side effects: sets ``self._encrypted_font_uris``,
    ``self.encrypted_fonts`` and ``self.removed_cover``.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except Exception:
        # Any ordinary failure of the strict parser falls back to the more
        # forgiving one. KeyboardInterrupt/SystemExit are deliberately NOT
        # caught here, so a user abort is not mistaken for a corrupt archive.
        log.exception('EPUB appears to be invalid ZIP file, trying a'
                      ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)

    encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))

    opf = self.find_opf()
    if opf is None:
        # Fall back to the first .opf found on disk, ignoring macOS
        # resource-fork directories and hidden files.
        for candidate in walk(u'.'):
            if (candidate.lower().endswith('.opf')
                    and '__MACOSX' not in candidate
                    and not os.path.basename(candidate).startswith('.')):
                opf = os.path.abspath(candidate)
                break

    path = getattr(stream, 'name', 'stream')
    if opf is None:
        raise ValueError(
            '%s is not a valid EPUB file (could not find opf)' % path)

    opf = os.path.relpath(opf, os.getcwdu())
    # (directory, filename) of the OPF relative to the working directory;
    # the directory part is used below to re-base hrefs.
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    # Same list object as _encrypted_font_uris (presumably filled in by
    # process_encryption -- confirm against that method).
    self.encrypted_fonts = self._encrypted_font_uris

    epub3_nav = opf.epub3_nav
    if epub3_nav is not None:
        self.convert_epub3_nav(epub3_nav, opf, log)

    if len(parts) > 1 and parts[0]:
        # The OPF is rewritten to the working directory at the end, so
        # hrefs that were relative to its original directory get that
        # directory prepended.
        # NOTE(review): parts[0] comes from os.path.split and may contain
        # the native path separator on platforms where it is not '/' --
        # confirm downstream consumers tolerate that.
        delta = '/'.join(parts[:-1]) + '/'
        for elem in opf.itermanifest():
            elem.set('href', delta + elem.get('href'))
        for elem in opf.iterguide():
            elem.set('href', delta + elem.get('href'))

    if opf.package_version >= 3.0:
        rationalize_cover = self.rationalize_cover3
    else:
        rationalize_cover = self.rationalize_cover2
    self.removed_cover = rationalize_cover(opf, log)

    # Manifest media types whose items must not be referenced from the spine.
    non_spine_media_types = {
        'application/vnd.adobe-page-template+xml',
        'application/vnd.adobe.page-template+xml',
        'application/adobe-page-template+xml',
        'application/adobe.page-template+xml',
        'application/text',
    }
    not_for_spine = set()
    for item in opf.itermanifest():
        media_type = item.get('media-type', None)
        if media_type == 'application/x-dtbook+xml':
            raise ValueError(
                'EPUB files with DTBook markup are not supported')
        item_id = item.get('id', None)
        if item_id and media_type in non_spine_media_types:
            not_for_spine.add(item_id)

    # Drop spine entries that have no idref, point at a non-spine item, or
    # repeat an idref already seen. Iterate over a copy because entries are
    # removed from the tree during the loop.
    seen = set()
    for itemref in list(opf.iterspine()):
        ref = itemref.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            itemref.getparent().remove(itemref)
            continue
        seen.add(ref)

    if not list(opf.iterspine()):
        raise ValueError('No valid entries in the spine of this EPUB')

    with lopen('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')