def update_metadata(ebook, new_opf):
    """Overwrite the metadata of the book's OPF with that found in *new_opf*.

    The book's OPF (``ebook.opf_name``) is opened for in-place update, the
    metadata parsed from the file at path ``new_opf`` is applied to it with
    ``apply_null=True`` and ``update_timestamp=True``, and the re-rendered
    OPF is written back over the original content.

    :param ebook: container object providing ``opf_name``,
        ``name_to_abspath()`` and ``open()``.
    :param new_opf: filesystem path of the OPF file to read metadata from.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    # NOTE: inside this body the imported name shadows this function's own
    # name; the call below is to calibre's EPUB helper, not a recursion.
    from calibre.ebooks.metadata.epub import update_metadata

    opfpath = ebook.name_to_abspath(ebook.opf_name)
    with ebook.open(ebook.opf_name, "r+b") as stream, open(new_opf, "rb") as ns:
        opf = OPF(
            stream,
            basedir=os.path.dirname(opfpath),
            populate_spine=False,
            unquote_urls=False,
        )
        mi = OPF(ns, unquote_urls=False, populate_spine=False).to_book_metadata()
        # Cover information from the new OPF is deliberately discarded.
        mi.cover, mi.cover_data = None, (None, None)
        update_metadata(opf, mi, apply_null=True, update_timestamp=True)
        # Serialise BEFORE truncating: if rendering fails the existing OPF
        # content must be left intact instead of an empty file.
        rendered = opf.render()
        stream.seek(0)
        stream.truncate()
        stream.write(rendered)
def update_metadata(ebook, new_opf):
    """Replace the book's OPF metadata with the metadata read from *new_opf*.

    Opens the book's OPF (``ebook.opf_name``) in ``'r+b'`` mode, applies the
    metadata parsed from the file at ``new_opf`` (with ``apply_null=True``
    and ``update_timestamp=True``) and rewrites the OPF in place.

    :param ebook: container object providing ``opf_name``,
        ``name_to_abspath()`` and ``open()``.
    :param new_opf: filesystem path of the OPF file supplying the metadata.
    """
    from calibre.ebooks.metadata.opf2 import OPF
    # The import below rebinds ``update_metadata`` locally, so the call
    # further down reaches calibre's EPUB helper rather than this function.
    from calibre.ebooks.metadata.epub import update_metadata

    opfpath = ebook.name_to_abspath(ebook.opf_name)
    with ebook.open(ebook.opf_name, 'r+b') as stream, open(new_opf, 'rb') as ns:
        opf = OPF(
            stream,
            basedir=os.path.dirname(opfpath),
            populate_spine=False,
            unquote_urls=False,
        )
        mi = OPF(ns, unquote_urls=False, populate_spine=False).to_book_metadata()
        # Drop any cover carried by the new OPF; only textual metadata is applied.
        mi.cover, mi.cover_data = None, (None, None)
        update_metadata(opf, mi, apply_null=True, update_timestamp=True)
        # Render first so a serialisation error cannot leave the OPF file
        # truncated to zero bytes.
        rendered = opf.render()
        stream.seek(0)
        stream.truncate()
        stream.write(rendered)
def set_metadata_opf2(root, cover_prefix, mi, opf_version, cover_data=None,
                      apply_null=False, update_timestamp=False,
                      force_identifiers=False, add_missing_cover=True):
    """Apply the metadata in *mi* to a pre-parsed OPF tree and render it.

    :param root: pre-parsed OPF root element, handed to ``OPF`` as
        ``preparsed_opf``.
    :param cover_prefix: path prefix used when a new ``cover.jpg`` manifest
        item has to be created; a trailing ``/`` is appended if missing.
    :param mi: metadata to apply; it is copied via ``MetaInformation`` and its
        ``guide``, ``toc``, ``manifest`` and ``spine`` attributes are cleared
        on the copy.
    :param opf_version: object whose ``major`` attribute selects how the cover
        is marked (``<meta name="cover">`` below 3, the ``cover-image``
        property otherwise).
    :param cover_data: when not ``None`` and the OPF has no raster cover, a
        cover entry is registered (subject to *add_missing_cover*).
    :param apply_null: forwarded to ``opf.smart_update``; also makes the
        identifiers of *mi* replace the existing ones outright.
    :param update_timestamp: copy ``mi.timestamp`` to the OPF when it is set.
    :param force_identifiers: replace identifiers outright instead of merging.
    :param add_missing_cover: allow registering a cover entry when none exists.
    :return: tuple ``(rendered_opf, raster_cover)``.
    """
    mi = MetaInformation(mi)
    for x in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, x, None)
    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        mi.languages = normalize_languages(list(opf.raw_languages) or [], mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid
    if apply_null or force_identifiers:
        opf.set_identifiers(mi.get_identifiers())
    else:
        # Merge: existing identifiers are kept, entries from mi win on clash,
        # and empty keys/values are dropped.
        orig = opf.get_identifiers()
        orig.update(mi.get_identifiers())
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` is
        # Python 2 only.
        opf.set_identifiers({k: v for k, v in orig.items() if k and v})
    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp

    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        guide_raster_cover = opf.guide_raster_cover
        i = None
        if guide_raster_cover is not None:
            # Reuse the cover already referenced from the guide.
            i = guide_raster_cover
            raster_cover = i.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            i = create_manifest_item(opf.root, name, 'cover')
            if i is not None:
                raster_cover = name
        if i is not None:
            if opf_version.major < 3:
                # Replace any existing <meta name="cover"> with one pointing
                # at the chosen item.
                for x in opf.root.xpath('//*[local-name()="meta" and @name="cover"]'):
                    x.getparent().remove(x)
                m = opf.create_metadata_element('meta', is_dc=False)
                m.set('name', 'cover')
                m.set('content', i.get('id'))
            else:
                # Strip the cover-image property from every item carrying it,
                # then mark the chosen item.
                for x in opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]'):
                    x.set('properties', x.get('properties').replace('cover-image', '').strip())
                i.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
def set_metadata_opf2(root, cover_prefix, mi, opf_version, cover_data=None,
                      apply_null=False, update_timestamp=False,
                      force_identifiers=False, add_missing_cover=True):
    """Update a pre-parsed OPF with *mi* and return its rendered form.

    A copy of *mi* (with ``guide``, ``toc``, ``manifest`` and ``spine``
    cleared) is merged into the OPF built from *root*. Identifiers are either
    replaced outright (*apply_null* or *force_identifiers*) or merged with the
    existing ones. When the OPF has no raster cover, *cover_data* is given and
    *add_missing_cover* is true, a cover item is located or created and marked
    according to ``opf_version.major``.

    :return: tuple ``(rendered_opf, raster_cover)``.
    """
    mi = MetaInformation(mi)
    for field in ('guide', 'toc', 'manifest', 'spine'):
        setattr(mi, field, None)

    opf = OPF(None, preparsed_opf=root, read_toc=False)
    if mi.languages:
        existing_langs = list(opf.raw_languages) or []
        mi.languages = normalize_languages(existing_langs, mi.languages)

    opf.smart_update(mi, apply_null=apply_null)
    if getattr(mi, 'uuid', None):
        opf.application_id = mi.uuid

    # Identifiers: wholesale replacement or merge with the existing set.
    if apply_null or force_identifiers:
        identifiers = mi.get_identifiers()
    else:
        merged = opf.get_identifiers()
        merged.update(mi.get_identifiers())
        identifiers = {key: val for key, val in merged.items() if key and val}
    opf.set_identifiers(identifiers)

    if update_timestamp and mi.timestamp is not None:
        opf.timestamp = mi.timestamp

    raster_cover = opf.raster_cover
    if raster_cover is None and cover_data is not None and add_missing_cover:
        # Prefer the cover referenced from the guide; otherwise register a
        # new manifest item.
        cover_item = opf.guide_raster_cover
        if cover_item is not None:
            raster_cover = cover_item.get('href')
        else:
            if cover_prefix and not cover_prefix.endswith('/'):
                cover_prefix += '/'
            name = cover_prefix + 'cover.jpg'
            cover_item = create_manifest_item(opf.root, name, 'cover')
            if cover_item is not None:
                raster_cover = name

        if cover_item is not None:
            if opf_version.major < 3:
                stale_metas = opf.root.xpath('//*[local-name()="meta" and @name="cover"]')
                for stale in stale_metas:
                    stale.getparent().remove(stale)
                meta = opf.create_metadata_element('meta', is_dc=False)
                meta.set('name', 'cover')
                meta.set('content', cover_item.get('id'))
            else:
                flagged = opf.root.xpath('//*[local-name()="item" and contains(@properties, "cover-image")]')
                for item in flagged:
                    cleaned = item.get('properties').replace('cover-image', '').strip()
                    item.set('properties', cleaned)
                cover_item.set('properties', 'cover-image')

    with pretty_print:
        return opf.render(), raster_cover
def set_metadata(stream, mi):
    """Write the metadata in *mi* into the OPF of the zip archive in *stream*.

    The first OPF found in the archive is parsed, updated from *mi* with
    ``replace_metadata=True`` and written back via ``safe_replace``. If *mi*
    supplies cover data (``mi.cover_data[1]``, or failing that the file at
    ``mi.cover``), the cover is written to a temporary file and added to /
    replaced in the archive under the OPF's raster cover path, falling back
    to ``cover.jpg``; ``mi.cover`` is then set to that path.

    :param stream: file-like object containing the zip archive.
    :param mi: metadata object to apply.
    """
    replacements = {}
    # Bound up front so the cleanup below never depends on whether a cover
    # was actually supplied.
    cpath = None

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = io.BytesIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the file named by mi.cover.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        # Best effort: cover_data may be missing or malformed.
        new_cdata = None
    if not new_cdata:
        try:
            with open(mi.cover, 'rb') as f:
                new_cdata = f.read()
        except Exception:
            # Best effort: no usable cover file, carry on without a cover.
            pass

    try:
        if new_cdata:
            cpath = opf.raster_cover or 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            replacements[cpath] = open(new_cover.name, 'rb')
            mi.cover = cpath

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = io.BytesIO(opf.render())
        safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
    finally:
        # Cleanup temporary files, even when updating the archive failed.
        cover_file = replacements.get(cpath)
        if cover_file is not None:
            try:
                cover_file.close()
                os.remove(cover_file.name)
            except (IOError, OSError):
                # Failing to delete a temporary file is not fatal.
                pass
def set_metadata(stream, mi):
    """Write the metadata in *mi* into the OPF of the zip archive in *stream*.

    The first OPF found in the archive is parsed, updated from *mi* with
    ``replace_metadata=True`` and written back via ``safe_replace``. If *mi*
    supplies cover data (``mi.cover_data[1]``, or failing that the file at
    ``mi.cover``), the cover is written to a temporary file and added to /
    replaced in the archive under the OPF's raster cover path, falling back
    to ``cover.jpg``; ``mi.cover`` is then set to that path.

    :param stream: file-like object containing the zip archive.
    :param mi: metadata object to apply.
    """
    replacements = {}
    # Bound up front so the cleanup below never depends on whether a cover
    # was actually supplied.
    cpath = None

    # Get the OPF in the archive.
    with ZipFile(stream) as zf:
        opf_path = get_first_opf_name(zf)
        opf_stream = StringIO(zf.read(opf_path))
    opf = OPF(opf_stream)

    # Cover: prefer in-memory data, fall back to the file named by mi.cover.
    new_cdata = None
    try:
        new_cdata = mi.cover_data[1]
    except Exception:
        # Best effort: cover_data may be missing or malformed.
        new_cdata = None
    if not new_cdata:
        try:
            # Context manager so the handle is closed deterministically.
            with open(mi.cover, 'rb') as f:
                new_cdata = f.read()
        except Exception:
            # Best effort: no usable cover file, carry on without a cover.
            pass

    try:
        if new_cdata:
            cpath = opf.raster_cover or 'cover.jpg'
            new_cover = _write_new_cover(new_cdata, cpath)
            replacements[cpath] = open(new_cover.name, 'rb')
            mi.cover = cpath

        # Update the metadata.
        opf.smart_update(mi, replace_metadata=True)
        newopf = StringIO(opf.render())
        safe_replace(stream, opf_path, newopf, extra_replacements=replacements, add_missing=True)
    finally:
        # Cleanup temporary files, even when updating the archive failed.
        cover_file = replacements.get(cpath)
        if cover_file is not None:
            try:
                cover_file.close()
                os.remove(cover_file.name)
            except (IOError, OSError):
                # Failing to delete a temporary file is not fatal.
                pass
def convert(self, stream, options, file_ext, log, accelerators):
    """Unpack a KePub archive and produce a cleaned-up ``content.opf``.

    The archive in *stream* is extracted into the current working directory
    (with a more forgiving unzip as fallback), its OPF is located and
    normalised, and the result is written to ``content.opf`` in the current
    directory.

    :param stream: file-like object holding the KePub archive.
    :param log: logger; called directly and via ``log.exception``.
    :return: absolute path of the written ``content.opf``.
    :raises ValueError: no OPF found, DTBook markup present, or empty spine.
    :raises DRMError: a ``rights.xml`` file exists after extraction.
    """
    log("KEPUBInput::convert - start")
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    from calibre.utils.zipfile import ZipFile

    try:
        archive = ZipFile(stream)
        if sys.version_info.major == 2:
            cwd = os.getcwdu()
        else:
            cwd = os.getcwd()
        archive.extractall(cwd)
    except Exception:
        log.exception("KEPUB appears to be invalid ZIP file, trying a "
                      "more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall

        stream.seek(0)
        extractall(stream)

    opf_file = self.find_opf()
    if opf_file is None:
        # Fall back to the first plausible .opf anywhere in the extracted tree.
        for candidate in walk("."):
            if not candidate.lower().endswith(".opf"):
                continue
            if "__MACOSX" in candidate:
                continue
            if os.path.basename(candidate).startswith("."):
                continue
            opf_file = os.path.abspath(candidate)
            break

    source_name = getattr(stream, "name", "stream")
    if opf_file is None:
        message = _(  # noqa: F821
            "{0} is not a valid KEPUB file (could not find opf)")
        raise ValueError(message.format(source_name))

    if os.path.exists(os.path.abspath("rights.xml")):
        raise DRMError(os.path.basename(source_name))

    if sys.version_info.major == 2:
        cwd = os.getcwdu()
    else:
        cwd = os.getcwd()
    opf_file = os.path.relpath(opf_file, cwd)
    path_parts = os.path.split(opf_file)
    opf = OPF(opf_file, os.path.dirname(os.path.abspath(opf_file)))

    self.encrypted_fonts = []

    if len(path_parts) > 1 and path_parts[0]:
        # The OPF lives in a subdirectory: prefix hrefs with that directory.
        prefix = "/".join(path_parts[:-1]) + "/"
        for entry in opf.itermanifest():
            entry.set("href", prefix + entry.get("href"))
        for entry in opf.iterguide():
            entry.set("href", prefix + entry.get("href"))

    if opf.package_version >= 3.0:
        rationalize = self.rationalize_cover3
    else:
        rationalize = self.rationalize_cover2
    self.removed_cover = rationalize(opf, log)
    self.optimize_opf_parsing = opf

    for entry in opf.itermanifest():
        if entry.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported")  # noqa: F821
            )

    # Manifest ids whose media type must never appear in the spine.
    excluded_types = {
        "application/vnd.adobe-page-template+xml",
        "application/vnd.adobe.page-template+xml",
        "application/adobe-page-template+xml",
        "application/adobe.page-template+xml",
        "application/text",
    }
    not_for_spine = set()
    for entry in opf.itermanifest():
        item_id = entry.get("id", None)
        if item_id and entry.get("media-type", None) in excluded_types:
            not_for_spine.add(item_id)

    # Drop spine entries that are empty, excluded or duplicated.
    seen = set()
    for itemref in list(opf.iterspine()):
        ref = itemref.get("idref", None)
        keep = bool(ref) and ref not in not_for_spine and ref not in seen
        if keep:
            seen.add(ref)
        else:
            itemref.getparent().remove(itemref)

    if not list(opf.iterspine()):
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )

    with open("content.opf", "wb") as out:
        out.write(opf.render())

    return os.path.abspath("content.opf")
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    The archive in *stream* is extracted into the current working directory
    (with a more forgiving unzip as fallback), its OPF is located and
    normalised, and the result is written to ``content.opf`` in the current
    directory.

    :param stream: file-like object holding the KePub archive.
    :param options: not used by this method.
    :param file_ext: not used by this method.
    :param log: logger; called directly and via ``log.exception``.
    :param accelerators: not used by this method.
    :return: absolute path of the written ``content.opf``.
    :raises ValueError: no OPF found, DTBook markup present, or empty spine.
    :raises DRMError: a ``rights.xml`` file exists after extraction.
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    # os.getcwdu() exists only on Python 2; fall back to os.getcwd() elsewhere.
    cwd = getattr(os, 'getcwdu', os.getcwd)()

    try:
        zf = ZipFile(stream)
        zf.extractall(cwd)
    except Exception:
        log.exception('KEPUB appears to be invalid ZIP file, trying a '
                      'more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)

    opf = self.find_opf()
    if opf is None:
        # Fall back to the first plausible .opf anywhere in the extracted tree.
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')

    if opf is None:
        raise ValueError(
            _('%s is not a valid KEPUB file (could not find opf)') % path)

    encfile = os.path.abspath('rights.xml')
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))

    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self.encrypted_fonts = []

    if len(parts) > 1 and parts[0]:
        # The OPF lives in a subdirectory: prefix hrefs with that directory.
        delta = '/'.join(parts[:-1]) + '/'
        for elem in opf.itermanifest():
            elem.set('href', delta + elem.get('href'))
        for elem in opf.iterguide():
            elem.set('href', delta + elem.get('href'))

    f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
        self.rationalize_cover2
    self.removed_cover = f(opf, log)

    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                _('EPUB files with DTBook markup are not supported'))

    # Manifest ids whose media type must never appear in the spine.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_ and y.get('media-type', None) in {
                'application/vnd.adobe-page-template+xml',
                'application/vnd.adobe.page-template+xml',
                'application/adobe-page-template+xml',
                'application/adobe.page-template+xml',
                'application/text'
        }:
            not_for_spine.add(id_)

    # Drop spine entries that are empty, excluded or duplicated.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError(_('No valid entries in the spine of this EPUB'))

    with open('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    Extracts the archive in *stream* into the current working directory
    (retrying with a more forgiving unzip on failure), finds and normalises
    the OPF, and writes it to ``content.opf`` in the current directory.

    :param stream: file-like object holding the KePub archive.
    :param options: not used by this method.
    :param file_ext: not used by this method.
    :param log: logger; called directly and via ``log.exception``.
    :param accelerators: not used by this method.
    :return: absolute path of the written ``content.opf``.
    :raises ValueError: no OPF found, DTBook markup present, or empty spine.
    :raises DRMError: a ``rights.xml`` file exists after extraction.
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    # Python 3 has no os.getcwdu(); use it only where it exists.
    cwd = getattr(os, 'getcwdu', os.getcwd)()

    try:
        zf = ZipFile(stream)
        zf.extractall(cwd)
    except Exception:
        log.exception('KEPUB appears to be invalid ZIP file, trying a '
                      'more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)

    opf = self.find_opf()
    if opf is None:
        # No OPF reported by find_opf(): take the first non-hidden .opf
        # outside __MACOSX found under the current directory.
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')

    if opf is None:
        raise ValueError(
            _('%s is not a valid KEPUB file (could not find opf)') % path)

    encfile = os.path.abspath('rights.xml')
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))

    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self.encrypted_fonts = []

    if len(parts) > 1 and parts[0]:
        # Prefix manifest and guide hrefs with the OPF's directory.
        delta = '/'.join(parts[:-1]) + '/'
        for elem in opf.itermanifest():
            elem.set('href', delta + elem.get('href'))
        for elem in opf.iterguide():
            elem.set('href', delta + elem.get('href'))

    f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
        self.rationalize_cover2
    self.removed_cover = f(opf, log)

    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(_(
                'EPUB files with DTBook markup are not supported'))

    # Collect ids of manifest items that must be kept out of the spine.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_ and y.get('media-type', None) in {
                'application/vnd.adobe-page-template+xml',
                'application/vnd.adobe.page-template+xml',
                'application/adobe-page-template+xml',
                'application/adobe.page-template+xml',
                'application/text'
        }:
            not_for_spine.add(id_)

    # Remove spine entries without idref, with an excluded idref, or repeated.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError(_('No valid entries in the spine of this EPUB'))

    with open('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    Extracts the archive in *stream* into the current working directory
    (retrying with a more forgiving unzip on failure), finds and normalises
    the OPF, and writes it to ``content.opf`` in the current directory.

    :param stream: file-like object holding the KePub archive.
    :param options: not used by this method.
    :param file_ext: not used by this method.
    :param log: logger; called directly and via ``log.exception``.
    :param accelerators: not used by this method.
    :return: absolute path of the written ``content.opf``.
    :raises ValueError: no OPF found, DTBook markup present, or empty spine.
    :raises DRMError: a ``rights.xml`` file exists after extraction.
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    # os.getcwdu() is Python 2 only; use os.getcwd() when it is absent.
    cwd = getattr(os, "getcwdu", os.getcwd)()

    try:
        zf = ZipFile(stream)
        zf.extractall(cwd)
    except Exception:
        log.exception(
            "KEPUB appears to be invalid ZIP file, trying a "
            "more forgiving ZIP parser"
        )
        from calibre.utils.localunzip import extractall

        stream.seek(0)
        extractall(stream)

    opf = self.find_opf()
    if opf is None:
        # Fall back to the first non-hidden .opf outside __MACOSX.
        for f in walk(u"."):
            if (
                f.lower().endswith(".opf")
                and "__MACOSX" not in f
                and not os.path.basename(f).startswith(".")
            ):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")

    if opf is None:
        raise ValueError(
            _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)"
            ).format(path)
        )

    encfile = os.path.abspath("rights.xml")
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))

    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self.encrypted_fonts = []

    if len(parts) > 1 and parts[0]:
        # Prefix manifest and guide hrefs with the OPF's directory.
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))

    f = (
        self.rationalize_cover3
        if opf.package_version >= 3.0
        else self.rationalize_cover2
    )
    self.removed_cover = f(opf, log)

    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported")  # noqa: F821
            )

    # Collect ids of manifest items that must be kept out of the spine.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }:
            not_for_spine.add(id_)

    # Remove spine entries without idref, with an excluded idref, or repeated.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )

    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u"content.opf")