def get_metadata(stream):
    """Read book metadata, and a cover when one can be found, from a MOBI stream.

    A stream whose first three bytes are ``TPZ`` is handed to the Topaz
    metadata reader instead.

    :param stream: Seekable binary file-like object. Its ``name`` attribute,
        when available, provides the fallback title.
    :return: The metadata object; ``cover_data`` is set to ``('jpg', data)``
        when a cover image was found and converted successfully.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    # Sniff the magic bytes, then rewind so later readers start at offset 0.
    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()

    # Start from the file name as title; fall back to 'Unknown' when the
    # stream has no usable ``name``. Catch Exception rather than using a bare
    # except so KeyboardInterrupt/SystemExit are not swallowed.
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])

    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: look for metadata embedded in the book content,
        # but only for files smaller than 4 MiB. The size defaults to 1 GiB
        # so that a stream whose size cannot be probed is never unpacked.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    # Locate the cover record: an explicit cover offset when the EXTH header
    # has one, otherwise the first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''

    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def get_metadata(stream):
    """Read book metadata, and a cover when one can be found, from a MOBI stream.

    A stream whose first three bytes are ``TPZ`` is handed to the Topaz
    metadata reader instead.

    :param stream: Seekable binary file-like object. Its ``name`` attribute,
        when available, provides the fallback title.
    :return: The metadata object; ``cover_data`` is set to ``('jpg', data)``
        when a cover image was found and converted successfully.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.magick.draw import save_cover_data_to
    from calibre import CurrentDir

    # Sniff the magic bytes, then rewind. The fallback is an empty *bytes*
    # value so it has the same type as the data it stands in for.
    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()

    # Start from the file name as title; fall back to 'Unknown' when the
    # stream has no usable ``name``. Exception (not a bare except) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])

    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: look for metadata embedded in the book content,
        # but only for files smaller than 4 MiB. The size defaults to 1 GiB
        # so that a stream whose size cannot be probed is never unpacked.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    # Locate the cover record: an explicit cover offset when the EXTH header
    # has one, otherwise the first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''

    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data, 'cover.jpg', return_data=True))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def explode(path, dest, question=lambda x:True):
    """Validate the MOBI file at ``path`` and explode it into ``dest``.

    The file is rejected with ``BadFormat`` when it is a Topaz file, is not
    a MOBI file, or contains no KF8 book, and with ``DRMError`` when it is
    encrypted. For a joint KF8/Mobi6 file ``question`` is called with a
    confirmation message; a falsy answer makes this function return None.

    :return: The ``'result'`` entry of the forked ``do_explode`` job, or
        None when the user declined.
    """
    with open(path, 'rb') as stream:
        magic = stream.read(3)
        stream.seek(0)  # rewind after sniffing the magic bytes
        if magic == b'TPZ':
            raise BadFormat(_('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise BadFormat(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError(_('This file is locked with DRM. It cannot be tweaked.'))

        book_kind = header.kf8_type
        if book_kind is None:
            raise BadFormat(_('This MOBI file does not contain a KF8 format '
                              'book. KF8 is the new format from Amazon. calibre can '
                              'only tweak MOBI files that contain KF8 books. Older '
                              'MOBI files without KF8 are not tweakable.'))

        # Joint files lose their Mobi6 half when tweaked, so ask first.
        if book_kind == 'joint' and not question(
                _('This MOBI file contains both KF8 and '
                  'older Mobi6 data. Tweaking it will remove the Mobi6 data, which '
                  'means the file will not be usable on older Kindles. Are you '
                  'sure?')):
            return None

    job = fork_job('calibre.ebooks.mobi.tweak', 'do_explode',
                   args=(path, dest), no_output=True)
    return job['result']
def thumbpath_from_filepath(self, filepath):
    """Return the system thumbnail path for the book at ``filepath``.

    The book key (uuid and cdetype) is read with the KFX reader when the
    file starts with the KFX container magic, otherwise from the MOBI EXTH
    header. Returns None when the thumbnails directory does not exist or
    when either part of the key is missing.
    """
    from calibre.ebooks.metadata.kfx import (CONTAINER_MAGIC, read_book_key_kfx)
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.utils.logging import default_log

    thumbnails = self.amazon_system_thumbnails_dir()
    if not os.path.exists(thumbnails):
        return

    book_uuid = book_cdetype = None
    with lopen(filepath, 'rb') as book:
        looks_like_kfx = book.read(4) == CONTAINER_MAGIC
        book.seek(0)  # rewind after sniffing the container magic
        if looks_like_kfx:
            book_uuid, book_cdetype = read_book_key_kfx(book)
        else:
            exth = MetadataHeader(book, default_log).exth
            if exth is not None:
                book_uuid, book_cdetype = exth.uuid, exth.cdetype

    if not (book_uuid and book_cdetype):
        return

    thumb_name = 'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
        uuid=book_uuid, cdetype=book_cdetype)
    return os.path.join(thumbnails, thumb_name)
def write_apnx(self, mobi_file_path, apnx_path, accurate=True, page_count=0):
    '''
    Generate an APNX page mapping for a MOBI file and write it to apnx_path.

    If you want a fixed number of pages (such as from a custom column) then
    pass in a value to page_count, otherwise a count will be estimated
    using either the fast or accurate algorithm.

    :param mobi_file_path: Path of the MOBI file to paginate.
    :param apnx_path: Path the generated APNX data is written to.
    :param accurate: When no page_count is given, try the accurate parser
        first and fall back to the fast parser if it fails.
    :param page_count: Fixed number of pages; a falsy value means estimate.
    :raises Exception: If the file does not identify as BOOKMOBI, or if no
        page mapping could be generated.
    '''
    import uuid
    # Defaults; 'format', 'cdetype', 'asin' and 'acr' are overridden below
    # when the book's headers provide better values.
    apnx_meta = {
        'guid': str(uuid.uuid4()).replace('-', '')[:8],
        'asin': '',
        'cdetype': 'EBOK',
        'format': 'MOBI_7',
        'acr': '',
    }

    with open(mobi_file_path, 'rb') as mf:
        ident = PdbHeaderReader(mf).identity()
        if ident != 'BOOKMOBI':
            # Check that this is really a MOBI file.
            raise Exception(_('Not a valid MOBI file. Reports identity of %s') % ident)
        apnx_meta['acr'] = str(PdbHeaderReader(mf).name())

    # We'll need the PDB name, the MOBI version, and some metadata to make
    # FW 3.4 happy with KF8 files...
    with open(mobi_file_path, 'rb') as mf:
        mh = MetadataHeader(mf, default_log)
        if mh.mobi_version == 8:
            apnx_meta['format'] = 'MOBI_8'
        if mh.exth is not None:
            if mh.exth.cdetype:
                apnx_meta['cdetype'] = str(mh.exth.cdetype)
            if mh.exth.uuid:
                apnx_meta['asin'] = str(mh.exth.uuid)

    # Get the pages depending on the chosen parser
    if page_count:
        pages = self.get_pages_exact(mobi_file_path, page_count)
    elif accurate:
        try:
            pages = self.get_pages_accurate(mobi_file_path)
        except Exception:
            # Fall back to the fast parser if we can't
            # use the accurate one. Typically this is
            # due to the file having DRM.
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            pages = self.get_pages_fast(mobi_file_path)
    else:
        pages = self.get_pages_fast(mobi_file_path)

    if not pages:
        raise Exception(_('Could not generate page mapping.'))

    # Generate the APNX file from the page mapping.
    apnx = self.generate_apnx(pages, apnx_meta)

    # Write the APNX.
    with open(apnx_path, 'wb') as apnxf:
        apnxf.write(apnx)
        fsync(apnxf)
def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
    """Open an AZW3 (KF8-only MOBI) file as an editable container.

    With ``clone_data`` the container is rebuilt from that data and the
    file is not read again. Otherwise the file is validated, exploded into
    ``tdir`` (a persistent temporary directory is created when ``tdir`` is
    None) by a forked job, and the base container is initialised on the
    result.

    Raises ``InvalidMobi`` for Topaz, non-MOBI, non-KF8 or joint files and
    when the explode job fails; raises ``DRMError`` for encrypted files.
    """
    if clone_data is not None:
        super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
        self.pathtoazw3 = clone_data['pathtoazw3']
        self.obfuscated_fonts = clone_data['obfuscated_fonts']
        return

    self.pathtoazw3 = pathtoazw3
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_azw3_container')
    tdir = os.path.abspath(os.path.realpath(tdir))
    self.root = tdir

    with open(pathtoazw3, 'rb') as stream:
        magic = stream.read(3)
        if magic == b'TPZ':
            raise InvalidMobi(
                _('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise InvalidMobi(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError()

        book_kind = header.kf8_type
        if book_kind is None:
            raise InvalidMobi(
                _('This MOBI file does not contain a KF8 format '
                  'book. KF8 is the new format from Amazon. calibre can '
                  'only edit MOBI files that contain KF8 books. Older '
                  'MOBI files without KF8 are not editable.'))
        if book_kind == 'joint':
            raise InvalidMobi(
                _('This MOBI file contains both KF8 and '
                  'older Mobi6 data. calibre can only edit MOBI files '
                  'that contain only KF8 data.'))

    try:
        job = fork_job(
            'calibre.ebooks.oeb.polish.container', 'do_explode',
            args=(pathtoazw3, tdir), no_output=True)
        opf_path, obfuscated_fonts = job['result']
    except WorkerError as e:
        log(e.orig_tb)
        raise InvalidMobi('Failed to explode MOBI')

    super(AZW3Container, self).__init__(tdir, opf_path, log)
    # Store font names with forward slashes regardless of the OS separator.
    self.obfuscated_fonts = set(
        font.replace(os.sep, '/') for font in obfuscated_fonts)
def thumbpath_from_filepath(self, filepath):
    """Return the system thumbnail path for the MOBI book at ``filepath``.

    The name is built from the uuid and cdetype found in the book's EXTH
    header. Returns None when the ``system/thumbnails`` directory under the
    main prefix does not exist, or when the header lacks an EXTH block, a
    uuid or a cdetype.
    """
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.utils.logging import default_log

    thumbnails = os.path.join(self._main_prefix, 'system', 'thumbnails')
    if not os.path.exists(thumbnails):
        return

    with lopen(filepath, 'rb') as book:
        header = MetadataHeader(book, default_log)

    exth = header.exth
    if exth is None or not (exth.uuid and exth.cdetype):
        return

    thumb_name = 'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
        uuid=exth.uuid, cdetype=exth.cdetype)
    return os.path.join(thumbnails, thumb_name)
def upload_kindle_thumbnail(self, metadata, filepath):
    """Write the book's thumbnail into the device's system thumbnails folder.

    Nothing is written when ``metadata`` has no thumbnail data (third item
    of ``metadata.thumbnail``), when the ``system/thumbnails`` directory
    under the main prefix does not exist, or when the book's EXTH header
    lacks a uuid or cdetype.
    """
    cover = getattr(metadata, 'thumbnail', None)
    if not (cover and cover[2]):
        return

    thumbnails = os.path.join(self._main_prefix, 'system', 'thumbnails')
    if not os.path.exists(thumbnails):
        return

    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    with lopen(filepath, 'rb') as book:
        header = MetadataHeader(book, default_log)

    exth = header.exth
    if exth is None or not (exth.uuid and exth.cdetype):
        return

    thumb_name = 'thumbnail_{uuid}_{cdetype}_portrait.jpg'.format(
        uuid=exth.uuid, cdetype=exth.cdetype)
    with open(os.path.join(thumbnails, thumb_name), 'wb') as out:
        out.write(cover[2])
def get_metadata(stream):
    """Read book metadata, and a cover when one can be found, from a MOBI stream.

    A stream whose first three bytes are ``TPZ`` is handed to the Topaz
    metadata reader instead. The cover record is decoded with PIL and
    re-encoded as JPEG; any failure while doing so is logged and the
    metadata is returned without a cover.

    :param stream: Seekable binary file-like object. Its ``name`` attribute,
        when available, provides the fallback title.
    :return: The metadata object; ``cover_data`` is set to ``('jpg', data)``
        when the cover was converted successfully.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage  # bare reference kept from the original
    except ImportError:
        import Image as PILImage

    # Sniff the magic bytes, then rewind so later readers start at offset 0.
    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()

    # Start from the file name as title; fall back to 'Unknown' when the
    # stream has no usable ``name``. Exception (not a bare except) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])

    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # No EXTH header: look for metadata embedded in the book content,
        # but only for files smaller than 4 MiB. The size defaults to 1 GiB
        # so that a stream whose size cannot be probed is never unpacked.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4*1024*1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    # Locate the cover record: an explicit cover offset when the EXTH header
    # has one, otherwise the first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = ''

    # Decode and re-encode inside one guarded region: an image that opens
    # but fails during conversion must not abort the whole metadata read.
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
    except Exception:
        log.exception('Failed to read MOBI cover')
    else:
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi
def get_metadata(stream):
    """Read book metadata, and a cover when one can be found, from a MOBI stream.

    A stream whose first three bytes are ``TPZ`` is handed to the Topaz
    metadata reader instead. The cover record is decoded with PIL and
    re-encoded as JPEG; any failure while doing so is logged and the
    metadata is returned without a cover.

    :param stream: Seekable binary file-like object. Its ``name`` attribute,
        when available, provides the fallback title.
    :return: The metadata object; ``cover_data`` is set to ``('jpg', data)``
        when the cover was converted successfully.
    """
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre import CurrentDir

    try:
        from PIL import Image as PILImage
        PILImage  # bare reference kept from the original
    except ImportError:
        import Image as PILImage

    # Sniff the magic bytes, then rewind so later readers start at offset 0.
    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = ''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)

    from calibre.utils.logging import Log
    log = Log()

    # Title defaults to the file name, or 'Unknown' when the stream has no
    # usable ``name``. Exception (not a bare except) keeps
    # KeyboardInterrupt/SystemExit propagating.
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        mi = MetaInformation(_('Unknown'), [_('Unknown')])

    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # Without an EXTH header, fall back to metadata embedded in the book
        # content, but only for files smaller than 4 MiB. The 1 GiB default
        # means a stream whose size cannot be probed is never unpacked.
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi

    # Cover record: the explicit cover offset when the EXTH header has one,
    # otherwise the first image record.
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = ''

    # Opening and converting share one guarded region so that an image which
    # opens but cannot be converted does not abort the whole metadata read.
    buf = cStringIO.StringIO(data)
    try:
        im = PILImage.open(buf)
        obuf = cStringIO.StringIO()
        im.convert('RGB').save(obuf, format='JPEG')
    except Exception:
        log.exception('Failed to read MOBI cover')
    else:
        mi.cover_data = ('jpg', obuf.getvalue())
    return mi