def do_explode(path, dest):
    """Run the KF8 reader (in tweak mode) on the book at ``path`` inside ``dest``.

    Args:
        path: Path of the book file; it is opened in binary mode.
        dest: Directory handed to ``CurrentDir`` while the KF8 reader runs
            (presumably it becomes the working directory -- confirm against
            ``CurrentDir``).

    Returns:
        A ``(opf, obfuscated_fonts)`` tuple: the absolute form of the path
        returned by calling the ``Mobi8Reader``, and that reader's
        ``encrypted_fonts`` attribute.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader

    with open(path, 'rb') as stream:
        mobi6_reader = MobiReader(stream, default_log, None, None)
        with CurrentDir(dest):
            kf8_reader = Mobi8Reader(mobi6_reader, default_log, for_tweak=True)
            # abspath() resolves against the current working directory, so
            # it has to be evaluated before leaving the CurrentDir block.
            opf_path = os.path.abspath(kf8_reader())
            font_info = kf8_reader.encrypted_fonts

    return opf_path, font_info
def do_explode(path, dest):
    """Run the KF8 reader on the book at ``path`` inside ``dest``.

    Args:
        path: Path of the book file; it is opened in binary mode.
        dest: Directory handed to ``CurrentDir`` while the KF8 reader runs
            (presumably it becomes the working directory -- confirm against
            ``CurrentDir``).

    Returns:
        The absolute form of the path returned by calling the
        ``Mobi8Reader``.
    """
    with open(path, 'rb') as stream:
        mobi6_reader = MobiReader(stream, default_log, None, None)
        with CurrentDir(dest):
            kf8_reader = Mobi8Reader(mobi6_reader, default_log)
            # abspath() resolves against the current working directory, so
            # it has to be evaluated before leaving the CurrentDir block.
            opf = os.path.abspath(kf8_reader())
            # Best-effort cleanup of the debug dump. Only filesystem errors
            # (typically "file not found") are ignored; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf
def convert(self, stream, options, file_ext, log, accelerators):
    """Read a MOBI stream and return the path of the resulting OPF.

    The stream is first parsed with ``MobiReader``. If that attempt raises,
    the parse is repeated once with ``try_extra_data_fix=True``.

    Side effects on ``self``: ``is_kf8`` and ``mobi_is_joint`` are reset to
    ``False`` on entry; for KF8 input ``is_kf8`` is set to ``True``,
    ``encrypted_fonts`` is copied from the ``Mobi8Reader`` and
    ``mobi_is_joint`` is set when ``kf8_type == 'joint'``.

    Args:
        stream: Input stream passed to ``MobiReader``.
        options: Object providing ``input_encoding`` and ``debug_pipeline``.
        file_ext: Not used by this method.
        log: Callable logger; also passed to the readers.
        accelerators: Mapping that receives a ``'pagebreaks'`` entry on the
            non-KF8 path.

    Returns:
        For KF8 input, the absolute form of the path returned by calling the
        ``Mobi8Reader``; otherwise ``mr.created_opf_path``.
    """
    self.is_kf8 = False
    self.mobi_is_joint = False

    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from lxml import html

    parse_cache = {}
    try:
        mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_pipeline)
        if mr.kf8_type is None:
            mr.extract_content('.', parse_cache)
    except Exception:
        # Retry once with the extra-data fix enabled. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate instead of triggering a second parse.
        mr = MobiReader(stream, log, options.input_encoding,
                        options.debug_pipeline, try_extra_data_fix=True)
        if mr.kf8_type is None:
            mr.extract_content('.', parse_cache)

    if mr.kf8_type is not None:
        log('Found KF8 MOBI of type %r' % mr.kf8_type)
        if mr.kf8_type == 'joint':
            self.mobi_is_joint = True
        from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
        mr = Mobi8Reader(mr, log)
        opf = os.path.abspath(mr())
        self.encrypted_fonts = mr.encrypted_fonts
        self.is_kf8 = True
        return opf

    # Non-KF8 path: dump the raw markup (if the reader cached any) for
    # debugging, then write every parsed tree out to the file named by its
    # cache key.
    raw_markup = parse_cache.pop('calibre_raw_mobi_markup', False)
    if raw_markup:
        if isinstance(raw_markup, unicode_type):
            raw_markup = raw_markup.encode('utf-8')
        with lopen('debug-raw.html', 'wb') as debug_file:
            debug_file.write(raw_markup)

    from calibre.ebooks.oeb.base import close_self_closing_tags
    for name, root in parse_cache.items():
        serialized = html.tostring(root, encoding='utf-8', method='xml',
                                   include_meta_content_type=False)
        serialized = close_self_closing_tags(serialized)
        with lopen(name, 'wb') as out:
            out.write(serialized)

    accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
    return mr.created_opf_path
def do_explode(path, dest):
    """Run the KF8 reader on the book at ``path`` inside ``dest``.

    Args:
        path: Path of the book file; it is opened in binary mode.
        dest: Directory handed to ``CurrentDir`` while the KF8 reader runs
            (presumably it becomes the working directory -- confirm against
            ``CurrentDir``).

    Returns:
        A ``(opf, obfuscated_fonts)`` tuple: the absolute form of the path
        returned by calling the ``Mobi8Reader``, and that reader's
        ``encrypted_fonts`` attribute.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader

    with open(path, 'rb') as stream:
        mobi6_reader = MobiReader(stream, default_log, None, None)
        with CurrentDir(dest):
            kf8_reader = Mobi8Reader(mobi6_reader, default_log)
            # abspath() resolves against the current working directory, so
            # it has to be evaluated before leaving the CurrentDir block.
            opf = os.path.abspath(kf8_reader())
            obfuscated_fonts = kf8_reader.encrypted_fonts
            # Best-effort cleanup of the debug dump. Only filesystem errors
            # (typically "file not found") are ignored; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf, obfuscated_fonts
def parse_mobi(pathtoebook, book_fmt):
    """Yield ``(position, text)`` for every run of text between markup tags.

    This is a generator, so nothing is read until iteration starts.

    Args:
        pathtoebook: Passed straight to ``MobiReader``.
        book_fmt: Format label; ``'AZW3'`` selects the KF8 code path.

    Yields:
        Tuples ``(position, text)`` where ``text`` is the bytes found
        between a ``>`` and the next ``<``, and ``position`` is the index of
        its first byte in the markup that was searched.
    """
    reader = MobiReader(pathtoebook, default_log)
    wants_kf8 = book_fmt == 'AZW3'

    # use code from calibre.ebooks.mobi.reader.mobi8:Mobi8Reader.__call__
    if wants_kf8 and reader.kf8_type == 'joint':
        first_section = reader.kf8_boundary + 2
    else:
        first_section = 1

    reader.extract_text(offset=first_section)
    markup = reader.mobi_html

    if wants_kf8:
        kf8_reader = Mobi8Reader(reader, default_log)
        kf8_reader.kf8_sections = reader.sections[first_section - 1:]
        kf8_reader.read_indices()
        kf8_reader.build_parts()
        markup = b''.join(kf8_reader.parts)

    # match text between HTML tags
    for hit in re.finditer(b'>[^<>]+<', markup):
        # The match includes the surrounding '>' and '<'; strip both and
        # shift the start by one so it points at the text itself.
        text_start = hit.start() + 1
        text = hit.group(0)[1:-1]
        yield (text_start, text)
def do_explode(path, dest):
    """Run the KF8 reader on the book at ``path`` inside ``dest`` and tidy up.

    After the reader has run, an ``images`` directory that contains nothing
    but ``.unknown`` files is emptied, and ``debug-raw.html`` is removed if
    it exists.

    Args:
        path: Path of the book file; it is opened in binary mode.
        dest: Directory handed to ``CurrentDir`` while the KF8 reader runs
            (presumably it becomes the working directory -- confirm against
            ``CurrentDir``).

    Returns:
        A ``(opf, obfuscated_fonts)`` tuple: the absolute form of the path
        returned by calling the ``Mobi8Reader``, and that reader's
        ``encrypted_fonts`` attribute.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader

    with open(path, 'rb') as stream:
        mobi6_reader = MobiReader(stream, default_log, None, None)
        with CurrentDir(dest):
            kf8_reader = Mobi8Reader(mobi6_reader, default_log)
            # abspath() resolves against the current working directory, so
            # it has to be evaluated before leaving the CurrentDir block.
            opf = os.path.abspath(kf8_reader())
            obfuscated_fonts = kf8_reader.encrypted_fonts

            # If there are no images then the azw3 input plugin dumps all
            # binary records as .unknown images, remove them
            if os.path.isdir('images'):
                files = os.listdir('images')
                if all(name.endswith('.unknown') for name in files):
                    for name in files:
                        os.remove(os.path.join('images', name))

            # Best-effort cleanup of the debug dump. Only filesystem errors
            # (typically "file not found") are ignored; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf, obfuscated_fonts
def parse_mobi(book_path):
    """Yield ``(text, position)`` for every run of text between markup tags.

    This is a generator, so nothing is read (and no exception is raised)
    until iteration starts.

    Args:
        book_path: Passed straight to ``MobiReader``.

    Yields:
        Tuples ``(text, position)`` where ``text`` is the UTF-8 decoded
        content found between a ``>`` and the next ``<``, and ``position``
        is the index of its first byte in the markup that was searched.

    Raises:
        Exception: With message ``'JointMOBI'`` when the reader reports
            ``kf8_type == 'joint'``.
    """
    # use code from calibre.ebooks.mobi.reader.mobi8:Mobi8Reader.__call__
    # and calibre.ebook.conversion.plugins.mobi_input:MOBIInput.convert
    # https://github.com/kevinhendricks/KindleUnpack/blob/master/lib/mobi_k8proc.py#L216
    try:
        reader = MobiReader(book_path, default_log)
    except Exception:
        # Second attempt with the extra-data fix switched on.
        reader = MobiReader(book_path, default_log, try_extra_data_fix=True)

    if reader.kf8_type == 'joint':
        raise Exception('JointMOBI')

    reader.check_for_drm()
    reader.extract_text()
    markup = reader.mobi_html

    if reader.kf8_type == 'standalone':
        kf8_reader = Mobi8Reader(reader, default_log)
        kf8_reader.kf8_sections = reader.sections
        kf8_reader.read_indices()
        kf8_reader.build_parts()
        markup = b''.join(kf8_reader.parts)

    # match text between HTML tags
    for hit in re.finditer(b'>[^<>]+<', markup):
        # The match includes the surrounding '>' and '<'; strip both and
        # shift the start by one so it points at the text itself.
        text = hit.group(0)[1:-1].decode('utf-8')
        text_start = hit.start() + 1
        yield (text, text_start)