def unpackBook(infile, outdir): files = fileNames(infile, outdir) # process the PalmDoc database header and verify it is a mobi sect = Sectionizer(infile) print "Palm DB type: ", sect.ident if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd': raise unpackException('invalid file format') if SPLIT_COMBO_MOBIS: # if this is a combination mobi7-mobi8 file split them up mobisplit = mobi_split(infile) if mobisplit.combo: outmobi7 = os.path.join( files.outdir, 'mobi7-' + files.getInputFileBasename() + '.mobi') outmobi8 = os.path.join( files.outdir, 'mobi8-' + files.getInputFileBasename() + '.mobi') file(outmobi7, 'wb').write(mobisplit.getResult7()) file(outmobi8, 'wb').write(mobisplit.getResult8()) # scan sections to see if this is a compound mobi file (K8 format) # and build a list of all mobi headers to process. mhlst = [] mh = MobiHeader(sect, 0) # if this is a mobi8-only file hasK8 here will be true hasK8 = mh.isK8() mhlst.append(mh) K8Boundary = -1 # the last section uses an appended entry of 0xfffffff as its starting point # attempting to process it will cause problems if not hasK8: # if this is a mobi8-only file we don't need to do this for i in xrange(len(sect.sections) - 1): before, after = sect.sections[i:i + 2] if (after - before) == 8: data = sect.loadSection(i) if data == K8_BOUNDARY: print "Mobi Ebook uses the new K8 file format" mh = MobiHeader(sect, i + 1) hasK8 = hasK8 or mh.isK8() mhlst.append(mh) K8Boundary = i break if hasK8: files.makeK8Struct() process_all_mobi_headers(files, sect, mhlst, K8Boundary, False) return
def unpackBook(infile, outdir): files = fileNames(infile, outdir) # process the PalmDoc database header and verify it is a mobi sect = Sectionizer(infile) print "Palm DB type: ", sect.ident if sect.ident != "BOOKMOBI" and sect.ident != "TEXtREAd": raise unpackException("invalid file format") if SPLIT_COMBO_MOBIS: # if this is a combination mobi7-mobi8 file split them up mobisplit = mobi_split(infile) if mobisplit.combo: outmobi7 = os.path.join(files.outdir, "mobi7-" + files.getInputFileBasename() + ".mobi") outmobi8 = os.path.join(files.outdir, "mobi8-" + files.getInputFileBasename() + ".mobi") file(outmobi7, "wb").write(mobisplit.getResult7()) file(outmobi8, "wb").write(mobisplit.getResult8()) # scan sections to see if this is a compound mobi file (K8 format) # and build a list of all mobi headers to process. mhlst = [] mh = MobiHeader(sect, 0) # if this is a mobi8-only file hasK8 here will be true hasK8 = mh.isK8() mhlst.append(mh) K8Boundary = -1 # the last section uses an appended entry of 0xfffffff as its starting point # attempting to process it will cause problems if not hasK8: # if this is a mobi8-only file we don't need to do this for i in xrange(len(sect.sections) - 1): before, after = sect.sections[i : i + 2] if (after - before) == 8: data = sect.loadSection(i) if data == K8_BOUNDARY: print "Mobi Ebook uses the new K8 file format" mh = MobiHeader(sect, i + 1) hasK8 = hasK8 or mh.isK8() mhlst.append(mh) K8Boundary = i break if hasK8: files.makeK8Struct() process_all_mobi_headers(files, sect, mhlst, K8Boundary, False) return
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False): global DUMP global WRITE_RAW_DATA global SPLIT_COMBO_MOBIS if DUMP or dodump: DUMP = True if WRITE_RAW_DATA or dowriteraw: WRITE_RAW_DATA = True if SPLIT_COMBO_MOBIS or dosplitcombos: SPLIT_COMBO_MOBIS = True infile = utf8_str(infile) outdir = utf8_str(outdir) if apnxfile is not None: apnxfile = utf8_str(apnxfile) files = fileNames(infile, outdir) # process the PalmDoc database header and verify it is a mobi sect = Sectionizer(infile) if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd': raise unpackException('Invalid file format') if DUMP: sect.dumppalmheader() else: print "Palm DB type: %s, %d sections." % (sect.ident,sect.num_sections) # scan sections to see if this is a compound mobi file (K8 format) # and build a list of all mobi headers to process. mhlst = [] mh = MobiHeader(sect,0) # if this is a mobi8-only file hasK8 here will be true mhlst.append(mh) K8Boundary = -1 if mh.isK8(): print "Unpacking a KF8 book..." 
hasK8 = True else: # This is either a Mobipocket 7 or earlier, or a combi M7/KF8 # Find out which hasK8 = False for i in xrange(len(sect.sectionoffsets)-1): before, after = sect.sectionoffsets[i:i+2] if (after - before) == 8: data = sect.loadSection(i) if data == K8_BOUNDARY: sect.setsectiondescription(i,"Mobi/KF8 Boundary Section") mh = MobiHeader(sect,i+1) hasK8 = True mhlst.append(mh) K8Boundary = i break if hasK8: print "Unpacking a Combination M{0:d}/KF8 book...".format(mh.version) if SPLIT_COMBO_MOBIS: # if this is a combination mobi7-mobi8 file split them up mobisplit = mobi_split(infile) if mobisplit.combo: outmobi7 = os.path.join(files.outdir, 'mobi7-'+files.getInputFileBasename() + '.mobi') outmobi8 = os.path.join(files.outdir, 'mobi8-'+files.getInputFileBasename() + '.azw3') open(pathof(outmobi7), 'wb').write(mobisplit.getResult7()) open(pathof(outmobi8), 'wb').write(mobisplit.getResult8()) else: print "Unpacking a Mobipocket {0:d} book...".format(mh.version) if hasK8: files.makeK8Struct() process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd) if DUMP: sect.dumpsectionsinfo() return
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False): global DUMP global WRITE_RAW_DATA global SPLIT_COMBO_MOBIS if DUMP or dodump: DUMP = True if WRITE_RAW_DATA or dowriteraw: WRITE_RAW_DATA = True if SPLIT_COMBO_MOBIS or dosplitcombos: SPLIT_COMBO_MOBIS = True infile = utf8_str(infile) outdir = utf8_str(outdir) if apnxfile is not None: apnxfile = utf8_str(apnxfile) files = fileNames(infile, outdir) # process the PalmDoc database header and verify it is a mobi sect = Sectionizer(infile) if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd': raise unpackException('Invalid file format') if DUMP: sect.dumppalmheader() else: print "Palm DB type: %s, %d sections." % (sect.ident, sect.num_sections) # scan sections to see if this is a compound mobi file (K8 format) # and build a list of all mobi headers to process. mhlst = [] mh = MobiHeader(sect, 0) # if this is a mobi8-only file hasK8 here will be true mhlst.append(mh) K8Boundary = -1 if mh.isK8(): print "Unpacking a KF8 book..." 
hasK8 = True else: # This is either a Mobipocket 7 or earlier, or a combi M7/KF8 # Find out which hasK8 = False for i in xrange(len(sect.sectionoffsets) - 1): before, after = sect.sectionoffsets[i:i + 2] if (after - before) == 8: data = sect.loadSection(i) if data == K8_BOUNDARY: sect.setsectiondescription(i, "Mobi/KF8 Boundary Section") mh = MobiHeader(sect, i + 1) hasK8 = True mhlst.append(mh) K8Boundary = i break if hasK8: print "Unpacking a Combination M{0:d}/KF8 book...".format( mh.version) if SPLIT_COMBO_MOBIS: # if this is a combination mobi7-mobi8 file split them up mobisplit = mobi_split(infile) if mobisplit.combo: outmobi7 = os.path.join( files.outdir, 'mobi7-' + files.getInputFileBasename() + '.mobi') outmobi8 = os.path.join( files.outdir, 'mobi8-' + files.getInputFileBasename() + '.azw3') open(pathof(outmobi7), 'wb').write(mobisplit.getResult7()) open(pathof(outmobi8), 'wb').write(mobisplit.getResult8()) else: print "Unpacking a Mobipocket {0:d} book...".format(mh.version) if hasK8: files.makeK8Struct() process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd) if DUMP: sect.dumpsectionsinfo() return
def convert(epub_path, kf8_path=None, asin=None, quiet=False):
    """Convert an EPUB to a KF8 (``.azw3``) file by running ``kindlegen``.

    The EPUB is copied into a fresh temporary directory, ``kindlegen`` is run
    on the copy, the resulting ``.mobi`` is rewritten through
    ``DualMobiMetaFix`` and its KF8 part (``mobi_split(...).getResult8()``)
    is written to ``kf8_path``. The temporary directory is removed
    afterwards, whether or not the conversion succeeded.

    Args:
        epub_path: path of the source EPUB.
        kf8_path: destination file. When falsy, the output is written next
            to the EPUB as ``<asin>_<sanitised title>.azw3``.
        asin: identifier to use. When falsy, the EPUB identifier is used if
            it looks like a ``urn:uuid:`` URN, a bare UUID or an ASIN;
            otherwise a random UUID is generated.
        quiet: when true, kindlegen's stdout and stderr are discarded.

    Raises:
        subprocess.CalledProcessError: if ``kindlegen`` exits with a
            non-zero status.
        AssertionError: if ``kindlegen`` did not produce the expected
            ``.mobi`` file.
    """
    logger.info(f'Processing: {epub_path}')
    epub = EPUB(epub_path)

    # ASIN
    if not asin and epub.identifier:
        logger.info(f'Detected book identifier: {epub.identifier}')
        if epub.identifier.startswith('urn:uuid:'):
            # Identifier looks like a normative UUID.
            asin = epub.identifier.split(':')[2]
        elif re.match(
                r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
                epub.identifier.lower()):
            # Identifier looks like a bare UUID.
            asin = epub.identifier
        elif re.match(r'^B[0-9A-Z]{9}$', epub.identifier):
            # Identifier looks like a genuine ASIN
            asin = epub.identifier
        else:
            logger.info('NOT using detected book identifier as ASIN')

    if asin:
        logger.info(f'Using UUID/ASIN: {asin}')
    else:
        # Generate fake ASIN
        asin = str(uuid.uuid4())
        logger.info(f'Generated a fake ASIN: {asin}')

    temp_dir = tempfile.mkdtemp()
    try:
        # Make a temp copy of the book
        epub_tmp = os.path.join(temp_dir, f'{asin}.epub')
        shutil.copy(epub_path, epub_tmp)

        # Generate temp .mobi file
        mobi_tmp = os.path.join(temp_dir, f'{asin}.mobi')
        kindlegen_cmd = ['kindlegen', epub_tmp, '-dont_append_source']
        logger.info(f'Running: {kindlegen_cmd}')
        if not quiet:
            subprocess.check_call(kindlegen_cmd)
        else:
            subprocess.check_call(kindlegen_cmd,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        # Explicit raise instead of ``assert`` so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if not os.path.isfile(mobi_tmp):
            raise AssertionError(
                f'kindlegen did not produce the expected file: {mobi_tmp}')

        # Fix metadata of temp .mobi file:
        # - Add ASIN
        # - Set type to EBOK
        logger.info('Fixing metadata ...')
        dmf = DualMobiMetaFix(mobi_tmp, asin)
        with open(mobi_tmp, 'wb') as f:
            f.write(dmf.getresult())

        # KF8 Output
        if not kf8_path:
            epub_dir = os.path.dirname(epub_path)
            # Collapse each run of / | ? < > \ : * " ^ or space into a
            # single underscore.
            clean_title = re.sub(r'[/|\?|<|>|\\\\|:|\*|\||"|\^| ]+', '_',
                                 epub.title)
            kf8_path = os.path.join(epub_dir, f'{asin}_{clean_title}.azw3')

        # Extract KF8 from temp .mobi file
        logger.info('Extracting KF8 ...')
        mobisplit = mobi_split(mobi_tmp)
        with open(kf8_path, 'wb') as f:
            f.write(mobisplit.getResult8())
    finally:
        # Clean up, also when any step above failed, so the temporary
        # directory is never left behind.
        logger.info('Cleaning up ...')
        shutil.rmtree(temp_dir)