예제 #1
0
def unpackBook(infile, outdir):
    """Unpack the Mobipocket/KF8 book ``infile`` into ``outdir``.

    The Palm database identifier is validated first.  When the module-level
    ``SPLIT_COMBO_MOBIS`` flag is set and ``mobi_split`` reports a
    combination file, separate ``mobi7-*.mobi`` and ``mobi8-*.mobi`` copies
    are written to ``files.outdir``.  Every MOBI header that was found is
    then handed to ``process_all_mobi_headers``.

    Args:
        infile: the input book, passed to ``Sectionizer`` and ``mobi_split``.
        outdir: output location, passed to ``fileNames``.

    Raises:
        unpackException: if the Palm database identifier is neither
            ``'BOOKMOBI'`` nor ``'TEXtREAd'``.
    """
    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    # A single pre-joined argument prints exactly what the two-item print
    # statement printed, and is valid both as a statement and as a call.
    print("%s %s" % ("Palm DB type: ", sect.ident))
    if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
        raise unpackException('invalid file format')

    if SPLIT_COMBO_MOBIS:
        # if this is a combination mobi7-mobi8 file split them up
        mobisplit = mobi_split(infile)
        if mobisplit.combo:
            outmobi7 = os.path.join(
                files.outdir,
                'mobi7-' + files.getInputFileBasename() + '.mobi')
            outmobi8 = os.path.join(
                files.outdir,
                'mobi8-' + files.getInputFileBasename() + '.mobi')
            # Context managers close (and flush) each output file
            # deterministically instead of relying on garbage collection.
            with open(outmobi7, 'wb') as out7:
                out7.write(mobisplit.getResult7())
            with open(outmobi8, 'wb') as out8:
                out8.write(mobisplit.getResult8())

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    hasK8 = mh.isK8()
    mhlst.append(mh)
    K8Boundary = -1

    # The loop stops one entry short of the end of sect.sections so that
    # sections[i:i + 2] always yields a pair; per the original note, the
    # final entry is an appended sentinel that must not be processed.
    if not hasK8:  # if this is a mobi8-only file we don't need to do this
        for i in xrange(len(sect.sections) - 1):
            before, after = sect.sections[i:i + 2]
            # only an 8-unit gap between consecutive entries can be the
            # boundary marker
            if (after - before) != 8:
                continue
            if sect.loadSection(i) != K8_BOUNDARY:
                continue
            print("Mobi Ebook uses the new K8 file format")
            mh = MobiHeader(sect, i + 1)
            # hasK8 is known to be False here, so this is the same as
            # "hasK8 or mh.isK8()"
            hasK8 = mh.isK8()
            mhlst.append(mh)
            K8Boundary = i
            break
    if hasK8:
        files.makeK8Struct()
    process_all_mobi_headers(files, sect, mhlst, K8Boundary, False)
    return
예제 #2
0
def unpackBook(infile, outdir):
    """Unpack the Mobipocket/KF8 book ``infile`` into ``outdir``.

    Steps, in order:

    1. Build the output file-name helper and read the Palm database header.
    2. Reject anything whose identifier is not ``"BOOKMOBI"`` or
       ``"TEXtREAd"``.
    3. If the module-level ``SPLIT_COMBO_MOBIS`` flag is set and
       ``mobi_split`` reports a combination file, write ``mobi7-*.mobi`` and
       ``mobi8-*.mobi`` copies into ``files.outdir``.
    4. Collect the MOBI header at section 0 and, if a ``K8_BOUNDARY``
       section is found, the header that follows it.
    5. Hand everything to ``process_all_mobi_headers``.

    Args:
        infile: the input book, passed to ``Sectionizer`` and ``mobi_split``.
        outdir: output location, passed to ``fileNames``.

    Raises:
        unpackException: if the Palm database identifier is not accepted.
    """
    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    # One pre-joined argument reproduces the output of the original two-item
    # print statement and also parses as a print() call.
    print("%s %s" % ("Palm DB type: ", sect.ident))
    if sect.ident != "BOOKMOBI" and sect.ident != "TEXtREAd":
        raise unpackException("invalid file format")

    if SPLIT_COMBO_MOBIS:
        # if this is a combination mobi7-mobi8 file split them up
        mobisplit = mobi_split(infile)
        if mobisplit.combo:
            outmobi7 = os.path.join(files.outdir, "mobi7-" + files.getInputFileBasename() + ".mobi")
            outmobi8 = os.path.join(files.outdir, "mobi8-" + files.getInputFileBasename() + ".mobi")
            # "with" guarantees each handle is flushed and closed, even if
            # producing or writing the data raises.
            with open(outmobi7, "wb") as mobi7_file:
                mobi7_file.write(mobisplit.getResult7())
            with open(outmobi8, "wb") as mobi8_file:
                mobi8_file.write(mobisplit.getResult8())

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    hasK8 = mh.isK8()
    mhlst.append(mh)
    K8Boundary = -1

    # Per the original note, the last entry of sect.sections is an appended
    # sentinel and must not be processed; stopping one short also keeps the
    # two-element slice below well-formed.
    if not hasK8:  # if this is a mobi8-only file we don't need to do this
        for i in xrange(len(sect.sections) - 1):
            before, after = sect.sections[i : i + 2]
            if (after - before) == 8:
                # only load sections whose entries are 8 apart; anything
                # else cannot equal the boundary marker
                data = sect.loadSection(i)
                if data == K8_BOUNDARY:
                    print("Mobi Ebook uses the new K8 file format")
                    mh = MobiHeader(sect, i + 1)
                    hasK8 = hasK8 or mh.isK8()
                    mhlst.append(mh)
                    K8Boundary = i
                    break
    if hasK8:
        files.makeK8Struct()
    process_all_mobi_headers(files, sect, mhlst, K8Boundary, False)
    return
예제 #3
0
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False):
    """Unpack the Mobipocket/KF8 book ``infile`` into ``outdir``.

    Args:
        infile: the input book; normalised with ``utf8_str``.
        outdir: output location; normalised with ``utf8_str``.
        apnxfile: optional value forwarded to ``process_all_mobi_headers``
            (normalised with ``utf8_str`` when not ``None``).
        epubver: forwarded unchanged to ``process_all_mobi_headers``.
        use_hd: forwarded unchanged to ``process_all_mobi_headers``.
        dodump: switch the module-level ``DUMP`` flag on.
        dowriteraw: switch the module-level ``WRITE_RAW_DATA`` flag on.
        dosplitcombos: switch the module-level ``SPLIT_COMBO_MOBIS`` flag on.

    Raises:
        unpackException: if the Palm database identifier is neither
            ``'BOOKMOBI'`` nor ``'TEXtREAd'``.

    Note:
        The three ``do*`` arguments can only turn the module-level flags on.
        A flag that is already set stays set, so a value from an earlier
        call persists into later calls.
    """
    global DUMP
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS
    if DUMP or dodump:
        DUMP = True
    if WRITE_RAW_DATA or dowriteraw:
        WRITE_RAW_DATA = True
    if SPLIT_COMBO_MOBIS or dosplitcombos:
        SPLIT_COMBO_MOBIS = True

    infile = utf8_str(infile)
    outdir = utf8_str(outdir)
    if apnxfile is not None:
        apnxfile = utf8_str(apnxfile)

    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
        raise unpackException('Invalid file format')
    if DUMP:
        sect.dumppalmheader()
    else:
        print("Palm DB type: %s, %d sections." % (sect.ident, sect.num_sections))

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    mhlst.append(mh)
    K8Boundary = -1

    if mh.isK8():
        print("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8
        # Find out which
        hasK8 = False
        # Stop one short so that the two-element slice is always a pair.
        for i in xrange(len(sect.sectionoffsets) - 1):
            before, after = sect.sectionoffsets[i:i + 2]
            # only an 8-byte section can hold the boundary marker
            if (after - before) == 8:
                data = sect.loadSection(i)
                if data == K8_BOUNDARY:
                    sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
                    mh = MobiHeader(sect, i + 1)
                    hasK8 = True
                    mhlst.append(mh)
                    K8Boundary = i
                    break
        if hasK8:
            # NOTE(review): mh was rebound to the header after the boundary,
            # so the version shown here comes from that header rather than
            # from mhlst[0] -- confirm this is the intended value.
            print("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version))
            if SPLIT_COMBO_MOBIS:
                # if this is a combination mobi7-mobi8 file split them up
                mobisplit = mobi_split(infile)
                if mobisplit.combo:
                    outmobi7 = os.path.join(files.outdir, 'mobi7-' + files.getInputFileBasename() + '.mobi')
                    outmobi8 = os.path.join(files.outdir, 'mobi8-' + files.getInputFileBasename() + '.azw3')
                    # Context managers flush and close each output file
                    # deterministically instead of leaving it to the
                    # garbage collector.
                    with open(pathof(outmobi7), 'wb') as out7:
                        out7.write(mobisplit.getResult7())
                    with open(pathof(outmobi8), 'wb') as out8:
                        out8.write(mobisplit.getResult8())
        else:
            print("Unpacking a Mobipocket {0:d} book...".format(mh.version))

    if hasK8:
        files.makeK8Struct()

    process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd)

    if DUMP:
        sect.dumpsectionsinfo()
    return
예제 #4
0
def unpackBook(infile,
               outdir,
               apnxfile=None,
               epubver='2',
               use_hd=False,
               dodump=False,
               dowriteraw=False,
               dosplitcombos=False):
    """Unpack the Mobipocket/KF8 book ``infile`` into ``outdir``.

    The Palm database identifier is validated, the MOBI header at section 0
    (plus the header following a ``K8_BOUNDARY`` section, if one exists) is
    collected, optional mobi7/mobi8 split copies are written for
    combination files, and the headers are handed to
    ``process_all_mobi_headers``.

    Args:
        infile: the input book; normalised with ``utf8_str``.
        outdir: output location; normalised with ``utf8_str``.
        apnxfile: optional value forwarded to ``process_all_mobi_headers``
            (normalised with ``utf8_str`` when not ``None``).
        epubver: forwarded unchanged to ``process_all_mobi_headers``.
        use_hd: forwarded unchanged to ``process_all_mobi_headers``.
        dodump: switch the module-level ``DUMP`` flag on.
        dowriteraw: switch the module-level ``WRITE_RAW_DATA`` flag on.
        dosplitcombos: switch the module-level ``SPLIT_COMBO_MOBIS`` flag on.

    Raises:
        unpackException: if the Palm database identifier is neither
            ``'BOOKMOBI'`` nor ``'TEXtREAd'``.

    Note:
        The ``do*`` arguments only ever turn the module-level flags on; a
        flag set by an earlier call remains set for later calls.
    """
    global DUMP
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS
    if DUMP or dodump:
        DUMP = True
    if WRITE_RAW_DATA or dowriteraw:
        WRITE_RAW_DATA = True
    if SPLIT_COMBO_MOBIS or dosplitcombos:
        SPLIT_COMBO_MOBIS = True

    infile = utf8_str(infile)
    outdir = utf8_str(outdir)
    if apnxfile is not None:
        apnxfile = utf8_str(apnxfile)

    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
        raise unpackException('Invalid file format')
    if DUMP:
        sect.dumppalmheader()
    else:
        print("Palm DB type: %s, %d sections." % (sect.ident,
                                                  sect.num_sections))

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    mhlst.append(mh)
    K8Boundary = -1

    if mh.isK8():
        print("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8
        # Find out which
        hasK8 = False
        # Stop one short so that the two-element slice is always a pair.
        for i in xrange(len(sect.sectionoffsets) - 1):
            before, after = sect.sectionoffsets[i:i + 2]
            # only an 8-byte section can hold the boundary marker
            if (after - before) != 8:
                continue
            if sect.loadSection(i) != K8_BOUNDARY:
                continue
            sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
            mh = MobiHeader(sect, i + 1)
            hasK8 = True
            mhlst.append(mh)
            K8Boundary = i
            break
        if hasK8:
            # NOTE(review): mh now refers to the header after the boundary,
            # so this version is read from that header, not from
            # mhlst[0] -- confirm this is the intended value.
            print("Unpacking a Combination M{0:d}/KF8 book...".format(
                mh.version))
            if SPLIT_COMBO_MOBIS:
                # if this is a combination mobi7-mobi8 file split them up
                mobisplit = mobi_split(infile)
                if mobisplit.combo:
                    outmobi7 = os.path.join(
                        files.outdir,
                        'mobi7-' + files.getInputFileBasename() + '.mobi')
                    outmobi8 = os.path.join(
                        files.outdir,
                        'mobi8-' + files.getInputFileBasename() + '.azw3')
                    # "with" guarantees each handle is flushed and closed,
                    # even if producing or writing the data raises.
                    with open(pathof(outmobi7), 'wb') as mobi7_file:
                        mobi7_file.write(mobisplit.getResult7())
                    with open(pathof(outmobi8), 'wb') as mobi8_file:
                        mobi8_file.write(mobisplit.getResult8())
        else:
            print("Unpacking a Mobipocket {0:d} book...".format(mh.version))

    if hasK8:
        files.makeK8Struct()

    process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False,
                             epubver, use_hd)

    if DUMP:
        sect.dumpsectionsinfo()
    return
예제 #5
0
def convert(epub_path, kf8_path=None, asin=None, quiet=False):
    """Convert an EPUB into a KF8 (``.azw3``) file using ``kindlegen``.

    The EPUB is copied into a temporary directory, ``kindlegen`` is run on
    the copy, the resulting ``.mobi`` is rewritten with the output of
    ``DualMobiMetaFix``, and the KF8 part returned by ``mobi_split`` is
    written to ``kf8_path``.  The temporary directory is removed afterwards,
    whether or not the conversion succeeded.

    Args:
        epub_path: path of the source EPUB.
        kf8_path: destination file.  When falsy, the file is written next to
            ``epub_path`` as ``<asin>_<cleaned title>.azw3``.
        asin: identifier to embed.  When falsy, ``epub.identifier`` is used
            if it looks like a ``urn:uuid:`` value, a bare UUID or a
            ``B`` + 9 character ASIN; otherwise a random UUID is generated.
        quiet: when true, ``kindlegen``'s stdout and stderr are discarded.

    Raises:
        subprocess.CalledProcessError: if ``kindlegen`` exits non-zero.
        AssertionError: if ``kindlegen`` did not produce the expected
            ``.mobi`` file.
    """

    logger.info(f'Processing: {epub_path}')
    epub = EPUB(epub_path)

    # ASIN
    if not asin:
        if epub.identifier:
            logger.info(f'Detected book identifier: {epub.identifier}')
            # Identifier looks like a normative UUID.
            if epub.identifier.startswith('urn:uuid:'):
                asin = epub.identifier.split(':')[2]
            # Identifier looks like a bare UUID.  re.match only anchors at
            # the start, so any text following the UUID is kept as well.
            elif re.match(
                    r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
                    epub.identifier.lower()):
                asin = epub.identifier
            # Identifier looks like a genuine ASIN
            elif re.match(r'^B[0-9A-Z]{9}$', epub.identifier):
                asin = epub.identifier
            else:
                logger.info('NOT using detected book identifier as ASIN')

    if asin:
        logger.info(f'Using UUID/ASIN: {asin}')
    else:
        # Generate fake ASIN
        asin = str(uuid.uuid4())
        logger.info(f'Generated a fake ASIN: {asin}')

    # Everything below works inside a scratch directory; try/finally makes
    # sure it is removed even when kindlegen or a later step fails.
    temp_dir = tempfile.mkdtemp()
    try:
        # Make a temp copy of the book
        epub_tmp = os.path.join(temp_dir, f'{asin}.epub')
        shutil.copy(epub_path, epub_tmp)

        # Generate temp .mobi file
        mobi_tmp = os.path.join(temp_dir, f'{asin}.mobi')
        kindlegen_cmd = ['kindlegen', epub_tmp, '-dont_append_source']
        logger.info(f'Running: {kindlegen_cmd}')
        if quiet:
            subprocess.check_call(kindlegen_cmd,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        else:
            subprocess.check_call(kindlegen_cmd)
        # Explicit raise (same exception type as the former assert) so the
        # check is not stripped when Python runs with -O.
        if not os.path.isfile(mobi_tmp):
            raise AssertionError(
                f'kindlegen did not produce the expected file: {mobi_tmp}')

        # Fix metadata of temp .mobi file:
        # - Add ASIN
        # - Set type to EBOK
        logger.info('Fixing metadata ...')
        dmf = DualMobiMetaFix(mobi_tmp, asin)
        with open(mobi_tmp, 'wb') as f:
            f.write(dmf.getresult())

        # KF8 Output
        if not kf8_path:
            epub_dir = os.path.dirname(epub_path)
            # Collapse runs of path-unsafe characters (and spaces) into a
            # single underscore.
            clean_title = re.sub(r'[/|\?|<|>|\\\\|:|\*|\||"|\^| ]+', '_',
                                 epub.title)
            kf8_path = os.path.join(epub_dir, f'{asin}_{clean_title}.azw3')

        # Extract KF8 from temp .mobi file
        logger.info('Extracting KF8 ...')
        mobisplit = mobi_split(mobi_tmp)
        with open(kf8_path, 'wb') as f:
            f.write(mobisplit.getResult8())
    finally:
        # Clean up
        logger.info('Cleaning up ...')
        shutil.rmtree(temp_dir)