Python Sectionizer Examples

Programming Language: Python

Namespace/Package Name: kindle_unpack

Method/Function: Sectionizer

Examples at hotexamples.com: 6

Python Sectionizer - 6 examples found. These are the top-rated real-world Python examples of kindle_unpack.Sectionizer, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: extract_cover_thumbs.py Project: AthameBook/EKT

def extract_cover_thumbs(kindlepath):
    """Create missing cover thumbnails for the books listed in the database.

    Book locations and thumbnail paths are read from the SQLite database
    named by the module-level ``baza``; table and column names come from the
    module-level ``tablica``, ``polozenie``, ``miniatura`` and ``typebooka``.
    ``generate_apnx_files`` is called with every row whose type column is not
    ``"Entry:Item:Dictionary"``.  After that, a thumbnail is saved for each
    listed book that exists on disk, has a supported extension and has no
    thumbnail file yet.

    :param kindlepath: Not used by this function; kept so that existing
        callers keep working.
    :return: Always ``0``.  Books that cannot be processed are skipped
        silently.
    """
    extensions = ('.azw', '.azw3', '.mobi', '.pobi', '.kfx', '.azw8')
    kfx_extensions = ('.kfx', '.azw8')

    conn = sqlite3.connect(baza)
    try:
        c = conn.cursor()
        c.execute(
            ('SELECT {cn}, {coi} FROM {tn} '
             'WHERE {coi} IS NOT NULL AND {coi} != "0"').format(
                 coi=miniatura, tn=tablica, cn=polozenie))
        files = c.fetchall()
        c.execute(
            ('SELECT {cn} FROM {tn} '
             'WHERE {cn} IS NOT NULL AND {cn} != "0" '
             'AND {te} != "Entry:Item:Dictionary"').format(
                 coi=miniatura, tn=tablica, cn=polozenie, te=typebooka))
        ebooki = c.fetchall()
    finally:
        # Release the database even when one of the queries fails.
        conn.close()

    generate_apnx_files(ebooki)

    for name, thumbpath in files:
        encoded_name = name.encode('UTF-8')
        if not os.path.isfile(encoded_name):
            continue
        lowered = name.lower()
        if not lowered.endswith(extensions):
            continue
        if os.path.isfile(thumbpath):
            # A thumbnail already exists; nothing to do for this book.
            continue

        is_kfx = lowered.endswith(kfx_extensions)
        if is_kfx:
            if '_sample' in encoded_name:
                continue
            try:
                kfx_metadata = get_kindle_kfx_metadata(name)
            except Exception:
                # Best effort: a book with unreadable metadata is skipped.
                continue
            image_data = kfx_metadata.get("cover_image_data")
            if not image_data:
                continue
        else:
            # Only bytes 60..67 are needed to recognise the container, so
            # avoid loading the whole book into memory.
            with open(encoded_name, 'rb') as mf:
                mf.seek(60)
                signature = mf.read(8)
            if signature != 'BOOKMOBI':
                continue
            section = kindle_unpack.Sectionizer(encoded_name)
            mh = kindle_unpack.MobiHeader(section, 0)
            metadata = mh.getmetadata()

        try:
            if is_kfx:
                cover = process_image(image_data.decode('base64'))
            else:
                cover = get_cover_image(section, mh, metadata)
        except IOError:
            continue
        if not cover:
            continue
        cover.save(thumbpath)
    return 0

Example #2

Show file

File: apnx.py Project: AthameBook/EKT

    def write_apnx(self, mobi_file_path, apnx_path, page_count=0):
        """Write an APNX page-mapping file for a MOBI book.

        :param mobi_file_path: Path of the MOBI file to read.
        :param apnx_path: Path the generated APNX data is written to.
        :param page_count: When truthy, the page mapping is requested from
            ``get_pages_exact`` with this value; ``get_pages_fast`` is used
            otherwise, and also as a fallback when the exact mapping is
            empty.
        :return: ``1`` when the file cannot be read or its PDB identity is
            not ``BOOKMOBI``.  ``None`` otherwise, including the case where
            the mapping has more than 65536 entries and nothing is written.
        """
        import uuid
        apnx_meta = {
            'guid': str(uuid.uuid4()).replace('-', '')[:8],
            'asin': '',
            'cdetype': 'EBOK',
            'format': 'MOBI_7',
            'acr': ''
        }

        try:
            with open(mobi_file_path, 'rb') as mf:
                ident = PdbHeaderReader(mf).identity()
                if ident != 'BOOKMOBI':
                    return 1
                apnx_meta['acr'] = str(PdbHeaderReader(mf).name())
        except Exception:
            # Unreadable or malformed file; KeyboardInterrupt and SystemExit
            # are deliberately left to propagate.
            return 1

        # Sectionizer is given the path, so no extra file handle is opened
        # here.
        section = kindle_unpack.Sectionizer(mobi_file_path)
        mh = kindle_unpack.MobiHeader(section, 0)
        metadata = mh.getmetadata()
        apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'

        try:
            doctype = metadata['Document Type'][0]
        except KeyError:
            doctype = None
        apnx_meta['cdetype'] = 'EBOK' if doctype is None else doctype

        try:
            asin = metadata['ASIN'][0]
        except KeyError:
            asin = None
        apnx_meta['asin'] = '' if asin is None else asin

        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        if not pages:
            # Either no exact count was requested or it produced nothing.
            pages = self.get_pages_fast(mobi_file_path)
        if len(pages) > 65536:
            return

        apnx = self.generate_apnx(pages, apnx_meta)

        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

Example #3

Show file

    def write_apnx(self, mobi_file_path, apnx_path, page_count=0):
        """
        Write APNX file.

        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count; the mapping is then requested from
        ``get_pages_exact``.  Otherwise, or when that mapping is empty,
        ``get_pages_fast`` is used.

        Returns ``1`` when the file cannot be read or its PDB identity is not
        ``BOOKMOBI``.  Returns ``None`` otherwise, including the case where
        the mapping has more than 65536 entries and nothing is written.
        """
        import uuid
        apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin':
                     '', 'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''}

        try:
            with open(mobi_file_path, 'rb') as mf:
                ident = PdbHeaderReader(mf).identity()
                if ident != 'BOOKMOBI':
                    # Check that this is really a MOBI file.
                    print('BŁĄD! Niepoprawny plik MOBI "%s"'
                          % os.path.basename(mobi_file_path))
                    return 1
                apnx_meta['acr'] = str(PdbHeaderReader(mf).name())
        except Exception:
            # Unreadable or malformed file; KeyboardInterrupt and SystemExit
            # are deliberately left to propagate.
            print('Błąd! Nie można otworzyć pliku %s' % mobi_file_path)
            return 1

        # Sectionizer is given the path, so no extra file handle is opened
        # here.
        section = kindle_unpack.Sectionizer(mobi_file_path)
        mh = kindle_unpack.MobiHeader(section, 0)
        metadata = mh.getmetadata()
        apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'

        try:
            doctype = metadata['Document Type'][0]
        except KeyError:
            doctype = None
        apnx_meta['cdetype'] = 'EBOK' if doctype is None else doctype

        try:
            asin = metadata['ASIN'][0]
        except KeyError:
            asin = None
        apnx_meta['asin'] = '' if asin is None else asin

        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        if not pages:
            # Either no exact count was requested or it produced nothing.
            pages = self.get_pages_fast(mobi_file_path)
        if not pages:
            print('Nie można wygenerować mapowania stron.')
        if len(pages) > 65536:
            print('Ilośc stron w pliku in "%s" przekracza limit. '
                  'Nie można zapisać pliku apnx...' % mobi_file_path)
            return

        apnx = self.generate_apnx(pages, apnx_meta)

        if sys.platform == 'win32':
            # Extended-length path prefix for Windows.
            apnx_path = '\\\\?\\' + apnx_path.replace('/', '\\')
        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

Example #4

Show file

File: extract_cover_thumbs.py Project: sgmqs/ExtractCoverThumbs

def extract_cover_thumbs(is_silent, is_overwrite_pdoc_thumbs,
                         is_overwrite_amzn_thumbs, is_overwrite_apnx,
                         skip_apnx, kindlepath, is_azw, days, fix_thumb):
    """Extract cover thumbnails for the books in ``<kindlepath>/documents``.

    For every MOBI-family file directly inside the documents directory the
    ASIN and document type are read from the MOBI header and, when needed, a
    cover is saved as
    ``<kindlepath>/system/thumbnails/thumbnail_<asin>_<doctype>_portrait.jpg``.
    Afterwards ``get_real_pages`` is run on the page CSV, APNX files are
    generated unless ``skip_apnx`` is set, and the CSV is copied back to the
    documents directory.

    :param is_silent: When true, per-file progress messages are suppressed.
    :param is_overwrite_pdoc_thumbs: Re-create existing thumbnails of
        ``PDOC`` books and run ``fix_generated_thumbs`` on the non-portrait
        thumbnails.
    :param is_overwrite_amzn_thumbs: Re-create existing thumbnails of
        ``EBOK`` / ``EBSP`` books.
    :param is_overwrite_apnx: Passed through to ``generate_apnx_files``.
    :param skip_apnx: When true, APNX generation is skipped.
    :param kindlepath: Root directory of the device.
    :param is_azw: When true, ``.azw`` files are processed as well.
    :param days: ``None`` to process every file, otherwise a string holding
        the maximum file age in days.
    :param fix_thumb: Passed through to ``get_cover_image`` and
        ``fix_generated_thumbs``.
    :return: ``1`` when the documents or thumbnails directory is missing,
        ``0`` otherwise.
    """
    docs = os.path.join(kindlepath, 'documents')
    thumb_dir = os.path.join(kindlepath, 'system', 'thumbnails')
    is_verbose = not is_silent
    try:
        dir_list = os.listdir(docs)
    except OSError:
        print('* ERROR! No Kindle device found in a specified directory: ' +
              kindlepath)
        return 1
    dir_list.sort()
    if days is not None:
        dtt = datetime.today()
        days_int = int(days)
        print('Notice! Processing files not older than ' + days + ' days.')
    else:
        days_int = 0
        diff = 0

    # Validate the device layout before the temporary directory is created,
    # so that the early return does not leave the directory behind.
    if not os.path.isdir(thumb_dir):
        print('* ERROR! No Kindle device found in the specified path: "' +
              os.path.join(kindlepath) + '"')
        return 1

    # move CSV file to computer temp dir to speed up updating process
    tempdir = tempfile.mkdtemp(suffix='', prefix='extract_cover_thumbs-tmp-')
    csv_pages_name = 'extract_cover_thumbs-book-pages.csv'
    csv_pages = os.path.join(tempdir, csv_pages_name)
    device_csv = os.path.join(docs, csv_pages_name)
    if os.path.isfile(device_csv):
        shutil.copy2(device_csv, csv_pages)

    # load ASIN list from CSV
    asinlist = asin_list_from_csv(csv_pages)

    print("START of extracting cover thumbnails...")
    if is_azw:
        extensions = ('.azw', '.azw3', '.mobi')
    else:
        extensions = ('.azw3', '.mobi')
    for f in dir_list:
        if days is not None:
            # Age in whole days, measured from the midnight of the file's
            # ctime date.
            dt = os.path.getctime(os.path.join(docs, f))
            dt = datetime.fromtimestamp(dt).strftime('%Y-%m-%d')
            dt = datetime.strptime(dt, '%Y-%m-%d')
            diff = (dtt - dt).days
        if not (f.lower().endswith(extensions) and diff <= days_int):
            continue

        fide = f.decode(sys.getfilesystemencoding())
        mobi_path = os.path.join(docs, f)
        dump_pages(asinlist, csv_pages, docs, f)
        if is_verbose:
            try:
                print('* %s:' % fide, end=' ')
            except Exception:
                # The name may not be printable on this console.
                print('* %r:' % fide, end=' ')

        # Only bytes 60..67 are needed to recognise the container, so avoid
        # loading the whole book into memory.
        with open(mobi_path, 'rb') as mf:
            mf.seek(60)
            signature = mf.read(8)
        if signature != 'BOOKMOBI':
            print('* Not a valid MOBI file "%s".'
                  % fide)
            continue

        section = kindle_unpack.Sectionizer(mobi_path)
        mh = kindle_unpack.MobiHeader(section, 0)
        metadata = mh.getmetadata()
        try:
            asin = metadata['ASIN'][0]
        except KeyError:
            asin = None
        try:
            doctype = metadata['Document Type'][0]
        except KeyError:
            doctype = None
        if asin is None:
            print('ERROR! No ASIN found in "%s"' % fide)
            continue

        thumbpath = os.path.join(
            thumb_dir, 'thumbnail_%s_%s_portrait.jpg' % (asin, doctype)
        )
        needs_cover = (
            not os.path.isfile(thumbpath) or
            (is_overwrite_pdoc_thumbs and doctype == 'PDOC') or
            (is_overwrite_amzn_thumbs and doctype in ('EBOK', 'EBSP'))
        )
        if not needs_cover:
            if is_verbose:
                print('skipped (cover present or overwriting not forced).')
            continue

        if is_verbose:
            print('EXTRACTING COVER:', end=' ')
        try:
            cover = get_cover_image(section, mh, metadata, doctype, f,
                                    fide, is_verbose, fix_thumb)
        except IOError:
            print('FAILED! Image format unrecognized...')
            continue
        if not cover:
            continue
        cover.save(thumbpath)

    print("START of downloading real book page numbers...")
    get_real_pages(csv_pages)
    print("FINISH of downloading real book page numbers...")
    if not skip_apnx:
        print("START of generating book page numbers (APNX files)...")
        generate_apnx_files(dir_list, docs, is_verbose, is_overwrite_apnx,
                            days, tempdir)
        print("FINISH of generating book page numbers (APNX files)...")

    if is_overwrite_pdoc_thumbs:
        for c in os.listdir(thumb_dir):
            if (c.startswith('thumbnail') and c.endswith('.jpg') and
                    not c.endswith('portrait.jpg')):
                fix_generated_thumbs(os.path.join(thumb_dir, c),
                                     is_verbose, fix_thumb)
    print("FINISH of extracting cover thumbnails...")
    shutil.copy2(csv_pages, device_csv)
    clean_temp(tempdir)
    return 0

Example #5

Show file

    def write_apnx(self, mobi_file_path, apnx_path, page_count=0):
        """
        Write APNX file.

        If you want a fixed number of pages (such as from a custom column) then
        pass in a value to page_count; the mapping is then requested from
        ``get_pages_exact``.  Otherwise, or when that mapping is empty,
        ``get_pages_fast`` is used.

        Returns ``1`` when the file cannot be read or its PDB identity is not
        ``BOOKMOBI``.  Returns ``None`` otherwise, including the case where
        the mapping has more than 65536 entries and nothing is written.
        """
        import uuid
        apnx_meta = {'guid': str(uuid.uuid4()).replace('-', '')[:8], 'asin':
                     '', 'cdetype': 'EBOK', 'format': 'MOBI_7', 'acr': ''}

        try:
            with open(mobi_file_path, 'rb') as mf:
                ident = PdbHeaderReader(mf).identity()
                if ident != 'BOOKMOBI':
                    # Check that this is really a MOBI file.
                    print('ERROR! Not a valid MOBI file "%s"'
                          % os.path.basename(mobi_file_path))
                    return 1
                apnx_meta['acr'] = str(PdbHeaderReader(mf).name())
        except Exception:
            # Unreadable or malformed file; KeyboardInterrupt and SystemExit
            # are deliberately left to propagate.
            print('Error! Unable to open file %s' % mobi_file_path)
            return 1

        # We'll need the PDB name, the MOBI version, and some metadata to make
        # FW 3.4 happy with KF8 files...
        # Sectionizer is given the path, so no extra file handle is opened
        # here.
        section = kindle_unpack.Sectionizer(mobi_file_path)
        mh = kindle_unpack.MobiHeader(section, 0)
        metadata = mh.getmetadata()
        apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'

        try:
            doctype = metadata['Document Type'][0]
        except KeyError:
            doctype = None
        apnx_meta['cdetype'] = 'EBOK' if doctype is None else doctype

        try:
            asin = metadata['ASIN'][0]
        except KeyError:
            asin = None
        apnx_meta['asin'] = '' if asin is None else asin

        # Get the pages: an exact count when requested, the fast estimate
        # otherwise or as a fallback.
        pages = []
        if page_count:
            pages = self.get_pages_exact(mobi_file_path, page_count)
        if not pages:
            pages = self.get_pages_fast(mobi_file_path)
        if not pages:
            print('Could not generate page mapping.')
        if len(pages) > 65536:
            print('Pages over limit in "%s" file. '
                  'Unable to write apnx file...' % mobi_file_path)
            return

        # Generate the APNX file from the page mapping.
        apnx = self.generate_apnx(pages, apnx_meta)

        # Write the APNX.
        if sys.platform == 'win32':
            # Extended-length path prefix for Windows.
            apnx_path = '\\\\?\\' + apnx_path.replace('/', '\\')
        with open(apnx_path, 'wb') as apnxf:
            apnxf.write(apnx)

Example #6

Show file

File: extract_cover_thumbs.py Project: zsokolowski/ExtractCoverThumbs

def extract_cover_thumbs(is_silent, is_overwrite_pdoc_thumbs,
                         is_overwrite_amzn_thumbs, is_overwrite_apnx,
                         skip_apnx, kindlepath, is_azw, days, fix_thumb,
                         lubimy_czytac, mark_real_pages, patch_azw3):
    """Extract cover thumbnails for the books under ``<kindlepath>/documents``.

    The documents directory is walked recursively (files below
    ``documents/dictionaries`` and paths containing ``attachables`` are
    skipped).  For each supported book the ASIN and document type are read
    from the KFX metadata or the MOBI header and, when needed, a cover is
    saved as
    ``<kindlepath>/system/thumbnails/thumbnail_<asin>_<doctype>_portrait.jpg``.
    Afterwards real page numbers are optionally downloaded, APNX files are
    generated unless ``skip_apnx`` is set, and the page CSV is copied back
    to the documents directory.

    :param is_silent: When true, per-file progress messages are suppressed.
    :param is_overwrite_pdoc_thumbs: Re-create existing thumbnails of
        ``PDOC`` books and run ``fix_generated_thumbs`` on the non-portrait
        thumbnails.
    :param is_overwrite_amzn_thumbs: Re-create existing thumbnails of
        ``EBOK`` / ``EBSP`` books.
    :param is_overwrite_apnx: Passed through to ``generate_apnx_files``.
    :param skip_apnx: When true, APNX generation is skipped.
    :param kindlepath: Root directory of the device.
    :param is_azw: When true, ``.azw`` files are processed as well.
    :param days: ``None`` to process every file, otherwise a string holding
        the maximum file age in days.
    :param fix_thumb: Passed through to the image helpers.
    :param lubimy_czytac: Together with a truthy ``days``, enables the call
        to ``get_real_pages``.
    :param mark_real_pages: Passed through to ``get_real_pages``.
    :param patch_azw3: When ``True``, ``.azw3`` files of type ``PDOC`` that
        have an ASIN are rewritten with the output of ``DualMobiMetaFix``
        and then treated as ``EBOK``.
    :return: ``1`` when the thumbnails directory is missing, ``0`` otherwise.
    """
    docs = os.path.join(kindlepath, 'documents')
    thumb_dir = os.path.join(kindlepath, 'system', 'thumbnails')
    is_verbose = not is_silent
    if days is not None:
        dtt = datetime.today()
        days_int = int(days)
        print('Notice! Processing files not older than ' + days + ' days.')
    else:
        days_int = 0
        diff = 0

    # Validate the device layout before the temporary directory is created,
    # so that the early return does not leave the directory behind.
    if not os.path.isdir(thumb_dir):
        print('* ERROR! No Kindle device found in the specified path: "' +
              os.path.join(kindlepath) + '"')
        return 1

    # move CSV file to computer temp dir to speed up updating process
    tempdir = tempfile.mkdtemp(suffix='', prefix='extract_cover_thumbs-tmp-')
    csv_pages_name = 'extract_cover_thumbs_book_pages2.csv'
    csv_pages = os.path.join(tempdir, csv_pages_name)
    device_csv = os.path.join(docs, csv_pages_name)
    if os.path.isfile(device_csv):
        shutil.copy2(device_csv, csv_pages)

    # load ASIN list from CSV
    asinlist, filelist = asin_list_from_csv(csv_pages)

    print("START of extracting cover thumbnails...")
    kfx_extensions = ('.kfx', '.azw8')
    if is_azw:
        extensions = ('.azw', '.azw3', '.mobi', '.kfx', '.azw8')
    else:
        extensions = ('.azw3', '.mobi', '.kfx', '.azw8')
    for root, dirs, files in os.walk(docs):
        for name in files:
            if 'documents' + os.path.sep + 'dictionaries' in root:
                if is_verbose:
                    print('! Excluded dictionary:', name)
                continue
            if days is not None:
                try:
                    dt = os.path.getctime(os.path.join(root, name))
                except OSError:
                    continue
                # Age in whole days, measured from the midnight of the
                # file's ctime date.
                dt = datetime.fromtimestamp(dt).strftime('%Y-%m-%d')
                dt = datetime.strptime(dt, '%Y-%m-%d')
                diff = (dtt - dt).days
            lowered = name.lower()
            if not (lowered.endswith(extensions) and diff <= days_int):
                continue

            is_kfx = lowered.endswith(kfx_extensions)
            fide = name.decode(sys.getfilesystemencoding())
            if is_verbose:
                try:
                    print('* %s:' % fide, end=' ')
                except Exception:
                    # The name may not be printable on this console.
                    print('* %r:' % fide, end=' ')
            mobi_path = os.path.join(root, name)
            if "attachables" in mobi_path:
                continue

            if is_kfx:
                if '_sample' in fide:
                    if is_verbose:
                        print('KFX Sample. Skipping...')
                    continue
                try:
                    kfx_metadata = get_kindle_kfx_metadata(mobi_path)
                except Exception as e:
                    print('ERROR! Extracting metadata from %s: %s' % (
                        fide, unicode(e)
                    ))
                    continue
                doctype = kfx_metadata.get("cde_content_type")
                if not doctype:
                    print('ERROR! No document type found in "%s"' % fide)
                    continue
                asin = kfx_metadata.get("ASIN")
            else:
                if '!DeviceUpgradeLetter!' in fide:
                    if is_verbose:
                        print('Upgrade Letter. Skipping...')
                    continue
                dump_pages(asinlist, filelist, csv_pages, root, name,
                           is_verbose)
                # Only bytes 60..67 are needed to recognise the container,
                # so avoid loading the whole book into memory.
                with open(mobi_path, 'rb') as mf:
                    mf.seek(60)
                    signature = mf.read(8)
                if signature != 'BOOKMOBI':
                    print('* Not a valid MOBI file "%s".'
                          % fide)
                    continue
                section = kindle_unpack.Sectionizer(mobi_path)
                mh = kindle_unpack.MobiHeader(section, 0)
                metadata = mh.getmetadata()
                try:
                    asin = metadata['ASIN'][0]
                except KeyError:
                    asin = None
                try:
                    doctype = metadata['Document Type'][0]
                except KeyError:
                    doctype = None

            if (patch_azw3 is True and
                    doctype == 'PDOC' and
                    asin is not None and
                    lowered.endswith('.azw3')):
                print("PATCHING AZW3", end=' ')
                dmf = DualMobiMetaFix(mobi_path)
                # Build the patched content BEFORE opening the book for
                # writing: opening with 'wb' truncates the file, so a failure
                # in getresult() must not be able to destroy the book.
                patched = dmf.getresult()
                with open(mobi_path, 'wb') as patched_file:
                    patched_file.write(patched)
                doctype = 'EBOK'
            if asin is None:
                print('ERROR! No ASIN found in "%s"' % fide)
                continue

            thumbpath = os.path.join(
                thumb_dir, 'thumbnail_%s_%s_portrait.jpg' % (asin, doctype)
            )
            needs_cover = (
                not os.path.isfile(thumbpath) or
                (is_overwrite_pdoc_thumbs and doctype == 'PDOC') or
                (is_overwrite_amzn_thumbs and doctype in ('EBOK', 'EBSP'))
            )
            if not needs_cover:
                if is_verbose:
                    print('skipped (cover present or overwriting not forced).')
                continue

            if is_kfx:
                image_data = kfx_metadata.get("cover_image_data")
                if not image_data:
                    print('ERROR! No cover image found in "%s"' % fide)
                    continue
            if is_verbose:
                print('PROCESSING COVER:', end=' ')
            try:
                if is_kfx:
                    cover = process_image(image_data.decode('base64'),
                                          fix_thumb, doctype,
                                          is_verbose)
                else:
                    cover = get_cover_image(section, mh, metadata,
                                            doctype, name,
                                            fide, is_verbose, fix_thumb)
            except IOError:
                print('FAILED! Image format unrecognized...')
                continue
            if not cover:
                continue
            cover.save(thumbpath)

    if lubimy_czytac and days:
        print("START of downloading real book page numbers...")
        get_real_pages(csv_pages, mark_real_pages)
        print("FINISH of downloading real book page numbers...")
    if not skip_apnx:
        print("START of generating book page numbers (APNX files)...")
        generate_apnx_files(docs, is_verbose, is_overwrite_apnx,
                            days, tempdir)
        print("FINISH of generating book page numbers (APNX files)...")

    if is_overwrite_pdoc_thumbs:
        for c in os.listdir(thumb_dir):
            if (c.startswith('thumbnail') and c.endswith('.jpg') and
                    not c.endswith('portrait.jpg')):
                fix_generated_thumbs(os.path.join(thumb_dir, c),
                                     is_verbose, fix_thumb)
    print("FINISH of extracting cover thumbnails...")
    shutil.copy2(csv_pages, device_csv)
    clean_temp(tempdir)
    return 0