Ejemplo n.º 1
0
    def get_pages_fast(self, mobi_file_path):
        """
        2300 characters of uncompressed text per page.

        This is not meant to map 1 to 1 to a print book but to be a
        close enough measure.

        A test book was chosen and the characters were counted
        on one page. This number was round to 2240 then 60
        characters of markup were added to the total giving
        2300.

        Uncompressed text length is used because it's easily
        accessible in MOBI files (part of the header). Also,
        It's faster to work off of the length then to
        decompress and parse the actual text.
        """
        text_length = 0
        pages = []
        count = 0

        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        while count < text_length:
            pages.append(count)
            count += 2300

        return pages
Ejemplo n.º 2
0
    def get_pages_exact(self, mobi_file_path, page_count):
        """
        Get pages exact.

        Given a specified page count (such as from a custom column),
        create our array of pages for the apnx file by dividing by
        the content size of the book.
        """
        pages = []
        count = 0

        with open(mobi_file_path, 'rb') as mf:
            phead = PdbHeaderReader(mf)
            r0 = phead.section_data(0)
            text_length = struct.unpack('>I', r0[4:8])[0]

        chars_per_page = int(text_length / page_count)
        while count < text_length:
            pages.append(count)
            count += chars_per_page

        if len(pages) > page_count:
            # Rounding created extra page entries
            pages = pages[:page_count]

        return pages