def get_pages_fast(self, mobi_file_path):
    """
    Estimate page start offsets at a fixed 2300 characters per page.

    The result is not meant to match a print edition page for page, only to
    be a close enough measure. The figure comes from counting the characters
    on one page of a test book, rounding that count to 2240, and adding 60
    characters of markup for a total of 2300.

    The uncompressed text length is used because it is easily accessible in
    MOBI files (it is part of the header), and working from the length is
    faster than decompressing and parsing the actual text.

    Returns a list of character offsets, one per page, starting at 0. The
    list is empty when the recorded text length is 0.
    """
    with open(mobi_file_path, 'rb') as stream:
        header = PdbHeaderReader(stream)
        record0 = header.section_data(0)
        # Bytes 4..7 of section 0 are decoded as a big-endian unsigned
        # 32-bit integer holding the text length.
        (total_chars,) = struct.unpack('>I', record0[4:8])

    # One entry every 2300 characters: 0, 2300, 4600, ... below total_chars.
    return list(range(0, total_chars, 2300))
def get_pages_exact(self, mobi_file_path, page_count):
    """
    Build page start offsets for a known page count.

    Given a specified page count (such as from a custom column), create the
    array of pages for the apnx file by dividing the content size of the
    book evenly between the pages.

    Returns a list of character offsets, one per page, starting at 0. The
    list holds at most ``page_count`` entries. It holds fewer when the text
    has fewer characters than ``page_count``, since two pages cannot start
    at the same offset. It is empty when ``page_count`` is zero or negative,
    or when the recorded text length is 0.
    """
    # A non-positive page count cannot be mapped onto the text. Previously
    # zero raised ZeroDivisionError and a negative value never terminated.
    if page_count <= 0:
        return []

    with open(mobi_file_path, 'rb') as mf:
        phead = PdbHeaderReader(mf)
        r0 = phead.section_data(0)
        # Bytes 4..7 of section 0 are decoded as a big-endian unsigned
        # 32-bit integer holding the text length.
        text_length = struct.unpack('>I', r0[4:8])[0]

    # Clamp the step to at least one character. When page_count exceeds
    # text_length the quotient truncates to 0, which previously left the
    # offset stuck at 0 and grew the list without bound.
    chars_per_page = max(1, int(text_length / page_count))

    pages = list(range(0, text_length, chars_per_page))
    if len(pages) > page_count:
        # Truncating the quotient can create extra page entries; drop them.
        pages = pages[:page_count]
    return pages