def _parse_pages(self, book_path):
    """Extract the text of every page of the PDF at *book_path*.

    The file is opened in binary mode and handed to ``PdfFileReader``.
    Progress is reported through ``self._log`` before and after extraction,
    including the page count and the elapsed wall-clock time.

    Args:
        book_path: Path of the PDF file to read.

    Returns:
        A list with one dict per page, in document order. Each dict has
        ``'page_num'`` (the 1-based page number as a string) and ``'text'``
        (whatever ``page.extractText()`` returns for that page).
    """
    started_at = time.time()

    with open(book_path, 'rb') as pdf_file:
        reader = PdfFileReader(pdf_file)

        page_count = reader.getNumPages()
        self._log.info('Start processing %s with %s pages...', book_path, page_count)

        # The flattened page list may not have been built yet; build it
        # before iterating over it.
        if reader.flattenedPages is None:
            reader._flatten()

        # Text must be extracted while the underlying file is still open.
        pages = [
            {'page_num': str(number), 'text': pdf_page.extractText()}
            for number, pdf_page in enumerate(reader.flattenedPages, start=1)
        ]

    elapsed = time.time() - started_at
    self._log.info('Finished processing %s in %s seconds.', book_path, elapsed)
    return pages