def build_pdf_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    extension: str = ".pdf",
    page_id: int = -1,
    mimetype: str = "",
) -> None:
    """
    Generate the large PDF preview of ``file_path`` in the cache.

    :param file_path: path of the source PDF, opened in binary mode.
    :param preview_name: base name of the preview file.
    :param cache_path: prefix of the output path. It is concatenated as-is
        with ``preview_name`` and ``extension`` (no separator is inserted).
    :param extension: suffix appended to the preview file name.
    :param page_id: index of the single page to keep; ``None`` or any
        negative value keeps every page of the document.
    :param mimetype: not used by this implementation.
    """
    with open(file_path, "rb") as source_file:
        reader = utils.get_decrypted_pdf(source_file)
        writer = PdfFileWriter()

        # Decide which pages go into the preview: all of them, or just one.
        wants_whole_document = page_id is None or page_id <= -1
        if wants_whole_document:
            page_indexes = range(reader.numPages)
        else:
            page_indexes = [int(page_id)]
        for page_index in page_indexes:
            writer.addPage(reader.getPage(page_index))

        # Serialize in memory first, then copy the result to the cache file.
        serialized_pdf = BytesIO()
        writer.write(serialized_pdf)
        serialized_pdf.seek(0, 0)

        target_path = "{path}{file_name}{extension}".format(
            path=cache_path, file_name=preview_name, extension=extension
        )
        with open(target_path, "wb") as target_file:
            while True:
                chunk = serialized_pdf.read(1024)
                if not chunk:
                    break
                target_file.write(chunk)
def build_jpeg_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    page_id: int,
    extension: str = ".jpg",
    size: utils.ImgDims = None,
    mimetype: str = "",
    attempt: int = 0,
) -> None:
    """
    Generate the JPEG preview of one page of ``file_path`` in the cache.

    The page is taken from ``<cache_path>/<preview_name>.pdf`` when that file
    exists; otherwise the source document is first converted with
    ``self._convert_to_pdf``.

    :param file_path: path of the source document.
    :param preview_name: base name of the preview files in the cache.
    :param cache_path: directory holding the cached files.
    :param page_id: index of the page to render (converted with ``int``).
    :param extension: suffix of the generated image file.
    :param size: requested dimensions; ``self.default_size`` when falsy.
    :param mimetype: forwarded to ``self._convert_to_pdf``.
    :param attempt: number of retries already performed while waiting for a
        concurrent process working on the same cache file.
    :raises PreviewAbortedMaxAttempsExceeded: when the cache file is still
        being processed after 5 retries.
    """
    if not size:
        size = self.default_size

    cache_file = os.path.join(cache_path, preview_name)
    if self._cache_file_process_already_running(cache_file):
        # Note - 10-10-2018 - Basile - infinite recursion protection
        if attempt >= 5:
            raise PreviewAbortedMaxAttempsExceeded(
                "Max attempts exceeded aborting preview")
        attempt += 1
        time.sleep(2)
        return self.build_jpeg_preview(
            file_path=file_path,
            preview_name=preview_name,
            cache_path=cache_path,
            extension=extension,
            page_id=page_id,
            size=size,
            attempt=attempt,
            mimetype=mimetype,
        )

    cached_pdf_path = os.path.join(cache_path, preview_name + ".pdf")
    # Only the handle opened here is tracked for closing; the stream returned
    # by _convert_to_pdf is left untouched since its ownership is not visible
    # from this method.
    cached_pdf_file = None
    if os.path.exists(cached_pdf_path):
        cached_pdf_file = open(cached_pdf_path, "rb")
        input_pdf_stream = cached_pdf_file
    else:
        with open(file_path, "rb") as _file:
            file_extension = os.path.splitext(file_path)[1]
            input_pdf_stream = self._convert_to_pdf(
                _file, file_extension, cache_path, cache_file, mimetype)

    try:
        input_pdf = utils.get_decrypted_pdf(input_pdf_stream)
        intermediate_pdf = PdfFileWriter()
        intermediate_pdf.addPage(input_pdf.getPage(int(page_id)))

        # The page must be serialized while the input stream is still open.
        intermediate_pdf_stream = BytesIO()
        intermediate_pdf.write(intermediate_pdf_stream)
        intermediate_pdf_stream.seek(0, 0)
    finally:
        # The cached PDF handle used to be leaked on every call.
        if cached_pdf_file is not None:
            cached_pdf_file.close()

    jpeg_stream = convert_pdf_to_jpeg(intermediate_pdf_stream, size)

    jpeg_preview_path = os.path.join(cache_path, preview_name + extension)
    with open(jpeg_preview_path, "wb") as jpeg_output_stream:
        buffer = jpeg_stream.read(1024)
        while buffer:
            jpeg_output_stream.write(buffer)
            buffer = jpeg_stream.read(1024)
def get_page_number(self, file_path: str, preview_name: str, cache_path: str, mimetype: str = "") -> int:
    """
    Return the number of pages of the document, caching it on disk.

    The count is stored in ``cache_path + preview_name + "_page_nb"`` (plain
    string concatenation). When that file is missing, the count is read from
    ``cache_path + preview_name + ".pdf"``, which is built with
    ``self.build_pdf_preview`` if it does not exist yet.

    :param file_path: path of the source document.
    :param preview_name: base name of the cached files.
    :param cache_path: prefix of the cached files.
    :param mimetype: forwarded to ``self.build_pdf_preview``.
    :return: the page count.
    """
    page_nb_file_path = cache_path + preview_name + "_page_nb"
    if os.path.exists(page_nb_file_path):
        with open(page_nb_file_path, "r") as page_nb_stream:
            return int(page_nb_stream.read())

    pdf_version_filepath = cache_path + preview_name + ".pdf"
    if not os.path.exists(pdf_version_filepath):
        self.build_pdf_preview(
            file_path=file_path,
            preview_name=preview_name,
            cache_path=cache_path,
            mimetype=mimetype,
        )

    # Read the page count BEFORE creating the cache file: if the PDF cannot
    # be opened or parsed, no empty "_page_nb" file is left behind to break
    # every later call on int("").
    with open(pdf_version_filepath, "rb") as pdf_stream:
        pdf_reader = utils.get_decrypted_pdf(pdf_stream)
        page_nb = int(pdf_reader.numPages)

    with open(page_nb_file_path, "w") as page_nb_file_stream:
        page_nb_file_stream.write(str(page_nb))
    return page_nb
def build_jpeg_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    page_id: int,
    extension: str = ".jpg",
    size: utils.ImgDims = None,
    mimetype: str = "",
) -> None:
    """
    Generate the small JPEG preview of one page of the PDF ``file_path``.

    :param file_path: path of the source PDF, opened in binary mode.
    :param preview_name: base name of the preview file.
    :param cache_path: prefix of the output path. It is concatenated as-is
        with ``preview_name`` and ``extension`` (no separator is inserted).
    :param page_id: index of the page to render (converted with ``int``).
    :param extension: suffix appended to the preview file name.
    :param size: requested dimensions; ``self.default_size`` when falsy.
    :param mimetype: not used by this implementation.
    """
    if not size:
        size = self.default_size

    with open(file_path, "rb") as source_file:
        # HACK - D.A. - 2017-08-11 Deactivate strict mode
        # This avoids crashes when PDFs are not standard
        # See https://github.com/mstamy2/PyPDF2/issues/244
        reader = utils.get_decrypted_pdf(source_file, strict=False)

        # Isolate the requested page in a one-page in-memory PDF.
        single_page_writer = PdfFileWriter()
        single_page_writer.addPage(reader.getPage(int(page_id)))
        single_page_pdf = BytesIO()
        single_page_writer.write(single_page_pdf)
        single_page_pdf.seek(0, 0)

        jpeg_data = convert_pdf_to_jpeg(single_page_pdf, size)

        target_path = "{path}{file_name}{extension}".format(
            path=cache_path, file_name=preview_name, extension=extension)
        with open(target_path, "wb") as target_file:
            while True:
                chunk = jpeg_data.read(1024)
                if not chunk:
                    break
                target_file.write(chunk)
def build_pdf_preview(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    extension: str = ".pdf",
    page_id: int = -1,
    mimetype: str = "",
) -> None:
    """
    Generate the PDF preview of ``file_path`` in the cache.

    The whole document is first converted to an intermediate PDF named after
    the part of ``preview_name`` preceding ``"-page"``. When a single page is
    requested, it is then extracted into
    ``<cache_path>/<preview_name><extension>``.

    :param file_path: path of the source document.
    :param preview_name: base name of the preview file.
    :param cache_path: directory holding the cached files.
    :param extension: suffix of the single-page preview file.
    :param page_id: index of the page to extract; ``None`` or a negative
        value means the intermediate (full) PDF is the requested preview.
    :param mimetype: forwarded to ``self._convert_to_pdf``.
    """
    intermediate_pdf_filename = preview_name.split("-page")[0] + ".pdf"
    intermediate_pdf_file_path = os.path.join(cache_path, intermediate_pdf_filename)

    # Info - B.L - 2018/09/28 - Protection for concurrent file access
    # If two people try to preview the same file, one would overwrite the
    # file while the other is reading it.
    # The wait is a loop rather than a recursive call so that a long
    # conversion cannot exhaust the interpreter's recursion limit.
    while not os.path.exists(intermediate_pdf_file_path):
        if os.path.exists(intermediate_pdf_file_path + "_flag"):
            # Wait 2 seconds, then retry
            time.sleep(2)
            continue

        with open(file_path, "rb") as input_stream:
            input_extension = os.path.splitext(file_path)[1]
            # first step is to convert full document to full pdf
            self._convert_to_pdf(
                file_content=input_stream,
                input_extension=input_extension,
                cache_path=cache_path,
                output_filepath=intermediate_pdf_file_path,
                mimetype=mimetype,
            )
        break

    if page_id is None or page_id < 0:
        return  # in this case, the intermediate file is the requested one

    pdf_out = PdfFileWriter()
    with open(intermediate_pdf_file_path, "rb") as pdf_stream:
        # HACK - G.M - 2020-08-19 - Transform stream in a way pypdf2 can handle it
        # this should be removed with a future pdf builder.
        stream = BytesIO(b_(pdf_stream.read()))
        pdf_in = utils.get_decrypted_pdf(stream)
        output_file_path = os.path.join(
            cache_path, "{}{}".format(preview_name, extension))
        pdf_out.addPage(pdf_in.getPage(page_id))

    with open(output_file_path, "wb") as output_file:
        pdf_out.write(output_file)
def get_page_number(
    self,
    file_path: str,
    preview_name: str,
    cache_path: str,
    mimetype: typing.Optional[str] = None,
) -> int:
    """
    Return the number of pages of the PDF ``file_path``, caching it on disk.

    The count is stored in ``cache_path + preview_name + "_page_nb"`` (plain
    string concatenation) and read back from there on later calls.

    :param file_path: path of the source PDF, opened in binary mode.
    :param preview_name: base name of the cache file.
    :param cache_path: prefix of the cache file.
    :param mimetype: not used by this implementation.
    :return: the page count.
    """
    page_nb_file_path = cache_path + preview_name + "_page_nb"
    if os.path.exists(page_nb_file_path):
        with open(page_nb_file_path, "r") as count:
            return int(count.read())

    # Read the page count BEFORE creating the cache file: if the source
    # cannot be opened or parsed, no empty "_page_nb" file is left behind to
    # break every later call on int("").
    with open(file_path, "rb") as doc:
        inputpdf = utils.get_decrypted_pdf(doc)
        num_page = int(inputpdf.numPages)

    with open(page_nb_file_path, "w") as count:
        count.write(str(num_page))
    return num_page