예제 #1
0
    def build_pdf_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        extension: str = ".pdf",
        page_id: int = -1,
        mimetype: str = "",
    ) -> None:
        """
        Write a PDF preview of the PDF stored at file_path into the cache.

        When page_id is None or <= -1, every page of the source document is
        copied; otherwise only the page at index int(page_id) is kept.
        The output path is the plain concatenation
        cache_path + preview_name + extension (no separator is inserted).

        :param file_path: path of the source PDF file
        :param preview_name: base name of the generated preview file
        :param cache_path: prefix of the generated preview path
        :param extension: suffix of the generated preview path
        :param page_id: index of the page to keep, None or <= -1 for all pages
        :param mimetype: not used by this method
        """
        with open(file_path, "rb") as source_stream:
            reader = utils.get_decrypted_pdf(source_stream)
            writer = PdfFileWriter()

            wants_whole_document = page_id is None or page_id <= -1
            if wants_whole_document:
                page_indexes = range(reader.numPages)
            else:
                page_indexes = [int(page_id)]
            for page_index in page_indexes:
                writer.addPage(reader.getPage(page_index))

            # The document is fully built in memory before the destination
            # file is opened.
            pdf_buffer = BytesIO()
            writer.write(pdf_buffer)
            pdf_buffer.seek(0, 0)

            preview_path = "{path}{file_name}{extension}".format(
                path=cache_path, file_name=preview_name, extension=extension
            )

            with open(preview_path, "wb") as preview_file:
                # Copy the buffer to disk in 1024-byte chunks.
                while True:
                    chunk = pdf_buffer.read(1024)
                    if not chunk:
                        break
                    preview_file.write(chunk)
예제 #2
0
    def build_jpeg_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        page_id: int,
        extension: str = ".jpg",
        size: utils.ImgDims = None,
        mimetype: str = "",
        attempt: int = 0,
    ) -> None:
        """
        Build the JPEG preview of one page of a document.

        The page is taken from the cached PDF version of the document
        (<cache_path>/<preview_name>.pdf) when that file exists; otherwise
        the source file is first converted with self._convert_to_pdf. The
        resulting image is written to <cache_path>/<preview_name><extension>.

        :param file_path: path of the source document
        :param preview_name: base name of the cache files
        :param cache_path: directory holding the cache files
        :param page_id: index of the page to render
        :param extension: suffix of the generated image file
        :param size: requested dimensions, self.default_size when falsy
        :param mimetype: forwarded to self._convert_to_pdf
        :param attempt: number of waiting rounds already spent
        :raises PreviewAbortedMaxAttempsExceeded: when
            self._cache_file_process_already_running still reports true for
            the cache file after 5 attempts
        """
        if not size:
            size = self.default_size
        cache_file = os.path.join(cache_path, preview_name)

        # Note - 10-10-2018 - Basile - protection against waiting forever:
        # same limits as before (5 attempts, 2 seconds apart), expressed as
        # a loop instead of a recursive call.
        while self._cache_file_process_already_running(cache_file):
            if attempt >= 5:
                raise PreviewAbortedMaxAttempsExceeded(
                    "Max attempts exceeded aborting preview")
            attempt += 1
            time.sleep(2)

        cached_pdf_path = os.path.join(cache_path, preview_name + ".pdf")
        # Only the file handle opened here is owned (and closed) by this
        # method; the stream returned by _convert_to_pdf is left untouched.
        cached_pdf_stream = None
        try:
            if os.path.exists(cached_pdf_path):
                cached_pdf_stream = open(cached_pdf_path, "rb")
                input_pdf_stream = cached_pdf_stream
            else:
                with open(file_path, "rb") as _file:
                    file_extension = os.path.splitext(file_path)[1]
                    input_pdf_stream = self._convert_to_pdf(
                        _file, file_extension, cache_path, cache_file,
                        mimetype)

            input_pdf = utils.get_decrypted_pdf(input_pdf_stream)
            intermediate_pdf = PdfFileWriter()
            intermediate_pdf.addPage(input_pdf.getPage(int(page_id)))

            # Once written, the single-page PDF lives entirely in this
            # buffer, so the cached PDF handle can be released afterwards.
            intermediate_pdf_stream = BytesIO()
            intermediate_pdf.write(intermediate_pdf_stream)
        finally:
            if cached_pdf_stream is not None:
                cached_pdf_stream.close()

        intermediate_pdf_stream.seek(0, 0)
        jpeg_stream = convert_pdf_to_jpeg(intermediate_pdf_stream, size)

        jpeg_preview_path = os.path.join(cache_path, preview_name + extension)
        with open(jpeg_preview_path, "wb") as jpeg_output_stream:
            buffer = jpeg_stream.read(1024)
            while buffer:
                jpeg_output_stream.write(buffer)
                buffer = jpeg_stream.read(1024)
예제 #3
0
    def get_page_number(self,
                        file_path: str,
                        preview_name: str,
                        cache_path: str,
                        mimetype: str = "") -> int:
        """
        Return the number of pages of the document, using a cache file.

        The count is stored as text in cache_path + preview_name + "_page_nb".
        When that file is missing, the count is read from the PDF version of
        the document (cache_path + preview_name + ".pdf"), which is built
        with self.build_pdf_preview if it does not exist yet.

        :param file_path: path of the source document
        :param preview_name: base name of the cache files
        :param cache_path: prefix of the cache file paths (no separator is
            inserted between it and preview_name)
        :param mimetype: forwarded to self.build_pdf_preview
        :return: the page count read back from the cache file
        """
        page_nb_file_path = cache_path + preview_name + "_page_nb"

        if not os.path.exists(page_nb_file_path):
            pdf_version_filepath = cache_path + preview_name + ".pdf"
            if not os.path.exists(pdf_version_filepath):
                self.build_pdf_preview(
                    file_path=file_path,
                    preview_name=preview_name,
                    cache_path=cache_path,
                    mimetype=mimetype,
                )

            # Count the pages BEFORE creating the cache file: if reading the
            # PDF fails, no empty "_page_nb" file is left behind to break
            # every later call on int("").
            with open(pdf_version_filepath, "rb") as pdf_stream:
                pdf_reader = utils.get_decrypted_pdf(pdf_stream)
                page_count = pdf_reader.numPages

            with open(page_nb_file_path, "w") as page_nb_file_stream:
                page_nb_file_stream.write(str(page_count))

        with open(page_nb_file_path, "r") as page_nb_stream:
            return int(page_nb_stream.read())
예제 #4
0
    def build_jpeg_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        page_id: int,
        extension: str = ".jpg",
        size: utils.ImgDims = None,
        mimetype: str = "",
    ) -> None:
        """
        Build the JPEG preview of a single page of a PDF file.

        The page at index int(page_id) is copied into a one-page PDF held in
        memory, converted with convert_pdf_to_jpeg, and the result is written
        to cache_path + preview_name + extension (plain concatenation, no
        separator is inserted).

        :param file_path: path of the source PDF file
        :param preview_name: base name of the generated preview file
        :param cache_path: prefix of the generated preview path
        :param page_id: index of the page to render
        :param extension: suffix of the generated preview path
        :param size: requested dimensions, self.default_size when falsy
        :param mimetype: not used by this method
        """
        target_size = size if size else self.default_size

        with open(file_path, "rb") as source_stream:
            # HACK - D.A. - 2017-08-11 Deactivate strict mode
            # This avoids crashes when PDFs are not standard
            # See https://github.com/mstamy2/PyPDF2/issues/244
            reader = utils.get_decrypted_pdf(source_stream, strict=False)

            single_page_pdf = PdfFileWriter()
            single_page_pdf.addPage(reader.getPage(int(page_id)))

            pdf_buffer = BytesIO()
            single_page_pdf.write(pdf_buffer)
            pdf_buffer.seek(0, 0)
            jpeg_stream = convert_pdf_to_jpeg(pdf_buffer, target_size)

            preview_path = "{path}{file_name}{extension}".format(
                path=cache_path, file_name=preview_name, extension=extension)
            with open(preview_path, "wb") as preview_file:
                # Copy the converted image to disk in 1024-byte chunks.
                while True:
                    chunk = jpeg_stream.read(1024)
                    if not chunk:
                        break
                    preview_file.write(chunk)
예제 #5
0
    def build_pdf_preview(
        self,
        file_path: str,
        preview_name: str,
        cache_path: str,
        extension: str = ".pdf",
        page_id: int = -1,
        mimetype: str = "",
    ) -> None:
        """
        Build the PDF preview of a document, whole or for a single page.

        The full document is first converted to an intermediate PDF stored in
        cache_path under the part of preview_name that precedes "-page", with
        a ".pdf" suffix. When page_id is None or negative that intermediate
        file is the result. Otherwise the requested page is extracted into
        <cache_path>/<preview_name><extension>.

        :param file_path: path of the source document
        :param preview_name: base name of the preview file
        :param cache_path: directory holding the cache files
        :param extension: suffix of the single-page preview file
        :param page_id: index of the page to extract, None or negative for
            the whole document
        :param mimetype: forwarded to self._convert_to_pdf
        """
        intermediate_pdf_filename = preview_name.split("-page")[0] + ".pdf"
        intermediate_pdf_file_path = os.path.join(cache_path,
                                                  intermediate_pdf_filename)
        flag_file_path = intermediate_pdf_file_path + "_flag"

        while not os.path.exists(intermediate_pdf_file_path):
            if os.path.exists(flag_file_path):
                # Wait 2 seconds, then retry
                # Info - B.L - 2018/09/28 - Protection for concurrent file
                # access. If two people try to preview the same file, one
                # will override the file while the other is reading it.
                # The wait is a loop rather than a recursive call so that a
                # long-lived flag file cannot exhaust the call stack.
                time.sleep(2)
                continue

            with open(file_path, "rb") as input_stream:
                input_extension = os.path.splitext(file_path)[1]
                # first step is to convert full document to full pdf
                self._convert_to_pdf(
                    file_content=input_stream,
                    input_extension=input_extension,
                    cache_path=cache_path,
                    output_filepath=intermediate_pdf_file_path,
                    mimetype=mimetype,
                )
            break

        if page_id is None or page_id < 0:
            return  # in this case, the intermediate file is the requested one

        output_file_path = os.path.join(
            cache_path, "{}{}".format(preview_name, extension))
        pdf_out = PdfFileWriter()
        with open(intermediate_pdf_file_path, "rb") as pdf_stream:
            # HACK - G.M - 2020-08-19 - Transform stream in a way pypdf2 can handle it
            # this should be removed with a future pdf builder.
            stream = BytesIO(b_(pdf_stream.read()))
            pdf_in = utils.get_decrypted_pdf(stream)
            pdf_out.addPage(pdf_in.getPage(page_id))

        with open(output_file_path, "wb") as output_file:
            pdf_out.write(output_file)
예제 #6
0
 def get_page_number(
     self,
     file_path: str,
     preview_name: str,
     cache_path: str,
     mimetype: typing.Optional[str] = None,
 ) -> int:
     """
     Return the number of pages of the PDF at file_path, using a cache file.

     The count is stored as text in cache_path + preview_name + "_page_nb"
     (plain concatenation, no separator is inserted). When that file exists
     its content is returned; otherwise the PDF is opened, its pages are
     counted and the cache file is written.

     :param file_path: path of the source PDF file
     :param preview_name: base name of the cache file
     :param cache_path: prefix of the cache file path
     :param mimetype: not used by this method
     :return: the page count
     """
     page_nb_file_path = cache_path + preview_name + "_page_nb"

     if os.path.exists(page_nb_file_path):
         with open(page_nb_file_path, "r") as count:
             return int(count.read())

     # Count the pages BEFORE creating the cache file: if the source is
     # missing or cannot be read, no empty "_page_nb" file is left behind
     # to break every later call on int("").
     with open(file_path, "rb") as doc:
         inputpdf = utils.get_decrypted_pdf(doc)
         num_page = inputpdf.numPages

     with open(page_nb_file_path, "w") as count:
         count.write(str(num_page))
     return int(num_page)