예제 #1
0
    def pdf_printer(self, pdf_input_path):
        """Send a PDF through Ghostscript's ``mswinpr2`` output device.

        Args:
            pdf_input_path: Path of the PDF file handed to Ghostscript.

        Raises:
            Any exception raised by ``ghostscript.Ghostscript``. The shared
            interpreter instance is released before it propagates.
        """
        args = [
            'pdf_printer',  # argv[0] placeholder
            '-dNOPAUSE',
            '-sDEVICE=mswinpr2',
            pdf_input_path,
        ]

        # The binding is given byte strings in the locale's preferred encoding.
        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]

        try:
            ghostscript.Ghostscript(*args)
        finally:
            # Always release the module-level instance, even when the run
            # fails, so a later call starts from a clean state.
            ghostscript.cleanup()
예제 #2
0
파일: main.py 프로젝트: retip94/pdf-to-jpg
def pdf_to_jpg(pdf_input_path, jpeg_name):
    """Render a PDF to JPEG files at 300 dpi using Ghostscript.

    Output files are named ``<jpeg_name>-%03d.jpg``; the ``%03d`` placeholder
    is passed through to Ghostscript as part of ``-sOutputFile``.

    Args:
        pdf_input_path: Path of the PDF to convert.
        jpeg_name: Prefix (may include a directory) for the output files.
    """
    args = [
        "pdf2jpeg",  # actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r300",
        f'-sOutputFile={jpeg_name}-%03d.jpg',
        pdf_input_path,
    ]
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]

    try:
        # Let the context manager finish with the interpreter first ...
        with ghostscript.Ghostscript(*args):
            pass
    finally:
        # ... and only then delete the shared instance. Calling cleanup()
        # inside the ``with`` body deletes the instance while the context
        # manager still has to shut it down.
        ghostscript.cleanup()
예제 #3
0
def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Render a PDF to JPEG at 144 dpi through Ghostscript.

    Any previously created Ghostscript instance is cleaned up before the
    new run starts.

    Args:
        pdf_input_path: Path of the PDF to convert.
        jpeg_output_path: Value appended to Ghostscript's ``-sOutputFile=``.
    """
    # Drop whatever instance an earlier call may have left behind.
    ghostscript.cleanup()

    output_switch = "-sOutputFile=" + jpeg_output_path
    switches = (
        "pef2jpeg",  # argv[0]; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        output_switch,
        pdf_input_path,
    )

    codec = locale.getpreferredencoding()
    encoded_switches = []
    for switch in switches:
        encoded_switches.append(switch.encode(codec))

    ghostscript.Ghostscript(*encoded_switches)
예제 #4
0
    def convert_to_image(self):
        """
        Convert the PDF at ``self.pdf_file_spec`` to image files with Ghostscript.

        The output device is ``self.IMAGE_FORMAT``, the resolution ``self.dpi``
        and the output name is built from ``self.output_file`` and
        ``self.extension``. On a run (successful or not) the elapsed time is
        stored in ``self.conversion_duration`` and printed. If the PDF does
        not exist, a message is printed and nothing else happens.

        :return: self (allows chaining of methods, since the methods do not return any additional info).

        """
        # Specified PDF was not found.
        if not os.path.exists(self.pdf_file_spec):
            print(f"{self.name}: Unable to find '{self.pdf_file_spec}'")
            return self

        start_conversion = perf_counter()
        output_pattern = os.path.abspath(
            f"{self.output_file}-%00d.{self.extension}"
        )
        args = [
            "pdf2tiff",  # argv[0] placeholder
            "-dNOPAUSE",
            "-dSAFER",
            "-dBATCH",
            f"-dNumRenderingThreads={self.threads}",
            "-q",
            f"-sDEVICE={self.IMAGE_FORMAT}",
            f"-r{self.dpi}",
            f"-sOutputFile={output_pattern}",
            f"{self.pdf_file_spec}",
        ]
        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]

        try:
            gs_apis = ghostscript.Ghostscript(*args)
            gs_apis.exit()
        except Exception as exc:
            print(f"\tERROR ({self.name}): Exception: {exc}")
        finally:
            # The instance must be cleaned up after every execution --
            # including a failed one -- to allow conversion of additional
            # documents.
            ghostscript.cleanup()

        # Measure time to convert the PDF to image files.
        self.conversion_duration = perf_counter() - start_conversion
        print(
            f"{self.name}: Conversion took: {self.conversion_duration:0.4f} seconds."
        )

        return self
    def pdf2png(self, pdf_input_path, png_output_path):
        """Render a PDF to monochrome PNG at 300 dpi using Ghostscript.

        Failures are reported on stdout rather than raised (best effort).

        Args:
            pdf_input_path: Path of the PDF to convert.
            png_output_path: Value appended to Ghostscript's ``-sOutputFile=``.
        """
        args = [
            "pdf2png",  # actual value doesn't matter
            "-dNOPAUSE",
            "-sDEVICE=pngmono",
            "-r300",
            "-sOutputFile=" + png_output_path,
            pdf_input_path,
        ]

        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]

        try:
            ghostscript.Ghostscript(*args)
        except Exception as exc:
            # Report the error that actually occurred; the previous code
            # printed the GhostscriptError class object itself.
            print("Erro", exc)
        finally:
            # Release the shared instance whether or not the run succeeded.
            ghostscript.cleanup()
예제 #6
0
    def pdf2jpeg(self, pdf_input_path, jpeg_output_path):
        """Render a PDF to JPEG files at 300 dpi using Ghostscript.

        Read access is permitted for the directory containing the PDF and
        write access for ``jpeg_output_path``.

        Args:
            pdf_input_path: Path of the PDF to convert.
            jpeg_output_path: Path passed to ``--permit-file-write``.
        """
        args = [
            "gs",  # actual value doesn't matter
            "--permit-file-read=" + os.path.dirname(pdf_input_path),
            "--permit-file-write=" + jpeg_output_path,
            "-dNOPAUSE",
            "-sDEVICE=jpeg",
            "-dTextAlphaBits=4",
            "-r300",
            # NOTE(review): the output name is hard-coded here and
            # jpeg_output_path is only used for the write permission above.
            # The switch and its value are also passed as ONE argument, so
            # the file name presumably starts with a space -- confirm whether
            # "-sOutputFile=" + jpeg_output_path was intended.
            "-o a%03d.jpg",
            pdf_input_path,
        ]

        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]

        try:
            # Let the context manager shut the interpreter down first ...
            with ghostscript.Ghostscript(*args):
                pass
        finally:
            # ... then delete the shared instance. Doing this inside the
            # ``with`` body removes the instance before the context manager
            # is done with it.
            ghostscript.cleanup()
예제 #7
0
def process_input_from_request(request):
    """Extract the file attached to a request and turn it into image files.

    The file is taken from ``request.FILES['image']``. If that key is
    missing, ``request.POST['image']`` is treated as a URL and fetched with
    ``download_image``. Depending on the detected MIME type:

    * ``application/pdf`` -- every page is rendered to a grayscale PNG with
      Ghostscript inside a fresh temporary directory.
    * ``image/tiff`` -- every frame is saved as a JPEG next to a temp file.
    * ``text/html`` -- rejected as "Not a file object".
    * anything else -- passed to ``fix_image_orientation_using_exif``.

    Returns:
        A 5-tuple ``(error, files, file_name, mime_type, file_size)``.
        ``error`` is ``None`` on success and ``files`` is the list of
        generated file paths. On failure ``error`` carries the failure
        response and ``files`` is empty.
    """
    file_name = mime_type = ""
    try:
        image = request.FILES['image']
        print("IMAGE ", image)

    except MultiValueDictKeyError:
        # No uploaded file: fall back to a URL supplied in the POST data.
        try:
            file_url = request.POST['image']
            if not file_url:
                raise MultiValueDictKeyError

            file_name = file_url.split('/')[-1]
            image = download_image(file_url) if is_uri(file_url) else False
            if not image:
                raise Exception

        except MultiValueDictKeyError:
            return HttpResponse({
                'status': 'FAIL',
                'status_code': 204,
                'message': "No Content",
                'file_name': None,
                'file_type': None,
                'url': str(request.path)
            }), [], "", "", ""

        except Exception:
            return HttpResponse({
                'status': 'FAIL',
                'status_code': 204,
                'message': "Invalid URI",
                'file_name': None,
                'file_type': None,
                'url': str(request.path)
            }), [], "", "", ""

    file_name = image.name if not file_name else file_name

    # Check readability BEFORE the first read() call, and with an explicit
    # test instead of ``assert`` (asserts are stripped under ``python -O``).
    if not hasattr(image, "read"):
        return HttpResponse({
            'status': 'FAIL',
            'status_code': 204,
            'message': "File Not Readable",
            'file_name': file_name,
            'file_type': mime_type.split('/')[-1],
            'url': str(request.path)
        }), [], "", "", ""

    # Sniff the MIME type from the first KiB of content.
    mime_type = magic.from_buffer(image.read(1024), mime=True)
    file_size = image.getbuffer().nbytes if isinstance(
        image, io.BytesIO) else image.size

    if mime_type == 'application/pdf':
        # Rewind, because the MIME sniffing above consumed some bytes.
        if isinstance(image, io.BytesIO):
            image.seek(0)
        else:
            image.open()

        # Ghostscript needs a real file, so spool the PDF to disk.
        # NOTE(review): this temporary PDF is not deleted afterwards.
        pdf_fd, temp_local_filename = tempfile.mkstemp()
        with os.fdopen(pdf_fd, 'wb') as tmp_pdf:
            tmp_pdf.write(image.read())

        temp_local_dir = tempfile.mkdtemp()
        gs_args = [
            "pdf2png",  # argv[0] placeholder
            # Every switch must be its own argument; joining several with
            # spaces in one string hands Ghostscript a single bogus switch.
            "-dSAFER",
            "-dBATCH",
            "-dNOPAUSE",
            "-r300",
            "-sDEVICE=pnggray",
            "-dTextAlphaBits=4",
            "-sPAPERSIZE=a4",
            "-o",
            # mkdtemp() returns a path without a trailing separator, so join
            # explicitly to place the pages INSIDE the temp directory.
            os.path.join(temp_local_dir, "page-%02d.png"),
            temp_local_filename,
        ]

        encoding = locale.getpreferredencoding()
        gs_args = [gs_arg.encode(encoding) for gs_arg in gs_args]
        try:
            with ghostscript.Ghostscript(*gs_args):
                pass
        finally:
            # Delete the shared instance only after the context manager has
            # finished with it.
            ghostscript.cleanup()

        files = sorted(glob.glob(os.path.join(temp_local_dir, "*.png")))
        print(files)
        return None, files, file_name, mime_type, file_size

    elif mime_type == 'image/tiff':
        page_files = []
        # Only the unique path is used (as a prefix); close the descriptor
        # mkstemp() opened so it does not leak.
        tiff_fd, temp_local_filename = tempfile.mkstemp()
        os.close(tiff_fd)

        if isinstance(image, io.BytesIO):
            image.seek(0)
        else:
            image.open()
        pdf_tiff = wi(file=image, resolution=180)

        for i, page in enumerate(pdf_tiff.sequence):
            with wi(page) as page_image:
                page_image.alpha_channel = False
                img_buffer = np.asarray(bytearray(
                    page_image.make_blob(format='jpeg')),
                                        dtype='uint8')
                bytesio = io.BytesIO(img_buffer)

                page_path = temp_local_filename + '_' + str(i) + '.jpeg'
                page_jpeg = Image.open(bytesio)
                page_jpeg.save(page_path)
                page_files.append(page_path)

        return None, page_files, file_name, mime_type, file_size

    elif mime_type == 'text/html':
        return HttpResponse(
            status='FAIL',
            status_code=204,
            message="Not a file object",
            file_name=None,
            file_type=None,
            url=str(
                request.path)).to_dict(), [], file_name, mime_type, file_size

    else:
        # mkstemp() returns an (fd, path) tuple; the whole tuple is handed to
        # the helper and only the path is returned.
        # NOTE(review): confirm the helper expects the tuple and closes the fd.
        temp_local_filename = tempfile.mkstemp()
        fix_image_orientation_using_exif(image, temp_local_filename)
        return None, [temp_local_filename[1]], file_name, mime_type, file_size