def pdf_printer(self, pdf_input_path):
    """Run Ghostscript on a PDF using the ``mswinpr2`` output device.

    The argument list is encoded to bytes with the locale's preferred
    encoding, handed to ``ghostscript.Ghostscript`` and the module-level
    ``ghostscript.cleanup()`` is called afterwards.

    :param pdf_input_path: path of the PDF file passed to Ghostscript.
    """
    codec = locale.getpreferredencoding()
    gs_argv = [
        option.encode(codec)
        for option in (
            'pdf_printer',  # argv[0] slot
            '-dNOPAUSE',
            '-sDEVICE=mswinpr2',
            pdf_input_path,
        )
    ]
    ghostscript.Ghostscript(*gs_argv)
    ghostscript.cleanup()
def pdf_to_jpg(pdf_input_path, jpeg_name):
    """Render a PDF to JPEG files at 300 dpi with Ghostscript.

    The output file pattern handed to Ghostscript is
    ``<jpeg_name>-%03d.jpg``.

    :param pdf_input_path: path of the PDF file to convert.
    :param jpeg_name: prefix (may include a directory) for the output files.
    """
    args = [
        "pdf2jpeg",  # actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r300",
        f'-sOutputFile={jpeg_name}-%03d.jpg',
        pdf_input_path,
    ]
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]

    # The interpreter has to exit (on leaving the ``with`` block) before the
    # instance is deleted by cleanup(); calling cleanup() inside the block
    # reversed that order.
    with ghostscript.Ghostscript(*args):
        pass
    ghostscript.cleanup()
def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Convert a PDF to JPEG at 144 dpi with Ghostscript.

    ``ghostscript.cleanup()`` is called first, then Ghostscript is run with
    the ``jpeg`` device writing to *jpeg_output_path*.

    :param pdf_input_path: path of the PDF file to convert.
    :param jpeg_output_path: value used for Ghostscript's ``-sOutputFile``.
    """
    ghostscript.cleanup()

    output_switch = "-sOutputFile=" + jpeg_output_path
    gs_options = (
        "pef2jpeg",  # actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        output_switch,
        pdf_input_path,
    )
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[option.encode(codec) for option in gs_options])
def convert_to_image(self):
    """
    Render the PDF at ``self.pdf_file_spec`` to image files via Ghostscript.

    The output device, resolution, thread count and output name come from
    ``self.IMAGE_FORMAT``, ``self.dpi``, ``self.threads``,
    ``self.output_file`` and ``self.extension``. The elapsed time is stored
    in ``self.conversion_duration`` and printed. Exceptions raised while
    running Ghostscript are printed, not propagated. When the PDF does not
    exist only a message is printed.

    :return: self, so that method calls can be chained.
    """
    # Guard clause: the specified PDF was not found.
    if not os.path.exists(self.pdf_file_spec):
        print(f"{self.name}: Unable to find '{self.pdf_file_spec}'")
        return self

    started_at = perf_counter()
    gs_options = [
        "pdf2tiff",
        "-dNOPAUSE",
        "-dSAFER",
        "-dBATCH",
        f"-dNumRenderingThreads={self.threads}",
        "-q",
        f"-sDEVICE={self.IMAGE_FORMAT}",
        f"-r{self.dpi}",
        "-sOutputFile="
        + os.path.abspath(f"{self.output_file}-%00d.{self.extension}"),
        f"{self.pdf_file_spec}",
    ]
    codec = locale.getpreferredencoding()
    gs_argv = [option.encode(codec) for option in gs_options]

    # Run the conversion, then exit and clean up the instance so that
    # additional documents can be converted afterwards.
    try:
        interpreter = ghostscript.Ghostscript(*gs_argv)
        interpreter.exit()
        ghostscript.cleanup()
    except Exception as exc:
        print(f"\tERROR ({self.name}): Exception: {exc}")

    # Record and report how long the conversion took.
    self.conversion_duration = perf_counter() - started_at
    print(
        f"{self.name}: Conversion took: {self.conversion_duration:0.4f} seconds."
    )
    return self
def pdf2png(self, pdf_input_path, png_output_path):
    """Convert a PDF to monochrome PNG at 300 dpi with Ghostscript.

    Errors raised while running Ghostscript are reported on stdout and not
    propagated (best-effort conversion).

    :param pdf_input_path: path of the PDF file to convert.
    :param png_output_path: value used for Ghostscript's ``-sOutputFile``.
    """
    args = [
        "pdf2png",  # actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=pngmono",
        "-r300",
        "-sOutputFile=" + png_output_path,
        pdf_input_path,
    ]
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]

    try:
        ghostscript.Ghostscript(*args)
        ghostscript.cleanup()
    except Exception as exc:
        # Previously a bare ``except`` that printed the GhostscriptError
        # *class*; report the error that was actually raised and let
        # KeyboardInterrupt/SystemExit propagate.
        print("Erro", exc)
def pdf2jpeg(self, pdf_input_path, jpeg_output_path):
    """Render a PDF to JPEG files at 300 dpi with Ghostscript.

    Read access is permitted for the directory containing the PDF and write
    access for *jpeg_output_path*.

    :param pdf_input_path: path of the PDF file to convert.
    :param jpeg_output_path: path passed to ``--permit-file-write``.
    """
    args = [
        "gs",  # actual value doesn't matter
        "--permit-file-read=" + os.path.dirname(pdf_input_path),
        "--permit-file-write=" + jpeg_output_path,
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-dTextAlphaBits=4",
        "-r300",
        # NOTE(review): the output pattern is hard-coded and passed as one
        # argv entry including the space; jpeg_output_path is not used as
        # the output file. Confirm whether "-sOutputFile=" + jpeg_output_path
        # was intended.
        "-o a%03d.jpg",
        pdf_input_path,
    ]
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]

    # The interpreter has to exit (on leaving the ``with`` block) before the
    # instance is deleted by cleanup(); calling cleanup() inside the block
    # reversed that order.
    with ghostscript.Ghostscript(*args):
        pass
    ghostscript.cleanup()
def process_input_from_request(request):
    """Extract the uploaded (or URL-referenced) file and split it into images.

    The file is taken from ``request.FILES['image']``; if that key is absent,
    ``request.POST['image']`` is treated as a URL and downloaded. Depending on
    the detected MIME type the input is converted as follows:

    * ``application/pdf`` -- rendered page by page to PNG with Ghostscript;
    * ``image/tiff`` -- every frame is written as a JPEG;
    * ``text/html`` -- rejected with a failure response;
    * anything else -- passed to ``fix_image_orientation_using_exif``.

    :param request: request object exposing ``FILES``, ``POST`` and ``path``.
    :return: a 5-tuple ``(error_response, files, file_name, mime_type,
        file_size)``. ``error_response`` is ``None`` on success; on failure
        ``files`` is an empty list.
    """

    def _failure(message, name, kind):
        # Build the failure tuple shared by the early-exit paths.
        return HttpResponse({
            'status': 'FAIL',
            'status_code': 204,
            'message': message,
            'file_name': name,
            'file_type': kind,
            'url': str(request.path)
        }), [], "", "", ""

    file_name = mime_type = ""
    try:
        image = request.FILES['image']
        print("IMAGE ", image)
    except MultiValueDictKeyError:
        try:
            file_url = request.POST['image']
            if not file_url:
                raise MultiValueDictKeyError
            file_name = file_url.split('/')[-1]
            image = download_image(file_url) if is_uri(file_url) else False
            if not image:
                raise Exception
        except MultiValueDictKeyError:
            return _failure("No Content", None, None)
        except Exception:
            return _failure("Invalid URI", None, None)

    # Downloaded streams may carry no ``name`` attribute; fall back to "".
    file_name = file_name if file_name else getattr(image, "name", "")

    # Verify readability *before* the first read; the original asserted it
    # only after image.read() had already been called (and ``assert`` is
    # stripped under -O).
    if not hasattr(image, "read"):
        return _failure("File Not Readable", file_name, mime_type.split('/')[-1])

    mime_type = magic.from_buffer(image.read(1024), mime=True)
    file_size = image.getbuffer().nbytes if isinstance(
        image, io.BytesIO) else image.size

    if mime_type in ['application/pdf']:
        # Rewind after the MIME sniffing read above.
        image.seek(0) if isinstance(image, io.BytesIO) else image.open()

        # Ghostscript needs a real file: spill the upload to a temp file.
        fd, temp_local_filename = tempfile.mkstemp()
        with os.fdopen(fd, 'wb') as pdf_file:
            pdf_file.write(image.read())

        temp_local_dir = tempfile.mkdtemp()
        # Each switch must be its own argv entry, and the page pattern must
        # be joined with a separator so pages land inside the temp directory.
        gs_args = [
            "pdf2png",
            "-dSAFER",
            "-dBATCH",
            "-dNOPAUSE",
            "-r300",
            "-sDEVICE=pnggray",
            "-dTextAlphaBits=4",
            "-sPAPERSIZE=a4",
            "-o", os.path.join(temp_local_dir, "page-%02d.png"),
            temp_local_filename,
        ]
        encoding = locale.getpreferredencoding()
        gs_args = [gs_arg.encode(encoding) for gs_arg in gs_args]
        try:
            # Exit the interpreter (end of ``with``) before cleanup()
            # deletes the instance.
            with ghostscript.Ghostscript(*gs_args):
                pass
            ghostscript.cleanup()
        finally:
            # The temporary PDF copy is no longer needed.
            os.remove(temp_local_filename)

        files = sorted(glob.glob(os.path.join(temp_local_dir, "*.png")))
        print(files)
        return None, files, file_name, mime_type, file_size

    elif mime_type == 'image/tiff':
        frame_paths = []
        # Only the unique path is used (as a prefix); close the descriptor
        # so it does not leak.
        fd, temp_local_filename = tempfile.mkstemp()
        os.close(fd)
        image.seek(0) if isinstance(image, io.BytesIO) else image.open()
        with wi(file=image, resolution=180) as pdf_tiff:
            for i, page in enumerate(pdf_tiff.sequence):
                with wi(page) as page_image:
                    page_image.alpha_channel = False
                    jpeg_bytes = io.BytesIO(page_image.make_blob(format='jpeg'))
                    frame_path = temp_local_filename + '_' + str(i) + '.jpeg'
                    # Use a separate name so the upload is not shadowed.
                    frame = Image.open(jpeg_bytes)
                    frame.save(frame_path)
                    frame_paths.append(frame_path)
        return None, frame_paths, file_name, mime_type, file_size

    elif mime_type == 'text/html':
        # NOTE(review): this branch builds the response with keyword
        # arguments and .to_dict(), unlike the other failure paths -- confirm
        # which HttpResponse form is intended.
        return HttpResponse(
            status='FAIL',
            status_code=204,
            message="Not a file object",
            file_name=None,
            file_type=None,
            url=str(request.path)).to_dict(), [], file_name, mime_type, file_size

    else:
        # NOTE(review): the helper receives the whole (fd, path) tuple from
        # mkstemp(); verify it closes the descriptor.
        temp_local_filename = tempfile.mkstemp()
        fix_image_orientation_using_exif(image, temp_local_filename)
        return None, [temp_local_filename[1]], file_name, mime_type, file_size