Python Ghostscript Beispiele, ghostscript.Ghostscript Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: make_pdfs_look_scanned.py Projekt: apurvmishra99/pdf-to-scan

def convert(file_name):
    """Make a PDF look scanned and pass the result through Ghostscript.

    The input is rendered at 150 dpi, converted to grayscale, contrast
    stretched, blurred, noised and rotated by 0.5 degrees, then saved next
    to the input as ``NAME_.pdf``.  Ghostscript's ``pdfwrite`` device then
    rewrites that file into ``NAME__.pdf``, which is removed afterwards.

    :param file_name: path of the PDF to process.
    :returns: None.  Any exception is printed instead of being raised.
    """
    try:
        orig_file = Path(file_name).resolve()
        # Strip only the final extension.  Cutting at the first "." breaks
        # inputs such as "./scan.pdf" (empty name) or "report.v2.pdf".
        base = Path(file_name).with_suffix('')
        output_path = Path(f"{base}_.pdf").resolve()
        output_path_temp = Path(f"{base}__.pdf").resolve()
        with Image(filename=str(orig_file), resolution=150) as img:
            img.transform_colorspace('gray')
            img.linear_stretch(black_point=0.035, white_point=0.1)
            img.blur(radius=0, sigma=0.5)
            img.noise(noise_type='gaussian', attenuate=0.25)
            img.rotate(0.5)
            img.save(filename=str(output_path))

        cmd_gs = [
            'gs', '-dSAFER', '-dBATCH', '-dNOPAUSE', '-dNOCACHE',
            '-sDEVICE=pdfwrite',
            '-sColorConversionStrategy=LeaveColorUnchanged',
            '-dAutoFilterColorImages=true', '-dAutoFilterGrayImages=true',
            '-dDownsampleMonoImages=true', '-dDownsampleGrayImages=true',
            '-dDownsampleColorImages=true',
            f'-sOutputFile={str(output_path_temp)}',
            str(output_path)
        ]
        encoding = locale.getpreferredencoding()
        cmd_gs = [a.encode(encoding) for a in cmd_gs]
        ghostscript.Ghostscript(*cmd_gs)
        # NOTE(review): this deletes the file Ghostscript just wrote and
        # keeps only output_path -- confirm that this is intended.
        os.remove(str(output_path_temp))
        click.secho("File processed and saved", fg="green")
    except Exception as e:
        # Best-effort tool: report the problem and carry on.
        print(e)

Beispiel #2

0

Datei anzeigen

def convert_pdf2png(bucket, pdf_blob):
    """Convert a PDF blob to one PNG per page and upload the PNGs.

    The blob is downloaded to a temporary file, rendered by Ghostscript
    with the ``pngalpha`` device at 100 dpi into a temporary directory, and
    every resulting file is uploaded under ``PREFIX-images/`` and made
    public.  ``PREFIX`` is the first four characters of the blob name with
    ".pdf" removed.

    :param bucket: object providing ``blob(name)``; the returned blob must
        support ``upload_from_filename`` and ``make_public``.
    :param pdf_blob: object providing ``name`` and ``download_to_file``.
    :returns: None.
    """
    import shutil

    # download the PDF file to a temp file
    print("Downloading PDF: {}".format(pdf_blob.name))
    pdf_fd, pdf_file_name = tempfile.mkstemp()
    png_tempdir = None
    try:
        # Wrap the descriptor returned by mkstemp so it is closed, not leaked.
        with os.fdopen(pdf_fd, "w+b") as pdf_file:
            pdf_blob.download_to_file(pdf_file)

        # convert the PDF to PNG
        print("Converting PDF to PNGs for {}".format(pdf_blob.name))
        pdf_prefix = pdf_blob.name.replace(".pdf", "")[:4]
        # mkdtemp must be *called*; the bare function object would end up
        # formatted into the output path.
        png_tempdir = tempfile.mkdtemp()
        args = [
            "pdf2png", "-dSAFER", "-sDEVICE=pngalpha", "-r100",
            "-sOutputFile={}/%03d.png".format(png_tempdir), pdf_file_name
        ]
        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]
        ghostscript.Ghostscript(*args)

        # save the PNGs on GCP
        print("Saving PNGs for {}".format(pdf_blob.name))
        for f in glob.glob(png_tempdir + "/*"):
            png_blob = bucket.blob(
                pdf_prefix + "-images/" + os.path.split(f)[1])
            png_blob.upload_from_filename(f, content_type="image/png")
            png_blob.make_public()
            os.remove(f)
        print("Ended converting PDF to PNGs for {}".format(pdf_blob.name))
    finally:
        # Clean up the temporary files even when a step above failed.
        os.remove(pdf_file_name)
        if png_tempdir is not None:
            shutil.rmtree(png_tempdir, ignore_errors=True)


# merging both main and test_tutorial modules; the trigger function
# would be called in a function
# https://cloud.google.com/functions/docs/tutorials/ocr

Beispiel #3

0

Datei anzeigen

 def generate_thumbnail_from_pdf(self, document):
     """Generate a PNG thumbnail from the first page of a document.

     ``self.get_thumbnail_path(document)`` supplies a temporary file path
     and a directory.  The document's file content is copied to the
     temporary path, Ghostscript renders page 1 with the ``png16m`` device
     at 10 dpi to that path plus ".png", and the temporary copy is deleted.

     :param document: object providing ``get_file_obj()``, whose result is
         read in full.
     :returns: None.
     """
     thumbnail_temporary, thumbnail_directory = self.get_thumbnail_path(
         document)
     # Creating directory for thumbnail if not exists
     if not os.path.exists(thumbnail_directory):
         os.makedirs(thumbnail_directory)
     # Storing temporary PDF file for converting.  PDF data is binary, so
     # the copy is written in binary mode, and the context manager closes
     # the file even if the write fails.
     with open(thumbnail_temporary, 'wb') as tmp_pdf:
         tmp_pdf.write(document.get_file_obj().read())
     args = [
         'gs',
         '-q',  # Quiet
         '-dSAFER',
         '-sDEVICE=png16m',  # Type. PNG used
         '-r10',  # resolution of the thumbnail
         '-dBATCH',  # Quit GS after converting
         '-dNOPAUSE',  # Do not stop on pages
         '-dFirstPage=1',
         '-dLastPage=1',
         '-sOutputFile=%s.png' % thumbnail_temporary,  # Destination
         '%s' % thumbnail_temporary,  # Source
     ]
     try:
         ghostscript.Ghostscript(*args)
     finally:
         # Deleting the temp PDF, also when the conversion failed
         os.unlink(thumbnail_temporary)

Beispiel #4

0

Datei anzeigen

Datei: main.py Projekt: Vedantdavile/pdf2audiobook

def convert_pdf2png(bucket, pdf_blob):
    """Convert a PDF blob to one PNG per page and upload the PNGs.

    The blob is downloaded to a temporary file, rendered by Ghostscript
    with the ``pngalpha`` device at 100 dpi into a temporary directory, and
    every resulting file is uploaded under ``PREFIX-images/`` and made
    public.  ``PREFIX`` is the first four characters of the blob name with
    ".pdf" removed.

    :param bucket: object providing ``blob(name)``; the returned blob must
        support ``upload_from_filename`` and ``make_public``.
    :param pdf_blob: object providing ``name`` and ``download_to_file``.
    :returns: None.
    """
    import shutil

    # download the PDF file to a temp file
    print("Downloading PDF: {}".format(pdf_blob.name))
    pdf_fd, pdf_file_name = tempfile.mkstemp()
    png_tempdir = None
    try:
        # mkstemp hands back an open descriptor; wrap it so it gets closed
        # instead of leaking one descriptor per call.
        with os.fdopen(pdf_fd, "w+b") as pdf_file:
            pdf_blob.download_to_file(pdf_file)

        # convert the PDF to PNGs
        print("Converting PDF to PNGs for {}".format(pdf_blob.name))
        pdf_prefix = pdf_blob.name.replace(".pdf", "")[:4]
        png_tempdir = tempfile.mkdtemp()
        args = [
            "pdf2png",
            "-dSAFER",
            "-sDEVICE=pngalpha",
            "-r100",
            "-sOutputFile={}/%03d.png".format(png_tempdir),
            pdf_file_name,
        ]
        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]
        ghostscript.Ghostscript(*args)

        # save the PNGs on GCS
        print("Saving PNGs for {}".format(pdf_blob.name))
        for f in glob.glob(png_tempdir + "/*"):
            png_blob = bucket.blob(
                pdf_prefix + "-images/" + os.path.split(f)[1])
            png_blob.upload_from_filename(f, content_type="image/png")
            png_blob.make_public()
            os.remove(f)
        print("Ended converting PDF to PNGs for {}".format(pdf_blob.name))
    finally:
        # Clean up the temporary files even when a step above failed.
        os.remove(pdf_file_name)
        if png_tempdir is not None:
            shutil.rmtree(png_tempdir, ignore_errors=True)

Beispiel #5

0

Datei anzeigen

Datei: process_book.py Projekt: philippe0206/pimmer

def pdf_to_images(filepath, output_folder):
    """Split a PDF file and make JPEG images of the individual pages.

    Pages are rendered at 200 dpi with JPEG quality 60 and written to
    ``output_folder`` as ``page_001.jpg``, ``page_002.jpg`` and so on.

    :param filepath: path to the PDF file
    :param output_folder: directory that receives the page images
    :returns: tuple with output_folder and the number of ``page_*.jpg``
        files found in it afterwards (files left by earlier runs are
        counted too), or None when the conversion raised

    """

    first_page = "1"

    args = [
        # Ghostscript treats the first argument as the program name and
        # ignores it, so real switches must start at the second position.
        "gs",
        "-dNOPAUSE", "-dBATCH", "-dJPEGQ=60", "-r200",
        "-dFirstPage=" + first_page, "-sDEVICE=jpeg",
        "-sOutputFile=" + os.path.join(output_folder, "page_%03d.jpg"),
        filepath
    ]

    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]

    try:
        logging.info(args)
        ghostscript.Ghostscript(*args)
        logging.info("PDF file split")
        # Count the generated pages.  Globbing the folder path itself would
        # only ever match the folder, never the files inside it.
        pages = glob.glob(
            os.path.join(glob.escape(output_folder), "page_*.jpg"))
        return (output_folder, len(pages))

    except Exception as ex:
        # Best-effort: report the failure and signal it with None.
        print(ex)
        return None

Beispiel #6

0

Datei anzeigen

Datei: views.py Projekt: lmu/lmu.localprintservice

def print_pdf_put_view(request):
    """Print the PDF carried in the request body.

    The printer name comes from the ``printer`` request parameter and falls
    back to ``get_default_printer()``.  The body is stored in a temporary
    directory; on Windows it is sent through Ghostscript's ``mswinpr2``
    device, elsewhere through CUPS.

    :param request: request object exposing ``params``, ``body`` and
        ``response``.
    :returns: a 400 ``Response`` when the body is empty, otherwise
        ``request.response`` with status 202 and an
        ``Access-Control-Allow-Origin: *`` header.
    """
    printer = request.params.get("printer") or get_default_printer()
    if not request.body:
        return Response("Bad Request", status=400)
    with tempfile.TemporaryDirectory(
            suffix="lmu.localprintservice") as tmp_dir:
        spool_dir = os.path.abspath(tmp_dir)
        with open(os.path.join(spool_dir, "file_to_print.pdf"),
                  "w+b") as pdf:
            pdf.write(request.body)
        if sys.platform == "win32":
            files_to_print = glob.glob(
                os.path.join(spool_dir, "*.pdf"))[0].replace('\\\\', '\\')
            import ghostscript
            args = [
                # Ghostscript ignores the first argument (program name).
                "gs",
                # "-dNOPROMPT" and "-q" are separate switches; without the
                # comma Python joins them into one bogus "-dNOPROMPT-q".
                "-dPrinted", "-dBATCH", "-dNOSAFER", "-dNOPAUSE",
                "-dNOPROMPT", "-q", "-dNumCopies#1", "-sDEVICE#mswinpr2",
                f'-sOutputFile#"%printer%{printer}"', f'"{files_to_print}"'
            ]
            encoding = locale.getpreferredencoding()
            args = [a.encode(encoding) for a in args]
            ghostscript.Ghostscript(*args)
        else:
            files_to_print = glob.glob(os.path.join(spool_dir, "*.pdf"))
            import cups
            conn = cups.Connection()
            # NOTE(review): `options` is not defined in this function; it
            # is presumably a module-level mapping -- confirm.
            conn.printFiles(printer, files_to_print, "Test", options)
    request.response.status = 202
    request.response.headers.update({
        'Access-Control-Allow-Origin': '*',
    })
    return request.response

Beispiel #7

0

Datei anzeigen

def print_pdf(data=None):
    """Render the slip template to PDF and print it on the default printer.

    :param data: mapping whose ``investigation`` and ``advice`` entries are
        JSON strings; both are decoded and replaced in place by tuples.
        Despite the ``None`` default, a mapping is required.
    :returns: True.
    """
    data['investigation'] = tuple(json.loads(data['investigation']))
    data['advice'] = tuple(json.loads(data['advice']))

    context = {
        'data': data,
    }
    pdf = render_to_pdf('print/slip.html', context)

    # Create the file atomically; tempfile.mktemp only returns a name and
    # is open to races.  delete=False keeps the file for Ghostscript, and
    # leaving the with-block closes it before Ghostscript opens it.
    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f1:
        f1.write(pdf)
        temp1 = f1.name

    args = [
        # Ghostscript ignores the first argument (program name).
        "gs",
        # "-dNOPROMPT" and "-q" are separate switches; without the comma
        # Python joins them into one bogus "-dNOPROMPT-q".
        "-dPrinted", "-dBATCH", "-dNOSAFER", "-dNOPAUSE", "-dNOPROMPT",
        "-q", "-dNumCopies#1", "-sDEVICE#mswinpr2",
        f'-sOutputFile#"%printer%{win32print.GetDefaultPrinter()}"',
        f'"{temp1}"'
    ]
    encoding = locale.getpreferredencoding()
    args = [a.encode(encoding) for a in args]
    ghostscript.Ghostscript(*args)

    return True

Beispiel #8

0

Datei anzeigen

def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Render a PDF with Ghostscript's ``jpeg`` device at 144 dpi.

    :param pdf_input_path: path of the PDF to read (str).
    :param jpeg_output_path: value handed to ``-sOutputFile`` (str).
    :returns: None.
    """
    gs_argv = (
        "pef2jpeg",  # first entry, spelled exactly as before
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        "-sOutputFile=" + jpeg_output_path,
        pdf_input_path,
    )
    # Every argument is encoded with the locale's preferred encoding
    # before it is handed to the bindings.
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #9

0

Datei anzeigen

Datei: Remover.py Projekt: bartekb96/Tabels-Remover

 def pdf2jpeg(self, PDFDirectory, JPEGDirectory):
     """Render a PDF with Ghostscript's ``jpeg`` device at 144 dpi.

     :param PDFDirectory: input path handed to Ghostscript (str).
     :param JPEGDirectory: value handed to ``-sOutputFile`` (str).
     :returns: None.
     """
     gs_argv = (
         "pef2jpeg",  # first entry, spelled exactly as before
         "-dNOPAUSE",
         "-sDEVICE=jpeg",
         "-r144",
         "-sOutputFile=" + JPEGDirectory,
         PDFDirectory,
     )
     # Arguments are encoded with the locale's preferred encoding.
     codec = locale.getpreferredencoding()
     ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #10

0

Datei anzeigen

Datei: conMan.py Projekt: singh-abhisheksingh/slider

def Slide_Extractor():
    """Convert the uploaded presentations into PNG slides under ./media.

    Steps, all relative to the current working directory:

    1. delete every entry matched by ``./media/*``;
    2. run ``unoconv -f pdf`` on each ``.ppt``/``.pptx`` in ``./uploads``;
    3. merge every ``.pdf`` found in ``./uploads`` into
       ``./media/combine.pdf``;
    4. render that file with Ghostscript (``pngalpha``) to
       ``./media/image%d.png``.

    The working directory is changed while running and restored before
    returning, also when a step fails.

    :returns: number of ``.png`` files in ``./media`` afterwards.
    """
    import subprocess

    for stale in glob.glob('./media/*'):
        os.remove(stale)

    ppt_list = [
        name for name in os.listdir('./uploads')
        if name.endswith(('.pptx', '.ppt'))
    ]

    start_dir = os.getcwd()
    uploads_dir = start_dir + '/uploads'
    media_dir = uploads_dir + '/../media'

    try:
        os.chdir(uploads_dir)
        for element in ppt_list:
            # Pass the file name as its own argument instead of splicing
            # it into a shell string: uploaded names may contain spaces or
            # shell metacharacters.
            subprocess.run(['unoconv', '-f', 'pdf', element])

        merger = PdfFileMerger()
        for pdf in os.listdir():
            if pdf.endswith('.pdf'):
                merger.append(pdf)

        os.chdir(media_dir)
        merger.write("combine.pdf")

        args = ["gs", "-q", "-o", "image%d.png", "-sDEVICE=pngalpha",
                "combine.pdf"]
        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]
        ghostscript.Ghostscript(*args)

        image_count = sum(
            1 for image in os.listdir() if image.endswith('.png'))
    finally:
        # Always return to where we started, even after a failure.
        os.chdir(start_dir)

    return image_count

Beispiel #11

0

Datei anzeigen

Datei: pdf to jpej.py Projekt: beinganukul/Learn-pyhton2-hard-way-ex

def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Render a PDF with Ghostscript's ``jpeg`` device at 144 dpi.

    The arguments are handed to the bindings as plain strings.

    :param pdf_input_path: path of the PDF to read (str).
    :param jpeg_output_path: value handed to ``-sOutputFile`` (str).
    :returns: None.
    """
    output_switch = "-sOutputFile=" + jpeg_output_path
    ghostscript.Ghostscript(
        "pdf2jpeg",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        output_switch,
        pdf_input_path,
    )

Beispiel #12

0

Datei anzeigen

Datei: helpers.py Projekt: gnomon-church/EODGeneratorPySide2

    def pdf_printer(self, pdf_input_path):
        """Send a PDF to Ghostscript's ``mswinpr2`` device.

        :param pdf_input_path: path of the PDF to print (str).
        :returns: None.
        """
        codec = locale.getpreferredencoding()
        gs_argv = ('pdf_printer', '-dNOPAUSE', '-sDEVICE=mswinpr2',
                   pdf_input_path)

        ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])
        # Tear the Ghostscript instance down again after the job.
        ghostscript.cleanup()

Beispiel #13

0

Datei anzeigen

def load_paper(target_path, target_pdf):
    """Render a PDF to PNG pages and post-process every page image.

    Ghostscript renders ``target_path`` with the ``png16m`` device into
    ``PROCESSED_IMG_PATH/NAME/%d.png``, where NAME is derived from
    ``target_pdf``.  Each PNG found in that directory is then searched for
    the four corner templates; detected symbols are passed to
    ``templateMatcher.removeDetected`` and the image is written back in
    place.

    :param target_path: input path handed to Ghostscript.
    :param target_pdf: "/"-separated path used only to derive NAME (and
        printed).
    :returns: list with the path of every ``.png`` in the target directory,
        including pages whose processing raised and was skipped.
    """
    print target_pdf
    # NAME = last "/"-separated component, cut at its first ".".
    file_name = target_pdf.split("/")[::-1][0].split(".")[0]
    target_directory = PROCESSED_IMG_PATH + "/" + file_name
    if not os.path.exists(target_directory): os.mkdir(target_directory)

    # NOTE(review): Ghostscript's API ignores the first argument (it is the
    # program-name slot), so "-q" is probably dropped here -- confirm.
    gs_args = [
        "-q", "-dNOPAUSE", "-dBATCH", "-dNOPROMPT", "-dNOSAFER",
        "-sDEVICE=png16m", "-sOutputFile=" + target_directory + "/%d.png",
        target_path
    ]
    ghostscript.Ghostscript(*gs_args)

    # Every .png in the directory is processed, not only files written by
    # the call above.
    images = os.listdir(target_directory)
    images = [image for image in images if image.endswith(".png")]
    result_image_path = []

    for image in images:
        # The path is recorded before processing, so it is returned even
        # when the try block below fails for this page.
        result_image_path.append(target_directory + '/' + image)
        # Flag 0 loads the page as a single-channel image; img_rgb is a
        # BGR copy that only receives the debug rectangles.
        img = cv2.imread(target_directory + '/' + image, 0)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        try:
            # Look for each of the four corner templates on the page.
            top_right = templateMatcher.detectSymbols(img, img_rgb,
                                                      TEMPLATE_TOP_RIGHT)
            top_left = templateMatcher.detectSymbols(img, img_rgb,
                                                     TEMPLATE_TOP_LEFT)
            bottom_right = templateMatcher.detectSymbols(
                img, img_rgb, TEMPLATE_BOTTOM_RIGHT)
            bottom_left = templateMatcher.detectSymbols(
                img, img_rgb, TEMPLATE_BOTTOM_LEFT)

            corner_symbols = __extract_border_marker(top_right, top_left,
                                                     bottom_right, bottom_left)

            for symbol in corner_symbols.keys():
                coord = corner_symbols[symbol]
                w, h = TEMPLATE[symbol].shape[::-1]
                # Outline the match on the colour copy, then hand the same
                # region of the working image to removeDetected.
                cv2.rectangle(img_rgb, tuple(coord),
                              (coord[0] + w, coord[1] + h), (255, 0, 0), 1)
                templateMatcher.removeDetected(img, coord[1], coord[0], w, h)

            border_pos = __find_border(img, corner_symbols["top_right"],
                                       corner_symbols["top_left"],
                                       corner_symbols["bottom_right"],
                                       corner_symbols["bottom_left"])
            for pos in border_pos:
                coord = border_pos[pos]
                cv2.rectangle(img_rgb, tuple(coord),
                              (coord[0] + 1, coord[1] + 1), (0, 255, 0), 1)
        except Exception:
            # Any failure skips this page entirely: the imwrite below does
            # not run and the file on disk stays as Ghostscript wrote it.
            continue
        # Only img is written back; img_rgb with the rectangles is not
        # saved anywhere in this function.
        cv2.imwrite(target_directory + '/' + image, img)
    return result_image_path

Beispiel #14

0

Datei anzeigen

def ai2jpegGs(pdf_input_path, jpeg_output_path):
    """Render a document with Ghostscript's ``jpeg`` device at 144 dpi.

    :param pdf_input_path: input path handed to Ghostscript (str).
    :param jpeg_output_path: value handed to ``-sOutputFile`` (str).
    :returns: None.
    """
    gs_argv = (
        "pdf2jpeg",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        "-sOutputFile=" + jpeg_output_path,
        pdf_input_path,
    )
    # Arguments are encoded with the locale's preferred encoding.
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #15

0

Datei anzeigen

Datei: main.py Projekt: retip94/pdf-to-jpg

def pdf_to_jpg(pdf_input_path, jpeg_name):
    """Render a PDF with Ghostscript's ``jpeg`` device at 300 dpi.

    Output files are named ``{jpeg_name}-%03d.jpg``; the ``%03d`` part is
    left for Ghostscript to fill in.

    :param pdf_input_path: path of the PDF to read (str).
    :param jpeg_name: prefix of the output file names.
    :returns: None.
    """
    codec = locale.getpreferredencoding()
    gs_argv = (
        "pdf2jpeg",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r300",
        f'-sOutputFile={jpeg_name}-%03d.jpg',
        pdf_input_path,
    )
    encoded = [arg.encode(codec) for arg in gs_argv]
    with ghostscript.Ghostscript(*encoded):
        # cleanup() is called while the instance is still open, as before.
        ghostscript.cleanup()

Beispiel #16

0

Datei anzeigen

Datei: test.py Projekt: m0t9/exam-reader

def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Render a PDF with Ghostscript's ``png16m`` device at 300 dpi.

    Despite the function name, the device used is ``png16m``.  The command
    line is printed before it is run.

    :param pdf_input_path: path of the PDF to read.
    :param jpeg_output_path: output file, passed with ``-o``.
    :returns: None.
    """
    # Build the argument list directly.  Joining two strings without a
    # separator fused "-dBandBufferSpace=500000000" with "-q", and
    # splitting on whitespace broke paths that contain spaces.
    args = [
        "gs",  # program-name slot; Ghostscript ignores the first argument
        "-dNumRenderingThreads=8",
        "-dBufferSpace=2000000000",
        "-dBandBufferSpace=500000000",
        "-q",
        "-sDEVICE=png16m",
        "-o",
        str(jpeg_output_path),
        "-r300",
        str(pdf_input_path),
    ]
    print(" ".join(args))

    ghostscript.Ghostscript(*[arg.encode() for arg in args])

Beispiel #17

0

Datei anzeigen

Datei: tesseract_ocr.py Projekt: hilalarsa/document-tag-server

def change_format_and_ocr(pdf_input_path, filename):
    """Render a PDF to JPEG at 144 dpi and run ``image_to_text`` on it.

    :param pdf_input_path: path of the PDF to read (str).
    :param filename: output path without extension; ".jpeg" is appended.
    :returns: whatever ``image_to_text`` returns for the JPEG path.
    """
    jpeg_output_path = filename + ".jpeg"
    gs_argv = (
        "pdf2jpeg",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=jpeg",
        "-r144",
        "-sOutputFile=" + jpeg_output_path,
        pdf_input_path,
    )
    # Arguments are encoded with the locale's preferred encoding.
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

    return image_to_text(jpeg_output_path)

Beispiel #18

0

Datei anzeigen

def pdf2jpeg(pdf_input_path, output_path):
    """Render a PDF with Ghostscript's ``png16m`` device at 200 dpi.

    Despite the function name, the output device is ``png16m``.

    :param pdf_input_path: path of the PDF to read (str).
    :param output_path: value handed to ``-sOutputFile`` (str).
    :returns: None.
    """
    device = "-sDEVICE=png16m"          # png 24 bit rgb color
    resolution = "-r200"                # input rendering 200 dpi
    downscale = "-dDownScaleFactor=1"   # make .png file 200 dpi as well
    gs_argv = (
        "pdf2jpeg",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        device,
        resolution,
        downscale,
        "-sOutputFile=" + output_path,
        pdf_input_path,
    )

    # arguments have to be bytes, encode them
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #19

0

Datei anzeigen

Datei: pdftoword.py Projekt: ehabterra/pdfToWord

def pdf2jpeg(pdf_input_path, jpeg_output_path):
    """Render a PDF with Ghostscript's ``png16m`` device at 144 dpi.

    Despite the function name, the output device is ``png16m``.  All pages
    are processed; no ``-dFirstPage``/``-dLastPage`` range is passed.

    :param pdf_input_path: path of the PDF to read (str).
    :param jpeg_output_path: value handed to ``-sOutputFile`` (str).
    :returns: None.
    """
    gs_argv = (
        "gs",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-dBATCH",
        "-sDEVICE=png16m",
        "-r144",
        "-dPDFFitPage",
        "-sOutputFile=" + jpeg_output_path,
        pdf_input_path,
    )
    # Arguments are encoded with the locale's preferred encoding.
    codec = locale.getpreferredencoding()
    ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #20

0

Datei anzeigen

    def convert_to_image(self):
        """
        Convert the PDF at ``self.pdf_file_spec`` to image files.

        Reads ``self.threads``, ``self.IMAGE_FORMAT``, ``self.dpi``,
        ``self.output_file``, ``self.extension`` and ``self.name``; stores
        the elapsed time in ``self.conversion_duration``.  A missing input
        file or a failing conversion is reported with ``print`` only.

        :return: self, so that calls can be chained.

        """
        # Specified PDF was not found.
        if not os.path.exists(self.pdf_file_spec):
            print(f"{self.name}: Unable to find '{self.pdf_file_spec}'")
            return self

        start_conversion = perf_counter()
        output_pattern = os.path.abspath(
            f"{self.output_file}-%00d.{self.extension}")
        codec_source = (
            "pdf2tiff",
            "-dNOPAUSE",
            "-dSAFER",
            "-dBATCH",
            f"-dNumRenderingThreads={self.threads}",
            "-q",
            f"-sDEVICE={self.IMAGE_FORMAT}",
            f"-r{self.dpi}",
            f"-sOutputFile={output_pattern}",
            f"{self.pdf_file_spec}",
        )
        codec = locale.getpreferredencoding()
        gs_argv = [arg.encode(codec) for arg in codec_source]

        # The instance is shut down right after the run so that further
        # documents can be converted afterwards.
        try:
            instance = ghostscript.Ghostscript(*gs_argv)
            instance.exit()
            ghostscript.cleanup()
        except Exception as exc:
            print(f"\tERROR ({self.name}): Exception: {exc}")

        # Measure time to convert the PDF to image files.
        self.conversion_duration = perf_counter() - start_conversion
        print(
            f"{self.name}: Conversion took: {self.conversion_duration:0.4f} seconds."
        )
        return self

Beispiel #21

0

Datei anzeigen

Datei: utils.py Projekt: AdrienLauwers/SEP-oscareducation

def pdf2png(pdf_input_path, png_output_path):
    """
    Hand a PDF to Ghostscript for rendering at 144 dpi.

    The arguments are passed to the bindings as plain strings.

    :param pdf_input_path: path of the PDF to read
    :param png_output_path: output path; it is concatenated onto
        ``-sOutputFile=``, so it has to be a string
    :returns: None
    """
    # NOTE(review): the device name is "png"; confirm that the installed
    # Ghostscript really provides a device with that name.
    output_switch = "-sOutputFile=" + png_output_path
    ghostscript.Ghostscript(
        "pdf2png",  # program-name slot; actual value doesn't matter
        "-dNOPAUSE",
        "-sDEVICE=png",
        "-r144",
        output_switch,
        pdf_input_path,
    )

Beispiel #22

0

Datei anzeigen

Datei: PyOCR.py Projekt: bishnubhatta/PyOCR

 def pdf2jpeg(self, pdf_list):
     """Render every listed PDF to numbered JPEG files at 144 dpi.

     Each input is read from ``self.dir_path`` and written to
     ``self.temp_dir`` as the file name without its extension, followed by
     a three-digit counter filled in by Ghostscript and ".jpeg".  The base
     name and the output pattern are printed for every file.

     :param pdf_list: iterable of PDF file names inside ``self.dir_path``.
     :returns: None.
     """
     import ghostscript
     import os
     for pdf_name in pdf_list:
         # Drop only the final extension, so that "a.v1.pdf" and
         # "a.v2.pdf" do not collapse onto the same output name.
         basename = os.path.splitext(pdf_name)[0]
         # %03d will increment the file name
         output_pattern = os.path.join(self.temp_dir, basename + "%03d.jpeg")
         # Single-argument print() calls run on Python 2 and Python 3.
         print(basename)
         print(output_pattern)
         args = [
             "pdf2jpeg",  # actual value doesn't matter
             "-dNOPAUSE",
             "-sDEVICE=jpeg",
             "-r144",
             "-sOutputFile=" + output_pattern,
             os.path.join(self.dir_path, pdf_name)
         ]
         ghostscript.Ghostscript(*args)

Beispiel #23

0

Datei anzeigen

Datei: converter.py Projekt: bartkoopman/alfa

def main():
    '''Render every entry of ``files_in_directory`` to JPEG at 144 dpi.

    Input and output both live in ``directory_in_str``; the output name is
    the input name with ".jpg" appended.  Each input path is printed.
    '''
    for entry in files_in_directory:
        input_doc = directory_in_str + '/' + entry
        print(input_doc)
        output_doc = input_doc + '.jpg'

        ghostscript.Ghostscript(
            "gs",  # program-name slot; actual value doesn't matter
            "-dNOPAUSE",
            "-sDEVICE=jpeg",
            "-r144",
            "-sOutputFile=" + output_doc,
            input_doc,
        )

Beispiel #24

0

Datei anzeigen

Datei: ExtractorTextoCompletoFrame.py Projekt: LindineuDuran/Python

    def pdf2png(self, pdf_input_path, png_output_path):
        """Render a PDF with Ghostscript's ``pngmono`` device at 300 dpi.

        Failures are reported on stdout and not raised.

        :param pdf_input_path: path of the PDF to read (str).
        :param png_output_path: value handed to ``-sOutputFile`` (str).
        :returns: None.
        """
        args = [
            "pdf2png",  # actual value doesn't matter
            "-dNOPAUSE",
            "-sDEVICE=pngmono",
            "-r300",
            "-sOutputFile=" + png_output_path,
            pdf_input_path
        ]

        encoding = locale.getpreferredencoding()
        args = [a.encode(encoding) for a in args]

        try:
            ghostscript.Ghostscript(*args)
            ghostscript.cleanup()
        except Exception as exc:
            # Report the error that actually occurred.  A bare except
            # would also swallow KeyboardInterrupt/SystemExit, and printing
            # the exception class says nothing about what went wrong.
            print("Erro", exc)

Beispiel #25

0

Datei anzeigen

    def convert(self, pdf_path, png_path, resolution=300):
        """Render a PDF to PNG with Ghostscript's ``png16m`` device.

        :param pdf_path: path of the PDF to read.
        :param png_path: output file, passed with ``-o``.
        :param resolution: value for the ``-r`` switch.
        :raises OSError: when ``self.installed()`` is falsy.
        """
        if self.installed():
            # Imported lazily, only once the installation check passed.
            import ghostscript

            ghostscript.Ghostscript(
                "gs",
                "-q",
                "-sDEVICE=png16m",
                "-o",
                png_path,
                f"-r{resolution}",
                pdf_path,
            )
            return

        raise OSError(
            "Ghostscript is not installed. You can install it using the instructions"
            " here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
        )

Beispiel #26

0

Datei anzeigen

Datei: PdfSlidesService.py Projekt: gaboth22/GlobalSiteLiveBackendService

    def start(self, source_path, output_path):
        """Render a PDF into ``output_path`` as ``page-001.jpg`` and so on.

        Does nothing unless ``source_path`` contains ".pdf".  An existing
        ``output_path`` is deleted and recreated first.  The files carry a
        ".jpg" extension although the device used is ``png16m``.

        :param source_path: path of the PDF to read (str).
        :param output_path: directory that receives the pages (str).
        :returns: None.
        """
        if ".pdf" not in source_path:
            return

        # Start from an empty output directory.
        already_there = os.path.exists(output_path)
        if already_there:
            shutil.rmtree(output_path)
        os.makedirs(output_path)

        page_pattern = output_path + '/page-%03d.jpg'
        gs_argv = (
            'pdf2jpeg',
            '-dNOPAUSE',
            '-dBATCH',
            '-dSAFER',
            '-sDEVICE=png16m',
            '-r50x50',
            '-sOutputFile=' + page_pattern,
            source_path,
        )

        codec = locale.getpreferredencoding()
        ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

Beispiel #27

0

Datei anzeigen

Datei: pdf.py Projekt: smartfile/preview-server

def _run_ghostscript(obj, device, outfile, pages=(1, 1)):
    # An empty file is apparently a valid file as far as ghostscript is
    # concerned. However, it produces an empty image file, which causes
    # errors downline. Detect an empty file and raise here.
    if not obj.src.size:
        raise Exception('Invalid file size 0')

    args = [
        b'-dNOPAUSE',
        b'-dBATCH',
        b'-dSAFER',
        b'-sDEVICE=%s' % bytes(device, 'utf8'),
    ]

    if pages != (0, 0):
        args.extend(
            [b'-dFirstPage=%i' % pages[0],
             b'-dLastPage=%i' % pages[1]])

    # Calculate suitable DPI...
    dpi = _calc_dpi(obj.width, obj.height)
    LOGGER.debug('Converting PDF to image with DPI of %ix%i', *dpi)

    args.extend([
        b'-r%ix%i' % dpi,
        b'-o',
        bytes(outfile, 'utf8'),
        bytes(obj.src.path, 'utf8'),
    ])

    LOGGER.debug('Ghostscript args: %s', args)

    # TODO: fix this lib. You cannot clean up the object with try / except if
    # __init__() raises.
    output = BytesIO()
    with ghostscript.Ghostscript(stdout=output, stderr=output, *args):
        pass

    # Checkout output for errors that require special handling.
    output = output.getvalue()
    if pages != (0, 0) and (b'FirstPage' in output or b'LastPage' in output):
        raise InvalidPageError(pages)

Beispiel #28

0

Datei anzeigen

    def pdf2jpeg(self, pdf_input_path, jpeg_output_path):
        """Render a PDF with Ghostscript's ``jpeg`` device at 300 dpi.

        Read access is permitted for the directory of the input file and
        write access for ``jpeg_output_path``.  The output pattern itself
        is the fixed argument ``-o a%03d.jpg``.

        :param pdf_input_path: path of the PDF to read (str).
        :param jpeg_output_path: path used for ``--permit-file-write``.
        :returns: None.
        """
        # NOTE(review): "-o a%03d.jpg" is passed as ONE argument and
        # jpeg_output_path is not used as the output file -- confirm both.
        read_permit = "--permit-file-read=" + os.path.dirname(pdf_input_path)
        write_permit = "--permit-file-write=" + jpeg_output_path
        gs_argv = (
            "gs",  # program-name slot; actual value doesn't matter
            read_permit,
            write_permit,
            "-dNOPAUSE",
            "-sDEVICE=jpeg",
            "-dTextAlphaBits=4",
            "-r300",
            "-o a%03d.jpg",
            pdf_input_path,
        )

        codec = locale.getpreferredencoding()
        encoded = [arg.encode(codec) for arg in gs_argv]

        with ghostscript.Ghostscript(*encoded):
            # cleanup() is called while the instance is still open.
            ghostscript.cleanup()

Beispiel #29

0

Datei anzeigen

    def resave_pdf(self):
        """Move the temporary output into place and report its size.

        The Ghostscript re-save path is switched off by a hard-coded flag,
        so the temporary file is simply renamed to the final name.  Then
        the size in MB is shown on the header widget and a hint on the
        detail widget.

        :returns: None.
        """
        recompress_with_ghostscript = 0  # hard-wired off

        if not recompress_with_ghostscript:
            os.rename(self.abs_tmp_output_filename, self.abs_output_filename)
        else:
            quality = {
                0: '/default',
                1: '/prepress',
                2: '/printer',
                3: '/ebook',
                4: '/screen'
            }
            gs_argv = [
                'gs',
                '-sDEVICE=pdfwrite',
                '-dCompatibilityLevel=1.4',
                '-dPDFSETTINGS={}'.format(quality[0]),
                '-dNOPAUSE',
                '-dQUIET',
                '-dBATCH',
                '-dColorAccuracy=2',
                '-dProcessColorModel=/DeviceRGB',
                '-sOutputFile={}'.format(self.abs_output_filename),
                self.abs_tmp_output_filename,
            ]

            # Using python ghostscript module
            codec = locale.getpreferredencoding()
            ghostscript.Ghostscript(*[arg.encode(codec) for arg in gs_argv])

            # Remove original file, called tmp.pdf
            if os.path.exists(self.abs_tmp_output_filename):
                os.remove(self.abs_tmp_output_filename)

        size_mb = getsize(self.abs_output_filename) / 1000000.
        self.message_on_header_widget("Created ({:.1f}MB)!".format(size_mb))
        self.message_on_detail_widget("Drag another folder to create a new one.")

Beispiel #30

0

Datei anzeigen

Datei: PdfParser.py Projekt: Krumpet/ProjectKdam

    def catalogue_to_txt_files(txt_filename_template, pdf_filename) -> None:
        """
        Extract the text of a PDF with Ghostscript's ``txtwrite`` device.

        :param txt_filename_template: output name appended to ``-o``; it is
            expected to contain a ``%d`` so that every page goes to its own
            text file
        :param pdf_filename: name of the PDF inside ``Paths.PDF_PATH``
        :return: None
        """
        source_pdf = os.path.join(Paths.PDF_PATH, pdf_filename)
        gs_argv = (
            "gs",  # name of the command
            "-sDEVICE=txtwrite",  # job type - writing to txt files
            "-o" + txt_filename_template,  # output filename template
            source_pdf,  # input filename
        )

        # args need to be encoded into bytes
        ghostscript.Ghostscript(*[arg.encode() for arg in gs_argv])