def run_rig_args(args, **kwargs):
    """Run ``run_polling_stderr`` with the PDF/A-related arguments removed.

    Any argument that starts with ``-dPDFA`` or ends with ``.ps`` is dropped
    from *args*; the remaining arguments keep their original order. All
    keyword arguments are forwarded unchanged, and the object returned by
    ``run_polling_stderr`` is handed back to the caller.
    """

    def _is_pdfa_related(arg):
        # Matches the Ghostscript PDF/A switches and PostScript files.
        return arg.startswith('-dPDFA') or arg.endswith('.ps')

    rigged_args = []
    for arg in args:
        if _is_pdfa_related(arg):
            continue
        rigged_args.append(arg)
    return run_polling_stderr(rigged_args, **kwargs)
def run_append_stderr(*args, **kwargs):
    """Run ``run_polling_stderr`` and tack ``elision_warning`` onto its stderr.

    All positional and keyword arguments are forwarded as-is. The returned
    object is the one produced by ``run_polling_stderr``, with its ``stderr``
    attribute extended by a newline, ``elision_warning`` and a final newline.
    """
    result = run_polling_stderr(*args, **kwargs)
    appended_text = '\n' + elision_warning + '\n'
    result.stderr += appended_text
    return result
def _gs_version_tuple(raw_version):
    """Turn a dotted version such as ``'9.27'`` into a tuple of ints.

    Parsing stops at the first component that does not begin with an ASCII
    digit, so ``'10.01.2'`` gives ``(10, 1, 2)`` and ``'9.53rc1'`` gives
    ``(9, 53)``. An unparseable value gives an empty tuple.
    """
    numbers = []
    for field in str(raw_version).strip().split('.'):
        digits = field[: len(field) - len(field.lstrip('0123456789'))]
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


def generate_pdfa(
    pdf_pages,
    output_file: os.PathLike,
    *,
    compression: str,
    pdf_version: str = '1.5',
    pdfa_part: str = '2',
    progressbar_class=None,
):
    """Run Ghostscript's ``pdfwrite`` device with PDF/A options.

    Ghostscript writes the document to its stdout, which is redirected into
    *output_file*; its own stdout messages are sent to stderr so they do not
    end up in the output.

    Args:
        pdf_pages: Iterable of input paths, each converted with ``fspath`` and
            appended to the Ghostscript command line.
        output_file: Path that is opened for binary writing and receives the
            Ghostscript output.
        compression: ``'jpeg'`` forces DCT encoding of color and gray images,
            ``'lossless'`` forces Flate encoding, and any other value lets
            Ghostscript choose the filter itself.
        pdf_version: Value passed as ``-dCompatibilityLevel``.
        pdfa_part: Value passed as ``-dPDFA``.
        progressbar_class: Passed to ``GhostscriptFollower``, which is used
            as the stderr polling callback.

    Raises:
        SubprocessOutputError: If the Ghostscript process exits with an error
            (``CalledProcessError``); its stderr is logged first.
    """
    # Ghostscript's compression is all or nothing. We can either force all
    # images to JPEG, force all to Flate/PNG, or let it decide how to encode
    # the images. In most cases it's best to let it decide.
    if compression == 'jpeg':
        compression_args = [
            "-dAutoFilterColorImages=false",
            "-dColorImageFilter=/DCTEncode",
            "-dAutoFilterGrayImages=false",
            "-dGrayImageFilter=/DCTEncode",
        ]
    elif compression == 'lossless':
        compression_args = [
            "-dAutoFilterColorImages=false",
            "-dColorImageFilter=/FlateEncode",
            "-dAutoFilterGrayImages=false",
            "-dGrayImageFilter=/FlateEncode",
        ]
    else:
        compression_args = [
            "-dAutoFilterColorImages=true",
            "-dAutoFilterGrayImages=true",
        ]

    # Compare versions numerically. Comparing the raw strings is
    # lexicographic, which would rank '10.0' below '9.19'.
    gs_version = _gs_version_tuple(version())

    # Older versions of Ghostscript expect a leading slash in
    # sColorConversionStrategy, newer ones should not have it. See Ghostscript
    # git commit fe1c025d.
    strategy = 'LeaveColorUnchanged'
    if gs_version < (9, 19):
        strategy = '/' + strategy

    if gs_version == (9, 23):
        # 9.23: added JPEG passthrough as a new feature, but with a bug that
        # incorrectly formats some images. Fixed as of 9.24. So we disable
        # this feature for 9.23.
        # https://bugs.ghostscript.com/show_bug.cgi?id=699216
        compression_args.append('-dPassThroughJPEGImages=false')

    # nb no need to specify ProcessColorModel when ColorConversionStrategy
    # is set; see:
    # https://bugs.ghostscript.com/show_bug.cgi?id=699392
    args_gs = (
        [
            GS,
            "-dBATCH",
            "-dNOPAUSE",
            "-dSAFER",
            "-dCompatibilityLevel=" + str(pdf_version),
            "-sDEVICE=pdfwrite",
            "-dAutoRotatePages=/None",
            "-sColorConversionStrategy=" + strategy,
        ]
        + compression_args
        + [
            "-dJPEGQ=95",
            "-dPDFA=" + pdfa_part,
            "-dPDFACompatibilityPolicy=1",
            "-o",
            "-",
            "-sstdout=%stderr",
        ]
    )
    args_gs.extend(fspath(s) for s in pdf_pages)  # Stringify Path objs

    try:
        with Path(output_file).open('wb') as output:
            p = run_polling_stderr(
                args_gs,
                stdout=output,
                stderr=PIPE,
                check=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                callback=GhostscriptFollower(progressbar_class),
            )
    except CalledProcessError as e:
        # Ghostscript does not change return code when it fails to create
        # PDF/A - check PDF/A status elsewhere
        log.error(e.stderr)
        raise SubprocessOutputError('Ghostscript PDF/A rendering failed') from e
    else:
        stderr = p.stderr
        # If there is an error we log the whole stderr, except for filtering
        # duplicates.
        if _gs_error_reported(stderr):
            last_part = None
            repcount = 0
            for part in stderr.split('****'):
                if part == last_part:
                    repcount += 1
                    continue
                if repcount > 1:
                    log.error(f"(previous error message repeated {repcount} times)")
                # Always start counting afresh for a new message so a short
                # run of duplicates cannot inflate the next run's count.
                repcount = 0
                log.error(part)
                last_part = part
            # Report duplicates that occurred at the very end of stderr.
            if repcount > 1:
                log.error(f"(previous error message repeated {repcount} times)")