Ejemplo n.º 1
0
def run_rig_args(args, **kwargs):
    """Run the command with its PDF/A-related arguments filtered out.

    Any argument that starts with ``-dPDFA`` or ends with ``.ps`` is dropped
    before the remaining arguments are handed to ``run_polling_stderr``.
    Per the original note, the point of removing the PDF/A switches is to
    make PDF/A creation fail.

    Args:
        args: Iterable of command-line argument strings.
        **kwargs: Forwarded unchanged to ``run_polling_stderr``.

    Returns:
        Whatever ``run_polling_stderr`` returns for the filtered arguments.
    """

    def is_kept(argument):
        # Equivalent to "not a PDF/A switch and not a .ps file".
        return not (argument.startswith('-dPDFA') or argument.endswith('.ps'))

    filtered_args = list(filter(is_kept, args))
    return run_polling_stderr(filtered_args, **kwargs)
Ejemplo n.º 2
0
def run_append_stderr(*args, **kwargs):
    """Run the command and append the elision warning to its stderr.

    All arguments are forwarded to ``run_polling_stderr``. The returned
    object's ``stderr`` attribute is extended with a newline, the
    module-level ``elision_warning`` text, and another newline.

    Returns:
        The object returned by ``run_polling_stderr``, with the modified
        ``stderr`` attribute.
    """
    result = run_polling_stderr(*args, **kwargs)
    # Surround the warning with newlines so it sits on its own line.
    trailer = ''.join(('\n', elision_warning, '\n'))
    result.stderr += trailer
    return result
Ejemplo n.º 3
0
def _gs_version_key(ver):
    """Turn a dotted version string such as '9.26' into a tuple of ints.

    Only the leading run of dot-separated digits is used. An empty tuple is
    returned when the text does not start with a number.
    """
    import re  # local import keeps this block self-contained

    match = re.match(r'\d+(?:\.\d+)*', str(ver).strip())
    if match is None:
        return ()
    return tuple(int(piece) for piece in match.group().split('.'))


def _log_gs_errors(stderr):
    """Log each '****'-delimited part of stderr, collapsing consecutive repeats.

    The first occurrence of a part is logged as an error. When the same part
    repeats more than once in a row, a single summary line with the repeat
    count is logged instead of the duplicates.
    """
    last_part = None
    repcount = 0
    for part in stderr.split('****'):
        if part == last_part:
            repcount += 1
            continue
        if repcount > 1:
            log.error(f"(previous error message repeated {repcount} times)")
        # Always start a fresh count for a new message; otherwise a single
        # repeat of one message would be added to the next message's count.
        repcount = 0
        log.error(part)
        last_part = part
    # A run of duplicates at the very end of the output must be reported too.
    if repcount > 1:
        log.error(f"(previous error message repeated {repcount} times)")


def generate_pdfa(
    pdf_pages,
    output_file: os.PathLike,
    *,
    compression: str,
    pdf_version: str = '1.5',
    pdfa_part: str = '2',
    progressbar_class=None,
):
    """Run Ghostscript's pdfwrite device over the input files to build a PDF/A.

    Args:
        pdf_pages: Iterable of input paths appended to the Ghostscript
            command line (each is converted with ``fspath``).
        output_file: Path of the file that receives Ghostscript's stdout.
        compression: ``'jpeg'`` forces DCT encoding for color and gray
            images, ``'lossless'`` forces Flate encoding; any other value
            lets Ghostscript choose the filter automatically.
        pdf_version: Value passed as ``-dCompatibilityLevel``.
        pdfa_part: Value passed as ``-dPDFA``.
        progressbar_class: Passed to ``GhostscriptFollower``, which is used
            as the stderr callback.

    Raises:
        SubprocessOutputError: If Ghostscript exits with a failure status.
    """
    # Ghostscript's compression is all or nothing. We can either force all images
    # to JPEG, force all to Flate/PNG, or let it decide how to encode the images.
    # In most cases it's best to let it decide.
    if compression == 'jpeg':
        compression_args = [
            "-dAutoFilterColorImages=false",
            "-dColorImageFilter=/DCTEncode",
            "-dAutoFilterGrayImages=false",
            "-dGrayImageFilter=/DCTEncode",
        ]
    elif compression == 'lossless':
        compression_args = [
            "-dAutoFilterColorImages=false",
            "-dColorImageFilter=/FlateEncode",
            "-dAutoFilterGrayImages=false",
            "-dGrayImageFilter=/FlateEncode",
        ]
    else:
        compression_args = [
            "-dAutoFilterColorImages=true",
            "-dAutoFilterGrayImages=true",
        ]

    gs_version = version()
    version_key = _gs_version_key(gs_version)

    strategy = 'LeaveColorUnchanged'
    # Older versions of Ghostscript expect a leading slash in
    # sColorConversionStrategy, newer ones should not have it. See Ghostscript
    # git commit fe1c025d.
    # Compare numerically: as plain strings '10.0' sorts before '9.19', which
    # would wrongly treat Ghostscript 10+ as an old release. A version that
    # cannot be parsed is treated as a newer release.
    if version_key and version_key < (9, 19):
        strategy = '/' + strategy

    if gs_version == '9.23':
        # 9.23: added JPEG passthrough as a new feature, but with a bug that
        # incorrectly formats some images. Fixed as of 9.24. So we disable this
        # feature for 9.23.
        # https://bugs.ghostscript.com/show_bug.cgi?id=699216
        compression_args.append('-dPassThroughJPEGImages=false')

    # nb no need to specify ProcessColorModel when ColorConversionStrategy
    # is set; see:
    # https://bugs.ghostscript.com/show_bug.cgi?id=699392
    args_gs = [
        GS,
        "-dBATCH",
        "-dNOPAUSE",
        "-dSAFER",
        "-dCompatibilityLevel=" + str(pdf_version),
        "-sDEVICE=pdfwrite",
        "-dAutoRotatePages=/None",
        "-sColorConversionStrategy=" + strategy,
        *compression_args,
        "-dJPEGQ=95",
        "-dPDFA=" + str(pdfa_part),
        "-dPDFACompatibilityPolicy=1",
        # The PDF is written to stdout ("-o -"), so Ghostscript's own
        # stdout messages are redirected to stderr.
        "-o",
        "-",
        "-sstdout=%stderr",
    ]
    args_gs.extend(fspath(s) for s in pdf_pages)  # Stringify Path objs

    try:
        with Path(output_file).open('wb') as output:
            p = run_polling_stderr(
                args_gs,
                stdout=output,
                stderr=PIPE,
                check=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                callback=GhostscriptFollower(progressbar_class),
            )
    except CalledProcessError as e:
        # Ghostscript does not change return code when it fails to create
        # PDF/A - check PDF/A status elsewhere
        log.error(e.stderr)
        raise SubprocessOutputError(
            'Ghostscript PDF/A rendering failed') from e
    else:
        stderr = p.stderr
        # If there is an error we log the whole stderr, except for filtering
        # duplicates.
        if _gs_error_reported(stderr):
            _log_gs_errors(stderr)