Exemple #1
0
def rewrite_png(pike: Pdf, im_obj: Object,
                compdata) -> None:  # pragma: no cover
    """Overwrite the PDF image *im_obj* with the PNG described by *compdata*.

    The compressed image data returned by ``compdata.read()`` is written to
    the image object as a Flate stream, and the image's bit depth, size and
    colour space are set from the values *compdata* reports.

    Args:
        pike: PDF in which the palette stream is created when the PNG is
            palettized.
        im_obj: PDF image object, modified in place.
        compdata: Leptonica compressed-data object for the PNG. Its
            ``predictor``, ``bps``, ``spp``, ``w``, ``h`` and ``ncolors``
            attributes are read, and ``get_palette_pdf_string()`` and
            ``read()`` are called.
    """
    # Inserting a PNG into a PDF amounts to copying the PNG's IDAT data and
    # expressing the remaining PNG header fields as PDF image metadata. The
    # PDF reader must also be told whether a predictor was applied before
    # Flate encoding (usually one was).
    #
    # Per the Leptonica source, readers do not need the exact predictor, only:
    #   1     - no predictor
    #   10-14 - a predictor is present
    # Any value >= 10 makes the reader take the real predictor from the PNG
    # data itself, and compdata.predictor is merely a true/false flag. Paeth
    # (14) is what Leptonica would use in practice, but 15 appears to be the
    # designated "optimal" value, so 15 is written.
    # See:
    #   - PDF RM 7.4.4.4 Table 10
    #   - https://github.com/DanBloomberg/leptonica/blob/master/src/pdfio2.c#L757
    if compdata.predictor > 0:
        decode_parms = Dictionary(
            Predictor=15,
            BitsPerComponent=compdata.bps,  # redundant with the image's own entry
            Colors=compdata.spp,
            Columns=compdata.w,
        )
    else:
        decode_parms = Dictionary(Predictor=1)

    im_obj.BitsPerComponent = compdata.bps
    im_obj.Width = compdata.w
    im_obj.Height = compdata.h

    log.debug(
        f"PNG {im_obj.objgen}: palette={compdata.ncolors} spp={compdata.spp} bps={compdata.bps}"
    )

    if compdata.ncolors > 0:
        # ncolors counts palette entries, not the colours used by a true
        # colour image. The palette string always holds RGB tuples, even for
        # grayscale images; see
        # https://github.com/DanBloomberg/leptonica/blob/master/src/colormap.c#L2067
        raw_palette = pikepdf.Object.parse(compdata.get_palette_pdf_string())
        lookup_table = pikepdf.Stream(pike, bytes(raw_palette))
        colorspace = [
            Name.Indexed, Name.DeviceRGB, compdata.ncolors - 1, lookup_table
        ]
    else:
        # No palette: choose the device colour space from the samples per
        # pixel; anything other than 1 or 4 is treated as RGB (spp == 3).
        by_samples = {1: Name.DeviceGray, 4: Name.DeviceCMYK}
        colorspace = by_samples.get(compdata.spp, Name.DeviceRGB)

    im_obj.ColorSpace = colorspace
    im_obj.write(
        compdata.read(), filter=Name.FlateDecode, decode_parms=decode_parms
    )
Exemple #2
0
def transcode_pngs(pike, images, image_name_fn, root, log, options):
    """Write re-encoded PNG versions of *images* back into the PDF when smaller.

    When ``options.optimize >= 2`` every image is first passed to
    ``pngquant.quantize`` on a thread pool. Afterwards, each image whose PNG
    file can be opened and whose compressed size is not larger than the
    stream currently stored in the PDF replaces that stream in place.

    Images are left untouched when the PNG file cannot be opened, when it is
    larger than the existing stream, or when it has no palette and a number
    of samples per pixel other than 1, 3 or 4.

    Args:
        pike: PDF object; images are fetched with ``pike.get_object(xref, 0)``
            and palette streams are created in it.
        images: Collection of image xrefs. It is iterated twice, so it must
            not be a one-shot iterator.
        image_name_fn: Callable taking ``(root, xref)`` and returning the
            quantizer's input file name.
        root: Passed through to ``image_name_fn`` and ``png_name``.
        log: Logger-like object; its ``debug`` and ``error`` methods are used.
        options: Object providing ``optimize`` and, when quantizing,
            ``png_quality`` and ``jobs``.
    """
    if options.optimize >= 2:
        quality_min = max(10, options.png_quality - 10)
        quality_max = min(100, options.png_quality + 10)
        submitted = []
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=options.jobs) as executor:
            for xref in images:
                source_name = image_name_fn(root, xref)
                log.debug(source_name)
                future = executor.submit(
                    pngquant.quantize,
                    source_name,
                    png_name(root, xref),
                    quality_min,
                    quality_max,
                )
                submitted.append((xref, future))
        # Leaving the ``with`` block waits for every job, so all futures are
        # finished here. Report failures instead of dropping them silently;
        # quantizing stays best-effort.
        for xref, future in submitted:
            failure = future.exception()
            if failure is not None:
                log.debug(
                    f"pngquant: quantize failed for image {xref}: {failure!r}")

    for xref in images:
        im_obj = pike.get_object(xref, 0)
        try:
            compdata = leptonica.CompressedData.open(png_name(root, xref))
        except leptonica.LeptonicaError as e:
            # Most likely this means file not found, i.e. quantize did not
            # produce an improved version
            log.error(e)
            continue

        # If re-coded image is larger don't use it - we test here because
        # pngquant knows the size of the temporary output file but not the actual
        # object in the PDF
        original_length = int(im_obj.stream_dict.Length)
        if len(compdata) > original_length:
            log.debug(
                f"pngquant: pngquant did not improve over original image "
                f"{len(compdata)} > {original_length}")
            continue

        # Choose the colour space before modifying im_obj, so an unsupported
        # image is skipped with the PDF object still intact and a colour
        # space from an earlier iteration can never be reused by accident.
        if compdata.ncolors > 0:
            # .ncolors is the number of colors in the palette, not the number of
            # colors used in a true color image
            palette_pdf_string = compdata.get_palette_pdf_string()
            palette_data = pikepdf.Object.parse(palette_pdf_string)
            palette_stream = pikepdf.Stream(pike, bytes(palette_data))
            cs = [
                Name.Indexed,
                Name.DeviceRGB,
                compdata.ncolors - 1,
                palette_stream,
            ]
        elif compdata.spp == 1:
            cs = Name.DeviceGray
        elif compdata.spp == 3:
            cs = Name.DeviceRGB
        elif compdata.spp == 4:
            cs = Name.DeviceCMYK
        else:
            log.debug(
                f"pngquant: unsupported samples per pixel {compdata.spp}, "
                f"keeping original image")
            continue

        # When a PNG is inserted into a PDF, we more or less copy the IDAT section from
        # the PNG and transfer the rest of the PNG headers to PDF image metadata.
        # One thing we have to do is tell the PDF reader whether a predictor was used
        # on the image before Flate encoding. (Typically one is.)
        # According to Leptonica source, PDF readers don't actually need us
        # to specify the correct predictor, they just need a value of either:
        #   1 - no predictor
        #   10-14 - there is a predictor
        # Leptonica's compdata->predictor only tells TRUE or FALSE
        # From there the PNG decoder can infer the rest from the file.
        # In practice the predictor should be Paeth, 14, so we'll use that.
        # See:
        #   - PDF RM 7.4.4.4 Table 10
        #   - https://github.com/DanBloomberg/leptonica/blob/master/src/pdfio2.c#L757
        predictor = 14 if compdata.predictor > 0 else 1
        dparms = Dictionary(Predictor=predictor)
        if predictor > 1:
            dparms.BitsPerComponent = compdata.bps  # Yes, this is redundant
            dparms.Colors = compdata.spp
            dparms.Columns = compdata.w

        im_obj.BitsPerComponent = compdata.bps
        im_obj.Width = compdata.w
        im_obj.Height = compdata.h

        if compdata.bps == 1:
            # Original rationale: PDF and PNG disagree on which bit value is
            # black at 1 bpp, so the sample mapping is inverted. A two-entry
            # Indexed/DeviceGray palette was considered the cleaner fix but
            # needed pikepdf support. Bit of a kludge but this inverts
            # photometric too.
            # NOTE(review): this also applies when a palette is in use -
            # confirm that is intended.
            im_obj.Decode = [1, 0]
        im_obj.ColorSpace = cs
        im_obj.write(compdata.read(),
                     filter=Name.FlateDecode,
                     decode_parms=dparms)
Exemple #3
0
def transcode_pngs(pike, images, image_name_fn, root, log, options):
    """Write re-encoded PNG versions of *images* back into the PDF when smaller.

    When ``options.optimize >= 2`` every image is first passed to
    ``pngquant.quantize`` on a thread pool. Afterwards, each image whose PNG
    file can be opened and whose compressed size is not larger than the
    stream currently stored in the PDF replaces that stream in place.

    Images are left untouched when the PNG file cannot be opened, when it is
    larger than the existing stream, or when it has no palette and a number
    of samples per pixel other than 1, 3 or 4.

    Args:
        pike: PDF object; images are fetched with ``pike.get_object(xref, 0)``
            and palette streams are created in it.
        images: Collection of image xrefs. It is iterated twice, so it must
            not be a one-shot iterator.
        image_name_fn: Callable taking ``(root, xref)`` and returning the
            quantizer's input file name.
        root: Passed through to ``image_name_fn`` and ``png_name``.
        log: Logger-like object; its ``debug`` and ``error`` methods are used.
        options: Object providing ``optimize`` and, when quantizing,
            ``png_quality`` and ``jobs``.
    """
    if options.optimize >= 2:
        quality_min = max(10, options.png_quality - 10)
        quality_max = min(100, options.png_quality + 10)
        submitted = []
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=options.jobs
        ) as executor:
            for xref in images:
                source_name = image_name_fn(root, xref)
                log.debug(source_name)
                future = executor.submit(
                    pngquant.quantize,
                    source_name,
                    png_name(root, xref),
                    quality_min,
                    quality_max,
                )
                submitted.append((xref, future))
        # Leaving the ``with`` block waits for every job, so all futures are
        # finished here. Report failures instead of dropping them silently;
        # quantizing stays best-effort.
        for xref, future in submitted:
            failure = future.exception()
            if failure is not None:
                log.debug(
                    f"pngquant: quantize failed for image {xref}: {failure!r}"
                )

    for xref in images:
        im_obj = pike.get_object(xref, 0)
        try:
            compdata = leptonica.CompressedData.open(png_name(root, xref))
        except leptonica.LeptonicaError as e:
            # Most likely this means file not found, i.e. quantize did not
            # produce an improved version
            log.error(e)
            continue

        # If re-coded image is larger don't use it - we test here because
        # pngquant knows the size of the temporary output file but not the actual
        # object in the PDF
        original_length = int(im_obj.stream_dict.Length)
        if len(compdata) > original_length:
            log.debug(
                f"pngquant: pngquant did not improve over original image "
                f"{len(compdata)} > {original_length}"
            )
            continue

        # Choose the colour space before modifying im_obj, so an unsupported
        # image is skipped with the PDF object still intact and a colour
        # space from an earlier iteration can never be reused by accident.
        if compdata.ncolors > 0:
            # .ncolors is the number of colors in the palette, not the number of
            # colors used in a true color image
            palette_pdf_string = compdata.get_palette_pdf_string()
            palette_data = pikepdf.Object.parse(palette_pdf_string)
            palette_stream = pikepdf.Stream(pike, bytes(palette_data))
            cs = [
                Name.Indexed,
                Name.DeviceRGB,
                compdata.ncolors - 1,
                palette_stream,
            ]
        elif compdata.spp == 1:
            cs = Name.DeviceGray
        elif compdata.spp == 3:
            cs = Name.DeviceRGB
        elif compdata.spp == 4:
            cs = Name.DeviceCMYK
        else:
            log.debug(
                f"pngquant: unsupported samples per pixel {compdata.spp}, "
                f"keeping original image"
            )
            continue

        # When a PNG is inserted into a PDF, we more or less copy the IDAT section from
        # the PNG and transfer the rest of the PNG headers to PDF image metadata.
        # One thing we have to do is tell the PDF reader whether a predictor was used
        # on the image before Flate encoding. (Typically one is.)
        # According to Leptonica source, PDF readers don't actually need us
        # to specify the correct predictor, they just need a value of either:
        #   1 - no predictor
        #   10-14 - there is a predictor
        # Leptonica's compdata->predictor only tells TRUE or FALSE
        # From there the PNG decoder can infer the rest from the file.
        # In practice the predictor should be Paeth, 14, so we'll use that.
        # See:
        #   - PDF RM 7.4.4.4 Table 10
        #   - https://github.com/DanBloomberg/leptonica/blob/master/src/pdfio2.c#L757
        predictor = 14 if compdata.predictor > 0 else 1
        dparms = Dictionary(Predictor=predictor)
        if predictor > 1:
            dparms.BitsPerComponent = compdata.bps  # Yes, this is redundant
            dparms.Colors = compdata.spp
            dparms.Columns = compdata.w

        im_obj.BitsPerComponent = compdata.bps
        im_obj.Width = compdata.w
        im_obj.Height = compdata.h

        if compdata.bps == 1:
            # Original rationale: PDF and PNG disagree on which bit value is
            # black at 1 bpp, so the sample mapping is inverted. A two-entry
            # Indexed/DeviceGray palette was considered the cleaner fix but
            # needed pikepdf support. Bit of a kludge but this inverts
            # photometric too.
            # NOTE(review): this also applies when a palette is in use -
            # confirm that is intended.
            im_obj.Decode = [1, 0]
        im_obj.ColorSpace = cs
        im_obj.write(compdata.read(), filter=Name.FlateDecode, decode_parms=dparms)