from PIL import Image
import pytest
import img2pdf
import pikepdf

from ocrmypdf import leptonica
from ocrmypdf.pdfinfo import PdfInfo
from ocrmypdf.exec import ghostscript
from ocrmypdf.helpers import fspath

# pytest.helpers is dynamic
# pylint: disable=no-member
# pylint: disable=w0612

# Module-wide marker: every test in this file is skipped when the version
# string reported by leptonica compares below 'leptonica-1.72'.
# NOTE(review): this is a plain string comparison, so the ordering is
# lexicographic rather than numeric -- confirm that is acceptable for the
# version strings get_leptonica_version() can return.
pytestmark = pytest.mark.skipif(
    leptonica.get_leptonica_version() < 'leptonica-1.72',
    reason="Leptonica is too old, correlation doesn't work")

# Short module-level aliases for helpers exposed through pytest.helpers
# (populated dynamically, hence the pylint suppression above).
check_ocrmypdf = pytest.helpers.check_ocrmypdf
run_ocrmypdf = pytest.helpers.run_ocrmypdf

# Renderer names; not referenced in the visible part of this file --
# presumably used to parametrize tests further down (verify).
RENDERERS = ['hocr', 'sandwich']


def check_monochrome_correlation(outdir, reference_pdf, reference_pageno,
                                 test_pdf, test_pageno):
    """Derive the PNG paths for comparing a reference page with a test page.

    Only the opening statements of this helper are visible: it fetches the
    root logger and builds two PNG file paths beneath ``outdir``.
    NOTE(review): the body appears truncated -- no rendering or correlation
    step is visible and nothing is returned; confirm against the full file.

    Args:
        outdir: Object supporting the ``/`` operator with a string on the
            right (presumably a ``pathlib.Path`` -- verify against callers).
        reference_pdf: Object with a ``name`` attribute; the name is embedded
            in the reference PNG file name.
        reference_pageno: Integer page number, rendered as four zero-padded
            digits in the reference PNG file name.
        test_pdf: Object with a ``name`` attribute; the name is embedded in
            the test PNG file name.
        test_pageno: Integer page number, rendered as four zero-padded
            digits in the test PNG file name.
    """
    # NOTE(review): ``logging`` is not among the imports visible at the top
    # of this file; confirm it is imported, otherwise this raises NameError.
    gslog = logging.getLogger()

    # File name pattern: "<pdf name>.ref<NNNN>.png" inside outdir.
    reference_png = outdir / '{}.ref{:04d}.png'.format(reference_pdf.name,
                                                       reference_pageno)
    # File name pattern: "<pdf name>.test<NNNN>.png" inside outdir.
    test_png = outdir / '{}.test{:04d}.png'.format(test_pdf.name, test_pageno)
Example #2
0
def test_pil_conversion(crom_pix):
    """Converting the pix with ``topil()`` must reproduce the paired image exactly."""
    leptonica_pix, pil_reference = crom_pix

    converted = leptonica_pix.topil()
    delta = ImageChops.difference(converted, pil_reference)

    # getbbox() is None only when the difference image has no nonzero
    # region, i.e. the two images match pixel for pixel.
    assert delta.getbbox() is None


def test_pix_otsu(crom_pix):
    """Otsu adaptive thresholding should produce an image whose mode is '1'."""
    source, _unused_image = crom_pix
    thresholded = source.otsu_adaptive_threshold()
    assert thresholded.mode == '1'


@pytest.mark.skipif(
    lept.get_leptonica_version() < 'leptonica-1.76',
    reason="needs new leptonica for API change",
)
def test_crop(resources):
    """Cropping linn.png to its foreground must yield a narrower image.

    NOTE(review): the skip condition above compares version strings
    lexicographically, not numerically -- confirm that is acceptable for
    the version strings leptonica can report.
    """
    source_path = resources / 'linn.png'
    original = lept.Pix.open(source_path)
    cropped = original.crop_to_foreground()
    assert cropped.width < original.width


def test_clean_bg(resources):
    """Smoke test: cleaning the background of congress.jpg must not raise."""
    photo = lept.Pix.open(resources / 'congress.jpg')
    # The result is not inspected here; only successful completion of the
    # call is exercised.
    _cleaned = photo.clean_background_to_white()


def test_pickle(crom_pix):
    """Unpack the pix from the ``crom_pix`` pair.

    NOTE(review): the name suggests a pickle round-trip check, but only the
    unpacking statement is visible -- the body appears truncated; confirm
    against the full file.
    """
    # Second element of the pair is not used by the visible code.
    pix, _ = crom_pix
Example #3
0
def test_pil_conversion(crom_pix):
    """Check that ``topil()`` output is pixel-identical to the paired image."""
    src, expected = crom_pix
    mismatch_box = ImageChops.difference(src.topil(), expected).getbbox()
    # A bounding box of None means the difference image is empty everywhere.
    assert mismatch_box is None


def test_pix_otsu(crom_pix):
    """Verify that the result of ``otsu_adaptive_threshold()`` reports mode '1'."""
    input_pix, _paired_image = crom_pix
    binarized_mode = input_pix.otsu_adaptive_threshold().mode
    assert binarized_mode == '1'


@pytest.mark.skipif(
    lp.get_leptonica_version() < 'leptonica-1.76',
    reason="needs new leptonica for API change",
)
def test_crop(resources):
    """``crop_to_foreground()`` on linn.png must return a narrower image.

    NOTE(review): the skip condition above is a lexicographic string
    comparison of version strings, not a numeric one -- confirm that is
    acceptable for the version strings leptonica can report.
    """
    full_page = lp.Pix.open(resources / 'linn.png')
    trimmed = full_page.crop_to_foreground()
    trimmed_width = trimmed.width
    assert trimmed_width < full_page.width


def test_clean_bg(resources):
    """Exercise ``clean_background_to_white()`` on congress.jpg without checking output."""
    image_path = resources / 'congress.jpg'
    loaded = lp.Pix.open(image_path)
    # No assertion follows: the test passes as long as the call completes.
    _whitened = loaded.clean_background_to_white()


def test_pickle(crom_pix):
    """Unpack the pix from the ``crom_pix`` pair.

    NOTE(review): the name suggests a pickle round-trip check, but only the
    unpacking statement is visible -- the body appears truncated; confirm
    against the full file.
    """
    # Second element of the pair is not used by the visible code.
    pix, _ = crom_pix
Example #4
0
import img2pdf
import pytest
from PIL import Image

import pikepdf
from ocrmypdf import leptonica
from ocrmypdf.exec import ghostscript, tesseract
from ocrmypdf.pdfinfo import PdfInfo

# pytest.helpers is dynamic
# pylint: disable=no-member
# pylint: disable=w0612

# Module-wide marker: every test in this file is skipped when the version
# string reported by leptonica compares below 'leptonica-1.72'.
# NOTE(review): this is a plain string comparison, so the ordering is
# lexicographic rather than numeric -- confirm that is acceptable for the
# version strings get_leptonica_version() can return.
pytestmark = pytest.mark.skipif(
    leptonica.get_leptonica_version() < 'leptonica-1.72',
    reason="Leptonica is too old, correlation doesn't work",
)

# Short module-level aliases for helpers exposed through pytest.helpers
# (populated dynamically, hence the pylint suppression above).
check_ocrmypdf = pytest.helpers.check_ocrmypdf
run_ocrmypdf = pytest.helpers.run_ocrmypdf


# Renderer names; not referenced in the visible part of this file --
# presumably used to parametrize tests further down (verify).
RENDERERS = ['hocr', 'sandwich']


def check_monochrome_correlation(
    outdir, reference_pdf, reference_pageno, test_pdf, test_pageno
):
    gslog = logging.getLogger()