def test_remove_background(spoof_tesseract_noop):
    """Check that ``--remove-background`` yields pure black and white.

    The input ``congress.jpg`` must not already have extrema of (0, 255)
    on all three bands; the page rasterized from the output PDF must.

    ``spoof_tesseract_noop`` is forwarded to ``check_ocrmypdf`` as ``env``
    (presumably a pytest fixture -- confirm against the conftest).
    """
    import logging

    from PIL import Image

    from ocrmypdf.ghostscript import rasterize_pdf

    full_range = ((0, 255), (0, 255), (0, 255))

    # Ensure the input image does not contain pure white/black.
    # The ``with`` block closes the image file as soon as it is inspected
    # instead of leaving the handle open for the rest of the test.
    with Image.open(_infile('congress.jpg')) as im:
        assert im.getextrema() != full_range

    output_pdf = check_ocrmypdf(
        'congress.jpg', 'test_remove_bg.pdf', '--remove-background',
        '--image-dpi', '150', env=spoof_tesseract_noop)

    log = logging.getLogger()
    output_png = _outfile('remove_bg.png')
    rasterize_pdf(
        output_pdf, output_png, xres=100, yres=100,
        raster_device='png16m', log=log)

    # The output image should contain pure white and black
    with Image.open(output_png) as im:
        assert im.getextrema() == full_range
def test_deskew(spoof_tesseract_noop):
    """Run ocrmypdf with ``-d`` on ``skew.pdf`` and check the residual skew.

    The output PDF is rasterized and its skew angle measured through
    ``ocrmypdf.leptonica.Pix``; the angle must lie strictly between
    -0.5 and 0.5.
    """
    import logging

    from ocrmypdf.ghostscript import rasterize_pdf
    from ocrmypdf.leptonica import Pix

    # Run with deskew
    result_pdf = check_ocrmypdf(
        'skew.pdf', 'test_deskew.pdf', '-d', '-v', '1',
        env=spoof_tesseract_noop)

    # Now render as an image again and use Leptonica to find the skew angle
    # to confirm that it was deskewed
    root_logger = logging.getLogger()
    result_png = _outfile('deskewed.png')
    rasterize_pdf(
        result_pdf,
        result_png,
        xres=150,
        yres=150,
        raster_device='pngmono',
        log=root_logger,
    )

    page = Pix.read(result_png)
    angle, _confidence = page.find_skew()
    print(angle)
    assert -0.5 < angle < 0.5, "Deskewing failed"
def test_remove_background(spoof_tesseract_noop):
    """Verify ``--remove-background`` output spans pure black to pure white.

    Precondition: ``congress.jpg`` does not have (0, 255) extrema on every
    band. Postcondition: the PNG rasterized from the processed PDF does.

    ``spoof_tesseract_noop`` is passed through as ``env`` to
    ``check_ocrmypdf``.
    """
    from PIL import Image

    black_to_white = ((0, 255),) * 3

    # Ensure the input image does not contain pure white/black.
    # Opened via a context manager so the file handle is released promptly.
    with Image.open(_infile('congress.jpg')) as source_image:
        assert source_image.getextrema() != black_to_white

    output_pdf = check_ocrmypdf('congress.jpg', 'test_remove_bg.pdf',
                                '--remove-background', '--image-dpi', '150',
                                env=spoof_tesseract_noop)

    from ocrmypdf.ghostscript import rasterize_pdf
    import logging

    log = logging.getLogger()
    output_png = _outfile('remove_bg.png')
    rasterize_pdf(output_pdf, output_png, xres=100, yres=100,
                  raster_device='png16m', log=log)

    # The output image should contain pure white and black
    with Image.open(output_png) as rendered_image:
        assert rendered_image.getextrema() == black_to_white
def test_deskew(spoof_tesseract_noop):
    """Confirm that the ``-d`` option leaves ``skew.pdf`` essentially level.

    The processed PDF is rendered with the ``pngmono`` raster device and
    the skew reported by ``Pix.find_skew()`` must fall inside (-0.5, 0.5).
    """
    # Run with deskew
    cli_args = ('skew.pdf', 'test_deskew.pdf', '-d', '-v', '1')
    deskewed_pdf = check_ocrmypdf(*cli_args, env=spoof_tesseract_noop)

    # Now render as an image again and use Leptonica to find the skew angle
    # to confirm that it was deskewed
    from ocrmypdf.ghostscript import rasterize_pdf
    import logging

    raster_options = dict(xres=150, yres=150, raster_device='pngmono',
                          log=logging.getLogger())
    deskewed_png = _outfile('deskewed.png')
    rasterize_pdf(deskewed_pdf, deskewed_png, **raster_options)

    from ocrmypdf.leptonica import Pix

    measured = Pix.read(deskewed_png)
    skew_angle, _ = measured.find_skew()
    print(skew_angle)
    is_level = -0.5 < skew_angle < 0.5
    assert is_level, "Deskewing failed"
def test_deskew():
    """Run ocrmypdf with ``-d`` on ``skew.pdf`` and check the residual skew.

    The output PDF is rasterized with the ``pngmono`` device, loaded with
    ``pixRead`` and measured with ``pixFindSkew``; the reported angle must
    lie strictly between -0.5 and 0.5.
    """
    import logging

    from ocrmypdf.ghostscript import rasterize_pdf
    from ocrmypdf.leptonica import pixRead, pixDestroy, pixFindSkew

    # Run with deskew
    deskewed_pdf = check_ocrmypdf('skew.pdf', 'test_deskew.pdf', '-d')

    # Now render as an image again and use Leptonica to find the skew angle
    # to confirm that it was deskewed
    log = logging.getLogger()
    deskewed_png = _make_output('deskewed.png')
    rasterize_pdf(
        deskewed_pdf, deskewed_png, xres=150, yres=150,
        raster_device='pngmono', log=log)

    pix = pixRead(deskewed_png)
    try:
        # The confidence value is not checked by this test.
        skew_angle, _skew_confidence = pixFindSkew(pix)
    finally:
        # Destroy the pix even when skew detection raises, so a failing
        # measurement does not skip the cleanup call.
        pix = pixDestroy(pix)

    print(skew_angle)
    assert -0.5 < skew_angle < 0.5, "Deskewing failed"
def test_deskew():
    """Confirm that the ``-d`` option leaves ``skew.pdf`` essentially level.

    After processing, the PDF is rendered to ``deskewed.png`` and the skew
    returned by ``pixFindSkew`` must fall inside (-0.5, 0.5).
    """
    # Run with deskew
    deskewed_pdf = check_ocrmypdf('skew.pdf', 'test_deskew.pdf', '-d')

    # Now render as an image again and use Leptonica to find the skew angle
    # to confirm that it was deskewed
    from ocrmypdf.ghostscript import rasterize_pdf
    import logging

    log = logging.getLogger()
    deskewed_png = _make_output('deskewed.png')
    rasterize_pdf(deskewed_pdf, deskewed_png, xres=150, yres=150,
                  raster_device='pngmono', log=log)

    from ocrmypdf.leptonica import pixRead, pixDestroy, pixFindSkew

    pix = pixRead(deskewed_png)
    try:
        skew_angle, _ = pixFindSkew(pix)  # confidence is unused here
    finally:
        # Guarantee the destroy call runs on the error path as well as on
        # success; previously an exception in pixFindSkew skipped it.
        pix = pixDestroy(pix)

    print(skew_angle)
    assert -0.5 < skew_angle < 0.5, "Deskewing failed"
def rasterize(pdf, pageno, png):
    """Render page ``pageno`` of ``pdf`` to ``png`` unless it already exists.

    If ``png`` is already present on disk, its path is printed and
    Ghostscript is not invoked. Otherwise the page is rasterized at
    100x100 resolution with the ``pngmono`` device. Returns ``None``.
    """
    needs_render = not os.path.exists(png)
    if needs_render:
        ghostscript.rasterize_pdf(
            pdf,
            png,
            xres=100,
            yres=100,
            raster_device='pngmono',
            log=gslog,
            pageno=pageno,
        )
        return

    # Reuse the previously rendered file; just report which one it was.
    print(png)