def test_ocr_image_Whitelist(self):
    """OCR output must differ from the rendered text once the character
    whitelist variable is set to "A" only."""
    expected = "ABABABA"
    picture = create_img(expected)
    engine = tesserwrap.Tesseract()
    engine.set_variable("tessedit_char_whitelist", "A")
    raw = engine.ocr_image(picture)
    recognised = raw.strip()
    assert recognised != expected, "%r == %r" % (recognised, expected)
def test_mean_confidence(self):
    """The mean confidence read after a recognition pass must be >= 0."""
    engine = tesserwrap.Tesseract()
    picture = create_img("Hello World")
    engine.set_image(picture)
    engine.get_text()  # run recognizer to get all data set
    confidence = engine.get_mean_confidence()
    ok_(confidence >= 0, "Confidence should be positve integer")
def test_set_rectangle(self):
    """After set_rectangle(0, 0, 100, 40) on an image of "A BBB CCC",
    the recognised text must be exactly "A BBB"."""
    expected = "A BBB"
    picture = create_img("A BBB CCC")
    engine = tesserwrap.Tesseract()
    engine.set_image(picture)
    engine.set_rectangle(0, 0, 100, 40)
    raw = engine.get_text()
    # get_text() output is decoded before comparison with the str literal.
    recognised = raw.decode().strip()
    eq_(recognised, expected, "%s is not %s" % (recognised, expected))
def test_get_textlines(self):
    """The stripped values of get_textlines(), concatenated, must equal the
    text that was rendered into the image."""
    engine = tesserwrap.Tesseract()
    expected = 'This is a line'
    picture = create_img(expected)
    engine.set_image(picture)
    engine.get_text()
    pieces = []
    for line in engine.get_textlines():
        pieces.append(line.value.strip())
    joined = ''.join(pieces)
    eq_(joined, expected, "%s is not %s" % (joined, expected))
def test_get_symbols(self):
    """The values of get_symbols(), concatenated, must equal the text that
    was rendered into the image."""
    engine = tesserwrap.Tesseract()
    expected = 'ABCD'
    picture = create_img(expected)
    engine.set_image(picture)
    engine.get_text()
    pieces = []
    for symbol in engine.get_symbols():
        pieces.append(symbol.value)
    joined = ''.join(pieces)
    eq_(joined, expected, "%s is not %s" % (joined, expected))
def test_get_rectangle(self):
    """get_rectangle() must report ((0, 0), (710, 40)) for an image of
    "A BBB CCC" after a recognition pass.

    The result is unpacked as ((left, top), (width, height)).
    """
    picture = create_img("A BBB CCC")
    engine = tesserwrap.Tesseract()
    engine.set_image(picture)
    engine.get_text()  # run recognizer to get all data set
    (left, top), (width, height) = engine.get_rectangle()
    eq_(left, 0, "Left attribute incorrect")
    eq_(top, 0, "Top attribute incorrect")
    eq_(width, 710, "Width attribute incorrect")
    eq_(height, 40, "Height attribute incorrect")
def test_get_words(self):
    """get_words() on the default test image must return three items; the
    first has value 'Quick' and a four-element box."""
    engine = tesserwrap.Tesseract()
    picture = create_img()
    engine.set_image(picture)
    engine.get_text()
    words = engine.get_words()
    eq_(len(words), 3, "Each word should have one item in result")
    first = words[0]
    eq_(first.value, 'Quick', "%s is not %s" % (first.value, 'Quick'))
    eq_(len(first.box), 4, 'Box does not contain 4 items')
def test_word_confidences(self):
    """Check per-word confidences against the mean confidence, then check
    that an image of the empty string yields an empty list."""
    engine = tesserwrap.Tesseract()
    picture = create_img()
    engine.set_image(picture)
    engine.get_text()  # run recognizer to get all data set
    confidences = engine.get_all_word_confidences()
    eq_(len(confidences), 3, "Each word should have one item in result")
    reported_mean = engine.get_mean_confidence()
    # NOTE(review): under Python 3 `/` is true division, so this only matches
    # an integer mean when the sum divides evenly - confirm the return type
    # of get_mean_confidence().
    computed_mean = sum(confidences) / len(confidences)
    eq_(reported_mean, computed_mean, "Mean confidence incorrect")

    # Empty image
    blank = create_img("")
    engine.set_image(blank)
    empty_confidences = engine.get_all_word_confidences()
    eq_([], empty_confidences, "Should be empty result and no crash")
import sys import io import requests import PIL.Image import tesserwrap #: https://github.com/gregjurman/tesserwrap tesseract = tesserwrap.Tesseract() tesseract.set_variable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz") def distinguish_captcha(image_url, show_origin_image=True): #: preprocess image_bytes = requests.get(image_url).content origin_image = PIL.Image.open(io.BytesIO(image_bytes)) image = origin_image.point(lambda p: p * 1.5)\ .point(lambda p: 255 if p > 200 else 0)\ .convert("1") #: distinguish the text text = tesseract.ocr_image(image) #: show the origin image #if show_origin_image: #origin_image.show() text.replace(' ', '') return text.strip() def decode_captcha(origin_image): image = origin_image.point(lambda p: p * 1.5)\ .point(lambda p: 255 if p > 200 else 0)\
def test_clear(self):
    """clear() must be callable after an image has been set."""
    engine = tesserwrap.Tesseract()
    picture = create_img("A BBB CCC")
    engine.set_image(picture)
    engine.clear()
def test_bad_handle(self):
    """Deleting the `handle` attribute and then the object itself must not
    raise inside this test."""
    engine = tesserwrap.Tesseract()
    # Remove the handle first, then drop the only reference to the object.
    del engine.handle
    del engine
def test_ocr_image_RGB(self):
    """ocr_image() on an image created with the "RGB" argument must return
    the rendered text."""
    expected = "ABABABA"
    picture = create_img(expected, "RGB")
    engine = tesserwrap.Tesseract()
    raw = engine.ocr_image(picture)
    recognised = raw.strip()
    eq_(recognised, expected, "%s is not %s" % (recognised, expected))
from flask import jsonify, Flask, request
from PIL import Image
import tesserwrap
import traceback
import sys
import io

# Init the tesseract
tr = tesserwrap.Tesseract(lang="por")

# Init web Server
app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False


@app.route('/recognize', methods=["POST"])
def image():
    """Run OCR on the uploaded ``imagefile`` and return the text as JSON.

    Returns:
        ``{"result": "ok", "data": <text>}`` on success, or
        ``{"result": "sorry :/"}`` when the upload is missing or any step
        raises. No explicit status code is set on either response, so
        clients must inspect the ``result`` field.
    """
    try:
        upload = request.files.get('imagefile')
        if upload is None:
            # The lookup used to default to '' and fail later with an
            # AttributeError on ''.stream; report the real cause instead.
            print("No 'imagefile' part in the request")
            return jsonify(result="sorry :/")
        img = Image.open(upload.stream)
        text = tr.ocr_image(img)
        return jsonify(result="ok", data=text)
    except Exception as err:
        # Top-level request boundary: log the failure and answer with the
        # generic failure payload instead of propagating.
        traceback.print_exc()
        print(err)
        return jsonify(result="sorry :/")