def test_ocr_image_Whitelist(self):
     """With the character whitelist set to "A", OCR must not return the full text."""
     expected = "ABABABA"
     picture = create_img(expected)
     engine = tesserwrap.Tesseract()
     # Restrict recognition to the single character "A".
     engine.set_variable("tessedit_char_whitelist", "A")
     recognized = engine.ocr_image(picture)
     recognized = recognized.strip()
     assert recognized != expected, "%r == %r" % (recognized, expected)
 def test_mean_confidence(self):
     """The mean confidence reported after recognition must be >= 0."""
     engine = tesserwrap.Tesseract()
     engine.set_image(create_img("Hello World"))
     # Run the recognizer first so the confidence data is populated.
     engine.get_text()
     confidence = engine.get_mean_confidence()
     ok_(confidence >= 0, "Confidence should be positve integer")
 def test_set_rectangle(self):
     """Limiting recognition to a (0, 0, 100, 40) rectangle yields only "A BBB"."""
     expected = "A BBB"
     picture = create_img("A BBB  CCC")
     engine = tesserwrap.Tesseract()
     engine.set_image(picture)
     # Only the area given by these rectangle arguments should be recognized.
     engine.set_rectangle(0, 0, 100, 40)
     raw = engine.get_text()
     recognized = raw.decode().strip()
     eq_(recognized, expected, "%s is not %s" % (recognized, expected))
    def test_get_textlines(self):
        """The stripped values from get_textlines() join back to the input text."""
        engine = tesserwrap.Tesseract()
        expected = 'This is a line'
        engine.set_image(create_img(expected))
        engine.get_text()

        stripped_values = []
        for line in engine.get_textlines():
            stripped_values.append(line.value.strip())
        joined = ''.join(stripped_values)
        eq_(joined, expected, "%s is not %s" % (joined, expected))
    def test_get_symbols(self):
        """The values from get_symbols() concatenate to the input text."""
        engine = tesserwrap.Tesseract()
        expected = 'ABCD'
        engine.set_image(create_img(expected))
        engine.get_text()

        symbols = engine.get_symbols()
        joined = ''.join(symbol.value for symbol in symbols)
        eq_(joined, expected, "%s is not %s" % (joined, expected))
 def test_get_rectangle(self):
     """get_rectangle() reports origin (0, 0) and size (710, 40) for the test image.

     No rectangle is set explicitly in this test; the expected numbers are
     whatever get_rectangle() returns after a plain set_image()/get_text().
     """
     img = create_img("A BBB  CCC")
     tr = tesserwrap.Tesseract()
     tr.set_image(img)
     tr.get_text()  # run recognizer to get all data set
     # get_rectangle() is unpacked as ((left, top), (width, height)).
     (left, top), (width, height) = tr.get_rectangle()
     eq_(left, 0, "Left attribute incorrect")
     eq_(top, 0, "Top attribute incorrect")
     eq_(width, 710, "Width attribute incorrect")
     eq_(height, 40, "Height attribute incorrect")
    def test_get_words(self):
        """get_words() on the default image gives three items; the first is 'Quick'."""
        engine = tesserwrap.Tesseract()
        engine.set_image(create_img())
        engine.get_text()

        words = engine.get_words()
        eq_(len(words), 3, "Each word should have one item in result")
        first_word = words[0]
        eq_(first_word.value, 'Quick',
            "%s is not %s" % (first_word.value, 'Quick'))
        eq_(len(first_word.box), 4, 'Box does not contain 4 items')
 def test_word_confidences(self):
     """Per-word confidences match the mean confidence; empty image gives [].

     First checks that the default image produces three word confidences
     whose integer average equals get_mean_confidence(), then checks that an
     image created from an empty string yields an empty list.
     """
     tr = tesserwrap.Tesseract()
     img = create_img()
     tr.set_image(img)
     tr.get_text()  # run recognizer to get all data set
     res = tr.get_all_word_confidences()
     eq_(len(res), 3, "Each word should have one item in result")
     # Floor division: on Python 3 `/` yields a float, which would not equal
     # an integer mean unless the sum happens to divide evenly.
     # NOTE(review): assumes confidences and the mean are ints, as in
     # Tesseract's MeanTextConf -- confirm against the wrapper.
     eq_(tr.get_mean_confidence(),
         sum(res) // len(res), "Mean confidence incorrect")
     # Empty image
     img = create_img("")
     tr.set_image(img)
     res2 = tr.get_all_word_confidences()
     eq_([], res2, "Should be empty result and no crash")
Exemple #9
0
import sys
import io

import requests
import PIL.Image
import tesserwrap

#: OCR engine from https://github.com/gregjurman/tesserwrap, created once at
#: import time and limited to lowercase ASCII letters via the whitelist.
tesseract = tesserwrap.Tesseract()
tesseract.set_variable(
    "tessedit_char_whitelist",
    "abcdefghijklmnopqrstuvwxyz",
)


def distinguish_captcha(image_url, show_origin_image=True):
    """Download a captcha image and return the text recognised in it.

    The downloaded image has its pixel values scaled by 1.5, is thresholded
    at 200 to pure black/white and converted to mode "1" before being passed
    to the module-level ``tesseract`` engine.

    :param image_url: URL of the captcha image, fetched with ``requests.get``.
    :param show_origin_image: currently unused (the display code is
        disabled); kept so existing callers keep working.
    :returns: the OCR output with every space removed and surrounding
        whitespace stripped.
    """
    #: preprocess
    image_bytes = requests.get(image_url).content
    origin_image = PIL.Image.open(io.BytesIO(image_bytes))
    image = origin_image.point(lambda p: p * 1.5)\
        .point(lambda p: 255 if p > 200 else 0)\
        .convert("1")
    #: distinguish the text
    text = tesseract.ocr_image(image)
    #: str.replace returns a new string, so the result has to be rebound;
    #: the previous bare call left the spaces in place.
    text = text.replace(' ', '')
    return text.strip()


def decode_captcha(origin_image):
    image = origin_image.point(lambda p: p * 1.5)\
        .point(lambda p: 255 if p > 200 else 0)\
Exemple #10
0
 def test_clear(self):
     """Calling clear() after set_image() must complete without raising."""
     engine = tesserwrap.Tesseract()
     picture = create_img("A BBB  CCC")
     engine.set_image(picture)
     engine.clear()
Exemple #11
0
 def test_bad_handle(self):
     """Dropping the handle attribute and then the wrapper must not raise."""
     engine = tesserwrap.Tesseract()
     # Remove the handle first, then release the wrapper object itself.
     # NOTE(review): presumably exercises cleanup without a handle -- confirm.
     del engine.handle
     del engine
Exemple #12
0
 def test_ocr_image_RGB(self):
     """ocr_image() on an image created with "RGB" returns the rendered text."""
     expected = "ABABABA"
     picture = create_img(expected, "RGB")
     engine = tesserwrap.Tesseract()
     recognized = engine.ocr_image(picture)
     recognized = recognized.strip()
     eq_(recognized, expected, "%s is not %s" % (recognized, expected))
Exemple #13
0
from flask import jsonify, Flask, request
from PIL import Image

import tesserwrap
import traceback
import sys
import io

# OCR engine shared by all requests, created once at import time (lang="por").
tr = tesserwrap.Tesseract(lang="por")

# Flask application object.
app = Flask(__name__)
# NOTE(review): presumably set so non-ASCII OCR text is emitted unescaped in
# JSON responses -- confirm this key is honoured by the Flask version in use.
app.config.update(JSON_AS_ASCII=False)


@app.route('/recognize', methods=["POST"])
def image():
    """Run OCR on the uploaded ``imagefile`` and return the text as JSON.

    Returns ``{"result": "ok", "data": <text>}`` on success and
    ``{"result": "sorry :/"}`` when the upload is missing or anything raises
    while reading or recognising the image.
    """
    try:
        imagefile = request.files.get('imagefile')
        if imagefile is None:
            # Handle a missing upload explicitly instead of relying on an
            # AttributeError from calling .stream on a '' placeholder.
            print("missing 'imagefile' in request")
            return jsonify(result="sorry :/")

        img = Image.open(imagefile.stream)
        text = tr.ocr_image(img)

        return jsonify(result="ok", data=text)
    except Exception as err:
        # Top-level boundary: log the failure and answer with the generic
        # error payload rather than letting the exception propagate.
        traceback.print_exc()
        print(err)
        return jsonify(result="sorry :/")