Python get_available_toolsの例、pyocr.pyocr.get_available_tools Pythonの例

コード例 #1

0

ファイルを表示

ファイル: land.py プロジェクト: zouyaoji/Crawls

    def pyocr_one_image(self, image_path):
        """
        Recognize the text of one image file with the first available OCR tool.

        The tool is asked for the ``chi_sim`` language, i.e. the Simplified
        Chinese training data listed in the references below.

        :param image_path: path of the image file to recognize
        :return: the recognized text, or ``False`` when pyocr reports no tool

        revision: 20190804
        Author: https://blog.csdn.net/HuangZhang_123/article/details/61920975
        references:
            Tesseract OCR can be installed from the command line on most
            Linux systems: https://github.com/tesseract-ocr/tesseract/wiki
            Tesseract OCR 5.0.0 download for 64-bit Windows:
            https://github.com/UB-Mannheim/tesseract/wiki
            If overseas downloads are slow, a 4.0 download hosted in China:
            http://www.xue51.com/soft/1594.html
            Simplified Chinese training data:
            https://github.com/tesseract-ocr/tessdata/blob/master/chi_sim.traineddata
            Traditional Chinese training data:
            https://github.com/tesseract-ocr/tessdata/blob/master/chi_tra.traineddata
            Official documentation: https://digi.bib.uni-mannheim.de/tesseract/doc/

            pyocr home page: https://gitlab.gnome.org/World/OpenPaperwork/pyocr

            Blog posts:
            (command-line only training/running of Tesseract-OCR) Recognizing
            Chinese with Tesseract-OCR and training a character set:
            https://www.cnblogs.com/wzben/p/5930538.html
            https://blog.csdn.net/qq_37193537/article/details/81335165
        """
        os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

        # Look for an OCR engine.
        tools = pyocr.get_available_tools()
        if not tools:
            error_msg = "No OCR tool found"
            self.write_log(
                f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, {error_msg}"
            )
            return False

        tool = tools[0]
        self.write_log(f"Using {tool.get_name()} to ocr {image_path}")
        # lang='chi_sim' selects the OCR language data, e.g. the files under
        # C:\Program Files\Tesseract-OCR\tessdata on Windows.
        # The context manager closes the image file as soon as OCR is done.
        with Image.open(image_path) as image:
            return tool.image_to_string(image, lang="chi_sim")

コード例 #2

0

ファイルを表示

    def dt_image_to_text(self,
                         filename,
                         buff,
                         lang='chi_sim'):  # chinese simple
        """
        Convert an image, given by path or by raw bytes, to text.

        Argument :

        filename : input file name; used only when it is a ``str``
        buff     : encoded image bytes, used when ``filename`` is not a ``str``
        lang     : language passed to the OCR tool

        Return :

        the recognized text, or "" when no OCR tool is available or neither
        input is usable
        """
        tools = pyocr.get_available_tools()
        if not tools:
            return ""

        if isinstance(filename, str):
            source = filename
        elif buff:
            source = io.BytesIO(buff)
        else:
            return ""

        # Close the image deterministically once the text has been extracted.
        with PI.open(source) as image:
            return tools[0].image_to_string(image,
                                            lang=lang,
                                            builder=TextBuilder())

コード例 #3

0

ファイルを表示

ファイル: captcha.py プロジェクト: walleleung/SpiderSystem

    def predict(self):
        """
        Download a captcha, binarize it, OCR it and return the corrected text.

        Pixels whose red, green and blue values are all above 130 become
        white, every other pixel becomes black. The OCR result is passed
        through ``self.charReplace`` and then through a fixed table of
        single-character corrections.

        The downloaded captcha file is always removed, even when recognition
        fails. Raises ``IndexError`` when pyocr reports no OCR tool.

        :return: the corrected captcha string
        """
        # Captcha correction table: recognized character -> replacement.
        redata = {'I': 'r', 'E': 'g', 'G': '6', "L": 'i', "l": 'k'}

        captchapath = self.Download()
        try:
            with Image.open(captchapath) as source:
                # convert() yields an independent RGB copy, so every pixel is
                # a 3-tuple and the file handle can be closed right away.
                im = source.convert('RGB')

            width, height = im.size
            # Fill every pixel with pure white or pure black.
            for x in range(width):
                for y in range(height):
                    r, g, b = im.getpixel((x, y))
                    if r > 130 and g > 130 and b > 130:
                        im.putpixel((x, y), (255, 255, 255))
                    else:
                        im.putpixel((x, y), (0, 0, 0))

            # Crack the captcha.
            tools = pyocr.get_available_tools()
            captcha = tools[0].image_to_string(im, lang='eng')
            captcha = self.charReplace(captcha)
            return "".join(redata.get(ch, ch) for ch in captcha)
        finally:
            # Prediction is over (or failed): delete the captcha image.
            os.remove(captchapath)

コード例 #4

0

ファイルを表示

 def __init__(self, args):
     """
     Load the YAML configuration named by ``args.config`` and pick an OCR tool.

     :param args: object providing a ``config`` attribute (path of the YAML file)
     :raises IndexError: when pyocr reports no available OCR tool
     """
     with open(args.config, "r") as f:
         # safe_load only builds plain Python data and, unlike yaml.load(f),
         # does not need an explicit Loader argument.
         self.config = yaml.safe_load(f)
     self.args = args
     tools = pyocr.get_available_tools()
     self.tool = tools[0]
     self.p = PokemonGo()

コード例 #5

0

ファイルを表示

def img_upload_file():
    """
    Save an uploaded image, run Chinese OCR on it and return the result as JSON.

    Only POST requests are handled; for any other method the function falls
    through and returns ``None``.

    :return: JSON string with ``ocr_name`` (the tool used) and ``ocr_data``
             (the recognized text)
    """
    if request.method == 'POST':
        # Receive the file uploaded by the front end; "file" is the form field name.
        upload = request.files["file"]
        # Sanitize the client-supplied file name.
        filename = secure_filename(upload.filename)
        # Save the file; the same path is reused for OCR and for cleanup.
        img_path = os.path.join(config["UPLOAD_FOLDER"], filename)
        upload.save(img_path)

        # Look for an OCR engine.
        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            # NOTE(review): sys.exit() inside a request handler is kept from
            # the original; consider returning an error response instead.
            sys.exit(1)
        ocr_name = "Using '%s'" % (tools[0].get_name())
        # Recognize the image; close it before it may be deleted below.
        with Image.open(img_path) as image:
            ocr_data = tools[0].image_to_string(image, lang='chi_sim')
        result = {
            'ocr_name': ocr_name,
            'ocr_data': ocr_data,
        }
        # Delete the image only after a successful (non-empty) recognition.
        if len(ocr_data) != 0:
            os.remove(img_path)
        return json.dumps(result)

コード例 #6

0

ファイルを表示

def ocr(path):
    """Return the ``chi_sim`` OCR text of the image at *path*, or None without a tool."""
    import os
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    engines = list(pyocr.get_available_tools())
    if engines:
        picture = Image.open(path)
        return engines[0].image_to_string(picture, lang='chi_sim')
    return None

コード例 #7

0

ファイルを表示

ファイル: saldo_sodexo.py プロジェクト: georgeyk/saldo-sodexo

def saldo_sodexo(card, card_type, cpf):
    """
    Query the Sodexo balance page for a card and print the parsed details.

    A captcha is downloaded, recognized with the first available OCR tool and
    validated; the query is only sent when validation yields a value.

    :param card: card number, sent as ``cardNumber``
    :param card_type: sent as ``service``
    :param cpf: sent as ``cpf``
    :return: True when the response was parsed and printed, False otherwise
    :raises IndexError: when pyocr reports no available OCR tool
    """
    session = requests.Session()
    url = 'https://sodexosaldocartao.com.br/saldocartao/consultaSaldo.do?operation=consult'
    captcha_url = 'https://sodexosaldocartao.com.br/saldocartao/jcaptcha.do'
    captcha = download_captcha(captcha_url, session)

    try:
        tool = pyocr.get_available_tools()[0]
        value = tool.image_to_string(process_image(captcha))
    finally:
        # Remove the downloaded captcha file even when recognition fails.
        os.unlink(captcha)
    validated = validate_captcha(value)

    if not validated:
        return False

    data = {'service': card_type,
            'cardNumber': card,
            'cpf': cpf,
            'jcaptcha_response': validated,
            'x': '6',
            'y': '9'}
    r = session.post(url, params=data)

    # r.content is bytes, so compare against a bytes literal; this behaves the
    # same on Python 2 and also works on Python 3.
    # presumably 'textRed' marks an error message in the page -- TODO confirm
    if b'textRed' in r.content:
        return False

    model = parse_html(r.content)
    for field in ('name', 'company', 'status', 'card', 'balance'):
        print(model[field])
    return True

コード例 #8

0

ファイルを表示

def processing(path_to_image, filename, user):
    """
    Convert an uploaded file to JPEG, OCR it and store the text for *user*.

    The JPEG is written to ``<BASE_DIR>/ocr/ocr_input_data/<user>/`` and the
    recognized text is appended to a timestamped ``res_<name>_<date>.txt``
    file in ``<BASE_DIR>/ocr/ocr_results/<user>/``.

    :param path_to_image: path of the source file, appended to the static root
    :param filename: original file name; its extension selects the conversion
    :param user: name used for the per-user directories
    :return: path of the text file that was written
    :raises IndexError: when pyocr reports no available OCR tool
    """
    directory_for_input_data = BASE_DIR + '/ocr/ocr_input_data/{}/'.format(user)
    if not os.path.exists(directory_for_input_data):
        os.makedirs(directory_for_input_data)

    # splitext handles extensions of any length and lower() any case; slicing
    # a fixed 4 characters mangled names such as "scan.jpeg" or "SCAN.PDF".
    filename, extension = os.path.splitext(filename)
    extension = extension.lower()
    jpg_path = '{}/{}.jpg'.format(directory_for_input_data, filename)

    if extension == '.pdf':
        with WandImage(filename=settings.STATIC_ROOT+path_to_image) as img:
            img.save(filename=jpg_path)
    elif extension == '.png':
        img = Image.open(fp=STATIC_ROOT+path_to_image)
        # PNG sources are converted to RGB before being saved as JPEG.
        rgb_im = img.convert('RGB')
        rgb_im.save(jpg_path, 'JPEG')
    else:
        img = Image.open(fp=STATIC_ROOT+path_to_image)
        img.save(jpg_path, 'JPEG')

    tools = pyocr.get_available_tools()
    tool = tools[0]
    # strftime gives the same YYYY_MM_DD__HH_MM_SS stamp as before, but does
    # not break when the microsecond field happens to be 0 (str(datetime)
    # then has no fractional part to slice off).
    date = datetime.today().strftime('%Y_%m_%d__%H_%M_%S')
    text = tool.image_to_string(Image.open(jpg_path))
    directory_for_results = BASE_DIR + '/ocr/ocr_results/{}/'.format(user)
    if not os.path.exists(directory_for_results):
        os.makedirs(directory_for_results)
    filename = 'res_{}_{}.txt'.format(filename, date)
    with codecs.open(directory_for_results+filename, 'a', encoding='utf-8') as txt_file:
        txt_file.write(text)
    return directory_for_results+filename

コード例 #9

0

ファイルを表示

ファイル: StudyController.py プロジェクト: zuoliguang/Python-flask-study

def study_ocr():
    """Run English OCR on ``test4.png`` in STORAGE_FOLDER and return the text.

    Returns the string "No OCR tool found" when pyocr reports no tool.
    """
    sample_path = STORAGE_FOLDER + "/test4.png"
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    engines = list(pyocr.get_available_tools())
    if not engines:
        return "No OCR tool found"
    picture = Image.open(sample_path)
    return engines[0].image_to_string(picture, lang='eng')

コード例 #10

0

ファイルを表示

 def __init__(self, args):
     """
     Load the YAML configuration named by ``args.config`` and reset the state.

     :param args: object providing a ``config`` attribute (path of the YAML file)
     :raises IndexError: when pyocr reports no available OCR tool
     """
     with open(args.config, "r") as f:
         # safe_load only builds plain Python data and, unlike yaml.load(f),
         # does not need an explicit Loader argument.
         self.config = yaml.safe_load(f)
     self.args = args
     tools = pyocr.get_available_tools()
     self.tool = tools[0]
     self.state = ''
     self.egg_walked = 0
     self.egg_total = 0

コード例 #11

0

ファイルを表示

def processing():
  """
  Interactive loop: show the image, OCR the selected rectangle, handle keys.

  Each pass redraws the image, extracts the region described by the global
  ``rectangle`` (if it is a valid rectangle), thresholds and erodes it using
  the globals ``thresh`` and ``erosion_iters``, and prints the OCR text of
  that region while the global ``drawing`` is false.

  Keys: ``c`` reloads the images and clears the selection, ``r`` shrinks the
  displayed image by ``factor``, Esc closes all windows and ends the loop.
  """
  global rectangle,thresh,erosion_iters,most_common_filter
  # The trackbar is read from the 'Inputs' window (changed from 'frame'); the
  # value itself is not used below, the call is kept as in the original.
  cv2.getTrackbarPos('Threshold', 'Inputs')
  #
  cv2.namedWindow('image')
  cv2.setMouseCallback('image', draw_shape)
  #
  img_temp, img, roi = initialize_images()
  rectangle = None
  scale = 1
  factor = 0.75
  # Loop invariants: erosion kernel, OCR language, and the OCR tool, which is
  # looked up once on first use instead of on every frame.
  kernel = np.ones((5, 5), np.uint8)
  lang = 'letsgodigital'  # or "eng"; export TESSDATA_PREFIX=/path/to/tessdata/folder
  tool = None
  #
  while True:
    sleep(0.2)
    img = img_temp.copy()
    #
    ### ROI ###########################################################
    if is_rectangle(rectangle):
      roi = img_temp[rectangle[0][1]:rectangle[1][1], rectangle[0][0]:rectangle[1][0]]
      cv2.rectangle(img,rectangle[0],rectangle[1],(0,255,0),0)
    else:
      roi = None
    ### DISPLAY #######################################################
    cv2.imshow('image',img)
    # "is not None" is required: comparing an array with != is element-wise.
    if roi is not None:
      ### FILTER ######################################################
      roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
      roi = cv2.threshold(roi, thresh, 255, cv2.THRESH_BINARY)[1]
      roi = cv2.erode(roi, kernel, iterations=erosion_iters)
      cv2.imshow('ROI',roi)

      if not drawing:
        ### OCR #######################################################
        if tool is None:
          tool = pyocr.get_available_tools()[0]
        txt = tool.image_to_string(Image.fromarray(roi), lang=lang, builder=builders.TextBuilder())
        print(txt)
    ### ACTIONS #######################################################
    k = cv2.waitKey(1) & 0xFF
    if k == ord('c'):
      img_temp, img, roi = initialize_images()
      img = img_temp.copy()
      rectangle, roi = None, None
    elif k == ord('r'):
      # todo: ROI resizing
      scale *= factor
      print(scale)
      img = cv2.resize(img, (0,0), fx=factor, fy=factor)
      img_temp = img.copy()
    elif k == 27:   #esc key
      cv2.destroyAllWindows()
      break

コード例 #12

0

ファイルを表示

 def __init__(self):
     """Load ``config.yaml``, pick the first OCR tool and ask for the speed.

     Raises IndexError when pyocr reports no available OCR tool.
     """
     with open("config.yaml", "r") as config_file:
         self.config = yaml.load(config_file, Loader=yaml.FullLoader)
     self.tool = pyocr.get_available_tools()[0]
     self.state = ''
     # All counters start at zero.
     self.egg_walked = 0
     self.distance_total = 0
     self.distance_walked = 0
     # The prompt is Hungarian, roughly "How fast are you going? km/h".
     answer = input("Mennyivel mész? km/h ")
     self.speed = float(answer)

コード例 #13

0

ファイルを表示

ファイル: GetNodeLocal.py プロジェクト: xiaolongcheng/ImageOcr

def Imgprint(img):
    """Return the characters in the range U+4E00..U+9FA5 recognized in *img*.

    *img* is handed straight to the OCR tool with ``lang='chi_sim'``. The
    process exits with status 1 when pyocr reports no OCR tool.
    """
    engines = list(pyocr.get_available_tools())
    if not engines:
        print("No OCR tool found")
        sys.exit(1)
    recognized = engines[0].image_to_string(img, lang='chi_sim')
    # Keep only the matching characters, one list item per character.
    return re.findall(r"[\u4e00-\u9fa5]", recognized, re.S)

コード例 #14

0

ファイルを表示

    def ocr_image(self, buff, lang='chi_sim'):  # chinese simple
        """Return the OCR text of *buff*, or "" when no OCR tool is available.

        *buff* is passed unchanged to the tool's ``image_to_string``
        (presumably an already opened image object -- verify against callers).
        """
        engines = list(pyocr.get_available_tools())
        if not engines:
            return ""
        text_builder = TextBuilder()
        return engines[0].image_to_string(buff,
                                          lang=lang,
                                          builder=text_builder)

コード例 #15

0

ファイルを表示

    def __init__(self, args):
        """
        Load the YAML configuration named by ``args.config`` and set up OCR.

        :param args: object providing a ``config`` attribute (path of the YAML file)
        :raises IndexError: when pyocr reports no available OCR tool
        :raises KeyError: when the configuration lacks ``names.name_check`` or
                          ``names.search_string``
        """
        with open(args.config, "r") as f:
            # safe_load only builds plain Python data and, unlike yaml.load(f),
            # does not need an explicit Loader argument.
            self.config = yaml.safe_load(f)
        self.args = args
        tools = pyocr.get_available_tools()
        self.tool = tools[0]
        self.p = PokemonGo()
        self.i = 2

        self.CHECK_STRING = self.config['names']['name_check']
        self.SEARCH_STRING = self.config['names']['search_string']

コード例 #16

0

ファイルを表示

ファイル: ocrNumberReco.py プロジェクト: zyqqing/CV

def ocrReco(wMat, size_):
    """
    Lay ``size_`` 28x28 tiles side by side, OCR the strip and print the text.

    :param wMat: array indexed as ``wMat[i, row, col]`` holding the tiles
    :param size_: number of tiles to place next to each other
    :raises IndexError: when pyocr reports no available OCR tool
    """
    newMat = np.zeros((28, 28 * size_))
    for i in range(size_):
        # Copy tile i in one slice assignment instead of pixel by pixel.
        newMat[:, i * 28:(i + 1) * 28] = wMat[i, :28, :28]

    tempImg = Image.fromarray(uint8(newMat))
    tempImg.save('./temp/temp.png')
    tools = pyocr.get_available_tools()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(tools[0].image_to_string(Image.open('./temp/temp.png'),
                                   lang='eng').encode('GBK', 'ignore'))

コード例 #17

0

ファイルを表示

def imgTostring(path):
    """
    Return the OCR text of ``code.jpg``, or -1 when no OCR tool is available.

    NOTE(review): ``path`` is accepted but never used -- the image name is
    hard-coded to 'code.jpg'. Kept as is because callers are not visible
    here; confirm whether ``path`` should be opened instead.
    """
    from pyocr import pyocr
    from PIL import Image

    # Look for an available OCR tool.
    tools = pyocr.get_available_tools()
    if not tools:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("No OCR tool found")
        return -1
    return tools[0].image_to_string(Image.open('code.jpg'))

コード例 #18

0

ファイルを表示

ファイル: HDChinaNet.py プロジェクト: August1996/Python-Demo

def imgTostring(path):
	"""
	Return the OCR text of ``code.jpg``, or -1 when no OCR tool is available.

	NOTE(review): ``path`` is accepted but never used -- the image name is
	hard-coded to 'code.jpg'. Kept as is because callers are not visible
	here; confirm whether ``path`` should be opened instead.
	"""
	from pyocr import pyocr
	from PIL import Image

	# Look for an available OCR tool.
	tools = pyocr.get_available_tools()
	if not tools:
		# print(...) with a single argument behaves the same on Python 2 and 3.
		print("No OCR tool found")
		return -1
	return tools[0].image_to_string(Image.open('code.jpg'))

コード例 #19

0

ファイルを表示

ファイル: deps.py プロジェクト: jflesch/paperwork

def find_missing_ocr(lang):
    """
    List the OCR dependencies that appear to be missing.

    OCR tools are a little bit more tricky: both the tool itself and the
    language data named by ``lang['tesseract']`` are checked.

    Each entry is a ``(name, version, packages)`` tuple where ``packages``
    maps a distribution name to the package that provides the dependency.
    """
    missing = []
    try:
        from pyocr import pyocr
        ocr_tools = pyocr.get_available_tools()
    except ImportError:
        print(
            "[WARNING] Couldn't import Pyocr. Will assume OCR tool is not"
            " installed yet"
        )
        ocr_tools = []

    # Languages stay empty unless a tool exists and answers the query.
    langs = []
    if ocr_tools:
        try:
            langs = ocr_tools[0].get_available_languages()
        except Exception as exc:
            print("[WARNING] Exception while looking for available languages:"
                  " {}".format(str(exc)))
    else:
        tool_packages = {
            'debian': 'tesseract-ocr',
            'fedora': 'tesseract',
            'gentoo': 'app-text/tesseract',
            'linuxmint': 'tesseract-ocr',
            'ubuntu': 'tesseract-ocr',
        }
        missing.append(('Tesseract', '(none)', tool_packages))

    if not langs or lang['tesseract'] not in langs:
        wanted = lang['tesseract']
        lang_packages = {
            'debian': 'tesseract-ocr-%s' % wanted,
            'fedora': 'tesseract-langpack-%s' % wanted,
            'linuxmint': 'tesseract-ocr-%s' % wanted,
            'ubuntu': 'tesseract-ocr-%s' % wanted,
        }
        missing.append(('Tesseract language data', '(none)', lang_packages))

    return missing

コード例 #20

0

ファイルを表示

ファイル: OCR-V1.1.py プロジェクト: Masonwwy/OCR

 def onLeftButtonUp(event):
     """Grab the dragged screen region, OCR it into ``self.result`` and close.

     ``self`` and ``lastDraw`` come from the enclosing scope.
     """
     self.sel = False
     try:
         self.canvas.delete(lastDraw)
     except Exception:
         # Best effort: failures while deleting the last drawing are ignored.
         pass
     sleep(0.1)
     # Order the press and release coordinates so left <= right, top <= bottom.
     left, right = sorted((self.X.get(), event.x))
     top, bottom = sorted((self.Y.get(), event.y))
     self.selectPosition = (left, right, top, bottom)
     region = ImageGrab.grab((left + 1, top + 1, right, bottom))
     engines = list(pyocr.get_available_tools())
     self.result = engines[0].image_to_string(region)
     self.top.destroy()

コード例 #21

0

ファイルを表示

def ocr_pyocr(img_id):
    """
    OCR the image belonging to *img_id* and hand the text to ``write_output``.

    The first available tool and the first language it reports are used.
    The process exits with status 1 when pyocr reports no OCR tool.

    :param img_id: identifier passed to ``get_image_filename`` and ``write_output``
    """
    img_filename = './' + get_image_filename(img_id)
    # The former cv2.imread() of the same file was dropped: its result was
    # never used, so it only cost an extra decode of the image.
    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)

    tool = tools[0]
    langs = tool.get_available_languages()
    lang = langs[0]
    # Close the image file as soon as the text has been extracted.
    with Image.open(img_filename) as image:
        txt = tool.image_to_string(image,
                                   lang=lang,
                                   builder=pyocr.tesseract.builders.TextBuilder())
    write_output(img_id, txt)

コード例 #22

0

ファイルを表示

ファイル: captchaidentify.py プロジェクト: xiangjl623/Projects

def main():
    """Convert ``d:/validate_code.png`` to greyscale, then OCR and print it.

    The greyscale copy is written to ``d:/result.png``. The tool name, its
    version and its languages are printed before the recognized text. The
    process exits with status 1 when pyocr reports no OCR tool.
    """
    # Convert the picture to greyscale and save it (no denoising is applied).
    grey = Image.open("d:/validate_code.png").convert("L")
    grey.save("d:/result.png")

    engines = list(pyocr.get_available_tools())
    if not engines:
        print("No OCR tool found")
        sys.exit(1)

    engine = engines[0]
    print("Using '%s'" % (engine.get_name()))
    print(engine.get_version())
    print("Using '%s'" % (engine.get_available_languages()))
    saved = Image.open('d:/result.png')
    print(engine.image_to_string(saved, lang='eng'))

コード例 #23

0

ファイルを表示

def find_missing_ocr(lang):
    """
    List the OCR dependencies that appear to be missing.

    OCR tools are a little bit more tricky: the tool itself is checked first,
    then the language data named by ``lang['tesseract']``.

    Returns a list of ``(name, version, packages)`` tuples, ``packages``
    mapping a distribution name to the package to install.
    """
    missing = []
    try:
        from pyocr import pyocr
        ocr_tools = pyocr.get_available_tools()
    except ImportError:
        print("[WARNING] Couldn't import Pyocr. Will assume OCR tool is not"
              " installed yet")
        ocr_tools = []

    if ocr_tools:
        try:
            langs = ocr_tools[0].get_available_languages()
        except Exception as exc:
            print("[WARNING] Exception while looking for available languages:"
                  " {}".format(str(exc)))
            langs = []
    else:
        langs = []
        entry = (
            'Tesseract',
            '(none)',
            {
                'debian': 'tesseract-ocr',
                'fedora': 'tesseract',
                'gentoo': 'app-text/tesseract',
                'linuxmint': 'tesseract-ocr',
                'ubuntu': 'tesseract-ocr',
            },
        )
        missing.append(entry)

    if not langs or lang['tesseract'] not in langs:
        code = lang['tesseract']
        # Debian-style distributions share one package naming scheme.
        deb_style = 'tesseract-ocr-%s' % code
        entry = (
            'Tesseract language data',
            '(none)',
            {
                'debian': deb_style,
                'fedora': 'tesseract-langpack-%s' % code,
                'linuxmint': deb_style,
                'ubuntu': deb_style,
            },
        )
        missing.append(entry)

    return missing

コード例 #24

0

ファイルを表示

ファイル: controllers.py プロジェクト: hethune/glass-skim

def postImage():
    """
    OCR the uploaded image, summarize the text and publish the summary.

    The text defaults to "Hello Glass" when no OCR tool is available. The
    summary is published on the ``notifications`` redis channel, and both the
    text and the summary are printed.

    :return: an empty string as the response body
    """
    image_file = request.files['file']
    text = "Hello Glass"
    tools = pyocr.get_available_tools()
    if tools:
        text = tools[0].image_to_string(Image.open(image_file), lang='eng', psm='6', builder=builders.TextBuilder())
        # Collapse every run of whitespace into a single space.
        text = " ".join(text.split())
    st = SummaryTool()
    sentences_dic = st.get_senteces_ranks(text)
    summary = st.get_summary(text, sentences_dic)

    redis.publish('notifications', "%s." % summary)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("=========================================\n")
    print(text)
    print("=========================================\n")
    print(summary)
    return ""

コード例 #25

0

ファイルを表示

ファイル: controllers.py プロジェクト: shawiz/glass-skim

def postImage():
    """
    OCR the uploaded image, summarize the text and publish the summary.

    The text defaults to "Hello Glass" when no OCR tool is available. The
    summary is published on the ``notifications`` redis channel, and both the
    text and the summary are printed.

    :return: an empty string as the response body
    """
    image_file = request.files['file']
    text = "Hello Glass"
    tools = pyocr.get_available_tools()
    if tools:
        text = tools[0].image_to_string(Image.open(image_file),
                                        lang='eng',
                                        psm='6',
                                        builder=builders.TextBuilder())
        # Collapse every run of whitespace into a single space.
        text = " ".join(text.split())
    st = SummaryTool()
    sentences_dic = st.get_senteces_ranks(text)
    summary = st.get_summary(text, sentences_dic)

    redis.publish('notifications', "%s." % summary)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("=========================================\n")
    print(text)
    print("=========================================\n")
    print(summary)
    return ""

コード例 #26

0

ファイルを表示

ファイル: main.py プロジェクト: tomfran/sustainability_reports_analysis

def convert(verbose=False):
    """
    Convert every PDF file under PDF_DIRECTORY into images and then text.

    PDF_DIRECTORY is expected to contain one level of sub-directories; each
    file in them is passed to ``get_images``, ``get_paragraphs`` and
    ``get_texts``. A failure on one file does not stop the others.

    :param verbose: print progress (and per-file errors) when true
    :raises IndexError: when pyocr reports no available OCR tool
    """
    tool = pyocr.get_available_tools()[0]
    counter = 1
    for pdf_dir in sorted(os.listdir(PDF_DIRECTORY)):
        for pdf_file in sorted(os.listdir(PDF_DIRECTORY + "/" + pdf_dir)):
            # status print
            if verbose:
                print("\n\033[1m%d. %s\033[0m" % (counter, pdf_file),
                      end="\n\n")
            try:
                get_images(pdf_file, pdf_dir, verbose)
                get_paragraphs(pdf_file, tool, verbose)
                get_texts(pdf_file, pdf_dir, tool, verbose)
            except Exception as exc:
                # Best effort: keep going with the next file, but do not hide
                # the failure completely.
                if verbose:
                    print("ERROR: %s (%s)" % (pdf_file, exc))
            counter += 1
    # The former `return` inside the inner loop ended the conversion after the
    # very first file, contradicting the "every pdf file" intent.

コード例 #27

0

ファイルを表示

ファイル: ocr_webserver.py プロジェクト: ruanima/code_snippet

def security_code():
    """
    Recognize the security code in the base64 image given as ``img``.

    The ``img`` query argument may be a bare base64 string or a data URI;
    everything up to the first comma is dropped. Aborts with 404 when the
    argument is missing or no OCR tool is available, and with 400 when the
    value cannot be decoded into an image.

    :return: the recognized text
    """
    img_base64 = request.args.get('img', '')
    if not img_base64:
        abort(404)
    img_base64 = img_base64.split(',', 1)[-1]
    try:
        data_stream = io.BytesIO(base64.b64decode(img_base64))
        img = Image.open(data_stream)
    except (TypeError, ValueError, IOError):
        # Malformed base64 or data that is not an image is a client error.
        abort(400)

    # Pre-process the picture a little to improve recognition: pixels with
    # the value (204, 233, 246, 255) are replaced by (255, 255, 255, 1).
    img = img.convert("RGBA")
    pixdata = img.load()
    width, height = img.size
    # range() instead of xrange() so the code also runs on Python 3.
    for y in range(height):
        for x in range(width):
            if pixdata[x, y] == (204, 233, 246, 255):
                pixdata[x, y] = (255, 255, 255, 1)

    # Recognize the text with OCR.
    tools = pyocr.get_available_tools()
    if not tools:
        abort(404)
    code = tools[0].image_to_string(img, lang='eng')
    return code

コード例 #28

0

ファイルを表示

ファイル: study_zhibu.py プロジェクト: zyolfqitler/scraping-zhibugongzuo-

def verify_ocr(filename):
    """Return the verification code read from *filename*.

    With an OCR tool available the image is inverted, thresholded and eroded,
    written to ``verify.png`` and recognized as English text. Without one the
    user is asked to type the code.
    """
    from pyocr import pyocr
    from PIL import Image
    import cv2
    import numpy as np

    engines = list(pyocr.get_available_tools())
    if not engines:
        # No OCR available: fall back to manual input.
        print("No OCR tool found")
        print("input manually")
        return input('input correct varify code:')

    # The flag 0 is passed to imread as in the original call.
    cap = cv2.imread(filename, 0)
    cap[:] = 255 - cap[:]
    _, thresh = cv2.threshold(cap, 100, 255, cv2.THRESH_BINARY)
    erosion = cv2.erode(thresh, np.ones((3, 3), dtype=np.uint8), iterations=2)
    cv2.imshow('image', erosion)
    cv2.imwrite('verify.png', erosion)

    engine = engines[0]
    print("Using '%s'" % (engine.get_name()))
    output = engine.image_to_string(Image.open('verify.png'), lang='eng')
    print("recognizing..." + output)
    return output

コード例 #29

0

ファイルを表示

ファイル: mytest.py プロジェクト: hethune/glass-skim

# Small manual test: OCR test.jpg with the first available tool and print it.
import sys
import Image
from pyocr import pyocr
# Make the local "src" directory importable before loading builders from it.
sys.path = ["src"] + sys.path
import builders

tools = pyocr.get_available_tools()
if not tools:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("No OCR tool found")
    sys.exit(1)
print(tools[0].image_to_string(Image.open('test.jpg'), lang='eng', psm='6',
                               builder=builders.TextBuilder()))

コード例 #30

0

ファイルを表示

#!/usr/bin/env python3.7
import sys
import argparse

import yaml
from PIL import Image
from pyocr import pyocr
from pyocr import builders

# Load the YAML configuration from the parent directory at import time.
with open("../config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Use the first OCR tool pyocr reports; tools[0] raises IndexError when the
# list is empty.
tools = pyocr.get_available_tools()
tool = tools[0]


def ocr_img(img, loc, debug=None):
    """
    Print the OCR text of the region *loc* of the image file *img*.

    Newlines in the recognized text are replaced by spaces.

    :param img: path (or file object) accepted by ``Image.open``
    :param loc: crop box passed to ``Image.crop``
    :param debug: when not None, the cropped region is shown first
    """
    # Keep the source image open only while the crop is being used.
    with Image.open(img) as image:
        crop = image.crop(loc)
        if debug is not None:
            crop.show()
        print(tool.image_to_string(crop).replace("\n", " "))


# usage '__name__' --loc location --app app1|app2 image


def main():
    parser = argparse.ArgumentParser(description='Pokemon GO image tester')
    parser.add_argument('--loc',
                        type=str,

コード例 #31

0

ファイルを表示

ファイル: OCR Tesseract.py プロジェクト: zenolee22/meh

import io
import pdfminer

#image_file = "H:/Projects/OCR/transforming_into_an_analytics_driven_insurance_carrier.pdf"

# Read the PDF at 400 dpi and convert it to JPEG.
# NOTE(review): Image is presumably wand.image.Image and PI is PIL.Image --
# their imports are not visible in this excerpt.
image_pdf = Image(filename="H:/Projects/OCR/II_StateAdvisoryForumState_AL_2016.pdf", resolution=400)
image_jpeg = image_pdf.convert('jpeg')

req_image = []   # one JPEG blob per item of image_jpeg.sequence
final_text = []  # recognized text, in the same order as req_image

# Turn every item of the sequence into a standalone JPEG blob.
for img in image_jpeg.sequence:
    img_page = Image(image=img)
    req_image.append(img_page.make_blob('jpeg'))

# Use the first OCR tool pyocr reports (IndexError when none is available).
tool = pyocr.get_available_tools()[0]
#lang = tool.get_available_languages()[1]

# OCR each blob; no language is passed (the lang argument is commented out).
for img in req_image:
    txt = tool.image_to_string(
        PI.open(io.BytesIO(img)),
#        lang=lang,
        builder=builders.TextBuilder()
    )
    final_text.append(txt)

# Prints the second recognized text; raises IndexError when fewer than two
# blobs were produced.
print(final_text[1])

file = open("output.txt","w",encoding='utf-8')

for item in final_text:

コード例 #32

0

ファイルを表示

ファイル: grab.py プロジェクト: zeynelabidinertur/NewWords

        output_jpg = input_pdf.replace(".pdf", ".jpg")
        output_img.write(output_jpg)


# print datetime.now() - start_time

from wand.image import Image
from PIL import Image as PI
import sys
import os
from pyocr import pyocr
from pyocr import builders
import io
#TESSERACT_CMD = os.environ["TESSDATA_PREFIX"] + os.sep + 'tesseract.exe' if os.name == 'nt' else 'tesseract'

# Use the first OCR backend pyocr reports and show what it offers.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
tool = pyocr.get_available_tools()[0]
print(tool)
lang = tool.get_available_languages()
print(lang)
req_image = []
final_text = []

# wand's ``file=`` keyword expects an open, readable file object; a path on
# disk has to be passed as ``filename=``.
image_pdf = Image(filename="test_pf.pdf", resolution=300)
image_jpeg = image_pdf.convert('jpeg')

# Collect one JPEG blob per entry of the converted image sequence.
for img in image_jpeg.sequence:
    img_page = Image(image=img)
    req_image.append(img_page.make_blob('jpeg'))

for img in req_image:
    txt = tool.image_to_string(PI.open(io.BytesIO(img)),

コード例 #33

0

ファイルを表示

ファイル: settingswindow.py プロジェクト: chrisz/paperwork

    def __init__(self, mainwindow_gui, config):
        """
        Build the settings dialog from "settingswindow.glade", wire up its
        actions, workers and signal handlers, show it and start the device
        finder worker.

        Arguments:
            mainwindow_gui -- window the settings dialog is made transient for
            config -- configuration object; it is kept on the instance, handed
                to the apply/cancel actions and the grip handler, read for
                scanner_devid / scanner_resolution / RECOMMENDED_RESOLUTION,
                and passed to display_config()
        """
        gobject.GObject.__init__(self)
        widget_tree = load_uifile("settingswindow.glade")

        self.window = widget_tree.get_object("windowSettings")
        self.window.set_transient_for(mainwindow_gui)

        self.__config = config

        self.workdir_chooser = widget_tree.get_object("filechooserbutton")

        # action name --> (list of widgets, action object); each action object
        # is connected to its widgets further down
        actions = {
            "apply" : (
                [widget_tree.get_object("buttonSettingsOk")],
                ActionApplySettings(self, config)
            ),
            "cancel" : (
                [widget_tree.get_object("buttonSettingsCancel")],
                ActionCancelSettings(self, config)
            ),
            "select_scanner" : (
                [widget_tree.get_object("comboboxDevices")],
                ActionSelectScanner(self)
            ),
            "scan_calibration" : (
                [widget_tree.get_object("buttonScanCalibration")],
                ActionScanCalibration(self)
            )
        }

        # Per-setting combobox, its 'loading' / 'loaded' list stores and the
        # counters that start at 0 elements / no active index (-1)
        self.device_settings = {
            "devid" : {
                'gui' : widget_tree.get_object("comboboxDevices"),
                'stores' : {
                    'loading' : widget_tree.get_object("liststoreLoading"),
                    'loaded'  : widget_tree.get_object("liststoreDevice"),
                },
                'nb_elements' : 0,
                'active_idx' : -1,
            },
            "resolution" : {
                'gui' : widget_tree.get_object("comboboxResolution"),
                'stores' : {
                    'loading' : widget_tree.get_object("liststoreLoading"),
                    'loaded' : widget_tree.get_object("liststoreResolution"),
                },
                'nb_elements' : 0,
                'active_idx' : -1,
            },
        }

        self.ocr_settings = {
            "lang" : {
                'gui' : widget_tree.get_object("comboboxLang"),
                'store' : widget_tree.get_object("liststoreOcrLang"),
            }
        }

        # Widgets used by the calibration scan
        self.calibration = {
            "scan_button" : widget_tree.get_object("buttonScanCalibration"),
            "image_gui" : widget_tree.get_object("imageCalibration"),
            "image_viewport" : widget_tree.get_object("viewportCalibration"),
            "images" : [],  # array of tuples : (resize factor, PIL image)
            "image_eventbox" : widget_tree.get_object("eventboxCalibration"),
            "image_scrollbars" : widget_tree.get_object("scrolledwindowCalibration"),
        }

        self.grips = CalibrationGripHandler(config, self)

        self.progressbar = widget_tree.get_object("progressbarScan")
        self.__scan_start = 0.0

        # Worker objects; only 'device_finder' is started by this constructor
        self.workers = {
            "device_finder" : WorkerDeviceFinder(config.scanner_devid),
            "resolution_finder" : WorkerResolutionFinder(
                    config.scanner_resolution,
                    config.RECOMMENDED_RESOLUTION),
            "scan" : WorkerCalibrationScan(
                    self.calibration['image_viewport']),
            "progress_updater" : WorkerProgressUpdater("calibration scan",
                                                       self.progressbar)
        }

        # Languages come from the first available OCR tool (none if no tool is
        # found); they are sorted on their long name and "Disable OCR" is
        # always inserted as the first entry
        ocr_tools = pyocr.get_available_tools()
        if len(ocr_tools) <= 0:
            ocr_langs = []
        else:
            ocr_langs = ocr_tools[0].get_available_languages()
        ocr_langs = self.__get_short_to_long_langs(ocr_langs)
        ocr_langs.sort(key=lambda lang: lang[1])
        ocr_langs.insert(0, (None, _("Disable OCR")))

        # Store rows are [long name, short name]
        self.ocr_settings['lang']['store'].clear()
        for (short_lang, long_lang) in ocr_langs:
            self.ocr_settings['lang']['store'].append([long_lang, short_lang])

        # Connect every action object to its widgets
        for action in ["apply", "cancel", "select_scanner", "scan_calibration"]:
            actions[action][1].connect(actions[action][0])

        # Device finder signals: the handlers only schedule the real callback
        # with gobject.idle_add()
        self.workers['device_finder'].connect(
                'device-finding-start',
                lambda worker: gobject.idle_add(
                    self.__on_device_finding_start_cb))
        self.workers['device_finder'].connect(
                'device-found',
                lambda worker, user_name, store_name, active: \
                    gobject.idle_add(self.__on_value_found_cb,
                                     self.device_settings['devid'],
                                     user_name, store_name, active))
        self.workers['device_finder'].connect(
                'device-finding-end',
                lambda worker: gobject.idle_add(
                    self.__on_finding_end_cb,
                    self.device_settings['devid']))

        # Resolution finder signals: same scheme, applied to the 'resolution'
        # settings
        self.workers['resolution_finder'].connect(
                'resolution-finding-start',
                lambda worker: gobject.idle_add(
                    self.__on_finding_start_cb,
                    self.device_settings['resolution']))
        self.workers['resolution_finder'].connect(
                'resolution-found',
                lambda worker, user_name, store_name, active: \
                    gobject.idle_add(self.__on_value_found_cb,
                                     self.device_settings['resolution'],
                                     user_name, store_name, active))
        self.workers['resolution_finder'].connect(
                'resolution-finding-end',
                lambda worker: gobject.idle_add(
                    self.__on_finding_end_cb,
                    self.device_settings['resolution']))

        # Calibration scan signals call their handlers directly (no idle_add)
        self.workers['scan'].connect('calibration-scan-start',
                lambda worker: self.__on_scan_start())
        self.workers['scan'].connect('calibration-scan-done',
                lambda worker, img: self.__on_scan_done(img))
        self.workers['scan'].connect('calibration-resize-done',
                lambda worker, factor, img: self.__on_resize_done(factor, img))

        # Mouse events on the calibration image are forwarded to the grip
        # handler
        self.calibration['image_eventbox'].connect("button-press-event",
                lambda x, ev: self.grips.on_mouse_button_pressed_cb(ev))
        self.calibration['image_eventbox'].connect("motion-notify-event",
                lambda x, ev: self.grips.on_mouse_motion(ev))
        self.calibration['image_eventbox'].connect("button-release-event",
                lambda x, ev: self.grips.on_mouse_button_released_cb(ev))
        self.calibration['image_eventbox'].add_events(
                gtk.gdk.POINTER_MOTION_MASK)

        self.window.connect("destroy", self.__on_destroy)

        self.display_config(config)

        self.window.set_visible(True)

        # Must be connected after the window has been displayed.
        # Otherwise, if "disable OCR" is already selected in the config
        # it will display a warning popup even before the dialog has been
        # displayed
        self.ocr_settings['lang']['gui'].connect(
            "changed", self.__on_ocr_lang_changed)

        self.workers['device_finder'].start()

コード例 #34

0

ファイルを表示

 def ocr(self, filename):
     """Run OCR on an image file with the first available pyocr tool.

     Args:
         filename: Image to read; passed to ``Image.open``.

     Returns:
         The text recognised with the ``chi_sim`` language data, or ``None``
         when pyocr reports no usable OCR tool.
     """
     tools = pyocr.get_available_tools()
     if not tools:
         print("No Ocr tool")
         # Stop here: indexing the empty list below would raise IndexError.
         return None
     # Close the image file as soon as the OCR call has finished.
     with Image.open(filename) as image:
         return tools[0].image_to_string(image, lang='chi_sim')

コード例 #35

0

ファイルを表示

ファイル: recchar.py プロジェクト: chybot/crawler

    def rec(self, pic_file_path = os.path.join(work_dir, 'mimidama', 'test_pics', 'test1.jpg'), typecode = 60001, timeout=120):
        """
        Recognise a captcha image with the backend selected by ``self.type``.

        :param pic_file_path: path of the captcha image
        :param typecode: captcha type code; only the "ruokuai" backend
                         forwards it ("mimidama" always sends 60001)
        :param timeout: timeout forwarded to the "ruokuai" backend
        :return: (ret, code_id, is_report_error) -- the recognised text, the
                 id of the recognition request, and whether an error report
                 has already been submitted

        When ret is empty or the recognition turns out to be wrong, check
        is_report_error first: if it is False, one error report has to be
        submitted (self.reportError(code_id)).

        An unknown ``self.type`` is logged and terminates the process with
        sys.exit(-1).
        """
        # Bind ``sys`` up front. A function-level import makes the name local
        # to the whole method, so importing it only inside one branch left it
        # unbound (UnboundLocalError) for the sys.exit() of the fallback branch.
        import sys

        self.handleLog(u"进入recchar的打码方法：", pic_file_path, typecode, timeout, self.type)
        ret = ""
        code_id = 0
        is_report_error = False

        # self.type -> name of the self.netf method that solves that captcha.
        # Several types deliberately share one solver.
        netf_methods = {
            "jiangsu": "ComputeJiangsu",
            "xinjiang": "ComputeXinjiang2",
            "chongqing": "ComputeChongqing2",
            "guangdong": "ComputeGuangdong4",
            "neimenggu": "ComputeGuangdong4",
            "gansu": "ComputeGansu",
            "hainan": "ComputeGuangdong3",
            "henan": "ComputeHenan2",
            "shanghai": "ComputeShanghai1",
            "hunan": "ComputeShanghai1",
            "ningxia": "ComputeNingxia",
            "jiangxi": "ComputeJiangxi",
            "tianjin": "ComputeTianjin",
            "fujian": "ComputeShanghai1",
            "hebei": "ComputeShanghai1",
            "anhui": "ComputeHenan2",
            "guangxi": "ComputeHenan2",
            "heilongjiang": "ComputeHenan2",
            "yunnan": "ComputeShanghai1",
            "xizang": "ComputeHenan2",
            "qinghai": "ComputeHenan2",
            "sichuan": "ComputeXinjiang2",
            "zongju": "ComputeShanghai1",
            "zhejiang": "ComputeZhejiang",
            "shanxitaiyuan": "ComputeHenan2",
            "guizhou": "ComputeGuizhou3",
            "hubei": "ComputeHubei",
            "hubei2": "ComputeHubei2",
            "shan3xi": "ComputeShan3xi",
            "shan3xi2": "ComputeShan3xi2",
            "shandong": "ComputeShandong",
            "beijing": "ComputeBeijing1",
            "zhongdeng": "ComputeZhongdeng",
            "cnca": "ComputeCnca",
            "haiguan": "ComputeHaiguan",
            "jilin": "ComputeShandong",
            "liaoning": "ComputeLiaoning3",
        }

        if self.type == "mimidama":
            # ``result`` is an output buffer filled in by RecPath.
            result = c_char_p("")
            code_id = self.RecPath(c_int(self.s_id), c_char_p(self.s_key), self.user, self.passwd, c_char_p(pic_file_path), c_int(60001), result)
            self.handleLog("result", result)
            if code_id <= 0:
                # A non-positive id is an error code: report it right away.
                print('get result error ,ErrorCode:%d do reportError!' % code_id)
                report_ret = self.reportErrorID(code_id)
                self.handleLog("report_ret:", report_ret)
                if report_ret != 0:
                    self.handleLog("验证码识别错误时提交报告错误 report_ret:%d pic_file_path:%s"%(report_ret, pic_file_path))
                else:
                    self.handleLog("reportError ok!")
                    is_report_error = True
            else:
                self.handleLog("the code_id is:%d result is:%s" % (code_id, result.value))
                ret = result.value
        elif self.type == "ruokuai":
            self.log.info("type=%s,pic_file_path=%s" % (self.type, pic_file_path))
            # Read the image through a context manager so the handle is closed.
            with open(pic_file_path, 'rb') as pic_file:
                im = pic_file.read()
            try:
                rk_create_ret = self.rc.rk_create(im, typecode, timeout)
                self.handleLog("%s, %s" % (str(rk_create_ret), rk_create_ret.get('Error',"NO Error")))
                ret = rk_create_ret["Result"]
                code_id = rk_create_ret["Id"]
                self.handleLog("%s, %s" % (rk_create_ret["Result"], rk_create_ret["Id"]))
            except Exception as e:
                self.handleException(e)
        elif self.type == "zhongdengwang":
            # NOTE(review): these top-level module names only exist in the
            # legacy PIL distribution; with Pillow they live under ``PIL``.
            import Image
            import ImageEnhance
            import ImageFilter
            from pyocr import pyocr
            try:
                tools = pyocr.get_available_tools()[:]
                if len(tools) == 0:
                    print("No OCR tool found")
                    sys.exit(1)

                # Open the picture
                im = Image.open(pic_file_path)
                # Convert to luminance (greyscale)
                imgry = im.convert('L')

                # Binarise; ``table`` is defined outside this method
                out = imgry.point(table,'1')

                ret = tools[0].image_to_string(out, lang='fra')

                # Clean up the recognised text
                ret = ret.strip()

                # Apply the replacement map ``rep`` (defined outside this method)
                for r in rep:
                    ret = ret.replace(r,rep[r])
            except Exception as e:
                self.handleException(e)
        elif self.type in netf_methods:
            # "jiangsu" is the one netf backend that does not log its input.
            if self.type != "jiangsu":
                self.log.info("type=%s,pic_file_path=%s" % (self.type, pic_file_path))
            try:
                # The lookup stays inside the try so that a missing solver is
                # handled like any other solver failure.
                ret = getattr(self.netf, netf_methods[self.type])(str(pic_file_path))
            except Exception as e:
                self.handleException(e)
        else:
            self.handleLog("unkown type:", self.type)
            sys.exit(-1)

        return (ret, code_id, is_report_error)

コード例 #36

0

ファイルを表示

ファイル: pokefriends.py プロジェクト: davidfegyver/Pokefriends

 def __init__(self):
     """Load config.yaml, keep the first available OCR tool and store ``friends``.

     ``friends`` is a name taken from the enclosing scope; it is not a
     parameter of this constructor.
     """
     with open("config.yaml", "r") as config_file:
         self.config = yaml.load(config_file, Loader=yaml.FullLoader)
     # First backend reported by pyocr (IndexError if there is none).
     self.tool = pyocr.get_available_tools()[0]
     self.friends = friends

コード例 #37

0

ファイルを表示

from pyocr import pyocr
from pyocr import tesseract

from tests import tests_cuneiform
from tests import tests_tesseract

if __name__ == '__main__':
    # Describe every OCR backend pyocr knows about, installed or not.
    for backend in pyocr.TOOLS:
        print("- OCR: %s" % backend.get_name())
        is_usable = backend.is_available()
        print("  is_available(): %s" % (str(is_usable)))
        if is_usable:
            version_text = str(backend.get_version())
            print("  get_version(): %s" % (version_text))
            print("  get_available_languages(): ")
            language_list = ", ".join(backend.get_available_languages())
            print("    " + language_list)
        print("")
    print("")

    # List only the backends that can actually be used.
    print("OCR tool found:")
    for backend in pyocr.get_available_tools():
        print("- %s" % backend.get_name())

    # Run a backend's test suite only when that backend is available.
    if tesseract.is_available():
        print("---")
        print("Tesseract:")
        tesseract_suite = tests_tesseract.get_all_tests()
        unittest.TextTestRunner().run(tesseract_suite)
    if cuneiform.is_available():
        print("---")
        print("Cuneiform:")
        cuneiform_suite = tests_cuneiform.get_all_tests()
        unittest.TextTestRunner().run(cuneiform_suite)

コード例 #38

0

ファイルを表示

    def __init__(self, mainwindow_gui, config):
        """
        Build the settings dialog from "settingswindow.glade", wire up its
        actions, workers and signal handlers, show it and start the device
        finder worker.

        Arguments:
            mainwindow_gui -- window the settings dialog is made transient for
            config -- configuration object; it is kept on the instance, handed
                to the apply/cancel actions and the grip handler, read for
                scanner_devid / scanner_resolution / RECOMMENDED_RESOLUTION,
                and passed to display_config()
        """
        gobject.GObject.__init__(self)
        widget_tree = load_uifile("settingswindow.glade")

        self.window = widget_tree.get_object("windowSettings")
        self.window.set_transient_for(mainwindow_gui)

        self.__config = config

        self.workdir_chooser = widget_tree.get_object("filechooserbutton")

        # action name --> (list of widgets, action object); each action object
        # is connected to its widgets further down
        actions = {
            "apply": ([widget_tree.get_object("buttonSettingsOk")],
                      ActionApplySettings(self, config)),
            "cancel": ([widget_tree.get_object("buttonSettingsCancel")],
                       ActionCancelSettings(self, config)),
            "select_scanner": ([widget_tree.get_object("comboboxDevices")],
                               ActionSelectScanner(self)),
            "scan_calibration":
            ([widget_tree.get_object("buttonScanCalibration")],
             ActionScanCalibration(self))
        }

        # Per-setting combobox, its 'loading' / 'loaded' list stores and the
        # counters that start at 0 elements / no active index (-1)
        self.device_settings = {
            "devid": {
                'gui': widget_tree.get_object("comboboxDevices"),
                'stores': {
                    'loading': widget_tree.get_object("liststoreLoading"),
                    'loaded': widget_tree.get_object("liststoreDevice"),
                },
                'nb_elements': 0,
                'active_idx': -1,
            },
            "resolution": {
                'gui': widget_tree.get_object("comboboxResolution"),
                'stores': {
                    'loading': widget_tree.get_object("liststoreLoading"),
                    'loaded': widget_tree.get_object("liststoreResolution"),
                },
                'nb_elements': 0,
                'active_idx': -1,
            },
        }

        self.ocr_settings = {
            "lang": {
                'gui': widget_tree.get_object("comboboxLang"),
                'store': widget_tree.get_object("liststoreOcrLang"),
            }
        }

        # Widgets used by the calibration scan
        self.calibration = {
            "scan_button":
            widget_tree.get_object("buttonScanCalibration"),
            "image_gui":
            widget_tree.get_object("imageCalibration"),
            "image_viewport":
            widget_tree.get_object("viewportCalibration"),
            "images": [],  # array of tuples : (resize factor, PIL image)
            "image_eventbox":
            widget_tree.get_object("eventboxCalibration"),
            "image_scrollbars":
            widget_tree.get_object("scrolledwindowCalibration"),
        }

        self.grips = CalibrationGripHandler(config, self)

        self.progressbar = widget_tree.get_object("progressbarScan")
        self.__scan_start = 0.0

        # Worker objects; only 'device_finder' is started by this constructor
        self.workers = {
            "device_finder":
            WorkerDeviceFinder(config.scanner_devid),
            "resolution_finder":
            WorkerResolutionFinder(config.scanner_resolution,
                                   config.RECOMMENDED_RESOLUTION),
            "scan":
            WorkerCalibrationScan(self.calibration['image_viewport']),
            "progress_updater":
            WorkerProgressUpdater("calibration scan", self.progressbar)
        }

        # Languages come from the first available OCR tool (none if no tool is
        # found); they are sorted on their long name and "Disable OCR" is
        # always inserted as the first entry
        ocr_tools = pyocr.get_available_tools()
        if len(ocr_tools) <= 0:
            ocr_langs = []
        else:
            ocr_langs = ocr_tools[0].get_available_languages()
        ocr_langs = self.__get_short_to_long_langs(ocr_langs)
        ocr_langs.sort(key=lambda lang: lang[1])
        ocr_langs.insert(0, (None, _("Disable OCR")))

        # Store rows are [long name, short name]
        self.ocr_settings['lang']['store'].clear()
        for (short_lang, long_lang) in ocr_langs:
            self.ocr_settings['lang']['store'].append([long_lang, short_lang])

        # Connect every action object to its widgets
        for action in [
                "apply", "cancel", "select_scanner", "scan_calibration"
        ]:
            actions[action][1].connect(actions[action][0])

        # Device finder signals: the handlers only schedule the real callback
        # with gobject.idle_add()
        self.workers['device_finder'].connect(
            'device-finding-start',
            lambda worker: gobject.idle_add(self.__on_device_finding_start_cb))
        self.workers['device_finder'].connect(
                'device-found',
                lambda worker, user_name, store_name, active: \
                    gobject.idle_add(self.__on_value_found_cb,
                                     self.device_settings['devid'],
                                     user_name, store_name, active))
        self.workers['device_finder'].connect(
            'device-finding-end', lambda worker: gobject.idle_add(
                self.__on_finding_end_cb, self.device_settings['devid']))

        # Resolution finder signals: same scheme, applied to the 'resolution'
        # settings
        self.workers['resolution_finder'].connect(
            'resolution-finding-start',
            lambda worker: gobject.idle_add(self.__on_finding_start_cb, self.
                                            device_settings['resolution']))
        self.workers['resolution_finder'].connect(
                'resolution-found',
                lambda worker, user_name, store_name, active: \
                    gobject.idle_add(self.__on_value_found_cb,
                                     self.device_settings['resolution'],
                                     user_name, store_name, active))
        self.workers['resolution_finder'].connect(
            'resolution-finding-end', lambda worker: gobject.idle_add(
                self.__on_finding_end_cb, self.device_settings['resolution']))

        # Calibration scan signals call their handlers directly (no idle_add)
        self.workers['scan'].connect('calibration-scan-start',
                                     lambda worker: self.__on_scan_start())
        self.workers['scan'].connect(
            'calibration-scan-done',
            lambda worker, img: self.__on_scan_done(img))
        self.workers['scan'].connect(
            'calibration-resize-done',
            lambda worker, factor, img: self.__on_resize_done(factor, img))

        # Mouse events on the calibration image are forwarded to the grip
        # handler
        self.calibration['image_eventbox'].connect(
            "button-press-event",
            lambda x, ev: self.grips.on_mouse_button_pressed_cb(ev))
        self.calibration['image_eventbox'].connect(
            "motion-notify-event",
            lambda x, ev: self.grips.on_mouse_motion(ev))
        self.calibration['image_eventbox'].connect(
            "button-release-event",
            lambda x, ev: self.grips.on_mouse_button_released_cb(ev))
        self.calibration['image_eventbox'].add_events(
            gtk.gdk.POINTER_MOTION_MASK)

        self.window.connect("destroy", self.__on_destroy)

        self.display_config(config)

        self.window.set_visible(True)

        # Must be connected after the window has been displayed.
        # Otherwise, if "disable OCR" is already selected in the config
        # it will display a warning popup even before the dialog has been
        # displayed
        self.ocr_settings['lang']['gui'].connect("changed",
                                                 self.__on_ocr_lang_changed)

        self.workers['device_finder'].start()