Python TextBuilderの例

プログラミング言語: Python

名前空間/パッケージ名: builders

メソッド/関数: TextBuilder

hotexamples.comのコード掲載数: 3

Python TextBuilder - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのbuilders.TextBuilderの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def image_to_string(image, lang=None, builder=None):
    """Run Cuneiform on an image and return the builder-parsed result.

    The image is PNG-encoded in memory and piped to the Cuneiform process
    on stdin. Cuneiform is told to write its result to a temporary file,
    which is then read back through ``builder.read_file``.

    Arguments:
        image --- image to OCR; must provide ``save(fileobj, format=...)``.
        lang --- language passed to Cuneiform with ``-l``; the option is
            omitted when lang is None.
        builder --- object providing ``file_extension``, ``cuneiform_args``
            and ``read_file``. If None, ``builders.TextBuilder()`` is used.

    Returns:
        Whatever ``builder.read_file`` returns for the output file.

    Raises:
        CuneiformError: if the Cuneiform process exits with a non-zero
            status. It is given the exit status and the captured
            stdout/stderr text.
    """
    if builder is None:
        builder = builders.TextBuilder()

    with temp_file(builder.file_extension) as output_file:
        cmd = [CUNEIFORM_CMD]
        if lang is not None:
            cmd += ["-l", lang]
        cmd += builder.cuneiform_args
        cmd += ["-o", output_file.name]
        cmd += ["-"]  # stdin

        img_data = BytesIO()
        image.save(img_data, format="png")

        proc = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # communicate() feeds stdin and drains stdout at the same time, so
        # neither pipe can fill up and block the other side. It also closes
        # both pipes and waits for the process to exit.
        raw_output, _ = proc.communicate(img_data.getvalue())
        retcode = proc.returncode
        if retcode:
            # Decode leniently: the tool's diagnostics are only used for the
            # error report and must not raise UnicodeDecodeError themselves.
            raise CuneiformError(retcode,
                                 raw_output.decode('utf-8', 'replace'))
        with codecs.open(output_file.name, 'r', encoding='utf-8',
                         errors='replace') as file_desc:
            results = builder.read_file(file_desc)
        return results

コード例 #2

ファイルを表示

ファイル: tesseract.py プロジェクト: shawiz/glass-skim

def image_to_string(image, lang=None, psm=None, builder=None):
    '''
    Runs tesseract on the specified image. First, the image is written to disk,
    and then the tesseract command is run on the image. Tesseract's result is
    read, and the temporary files are erased.

    Arguments:
        image --- image to OCR
        lang --- tesseract language to use
        psm --- forwarded unchanged to run_tesseract() as its `psm` argument
            (presumably Tesseract's page segmentation mode; confirm against
            run_tesseract).
        builder --- builder used to configure Tesseract and read its result.
            The builder is used to specify the type of output expected.
            Possible builders are TextBuilder or CharBoxBuilder. If builder is
            None, the builder used will be TextBuilder.

    Returns:
        Depends on the specified builder. By default, it will return a simple
        string.

    Raises:
        TesseractError --- if run_tesseract() reports a non-zero status; it is
            given that status and the errors returned by run_tesseract().
    '''

    # Identity test: `== None` could be intercepted by a builder's __eq__.
    if builder is None:
        builder = builders.TextBuilder()

    input_file_name = '%s.bmp' % tempnam()
    output_file_name_base = tempnam()
    # Tesseract is given the base name only; the result is read back from
    # the base name plus the builder's file extension.
    output_file_name = ('%s.%s' %
                        (output_file_name_base, builder.file_extension))

    try:
        image.save(input_file_name)
        (status, errors) = run_tesseract(input_file_name,
                                         output_file_name_base,
                                         lang=lang,
                                         psm=psm,
                                         configs=builder.tesseract_configs)
        if status:
            raise TesseractError(status, errors)
        with codecs.open(output_file_name,
                         'r',
                         encoding='utf-8',
                         errors='replace') as file_desc:
            results = builder.read_file(file_desc)
        return results
    finally:
        # Always remove both temporary files, on success and on failure.
        cleanup(input_file_name)
        cleanup(output_file_name)

コード例 #3

ファイルを表示

ファイル: mytest.py プロジェクト: shawiz/glass-skim

import sys, Image
from pyocr import pyocr
sys.path = ["src"] + sys.path
import builders

# OCR a sample image with the first OCR tool that pyocr reports as available.
tools = pyocr.get_available_tools()[:]
if not tools:
    # Parenthesised single-argument print behaves the same as a Python 2
    # print statement and as the Python 3 print() function.
    print("No OCR tool found")
    sys.exit(1)
print(tools[0].image_to_string(Image.open('test.jpg'),
                               lang='eng',
                               psm='6',
                               builder=builders.TextBuilder()))