-
Notifications
You must be signed in to change notification settings - Fork 0
/
testing.py
executable file
·105 lines (92 loc) · 2.95 KB
/
testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
'''
COPYRIGHT:
Python-tesseract is released under the GPL v3.
Copyright (c) Samuel Hoffstaetter, 2009
http://wiki.github.com/hoffstaetter/python-tesseract
'''
tesseract_cmd = 'tesseract'
import Image
import StringIO
import subprocess
import sys
import os
__all__ = ['image_to_string']
def run_tesseract(input_filename, output_filename_base, lang=None, boxes=False):
command = [tesseract_cmd, input_filename, output_filename_base]
if lang is not None:
command += ['-l', lang]
if boxes:
command += ['batch.nochop', 'makebox']
proc = subprocess.Popen(command,
stderr=subprocess.PIPE)
return (proc.wait(), proc.stderr.read())
def cleanup(filename):
try:
os.remove(filename)
except OSError:
pass
def get_errors(error_string):
lines = error_string.splitlines()
error_lines = tuple(line for line in lines if line.find('Error') >= 0)
if len(error_lines) > 0:
return '\n'.join(error_lines)
else:
return error_string.strip()
def tempnam():
stderr = sys.stderr
try:
sys.stderr = StringIO.StringIO()
return os.tempnam(None, 'tess_')
finally:
sys.stderr = stderr
class TesseractError(Exception):
def __init__(self, status, message):
self.status = status
self.message = message
self.args = (status, message)
def image_to_string(image, lang=None, boxes=False):
input_file_name = '%s.bmp' % tempnam()
output_file_name_base = tempnam()
if not boxes:
output_file_name = '%s.txt' % output_file_name_base
else:
output_file_name = '%s.box' % output_file_name_base
try:
image.save(input_file_name)
status, error_string = run_tesseract(input_file_name,
output_file_name_base,
lang=lang,
boxes=boxes)
if status:
errors = get_errors(error_string)
raise TesseractError(status, errors)
f = file(output_file_name)
try:
return f.read().strip()
finally:
f.close()
finally:
cleanup(input_file_name)
cleanup(output_file_name)
if __name__ == '__main__':
if len(sys.argv) == 2:
filename = sys.argv[1]
try:
image = Image.open(filename)
except IOError:
sys.stderr.write('ERROR: Could not open file "%s"\n' % filename)
exit(1)
print image_to_string(image)
elif len(sys.argv) == 4 and sys.argv[1] == '-l':
lang = sys.argv[2]
filename = sys.argv[3]
try:
image = Image.open(filename)
except IOError:
sys.stderr.write('ERROR: Could not open file "%s"\n' % filename)
exit(1)
print image_to_string(image, lang=lang)
else:
sys.stderr.write('Usage: python tesseract.py [-l language] input_file\n')
exit(2)