Esempio n. 1
0
    def __test_txt(self, image_file, expected_box_file, lang='eng'):
        """Run OCR on a test image and compare the boxes with a reference file.

        ``image_file`` is resolved under ``tests/data/`` and
        ``expected_box_file`` under ``tests/tesseract/``. ``lang`` is passed
        through to ``tesseract.image_to_string``. Both box lists are sorted
        before being compared element by element.
        """
        image_path = "tests/data/" + image_file
        reference_path = "tests/tesseract/" + expected_box_file

        # Text is ``unicode`` on Python 2 and ``str`` on Python 3.
        try:
            text_type = unicode  # noqa: F821
        except NameError:
            text_type = str

        with codecs.open(reference_path, 'r', encoding='utf-8') as reference:
            expected_boxes = self.builder.read_file(reference)
        expected_boxes.sort()

        boxes = tesseract.image_to_string(
            Image.open(image_path), lang=lang, builder=self.builder)
        boxes.sort()

        self.assertTrue(len(boxes) > 0)
        self.assertEqual(len(boxes), len(expected_boxes))

        for actual, expected in zip(boxes, expected_boxes):
            self.assertEqual(type(expected.content), text_type)
            self.assertEqual(type(actual.content), text_type)
            self.assertEqual(actual, expected)
Esempio n. 2
0
    def __test_txt(self, image_file, expected_box_file, lang='eng'):
        """OCR ``tests/data/<image_file>`` and check it against stored boxes.

        The reference boxes are read from ``tests/tesseract/<expected_box_file>``
        through ``self.builder``. The test requires at least one box, equal
        list lengths, text-typed ``content`` on every box, and pairwise
        equality after sorting.
        """
        ocr_input = "tests/data/" + image_file
        reference_file = "tests/tesseract/" + expected_box_file

        # ``unicode`` only exists on Python 2; fall back to ``str`` otherwise.
        try:
            expected_text_type = unicode  # noqa: F821
        except NameError:
            expected_text_type = str

        with codecs.open(reference_file, 'r', encoding='utf-8') as handle:
            expected_boxes = self.builder.read_file(handle)
        expected_boxes.sort()

        boxes = tesseract.image_to_string(
            Image.open(ocr_input), lang=lang, builder=self.builder)
        boxes.sort()

        self.assertTrue(len(boxes) > 0)
        self.assertEqual(len(boxes), len(expected_boxes))

        for found, wanted in zip(boxes, expected_boxes):
            self.assertEqual(type(wanted.content), expected_text_type)
            self.assertEqual(type(found.content), expected_text_type)
            self.assertEqual(found, wanted)
Esempio n. 3
0
def extractText(pPath):
    """Run OCR with ``lang="deu"`` on the image at ``pPath``.

    The recognised text is printed. Returns ``(1, pPath)`` when
    ``image_to_string`` returned something other than ``None`` and
    ``(0, pPath)`` otherwise.
    """
    lImage = Image.open(pPath)
    lResult = tesseract.image_to_string(lImage, lang="deu")
    print(lResult)
    # Identity check: ``None`` is a singleton and ``!=`` can be overridden.
    if lResult is not None:
        return 1, pPath
    return 0, pPath
Esempio n. 4
0
def recognition():
    """OCR ``blob.jpg`` and then change directory to ``$HOME/alpr/latest/``.

    The image is opened relative to the current working directory. The
    directory change happens after the OCR call. Returns the value produced
    by ``image_to_string``.
    """
    from os import chdir, environ
    from tesseract import image_to_string
    from Image import open as open_image

    home_dir = environ.get("HOME")
    recognised = image_to_string(open_image("blob.jpg"))
    chdir(home_dir + "/alpr/latest/")
    return recognised
Esempio n. 5
0
 def get_authcode(self, url):
     """Download the image at ``url`` to ``authcode.jpg`` and OCR it.

     Sets the ``Host`` and ``Referer`` entries of ``self.headers`` before
     issuing the request through ``self.session``. Returns whatever
     ``image_to_string('authcode.jpg', False)`` produces.
     """
     self.headers['Host'] = 'authcode.jd.com'
     self.headers['Referer'] = 'https://passport.jd.com/uc/login'
     reply = self.session.get(url, headers=self.headers)
     with open('authcode.jpg', 'wb') as captcha_file:
         captcha_file.write(reply.content)
     return image_to_string('authcode.jpg', False)
Esempio n. 6
0
def recognition():
    """Return the OCR text of ``blob.jpg``, then chdir to ``$HOME/alpr/latest/``.

    ``blob.jpg`` is read from the current working directory. The working
    directory is changed only after the OCR call has finished.
    """
    from os import chdir, environ
    from tesseract import image_to_string
    from Image import open as load_image

    home = environ.get("HOME")
    picture = load_image("blob.jpg")
    ocr_text = image_to_string(picture)
    chdir(home + "/alpr/latest/")
    return ocr_text
Esempio n. 7
0
def loadFile():
    """Ask for an image file, OCR it and show the text and the image.

    Does nothing when the file dialog yields ``None``.
    """
    ftypes = [('Supported Image files', '*.png'), ('All files', '*')]
    fl = tkFileDialog.askopenfile(filetypes=ftypes, title='Choose a file')
    # No file was chosen: nothing to OCR or display.
    if fl is None:
        return
    content = image_to_string(Image.open(fl))
    # Create a Text widget that fills the window and holds the OCR output.
    txt = Text()
    txt.pack(fill=BOTH, expand=1)
    txt.insert(END, content)
    loadImage(fl)
Esempio n. 8
0
def im2string(name):
    """OCR the file named ``'2' + name``, print the text and return it.

    The extra arguments ``True`` and ``' -psm 7 digits'`` are passed
    positionally to ``tesseract.image_to_string``.
    """
    prefixed_name = '2' + name
    recognised = tesseract.image_to_string(
        prefixed_name, True, ' -psm 7 digits')
    print(recognised)
    return recognised
Esempio n. 9
0
 def post(self):
     """Reply in plain text with the uploaded file's name and its OCR text."""
     self.set_header("Content-Type", "text/plain")
     # First file of the first entry in the request's files mapping.
     upload = self.request.files.items()[0][1][0]
     self.write("You sent a file with name " + upload['filename'])
     # Wrap the raw body in an in-memory file so PIL can open it for OCR.
     in_memory_file = StringIO.StringIO(upload['body'])
     self.write(image_to_string(Image.open(in_memory_file)))
Esempio n. 10
0
 def get_authcode(self, url):
     """Fetch the captcha (verification code) image and return its OCR text.

     The image at ``url`` is saved as ``authcode.jpg`` and then handed to
     ``image_to_string('authcode.jpg', False)``.
     """
     self.headers['Host'] = 'authcode.jd.com'
     self.headers['Referer'] = 'https://passport.jd.com/uc/login'
     reply = self.session.get(url, headers=self.headers)
     with open('authcode.jpg', 'wb') as image_file:
         image_file.write(reply.content)
     return image_to_string('authcode.jpg', False)
Esempio n. 11
0
 def tostring(self):
     """Preprocess ``self.img`` in place and return its OCR text.

     Steps: 2x bicubic upscale, greyscale conversion, optional inversion and
     bolding (driven by ``self.invert`` / ``self.bold``), then a second 2x
     upscale with nearest-neighbour resampling.
     """
     # Twice as large with bicubic resampling (the author believed this
     # gives better results).
     doubled = (self.img.size[0] * 2, self.img.size[1] * 2)
     self.img = self.img.resize(doubled, Image.BICUBIC)
     # Convert to black and white (grey levels).
     self.img = self.img.convert("L")
     if self.invert:
         self.invertVid()
     if self.bold:
         self.tobold()
     # The size is re-read because the helpers above may have replaced
     # self.img.
     doubled = (self.img.size[0] * 2, self.img.size[1] * 2)
     self.img = self.img.resize(doubled, Image.NEAREST)
     return image_to_string(self.img)
Esempio n. 12
0
    def __test_txt(self, image_file, expected_output_file, lang="eng"):
        """Check that OCR of a test image equals the stored reference text.

        ``image_file`` is resolved under ``tests/data/`` and
        ``expected_output_file`` under ``tests/tesseract/``. The reference
        text is stripped of surrounding whitespace before the comparison.
        """
        image_file = "tests/data/" + image_file
        expected_output_file = "tests/tesseract/" + expected_output_file

        # One read() yields the same text as concatenating the lines one by
        # one, without rebuilding the string on every line.
        with codecs.open(expected_output_file, "r", encoding="utf-8") as file_descriptor:
            expected_output = file_descriptor.read().strip()

        output = tesseract.image_to_string(Image.open(image_file), lang=lang)

        self.assertEqual(output, expected_output)
Esempio n. 13
0
    def __test_txt(self, image_file, expected_output_file, lang='eng'):
        """Compare the OCR text of a test image with a reference text file.

        The image comes from ``tests/data/`` and the reference from
        ``tests/tesseract/``. Leading and trailing whitespace of the
        reference is removed before the comparison.
        """
        image_file = "tests/data/" + image_file
        expected_output_file = "tests/tesseract/" + expected_output_file

        # Read the reference in one call rather than growing a string with
        # ``+=`` for every line; the resulting text is identical.
        with codecs.open(expected_output_file, 'r', encoding='utf-8') \
                as file_descriptor:
            expected_output = file_descriptor.read().strip()

        output = tesseract.image_to_string(Image.open(image_file), lang=lang)

        self.assertEqual(output, expected_output)
Esempio n. 14
0
    def __test_txt(self, image_file, expected_box_file, lang="eng"):
        """OCR a test image and compare the sorted boxes with a reference.

        ``image_file`` lives under ``tests/data/`` and ``expected_box_file``
        under ``tests/tesseract/``. The reference is parsed by
        ``self.builder.read_file``.
        """
        image_path = "tests/data/" + image_file
        reference_path = "tests/tesseract/" + expected_box_file

        with codecs.open(reference_path, "r", encoding="utf-8") as reference:
            expected_boxes = self.builder.read_file(reference)
        expected_boxes.sort()

        boxes = tesseract.image_to_string(
            Image.open(image_path), lang=lang, builder=self.builder
        )
        boxes.sort()

        self.assertEqual(len(boxes), len(expected_boxes))

        for actual, expected in zip(boxes, expected_boxes):
            self.assertEqual(actual, expected)
Esempio n. 15
0
    def __test_txt(self, image_file, expected_box_file, lang='eng'):
        """Check OCR boxes of ``tests/data/<image_file>`` against a reference.

        Reference boxes are loaded from
        ``tests/tesseract/<expected_box_file>`` via ``self.builder``. Both
        lists are sorted, must have the same length and must match pairwise.
        """
        ocr_input = "tests/data/" + image_file
        reference_file = "tests/tesseract/" + expected_box_file

        with codecs.open(reference_file, 'r', encoding='utf-8') as handle:
            expected_boxes = self.builder.read_file(handle)
        expected_boxes.sort()

        boxes = tesseract.image_to_string(
            Image.open(ocr_input), lang=lang, builder=self.builder)
        boxes.sort()

        self.assertEqual(len(boxes), len(expected_boxes))

        for found, wanted in zip(boxes, expected_boxes):
            self.assertEqual(found, wanted)
Esempio n. 16
0
    def post(self):
        """Answer an upload with an HTML page showing the image and its OCR text.

        The uploaded image is saved under ``static/`` next to this module
        with a random UUID-based ``.jpg`` name, embedded in the page, and
        then passed to ``image_to_string``.
        """
        self.set_header("Content-Type", "text/html")
        self.write("<html><body>")
        # NOTE(review): the client-supplied filename (and the OCR text
        # below) are written into the HTML without escaping - confirm
        # whether they should be escaped first.
        self.write("You sent a file with name " +
                   self.request.files.items()[0][1][0]['filename'] + "<br/>")

        # Create a unique file name for the upload.
        tempname = str(uuid.uuid4()) + ".jpg"
        myimg = Image.open(
            StringIO.StringIO(self.request.files.items()[0][1][0]['body']))
        myfilename = os.path.join(os.path.dirname(__file__), "static",
                                  tempname)

        # Save the image so the page can reference it.
        myimg.save(myfilename)

        self.write("<img src=\"static/" + tempname + "\" /><br/>")

        # Do OCR, print result.
        self.write(image_to_string(myimg))
        self.write("</body></html>")
Esempio n. 17
0
def extract_pdf_text(src_pdf_file_path):
    """Print the OCR text of every page of the PDF at ``src_pdf_file_path``.

    Each page is rendered with ``pdf_page_to_png`` at ``resolution=72``,
    written to ``/tmp/titi.png`` (overwritten for every page) and then read
    back for OCR. Nothing is returned.
    """
    # This is required, otherwise tesseract complains about file permissions.
    os.environ["TESSDATA_PREFIX"] = '/opt/local/share'

    # Scratch file shared by all pages.
    scratch_png = '/tmp/titi.png'

    with open(src_pdf_file_path, 'rb') as pdf_stream:
        reader = PyPDF2.PdfFileReader(pdf_stream)
        for page_number in range(reader.numPages):
            rendered = pdf_page_to_png(reader.getPage(page_number),
                                       resolution=72)
            cv2.imwrite(scratch_png, rendered)
            print(tesseract.image_to_string(Image.open(scratch_png)))
0
    def __test_txt(self, image_file, expected_box_file, lang='eng'):
        """OCR a test image and compare boxes and word-box types with a reference.

        The image is read from ``tests/data/`` and the reference from
        ``tests/tesseract/``. For every box pair, each word box must have the
        same type as the reference word box at the same position, and the
        boxes themselves must be equal.
        """
        image_path = "tests/data/" + image_file
        reference_path = "tests/tesseract/" + expected_box_file

        boxes = tesseract.image_to_string(
            Image.open(image_path), lang=lang, builder=self.builder)
        boxes.sort()

        with codecs.open(reference_path, 'r', encoding='utf-8') as reference:
            expected_boxes = self.builder.read_file(reference)
        expected_boxes.sort()

        self.assertEqual(len(boxes), len(expected_boxes))

        for line, expected_line in zip(boxes, expected_boxes):
            for position, word_box in enumerate(line.word_boxes):
                self.assertEqual(type(word_box),
                                 type(expected_line.word_boxes[position]))
            self.assertEqual(line, expected_line)
Esempio n. 19
0
    def test_write_read(self):
        """Boxes written with the builder must read back equal to the originals."""
        original_boxes = tesseract.image_to_string(
            Image.open("tests/data/test.png"), builder=self.builder
        )
        self.assertTrue(len(original_boxes) > 0)

        raw_fd, tmp_path = tempfile.mkstemp()
        try:
            # Only the path is kept: the file must be reopened with
            # codecs.open() for utf-8 support.
            os.close(raw_fd)

            with codecs.open(tmp_path, "w", encoding="utf-8") as writer:
                self.builder.write_file(writer, original_boxes)

            with codecs.open(tmp_path, "r", encoding="utf-8") as reader:
                new_boxes = self.builder.read_file(reader)

            self.assertEqual(len(new_boxes), len(original_boxes))
            for reloaded, original in zip(new_boxes, original_boxes):
                self.assertEqual(reloaded, original)
        finally:
            os.remove(tmp_path)
Esempio n. 20
0
def main():
    """Repeatedly OCR and display a fixed crop of the frame in ``file_paths[0]``.

    Every iteration reads the frame, crops rows 610-699 and columns
    1090-1199 (named ``speed_img``), prints the OCR result of the crop and
    shows it in an OpenCV window. The loop ends when ``q`` is pressed in
    that window.
    """
    # NOTE(review): the frame index never advances, so the same file is
    # re-read on every pass - confirm whether it should step through
    # file_paths.
    frame_index = 0
    while True:
        frame = cv2.imread(file_paths[frame_index])
        speed_img = frame[610:700, 1090:1200]
        print(image_to_string(speed_img))
        cv2.imshow("window", speed_img)
        # waitKey also lets the window refresh; the low byte is the key code.
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
Esempio n. 21
0
    def test_write_read(self):
        """Round-trip OCR boxes through a temporary file and compare them."""
        source_image = Image.open("tests/data/test.png")
        original_boxes = tesseract.image_to_string(source_image,
                                                   builder=self.builder)
        self.assertTrue(len(original_boxes) > 0)

        handle, tmp_path = tempfile.mkstemp()
        try:
            # The descriptor from mkstemp() is closed right away: the file
            # must be opened with codecs.open() for utf-8 support.
            os.close(handle)

            with codecs.open(tmp_path, 'w', encoding='utf-8') as out_file:
                self.builder.write_file(out_file, original_boxes)

            with codecs.open(tmp_path, 'r', encoding='utf-8') as in_file:
                new_boxes = self.builder.read_file(in_file)

            self.assertEqual(len(new_boxes), len(original_boxes))
            for read_back, written in zip(new_boxes, original_boxes):
                self.assertEqual(read_back, written)
        finally:
            os.remove(tmp_path)
Esempio n. 22
0
    def post(self):
        """Reply with an HTML page showing the uploaded image and its OCR text.

        The upload is stored under ``static/`` beside this module using a
        random UUID-based ``.jpg`` name, then referenced from the page.
        """
        self.set_header("Content-Type", "text/html")
        self.write("<html><body>")

        # First file of the first entry in the request's files mapping.
        upload = self.request.files.items()[0][1][0]
        self.write("You sent a file with name " + upload['filename'] + "<br/>")

        # Create a unique ID file name.
        tempname = str(uuid.uuid4()) + ".jpg"
        myimg = Image.open(StringIO.StringIO(upload['body']))
        static_dir = os.path.join(os.path.dirname(__file__), "static")

        # Save the image to file so the page can embed it.
        myimg.save(os.path.join(static_dir, tempname))

        self.write("<img src=\"static/" + tempname + "\" /><br/>")

        # Do OCR, print result.
        self.write(image_to_string(myimg))
        self.write("</body></html>")
Esempio n. 23
0
File: kofa.py Progetto: gazs/kofaPro
def process_arlista(href):
  """Download a price-list image, OCR its cells and append rows to arlista.csv.

  The image is thresholded, converted to 1-bit and cut into strips of cells
  by ``arlista_darabol``. The first strip is skipped. Each remaining strip
  becomes one row: the date taken from the 10-digit timestamp in ``href``,
  the OCR text of the first cell, then the scrubbed values of the other
  cells. A strip whose first cell is empty is dropped, as is any row without
  a truthy third field.

  Raises ``AttributeError`` if ``href`` contains no 10-digit number.
  """
  img = Image.open(StringIO.StringIO(download(href)))
  img = img.point(threshold)
  img = img.convert("1")
  csikok = arlista_darabol(img)
  arlista = []

  # Raw string so ``\d`` is a regex class, not a string escape.
  timestamp = int(re.search(r"\d{10}", href).group(0))
  datum = date.fromtimestamp(timestamp).strftime("%Y-%m-%d")
  for csik in csikok[1:]:
    sor = [datum]
    # enumerate() gives the true position; list.index() would return the
    # first equal cell and rescans the strip on every lookup.
    for index, kocka in enumerate(csik):
      nagy = kocka.resize((kocka.size[0] * 2, kocka.size[1] * 2), Image.NEAREST)
      string = tesseract.image_to_string(nagy, lang='csapi')
      if index == 0:
        # Compare by value: ``is ""`` / ``is 0`` test object identity.
        if string == "":
          break
        sor.append(string)
      else:
        # Not the product name: scrub the text into separate fields.
        sor = sor + list(scrub(string))
    # ``sor[2]`` only exists once the row has at least three fields.
    if len(sor) > 2 and sor[2]:
      arlista.append(sor)
  # Close the CSV file deterministically once the rows are written.
  with open('arlista.csv', 'a') as csv_file:
    csv.writer(csv_file, quoting=csv.QUOTE_NONE).writerows(arlista)
Esempio n. 24
0
def ocr(filename):
    """Return the OCR text of ``filename`` inside ``UPLOAD_FOLDER``."""
    # Binary mode: image data is bytes, not text. The ``with`` block closes
    # the handle, and the OCR call runs while the file is still open.
    with open(os.path.join(UPLOAD_FOLDER, filename), 'rb') as f:
        return image_to_string(Image.open(f))
Esempio n. 25
0
    def post(self):
        """Reply in plain text with the uploaded file's name and its OCR text."""
        self.set_header("Content-Type", "text/plain")
        self.write("You sent a file with name " +
                   self.request.files.items()[0][1][0]['filename'])
        # Make a "memory file" using StringIO, open it with PIL and send it
        # to tesseract for OCR.
        self.write(
            image_to_string(
                Image.open(
                    StringIO.StringIO(
                        self.request.files.items()[0][1][0]['body']))))
Esempio n. 26
0
def ocr(filename):
    """OCR the uploaded file ``filename`` from ``UPLOAD_FOLDER`` and return the text."""
    upload_path = os.path.join(UPLOAD_FOLDER, filename)
    # Open as bytes and keep the handle open only for the OCR call, so it
    # is always closed afterwards.
    with open(upload_path, 'rb') as f:
        return image_to_string(Image.open(f))
Esempio n. 27
0
 def to_text(self, im, boxes=False, improve=True, lang='por'):
     """Return the OCR output for ``im``, decoded from UTF-8.

     When ``improve`` is true the image first goes through
     ``self.improve_image``. When ``boxes`` is true, ``lang=None`` is passed
     to tesseract instead of ``lang``.
     """
     source = self.improve_image(im) if improve else im
     ocr_lang = None if boxes else lang
     raw = tesseract.image_to_string(source, lang=ocr_lang, boxes=boxes)
     return raw.decode('utf-8')
Esempio n. 28
0
pixdata = img.load()

# Make the letters bolder for easier recognition.
# The three threshold rules are applied per pixel in their original order:
# low red -> black, low green -> black, any blue left -> white.
for y in xrange(img.size[1]):
    for x in xrange(img.size[0]):
        if pixdata[x, y][0] < 90:
            pixdata[x, y] = (0, 0, 0, 255)
        if pixdata[x, y][1] < 136:
            pixdata[x, y] = (0, 0, 0, 255)
        if pixdata[x, y][2] > 0:
            pixdata[x, y] = (255, 255, 255, 255)

img.save("input-black.gif", "GIF")

#   Make the image bigger (needed for OCR)
im_orig = Image.open('input-black.gif')
big = im_orig.resize((1000, 500), Image.NEAREST)

ext = ".tif"
big.save("input-NEAREST" + ext)

#   Perform OCR using tesseract-ocr library
from tesseract import image_to_string
image = Image.open('input-NEAREST.tif')
print(image_to_string(image))
Esempio n. 29
0
import Image
from tesseract import image_to_string

# OCR the same image twice: first without a language argument, then with
# lang='eng'.
_source_image = 'E:\\aa.png'
print(image_to_string(Image.open(_source_image)))
print(image_to_string(Image.open(_source_image), lang='eng'))
import numpy as np
import argparse
import cv2
import tesseract

# Load 'cir2.png', build an inverted adaptive-threshold image from its
# greyscale version, OCR that image and display both pictures.
t2 = cv2.imread('cir2.png')
thresh = cv2.adaptiveThreshold(
    cv2.cvtColor(t2, cv2.COLOR_BGR2GRAY),
    255,
    cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY_INV,
    91,
    1,
)

print(tesseract.image_to_string(thresh))

# count, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh)
# for i in range(1,count):
#     t2 = cv2.circle(t2, (int(centroids[i,0]), int(centroids[i,1])), 5, (0, 255, 0, 0), 5)

cv2.imshow('circles', thresh)
cv2.imshow('centers', t2)
# Block until a key is pressed so the windows stay visible.
cv2.waitKey()
Esempio n. 31
0
def GetImgValue(url, HostPage, UserAgent, cookies, split=None):
    """Download an image, clean it up and return the text tesseract reads in it.

    The image at ``url`` is fetched with browser-like headers (``Host`` is
    derived from ``url``, ``Referer`` is ``HostPage``), converted to RGBA and
    thresholded to black and white. Depending on ``split``:

    * ``'LR'`` or ``'TB'``: the image is cropped into two halves with
      ``Crop``; each half is OCR'd and the results are joined by a space.
    * ``'TOP'``: the image is cropped with ``Crop`` and OCR'd as one piece.
    * anything else: the whole image is OCR'd.

    Intermediate ``input-black*.gif`` and ``input-NEAREST*.tif`` files are
    written to the current directory.
    """
    request_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive',
        'Host': url.split('/')[2],
        'Referer': HostPage,
        'User-Agent': UserAgent,
    }

    raw_bytes = requests.get(url, headers=request_headers, cookies=cookies).content
    img = Image.open(cStringIO.StringIO(raw_bytes))
    img = img.convert("RGBA")

    pixdata = img.load()

    # Show a higher-contrast copy; the enhanced copy is not used afterwards.
    ImageEnhance.Contrast(img).enhance(1.3).show("30% more contrast")

    # Make the letters bolder for easier recognition. The three rules are
    # applied per pixel in order: low red -> black, low green -> black,
    # any remaining blue -> white.
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)
    for y in xrange(img.size[1]):
        for x in xrange(img.size[0]):
            if pixdata[x, y][0] < 90:
                pixdata[x, y] = black
            if pixdata[x, y][1] < 136:
                pixdata[x, y] = black
            if pixdata[x, y][2] > 0:
                pixdata[x, y] = white

    enlarged_size = (1000, 500)
    tiff_ext = ".tif"

    if split in ('LR', 'TB'):
        first_half = Crop(img=img, split=split, half=1)
        second_half = Crop(img=img, split=split, half=2)
        first_half.save("input-black1.gif", "GIF")
        second_half.save("input-black2.gif", "GIF")

        # Make image 1 bigger (needed for OCR).
        enlarged1 = Image.open('input-black1.gif').resize(enlarged_size, Image.NEAREST)
        enlarged1.save("input-NEAREST1" + tiff_ext, "TIFF")
        image1 = Image.open('input-NEAREST1.tif')

        # Make image 2 bigger (needed for OCR).
        enlarged2 = Image.open('input-black2.gif').resize(enlarged_size, Image.NEAREST)
        enlarged2.save("input-NEAREST2" + tiff_ext, "TIFF")
        image2 = Image.open('input-NEAREST2.tif')

        text1 = image_to_string(image1)
        text2 = image_to_string(image2)
        return text1 + " " + text2

    if split == 'TOP':
        img = Crop(img=img, split=split)

    img.save("input-black.gif", "GIF")

    # Make the image bigger (needed for OCR).
    enlarged = Image.open('input-black.gif').resize(enlarged_size, Image.NEAREST)
    enlarged.save("input-NEAREST" + tiff_ext, "TIFF")

    return image_to_string(Image.open('input-NEAREST.tif'))
Esempio n. 32
0
import Image
from tesseract import image_to_string
# NOTE(review): ``pytesseract`` is not imported in the visible part of this
# file, and ``image_to_string`` below comes from ``tesseract`` - confirm that
# this setting is needed and that the import exists.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# print() with a single argument behaves the same on Python 2 and Python 3.
print(image_to_string(Image.open('scan2.png')))
Esempio n. 33
0
def parse_txt(img_path, lang=None):
    """Return the OCR text of the image at ``img_path``.

    ``lang`` is forwarded unchanged to ``image_to_string``.
    """
    # TODO: validate img_path
    picture = Image.open(img_path)
    return image_to_string(picture, lang=lang)
Esempio n. 34
0
'''import cv2
import numpy as np
import pyzbar.pyzbar as pyzbar

image = cv2.imread("bca.jpg")


decodedObjects = pyzbar.decode(image)
for obj in decodedObjects:
    print("obj", obj)
    print("Type:", obj.type)
    print("Data: ", obj.data, "\n")

cv2.imshow("Frame", image)
cv2.waitKey(0)
'''
# Pillow is installed as the ``PIL`` package; ``pillow`` is only the
# distribution name and cannot be imported.
from PIL import Image
from tesseract import image_to_string

# OCR the photo twice: without a language argument, then with lang='eng'.
print(image_to_string(Image.open('IMG_20200116_151537__01.jpg')))
print(image_to_string(Image.open('IMG_20200116_151537__01.jpg'), lang='eng'))
Esempio n. 35
0
def find(image_path):
    """Print the OCR text of ``image_path`` twice.

    The first call passes no language argument; the second passes
    ``lang='eng'``. The image is reopened for each call.
    """
    for ocr_options in ({}, {'lang': 'eng'}):
        print(image_to_string(Image.open(image_path), **ocr_options))
Esempio n. 36
0
# Make the letters bolder for easier recognition.
# The three threshold rules are applied per pixel in their original order:
# low red -> black, low green -> black, any blue left -> white.
for y in xrange(img.size[1]):
    for x in xrange(img.size[0]):
        if pixdata[x, y][0] < 90:
            pixdata[x, y] = (0, 0, 0, 255)
        if pixdata[x, y][1] < 136:
            pixdata[x, y] = (0, 0, 0, 255)
        if pixdata[x, y][2] > 0:
            pixdata[x, y] = (255, 255, 255, 255)

# Save under the exact name that is reopened below, with an explicit format.
img.save("input-black.gif", "GIF")

#   Make the image bigger (needed for OCR)
im_orig = Image.open('input-black.gif')
big = im_orig.resize((1000, 500), Image.NEAREST)

ext = ".tif"
big.save("input-NEAREST" + ext)

#   Perform OCR using tesseract-ocr library
from tesseract import image_to_string
image = Image.open('input-NEAREST.tif')
print(image_to_string(image))
Esempio n. 37
0
from PIL import Image
from tesseract import image_to_string

# OCR two sample pictures: the first without a language argument, the second
# with lang='eng'.
for _picture, _ocr_options in (('pic.jpg', {}),
                               ('test-english.jpg', {'lang': 'eng'})):
    print(image_to_string(Image.open(_picture), **_ocr_options))
Esempio n. 38
0
import Image
from tesseract import image_to_string

# print() with a single argument works on both Python 2 and Python 3; the
# statement form is a syntax error on Python 3.
print(image_to_string(Image.open('test.png')))
print(image_to_string(Image.open('test-english.jpg'), lang='eng'))
Esempio n. 39
0
from PIL import Image
from tesseract import image_to_string
# Print the text tesseract recognises in 114.jpg.
_recognised = image_to_string(Image.open("114.jpg"))
print(_recognised)
Esempio n. 40
0
import pytesseract as pyt
import tesseract as tst
from PIL import Image
# Load the picture once, then print the text recognised by the ``tesseract``
# module (imported as ``tst``).
img = Image.open('edited.jpg')
_recognised = tst.image_to_string(img)
print(_recognised)
Esempio n. 41
0
		pixdata[x, y] = (255, 255, 255, 255)

# Threshold every pixel in three passes: low red -> black, low green ->
# black, any blue left -> white.
for y in xrange(img.size[1]):
    for x in xrange(img.size[0]):
        if pixdata[x, y][0] < 90:
            pixdata[x, y] = (0, 0, 0, 255)

for y in xrange(img.size[1]):
    for x in xrange(img.size[0]):
        if pixdata[x, y][1] < 136:
            pixdata[x, y] = (0, 0, 0, 255)

for y in xrange(img.size[1]):
    for x in xrange(img.size[0]):
        if pixdata[x, y][2] > 0:
            pixdata[x, y] = (255, 255, 255, 255)

img.save("input-black.gif", "GIF")

#   Make the image bigger (needed for OCR)
im_orig = Image.open('input-black.gif')
big = im_orig.resize((1000, 500), Image.NEAREST)

ext = ".tif"
big.save("input-NEAREST" + ext)

#   Perform OCR using tesseract-ocr library
from tesseract import image_to_string
image = Image.open('input-NEAREST.tif')
# print() with a single argument works on both Python 2 and Python 3.
print(image_to_string(image))