def ocr():
    """Run Tesseract on ``eurotext.jpg`` and print the text and confidence figures.

    The image is loaded as grayscale through the legacy OpenCV ``cv`` binding
    and handed to the engine with ``tesseract.SetCvImage``.  Word confidences
    are printed twice: once from the ``TessBaseAPI.AllWordConfidences`` method
    ("Method 1") and once from the module-level
    ``tesseract.AllWordConfidences`` helper ("Method 2").

    Relies on ``header``, ``countWords`` and ``countWords2`` being defined
    elsewhere in this module.

    Returns:
        None. Everything is written to standard output.
    """
    import cv2.cv as cv

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    image = cv.LoadImage("eurotext.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()
    conf = api.MeanTextConf()
    print(text, len(text))
    print("Confidence Level: %d %%" % conf)

    print("Confidences of All words")
    header("Method 1", "*" * 10)
    confOfText = api.AllWordConfidences()
    print(confOfText)

    print("Number of Words:")
    # Count once; the value is needed both for the report and the comparison.
    my_count = countWords(text)
    print("counted by tesseract: %d" % len(confOfText))
    print("counted by me: %d[%d]" % (my_count, countWords2(text)))
    if len(confOfText) != my_count:
        print("Why the words counted by tesseract are different from mine!!!!")

    header("Method 2", "*" * 10)
    confs = tesseract.AllWordConfidences(api)
    print(confs, len(confs))
def ocr_receipt(window, image):
    """OCR a receipt image and print the recognised text.

    The image is written to ``test.png`` with ``cv2.imwrite``, read back as raw
    bytes and passed to ``tesseract.ProcessPagesBuffer``.  Recognition is
    limited to the alphanumerics and punctuation in the whitelist below.

    Args:
        window: Not used by this function; kept so existing callers still work.
        image: Image accepted by ``cv2.imwrite``.

    Returns:
        None. The result is printed after a ``Result: `` line.
    """
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    # "\\/" spells out the backslash that the former "\/" literal produced
    # implicitly (an unrecognised escape keeps its backslash).
    api.SetVariable(
        "tessedit_char_whitelist",
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ=-,.\\/:")
    api.SetPageSegMode(tesseract.PSM_AUTO)

    img_file = "test.png"
    cv2.imwrite(img_file, image)
    # Close the file deterministically instead of leaking the handle.
    with open(img_file, "rb") as image_handle:
        mbuffer = image_handle.read()

    result = tesseract.ProcessPagesBuffer(mbuffer, len(mbuffer), api)
    print('Result: ')
    print(result)
def leerPlaca(self, binarizado):
    """Read the text contained in a binarised image.

    Args:
        binarizado: Image array accepted by ``cv.fromarray``
            (presumably a binarised licence-plate crop -- confirm with callers).

    Returns:
        The recognised text with every run of non-word characters removed.
    """
    # NOTE: cv.GetImage has been a source of errors here and needs review.
    ipl_plate = cv.GetImage(cv.fromarray(binarizado))

    # Tesseract configuration.
    engine = tesseract.TessBaseAPI()
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetVariable('tessedit_char_whitelist',
                       '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # PSM_SINGLE_CHAR: read the image character by character.
    engine.SetPageSegMode(tesseract.PSM_SINGLE_CHAR)

    # Hand the binarised image to Tesseract and clean up its answer.
    tesseract.SetCvImage(ipl_plate, engine)
    return re.sub(r'\W+', '', engine.GetUTF8Text())
def ocr(self):
    """Recognise text in ``self.last_image`` and return one word.

    The image is padded with a 20-pixel white border, wrapped in an IplImage
    header and recognised with the character whitelist in ``self.white_list``.

    Returns:
        The UTF-8 text at word level from the result iterator's initial
        position, or ``None`` when ``self.last_image`` is ``None`` or the
        engine yields no iterator.
    """
    if self.last_image is None:
        return None

    # Add a border to keep the characters off the edges.
    offset = 20
    image = cv2.copyMakeBorder(self.last_image, offset, offset, offset, offset,
                               cv2.BORDER_CONSTANT, value=(255, 255, 255))

    # Init and configure the tesseract API.
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    api.SetVariable("tessedit_char_whitelist", self.white_list)

    # Convert to a cv image to pass to the tesseract API.
    # Derived from example code here:
    # http://blog.mimvp.com/2015/11/python-ocr-recognition/
    # The shape is read only after padding, since that is the buffer we pass.
    height, width, channel = image.shape
    iplimage = cv.CreateImageHeader((width, height), cv.IPL_DEPTH_8U, channel)
    cv.SetData(iplimage, image.tostring(),
               image.dtype.itemsize * channel * width)
    tesseract.SetCvImage(iplimage, api)
    api.Recognize(None)

    ri = api.GetIterator()
    if not ri:
        return None
    return ri.GetUTF8Text(tesseract.RIL_WORD)
def house_add_votes_from_image(self, vote_file, vote):
    """OCR a scanned roll-call PDF and add the votes it contains to *vote*.

    Args:
        vote_file: Path of the PDF; also used as the output root handed to
            ``pdfimages``.
        vote: Vote object that ``with_image.VoteVisitor`` populates.

    The extracted image file is removed afterwards, including when OCR or
    parsing raises.
    """
    # Extract the image.
    with cd('/tmp'):
        sh.pdfimages(vote_file, vote_file)

    # The code expects pdfimages to have written <vote_file>-000.pbm.
    image_file = vote_file + '-000.pbm'
    with open(image_file, 'rb') as f:
        data = f.read()

    try:
        api = tesseract.TessBaseAPI()
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)
        whitelist = ("abcdefghijklmnopqrstuvwxyz',-.*"
                     "ABCDEFGHIJKLMNOPQRSTUVWXYZ ")
        api.SetVariable("tessedit_char_whitelist", whitelist)
        text = tesseract.ProcessPagesBuffer(data, len(data), api)

        # Parse the text into a tree.
        tree = with_image.Rollcall.parse(with_image.Lexer(text))

        # Visit the tree and add rollcall votes to the vote object.
        with_image.VoteVisitor(vote).visit(tree)
    finally:
        # Do not leave the temporary image behind when OCR or parsing fails.
        os.remove(image_file)
def __init__(self):
    """Load the card templates and create the Tesseract engine.

    Suit templates (``h``, ``d``, ``c``, ``s``) and number templates (1-13)
    are read from the ``templates`` directory next to this file.

    Raises:
        Exception: If a suit or number template image cannot be read.
    """
    self.Corr = cv.CV_TM_CCORR_NORMED
    self.MinCorr = 0.97

    path = os.path.join(os.path.dirname(__file__), 'templates')

    self.SuitTemplates = dict()
    for suit in ['h', 'd', 'c', 's']:
        template = cv2.imread(path + '/suits/' + suit + '.png')
        # Identity test: ``== None`` on a loaded image array is an
        # element-wise comparison, not a "failed to load" check.
        if template is None:
            raise Exception('Template for suit %s not found\n' % suit)
        self.SuitTemplates[suit] = template

    self.NumTemplates = dict()
    for num in range(1, 13 + 1):
        template = cv2.imread(path + '/numbers/%d.png' % num)
        if template is None:
            raise Exception('Template for number %d not found\n' % num)
        self.NumTemplates[num] = template

    # tesseract
    self.ocr_api = tesseract.TessBaseAPI()
    self.ocr_api.Init(".", "eng", tesseract.OEM_DEFAULT)
    self.ocr_api.SetPageSegMode(tesseract.PSM_AUTO)
def ocr():
    """Run Tesseract on ``eurotext.jpg`` and print the text and confidence figures.

    The image is read with ``tesseract.pixRead`` and passed to the engine via
    ``SetImage``.  Word confidences are printed twice: once from the
    ``TessBaseAPI.AllWordConfidences`` method ("Method 1") and once from the
    module-level ``tesseract.AllWordConfidences`` helper ("Method 2").

    Relies on ``header``, ``countWords`` and ``countWords2`` being defined
    elsewhere in this module.

    Returns:
        None. Everything is written to standard output.
    """
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    mImgFile = "eurotext.jpg"
    pixImage = tesseract.pixRead(mImgFile)
    api.SetImage(pixImage)
    text = api.GetUTF8Text()
    conf = api.MeanTextConf()
    print(text, len(text))
    print("Confidence Level: %d %%" % conf)

    print("Confidences of All words")
    header("Method 1", "*" * 10)
    confOfText = api.AllWordConfidences()
    print(confOfText)

    print("Number of Words:")
    # Count once; the value is needed both for the report and the comparison.
    my_count = countWords(text)
    print("counted by tesseract: %d" % len(confOfText))
    print("counted by me: %d[%d]" % (my_count, countWords2(text)))
    if len(confOfText) != my_count:
        print("Why the words counted by tesseract are different from mine!!!!")

    header("Method 2", "*" * 10)
    confs = tesseract.AllWordConfidences(api)
    print(confs, len(confs))
def __init__(self):
    """Create empty file bookkeeping lists and an English Tesseract engine."""
    super(Application, self).__init__()

    (self.file_roots_and_names,
     self.potential_problem_files,
     self.searchable_files) = [], [], []

    # The attribute is bound before Init, exactly as the engine is configured
    # through it afterwards.
    engine = self.tesseract_api = tesseract.TessBaseAPI()
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_AUTO)
def tesseract_cv_ocr(imgname):
    """Return the text Tesseract recognises in the image file *imgname*.

    The image is loaded with ``cv.LoadImage`` and recognised with the English
    model using automatic page segmentation.

    Args:
        imgname: Path of the image file to load.

    Returns:
        The UTF-8 text returned by ``TessBaseAPI.GetUTF8Text``.
    """
    image = cv.LoadImage(imgname)

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    return api.GetUTF8Text()
def __init__(self):
    """Initialise the Tesseract engine from the ``config`` module settings.

    The data path, language and character whitelist come from
    ``config.TESSDATA_PREFIX``, ``config.TESSDATA_LANG`` and
    ``config.TESSDATA_UNICHARSET``; the engine reads single lines.
    """
    engine = self.tessApi = tesseract.TessBaseAPI()
    engine.Init(config.TESSDATA_PREFIX, config.TESSDATA_LANG,
                tesseract.OEM_DEFAULT)
    engine.SetVariable("tessedit_char_whitelist", config.TESSDATA_UNICHARSET)
    engine.SetPageSegMode(tesseract.PSM_SINGLE_LINE)
def ocr_number(img):
    """Recognise a single line made of digits, ``+`` and ``-`` in *img*.

    Args:
        img: Image accepted by ``convert_to_IplImage`` (defined elsewhere in
            this module).

    Returns:
        The first line of the OCR output.
    """
    api = tesseract.TessBaseAPI()
    # Raw string: the previous literal depended on "\P" and "\T" not being
    # recognised escape sequences.  The resulting path is unchanged.
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng",
             tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist", "0123456789+-")
    api.SetPageSegMode(tesseract.PSM_SINGLE_LINE)

    cv_im = convert_to_IplImage(img)
    tesseract.SetCvImage(cv_im, api)
    # Strip the superfluous newlines from the OCR result.
    return api.GetUTF8Text().split("\n")[0]
def auto_search():
    """Trigger a search in the emulator, OCR the loot panel and judge it.

    Steps: send three absolute mouse events at (660, 290), wait ten seconds,
    pull a screenshot through ``adb``, crop and invert the loot area, then
    recognise digits with the ``coc`` Tesseract model.

    Returns:
        True when both the gold and the elixir line have their first space at
        index 3 and a leading number of at least 200 (a sound is played via
        ``mplayer`` first); False otherwise, including when OCR yields fewer
        than two lines.

    Raises:
        ValueError: If a leading group that passes the position check is not
            numeric.
    """
    def _trim_tokens(line):
        # Tokens longer than three characters lose their first character
        # (presumably a misread leading glyph -- confirm against samples).
        return [tok[1:] if len(tok) > 3 else tok for tok in line.split(" ")]

    # Click the search button: button states 0, 1, 0 at the same position.
    for button_state in (0, 1, 0):
        genymotion_session.console.mouse.put_mouse_event_absolute(
            660, 290, 0, 0, button_state)
    sleep(10)

    # Grab the screen from the device.
    subprocess.call("adb shell screencap -p /sdcard/screen.png", shell=True)
    subprocess.call("adb pull /sdcard/screen.png /tmp/screen.png", shell=True)

    im = Image.open("/tmp/screen.png")
    box = (57, 75, 140, 138)
    loot = im.crop(box).convert('L')
    loot = ImageOps.invert(loot)
    loot.save("/tmp/loot.png", "png")

    api = tesseract.TessBaseAPI()
    try:
        api.Init("/home/mrtux/app/bin/", "coc", tesseract.OEM_DEFAULT)
        api.SetVariable("tessedit_char_whitelist", "0123456789")
        api.SetPageSegMode(tesseract.PSM_AUTO)
        image = cv.LoadImage("/tmp/loot.png", cv.CV_LOAD_IMAGE_UNCHANGED)
        tesseract.SetCvImage(image, api)
        text = api.GetUTF8Text()
    finally:
        # Release the engine on every path, not only on a successful match.
        api.End()

    total_loot = text.splitlines()
    if len(total_loot) < 2:
        # OCR did not produce both lines; treat it as "no match".
        return False
    gold_loot, elixir_loot = total_loot[0:2]

    gold_loot_text_element = _trim_tokens(gold_loot)
    elixir_loot_text_element = _trim_tokens(elixir_loot)

    gold_expr = (gold_loot.find(" ") == 3
                 and int(gold_loot_text_element[0]) >= 200)
    elixir_expr = (elixir_loot.find(" ") == 3
                   and int(elixir_loot_text_element[0]) >= 200)

    print(gold_loot)
    print(gold_loot_text_element)
    print(elixir_loot)
    print(elixir_loot_text_element)

    if gold_expr and elixir_expr:
        subprocess.call("mplayer /home/mrtux/app/bin/gun.mp3", shell=True)
        return True
    return False
def recognize(image):
    """Recognise a single text block in *image* and report the confidence.

    Args:
        image: Image object accepted by ``tesseract.SetCvImage``.

    Returns:
        A ``(text, conf)`` tuple: the UTF-8 text and the mean text confidence
        reported by the engine.  Both are also printed.
    """
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()
    conf = api.MeanTextConf()

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(text)
    print("Confidence: " + str(conf))
    return text, conf
def ocr(self, image):
    """Run Tesseract on a two-dimensional image array and return hOCR markup.

    Args:
        image: Array whose ``shape`` is ``(height, width)``; it is wrapped in
            a one-channel ``IPL_DEPTH_8U`` header.

    Returns:
        The value of ``TessBaseAPI.GetHOCRText(0)``.
    """
    engine = tesseract.TessBaseAPI()
    engine.Init(".", "big", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

    rows, cols = image.shape
    # Bytes per image row, passed to SetData as the step.
    row_bytes = cols * image.dtype.itemsize
    header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
    cv.SetData(header, image.tostring(), row_bytes)
    tesseract.SetCvImage(header, engine)

    return engine.GetHOCRText(0)
def detect(self):
    """OCR the file at ``self.tempFile`` for digits and spaces.

    The recognised text is stored in ``self.result``.

    Returns:
        ``self``, so calls can be chained.
    """
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist", "0123456789 ")
    api.SetPageSegMode(tesseract.PSM_AUTO)

    # The context manager closes the file even if read() raises.
    with open(self.tempFile, "rb") as f:
        mBuffer = f.read()

    self.result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
    return self
def recognize(mImgFile):
    """OCR the digits in the image file *mImgFile* and print the result.

    Args:
        mImgFile: Path of the image file; its raw bytes are passed to
            ``tesseract.ProcessPagesBuffer``.

    Returns:
        None. The result is printed prefixed with ``result(ProcessPagesBuffer)=``.
    """
    api = tesseract.TessBaseAPI()
    try:
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetVariable("tessedit_char_whitelist", "0123456789")
        api.SetPageSegMode(tesseract.PSM_AUTO)

        with open(mImgFile, "rb") as image_file:
            mBuffer = image_file.read()
        result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
        # %-formatting gives the same "label value" output on Python 2 and 3.
        print("result(ProcessPagesBuffer)= %s" % (result,))
    finally:
        # Release the engine even when reading or recognition fails.
        api.End()
def getCaptcha():
    """Load the captcha image, binarise it and print Tesseract's reading.

    The image is thresholded in place (values above 231 become 255) and read
    as a single word restricted to digits and lowercase letters.

    Returns:
        None. The recognised text is printed.
    """
    # NOTE(review): cv.LoadImage is given a URL; confirm this OpenCV build
    # can open remote resources, otherwise download the image first.
    gray = cv.LoadImage(
        'https://my.ebharatgas.com/bharatgas/MyCaptcha.png',
        cv.CV_LOAD_IMAGE_GRAYSCALE)
    # The loaded image was previously bound to a different name than the one
    # used below, which raised NameError.
    cv.Threshold(gray, gray, 231, 255, cv.CV_THRESH_BINARY)

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist",
                    "0123456789abcdefghijklmnopqrstuvwxyz")
    api.SetPageSegMode(tesseract.PSM_SINGLE_WORD)
    tesseract.SetCvImage(gray, api)
    print(api.GetUTF8Text())
def updateOCR(frame):
    """OCR a three-channel frame and return text, confidence and box lines.

    Args:
        frame: Image array; ``frame.shape[0]`` and ``frame.shape[1]`` are used
            as height and width, and three 8-bit channels are assumed by the
            header that is created.

    Returns:
        A ``(text, confidence, charMatrix)`` tuple where ``charMatrix`` is the
        output of ``GetBoxText(0)`` split on newlines.
    """
    api = tesseract.TessBaseAPI()
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng",
             tesseract.OEM_DEFAULT)
    # Tesseract documents SetVariable as valid only after Init, so the
    # whitelist is applied once the engine is initialised.
    api.SetVariable("tessedit_char_whitelist", WHITELIST)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    height, width = frame.shape[0], frame.shape[1]
    # IPL_DEPTH_8U is taken from the same legacy cv2.cv namespace as
    # CreateImageHeader and SetData.
    bitmap = cv2.cv.CreateImageHeader((width, height),
                                      cv2.cv.IPL_DEPTH_8U, 3)
    cv2.cv.SetData(bitmap, frame.tostring(),
                   frame.dtype.itemsize * 3 * width)
    tesseract.SetCvImage(bitmap, api)

    text = api.GetUTF8Text()
    confidence = api.MeanTextConf()
    charMatrix = api.GetBoxText(0).split('\n')
    return text, confidence, charMatrix
def ocr(self, image, area, factor):
    """OCR a rectangular region of *image* and convert the hOCR output.

    Args:
        image: Array whose ``shape`` is ``(height, width)``.
        area: Pair of points ``((x1, y1), (x2, y2))`` delimiting the region,
            in coordinates that are multiplied by *factor*.
        factor: Scale applied to *area* to obtain pixel coordinates.

    Returns:
        Whatever ``self.hocrToObject`` builds from the hOCR text.
    """
    engine = tesseract.TessBaseAPI()
    engine.Init(".", "big", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

    rows, cols = image.shape
    row_bytes = cols * image.dtype.itemsize
    header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
    cv.SetData(header, image.tostring(), row_bytes)
    tesseract.SetCvImage(header, engine)

    # Scale each corner separately, then derive width/height from the
    # truncated values.
    left = int(area[0][0] * factor)
    top = int(area[0][1] * factor)
    right = int(area[1][0] * factor)
    bottom = int(area[1][1] * factor)
    engine.SetRectangle(left, top, right - left, bottom - top)

    return self.hocrToObject(engine.GetHOCRText(0), area, factor)
def img2text_method2(image_path):
    """Return the whitespace-separated words Tesseract finds in an image.

    Args:
        image_path: Path of the image; it is loaded as grayscale with
            ``cv.LoadImage``.

    Returns:
        The recognised text split on whitespace, as a list of words.  The
        list is also printed.
    """
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Using method 2 to generate text from image...")

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    image = cv.LoadImage(image_path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    tesseract.SetCvImage(image, api)
    word_list = api.GetUTF8Text().split()
    print(word_list)
    return word_list
def convert_file(filename):
    """OCR an uploaded image and return the text as a JSON response.

    Args:
        filename: Name of the file inside the hard-coded uploads directory.
            The complete path is lower-cased before loading.

    Returns:
        ``jsonify({'output': <text>})`` on success, or the plain string
        ``"Error <message>"`` when the image cannot be loaded.
    """
    # NOTE(review): filename is concatenated into the path unsanitised; confirm
    # callers pass a safe name.  The directory is hard-coded here; an earlier
    # variant read it from app.config['UPLOAD_FOLDER'].
    path = str('/home/engineer/htdocs/stop/webapi/uploads/' + filename).lower()
    try:
        image = cv.LoadImage(path, cv.CV_LOAD_IMAGE_GRAYSCALE)
    except Exception as e:
        return "Error " + str(e)

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()
    return jsonify({'output': str(text)})
def tesseract_ocr(imgname, type='PagesWrapper'):
    """OCR the image file *imgname* through one of the ProcessPages* helpers.

    Args:
        imgname: Path of the image file.
        type: Which helper to use: ``'PagesWrapper'`` (default),
            ``'PagesFileStream'``, ``'PagesRaw'`` or ``'PagesBuffer'``.

    Returns:
        The value returned by the selected ``tesseract.ProcessPages*`` call.

    Raises:
        ValueError: If *type* is not one of the supported names.
    """
    modes = ('PagesWrapper', 'PagesFileStream', 'PagesRaw', 'PagesBuffer')
    # Validate before the engine is created so a bad argument fails fast
    # instead of leaving the result unbound at the end.
    if type not in modes:
        raise ValueError("unknown processing type %r; expected one of: %s"
                         % (type, ", ".join(modes)))

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    if type == 'PagesWrapper':
        return tesseract.ProcessPagesWrapper(imgname, api)
    if type == 'PagesFileStream':
        # These two branches used an undefined name instead of imgname.
        return tesseract.ProcessPagesFileStream(imgname, api)
    if type == 'PagesRaw':
        return tesseract.ProcessPagesRaw(imgname, api)

    # 'PagesBuffer': image data must be read as bytes, and the file closed.
    with open(imgname, 'rb') as image_file:
        mBuffer = image_file.read()
    return tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
def ocr_udemae(img):
    """Recognise a single line made of the characters ``SABC+-`` in *img*.

    The image is first passed through ``binarize`` and ``erode`` and then
    converted with ``convert_to_IplImage`` (all defined elsewhere in this
    module).

    Args:
        img: Image accepted by ``binarize``.

    Returns:
        The first line of the OCR output.
    """
    img = binarize(img)
    img = erode(img)

    api = tesseract.TessBaseAPI()
    # Raw string: the previous literal depended on "\P" and "\T" not being
    # recognised escape sequences.  The resulting path is unchanged.
    api.Init(r"C:\Program Files (x86)\Tesseract-OCR", "eng",
             tesseract.OEM_DEFAULT)
    api.SetVariable("tessedit_char_whitelist", "SABC+-")
    api.SetPageSegMode(tesseract.PSM_SINGLE_LINE)

    cv_im = convert_to_IplImage(img)
    tesseract.SetCvImage(cv_im, api)
    # Strip the superfluous newlines from the OCR result.
    return api.GetUTF8Text().split("\n")[0]
def ocr_img(self, imgname):
    """Return the English text Tesseract recognises in the file *imgname*.

    To use a non-standard language pack named foo.traineddata, set the
    TESSDATA_PREFIX environment variable so the file can be found at
    TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the
    argument -l foo.

    Args:
        imgname: Path of the image; it is loaded as grayscale.

    Returns:
        The UTF-8 text returned by ``TessBaseAPI.GetUTF8Text``.
    """
    image = cv.LoadImage(imgname, cv.CV_LOAD_IMAGE_GRAYSCALE)

    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    return api.GetUTF8Text()
def __init__(self):
    """Create a Tesseract engine using the ``ibge`` model next to this file.

    The engine is restricted to digits, ``-``, ``°``, ``'`` and ``"`` and is
    stored in ``self._api``.
    """
    engine = tesseract.TessBaseAPI()

    # For some reason, passing the path to Init() was not working, but
    # setting the environment variable seems to be a workaround.
    os.putenv("TESSDATA_PREFIX",
              os.path.dirname(os.path.realpath(__file__)))

    engine.Init(".", "ibge", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_AUTO)

    # Applied in this order, one SetVariable call per entry.
    settings = (
        ("tessedit_char_whitelist", "-0123456789°'\""),
        ("chs_leading_punct", "-"),
        ("numeric_punctuation", "-°'\""),
    )
    for name, value in settings:
        engine.SetVariable(name, value)

    self._api = engine
def read_text(img):
    """
    **SUMMARY**

    Return whatever text OCR can find in the image. Rotation is not handled
    well, so rotate and/or crop the input beforehand so that the text is
    oriented the way a document is normally read.

    **RETURNS**

    A string: the OCR result, or an installation hint when ``OCR_ENABLED``
    is false.

    **EXAMPLE**

    >>> text = read_text(img)
    >>> print text

    **NOTE**

    For run-time problems, check that these dependencies are installed:

    http://code.google.com/p/tesseract-ocr/
    http://code.google.com/p/python-tesseract/
    """
    if not OCR_ENABLED:
        return ("Please install the correct OCR library required - "
                "http://code.google.com/p/tesseract-ocr/ "
                "http://code.google.com/p/python-tesseract/")

    engine = tesseract.TessBaseAPI()
    engine.SetOutputName("outputName")
    engine.Init(".", "eng", tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_AUTO)

    # Serialise the image as JPEG in memory and pass the bytes to Tesseract.
    jpeg_stream = StringIO()
    convert.to_pil_image(img).save(jpeg_stream, "jpeg")
    jpeg_bytes = jpeg_stream.getvalue()

    return tesseract.ProcessPagesBuffer(jpeg_bytes, len(jpeg_bytes), engine)
def ocr_sxx(filename):
    """Binarise the image *filename*, OCR it and print the recognised text.

    The preprocessed image is written to ``result.png`` in the working
    directory and reloaded through the legacy ``cv`` binding for Tesseract.

    Args:
        filename: Path of the image read with ``cv2.imread``.

    Returns:
        None. The text length, the text, and the text with newlines replaced
        by spaces are printed.
    """
    # ----- Image preprocessing -----
    im = cv2.imread(filename)
    # Convert to grayscale.
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    # Threshold the image.
    _, im_th = cv2.threshold(im_gray, 70, 255, cv2.THRESH_BINARY)
    # Remove noise.
    im_th = cv2.medianBlur(im_th, 3)
    # Save the image locally so it can be reloaded for OCR.
    cv2.imwrite('result.png', im_th)
    # NOTE(review): this function opens no window itself; these calls are
    # kept in case callers have windows open -- confirm they are still needed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # ----- OCR -----
    image = cv.LoadImage("result.png", cv.CV_LOAD_IMAGE_GRAYSCALE)
    language = "eng"
    api = tesseract.TessBaseAPI()
    api.Init(".", language, tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    tesseract.SetCvImage(image, api)
    text = api.GetUTF8Text()

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(len(text))
    print(text.decode('utf-8'))

    # Replace the newlines with spaces.
    line = text.replace('\n', ' ')
    print(line.decode('utf-8'))
def ocr(self, image, area, factor):
    """OCR part of a rectangular region of *image* and convert the hOCR output.

    The model is ``"big"`` when ``self.lang`` is ``"big"`` or ``"eng"``,
    otherwise ``str(self.lang)``.  The right edge of the region is pulled in
    by 17% of the region's width before scaling.

    Args:
        image: Array whose ``shape`` is ``(height, width)``.
        area: Pair of points ``((x1, y1), (x2, y2))`` delimiting the region.
        factor: Scale applied to *area* to obtain pixel coordinates.

    Returns:
        Whatever ``self.hocrToObject`` builds from the decoded hOCR text.
    """
    engine = tesseract.TessBaseAPI()
    model = "big" if self.lang in ("big", "eng") else str(self.lang)
    engine.Init(".", model, tesseract.OEM_DEFAULT)
    engine.SetPageSegMode(tesseract.PSM_SINGLE_BLOCK)

    rows, cols = image.shape
    row_bytes = cols * image.dtype.itemsize
    header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
    cv.SetData(header, image.tostring(), row_bytes)
    tesseract.SetCvImage(header, engine)

    left = int(area[0][0] * factor)
    top = int(area[0][1] * factor)
    bottom = int(area[1][1] * factor)
    # Right edge: drop 17% of the unscaled width, then scale.
    trimmed = int((area[1][0] - area[0][0]) * 0.17)
    right = int((area[1][0] - trimmed) * factor)
    engine.SetRectangle(left, top, right - left, bottom - top)

    hocr = engine.GetHOCRText(0).decode('utf-8')
    return self.hocrToObject(hocr, area, factor)
def guess(img_data):
    """Best-effort OCR of an in-memory image using digits and capitals only.

    The image is opened with PIL, an alpha channel is dropped, and the helpers
    ``color_sep``, ``color_fil`` and ``get_pil_string`` (defined elsewhere in
    this module) prepare it for ``tesseract.ProcessPagesBuffer``.

    Args:
        img_data: Raw image file contents.

    Returns:
        The recognised text with surrounding whitespace and all spaces
        removed, or ``None`` when any step fails (the failure is logged).
    """
    import logging

    try:
        api = tesseract.TessBaseAPI()
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetVariable("tessedit_char_whitelist",
                        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

        im = Image.open(StringIO.StringIO(img_data))
        im.load()
        if im.mode == 'RGBA':
            r, g, b, a = im.split()
            im = Image.merge("RGB", (r, g, b))

        color_sep(im, 3)
        color_fil(im)
        contents = get_pil_string(im)

        result = tesseract.ProcessPagesBuffer(contents, len(contents), api)
        if result is None:
            result = ''
        return result.strip().replace(' ', '')
    except Exception:
        # Still best-effort for callers, but the failure is no longer
        # discarded without a trace.
        logging.getLogger(__name__).exception("guess() failed")
        return None
def tesseract_native(fpath, lang, block=True):
    """Process the pre-segmented image at *fpath* with the Tesseract Python API.

    Args:
        fpath: Path of the image, loaded with ``iscolor=False``.
        lang: Language/model name passed to ``Init``.
        block: When true use ``PSM_SINGLE_BLOCK``, otherwise ``PSM_AUTO``.

    Every entry of the module-level ``tesseract_opts`` mapping is applied
    with ``SetVariable`` before recognition.

    Returns:
        The UTF-8 text returned by ``TessBaseAPI.GetUTF8Text``.
    """
    engine = tesseract.TessBaseAPI()
    engine.Init(".", lang, tesseract.OEM_DEFAULT)

    seg_mode = tesseract.PSM_SINGLE_BLOCK if block else tesseract.PSM_AUTO
    engine.SetPageSegMode(seg_mode)

    for name, value in tesseract_opts.items():
        engine.SetVariable(name, value)

    tesseract.SetCvImage(cv.LoadImage(fpath, iscolor=False), engine)
    return engine.GetUTF8Text()