def getverify1(name):
    """Cut the captcha ``yzm/<name>`` into four glyphs and OCR each one.

    Every 13x20 slice is written to disk after each stage (raw crop in
    ``subyzm/``, greyscale in ``gyzm/``, post-processed in ``nyzm/``,
    binarised in ``byzm/``) and the text pytesser recognises is printed.
    """
    # Fixed binarisation cut-off: grey levels below it map to 0, others to 1.
    cutoff = 200
    lookup = [0 if level < cutoff else 1 for level in range(256)]

    captcha = Image.open('yzm/' + name)
    for index in xrange(4):
        piece_name = bytes(index) + '_' + name
        left = 13 * index
        piece = captcha.crop((left, 0, left + 13, 20))
        piece.save('subyzm/' + piece_name)

        grey = piece.convert('L')
        grey.save('gyzm/g' + piece_name)

        # setNewImg's return value is ignored and the image is saved again
        # right after, so it presumably edits ``grey`` in place.
        pixel, slope, max_slope = getPixelandSlop(grey)
        setNewImg(grey, pixel, slope, max_slope)
        grey.save('nyzm/' + piece_name)

        binary = grey.point(lookup, '1')
        print(pytesser.image_to_string(binary))
        binary.save('byzm/b' + piece_name)
    print(name)
def crackCaptcha(file):
    """OCR the captcha image at ``file`` and return the recognised text.

    The image is converted to greyscale and written to ``input-black.gif``;
    that file is re-read, enlarged to 116x56 and written to
    ``input-NEAREST.tif``, which is what pytesser finally reads.  Trailing
    whitespace is stripped from the result, which is also printed.
    """
    grey = Image.open(file).convert("L")
    grey.save("input-black.gif", "GIF")

    # Make the image bigger (needed for OCR).
    enlarged = Image.open('input-black.gif').resize((116, 56), Image.NEAREST)
    enlarged.save("input-NEAREST" + ".tif")

    # Perform OCR using the pytesser library.
    from pytesser import image_to_string
    recognised = image_to_string(Image.open('input-NEAREST.tif')).rstrip()
    print(recognised)
    return recognised
def get(self):
    """Recognise the text in ``self.img_file`` and return it upper-cased.

    The greyscale image is written to ``'g' + self.img_file`` and the
    binarised one to ``'b' + self.img_file``.  The OCR output is stripped,
    upper-cased and passed through the ``self.rep`` substitutions.
    """
    source = Image.open(self.img_file)
    grey = source.convert('L')
    # NOTE(review): clearNoise receives the colour original, which is not
    # used again below -- confirm whether ``grey`` was the intended target.
    self.clearNoise(source, 50, 4, 4)
    grey.save('g' + self.img_file)

    # Threshold segmentation using the table supplied by table_2value().
    binary = grey.point(self.table_2value(), '1')
    binary.save('b' + self.img_file)

    recognised = image_to_string(binary).strip().upper()
    for wrong in self.rep:
        recognised = recognised.replace(wrong, self.rep[wrong])
    return recognised
def imageToString(img):
    '''please input a grayscale image(using cv2.imread(<filename>,0))

    Returns ``(code, processed_image)`` where ``code`` holds only digits:
    real digits are kept, known look-alike characters are mapped to the
    digit they resemble, and everything else is dropped.
    '''
    # Digit -> characters Tesseract tends to report instead of that digit.
    confusions = (
        ('2', 'zZL'),
        ('0', 'oOQ'),
        ('4', 'A'),
        ('5', 'Ss$'),
        ('9', 'g'),
        ('8', '&R%a'),
        ('7', '>?)'),
        ('1', 'Il'),
        ('6', 'GU'),
    )
    lookalike = {}
    for digit, shapes in confusions:
        for shape in shapes:
            lookalike[shape] = digit

    processed = processImage(img)
    # pytesser is fed a PIL image, so round-trip through a file on disk.
    cv2.imwrite('processed.bmp', processed)
    raw = pytesser.image_to_string(Image.open('processed.bmp'))

    kept = []
    for ch in raw:
        if '0' <= ch <= '9':
            kept.append(ch)
        elif ch in lookalike:
            kept.append(lookalike[ch])
    return (''.join(kept), processed)
def phenotype(self):
    """Express every unit of the genome and sample the on-screen score.

    After every fifth unit (starting with the first) the score area of the
    screen is grabbed, cleaned up and OCR'd; the digits found become the
    fitness.  Expression stops as soon as a sample contains no digits.
    """
    pyautogui.PAUSE = 1
    for step, unit in enumerate(self.__genoUnitCollection):
        unit.phenotype()
        if step % 5 != 0:
            continue

        # Grab screenshot and crop score area.
        score_area = ImageGrab.grab().crop((560, 650, 660, 680))

        # Filter and enhance area to better recognize digits.
        score_area = score_area.filter(ImageFilter.MedianFilter())
        score_area = ImageEnhance.Contrast(score_area).enhance(2)
        score_area = score_area.convert('1')
        text = image_to_string(score_area)

        # Python 2 str.translate: delete every byte that is not a digit.
        identity = string.maketrans('', '')
        non_digits = identity.translate(identity, string.digits)
        digits = text.translate(identity, non_digits)

        if digits == '':
            break
        self.__fitness = int(digits)
def check_picture_action(self):
    """Photograph a check, show the picture and read the amount from it.

    A preview is shown until the ``pic_window`` dialog is dismissed, then
    the frame is captured to ``<timestamp>.jpg`` and OCR'd.  When the OCR
    text is an integer it is stored in ``self.deposit_amount`` and shown in
    ``df_value_label``; otherwise the label reports that nothing was found.
    """
    print('taking picture of check')
    picture_path = str(datetime.now()) + '.jpg'

    camera = picamera.PiCamera()
    try:
        camera.start_preview(fullscreen=False, window=(400, 400, 640, 480))
        waiter = pic_window(self)
        self.wait_window(waiter.top)
        camera.capture(picture_path)
        camera.stop_preview()
    finally:
        # Release the camera so the next call can open it again.
        camera.close()

    im = Image.open(picture_path)
    text = image_to_string(im)
    im = im.resize((480, 270), Image.ANTIALIAS)
    photo = ImageTk.PhotoImage(im)
    self.df_pic_label.config(image=photo)
    # Keep a reference on the widget so Tk does not lose the image.
    self.df_pic_label.photo = photo

    try:
        amount = int(text)
    except ValueError:
        self.df_value_label.config(text="No money detected")
    else:
        self.deposit_amount = amount
        self.df_value_label.config(text="$" + str(amount) + " detected")
def getCode(self, im):
    """Binarise the captcha image ``im`` and return the text Tesseract reads.

    Spaces are removed from the recognised text and it is stripped.
    """
    def lighten(level):
        # Anything brighter than mid-grey becomes pure white.
        return 255 if level > 128 else level

    def darken(level):
        # Whatever is still not pure white becomes pure black.
        return 0 if level < 255 else 255

    binary = im.convert('L').point(lighten).point(darken)

    # Drop a 2-pixel frame on every side.
    width, height = binary.size[0], binary.size[1]
    trimmed = binary.crop((2, 2, width - 2, height - 2))

    recognised = pytesser.image_to_string(trimmed)
    return recognised.replace(' ', '').strip()
def __getvldcode(self):
    """Download the validation-code bitmap and return the OCR'd text.

    The image is stored in ``tmp.bmp``, resized to 72x18 and passed to
    pytesser; the recognised text is returned stripped.
    """
    cont = requests.get(url('vld.bmp?0.30659009179925256'),
                        data='0.30659009179925256',
                        headers=self.header)
    # Close the file explicitly so the data is on disk before re-opening it.
    with open('tmp.bmp', 'wb') as bmp_file:
        bmp_file.write(cont.content)
    img = Image.open('tmp.bmp').resize((72, 18), Image.ANTIALIAS)
    return pytesser.image_to_string(img).strip()
def image_process_extract_string(s, mask, x, y, w, h):
    """OCR the ``w`` x ``h`` region of ``mask`` whose top-left corner is (x, y).

    The region is written to the file ``s``, doubled in size, saved back to
    ``s`` with a 100 dpi tag, and that file is handed to pytesser (second
    argument 6).  Returns the recognised string.
    """
    region = mask[y:y + h, x:x + w]
    cv2.imwrite(s, region)

    doubled = (2 * w, 2 * h)
    enlarged = Image.open(s).resize(doubled, Image.ANTIALIAS)
    enlarged.save(s, dpi=(100, 100))

    return pytesser.image_to_string(s, 6)
def checkObjTitle(self, a_str):
    """Return True when ``a_str`` occurs in the OCR'd object title.

    The screenshot is refreshed and cropped to ``Coords.obj_title_box``
    first; the comparison is case-insensitive.
    """
    self.updateImg()
    self.cropImg(Coords.obj_title_box)
    title_text = pytesser.image_to_string(self.img).lower()
    return a_str.lower() in title_text
def get_captcha():
    """Download captchas until OCR yields a non-empty string and return it.

    Each attempt fetches ``ipclient_init.CAPTCHA_URL`` and stores the image
    as ``captcha.gif`` inside ``ipclient_init.CAPTCHA_PATH``.  The same path
    is then passed to pytesser (the original read ``./captcha.gif``, which
    only matched when CAPTCHA_PATH was the working directory).
    """
    captcha_file = os.path.join(ipclient_init.CAPTCHA_PATH, 'captcha.gif')
    CAPTCHA = ""
    while CAPTCHA == "":
        req = urllib2.Request(ipclient_init.CAPTCHA_URL, '',
                              ipclient_init.HEADER)
        gif = opener.open(req)
        try:
            payload = gif.read()
        finally:
            gif.close()
        with open(captcha_file, 'wb') as f:
            f.write(payload)
        CAPTCHA = pytesser.image_to_string(captcha_file)
    return CAPTCHA
def write():
    """Render ``write.html`` with the OCR text of an uploaded image.

    On a POST carrying an allowed file the upload is read into memory and
    recognised; in every other case the template gets an empty string.
    """
    text = ""
    if request.method == 'POST':
        upload = request.files['file']
        if upload and allowed_file(upload.filename):
            image = Image.open(StringIO(upload.read()))
            text = pytesser.image_to_string(image)
    return render_template('write.html', text=text)
def __init__(self, img):
    """Crop the question and four answers out of ``img`` and OCR them.

    The recognised strings are stored as ``tQuestion`` and
    ``tAnswerA`` .. ``tAnswerD``; afterwards every crop is displayed with
    pyplot, one window at a time.
    """
    self.img = img

    # (attribute name, crop box) for the screenshot resolution in use.
    regions = (
        ('tQuestion', (0, 120, 640, 400)),
        ('tAnswerA', (55, 410, 585, 565)),
        ('tAnswerB', (55, 590, 585, 745)),
        ('tAnswerC', (55, 770, 585, 925)),
        ('tAnswerD', (55, 950, 585, 1105)),
    )

    crops = [img.crop(box) for _name, box in regions]

    for (attr, _box), crop in zip(regions, crops):
        setattr(self, attr, pytesser.image_to_string(crop))

    for crop in crops:
        pyplot.imshow(crop)
        pyplot.show()
def get_captcha():
    """Download captchas until OCR yields a non-empty string and return it.

    Each attempt fetches ``ipclient_init.CAPTCHA_URL`` and stores the image
    as ``captcha.gif`` inside ``ipclient_init.CAPTCHA_PATH``.  The same path
    is then passed to pytesser (the original read ``./captcha.gif``, which
    only matched when CAPTCHA_PATH was the working directory).
    """
    captcha_file = os.path.join(ipclient_init.CAPTCHA_PATH, 'captcha.gif')
    CAPTCHA = ""
    while CAPTCHA == "":
        req = urllib2.Request(ipclient_init.CAPTCHA_URL, '',
                              ipclient_init.HEADER)
        gif = opener.open(req)
        try:
            payload = gif.read()
        finally:
            gif.close()
        with open(captcha_file, 'wb') as f:
            f.write(payload)
        CAPTCHA = pytesser.image_to_string(captcha_file)
    return CAPTCHA
def isHungry(self):  # needs testing
    """Return True when the OCR'd hunger value is 300 or lower.

    The screenshot is refreshed and cropped to
    ``Coords.current_hunger_box``; the recognised text is passed through
    ``letter_to_snumb`` before being parsed as an integer.
    """
    self.updateImg()
    self.cropImg(Coords.current_hunger_box)
    # letter_to_snumb: per the original comment, converts letter o to 0.
    hunger_text = letter_to_snumb(pytesser.image_to_string(self.img))
    print(hunger_text)  # debug
    return int(hunger_text) <= 300
def OCR2(filename):
    """Write a PNG copy of ``filename`` and return the OCR text of the file.

    The PNG name is ``filename`` with its last three characters replaced by
    ``png``; both that name and the recognised text are printed.
    """
    png_name = filename[0:-3] + "png"
    PythonMagick.Image(filename).write(png_name)
    print(png_name)

    # NOTE(review): OCR reads the original file, not the PNG written above
    # -- confirm which one was intended.
    recognised = image_to_string(Image.open(filename))
    print(recognised)
    return recognised
def ocr(self):
    """OCR the attached image, store the text on the instance and return it.

    Returns an empty string when there is no image.  Otherwise blank lines
    are removed from the recognised text, the result is saved in
    ``image_text`` (followed by ``self.save()``) and the image file is
    deleted from disk.
    """
    if not self.image:
        return ""

    image_path = self.image.path
    recognised = pytesser.image_to_string(image_path)
    cleaned = os.linesep.join(
        line for line in recognised.splitlines() if line)

    self.image_text = cleaned
    self.save()
    os.remove(image_path)
    return cleaned
def ocr_question_extract(im):
    """Crop the question area out of ``im`` and return its OCR text.

    The OCR module is imported lazily; when it is missing an error is
    printed and ``None`` is returned.  Otherwise the (117, 0, 180, 30)
    region is pre-processed, displayed and recognised with the ``chi_sim``
    language, and the stripped text is returned.
    """
    # Upstream project: https://github.com/madmaze/pytesseract
    global pytesser
    try:
        import pytesser
    except ImportError:
        # Only a missing module is expected here; anything else propagates.
        print("[ERROR] pytesseract not installed")
        return
    im = im.crop((117, 0, 180, 30))
    im = pre_ocr_processing(im)
    im.show()
    return pytesser.image_to_string(im, language='chi_sim').strip()
def __init__(self, img):
    """Crop the question and four answers out of the array ``img`` and OCR them.

    ``img`` is kept as-is on the instance and converted with
    ``Image.fromarray`` for cropping.  The recognised strings are stored as
    ``tQuestion`` and ``tAnswerA`` .. ``tAnswerD``.  When the module-level
    ``debugImgCrop`` flag equals True each crop is shown with pyplot and its
    text printed.
    """
    self.img = img
    frame = Image.fromarray(img)

    # (attribute name, crop box)
    regions = (
        ('tQuestion', (0, 90, 360, 220)),
        ('tAnswerA', (510, 280, 780, 350)),
        ('tAnswerB', (510, 380, 780, 450)),
        ('tAnswerC', (510, 480, 780, 550)),
        ('tAnswerD', (510, 580, 780, 650)),
    )

    crops = [frame.crop(box) for _name, box in regions]

    for (attr, _box), crop in zip(regions, crops):
        setattr(self, attr, pytesser.image_to_string(crop))

    if debugImgCrop == True:
        for (attr, _box), crop in zip(regions, crops):
            pyplot.imshow(crop)
            print(getattr(self, attr))
            pyplot.show()
def image_process_extract_string(s, mask, x, y, w, h):
    """OCR the ``w`` x ``h`` region of ``mask`` whose top-left corner is (x, y).

    The region is written to the file ``s``, doubled in size, saved back to
    ``s`` with a 100 dpi tag, and that file is handed to pytesser (second
    argument 6).  Returns the recognised string.
    """
    # NOTE(review): the original computed X/Y shifted by 10 px but never used
    # them; the crop has always used the unpadded x/y, which is kept here.
    im = mask[y:y + h, x:x + w]
    cv2.imwrite(s, im)
    size = 2 * w, 2 * h
    im = Image.open(s)
    im_resized = im.resize(size, Image.ANTIALIAS)
    im_resized.save(s, dpi=(100, 100))
    return pytesser.image_to_string(s, 6)
def parseTextos():
    """OCR every readable image in ``tinder_uah`` and collect the cleaned text.

    Each image is recognised with the ``spa`` language, re-decoded from
    cp1252 bytes as UTF-8, flattened to one line and passed to
    ``p.limpiar``; the result is appended to the module-level ``text`` list.
    Files whose text cannot be re-decoded are reported and skipped.
    """
    for nombre in listdir("tinder_uah"):
        try:
            img = cv2.imread("tinder_uah/" + nombre)
            if img is None:
                # Not something OpenCV could read as an image.
                continue
            # Analyse image as a spanish word.
            txt = pytesser.image_to_string(img, "spa")
            parsed_txt = txt.encode('cp1252').decode('utf8')
            # Make the text legible and clean it.
            una_linea = parsed_txt.replace("\n", " ").replace(" ", ".")
            text.append(p.limpiar(una_linea))
        except (UnicodeDecodeError, UnicodeError) as e:
            print(str(e))
            print(nombre, "Descartado")
def getImageCodeAndCookie(self):
    """Fetch the captcha image, OCR it and remember the session cookie.

    Every cookie in ``self.cookieJar`` is printed; the value of
    ``ASP.NET_SessionId`` is stored in ``self.sessionId``.  Returns the
    recognised text, or an empty string if anything fails.
    """
    try:
        # Fetch the captcha (this also populates the cookie jar).
        raw = self.opener.open(self.imageUrl).read()
        captcha = Image.open(StringIO.StringIO(raw))
        textcode = pytesser.image_to_string(captcha)

        print('Cookies:')
        for cookie in self.cookieJar:
            if cookie.name == 'ASP.NET_SessionId':
                self.sessionId = cookie.value
            print(' Name = ' + cookie.name)
            print(' Value = ' + cookie.value)
        print('ImageCode= %s' % (textcode,))
        return textcode
    except Exception as e:
        print('Failed to get imagecode! %s' % (e,))
        return ''
def identifyCode(codename):
    """Binarise the image file ``codename`` and return its OCR text.

    Writes the greyscale image to ``'g' + codename`` and the binarised one
    to ``'b' + codename``, then changes the working directory to the
    pytesser install folder before running the recognition.
    """
    # Binarisation table: grey levels below 140 -> 0, the rest -> 1.
    threshold = 140
    table = [0 if level < threshold else 1 for level in range(256)]

    grey = Image.open(codename).convert('L')
    grey.save('g' + codename)

    binary = grey.point(table, '1')
    binary.save('b' + codename)

    os.chdir("D:\\Python27\\Lib\\site-packages\\pytesser_v0.0.1")
    return pytesser.image_to_string(binary)
def tesser_engine(image_file_name, image_resize_procent = 250, corrector=corrector_globalsources): """ Распознавание """ email_image = Image.open(image_file_name) image_size_x, image_size_y = email_image.size image_size_x *= image_resize_procent/100 image_size_y *= image_resize_procent/100 email_image = email_image.resize((image_size_x, image_size_y)) email_text = pytesser.image_to_string(email_image) email_text = email_text.strip() email_text = corrector(email_text) try: email_text_unicode = unicode(email_text) except UnicodeDecodeError: print 'can not convert:', email_text, email_text_unicode = '' return email_text_unicode
def verify(name):
    """Return the first four upper-cased characters OCR'd from image ``name``.

    The image is converted to greyscale, binarised with the module-level
    ``table``, passed through ``rec`` and then recognised.
    """
    grey = Image.open(name).convert('L')
    binary = grey.point(table, '1')
    prepared = rec(binary)
    recognised = image_to_string(prepared).strip().upper()
    return recognised[0:4]
def tesser_engine(image_file_name, image_resize_procent=250, corrector=corrector_globalsources): """ Распознавание """ email_image = Image.open(image_file_name) image_size_x, image_size_y = email_image.size image_size_x *= image_resize_procent / 100 image_size_y *= image_resize_procent / 100 email_image = email_image.resize((image_size_x, image_size_y)) email_text = pytesser.image_to_string(email_image) email_text = email_text.strip() email_text = corrector(email_text) try: email_text_unicode = unicode(email_text) except UnicodeDecodeError: print 'can not convert:', email_text, email_text_unicode = '' return email_text_unicode
def getCode(self):
    """Log in to the course-selection site, solving the captcha on the way.

    Loops until a login succeeds: downloads the captcha to ``code.jpeg`` on
    the desktop, tries to recognise it with pytesser (asking the user to
    type it when that raises), posts the login form and reads ``sid``,
    ``xnd`` and ``xq`` from the returned page.  On an HTTP error it waits a
    second and starts over with a fresh captcha.
    """
    codeUrl = 'http://uems.sysu.edu.cn/elect/login/code'
    loginUrl = "http://uems.sysu.edu.cn/elect/login"
    print u'加载验证码...'
    while True:
        try:
            # Save the captcha where the user can find it for manual entry.
            open(get_desktop() + "\code.jpeg",
                 "wb").write(urllib2.urlopen(codeUrl).read())
            try:
                # Binarise: > 128 becomes white, everything else black,
                # then drop a 2-pixel frame before OCR.
                im = Image.open(get_desktop() + '\code.jpeg').convert('L')
                im = im.point(lambda x: 255 if x > 128 else x)
                im = im.point(lambda x: 0 if x < 255 else 255)
                box = (2, 2, im.size[0] - 2, im.size[1] - 2)
                im = im.crop(box)
                # Undo common misreads: ']' -> 'J' and digit 0 -> letter O.
                j_code = pytesser.image_to_string(im).replace(
                    ' ', '').replace(']', 'J').replace('0', 'O').strip().upper()
                print u'自动识别验证码...'
            except Exception, e:
                # Automatic recognition failed: fall back to manual input.
                j_code = raw_input(
                    u'请输入桌面的code.jpeg所对应的验证码(不分大小写): '.encode(
                        self.coding)).upper().strip()
            # NOTE(review): the next line appears to have been redacted
            # ('******') and is not valid Python as it stands -- restore the
            # username/password expressions before running.
            postData = 'username='******'&password='******'&j_code=' + j_code + '<=&_eventId=submit&gateway=true'
            res = urllib2.urlopen(loginUrl, postData)
            html = BeautifulSoup(res.read(), "html.parser")
            self.sid = html.select('input[id=sid]')[0]['value']
            if self.sid != '':
                self.xnd = html.select('input[id=xnd]')[0]['value']
                self.xq = html.select('input[id=xq]')[0]['value']
                print u'登录成功'
                # Remember the credentials on first successful login.
                if os.path.exists(get_desktop() + '\student.txt') == False:
                    demjson.encode_to_file(get_desktop() + '\student.txt',
                                           {self.stuNum: self.password})
                    print u'账号密码已存于桌面文件student.txt,下次可直接登录!!!\n'
                break
        except urllib2.HTTPError, e:
            print u'登录失败,重新加载验证码...'
            time.sleep(1.0)
def verification(jpgName):
    '''Recognise the captcha stored in the image file ``jpgName``.

    The image is converted to greyscale and binarised at grey level 140
    before OCR.  The result is stripped, upper-cased and letters that are
    commonly confused with digits are replaced (the captcha is digits only).
    '''
    # Letters Tesseract tends to report in place of digits.
    rep = {
        'O': '0',
        'I': '1',
        'L': '1',
        'Z': '2',
        'S': '8'
    }
    threshold = 140
    table = [0 if level < threshold else 1 for level in xrange(256)]

    grey = Image.open(jpgName).convert('L')
    binary = grey.point(table, '1')

    recognised = image_to_string(binary).strip().upper()
    for letter in rep:
        recognised = recognised.replace(letter, rep[letter])
    return recognised
def ocr():
    """OCR the six-digit code in ``img.gif``.

    Pixels whose channels all belong to the listed palette values are
    whitened, the image is binarised at 127 and recognised.  Whitespace is
    removed, look-alike letters are mapped to digits and non-digits are
    dropped.  Returns the six digits, or ``"Error"`` when the cleaned text
    is not exactly six digits.
    """
    print("OCR...")

    # Binarisation table.
    threshold = 127
    table = [0 if level < threshold else 1 for level in range(256)]

    # Substitution table for common misreads.
    rep = {'A': '6', 'E': '6', 'I': '1', 'O': '0', 'S': '8', '£': '8'}

    red_blue_levels = (51, 102, 153, 204, 255)
    green_levels = (43, 85, 128, 170, 213, 255)

    rgb = Image.open('img.gif').convert('RGB')
    for x in range(200):
        for y in range(30):
            (r, g, b) = rgb.getpixel((x, y))
            if r in red_blue_levels and g in green_levels \
                    and b in red_blue_levels:
                rgb.putpixel((x, y), (255, 255, 255))
    rgb = rgb.convert('L').point(table, '1')

    code = pytesser.image_to_string(rgb)
    joined = ''.join(code.splitlines())
    result = ''.join(joined.split()).upper()
    for wrong in rep:
        result = result.replace(wrong, rep[wrong])
    result = filter(str.isdigit, result)

    reg = r'^(\d{6})$'
    try:
        result = re.match(reg, result).group(1)
    except:
        print("Error occurred in OCR.")
        rgb.close()
        return "Error"
    rgb.close()
    return result
def ocr(self):
    """Read the nine letters shown in the "Nexus 5" window and solve them.

    The window is brought to the front, its letter area is grabbed and
    reduced to pure black/white, and the OCR result is passed to
    ``Countdown`` when it is nine characters long.  Otherwise the attempt
    is reported and retried until ``self.iteration`` reaches 5.
    """
    # Counts attempts so the retry below cannot loop forever.
    self.iteration += 1

    # Window rectangle, shrunk to just the letters of the app.
    rect = win32gui.GetWindowRect(win32gui.FindWindow(None, "Nexus 5"))
    letters_box = (rect[0] + 45, rect[1] + 325, rect[2] - 70, rect[3] - 200)

    # Bring the application to the front.
    win32com.client.Dispatch("WScript.Shell").AppActivate("Nexus 5")

    grab = ImageGrab.grab(letters_box)

    # Everything that is not pure white becomes black; this greatly
    # improves the effectiveness of the OCR library.
    white = (255, 255, 255)
    black = (0, 0, 0)
    grab.putdata([white if pixel == white else black
                  for pixel in grab.getdata()])

    text = image_to_string(grab).strip().lower()

    # Only try and solve if we have 9 letters, otherwise try to OCR again.
    if len(text) == 9:
        Countdown(text)
        return
    print('Bad length, was {}. Detected {}'.format(len(text), text))
    if self.iteration < 5:
        self.ocr()
def login(self):
    """Log in through the session ``self.s`` using an OCR'd captcha.

    Fetches the login page, downloads and displays the captcha image,
    recognises it with pytesser and posts the login form.  The response
    bodies and the recognised code are printed.
    """
    r = self.s.get(ZHYCW.LOGIN_URL_GET)
    print r.text
    r = self.s.get('http://passport.chinahr.com/m/genpic')
    i = Image.open(StringIO(r.content))
    i.show()
    imgCode = pytesser.image_to_string(i)
    print '-->', imgCode
    # NOTE(review): account identifier and password blob are hard-coded
    # below -- consider moving them to configuration.
    params = {
        'backUrl': "http://www.chinahr.com/shenzhen/",
        'from': '',
        'imgCode': imgCode,
        'input': '15728567842',
        'pwd':
        "31af4a634cf808c8a7572b741dec6234b751165471560d368e4dd8fb84428c3db240c478a9d1afb455c9\
e5235d84dd51bdc8ddf1fc621b3b0d4a10664ff13cd3846804a242d0cb7fab7a88f82d9e71ae6bc815d0191691\
532bfd45c8ee3955250f17b50d16e76c937ab7de4a0aa4ae1040ee50465a45c753e2a8909016a04291",
        'rember': '1'
    }
    r = self.s.post(ZHYCW.LOGIN_URL_POST, data=params)
    print r.text
def getValidator():
    """Download the validation-code image and return its first four characters.

    The image is saved as ``validator.png``, its greyscale version as
    ``gvalidator.png``; the greyscale image is binarised at level 200
    before OCR.
    """
    url = 'http://yjxt.bupt.edu.cn/Public/ValidateCode.aspx?image=1079919554'
    name = 'validator.png'

    payload = urllib2.urlopen(urllib2.Request(url)).read()
    with open(name, 'wb') as fd:
        fd.write(payload)

    threshold = 200
    table = [0 if level < threshold else 1 for level in range(256)]

    grey = Image.open(name).convert('L')
    grey.save('g' + name)
    binary = grey.point(table, '1')
    return pytesser.image_to_string(binary)[:4]
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 30 15:59:37 2015

@author: James
"""
# Import the submodule explicitly: a bare ``import PIL`` does not load
# ``PIL.Image``, so ``PIL.Image.open`` only worked if another module had
# already imported it.
import PIL.Image
from pytesser import image_to_string

# int() tolerates surrounding whitespace, including trailing newlines.
print(int('50\n\n'))
print(range(1))

img = PIL.Image.open('numb_in_pic.png')
print(image_to_string(img))
def sudokuSolve(img): ticks=time.time() gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) gray = cv2.GaussianBlur(gray,(5,5),0) thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2) hierarchy,contours,_ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) biggest = None max_area = 0 for i in contours: area = cv2.contourArea(i) if area > 100: peri = cv2.arcLength(i,True) approx = cv2.approxPolyDP(i,0.02*peri,True) if area > max_area and len(approx)==4: biggest = approx max_area = area cv2.drawContours(img, biggest, -1, (0,255,0), 8) biggest=rectify(biggest) thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2) h = np.array([ [0,0],[449,0],[449,449],[0,449] ],np.float32) retval = cv2.getPerspectiveTransform(biggest,h) warp = cv2.warpPerspective(thresh,retval,(450,450)) cv2.imwrite('box.jpg',warp) # mask=cv2.imread('mask.jpg') # mask = cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY) # ret, mask = cv2.threshold(mask, 10, 255, cv2.THRESH_BINARY) # i was trying something else! for y in range (0,9): for x in range (0,9) : morph=warp[(50*y):((50*y)+50),(50*x):((50*x)+50)] morph=morph[5:45,5:45] cv2.imwrite('sudokuDigits/cell'+str(y)+str(x)+'.jpg',morph) y=0 x=0 text='' fullResult='' print "| Recognizing Numbers. . . . " keys = [i for i in range(48,58)] for y in range (0,9): for x in range (0,9) : im = Image.open('sudokuDigits/cell'+str(y)+str(x)+'.jpg') text =pytesser.image_to_string(im) if text=='\n' or ord(text[0]) not in keys: fullResult=fullResult+'0' else: fullResult=fullResult+str(text[0]) print "Detected Game!" for i in range(0,81): print fullResult[i], if (i+1)%9 == 0: print "" print "" print "| Solving . . . ." solve.r(fullResult) timetaken=time.time()-ticks print "SolvedIn>> "+str(timetaken)+"secs"
for x in xrange(im.size[0]): if pixdata[x, y][0] < 90: pixdata[x, y] = (0, 0, 0, 255) for y in xrange(im.size[1]): for x in xrange(im.size[0]): if pixdata[x, y][1] < 136: pixdata[x, y] = (0, 0, 0, 255) for y in xrange(im.size[1]): for x in xrange(im.size[0]): if pixdata[x, y][2] > 0: pixdata[x, y] = (255, 255, 255, 255) #box = (0,0,sizey,sizex) box = (2, 2, sizey - 2, sizex - 2) region = im.crop(box) region.save("input-black.jpg", "jpeg") im_orig = Image.open('input-black.jpg') #big = im_orig.resize((540, 160), Image.NEAREST) #big.show() #big.save("test.bmp") file_o.write(pytesser.image_to_string(im_orig)) file_o.close() """ imageSize = (20, 58) image = Image.frombytes('RGB', imageSize, rawData, "F;16") image.save("foo.png") """
from PIL import Image
from pytesser import image_to_string
import pyscreenshot as ImageGrab
import time
import sys

# Folder the numbered screenshots are read back from.  Raw strings keep the
# backslashes literal (same value as before, without relying on unknown
# escape sequences).
SCREENSHOT_DIR = r"C:\Users\Tomonari\Desktop\seizureDection"

# Captures Screenshot
if __name__ == "__main__":
    # NOTE(review): ``driver`` and ``url`` are not defined in this script --
    # presumably a Selenium driver set up elsewhere; confirm.
    driver.get(url)
    time.sleep(5)

    # Capture for two minutes, saving im1.png, im2.png, ...
    t_end = time.time() + 60 * 2
    counter = 1
    while time.time() < t_end:
        im = ImageGrab.grab(bbox=(80, 170, 400, 400))
        im.show()
        ImageGrab.grab_to_file("im" + str(counter) + ".png")
        counter = counter + 1

    # Read back exactly the files written above (1 .. counter-1).  The
    # original indexed with ``counter`` instead of the loop variable, so it
    # kept opening one file that was never written.
    for num in range(1, counter):
        im = Image.open(SCREENSHOT_DIR + r"\im" + str(num) + ".png")
        text = image_to_string(im.convert('RGB'))
        busted = text.strip()
        busted = busted[8:-1]
        print(busted.strip())
def getverify1():
    """Fetch the portal login page and captcha, and OCR the captcha.

    The login page is requested first to obtain a ``JSESSIONID`` cookie,
    which is then sent along when downloading the captcha to ``image.jpg``.
    The captcha is binarised at grey level 140, recognised, upper-cased and
    letters commonly confused with digits are replaced.

    Returns:
        ``(text, codecookies)`` -- the recognised code and the session id.
    """
    login_url = 'http://125.88.59.131:10001/login.jsp?wlanuserip=10.101.187.210&wlanacip=113.98.13.29'

    headers_1 = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, sdch',
        'Accept-Language':'zh-CN,zh;q=0.8',
        'Connection':'keep-alive',
        'Cookie':'loginInfo=TrZ8QGOkMeDViIlqMt8yaddowAuqeCbj+xWgsjQMFHHolBC5kzI+KONdjuAU/ARb; md=341C882D880E8E665AAC53AC1F31B570B6817396',
        'Host':'125.88.59.131:10001',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    }

    # Binarisation table and look-alike substitutions.
    threshold = 140
    table = [0 if level < threshold else 1 for level in range(256)]
    rep = {'O': '0', 'I': '1', 'L': '1', 'Z': '2', 'S': '8'}

    page = requests.get(login_url, headers=headers_1)
    codecookies = page.cookies['JSESSIONID']

    headers_2 = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, sdch',
        'Accept-Language':'zh-CN,zh;q=0.8',
        'Cache-Control':'max-age=0',
        'Connection':'keep-alive',
        'Cookie':'loginInfo=TrZ8QGOkMeDViIlqMt8yaddowAuqeCbj+xWgsjQMFHHolBC5kzI+KONdjuAU/ARb; md=341C882D880E8E665AAC53AC1F31B570B6817396 ;JSESSIONID='+codecookies+'; signature=D0AB8BE4413F52D798C67E23DC8E8A78',
        'Host':'125.88.59.131:10001',
        'Referer':'http://125.88.59.131:10001/login.jsp?wlanuserip=10.101.187.210&wlanacip=113.98.13.29',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
    }

    timestamp = str(int(time.time()))
    captcha = requests.get(
        'http://125.88.59.131:10001/common/image.jsp?time=' + timestamp,
        stream=True, headers=headers_2)
    try:
        with open('image.jpg', 'wb') as f:
            f.write(captcha.content)
    except IOError:
        print("IO Error\n")

    binary = Image.open('image.jpg').convert('L').point(table, '1')
    text = image_to_string(binary).strip().upper()
    for letter in rep:
        text = text.replace(letter, rep[letter])
    print(text)
    return text, codecookies
def save_captcha(s, link, filename):
    """Download ``link`` with session ``s`` and save its inline captcha.

    The page embeds the image as base64 between ``base64,`` and
    ``" /><br>``; that payload is decoded and written to ``filename``.
    """
    page = s.get(link).text.encode('utf-8')
    payload = page.split('base64,')[1].split('" /><br>')[0]
    with open(filename, 'wb') as f:
        f.write(base64.b64decode(payload))


pytesser.tesseract_exe_name = 'C:/Python27/Lib/site-packages/tesseract.exe'

filename = 'temp.png'
s = requests.Session()
save_captcha(s, 'http://challenge01.root-me.org/programmation/ch8/', filename)

# Turn every opaque black pixel white before recognition.
imgx = Image.open(filename).convert("RGBA")
pix = imgx.load()
for y in xrange(imgx.size[1]):
    for x in xrange(imgx.size[0]):
        if pix[x, y] == (0, 0, 0, 255):
            pix[x, y] = (255, 255, 255, 255)
imgx.save("bw.gif", "GIF")

original = Image.open('bw.gif')
# Remove all whitespace from the recognised text.
captcha = ''.join(pytesser.image_to_string(original).strip().split())
print(captcha)

# Submit the answer with the same session and show the reply.
r = s.post('http://challenge01.root-me.org/programmation/ch8/',
           data={'cametu': captcha})
source = r.text.encode('utf-8')
print(strip_tags(source))
def Procesar():
    """Crop, OCR and rename every image found in the source folder.

    Reads the source/destination folders from the ``Origen``/``Destino``
    fields, creates ``<destino>/Procesado`` and ``.../Procesado/Errores``,
    and for each image: crops the region reported by ``crop_image``, OCRs a
    fixed sub-area and asks ``Archivos`` for the new file name.  Images with
    a name are saved under ``Procesado``; the others are saved under
    ``Errores`` as ``Error<n>.jpg``.  A log file is written and a summary
    window is shown when done.
    """
    size = 1022, 653  # Size the cropped image is scaled down to
    ErrorCount = 0  # Images for which no name could be extracted
    Success = 0  # Images with a name
    ErrorDesconocido = 0  # Unknown errors (no crop found, save failed)
    log = ""  # Paths of successfully processed images
    errorlog = ""  # Paths of images that ended up in Errores

    # Activate the "go to processed folder" button.
    ttk.Button(ventana,
               text="Ir a la carpeta procesada",
               command=Function,
               state=ACTIVE).place(height=30, width=300, x=240, y=220)

    origen = Origen.get()
    destino = Destino.get()

    # Output folders; an already existing folder is fine.
    directory = destino + "/Procesado"
    try:
        os.mkdir(directory)
    except OSError:
        pass
    location = directory + "/"

    Errores = location + "Errores"
    try:
        os.mkdir(Errores)
    except OSError:
        pass
    location_two = Errores + "/"

    cnt = 1  # Running number for "Error<n>" file names

    # Collect the files of every supported extension once.  The original
    # looped over ``glob(a) or glob(b) or ...``, which yields only the first
    # non-empty pattern although ``suma`` counted all of them.
    archivos = []
    for patron in ("*.jpg", "*.png", "*.jpeg", "*.gif", "*.wmp"):
        archivos.extend(glob.glob(origen + patron))
    suma = len(archivos)

    if (suma != 0):
        div = (740 / suma)  # Progress-bar increment per image

    for infile in archivos:
        flag = 0  # Becomes 1 when no name could be extracted
        progressbar.step(div)
        ventana.update()

        # Remove spaces/invalid characters from the path, since pytesser
        # cannot handle every path string.
        string = EvaluarString(infile)
        try:
            os.rename(infile, string)
            infile = string
        except OSError:
            pass  # Keep working with the original name

        im = Image.open(infile)
        x, y, w, h = crop_image(infile)
        # crop_image signals "nothing found" with an all-zero rectangle.
        if (x == 0 and y == 0 and w == 0 and h == 0):
            ErrorDesconocido += 1
            continue

        image = im.crop((x, y, w + x, y + h))
        im = image
        # thumbnail() works in place, so ``image`` (saved below) shrinks too.
        im.thumbnail(size, Image.ANTIALIAS)

        # Fixed area where the ID text is expected.
        try:
            im = im.crop((294, 100, 1022, 250))
        except Exception:
            pass  # OCR the whole thumbnail instead

        text = image_to_string(im)

        # Archivos extracts the ID digits used as the new file name.
        filename = Archivos(text, location)
        if filename is None:
            filename = "Error" + str(cnt)
            flag = 1

        if (flag == 0):
            Success += 1
            T.insert(END, infile + "\n")
            try:
                image.save(location + filename + ".jpg")
                log += infile + "\n"
            except Exception:
                ErrorDesconocido += 1
        else:
            ErrorCount += 1
            T.insert(END, infile + " # Error" + "\n")
            try:
                image.save(location_two + filename + ".jpg")
                errorlog += infile + "\n"
            except Exception:
                ErrorDesconocido += 1
            cnt += 1

    Log(log, errorlog, location)  # Write the log file

    # Summary window.
    root = Tk()
    ttk.Style().configure("Button",
                          padding=6,
                          relief="flat",
                          background="#ccc")
    botonSalir = Button(root, text="OK", command=root.destroy)
    botonSalir.pack(side=BOTTOM)
    root.title("Procesamiento de Datos OCR")
    Area = Text(root, height=5, width=40)
    Area.pack(side=LEFT)
    Area.insert(
        END, "Imagenes Procesadas: " + str(suma) + "\nErrores: " +
        str(ErrorCount) + "\nSatisfactoriamente Procesadas: " + str(Success) +
        "\nSobreescritura (Repeticiones): " +
        str(suma - (ErrorCount + Success)) + "\nErroresDesconocidos: " +
        str(ErrorDesconocido))
    progressbar.stop()
    root.mainloop()
for y in xrange(image.size[1]): for x in xrange(image.size[0]): if is_white(pixdata[x, y]) | is_noise(pixdata, x, y): pixdata[x, y] = white else: pixdata[x, y] = black return image def remove_line(image): threshold = 100 pixdata = image.load() # Clean the background noise, if color != black, then set to white. for y in xrange(image.size[1]): for x in xrange(image.size[0]): if not (pixdata[x, y][0] > threshold and pixdata[x, y][1] > threshold and pixdata[x, y][2] > threshold): pixdata[x, y] = (0, 0, 0, 255) else: pixdata[x, y] = (255, 255, 255, 255) image = Image.open('2.jpg') image = remove_noise(image) for i in xrange(5): image = remove_noise(image) image.show() print(pytesser.image_to_string(image))
def OCR(img):
    """Print the image path ``img`` and return the text recognised in it."""
    print(img)
    return pytesser.image_to_string(Image.open(img))
#!/usr/bin/python
import pytesser
import gtk.gdk
import os


def screenshot():
    """Save the whole root window as ``screenshot.png``.

    Prints a message instead when the pixel buffer cannot be obtained.
    """
    root = gtk.gdk.get_default_root_window()
    width, height = root.get_size()[0], root.get_size()[1]
    buf = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, width, height)
    buf = buf.get_from_drawable(root, root.get_colormap(),
                                0, 0, 0, 0, width, height)
    if buf is None:
        print("Unable to get the screenshot.")
        return
    buf.save("screenshot.png", "png")


if __name__ == "__main__":
    screenshot()
    print("working...")
    # OCR the capture, then remove the temporary file.
    txt = pytesser.image_to_string("screenshot.png")
    os.remove("screenshot.png")
    print(txt)
startNums = [] tmp = [] tmparray = [] for j in range(len(test)): image = test[j] for i in range(len(image)): tmparray.append(image[i]) if (i+1)%28==0: tmp.append(tmparray) tmparray = [] tmp = np.array(tmp) #tmp = hf.erode(hf.dilate(tmp)) cv2.imwrite('tmp.png',tmp) imagefile = Image.open('tmp.png') tekst = pytesser.image_to_string(imagefile) tekst = tekst.translate(None,'\n') tekst = tekst.translate(None,',') if tekst == '\"I': tekst = '1' elif tekst == 'Z': tekst = '2' elif tekst == '`I': tekst = '1' elif tekst == 'S': tekst = '5' elif tekst == "'I": tekst = '1' elif tekst == 'I':
from PIL import Image
from pytesser import image_to_string

# Recognise the text in abc.png and echo it to stdout.
print(image_to_string(Image.open('abc.png')))
# or (r, g, b) == (255, 255, 204) or (r, g, b) == (204, 170, 153) or (r, g, b) == (255, 213, 204) \ # or (r, g, b) == (204, 255, 204) or (r, g, b) == (153, 170, 153) or (r, g, b) == (204, 255, 255) \ # or (r, g, b) == (204, 213, 255) or (r, g, b) == (153, 213, 153) or (r, g, b) == (204, 213, 255) \ # or (r, g, b) == (153, 170, 204) or (r, g, b) == (153, 213, 204) or (r, g, b) == (204, 213, 153) \ # or (r, g, b) == (255, 255, 255): # rgb.putpixel((x, y), (255, 255, 255)) # else: # rgb.putpixel((x, y), (0, 0, 0)) if r in (51, 102, 153, 204, 255) and g in (43, 85, 128, 170, 213, 255) and b in (51, 102, 153, 204, 255): rgb.putpixel((x, y), (255, 255, 255)) rgb = rgb.convert('L').point(table, '1') # rgb.show() image.close() code = pytesser.image_to_string(rgb) result = '' for words in code.splitlines(): result += words result = ''.join(result.split()).upper() for r in rep: result = result.replace(r, rep[r]) result = filter(str.isdigit, result) reg = r'^(\d{6})$' try: result = re.match(reg, result).group(1) except: result = "Error occurred in OCR." print i, result print "success"
from pytesser import image_to_string
import Image
import ImageOps
import ImageGrab

# image_to_string() requires an image argument; the bare call always raised
# TypeError.
# NOTE(review): ImageGrab was imported but never used, so the intended input
# is presumably a capture of the screen - confirm.
print(image_to_string(ImageGrab.grab()))
#coding=utf8
from PIL import Image
from pytesser import image_to_string
import requests
import time

PATH ="vcode.png"
r = requests.Session()


def _download_captcha(session, destination):
    """Fetch a fresh captcha image and store its raw bytes at ``destination``."""
    # The current timestamp is passed as the ``r`` query parameter.
    payload = session.get("http://www.hualixy.com/inc/checkcode.asp?r=%s"%time.time()).content
    with open(destination,'wb') as handle:
        handle.write(payload)


# Loop forever: download a captcha, OCR it, and print the stripped result.
while True:
    _download_captcha(r, PATH)
    code = image_to_string(Image.open(PATH)).strip()
    print(code)
只有 “pytesser-v0.0.1”字符串。(没有引号) """ #我主机python2.7这个版本无法安装PIL #说明pytesser使用的网址如下: #http://www.cnblogs.com/txw1958/archive/2012/08/09/python-PyTesser.html import Image import socket import base64 import pytesser sock=socket.socket(socket.AF_INET,socket.SOCK_STREAM) sock.connect(('41.231.53.40',9090)) c='' while True: t=sock.recv(100) c=c+t if len(t)<100: break with open('t.png','wb').write(base64.decodestring(c[:---8])) img=Image.new('RGB',(160,20))#创建一个新的图片 out=img.load() pix=Image.open('t.png').load() for i in xrange(20): if reduce(lambda x,y:x+y,pix[105+j,190-i]+pix[94-j,9+j])<1000: out[i,j]=(255,255,255) ans=pytesser.image_to_string(img) print ans sock.send(ans) print sock.recv(1024)
# Decode the base64 captcha and keep it on disk. The payload is raw image
# bytes, so the file is opened in binary mode to avoid newline translation.
img_data = base64.b64decode(img_coded)
with open("img", "wb") as f:
    f.write(img_data)

image = Image.open("img")
image1 = image.convert("RGBA")
pixdata = image1.load()

# If the captcha is flagged as noisy, try to clean it: every opaque black
# pixel is repainted opaque white.
if noise == "yes":
    width, height = image1.size
    for y in xrange(height):
        for x in xrange(width):
            if pixdata[x, y] == (0, 0, 0, 255):
                pixdata[x, y] = (255, 255, 255, 255)

# Round-trip through a GIF file before handing the image to the OCR engine.
image1.save("result.gif", "gif")
image_final = Image.open("result.gif")
captcha = pytesser.image_to_string(image_final).strip()
print("[*] captcha found is "+captcha)

# Submit the recognised captcha together with the session cookies.
data = { "cametu":captcha }
result = requests.post(URL, data=data, cookies=cookies)
print(result.text)

# result.text is unicode; encode it explicitly so pages containing non-ASCII
# characters can be saved instead of raising UnicodeEncodeError.
with open("results.html", "wb") as res:
    res.write(result.text.encode("utf-8"))
print("[*] end with no errors")
for x in xrange(im.size[0]): if pixdata[x, y][0] < 90: pixdata[x, y] = (0, 0, 0, 255) for y in xrange(im.size[1]): for x in xrange(im.size[0]): if pixdata[x, y][1] < 136: pixdata[x, y] = (0, 0, 0, 255) for y in xrange(im.size[1]): for x in xrange(im.size[0]): if pixdata[x, y][2] > 0: pixdata[x, y] = (255, 255, 255, 255) #box = (0,0,sizey,sizex) box = (2, 2, sizey-2, sizex-2) region = im.crop(box) region.save("input-black.jpg", "jpeg") im_orig = Image.open('input-black.jpg') #big = im_orig.resize((540, 160), Image.NEAREST) #big.show() #big.save("test.bmp") file_o.write(pytesser.image_to_string(im_orig)) file_o.close() """ imageSize = (20, 58) image = Image.frombytes('RGB', imageSize, rawData, "F;16") image.save("foo.png") """
def getCheckCodeStringWindows(self, imageObj:Image):
    """OCR the captcha image and return the recognised text.

    The raw OCR result is converted to ``str`` and printed with its label
    before surrounding whitespace is stripped for the return value.
    """
    recognised = str(ocr.image_to_string(imageObj))
    print("验证码: " + recognised)
    return recognised.strip()