def urllib2_yzm_cookie():
    """Log in with an OCR-solved captcha and save the grade page to ``1.txt``.

    A cookie-backed opener is used for every request so the captcha image,
    the login POST and the grade-page fetch all share one session. The
    captcha is downloaded to ``yzm.jpg``, recognised with pytesser, and the
    login is retried until the fetched grade page no longer contains the
    text ``RANDOMCODE``.

    Side effects: overwrites ``yzm.jpg`` and ``1.txt`` in the working
    directory on every attempt and prints each OCR guess.

    Returns:
        None. The loop has no attempt limit; it only ends once the grade
        page lacks ``RANDOMCODE``.
    """
    url = 'http://202.119.81.112:8080/Logon.do?method=logon'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }
    data = {'USERNAME': '******', 'PASSWORD': '******', 'RANDOMCODE': ''}
    postdata = urllib.urlencode(data)

    # Cookie jar shared by every request made through `opener`.
    cookie = cookielib.CookieJar()
    handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(handler)

    # First request (with an empty captcha) is made only to obtain cookies.
    request = urllib2.Request(url, postdata, headers=headers)
    opener.open(request)

    # OCR is error-prone, so keep fetching a captcha and logging in until
    # an attempt succeeds.
    while True:
        # Fetch the captcha image; `with` closes the file even if the
        # download or the write fails part-way.
        yzm = opener.open('http://202.119.81.112:8080/verifycode.servlet')
        with open('yzm.jpg', 'wb') as yzmfile:
            yzmfile.write(yzm.read())

        # Recognise the captcha, drop spaces/newlines, keep four characters.
        image = Image.open('yzm.jpg')
        yzmtext = pytesser.image_to_string(image)
        yzmtext = yzmtext.replace(' ', '').replace('\n', '')[:4]
        print(yzmtext)

        # Simulated login with the recognised code.
        data['RANDOMCODE'] = yzmtext
        postdata = urllib.urlencode(data)
        request = urllib2.Request(url, postdata, headers=headers)
        opener.open(request)

        # Fetch the grade page and keep a copy on disk.
        grade_url = 'http://202.119.81.112:9080/njlgdx/kscj/cjcx_list'
        response = opener.open(grade_url)
        html = response.read()
        with open('1.txt', 'w') as f:
            f.write(html)

        # NOTE(review): presumably 'RANDOMCODE' only appears when the login
        # form is served again, i.e. the attempt failed - confirm.
        if re.search('RANDOMCODE', html) is None:
            break
def get_webtoken():
    """Log in to e.oppo.cn with an OCR-solved captcha and return the token cookie.

    Each outer attempt starts with a fresh cookie jar. The captcha is
    downloaded to ``/data/rx/image/image.jpg`` and re-fetched until the OCR
    result is exactly four characters long; the login form is then posted.
    The attempt succeeds when the second space-separated field of the cookie
    jar's string form starts with ``WEBTOKEN``.

    Returns:
        That field of the cookie jar's string form (it starts with
        ``WEBTOKEN``). The function retries without limit until it gets one.
    """
    # Captcha address and login POST address.
    CaptchaUrl = "http://e.oppo.cn/loginCaptcha"
    LoginUrl = "http://e.oppo.cn/login"

    while True:
        # Bind a cookie jar to the opener; cookielib manages the cookies.
        cookie = cookielib.CookieJar()
        handler = urllib2.HTTPCookieProcessor(cookie)
        opener = urllib2.build_opener(handler)

        while True:
            # Fetching the captcha through the opener also collects cookies.
            picture = opener.open(CaptchaUrl)
            # `with` closes the file even if the download or write fails.
            with open('/data/rx/image/image.jpg', 'wb') as local:
                local.write(picture.read())
            im = Image.open('/data/rx/image/image.jpg')
            text = pytesser.image_to_string(im).strip()
            # Only a four-character OCR result is submitted.
            if len(text) == 4:
                break

        body = {'name': '扣费', 'passwd': '123456', 'captcha': text}
        data = urllib.urlencode(body)
        request = urllib2.Request(url=LoginUrl, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        opener.open(request)

        # The string form of an empty jar contains no space, so indexing
        # [1] directly would raise IndexError; treat that as a failed
        # attempt and retry instead.
        parts = str(cookie).split(' ')
        cookies = parts[1] if len(parts) > 1 else ''
        if cookies.find('WEBTOKEN') == 0:
            break
    return cookies
def recognize(pic_name):
    """OCR the picture ``pic_name`` and return a four-letter code in upper case.

    The picture is first passed through ``pre_operation``; the stripped OCR
    text is accepted only when it is exactly four characters long and
    ``isalpha(text)`` is true. An accepted code is printed before being
    returned.

    Returns:
        The upper-cased text, or None when the OCR result is rejected.
    """
    prepared = pre_operation(pic_name)
    text = pytesser.image_to_string(prepared).strip()
    # Guard clause: reject anything that is not a four-character alpha code.
    if len(text) != 4 or not isalpha(text):
        return None
    print(text)
    return text.upper()
def __ImagePriceExtract(self, file):
    """OCR a price image and return the price as a float.

    The image is split into exactly four bands, the first three are merged
    into an RGB image, and the OCR text is converted with ``float`` after
    spaces, newlines and carriage returns are removed.

    Args:
        file: Whatever ``Image.open`` accepts for the price image.

    Raises:
        DropItem: On any failure (image does not have four bands, OCR text
            is not a number, ...), carrying the original error message.
    """
    try:
        picture = Image.open(file)
        # Unpacking into four names fails unless there are four bands.
        r, g, b, a = picture.split()
        rgb_picture = Image.merge('RGB', (r, g, b))
        text = image_to_string(rgb_picture)
        for junk in (" ", '\n', '\r'):
            text = text.replace(junk, '')
        return float(text)
    except Exception as e:
        raise DropItem("item price can't not extracted successfully: %s" % e)
def captchaRecognize(captchaImg):
    """OCR a (pre-processed) captcha image and return its four digits.

    Everything except the characters 0-9 is discarded from the OCR text.

    Returns:
        The digit string when exactly four digits were recognised,
        otherwise the integer 0.
    """
    # Recognise the image.
    ocr_text = pytesser.image_to_string(captchaImg)
    # Keep only the digits from the recognised text.
    digits = ''.join(re.findall('[0-9]', ocr_text))
    return digits if len(digits) == 4 else 0
def solve_captcha(self, img):
    """Download the captcha at URL ``img``, OCR it and return up to four characters.

    The image is resized to 116x56 (nearest-neighbour) and converted to TIFF
    before being handed to ``image_to_string``. When ``self.tmp`` is true the
    TIFF is written to ``code.tif`` under the path yielded by ``tmanage()``;
    otherwise it is kept in an in-memory buffer.

    Args:
        img: URL of the captcha image.

    Returns:
        The first four characters of the OCR text after spaces and periods
        have been removed.
    """
    req = urllib2.Request(img)
    img_file = Image.open(StringIO(urllib2.urlopen(req).read()))
    # Image.resize returns a new image; the result must be kept, otherwise
    # the resize has no effect.
    img_file = img_file.resize((116, 56), Image.NEAREST)
    if self.tmp:
        # NOTE(review): tmanage() presumably yields a temporary directory
        # path - confirm against its definition.
        with tmanage() as t:
            n = ''.join([t, "/code.tif"])
            img_file.save(n)
            img_file = Image.open(n)
            s = image_to_string(img_file)
            img_file.close()
    else:
        output = StringIO()
        img_file.save(output, format="TIFF")
        s = image_to_string(Image.open(StringIO(output.getvalue())))
        output.close()
    s = s.replace(' ', '')
    s = s.replace('.', '')
    return s[:4]
def solve_captcha(self,img):
    """Download the captcha at URL ``img``, OCR it and return up to four characters.

    The image is resized to 116x56 (nearest-neighbour) and converted to TIFF
    before being handed to ``image_to_string``. When ``self.tmp`` is true the
    TIFF is written to ``code.tif`` under the path yielded by ``tmanage()``;
    otherwise it is kept in an in-memory buffer.

    Args:
        img: URL of the captcha image.

    Returns:
        The first four characters of the OCR text after spaces and periods
        have been removed.
    """
    req = urllib2.Request(img)
    img_file = Image.open(StringIO(urllib2.urlopen(req).read()))
    # Image.resize returns a new image; the result must be kept, otherwise
    # the resize has no effect.
    img_file = img_file.resize((116, 56), Image.NEAREST)
    if self.tmp:
        # NOTE(review): tmanage() presumably yields a temporary directory
        # path - confirm against its definition.
        with tmanage() as t:
            n = ''.join([t, "/code.tif"])
            img_file.save(n)
            img_file = Image.open(n)
            s = image_to_string(img_file)
            img_file.close()
    else:
        output = StringIO()
        img_file.save(output, format="TIFF")
        s = image_to_string(Image.open(StringIO(output.getvalue())))
        output.close()
    s = s.replace(' ', '')
    s = s.replace('.', '')
    return s[:4]
def access_company_url(self, company_url):
    """Scrape one company page and return "company,contact,number".

    The page is fetched from ``http://bj.597rcw.com`` + ``company_url`` and
    decoded as GBK. The contact number is published as an image, so it is
    downloaded and run through ``image_to_string``.

    Args:
        company_url: Path of the company page, appended to the base URL.

    Returns:
        The company name, contact name and OCR-ed number joined by commas,
        or '' when the page has no number-image link.
    """
    base_url = 'http://bj.597rcw.com'
    page = urllib2.urlopen(base_url + company_url).read().decode("gbk")

    number_urls = re.findall('/AspNet/StrToImg.ashx.+?/>', page, re.S)
    if not number_urls:
        return ''

    # First '<td><b>NAME</b></td>' cell; the slice strips the 7-character
    # '<td><b>' prefix and the 9-character '</b></td>' suffix.
    company_cell = re.findall('<td><b>.+?</b></td>', page, re.S)[0]
    company_name = company_cell[7:-9]

    # First '<td height="25" width="85%">NAME</td>' cell; the slice strips
    # the 28-character opening tag and the 5-character '</td>'.
    contact_cell = re.findall('<td height="25" width="85%">.+?</td>', page, re.S)[0]
    contacts_name = contact_cell[28:-5]

    # e.g. <img src="/AspNet/StrToImg.ashx?type=code&email=...">; the last
    # four characters of the match are cut off to leave the image URL.
    number_url = number_urls[0][:-4]

    # Convert the number image to text.
    image_bytes = urllib2.urlopen(base_url + number_url).read()
    img = Image.open(cStringIO.StringIO(image_bytes))
    number = image_to_string(img)
    return ",".join([company_name, contacts_name, number])
# -*- coding: utf-8 -*-
from pytesser.pytesser import image_to_string
from PIL import *
import Image
import ImageEnhance

# Script: open one image, multiply its contrast by a fixed factor and print
# the text that OCR finds in the enhanced copy.
IMAGE_PATH = r"TB2965Lb46I8KJjSszfXXaZVXXa_!!646445699.jpg.jpg"
CONTRAST_FACTOR = 4

image = Image.open(IMAGE_PATH)
enhancer = ImageEnhance.Contrast(image)
image_enhancer = enhancer.enhance(CONTRAST_FACTOR)
recognized_text = image_to_string(image_enhancer)
print(recognized_text)
def ocr(image):
    """Return the text pytesser recognises in ``image`` after converting it to RGB."""
    rgb_image = image.convert('RGB')
    recognized = pytesser.image_to_string(rgb_image)
    return recognized
def ocr_cropped(image, box):
    """Return the text pytesser recognises inside the ``box`` region of ``image``.

    Args:
        image: Image object providing ``crop``.
        box: Region passed straight to ``image.crop``.
    """
    # Crop first, then convert only the cropped region to RGB for OCR.
    return pytesser.image_to_string(image.crop(box).convert('RGB'))
'referer': 'http://hub.hust.edu.cn/index.jsp' }).content global k1, k2 k1, k2 = eval(content) verify_image_url = 'http://hub.hust.edu.cn/randomImage.action?k1=%s&k2=%s&uno=%s&time=%d' % ( k1, k2, username, TIME) content = s.get(verify_image_url).content global vimg vimg = Image.open(StringIO(content)) get_rand_key() try: from pytesser.pytesser import image_to_string vcode = image_to_string(vimg).strip() vimg.save('vcode.jpg') print vcode except: import traceback, sys traceback.print_exc(file=sys.stdout) vimg.show() vcode = raw_input('verify code:') data = { 'usertype': 'xs', 'username': username, 'password': password, 'rand': '', 'ln': server, 'random_key1': '',
def resolver(path):
    """Return whatever ``pytesser.image_to_string`` yields for ``path``."""
    return pytesser.image_to_string(path)
def extractText(left, top, right, bottom, scale = 2):
    """Capture a region, enlarge it and return the text OCR finds in it.

    Args:
        left, top, right, bottom: Passed unchanged to ``capture``.
        scale: Factor applied to every dimension of the captured image
            before OCR (default 2).

    Returns:
        The result of ``tesser.image_to_string`` on the enlarged capture.
    """
    shot = capture(left, top, right, bottom)
    enlarged_size = [scale * side for side in shot.size]
    enlarged = shot.resize(enlarged_size)
    return tesser.image_to_string(enlarged)
# print headers['Cookie'] # 输入用户名和密码 login_info['USERNAME'] = raw_input('Username: '******'PASSWORD'] = raw_input('Password: '******'yzm.jpg', 'wb') yzmfile.write(yzm_response.content) yzmfile.close() # 识别验证码 image = Image.open('yzm.jpg') yzmtext = pytesser.image_to_string(image) tmp = list(yzmtext) while ' ' in tmp: tmp.remove(' ') while '\n' in tmp: tmp.remove('\n') yzmtext = ''.join(tmp)[:4] # print yzmtext login_info['RANDOMCODE'] = yzmtext # 重新登陆,若验证码正确则登陆成功 # should_success_response = requests.get(login_url, params=urllib.urlencode(login_info), headers=headers) should_success_response = requests.post(login_url, data=login_info, headers=headers) html = should_success_response.content
# Script: binarise 'code.png', OCR it through a temporary TIFF and print the
# alphanumeric characters that were recognised.
run_from_console = __name__ == '__main__'

img = Image.open('code.png')
imgry = img.convert('L')  # greyscale

# Lookup table for point(): grey levels below the threshold map to 0, all
# others to 1.
threshold = 150
table = [0 if i < threshold else 1 for i in range(256)]
out = imgry.point(table, '1')

# Round-trip through a TIFF file on disk before running OCR.
fname = 'test.tiff'
out.save(fname, dpi=(72, 72))
tiff = Image.open(fname)
code = pytesser.image_to_string(tiff)

# Keep only ASCII letters and digits from the OCR output.
clean_code = re.sub(r'[^0-9a-zA-Z]', '', code)
print(clean_code)
os.remove(fname)
def extractText(left, top, right, bottom, scale=2):
    """Capture a region, enlarge it and return the text OCR finds in it.

    Args:
        left, top, right, bottom: Passed unchanged to ``capture``.
        scale: Factor applied to every dimension of the captured image
            before OCR (default 2).

    Returns:
        The result of ``tesser.image_to_string`` on the enlarged capture.
    """
    shot = capture(left, top, right, bottom)
    enlarged_size = [scale * side for side in shot.size]
    enlarged = shot.resize(enlarged_size)
    return tesser.image_to_string(enlarged)
image_to_string = None def get_rand_key(): verify_url = 'http://hub.hust.edu.cn/randomKey.action?username=%s&time=%d' % (username, TIME) content = s.get(verify_url, headers={'referer': 'http://hub.hust.edu.cn/index.jsp'}).content global k1, k2 k1, k2 = eval(content) verify_image_url = 'http://hub.hust.edu.cn/randomImage.action?k1=%s&k2=%s&uno=%s&time=%d' % (k1, k2, username, TIME) content = s.get(verify_image_url).content global vimg vimg = Image.open(StringIO(content)) get_rand_key() try: from pytesser.pytesser import image_to_string vcode = image_to_string(vimg).strip() vimg.save('vcode.jpg') print vcode except: import traceback, sys traceback.print_exc(file=sys.stdout) vimg.show() vcode = raw_input('verify code:') data = { 'usertype': 'xs', 'username': username, 'password': password, 'rand': '', 'ln': server, 'random_key1' : '',