def get_Verification_code(self):
    """Crop the captcha from the saved page screenshot, binarize it and OCR it.

    Calls ``self.illegal_query()`` first (defined elsewhere; presumably it
    produces the ``car_page.png`` screenshot -- confirm), then crops a fixed
    region out of that screenshot, thresholds it to a 1-bit image, saves the
    result over ``indent.png`` and runs pytesser3 on that file.

    Returns:
        The recognized text with surrounding whitespace stripped. The text
        is also printed, as before.
    """
    self.illegal_query()

    # Raw strings: same runtime value as before, without relying on invalid
    # escape sequences such as "\s", "\c" and "\i".
    screenshot_path = r"..\screenshots\car_page.png"
    captcha_path = r'..\screenshots\indent.png'

    # Crop box (left, upper, right, lower). Per the original note, the
    # coordinates can be read from "bounds" in UI Automator's element list.
    box = (726, 678, 960, 774)
    imgScreen = Image.open(screenshot_path)
    img = imgScreen.crop(box)
    img.save(captcha_path)

    # ------------ recognize the captcha on the cropped image ------------
    im = Image.open(captcha_path)
    imgry = im.convert('L')  # colour -> grayscale

    # Lookup table for binarization: gray levels below the threshold map
    # to 0, all others to 1. The threshold value is the key tuning knob.
    threshold = 170
    table = [0 if level < threshold else 1 for level in range(256)]
    bim = imgry.point(table, '1')
    bim.save(captcha_path)  # overwrite the crop with the binarized image

    # The previous contrast/sharpness "enhancement" built enhancers on the
    # grayscale image but discarded the images returned by enhance(), so it
    # never influenced the file that is OCR'd; it has been removed.

    sleep(2)
    code = pytesser3.image_file_to_string(captcha_path).strip()
    print(code)
    return code
def login_in(self, username, password):
    """Log in through the web form, reading the captcha via screenshot + OCR.

    Opens the login page, types the credentials, screenshots the page,
    crops the fixed captcha region, OCRs it with pytesser3, submits the
    form and finally stores a timestamped screenshot of the result.

    Args:
        username: Text typed into the ``loginNo`` field.
        password: Text typed into the ``passWord`` field.
    """
    driver = self.driver
    driver.get('http://192.168.9.14:80/')
    sleep(2)
    driver.find_element_by_id('loginNo').send_keys(username)
    driver.find_element_by_id('passWord').send_keys(password)

    # Approach 1 (not used): downloading the captcha through its ``src`` URL
    # does not work because the image changes on every refresh.
    # Approach 2: screenshot the page and crop the captcha region.
    # Assumes acquire_file_dir() returns the same directory on every call
    # (it was previously re-evaluated for each path) -- confirm.
    log_dir = acquire_file_dir() + r'\pic_log'
    captcha_path = log_dir + r'\login_code.png'

    driver.get_screenshot_as_file(captcha_path)
    # Crop box is (left, upper, right, lower); the crop overwrites the
    # full-page screenshot.
    Image.open(captcha_path).crop((615, 383, 683, 403)).save(captcha_path)
    code = pytesser3.image_file_to_string(captcha_path)

    driver.find_element_by_id('code').send_keys(code)
    driver.find_element_by_id('c').click()

    # Take ONE timestamp. Calling now() separately for the date, hour,
    # minute and second parts can mix values from different instants when
    # the clock rolls over between calls.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    strPath = log_dir + '\\' + timestamp + '.png'
    self.driver.get_screenshot_as_file(strPath)
def login():
    """Attempt a douban.com login, showing an OCR guess for the captcha.

    Fetches the captcha descriptor (URL + token), downloads the captcha
    image to ``code.png``, prints pytesser3's reading of it, asks the user
    to type the captcha, posts the login form and prints whether the
    response text contains the expected user name.
    """
    # NOTE(review): several header names/values and the form keys further
    # down contain spaces around punctuation (e.g. 'Accept - Encoding',
    # 'captcha - id', 'https: // www.douban.com /'). They look mangled and
    # are kept byte-for-byte here -- confirm against the real site.
    heads = {
        'Accept': 'text / html, application / xhtml + xml, application / xml;q = 0.9, image / webp, * / *;q = 0.8',
        'Accept - Encoding': 'gzip, deflate, br',
        'Accept - Language': 'zh - CN, zh;q = 0.8',
        'Cache - Control': 'max - age = 0',
        'Connection': 'keep - alive',
        'Content - Length': '75',
        'Content - Type': 'application / x - www - form - urlencoded',
        'Referer': 'https://www.douban.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
    }

    # Fetch the captcha descriptor.
    response = requests.get('https://www.douban.com/j/misc/captcha', headers=heads)
    result = response.json()
    captchaUrl = result['url']
    captchaToken = result['token']
    print(captchaToken)

    # Download the captcha image. The context manager guarantees the file
    # is flushed and closed even if the write fails.
    response = requests.get('https:' + captchaUrl, headers=heads)
    codeimg = response.content
    with open('code.png', 'wb') as fn:
        fn.write(codeimg)

    # image_file_to_string works on a file name, so pass the path rather
    # than an open file object.
    text = pytesser3.image_file_to_string('code.png', graceful_errors=True)
    print(text)

    data = {
        'source': 'index_nav',
        'redir': 'https: // www.douban.com /',
        'form_email': '*****@*****.**',
        'form_password': '******',
        'captcha - id': captchaToken,
        # The OCR result above is only a hint; the user types the answer.
        'captcha - solution': input("请输入验证码:")
    }

    # Log in.
    url = 'https://www.douban.com/accounts/login'
    response = requests.post(url, data=data, headers=heads)
    if 'lovemefan' in response.text:
        print('登录成功')
    else:
        print('登录失败')
def verify_code(image_address):
    """OCR the captcha stored at *image_address* and return the cleaned text.

    The image is converted to grayscale, its contrast is doubled, and the
    enhanced version is saved back over *image_address* before pytesser3
    reads it. Note that this overwrites the input file.

    Args:
        image_address: Path of the captcha image file.

    Returns:
        The recognized text with spaces removed and known misreads
        corrected ("/" -> "1", backslash + "N" -> "W").
    """
    image = Image.open(image_address)
    # Grayscale first, then boost contrast.
    image = image.convert('L')
    sharp_img = ImageEnhance.Contrast(image).enhance(2.0)
    sharp_img.save(image_address)

    result = pytesser3.image_file_to_string(image_address).replace(" ", "")
    # str.replace returns a NEW string; the corrections must be assigned
    # back, otherwise they are silently lost.
    result = result.replace("/", "1").replace("\\N", "W")
    # A "(" -> "C" correction was considered but is left disabled.
    print(result)
    return result
def getVcode(language):
    """Return at most the first four non-space characters of the OCR'd captcha.

    Calls ``loadVcode(PATH)`` and ``twrify(PATH, PATH2)`` (both defined
    elsewhere; presumably they fetch and pre-process the captcha image --
    confirm), then runs pytesser3 on ``PATH2``.

    Args:
        language: Passed through to pytesser3 as its ``language`` argument.

    Returns:
        A string of up to four characters, with spaces skipped.
    """
    loadVcode(PATH)
    twrify(PATH, PATH2)
    recognized = pytesser3.image_file_to_string(PATH2, language=language)
    # Drop every space, then keep the leading four characters that remain.
    without_spaces = recognized.replace(" ", "")
    return without_spaces[:4]
import pytesser3
from PIL import Image
import os

# Raw string is required: in a normal string literal "\U" starts a unicode
# escape, so 'C:\Users\...' is a SyntaxError on Python 3.
image_path = r'C:\Users\pc\Desktop\qkyunwei\static\jpeg\code.jpeg'
image = Image.open(image_path)

# image_file_to_string takes a file name, so pass the path rather than the
# opened Image object. The cleanup flag is a boolean; the string that was
# passed before was merely truthy.
print(pytesser3.image_file_to_string(image_path, cleanup=True))
# coding:utf-8
# Download a batch of captcha images, OCR each one and collect the
# recognized text in a single file.
from urllib import request

from pytesser3 import image_file_to_string

url = "http://60th.zafu.edu.cn/system/resource/survey/createsurveycheckimg.jsp?random="

# One template for the numbered image files, shared by both loops.
image_path_template = r"D:\pycharm-project\exercise\pic\%s.jpg"

# Fetch images 1..99.
for i in range(1, 100):
    request.urlretrieve(url, image_path_template % i)

# The context manager closes the output file even if OCR raises an
# unexpected exception part-way through.
with open(r"D:\pycharm-project\exercise\pic\code.txt", "w+") as f:
    for i in range(1, 100):
        try:
            code = image_file_to_string(image_path_template % i)
        except UnicodeDecodeError:
            # Undecodable OCR output: record an empty line instead.
            f.write('\n')
        else:
            f.write(code)
def test_Forgot_password(self):
    """Drive the sign-up / forgot-password form and OCR its captcha.

    Fills in the account and captcha fields, ticks the privacy checkbox via
    JavaScript, clicks "next", re-prompts on the console while the account
    error message is shown, and when the captcha error message is shown,
    screenshots the page, crops the captcha region, enhances it and prints
    pytesser3's reading.

    NOTE(review): the original indentation of this method was lost; the
    screenshot/OCR steps are assumed to belong to the "captcha is wrong"
    branch -- confirm.
    """
    self.driver.find_element_by_link_text("开始").click()
    # Locate the account input (looked up by element id).
    self.account_field = self.driver.find_element_by_id('sign-up-user-name')
    # Locate the captcha input (looked up by element id).
    self.captcha_field = self.driver.find_element_by_id('id_captcha_1')
    self.account_field.clear()
    self.captcha_field.clear()
    self.driver.implicitly_wait(30)
    # Type the account name.
    self.account_field.send_keys('*****@*****.**')
    # Type a fixed value into the captcha field.
    self.captcha_field.send_keys('Qq123456')
    # Tick the "I have read and agree" checkbox by clicking it from JS.
    js = "$('#privacy').click();"
    self.driver.execute_script(js)
    self.driver.implicitly_wait(30)
    yes = self.driver.find_element_by_id('login-next').is_enabled()
    print(yes)
    a = self.driver.find_element_by_id('login-next').click()
    # Account (phone or e-mail) has an invalid format.
    error_info = self.driver.find_element_by_id('sign-up-invalid-user-name').text
    print(error_info)
    # Keep asking on the console until the account error message changes.
    while error_info == "请输入手机或邮箱":
        self.account_field.clear()
        self.account_field.send_keys(input("输入正确的手机或邮箱:"))
        self.driver.find_element_by_id('login-next').click()
        error_info = self.driver.find_element_by_id('sign-up-invalid-user-name').text
    # Captcha is wrong.
    error_info = self.driver.find_element_by_id('sign-up-invalid-captcha').text
    if error_info=="请正确输入图中文字或点击图片换一张":
        print("验证码不对")
        # ---------- screenshot the captcha region (a crude approach) ----------
        self.driver.get_screenshot_as_file('D:\\image1.jpg')  # easy to follow
        im = Image.open('D:\\image1.jpg')
        box = (230, 650, 400, 720)  # region to crop
        region = im.crop(box)  # region is a new image object
        # region.show()  # showing it would lock the file, so keep disabled
        region.save("D:/image_code.jpg")
        # ---------- enhance the image + OCR the simple captcha ----------
        # def image_file_to_string(file):
        #     cwd = os.getcwd()
        #     try:
        #         os.chdir("D:\MyCode\Pycharm\Selenium_test\venv\Lib")
        #         return pytesser3.image_file_to_string(file)
        #     finally:
        #         os.chdir(cwd)
        im = Image.open("D:\\image_code.jpg")
        imgry = im.convert('L')  # convert to grayscale before enhancing
        sharpness = ImageEnhance.Contrast(imgry)  # contrast enhancement
        sharp_img = sharpness.enhance(2.0)
        sharp_img.save("D:\\image_code.jpg")
        #time.sleep(3)  # guard against OCR starting before the image is saved
        img=Image.open("D:\\image_code.jpg")
        code = pytesser3.image_file_to_string("D:\\image_code.jpg")  # recognized captcha text (str)
        print(code)  # print it to check whether recognition was correct
        self.driver.implicitly_wait(300)
sys.stdout.write(" 已完成:%.3f%%" % float(i / int(4700 / 20)) + '\r') sys.stdout.flush() df.to_excel('comment_fromdb.xlsx') # ============================================================================= # 糗事百科 # ============================================================================= import requests from bs4 import BeautifulSoup import re import pandas as pd import numpy as np import os import sys import time import pytesser3 from PIL import ImageGrab os.chdir(r'C:/Users/hongzk/Desktop') print(pytesser3.image_file_to_string('ttt.png')) target = 'https://www.qiushibaike.com/text/' r = requests.get(target) html = r.text parseHtml = BeautifulSoup(html) contents = parseHtml.find('div', attrs={ 'id': 'content-left' }).find_all("div", attrs={"class": re.compile("article block untagged mb15.*")}) for content in contents: message = content.find("a").find("div").find("span").text
# Refresh, OCR and submit the captcha repeatedly until the page no longer
# shows the "captcha incorrect" dialog.
while b:
    # Click the captcha image's parent element to refresh the captcha.
    driver.find_element_by_xpath("//img[@id='inputImage']/..").click()
    time.sleep(1)
    driver.save_screenshot('screenshot.png')

    imgelement = driver.find_element_by_id('inputImage')
    location = imgelement.location  # x / y position of the captcha
    size = imgelement.size  # width / height of the captcha
    # Crop box (left, upper, right, lower) within the screenshot.
    rangle = (int(location['x']),
              int(location['y']),
              int(location['x'] + size['width']),
              int(location['y'] + size['height']))

    # Use a context manager so the screenshot file handle is released on
    # every iteration instead of accumulating while the loop retries.
    with Image.open("screenshot.png") as i:
        result = i.crop(rangle)  # cut the captcha out of the screenshot
        result.save('result.png')

    # Only the first four non-space characters are used.
    yanzhengma = pyt3.image_file_to_string('result.png', 'eng').replace(' ', '')[:4]
    print(">%s<" % yanzhengma)

    driver.find_element_by_xpath(my.xpath3).clear()  # clear the captcha input
    driver.find_element_by_xpath(my.xpath3).send_keys(yanzhengma)  # type the captcha
    driver.find_element_by_xpath(my.xpath4).click()  # click "log in"
    time.sleep(2)

    try:
        # If the error dialog is present the captcha was wrong: dismiss it
        # and go round again.
        driver.find_element_by_xpath(
            "//div[contains(text(),'验证码不正确,请重新输入验证码')]")
        time.sleep(1)
        driver.find_element_by_xpath(
            "//button[@class = 'swal2-confirm swal2-styled']").click()
    except Exception:
        # No error dialog found: treat as a successful login and stop.
        # Catching Exception (not a bare except) lets Ctrl-C / SystemExit
        # propagate instead of being mistaken for a successful login.
        b = False
mypage = re.findall('"data":(.*?])}', mydata) print(mypage) ''' from PIL import Image, ImageEnhance, ImageFilter, ImageGrab import pytesser3 import sys im = Image.open('C:/Users/Administrator/Desktop/text/captcha (1).png') print(im) print(im.format) print(im.size) print(im.mode) #im.show() print(pytesser3.image_file_to_string('C:/Users/Administrator/Desktop/1.png')) # 二值化 threshold = 140 table = [] for i in range(256): if i != threshold: table.append(0) else: table.append(1) rep = { 'O': '0',