def crack_sougou(self, url):
    """Try to get past the captcha page for a URL that previously failed.

    Two kinds of URL are handled, selected by regex match on ``url``:

    * ``weixin.sogou.com``: the page is loaded in ``self.driver``. If the
      page source already contains the marker text '搜公众号' the page is
      treated as normal. Otherwise the captcha image element is located,
      cropped out of a full-page screenshot, saved to the path returned by
      ``get_captcha_path()`` and posted to ``GETCAPTCHA_URL``. If that
      request raises, the image bytes are passed to
      ``captch_upload_image`` instead. A non-empty answer is typed into
      the form and submitted.
    * ``mp.weixin.qq.com``: the verify-code image is downloaded through
      ``self.s``, passed to ``captch_upload_image`` and the answer is
      posted back to the same URL (through ``self.proxies`` when set).

    Args:
        url: The URL whose fetch did not succeed.

    Returns:
        '正确' when the Sogou page shows the marker text (either straight
        away or after submitting the captcha); ``None`` in every other
        case, including the whole ``mp.weixin.qq.com`` branch.
    """
    # NOTE(review): nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the original layout.
    log.info('------开始处理未成功的URL:{}'.format(url))
    if re.search(r'weixin\.sogou\.com', url):
        log.info('------开始处理搜狗验证码------')
        self.driver.get(url)
        time.sleep(2)
        if '搜公众号' in self.driver.page_source:
            log.info('浏览器页面正常' + '直接返回')
            log.info('title {}'.format(self.driver.title))
            return '正确'
        try:
            img = self.wait.until(
                EC.presence_of_element_located((By.ID, 'seccodeImage')))
            log.info('------出现验证码页面------')

            # Crop the captcha element out of a full-page screenshot.
            location = img.location
            size = img.size
            left = location['x']
            top = location['y']
            right = left + size['width']
            bottom = top + size['height']
            screenshot = Image.open(
                BytesIO(self.driver.get_screenshot_as_png()))
            captcha = screenshot.crop((left, top, right, bottom))
            captcha_path = get_captcha_path()
            captcha.save(captcha_path)
            captcha_name = os.path.basename(captcha_path)

            captch_input = ''
            try:
                # Keep the file open only for the duration of the upload so
                # the handle is always released.
                with open(captcha_path, 'rb') as captcha_file:
                    files = {
                        'img': (captcha_name, captcha_file, 'image/png', {}),
                    }
                    res = requests.post(url=GETCAPTCHA_URL, files=files,
                                        timeout=self.timeout)
                res = res.json()
                if res.get('Success'):
                    captch_input = res.get('Captcha')
            except Exception as e:
                # Any failure of the request above falls back to
                # captch_upload_image with the raw image bytes.
                log.info('本地识别搜狗验证码获取异常,使用打码平台:{}'.format(e))
                with open(captcha_path, "rb") as f:
                    filebytes = f.read()
                captch_input = captch_upload_image(filebytes)
            log.info('------验证码:{}------'.format(captch_input))

            if captch_input:
                input_text = self.wait.until(
                    EC.presence_of_element_located(
                        (By.ID, 'seccodeInput')))
                input_text.clear()
                input_text.send_keys(captch_input)
                time.sleep(1)
                submit = self.wait.until(
                    EC.element_to_be_clickable((By.ID, 'submit')))
                submit.click()
                time.sleep(1)
                if '搜公众号' not in self.driver.page_source:
                    log.info('搜公众号 不在页面中验证失败')
                    log.info('title{}'.format(self.driver.title))
                    return
                log.info('------验证码正确------')
                return '正确'
        except Exception as e:
            # Deliberate best effort: any failure in the captcha flow is
            # logged and the method returns None.
            log.info('------未跳转到验证码页面,跳转到首页,忽略------ {}'.format(e))
    elif re.search(r'mp\.weixin\.qq\.com', url):
        log.info('------开始处理微信验证码------')
        cert = random.random()
        image_url = 'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
            cert)
        respones = self.s.get(image_url, cookies=self.cookies)
        captch_input = captch_upload_image(respones.content)
        log.info('------验证码:{}------'.format(captch_input))
        data = {'cert': cert, 'input': captch_input}
        if self.proxies:
            r = self.s.post(image_url, cookies=self.cookies, data=data,
                            proxies=self.proxies)
            ret = r.json().get('ret')
            log.info(ret)
            if ret == 0:
                log.info('------验证码正确----ret--{}'.format(ret))
                log.info('------cookies已更新------{}'.format(r.status_code))
                time.sleep(0.5)
        else:
            r = self.s.post(image_url, cookies=self.cookies, data=data)
            ret = r.json().get('ret')
            log.info(ret)
            if ret == 0:
                log.info('------验证码正确----ret--{}'.format(ret))
                log.info('------cookies已更新------{}'.format(r.status_code))
def crack_sougou(self, url):
    """Try to get past the captcha page for a URL that previously failed.

    Two kinds of URL are handled, selected by regex match on ``url``:

    * ``weixin.sogou.com``: the page is loaded in ``self.browser``. While
      the page source still contains the marker text '搜公众号' the URL is
      reloaded, up to 30 times. The captcha image element is then located,
      cropped out of a full-page screenshot, saved under ``IMAGE_DIR`` as
      ``CAPTCHA_NAME`` and posted to ``GetCaptcha_url``. A non-empty
      answer is typed into the form and submitted.
    * ``mp.weixin.qq.com``: the verify-code image is downloaded through
      ``self.s``, passed to ``captch_upload_image`` and the answer is
      posted back to the same URL.

    Args:
        url: The URL whose fetch did not succeed.

    Returns:
        Always ``None``.

    Raises:
        RuntimeError: If any step of the Sogou captcha flow raises.
    """
    # NOTE(review): nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the original layout.
    log('------开始处理未成功的URL:{}'.format(url))
    if re.search(r'weixin\.sogou\.com', url):
        log('------开始处理搜狗验证码------')
        self.browser.get(url)
        time.sleep(2)
        if '搜公众号' in self.browser.page_source:
            # Reload until the marker text disappears from the page,
            # giving up after 30 attempts.
            for _ in range(30):
                self.browser.get(url)
                log('浏览器页面正常')
                if '搜公众号' not in self.browser.page_source:
                    break
        try:
            img = self.wait.until(
                EC.presence_of_element_located((By.ID, 'seccodeImage')))
            log('------出现验证码页面------')

            # Crop the captcha element out of a full-page screenshot.
            location = img.location
            size = img.size
            left = location['x']
            top = location['y']
            right = left + size['width']
            bottom = top + size['height']
            screenshot = Image.open(
                BytesIO(self.browser.get_screenshot_as_png()))
            captcha = screenshot.crop((left, top, right, bottom))
            captcha_path = os.path.join(IMAGE_DIR, CAPTCHA_NAME)
            captcha.save(captcha_path)

            captch_input = ''
            # Keep the file open only for the duration of the upload so the
            # handle is always released.
            with open(captcha_path, 'rb') as captcha_file:
                files = {
                    'img': (CAPTCHA_NAME, captcha_file, 'image/png', {}),
                }
                res = requests.post(url=GetCaptcha_url, files=files)
            res = res.json()
            if res.get('Success'):
                captch_input = res.get('Captcha')
            log('------验证码:{}------'.format(captch_input))

            if captch_input:
                input_text = self.wait.until(
                    EC.presence_of_element_located(
                        (By.ID, 'seccodeInput')))
                input_text.clear()
                input_text.send_keys(captch_input)
                submit = self.wait.until(
                    EC.element_to_be_clickable((By.ID, 'submit')))
                submit.click()
                time.sleep(2)
                try:
                    if '搜公众号' not in self.browser.page_source:
                        log('验证失败')
                        return
                    log('------验证码正确------')
                except Exception as e:
                    log('--22222222----验证码输入错误------', e)
        except Exception as e:
            log('------未跳转到验证码页面,跳转到首页,忽略------', e)
            # Chain the original error so the traceback keeps the cause.
            raise RuntimeError from e
    elif re.search(r'mp\.weixin\.qq\.com', url):
        log('------开始处理微信验证码------')
        cert = random.random()
        image_url = 'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
            cert)
        respones = self.s.get(image_url, cookies=self.cookies)
        captch_input = captch_upload_image(respones.content)
        log('------验证码:{}------'.format(captch_input))
        data = {'cert': cert, 'input': captch_input}
        self.s.post(image_url, cookies=self.cookies, data=data)
        log('------cookies已更新------')