コード例 #1
0
    def crack_sougou(self, url):
        """Try to clear the captcha that blocked a request to ``url``.

        Two hosts are handled:

        * ``weixin.sogou.com`` -- the page is opened in ``self.driver``. If
          the captcha image (element id ``seccodeImage``) shows up, it is
          cropped out of a full-page screenshot, recognised, typed into
          ``seccodeInput`` and submitted.
        * ``mp.weixin.qq.com`` -- a captcha image is downloaded through the
          session ``self.s``, recognised and posted back to the same
          endpoint.

        URLs on any other host are only logged.

        Args:
            url: The URL whose request did not succeed.

        Returns:
            None in every case; the outcome is only reported through ``log``.
        """
        log.info('------开始处理未成功的URL:{}'.format(url))
        if re.search(r'weixin\.sogou\.com', url):
            log.info('------开始处理搜狗验证码------')
            self.driver.get(url)
            time.sleep(2)
            # The text '搜公众号' in the page source is used as the marker of
            # a normal (non-captcha) page.
            if '搜公众号' in self.driver.page_source:
                log.info('浏览器页面正常' + '直接返回')
                return
            try:
                img = self.wait.until(
                    EC.presence_of_element_located((By.ID, 'seccodeImage')))
                log.info('------出现验证码页面------')

                # Cut the captcha out of a full screenshot using the
                # element's position and size.
                # NOTE(review): assumes screenshot pixels map 1:1 to element
                # coordinates -- confirm for scaled / high-DPI displays.
                location = img.location
                size = img.size
                left = location['x']
                top = location['y']
                right = left + size['width']
                bottom = top + size['height']
                screenshot = Image.open(
                    BytesIO(self.driver.get_screenshot_as_png()))
                captcha = screenshot.crop((left, top, right, bottom))
                captcha_path = get_captcha_path()
                captcha.save(captcha_path)
                captcha_name = os.path.basename(captcha_path)

                try:
                    captch_input = ''
                    # Context manager so the upload handle is always closed.
                    with open(captcha_path, 'rb') as captcha_file:
                        files = {
                            'img': (captcha_name, captcha_file, 'image/png',
                                    {}),
                        }
                        res = requests.post(url=GETCAPTCHA_URL, files=files)
                    res = res.json()
                    if res.get('Success'):
                        captch_input = res.get('Captcha')
                except Exception as e:
                    # Best-effort fallback: any failure of the service at
                    # GETCAPTCHA_URL hands the image to captch_upload_image.
                    log.info('搜狗验证码获取失败:{}'.format(e))
                    with open(captcha_path, "rb") as f:
                        filebytes = f.read()
                    captch_input = captch_upload_image(filebytes)

                log.info('------验证码:{}------'.format(captch_input))
                if captch_input:
                    input_text = self.wait.until(
                        EC.presence_of_element_located(
                            (By.ID, 'seccodeInput')))
                    input_text.clear()
                    input_text.send_keys(captch_input)
                    submit = self.wait.until(
                        EC.element_to_be_clickable((By.ID, 'submit')))
                    submit.click()
                    time.sleep(2)
                    try:
                        if '搜公众号' not in self.driver.page_source:
                            log.info('验证失败')
                            return
                        log.info('------验证码正确------')
                    except Exception as e:
                        log.info('--22222222----验证码输入错误------ {}'.format(e))
            except Exception as e:
                # Reached when the captcha element never appears (the wait
                # raises) or when any step above fails; deliberately ignored.
                log.info('------未跳转到验证码页面,跳转到首页,忽略------ {}'.format(e))

        elif re.search(r'mp\.weixin\.qq\.com', url):
            log.info('------开始处理微信验证码------')
            # The same random value is sent as the ``cert`` query parameter
            # and in the POST body so both requests refer to one captcha.
            cert = random.random()
            image_url = 'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
                cert)
            response = self.s.get(image_url, cookies=self.cookies)
            captch_input = captch_upload_image(response.content)
            log.info('------验证码:{}------'.format(captch_input))
            data = {'cert': cert, 'input': captch_input}
            r = self.s.post(image_url, cookies=self.cookies, data=data)
            log.info('------cookies已更新------{}'.format(r.status_code))
コード例 #2
0
ファイル: daily_collect.py プロジェクト: whoiskx/com_code
    def crack_sougou(self, url):
        """Try to clear the captcha that blocked a request to ``url``.

        Two hosts are handled:

        * ``weixin.sogou.com`` -- the page is opened in ``self.driver``. If
          the captcha image (element id ``seccodeImage``) shows up, it is
          cropped out of a full-page screenshot, recognised, typed into
          ``seccodeInput`` and submitted.
        * ``mp.weixin.qq.com`` -- a captcha image is downloaded through the
          session ``self.s``, recognised and posted back to the same
          endpoint, through ``self.proxies`` when that is set.

        URLs on any other host are only logged.

        Args:
            url: The URL whose request did not succeed.

        Returns:
            ``'正确'`` when the Sogou page is (or becomes) a normal page;
            ``None`` in every other case, including the whole
            ``mp.weixin.qq.com`` branch.
        """
        log.info('------开始处理未成功的URL:{}'.format(url))
        if re.search(r'weixin\.sogou\.com', url):
            log.info('------开始处理搜狗验证码------')
            self.driver.get(url)
            time.sleep(2)
            # The text '搜公众号' in the page source is used as the marker of
            # a normal (non-captcha) page.
            if '搜公众号' in self.driver.page_source:
                log.info('浏览器页面正常' + '直接返回')
                log.info('title {}'.format(self.driver.title))
                return '正确'
            try:
                img = self.wait.until(
                    EC.presence_of_element_located((By.ID, 'seccodeImage')))
                log.info('------出现验证码页面------')

                # Cut the captcha out of a full screenshot using the
                # element's position and size.
                # NOTE(review): assumes screenshot pixels map 1:1 to element
                # coordinates -- confirm for scaled / high-DPI displays.
                location = img.location
                size = img.size
                left = location['x']
                top = location['y']
                right = left + size['width']
                bottom = top + size['height']
                screenshot = Image.open(
                    BytesIO(self.driver.get_screenshot_as_png()))
                captcha = screenshot.crop((left, top, right, bottom))
                captcha_path = get_captcha_path()
                captcha.save(captcha_path)
                captcha_name = os.path.basename(captcha_path)

                try:
                    captch_input = ''
                    # Context manager so the upload handle is always closed.
                    with open(captcha_path, 'rb') as captcha_file:
                        files = {
                            'img': (captcha_name, captcha_file, 'image/png',
                                    {}),
                        }
                        res = requests.post(url=GETCAPTCHA_URL,
                                            files=files,
                                            timeout=self.timeout)
                    res = res.json()
                    if res.get('Success'):
                        captch_input = res.get('Captcha')
                except Exception as e:
                    # Best-effort fallback: any failure of the service at
                    # GETCAPTCHA_URL hands the image to captch_upload_image.
                    log.info('本地识别搜狗验证码获取异常,使用打码平台:{}'.format(e))
                    with open(captcha_path, "rb") as f:
                        filebytes = f.read()
                    captch_input = captch_upload_image(filebytes)

                log.info('------验证码:{}------'.format(captch_input))
                if captch_input:
                    input_text = self.wait.until(
                        EC.presence_of_element_located(
                            (By.ID, 'seccodeInput')))
                    input_text.clear()
                    input_text.send_keys(captch_input)
                    time.sleep(1)
                    submit = self.wait.until(
                        EC.element_to_be_clickable((By.ID, 'submit')))
                    submit.click()
                    time.sleep(1)
                    if '搜公众号' not in self.driver.page_source:
                        log.info('搜公众号 不在页面中验证失败')
                        log.info('title{}'.format(self.driver.title))
                        return
                    log.info('------验证码正确------')
                    return '正确'
            except Exception as e:
                # Reached when the captcha element never appears (the wait
                # raises) or when any step above fails; deliberately ignored.
                log.info('------未跳转到验证码页面,跳转到首页,忽略------ {}'.format(e))

        elif re.search(r'mp\.weixin\.qq\.com', url):
            log.info('------开始处理微信验证码------')
            # The same random value is sent as the ``cert`` query parameter
            # and in the POST body so both requests refer to one captcha.
            cert = random.random()
            image_url = 'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
                cert)
            # NOTE(review): the image download does not go through
            # self.proxies while the submit below does -- confirm intended.
            response = self.s.get(image_url, cookies=self.cookies)
            captch_input = captch_upload_image(response.content)
            log.info('------验证码:{}------'.format(captch_input))
            data = {'cert': cert, 'input': captch_input}

            # Only pass ``proxies`` when configured, so the session's own
            # default is used otherwise.
            proxies = self.proxies
            post_kwargs = {'cookies': self.cookies, 'data': data}
            if proxies:
                post_kwargs['proxies'] = proxies
            r = self.s.post(image_url, **post_kwargs)
            ret = r.json().get('ret')
            log.info(ret)
            if ret == 0:
                log.info('------验证码正确----ret--{}'.format(ret))
            log.info('------cookies已更新------{}'.format(r.status_code))
            if proxies:
                # Short pause after a proxied submit, as in the original
                # proxy-only branch.
                time.sleep(0.5)