Exemple #1
0
 def __unlock_sogou(self,
                    url,
                    resp,
                    session,
                    unlock_callback=None,
                    identify_image_callback=None):
     """Download a Sogou captcha image, have it solved, and cache the result.

     Parameters
     ----------
     url : str
         Page that triggered the captcha; sent as the ``Referer`` header of
         the captcha image request and forwarded to ``unlock_callback``.
     resp : response
         Response of the page request; forwarded to ``unlock_callback``.
     session : session
         Session used to download the image and read the ``SUID`` cookie.
     unlock_callback : callable, optional
         Called as ``unlock_callback(url, session, resp, image_bytes,
         identify_image_callback)`` and expected to return a dict with a
         ``'code'`` key (and ``'id'`` on success).  Defaults to
         ``unlock_sogou_callback_example``.
     identify_image_callback : callable, optional
         Forwarded untouched to ``unlock_callback``.

     Raises
     ------
     WechatSogouRequestsException
         If the captcha image request does not succeed.
     WechatSogouVcodeOcrException
         If the unlock result carries a non-zero ``'code'``.
     """
     solve = (unlock_sogou_callback_example
              if unlock_callback is None else unlock_callback)

     # A millisecond timestamp is appended as the ``tc`` query parameter.
     timestamp_ms = int(round(time.time() * 1000))
     captcha_url = (
         'http://weixin.sogou.com/antispider/util/seccode.php?tc={}'.format(
             timestamp_ms))
     captcha_resp = session.get(captcha_url, headers={'Referer': url})
     if not captcha_resp.ok:
         raise WechatSogouRequestsException('WechatSogouAPI get img',
                                            captcha_resp)

     outcome = solve(url, session, resp, captcha_resp.content,
                     identify_image_callback)
     if outcome['code'] != 0:
         detail = '[WechatSogouAPI identify image] code: {code}, msg: {msg}'
         raise WechatSogouVcodeOcrException(
             detail.format(code=outcome.get('code'), msg=outcome.get('msg')))

     # Success: hand the SUID cookie and the returned id to the cache.
     self.__set_cache(session.cookies.get('SUID'), outcome['id'])
Exemple #2
0
    def __unlock_sogou(self,
                       url,
                       resp,
                       session,
                       unlock_callback=None,
                       identify_image_callback=None):
        """Download a Sogou captcha image, have it solved, and cache the result.

        Parameters
        ----------
        url : str
            Page that triggered the captcha; forwarded to ``unlock_callback``.
        resp : response
            Response of the page request; forwarded to ``unlock_callback``.
        session : session
            Session used to download the image and read the ``SUID`` cookie.
        unlock_callback : callable, optional
            Called as ``unlock_callback(url, session, resp, image_bytes,
            identify_image_callback)`` and expected to return a dict with a
            ``'code'`` key (and ``'id'`` on success).  Defaults to
            ``unlock_sogou_callback_example``.
        identify_image_callback : callable, optional
            Forwarded untouched to ``unlock_callback``.

        Raises
        ------
        WechatSogouRequestsException
            If the captcha image request does not succeed.
        WechatSogouVcodeOcrException
            If the unlock result carries a non-zero ``'code'``.
        """
        if unlock_callback is None:
            unlock_callback = unlock_sogou_callback_example
        millis = int(round(time.time() * 1000))
        # r_captcha.content holds the raw captcha image bytes.
        r_captcha = session.get(
            'http://weixin.sogou.com/antispider/util/seccode.php?tc={}'.format(
                millis))
        if not r_captcha.ok:
            # Report the captcha request that actually failed, not the earlier
            # page response that led here.
            raise WechatSogouRequestsException('WechatSogouAPI get img',
                                               r_captcha)
        # With the defaults, unlock_callback is unlock_sogou_callback_example
        # and the captcha text comes from identify_image_callback.
        r_unlock = unlock_callback(url, session, resp, r_captcha.content,
                                   identify_image_callback)

        if r_unlock['code'] != 0:
            raise WechatSogouVcodeOcrException(
                '[WechatSogouAPI identify image] code: {code}, msg: {msg}'.
                format(code=r_unlock.get('code'), msg=r_unlock.get('msg')))

        suid = session.cookies.get('SUID')
        # Debug output of the two values that are about to be cached.
        print('suid', suid)
        print('SNUID', r_unlock['id'])
        self.__set_cache(suid, r_unlock['id'])
Exemple #3
0
    def get_by_unlock(self,
                      url,
                      referer=None,
                      unlock_platform=None,
                      unlock_callback=None,
                      identify_image_callback=None):
        """Fetch ``url``, solving the anti-spider captcha first if one is served.

        Parameters
        ----------
        url : str
            Page to request.
        referer : str, optional
            Passed to ``self.__set_cookie`` when building request headers.
        unlock_platform : callable, optional
            Called as ``unlock_platform(url, resp, session, unlock_callback,
            identify_image_callback)`` when a captcha page is detected.  It
            may only be omitted if no captcha page is encountered.
        unlock_callback : callable, optional
            Forwarded untouched to ``unlock_platform``.
        identify_image_callback : callable, optional
            Captcha recogniser forwarded to ``unlock_platform``; defaults to
            ``identify_image_callback_by_hand``.

        Returns
        -------
        response
            The response for ``url``; it is requested again after a
            successful unlock.

        Raises
        ------
        TypeError
            If a captcha page is served and ``unlock_platform`` is None.
        WechatSogouVcodeOcrException
            If every unlock attempt fails.
        """
        assert unlock_platform is None or callable(unlock_platform)
        # Fall back to manual captcha recognition.
        if identify_image_callback is None:
            identify_image_callback = identify_image_callback_by_hand
        assert callable(identify_image_callback)
        assert unlock_callback is None or callable(unlock_callback)

        # Number of unlock attempts before the last failure is propagated.
        max_attempts = 2

        session = requests.session()
        resp = self.__get(url=url,
                          session=session,
                          headers=self.__set_cookie(referer=referer))
        # A captcha page is recognised by its URL or by its prompt text; only
        # then are the unlock callbacks needed.
        if 'antispider' in resp.url or '请输入验证码' in resp.text:
            print('要输入验证码')
            if unlock_platform is None:
                # Fail with a clear message instead of "'NoneType' object is
                # not callable".
                raise TypeError(
                    'unlock_platform must be callable to solve the captcha '
                    'page')
            for attempt in range(max_attempts):
                try:
                    # resp is the captcha page at this point.
                    unlock_platform(url, resp, session, unlock_callback,
                                    identify_image_callback)
                    break
                except WechatSogouVcodeOcrException:
                    if attempt == max_attempts - 1:
                        # Re-raise the original exception unchanged.
                        raise

            if '请输入验证码' in resp.text:
                resp = session.get(url)
            else:
                resp = self.__get(url,
                                  session,
                                  headers=self.__set_cookie(referer=referer))
        # Without a captcha the first response is returned as-is.
        return resp
def unlock_weixin_callback_example(url, req, resp, img,
                                   identify_image_callback):
    """Submit a solved captcha to the WeChat verification endpoint.

    Parameters
    ----------
    url : str or unicode
        URL of the page visited before the captcha page; sent as ``Referer``.
    req : requests.sessions.Session
        Session used to post the unlock request.
    resp : requests.models.Response
        Response of the (already redirected) page request.  Not used.
    img : bytes
        Binary data of the captcha image.
    identify_image_callback : callable
        Takes the captcha image bytes and returns the recognised text.

    Returns
    -------
    dict
        Parsed JSON body of the unlock response, expected to look like
        {
            'ret': '',
            'errmsg': '',
            'cookie_count': '',
        }

    Raises
    ------
    WechatSogouVcodeOcrException
        If the unlock request does not succeed.
    """
    endpoint = 'https://mp.weixin.qq.com/mp/verifycode'

    # Form payload: a millisecond timestamp plus the recognised captcha text.
    form = {}
    form['cert'] = time.time() * 1000
    form['input'] = identify_image_callback(img)

    request_headers = {}
    request_headers['Host'] = 'mp.weixin.qq.com'
    request_headers['Content-Type'] = (
        'application/x-www-form-urlencoded; charset=UTF-8')
    request_headers['Referer'] = url

    reply = req.post(endpoint, form, headers=request_headers)
    if reply.ok:
        return reply.json()

    failure = 'unlock[{}] failed: {}[{}]'.format(endpoint, reply.text,
                                                 reply.status_code)
    raise WechatSogouVcodeOcrException(failure)
Exemple #5
0
def unlock_sogou_callback_example(url, req, resp, img,
                                  identify_image_callback):
    """Submit a solved captcha to the Sogou anti-spider endpoint.

    Parameters
    ----------
    url : str or unicode
        URL of the page visited before the captcha page.
    req : requests.sessions.Session
        Session used to post the unlock request.
    resp : requests.models.Response
        Response of the (already redirected) page request.  Not used.
    img : bytes
        Binary data of the captcha image.
    identify_image_callback : callable
        Takes the captcha image bytes and returns the recognised text.

    Returns
    -------
    dict
        Parsed JSON body of the unlock response, expected to look like
        {
            'code': '',
            'msg': '',
        }

    Raises
    ------
    WechatSogouVcodeOcrException
        If the unlock request does not succeed.
    """
    # Everything after the host part of the original URL is echoed back to
    # Sogou in both the form payload and the Referer header.
    url_quote = url.split('weixin.sogou.com/')[-1]
    unlock_url = 'http://weixin.sogou.com/antispider/thank.php'
    data = {'c': identify_image_callback(img), 'r': '%2F' + url_quote, 'v': 5}
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': 'http://weixin.sogou.com/antispider/?from=%2f' + url_quote
    }
    # Debug output of the submitted form.
    print(data)
    r_unlock = req.post(unlock_url, data, headers=headers)
    if not r_unlock.ok:
        # Include the status code; the previous format string had only two
        # placeholders and silently dropped it.
        raise WechatSogouVcodeOcrException('unlock[{}] failed: {}[{}]'.format(
            unlock_url, r_unlock.text, r_unlock.status_code))
    # Parse the body once and reuse it for both the debug output and the
    # return value.
    result = r_unlock.json()
    print(result)
    return result
Exemple #6
0
    def __get_by_unlock(self,
                        url,
                        referer=None,
                        unlock_platform=None,
                        unlock_callback=None,
                        identify_image_callback=None,
                        session=None):
        """Fetch ``url``, solving the anti-spider captcha first if one is served.

        Parameters
        ----------
        url : str
            Page to request.
        referer : str, optional
            Passed to ``self.__set_cookie`` when building request headers.
        unlock_platform : callable, optional
            Called with keyword arguments ``url``, ``resp``, ``session``,
            ``unlock_callback`` and ``identify_image_callback`` when a captcha
            page is detected.  It may only be omitted if no captcha page is
            encountered.
        unlock_callback : callable, optional
            Forwarded untouched to ``unlock_platform``.
        identify_image_callback : callable, optional
            Captcha recogniser forwarded to ``unlock_platform``; defaults to
            ``identify_image_callback_by_hand``.
        session : session, optional
            Session to reuse; a new ``requests`` session is created when
            omitted.

        Returns
        -------
        response
            The response for ``url`` with its encoding set to UTF-8.

        Raises
        ------
        TypeError
            If a captcha page is served and ``unlock_platform`` is None.
        WechatSogouVcodeOcrException
            If the last of ``self.captcha_break_times`` attempts fails.
        """
        assert unlock_platform is None or callable(unlock_platform)
        if identify_image_callback is None:
            identify_image_callback = identify_image_callback_by_hand
        assert unlock_callback is None or callable(unlock_callback)
        assert callable(identify_image_callback)

        if not session:
            session = requests.session()
        resp = self.__get(url,
                          session,
                          headers=self.__set_cookie(referer=referer))
        resp.encoding = 'utf-8'
        # A captcha page is recognised by its URL or by its prompt text.
        if 'antispider' in resp.url or '请输入验证码' in resp.text:
            if unlock_platform is None:
                # Fail with a clear message instead of "'NoneType' object is
                # not callable".
                raise TypeError(
                    'unlock_platform must be callable to solve the captcha '
                    'page')
            max_attempts = self.captcha_break_times
            for attempt in range(max_attempts):
                try:
                    unlock_platform(
                        url=url,
                        resp=resp,
                        session=session,
                        unlock_callback=unlock_callback,
                        identify_image_callback=identify_image_callback)
                    break
                except WechatSogouVcodeOcrException:
                    if attempt == max_attempts - 1:
                        # Re-raise the original exception unchanged.
                        raise

            # Request the page again now that the session is unlocked.
            if '请输入验证码' in resp.text:
                headers = self.__set_cookie(referer=referer)
                headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64)'
                resp = self.__get(url, session, headers)
                resp.encoding = 'utf-8'
        return resp