Exemple #1
0
    def __unlock_sogou(self,
                       url,
                       resp,
                       session,
                       unlock_callback=None,
                       identify_image_callback=None):
        """Solve the Sogou anti-spider captcha for ``session``.

        Downloads a captcha image through ``session``, hands it to
        ``unlock_callback`` and, when the callback reports success, stores the
        returned ``id`` via ``__set_cache`` keyed by the session's ``SUID``
        cookie.

        Parameters
        ----------
        url : str
            URL whose request hit the captcha; forwarded to the callback.
        resp : object
            Response that hit the captcha; forwarded to the callback.
        session : object
            Session used to download the captcha image (needs ``get`` and
            ``cookies``); also forwarded to the callback.
        unlock_callback : callable, optional
            Called as ``unlock_callback(url, session, resp, image_bytes,
            identify_image_callback)``. Must return a dict with a ``code``
            key (``0`` means success), an ``id`` key on success and
            optionally ``msg``. Defaults to ``unlock_sogou_callback_example``.
        identify_image_callback : callable, optional
            Forwarded unchanged to ``unlock_callback``.

        Raises
        ------
        WechatSogouRequestsException
            The captcha image request did not succeed.
        WechatSogouVcodeOcrException
            The callback returned a non-zero ``code``.
        """
        if unlock_callback is None:
            unlock_callback = unlock_sogou_callback_example

        # Millisecond timestamp sent as the ``tc`` query parameter.
        millis = int(round(time.time() * 1000))
        r_captcha = session.get(
            'http://weixin.sogou.com/antispider/util/seccode.php?tc={}'.format(
                millis))
        if not r_captcha.ok:
            # Attach the captcha response: it is the request that failed,
            # not the original ``resp``.
            raise WechatSogouRequestsException('WechatSogouAPI get img',
                                               r_captcha)

        r_unlock = unlock_callback(url, session, resp, r_captcha.content,
                                   identify_image_callback)

        if r_unlock['code'] != 0:
            raise WechatSogouVcodeOcrException(
                '[WechatSogouAPI identify image] code: {code}, msg: {msg}'.
                format(code=r_unlock.get('code'), msg=r_unlock.get('msg')))

        self.__set_cache(session.cookies.get('SUID'), r_unlock['id'])
Exemple #2
0
    def __unlock_wechat(self,
                        url,
                        resp,
                        session,
                        unlock_callback=None,
                        identify_image_callback=None):
        """Solve the mp.weixin.qq.com captcha for ``session``.

        Downloads a captcha image through ``session`` and hands it to
        ``unlock_callback``; raises if the callback does not report success.

        Parameters
        ----------
        url : str
            URL whose request hit the captcha; forwarded to the callback.
        resp : object
            Response that hit the captcha; forwarded to the callback.
        session : object
            Session used to download the captcha image; also forwarded to the
            callback.
        unlock_callback : callable, optional
            Called as ``unlock_callback(url, session, resp, image_bytes,
            identify_image_callback)``. Must return a dict with a ``ret`` key
            (``0`` means success) and optionally ``errmsg`` and
            ``cookie_count``. Defaults to ``unlock_weixin_callback_example``.
        identify_image_callback : callable, optional
            Forwarded unchanged to ``unlock_callback``.

        Raises
        ------
        WechatSogouRequestsException
            The captcha image request did not succeed.
        WechatSogouVcodeOcrException
            The callback returned a non-zero ``ret``.
        """
        if unlock_callback is None:
            unlock_callback = unlock_weixin_callback_example

        # Current time in milliseconds sent as the ``cert`` query parameter.
        r_captcha = session.get(
            'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
                time.time() * 1000))
        if not r_captcha.ok:
            # Attach the captcha response: it is the request that failed,
            # not the original ``resp``.
            raise WechatSogouRequestsException(
                'WechatSogouAPI unlock_history get img', r_captcha)

        r_unlock = unlock_callback(url, session, resp, r_captcha.content,
                                   identify_image_callback)

        if r_unlock['ret'] != 0:
            raise WechatSogouVcodeOcrException(
                '[WechatSogouAPI identify image] code: {ret}, msg: {errmsg}, cookie_count: {cookie_count}'
                .format(ret=r_unlock.get('ret'),
                        errmsg=r_unlock.get('errmsg'),
                        cookie_count=r_unlock.get('cookie_count')))
Exemple #3
0
    def get_gzh_artilce_by_history(self,
                                   keyword=None,
                                   url=None,
                                   deblocking_callback=None,
                                   identify_image_callback=None):
        """Fetch an official account's history page and parse it.

        When ``url`` is not given, the account is looked up with
        ``search_gzh(keyword)`` and the ``url`` of the first hit is used.

        Parameters
        ----------
        keyword : str or unicode, optional
            Search text used to find the account when ``url`` is ``None``.
        url : str, optional
            History page URL; when given, ``keyword`` is not used.
        deblocking_callback : callable, optional
            Forwarded to ``search_gzh`` and to the history captcha handler.
        identify_image_callback : callable, optional
            Forwarded to ``search_gzh`` and to the history captcha handler.

        Returns
        -------
        The value produced by
        ``WechatSogouStructuring.get_gzh_info_and_article_by_history`` for the
        page text.

        Raises
        ------
        Exception
            ``url`` was not given and the search returned no account.
        WechatSogouRequestsException
            The history page request did not succeed.
        """
        if url is None:
            gzh_list = self.search_gzh(
                keyword,
                deblocking_callback=deblocking_callback,
                identify_image_callback=identify_image_callback)
            if not gzh_list:
                # todo use ws exception
                raise Exception(
                    'WechatSogouAPI get_gzh_artilce_by_history: no official '
                    'account found for keyword {!r}'.format(keyword))
            url = gzh_list[0]['url']

        req = requests.session()

        resp = WechatSogouRequest.get(url, req=req)

        if not resp.ok:
            raise WechatSogouRequestsException(
                'WechatSogouAPI get_gzh_artilce_by_history', resp)

        # The page asks for a captcha: solve it on the same session, then
        # fetch the page again.
        if '请输入验证码' in resp.text:
            self.__deblocking_history(url, resp, req, deblocking_callback,
                                      identify_image_callback)
            resp = WechatSogouRequest.get(url, req=req)

        return WechatSogouStructuring.get_gzh_info_and_article_by_history(
            resp.text)
Exemple #4
0
    def get_sugg(self, keyword):
        """Fetch Sogou WeChat search suggestions for a keyword.

        Parameters
        ----------
        keyword : str or unicode
            The keyword to get suggestions for.

        Returns
        -------
        list[str]
            Suggested keywords; an empty list when the response does not
            contain a suggestion list for ``keyword``.

        Raises
        ------
        WechatSogouRequestsException
            The suggestion request did not succeed.
        """
        url = 'http://w.sugg.sogou.com/sugg/ajaj_json.jsp?key={}&type=wxpub&pr=web'.format(
            quote(keyword.encode('utf-8')))
        r = requests.get(url)
        if not r.ok:
            raise WechatSogouRequestsException('get_sugg', r)

        # The keyword is matched literally, so it must be escaped: characters
        # such as ``+``, ``(`` or ``?`` would otherwise be read as regex syntax.
        pattern = u'\\["' + re.escape(keyword) + u'",(.*?),\\["'
        match = re.search(pattern, r.text)
        if match is None:
            return []
        return json.loads(match.group(1))
Exemple #5
0
    def __get(self, url, session, headers):
        """GET ``url`` through ``session`` using the instance's proxies.

        Returns the response when its ``ok`` flag is set; otherwise raises
        ``WechatSogouRequestsException`` carrying that response.
        """
        response = session.get(url, headers=headers, proxies=self.proxies)
        if response.ok:
            return response

        raise WechatSogouRequestsException('WechatSogouAPI get error', response)
Exemple #6
0
    def __get(self, url, session, headers):
        """GET ``url`` through ``session``, following one 302 by hand.

        ``self.requests_kwargs`` is passed as extra keyword arguments to every
        request. If the first response has status 302, its ``Location`` header
        is requested once with the same headers and keyword arguments.

        Returns the final response when its ``ok`` flag is set; otherwise
        raises ``WechatSogouRequestsException`` carrying that response.
        """
        response = session.get(url, headers=headers, **self.requests_kwargs)

        was_redirected = response.status_code == 302
        if was_redirected:
            target = response.headers['Location']
            response = session.get(target,
                                   headers=headers,
                                   **self.requests_kwargs)

        if response.ok:
            return response

        raise WechatSogouRequestsException('WechatSogouAPI get error',
                                           response)
Exemple #7
0
    def __get(self, url, session, headers):
        """GET ``url`` through ``session`` with merged request headers.

        The headers sent are ``headers`` overlaid with ``self.headers``, so
        instance-level entries win on key collisions. Either may be empty or
        ``None``. ``self.requests_kwargs`` is passed as extra keyword
        arguments to the request.

        Returns the response when its ``ok`` flag is set; otherwise raises
        ``WechatSogouRequestsException`` carrying that response.
        """
        merged = {}
        # Later sources override earlier ones.
        for source in (headers, self.headers):
            if source:
                merged.update(source.items())

        response = session.get(url, headers=merged, **self.requests_kwargs)
        if response.ok:
            return response

        raise WechatSogouRequestsException('WechatSogouAPI get error', response)
Exemple #8
0
    def __deblocking_history(self, url, resp, req, deblocking_callback,
                             identify_image_callback):
        """Solve the mp.weixin.qq.com captcha shown on a history page.

        Parameters
        ----------
        url : str
            URL whose request hit the captcha.
        resp : object
            Response that hit the captcha.
        req : object
            Session used to download the captcha image and handed to the
            callbacks.
        deblocking_callback : callable or other
            When callable, it is invoked as
            ``deblocking_callback(req, resp, image_bytes)`` and handles the
            whole flow. Otherwise ``deblocking_callback_history_example`` is
            used together with ``identify_image_callback``.
        identify_image_callback : callable or other
            Used only when ``deblocking_callback`` is not callable; replaced
            by ``identify_image_callback_example`` when itself not callable.

        Raises
        ------
        WechatSogouRequestsException
            The captcha image request did not succeed.
        WechatSogouVcodeOcrException
            The callback result has a non-zero ``ret``.
        """
        r_img = req.get(
            'https://mp.weixin.qq.com/mp/verifycode?cert={}'.format(
                time.time() * 1000))
        if not r_img.ok:
            # Attach the image response: it is the request that failed, not
            # the original ``resp``.
            raise WechatSogouRequestsException(
                'WechatSogouAPI deblocking_history get img', r_img)

        if callable(deblocking_callback):
            r_deblocking = deblocking_callback(req, resp, r_img.content)
        else:
            if not callable(identify_image_callback):
                identify_image_callback = identify_image_callback_example
            r_deblocking = deblocking_callback_history_example(
                url, req, resp, r_img.content, identify_image_callback)

        if r_deblocking['ret'] != 0:
            # Use .get() so a result without ``errmsg``/``cookie_count`` still
            # produces the intended exception instead of a KeyError.
            raise WechatSogouVcodeOcrException(
                '[WechatSogouAPI identify image] code: {ret}, msg: {errmsg}, cookie_count: {cookie_count}'
                .format(ret=r_deblocking.get('ret'),
                        errmsg=r_deblocking.get('errmsg'),
                        cookie_count=r_deblocking.get('cookie_count')))
Exemple #9
0
    def __deblocking_search(self, url, resp, req, deblocking_callback,
                            identify_image_callback):
        """Solve the Sogou anti-spider captcha hit by a search request.

        On success the ``id`` from the callback result is stored via
        ``__set_cache`` keyed by the session's ``SUID`` cookie.

        Parameters
        ----------
        url : str
            URL whose request hit the captcha.
        resp : object
            Response that hit the captcha.
        req : object
            Session used to download the captcha image and handed to the
            callbacks.
        deblocking_callback : callable or other
            When callable, it is invoked as
            ``deblocking_callback(req, resp, image_bytes)`` and handles the
            whole flow. Otherwise ``deblocking_callback_search_example`` is
            used together with ``identify_image_callback``.
        identify_image_callback : callable or other
            Used only when ``deblocking_callback`` is not callable; replaced
            by ``identify_image_callback_example`` when itself not callable.

        Raises
        ------
        WechatSogouRequestsException
            The captcha image request did not succeed.
        WechatSogouVcodeOcrException
            The callback result has a non-zero ``code``.
        """
        # Millisecond timestamp sent as the ``tc`` query parameter.
        millis = int(round(time.time() * 1000))
        r_img = req.get(
            'http://weixin.sogou.com/antispider/util/seccode.php?tc={}'.format(
                millis))
        if not r_img.ok:
            # Attach the image response: it is the request that failed, not
            # the original ``resp``.
            raise WechatSogouRequestsException('WechatSogouAPI get img', r_img)

        if callable(deblocking_callback):
            r_deblocking = deblocking_callback(req, resp, r_img.content)
        else:
            if not callable(identify_image_callback):
                identify_image_callback = identify_image_callback_example
            r_deblocking = deblocking_callback_search_example(
                url, req, resp, r_img.content, identify_image_callback)

        if r_deblocking['code'] != 0:
            # Use .get() so a result without ``msg`` still produces the
            # intended exception instead of a KeyError.
            raise WechatSogouVcodeOcrException(
                '[WechatSogouAPI identify image] code: {code}, msg: {msg}'.
                format(code=r_deblocking.get('code'),
                       msg=r_deblocking.get('msg')))

        self.__set_cache(req.cookies.get('SUID'), r_deblocking['id'])
Exemple #10
0
    def search_article(self,
                       keyword,
                       page=1,
                       timesn=0,
                       article_type=WechatSogouRequest.TYPE_ALL,
                       ft=None,
                       et=None,
                       deblocking_callback=None,
                       identify_image_callback=None):
        """Search for articles.

        When a captcha appears, the caller may supply either:
            1. ``deblocking_callback``, which handles the whole flow from the
               captcha appearing to it being solved; or
            2. only ``identify_image_callback``, which takes the captcha's
               binary data and returns its text, leaving the rest to the
               wechatsogou package.
        Note:
            Only one of the two is needed; if both are supplied,
            ``identify_image_callback`` has no effect.

        Parameters
        ----------
        keyword : str or unicode
            Search text.
        page : int, optional
            Page number, the default is 1.
        timesn : {0, 1, 2, 3, 4, 5}
            Time range: 0 no limit / 1 one day / 2 one week / 3 one month /
            4 one year / 5 custom. The default is 0.
        article_type : {'image', 'video', 'rich', 'all'}
            Kind of content the article contains: TYPE_IMAGE has images /
            TYPE_VIDEO has video / TYPE_RICH has images and video /
            TYPE_ALL anything.
        ft, et : datetime.date or None
            When ``timesn`` is 5, ``ft`` is the start date (e.g. 2017-07-01)
            and ``et`` is the end date (e.g. 2017-07-15).
        deblocking_callback : callable
            Handles the captcha page, see deblocking_callback_example.
        identify_image_callback : callable
            Takes the captcha's binary data and returns its text, see
            identify_image_callback_example.

        Returns
        -------
        list[dict]
            {
                'url': '',
                'img': '',
                'name': '',
                'wechat_id': '',
                'post_perm': '',
                'qrcode': '',
                'introduction': '',
                'authentication': ''
            }
            NOTE(review): structure as documented by the original author; the
            actual value is whatever
            ``WechatSogouStructuring.get_article_by_search`` returns - confirm.

        Raises
        ------
        WechatSogouRequestsException
            requests error
        """
        session = requests.session()

        search_url = WechatSogouRequest.gen_search_article_url(
            keyword,
            page,
            timesn=timesn,
            article_type=article_type,
            ft=ft,
            et=et)
        # The referer is the plain search URL for the same keyword.
        referer_url = WechatSogouRequest.gen_search_article_url(keyword)

        def fetch():
            # Cookie/referer headers are rebuilt for every request.
            return WechatSogouRequest.get(
                search_url,
                req=session,
                headers=self.__set_cookie(referer=referer_url))

        resp = fetch()
        if not resp.ok:
            raise WechatSogouRequestsException('WechatSogouAPI search_article',
                                               resp)

        # Landing on the antispider page means a captcha must be solved
        # before the search is repeated on the same session.
        if 'antispider' in resp.url:
            self.__deblocking_search(search_url, resp, session,
                                     deblocking_callback,
                                     identify_image_callback)
            resp = fetch()

        return WechatSogouStructuring.get_article_by_search(resp.text)
Exemple #11
0
    def search_gzh(self,
                   keyword,
                   page=1,
                   deblocking_callback=None,
                   identify_image_callback=None):
        """Search for official accounts.

        When a captcha appears, the caller may supply either:
            1. ``deblocking_callback``, which handles the whole flow from the
               captcha appearing to it being solved; or
            2. only ``identify_image_callback``, which takes the captcha's
               binary data and returns its text, leaving the rest to the
               wechatsogou package.
        Note:
            Only one of the two is needed; if both are supplied,
            ``identify_image_callback`` has no effect.

        Parameters
        ----------
        keyword : str or unicode
            Search text.
        page : int, optional
            Page number, the default is 1.
        deblocking_callback : callable
            Handles the captcha page, see deblocking_callback_example.
        identify_image_callback : callable
            Takes the captcha's binary data and returns its text, see
            identify_image_callback_example.

        Returns
        -------
        list[dict]
            {
                'url': '',
                'img': '',
                'name': '',
                'wechat_id': '',
                'post_perm': '',
                'qrcode': '',
                'introduction': '',
                'authentication': ''
            }

        Raises
        ------
        WechatSogouRequestsException
            requests error
        """
        session = requests.session()
        search_url = WechatSogouRequest.gen_search_gzh_url(keyword, page)

        def fetch():
            # Cookie headers are rebuilt for every request.
            return WechatSogouRequest.get(search_url,
                                          req=session,
                                          headers=self.__set_cookie())

        resp = fetch()
        if not resp.ok:
            raise WechatSogouRequestsException('WechatSogouAPI search_gzh',
                                               resp)

        # Landing on the antispider page means a captcha must be solved
        # before the search is repeated on the same session.
        if 'antispider' in resp.url:
            self.__deblocking_search(search_url, resp, session,
                                     deblocking_callback,
                                     identify_image_callback)
            resp = fetch()

        return WechatSogouStructuring.get_gzh_by_search(resp.text)