def test_gen_hot_url(self):
        """Check the URLs built by ``WechatSogouRequest.gen_hot_url``.

        For every non-dunder attribute name on ``WechatSogouConst.hot_index``
        the generated URL must contain the expected prefix, a call without a
        page argument must yield a URL containing ``0.html``, page ``0`` must
        raise ``AssertionError``, and page ``N`` must map to ``N-1.html``.
        """
        url_prefix = 'http://weixin.sogou.com/wapindex/wap/0612/wap_'
        category_names = [name for name in dir(WechatSogouConst.hot_index)
                          if not name.startswith('__')]

        for hot_index in category_names:
            # Called without a page argument.
            default_url = WechatSogouRequest.gen_hot_url(hot_index)
            assert_in(url_prefix, default_url)
            assert_in('0.html', default_url)

            # Page 0 is rejected.
            with assert_raises(AssertionError):
                WechatSogouRequest.gen_hot_url(hot_index, 0)

            # The page number in the URL is one less than the requested page.
            for page in range(1, 5):
                paged_url = WechatSogouRequest.gen_hot_url(hot_index, page)
                assert_in(url_prefix, paged_url)
                assert_in('{}.html'.format(page - 1), paged_url)
    def test_gen_hot_url(self):
        """Verify ``WechatSogouRequest.gen_hot_url`` for each hot category.

        Iterates over the non-dunder attribute names of
        ``WechatSogouConst.hot_index`` and asserts that:

        * the URL contains the expected prefix;
        * omitting the page yields a URL containing ``0.html``;
        * passing page ``0`` raises ``AssertionError``;
        * pages 1 to 4 yield URLs containing ``<page - 1>.html``.
        """
        expected_prefix = 'http://weixin.sogou.com/wapindex/wap/0612/wap_'
        hot_names = (attr for attr in dir(WechatSogouConst.hot_index)
                     if not attr.startswith('__'))

        for hot_index in hot_names:
            first_url = WechatSogouRequest.gen_hot_url(hot_index)
            assert_in(expected_prefix, first_url)
            assert_in('0.html', first_url)

            with assert_raises(AssertionError):
                WechatSogouRequest.gen_hot_url(hot_index, 0)

            for page in range(1, 5):
                page_url = WechatSogouRequest.gen_hot_url(hot_index, page)
                assert_in(expected_prefix, page_url)
                # Requested page N appears in the URL as N-1.
                assert_in('{}.html'.format(page - 1), page_url)
Example #3
0
    def get_gzh_artilce_by_hot(
            self,
            hot_index,
            page=1,
            unlock_callback=None,
            identify_image_callback=None):
        """Fetch the hot articles listed on the home page.

        Parameters
        ----------
        hot_index : WechatSogouConst.hot_index
            Category of home-page hot articles (a constant):
            WechatSogouConst.hot_index.xxx. It is validated with ``hasattr``
            against ``WechatSogouConst.hot_index``.
        page : int
            Page number; must be a positive integer.
        unlock_callback : optional
            Forwarded unchanged to the unlock-aware request helper.
        identify_image_callback : optional
            Forwarded unchanged to the unlock-aware request helper.

        Returns
        -------
        list[dict]
            {
                'gzh': {
                    'headimage': str,  # official account avatar
                    'wechat_name': str,  # official account name
                },
                'article': {
                    'url': str,  # temporary article link
                    'title': str,  # article title
                    'abstract': str,  # article abstract
                    'time': int,  # push time, 10-digit timestamp
                    'open_id': str,  # open id
                    'main_img': str  # cover image
                }
            }

        Raises
        ------
        AssertionError
            If ``hot_index`` is not an attribute of
            ``WechatSogouConst.hot_index`` or ``page`` is not a positive int.
        """

        assert hasattr(WechatSogouConst.hot_index, hot_index)
        assert isinstance(page, int)
        assert page > 0

        def _redirected_to_antispider(resp):
            # The response needs unlocking when its URL mentions 'antispider'.
            return 'antispider' in resp.url

        hot_url = WechatSogouRequest.gen_hot_url(hot_index, page)
        response = self.__get_by_unlock(
            hot_url,
            is_need_unlock=_redirected_to_antispider,
            unlock_platform=self.__unlock_sogou,
            unlock_callback=unlock_callback,
            identify_image_callback=identify_image_callback)

        # Decode the body as UTF-8 before handing the text to the parser.
        response.encoding = 'utf-8'
        page_text = response.text
        return WechatSogouStructuring.get_gzh_artilce_by_hot(page_text)
Example #4
0
    def get_gzh_artilce_by_hot(self, hot_index, page=1,
                               unlock_callback=None,
                               identify_image_callback=None):
        """Retrieve the home-page hot articles for one category.

        Parameters
        ----------
        hot_index : WechatSogouConst.hot_index
            Category of home-page hot articles (a constant):
            WechatSogouConst.hot_index.xxx. Checked with ``hasattr`` against
            ``WechatSogouConst.hot_index``.
        page : int
            Page number; must be an int greater than zero.
        unlock_callback : optional
            Passed through to the unlock-aware request helper.
        identify_image_callback : optional
            Passed through to the unlock-aware request helper.

        Returns
        -------
        list[dict]
            {
                'gzh': {
                    'headimage': str,  # official account avatar
                    'wechat_name': str,  # official account name
                },
                'article': {
                    'url': str,  # temporary article link
                    'title': str,  # article title
                    'abstract': str,  # article abstract
                    'time': int,  # push time, 10-digit timestamp
                    'open_id': str,  # open id
                    'main_img': str  # cover image
                }
            }

        Raises
        ------
        AssertionError
            If ``hot_index`` is not an attribute of
            ``WechatSogouConst.hot_index`` or ``page`` is not a positive int.
        """

        # Validate arguments before building the request URL.
        assert hasattr(WechatSogouConst.hot_index, hot_index)
        assert isinstance(page, int)
        assert page > 0

        def _needs_unlock(resp):
            # An 'antispider' URL on the response signals that unlocking is needed.
            return 'antispider' in resp.url

        unlock_options = {
            'is_need_unlock': _needs_unlock,
            'unlock_platform': self.__unlock_sogou,
            'unlock_callback': unlock_callback,
            'identify_image_callback': identify_image_callback,
        }
        request_url = WechatSogouRequest.gen_hot_url(hot_index, page)
        reply = self.__get_by_unlock(request_url, **unlock_options)

        # Set the encoding first so that .text is decoded as UTF-8.
        reply.encoding = 'utf-8'
        return WechatSogouStructuring.get_gzh_artilce_by_hot(reply.text)