def test_gen_hot_url(self):
    """Check the URLs produced by ``WechatSogouRequest.gen_hot_url``.

    For every non-dunder attribute name of ``WechatSogouConst.hot_index``:

    * the URL built without an explicit page contains the wap prefix and
      ``0.html``;
    * asking for page 0 raises ``AssertionError``;
    * pages 1 through 4 yield URLs containing the wap prefix and
      ``<page - 1>.html``.
    """
    url_prefix = 'http://weixin.sogou.com/wapindex/wap/0612/wap_'

    for hot_index in dir(WechatSogouConst.hot_index):
        # Skip the dunder attributes that dir() reports.
        if hot_index.startswith('__'):
            continue

        default_url = WechatSogouRequest.gen_hot_url(hot_index)
        assert_in(url_prefix, default_url)
        assert_in('0.html', default_url)

        # Page 0 must be rejected.
        with assert_raises(AssertionError):
            WechatSogouRequest.gen_hot_url(hot_index, 0)

        # The page argument is 1-based while the file name in the URL is
        # 0-based, so iterate over the expected file index directly.
        for file_index in range(4):
            paged_url = WechatSogouRequest.gen_hot_url(hot_index, file_index + 1)
            assert_in(url_prefix, paged_url)
            assert_in('{}.html'.format(file_index), paged_url)
def test_gen_hot_url(self):
    """Verify ``WechatSogouRequest.gen_hot_url`` for each hot-index name.

    The names are the entries of ``dir(WechatSogouConst.hot_index)`` that do
    not start with a double underscore. For each one the test asserts that
    the default URL contains the wap prefix and ``0.html``, that page 0
    raises ``AssertionError``, and that pages 1-4 produce URLs containing the
    wap prefix and ``<page - 1>.html``.
    """
    expected_prefix = 'http://weixin.sogou.com/wapindex/wap/0612/wap_'
    candidate_names = (
        attr for attr in dir(WechatSogouConst.hot_index)
        if not attr.startswith('__')
    )

    for hot_index in candidate_names:
        first_page_url = WechatSogouRequest.gen_hot_url(hot_index)
        assert_in(expected_prefix, first_page_url)
        assert_in('0.html', first_page_url)

        # A page number of 0 is expected to be refused.
        with assert_raises(AssertionError):
            WechatSogouRequest.gen_hot_url(hot_index, 0)

        for page in (1, 2, 3, 4):
            # The file name embedded in the URL is one less than the page.
            expected_file = '{}.html'.format(page - 1)
            page_url = WechatSogouRequest.gen_hot_url(hot_index, page)
            assert_in(expected_prefix, page_url)
            assert_in(expected_file, page_url)
def get_gzh_artilce_by_hot(self, hot_index, page=1, unlock_callback=None, identify_image_callback=None):
    """Fetch the hot articles shown on the home page.

    Parameters
    ----------
    hot_index : WechatSogouConst.hot_index
        Category of home-page hot articles (a constant):
        WechatSogouConst.hot_index.xxx. It is validated with
        ``hasattr(WechatSogouConst.hot_index, hot_index)``.
    page : int
        Page number; must be an ``int`` greater than 0.
    unlock_callback
        Passed through unchanged to the unlock-aware request helper.
    identify_image_callback
        Passed through unchanged to the unlock-aware request helper.

    Returns
    -------
    list[dict]
        Whatever ``WechatSogouStructuring.get_gzh_artilce_by_hot`` builds
        from the response text; each item is documented as
        {
            'gzh': {
                'headimage': str,  # official-account avatar
                'wechat_name': str,  # official-account name
            },
            'article': {
                'url': str,  # temporary article link
                'title': str,  # article title
                'abstract': str,  # article abstract
                'time': int,  # push time, 10-digit timestamp
                'open_id': str,  # open id
                'main_img': str  # cover image
            }
        }

    Raises
    ------
    AssertionError
        If ``hot_index`` is not an attribute of ``WechatSogouConst.hot_index``
        or ``page`` is not a positive ``int``.
    """
    assert hasattr(WechatSogouConst.hot_index, hot_index)
    assert isinstance(page, int) and page > 0

    hot_url = WechatSogouRequest.gen_hot_url(hot_index, page)

    def landed_on_antispider(response):
        # Unlocking is needed when the response URL mentions 'antispider'.
        return 'antispider' in response.url

    response = self.__get_by_unlock(
        hot_url,
        is_need_unlock=landed_on_antispider,
        unlock_platform=self.__unlock_sogou,
        unlock_callback=unlock_callback,
        identify_image_callback=identify_image_callback)

    # Force UTF-8 before reading .text so the page is decoded as UTF-8.
    response.encoding = 'utf-8'
    page_text = response.text
    return WechatSogouStructuring.get_gzh_artilce_by_hot(page_text)
def get_gzh_artilce_by_hot(self, hot_index, page=1, unlock_callback=None, identify_image_callback=None):
    """Get the home-page hot articles for one category and page.

    Parameters
    ----------
    hot_index : WechatSogouConst.hot_index
        Category of home-page hot articles (a constant):
        WechatSogouConst.hot_index.xxx. The value must satisfy
        ``hasattr(WechatSogouConst.hot_index, hot_index)``.
    page : int
        Page number; an ``int`` strictly greater than 0.
    unlock_callback
        Forwarded as-is to the unlock-aware request helper.
    identify_image_callback
        Forwarded as-is to the unlock-aware request helper.

    Returns
    -------
    list[dict]
        The value returned by
        ``WechatSogouStructuring.get_gzh_artilce_by_hot`` for the response
        text; each item is documented as
        {
            'gzh': {
                'headimage': str,  # official-account avatar
                'wechat_name': str,  # official-account name
            },
            'article': {
                'url': str,  # temporary article link
                'title': str,  # article title
                'abstract': str,  # article abstract
                'time': int,  # push time, 10-digit timestamp
                'open_id': str,  # open id
                'main_img': str  # cover image
            }
        }

    Raises
    ------
    AssertionError
        When ``hot_index`` is not an attribute of
        ``WechatSogouConst.hot_index`` or ``page`` is not a positive ``int``.
    """
    assert hasattr(WechatSogouConst.hot_index, hot_index)
    assert isinstance(page, int) and page > 0

    target_url = WechatSogouRequest.gen_hot_url(hot_index, page)

    # Keyword arguments for the unlock-aware GET; the predicate flags
    # responses whose URL contains 'antispider'.
    unlock_options = {
        'is_need_unlock': lambda x: 'antispider' in x.url,
        'unlock_platform': self.__unlock_sogou,
        'unlock_callback': unlock_callback,
        'identify_image_callback': identify_image_callback,
    }
    reply = self.__get_by_unlock(target_url, **unlock_options)

    # Set the encoding explicitly before the text is read.
    reply.encoding = 'utf-8'
    return WechatSogouStructuring.get_gzh_artilce_by_hot(reply.text)