def start_requests(self):
    """
    Entry point: prepare the initial Sogou WeChat search request.
    :return:
    """
    boot_url = 'http://weixin.sogou.com/weixin'
    # Pop the next pending task id from the queue for this spider.
    task_id = pop_task(self.name)
    if not task_id:
        print('%s task is empty' % self.name)
        return
    task_item = get_item(FetchTask, task_id)
    # Load a cookie jar so the search request passes Sogou's anti-crawl checks.
    cookies_id, cookies = get_cookies(self.name)
    url_params = {
        'type': 1,
        # 'query': task_item.follow_id,
        'query': task_item.follow_name.encode('utf-8'),
    }
    url_profile = get_update_url(boot_url, url_params)
    meta = {
        'cookiejar': cookies_id,
        'task_id': task_item.id,
        'platform_id': task_item.platform_id,
        'channel_id': task_item.channel_id,
        'follow_id': task_item.follow_id,
        'follow_name': task_item.follow_name,
    }
    yield scrapy.Request(url=url_profile,
                         cookies=cookies,
                         callback=self.parse_account_search_list,
                         meta=meta)
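All three entry points below lean on the same task helpers: pop_task pulls the next pending task id for the spider, and get_item loads the matching FetchTask row. Their implementations are not reproduced here; a minimal sketch, assuming one Redis list per spider and an SQLAlchemy session (redis_client, db_session, the connection strings and the 'task:<spider>' key name are all placeholders, not the project's actual wiring):

import redis
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Hypothetical connections; the real project configures these elsewhere.
redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)
db_session = sessionmaker(bind=create_engine('mysql://user:pass@localhost/spider'))()


def pop_task(spider_name):
    """Pop the next pending task id from the spider's Redis list, or None when empty."""
    task_id = redis_client.lpop('task:%s' % spider_name)
    return task_id.decode('utf-8') if task_id else None


def get_item(model, item_id):
    """Load a single row of the given model by primary key."""
    return db_session.query(model).get(item_id)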
def start_requests(self):
    """
    Entry point: prepare the initial Toutiao profile request.
    :return:
    """
    # Query parameters expected by the Toutiao mobile profile page.
    url_params = {
        'version_code': '6.4.2',
        'version_name': '',
        'device_platform': 'iphone',
        'tt_from': 'weixin',
        'utm_source': 'weixin',
        'utm_medium': 'toutiao_ios',
        'utm_campaign': 'client_share',
        'wxshare_count': '1',
    }
    task_id = pop_task(self.name)
    if not task_id:
        print('%s task is empty' % self.name)
        return
    print('%s task id: %s' % (self.name, task_id))
    task_item = get_item(FetchTask, task_id)
    # Mobile profile page of the followed Toutiao account.
    fetch_url = 'http://m.toutiao.com/profile/%s/' % task_item.follow_id
    url_profile = get_update_url(fetch_url, url_params)
    meta = {
        'task_id': task_item.id,
        'platform_id': task_item.platform_id,
        'channel_id': task_item.channel_id,
        'follow_id': task_item.follow_id,
        'follow_name': task_item.follow_name,
    }
    yield scrapy.Request(url=url_profile,
                         callback=self.get_profile,
                         meta=meta)
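Both start_requests methods build their final URL through get_update_url, whose body is not shown in these snippets. Judging from the call sites, it merges a dict of parameters into the URL's query string; a minimal sketch under that assumption (the name and behavior are inferred, not taken from the project's source):

try:
    # Python 3
    from urllib.parse import urlencode, urlparse, parse_qs, urlunparse
except ImportError:
    # Python 2 (the .encode('utf-8') on the query value suggests the project targets it)
    from urllib import urlencode
    from urlparse import urlparse, parse_qs, urlunparse


def get_update_url(base_url, params):
    """Merge `params` into `base_url`'s query string and return the updated URL."""
    parts = urlparse(base_url)
    query = parse_qs(parts.query)
    query.update(params)
    # doseq=True flattens the list values that parse_qs produces.
    new_query = urlencode(query, doseq=True)
    return urlunparse(parts._replace(query=new_query))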
def get_article_task(self):
    """
    Entry point for article crawling.
    :return:
    """
    task_id = pop_task(self.name)
    if not task_id:
        print('%s task is empty' % self.name)
        return
    print('%s task id: %s' % (self.name, task_id))
    task_item = get_item(FetchTask, task_id)
    article_id = task_item.follow_id
    # Article list ("wenzhang") page of the followed Weibo account.
    article_list_url = 'https://weibo.com/p/%s/wenzhang' % article_id
    meta = {
        'task_id': task_item.id,
        'platform_id': task_item.platform_id,
        'channel_id': task_item.channel_id,
        'follow_id': task_item.follow_id,
        'follow_name': task_item.follow_name,
    }
    yield scrapy.Request(url=article_list_url,
                         callback=self.parse_article_list,
                         meta=meta)
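In every case the task context travels with the request via meta, so downstream callbacks can stamp platform_id, channel_id and the followed account onto the items they emit. The real parse_article_list does Weibo-specific extraction that is not shown here; purely as a hypothetical skeleton of how such a callback reads the context back out and keeps propagating it (the CSS selector and parse_article callback are illustrative, not the project's code):

def parse_article_list(self, response):
    """Hypothetical skeleton: read the task context from response.meta and pass it on."""
    meta = response.meta  # task_id, platform_id, channel_id, follow_id, follow_name
    for href in response.css('a.title::attr(href)').extract():  # selector is illustrative
        yield scrapy.Request(url=response.urljoin(href),
                             callback=self.parse_article,       # hypothetical detail callback
                             meta=dict(meta))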