def parse_twitter_time_line(self, response):
    """Post every tweet carried by a timeline response to ``self.commit_url``.

    Each tweet in ``response.tweets`` is converted with ``to_item`` and
    ``self.format_request``, tagged with the social account id taken from
    the request meta, and sent as a form-encoded POST in which every value
    is stringified.

    A failure on one tweet (network error, non-200 status, non-JSON body)
    is logged and the loop moves on to the next tweet, so a single bad
    request does not discard the rest of the timeline. A 200 response whose
    JSON ``code`` field is non-zero is reported through ``api_error``.

    :param response: timeline response; must expose ``tweets`` and a
        ``request`` with ``meta['social_id']`` and ``url``.
    """
    account_id = response.request.meta['social_id']

    for tweet in response.tweets:
        item = self.format_request(to_item(tweet))
        if item['retweet_content'] is not None:
            # The payload is flat form data, so nested retweet content is
            # serialised to a JSON string before stringification.
            item['retweet_content'] = json.dumps(item['retweet_content'])
        item['social_account_id'] = account_id

        # Built once: used for the POST body and, on failure, the report.
        payload = {k: str(item[k]) for k in item}

        try:
            r = requests.post(url=self.commit_url, data=payload, timeout=5)
        except requests.RequestException as exc:
            # Timeouts / connection errors: log and keep going, mirroring
            # how non-200 responses are handled below.
            self.logger.error('{} - {}'.format(self.commit_url, exc))
            continue

        if r.status_code != 200:
            self.logger.error('{} - {}'.format(r.status_code, r.content))
            continue

        try:
            result = r.json()
        except ValueError:
            # A 200 response whose body is not valid JSON.
            self.logger.error('{} - {}'.format(r.status_code, r.content))
            continue

        # A non-zero ``code`` is treated as an API-level failure.
        if int(result['code']) != 0:
            api_error({
                'url': response.request.url,
                'response': json.dumps(result),
                'vars': json.dumps(payload),
            })
def parse(self, response):
    """Turn one page of the account-list API into timeline requests.

    The response body is decoded as JSON. A non-zero ``code`` is reported
    through ``api_error``, logged, and ends processing of this page.
    Otherwise one ``TwitterUserTimelineRequest`` is yielded per entry of
    ``data.list``; when ``self.debug_screen`` is set, only the entry whose
    ``account`` equals it is kept (and logged). ``since_id`` is passed only
    when the stored last content id is not ``0``. Finally, whatever
    ``self.yield_next_page_request`` returns is yielded if it is not None.
    """
    data = json.loads(response.body)

    if data['code'] != 0:
        api_error({'url': response.request.url, 'response': response.body})
        self.logger.error('{} error {}'.format(response.request.url, response.body))
        return

    for entry in data['data']['list']:
        if self.debug_screen is not None:
            if self.debug_screen != entry['account']:
                continue
            self.logger.info('debug screen {} {}'.format(self.debug_screen, entry))

        last_id = entry['last_social_content_id']
        request_meta = {
            'social_id': entry['id'],
            'last_content_id': last_id,
            'screen_name': entry['account'],
            'need_review': entry['need_review'],
        }
        # Only constrain the timeline when a previous content id is known.
        since = {} if str(last_id) == '0' else {'since_id': last_id}

        yield TwitterUserTimelineRequest(
            screen_name=entry['account'],
            count=self.count,
            callback=self.parse_twitter_time_line,
            errback=self.parse_twitter_error,
            meta=request_meta,
            **since
        )

    follow_up = self.yield_next_page_request(response, data)
    if follow_up is not None:
        yield follow_up
def parse(self, response):
    """Turn one page of the account-list API into timeline requests.

    The response body is decoded as JSON. A non-zero ``code`` is reported
    through ``api_error``, logged, and ends processing of this page.
    Otherwise one ``TwitterUserTimelineRequest`` is yielded per entry of
    ``data.list``, followed by whatever ``self.yield_next_page_request``
    returns when that is not None.

    ``since_id`` is only sent when the stored last content id is not ``0``:
    a zero id means nothing has been collected for the account yet, so the
    request is left unconstrained instead of passing a placeholder id.
    """
    data = json.loads(response.body)
    if data['code'] != 0:
        api_error({'url': response.request.url, 'response': response.body})
        self.logger.error('{} error {}'.format(response.request.url, response.body))
        return

    for item in data['data']['list']:
        last_id = item['last_social_content_id']
        kwargs = {
            'screen_name': item['account'],
            'count': self.count,
            'callback': self.parse_twitter_time_line,
            'errback': self.parse_twitter_error,
            'meta': {
                'social_id': item['id'],
                'last_content_id': last_id,
                'screen_name': item['account'],
            },
        }
        # str() so that both 0 and '0' count as "no previous content".
        if str(last_id) != '0':
            kwargs['since_id'] = last_id
        yield TwitterUserTimelineRequest(**kwargs)

    next_page_generator = self.yield_next_page_request(response, data)
    if next_page_generator is not None:
        yield next_page_generator
def parse_error(self, response):
    """Report the URL of the request attached to ``response`` via ``api_error``."""
    failed_url = response.request.url
    api_error({'url': failed_url})
def parse_error(self, response):
    """Report the URL of the request attached to ``response`` via ``api_error``.

    Only the URL is reported; nothing is written to the spider logger here.
    """
    report = {'url': response.request.url}
    api_error(report)