Example #1
0
 def parse_twitter_time_line(self, response):
     """Forward every tweet of a fetched timeline to ``self.commit_url``.

     Each tweet in ``response.tweets`` is converted with ``to_item`` and
     ``self.format_request``, tagged with the ``social_id`` taken from the
     request meta, and POSTed as form data with every value stringified.

     A failure on one tweet (network error, non-200 status, non-JSON body)
     is logged and the loop moves on to the next tweet, so a single bad
     request does not discard the rest of the timeline. A JSON reply whose
     ``code`` is not 0 is reported through ``api_error``.

     :param response: timeline response exposing ``tweets`` and
         ``request.meta`` (keys ``social_id`` and ``screen_name`` are read).
     """
     meta = response.request.meta
     account_id = meta['social_id']
     account = meta['screen_name']

     for tweet in response.tweets:
         item = self.format_request(to_item(tweet))
         if item['retweet_content'] is not None:
             item['retweet_content'] = json.dumps(item['retweet_content'])
         item['social_account_id'] = account_id

         # Built once: the same stringified payload is both sent and, on an
         # API-level failure, attached to the error report.
         payload = {k: str(item[k]) for k in item}

         try:
             r = requests.post(url=self.commit_url, data=payload, timeout=5)
         except requests.RequestException as exc:
             # Timeouts / connection errors must not abort the remaining tweets.
             self.logger.error('post {} failed for {}: {}'.format(
                 self.commit_url, account, exc))
             continue

         if r.status_code != 200:
             self.logger.error('{} - {}'.format(r.status_code, r.content))
             continue

         try:
             result = r.json()
         except ValueError:
             # A 200 reply that is not JSON cannot be checked for ``code``.
             self.logger.error('invalid json from {}: {}'.format(
                 self.commit_url, r.content))
             continue

         if int(result['code']) != 0:
             api_error({
                 'url': response.request.url,
                 'response': json.dumps(result),
                 'vars': json.dumps(payload)
             })
Example #2
0
    def parse(self, response):
        """Turn one page of the account listing into timeline requests.

        The JSON body is decoded first. If its ``code`` is not 0 the failure
        is reported through ``api_error``, logged, and nothing is yielded.
        Otherwise one ``TwitterUserTimelineRequest`` is yielded per entry of
        ``data.list``; when ``self.debug_screen`` is set, only the entry whose
        ``account`` equals it is processed (and logged). ``since_id`` is added
        only when ``last_social_content_id`` is not ``'0'``. Finally, whatever
        ``self.yield_next_page_request`` returns is yielded unless it is None.
        """
        payload = json.loads(response.body)

        if payload['code'] != 0:
            failed_url = response.request.url
            api_error({'url': failed_url, 'response': response.body})
            self.logger.error('{} error {}'.format(failed_url, response.body))
            return

        for entry in payload['data']['list']:
            debugging = self.debug_screen is not None
            if debugging and self.debug_screen != entry['account']:
                # Debug mode: ignore every account except the selected one.
                continue
            if debugging:
                self.logger.info('debug screen {} {}'.format(self.debug_screen, entry))

            account = entry['account']
            request_args = {
                'screen_name': account,
                'count': self.count,
                'callback': self.parse_twitter_time_line,
                'errback': self.parse_twitter_error,
                'meta': {
                    'social_id': entry['id'],
                    'last_content_id': entry['last_social_content_id'],
                    'screen_name': account,
                    'need_review': entry['need_review'],
                },
            }

            last_seen = request_args['meta']['last_content_id']
            if str(last_seen) != '0':
                request_args['since_id'] = last_seen

            yield TwitterUserTimelineRequest(**request_args)

        follow_up = self.yield_next_page_request(response, payload)
        if follow_up is None:
            return
        yield follow_up
Example #3
0
    def parse(self, response):
        """Turn one page of the account listing into timeline requests.

        The JSON body is decoded first. If its ``code`` is not 0 the failure
        is reported through ``api_error``, logged, and nothing is yielded.
        Otherwise one ``TwitterUserTimelineRequest`` is yielded per entry of
        ``data.list``, always carrying the entry's ``last_social_content_id``
        as ``since_id``. Finally, whatever ``self.yield_next_page_request``
        returns is yielded unless it is None.
        """
        payload = json.loads(response.body)

        if payload['code'] != 0:
            failed_url = response.request.url
            api_error({'url': failed_url, 'response': response.body})
            self.logger.error('{} error {}'.format(failed_url, response.body))
            return

        for entry in payload['data']['list']:
            account = entry['account']
            last_seen = entry['last_social_content_id']
            request_meta = {
                'social_id': entry['id'],
                'last_content_id': last_seen,
                'screen_name': account,
            }
            yield TwitterUserTimelineRequest(
                screen_name=account,
                count=self.count,
                since_id=last_seen,
                callback=self.parse_twitter_time_line,
                errback=self.parse_twitter_error,
                meta=request_meta)

        follow_up = self.yield_next_page_request(response, payload)
        if follow_up is None:
            return
        yield follow_up
Example #4
0
 def parse_error(self, response):
     """Report the URL of a failed request through ``api_error``.

     :param response: object whose ``request.url`` identifies the request
         that failed.
     """
     api_error({'url': response.request.url})