def next(self):
    """Fetch a set of items with IDs less than current set."""
    if self.limit and self.limit == self.num_tweets:
        raise StopIteration

    if self.index >= len(self.results) - 1:
        data = self.method(max_id=self.max_id,
                           parser=RawParser(),
                           *self.args,
                           **self.kwargs)

        # This is a special invocation that returns the underlying
        # APIMethod class
        model = ModelParser().parse(self.method(create=True), data)
        result = self.method.__self__.parser.parse(
            self.method(create=True), data)

        if len(self.results) != 0:
            self.index += 1
        self.results.append(result)
        self.model_results.append(model)
    else:
        self.index += 1
        result = self.results[self.index]
        model = self.model_results[self.index]

    if len(result) == 0:
        raise StopIteration

    # TODO: Make this not dependent on the parser making max_id and
    # since_id available
    self.max_id = model.max_id
    self.num_tweets += 1

    return result
def _get_tweepy_api() -> tweepy.API:
    """Return an authenticated tweepy api object configured for retries."""
    config = TopicsBaseConfig()
    twitter_config = config.twitter_api()

    auth = tweepy.OAuthHandler(twitter_config.consumer_key(),
                               twitter_config.consumer_secret())
    auth.set_access_token(twitter_config.access_token(),
                          twitter_config.access_token_secret())

    # the RawParser lets us directly decode from json to dict below
    api = tweepy.API(
        auth_handler=auth,
        retry_delay=TWITTER_RETRY_DELAY,
        retry_count=TWITTER_RETRY_COUNT,
        retry_errors=TWITTER_RETRY_ERRORS,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
        parser=RawParser())

    return api
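A hedged usage sketch for the helper above: because the API object is built with RawParser(), endpoint calls return the raw response body (a JSON string) instead of tweepy model objects, so the caller decodes it with json.loads. TopicsBaseConfig and the TWITTER_RETRY_* constants are assumed to come from the surrounding module; fetch_user_raw is a hypothetical caller added here for illustration.

import json

def fetch_user_raw(screen_name):
    """Sketch: look up a user and get a plain dict back."""
    api = _get_tweepy_api()
    # RawParser returns the undecoded payload; decode it ourselves
    raw = api.get_user(screen_name=screen_name)
    return json.loads(raw)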
def next(self):
    """Fetch a set of items with IDs less than current set."""
    if self.num_tweets >= self.limit:
        raise StopIteration

    if self.index >= len(self.results) - 1:
        data = self.method(max_id=self.max_id,
                           parser=RawParser(),
                           *self.args,
                           **self.kwargs)

        model = ModelParser().parse(
            data, api=self.method.__self__,
            payload_list=self.method.payload_list,
            payload_type=self.method.payload_type)
        result = self.method.__self__.parser.parse(
            data, api=self.method.__self__,
            payload_list=self.method.payload_list,
            payload_type=self.method.payload_type)

        if len(self.results) != 0:
            self.index += 1
        self.results.append(result)
        self.model_results.append(model)
    else:
        self.index += 1
        result = self.results[self.index]
        model = self.model_results[self.index]

    if len(result) == 0:
        raise StopIteration

    # TODO: Make this not dependent on the parser making max_id and
    # since_id available
    self.max_id = model.max_id
    self.num_tweets += 1

    return result
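A hedged sketch of how the patched iterator above is exercised: tweepy.Cursor drives next() internally, so with the double parse (ModelParser for model.max_id, the api's own parser for the yielded result) an API constructed with parser=RawParser() can still page by max_id while handing back raw pages. Assumes an authenticated `api` built with RawParser; iterate_raw_pages is an illustrative helper, not part of the original code.

import json
from tweepy import Cursor

def iterate_raw_pages(api, screen_name, pages=3):
    """Sketch: page through a timeline; each page arrives as raw JSON."""
    for raw_page in Cursor(api.user_timeline,
                           screen_name=screen_name).pages(pages):
        # api.parser is RawParser, so each page is an undecoded payload
        statuses = json.loads(raw_page)
        for status in statuses:
            print(status['id'], status['text'][:50])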
def collect_tweets():
    """Collect new tweets about Firefox."""
    with statsd.timer('customercare.tweets.time_elapsed'):
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)
        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)
        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what
        # we have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      settings.CC_TWEETS_PERPAGE)
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError as e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving.
            # Allow links in #fxinput tweets.
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            created_date = datetime.utcfromtimestamp(
                calendar.timegm(rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')

            tweet = Tweet(tweet_id=item['id'], raw_json=json.dumps(item),
                          locale=item_lang, created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                pass
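collect_tweets() relies on a _filter_tweet helper that is not shown here. A minimal, hypothetical sketch of its apparent contract follows, assuming it returns the (possibly modified) item when it passes and a falsy value to drop it; the real filter almost certainly applies more rules than this.

import re

_LINK_RE = re.compile(r'https?://|www\.', re.IGNORECASE)

def _filter_tweet_sketch(item, allow_links=False):
    """Hypothetical stand-in for _filter_tweet: drop retweets and,
    unless allow_links is set, anything containing a link."""
    text = item.get('text', '')
    if text.lower().startswith('rt @'):
        return None
    if not allow_links and _LINK_RE.search(text):
        return None
    return item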
import codecs

import tweepy
from tweepy.parsers import RawParser

fhLog = codecs.open("LOG.txt", 'a', 'UTF-8')

def logPrint(s):
    fhLog.write("%s\n" % s)
    print(s)

# Update this line with the terms you want to search for
terms = ["term1", "term2", "term3"]

from auth import TwitterAuth

auth = tweepy.OAuthHandler(TwitterAuth.consumer_key,
                           TwitterAuth.consumer_secret)
auth.set_access_token(TwitterAuth.access_token,
                      TwitterAuth.access_token_secret)
rawParser = RawParser()
api = tweepy.API(auth_handler=auth, parser=rawParser)

fhOverall = None
allTweets = {}
termCnt = 0
for term in terms:
    termCnt += 1
    logPrint("Getting term %s (%s of %s)" % (term, termCnt, len(terms)))
    minid = None  # Lowest id we've seen so far, start at None
    count = 1
    while True:
        try:
            fh = open("output/" + term + "_" + str(count) + ".json", "r")
            result = fh.read()
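The fragment above is cut off inside its caching loop. A hedged sketch of the max_id paging that the minid bookkeeping points toward, assuming the v1.1 search payload with a 'statuses' list; fetch_term_pages is an illustrative helper, and the file naming mirrors the output/<term>_<count>.json scheme above.

import json

def fetch_term_pages(api, term, max_pages=10):
    """Sketch: page a search term backwards, caching each raw page."""
    minid = None
    for count in range(1, max_pages + 1):
        kwargs = {'q': term, 'count': 100}
        if minid is not None:
            kwargs['max_id'] = minid - 1  # only tweets older than seen
        raw = api.search(**kwargs)  # RawParser: raw JSON string
        statuses = json.loads(raw).get('statuses', [])
        if not statuses:
            break
        with open("output/%s_%d.json" % (term, count), "w") as fh:
            fh.write(raw)
        minid = min(s['id'] for s in statuses)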
def upload_chunked(self, filename, *args, **kwargs):
    """ :reference: https://dev.twitter.com/rest/reference/post/media/upload-chunked
        :allowed_param:
    """
    f = kwargs.pop('file', None)

    # Media category is dependent on whether media is attached to a tweet
    # or to a direct message. Assume tweet by default.
    is_direct_message = kwargs.pop('is_direct_message', False)

    # Initialize upload (Twitter cannot handle videos > 15 MB)
    headers, post_data, fp = API._chunk_media(
        'init', filename, self.max_size_chunked, form_field='media', f=f,
        is_direct_message=is_direct_message)
    kwargs.update({'headers': headers, 'post_data': post_data})

    # Send the INIT request
    media_info = bind_api(
        api=self,
        path='/media/upload.json',
        method='POST',
        payload_type='media',
        allowed_param=[],
        require_auth=True,
        upload_api=True)(*args, **kwargs)

    # If a media ID has been generated, we can send the file
    if media_info.media_id:
        # Default chunk size is 1 MB and can be overridden with a keyword
        # argument. The minimum chunk size is 16 KB, which keeps the
        # maximum number of chunks under 999.
        chunk_size = kwargs.pop('chunk_size', 1024 * 1024)
        chunk_size = max(chunk_size, 16 * 1024)

        fsize = getfilesize(filename, f)
        nloops = int(fsize / chunk_size) + (1 if fsize % chunk_size > 0 else 0)
        for i in range(nloops):
            headers, post_data, fp = API._chunk_media(
                'append', filename, self.max_size_chunked,
                chunk_size=chunk_size, f=fp,
                media_id=media_info.media_id, segment_index=i,
                is_direct_message=is_direct_message)
            kwargs.update({
                'headers': headers,
                'post_data': post_data,
                'parser': RawParser()
            })
            # The APPEND command returns an empty response body
            bind_api(
                api=self,
                path='/media/upload.json',
                method='POST',
                payload_type='media',
                allowed_param=[],
                require_auth=True,
                upload_api=True)(*args, **kwargs)

        # When all chunks have been sent, we can finalize.
        headers, post_data, fp = API._chunk_media(
            'finalize', filename, self.max_size_chunked,
            media_id=media_info.media_id,
            is_direct_message=is_direct_message)
        kwargs = {'headers': headers, 'post_data': post_data}

        # The FINALIZE command returns media information
        return bind_api(
            api=self,
            path='/media/upload.json',
            method='POST',
            payload_type='media',
            allowed_param=[],
            require_auth=True,
            upload_api=True)(*args, **kwargs)
    else:
        return media_info
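A hedged usage sketch for the chunked uploader above: upload a video, then attach the returned media id to a status update. update_status(media_ids=...) is standard tweepy; `api` is assumed to be an instance of the API subclass that defines upload_chunked and max_size_chunked, and tweet_video_sketch is an illustrative helper.

def tweet_video_sketch(api, path, text):
    """Sketch: chunk-upload a video and post it with a status."""
    # FINALIZE returns a Media model whose media_id identifies the upload
    media_info = api.upload_chunked(path, chunk_size=4 * 1024 * 1024)
    return api.update_status(status=text, media_ids=[media_info.media_id])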