Example #1
    def next(self):
        """Fetch a set of items with IDs less than current set."""
        if self.limit and self.limit == self.num_tweets:
            raise StopIteration

        if self.index >= len(self.results) - 1:
            data = self.method(max_id=self.max_id,
                               parser=RawParser(),
                               *self.args,
                               **self.kwargs)

            # This is a special invocation that returns the underlying
            # APIMethod class
            model = ModelParser().parse(self.method(create=True), data)
            result = self.method.__self__.parser.parse(
                self.method(create=True), data)

            if len(self.results) != 0:
                self.index += 1
            self.results.append(result)
            self.model_results.append(model)
        else:
            self.index += 1
            result = self.results[self.index]
            model = self.model_results[self.index]

        if len(result) == 0:
            raise StopIteration
        # TODO: Make this not dependent on the parser making max_id and
        # since_id available
        self.max_id = model.max_id
        self.num_tweets += 1
        return result
Example #2
def _get_tweepy_api() -> tweepy.API:
    """Return an authenticated tweepy api object configured for retries."""

    config = TopicsBaseConfig()
    twitter_config = config.twitter_api()

    auth = tweepy.OAuthHandler(twitter_config.consumer_key(), twitter_config.consumer_secret())
    auth.set_access_token(twitter_config.access_token(), twitter_config.access_token_secret())

    # the RawParser lets us directly decode from json to dict below
    api = tweepy.API(
        auth_handler=auth,
        retry_delay=TWITTER_RETRY_DELAY,
        retry_count=TWITTER_RETRY_COUNT,
        retry_errors=TWITTER_RETRY_ERRORS,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
        parser=RawParser())

    return api
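Because the API object above is built with parser=RawParser(), each call returns the raw JSON payload rather than tweepy model objects. A minimal usage sketch under that assumption (this targets tweepy 3.x, which the wait_on_rate_limit_notify argument implies; the endpoint, screen name, and decoding step are illustrative, not part of the example above):

import json

api = _get_tweepy_api()

# With RawParser, tweepy hands back the undecoded JSON string, so decode it ourselves.
raw = api.user_timeline(screen_name='twitter', count=10)
timeline = json.loads(raw)
for status in timeline:
    print(status['id'], status['text'])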
Example #3
    def next(self):
        """Fetch a set of items with IDs less than current set."""
        if self.num_tweets >= self.limit:
            raise StopIteration

        if self.index >= len(self.results) - 1:
            data = self.method(max_id=self.max_id,
                               parser=RawParser(),
                               *self.args,
                               **self.kwargs)

            model = ModelParser().parse(data,
                                        api=self.method.__self__,
                                        payload_list=self.method.payload_list,
                                        payload_type=self.method.payload_type)
            result = self.method.__self__.parser.parse(
                data,
                api=self.method.__self__,
                payload_list=self.method.payload_list,
                payload_type=self.method.payload_type)

            if len(self.results) != 0:
                self.index += 1
            self.results.append(result)
            self.model_results.append(model)
        else:
            self.index += 1
            result = self.results[self.index]
            model = self.model_results[self.index]

        if len(result) == 0:
            raise StopIteration
        # TODO: Make this not dependent on the parser making max_id and
        # since_id available
        self.max_id = model.max_id
        self.num_tweets += 1
        return result
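Both next() implementations above back tweepy's max_id-based pagination. In application code this iterator is usually driven through tweepy.Cursor rather than called directly; a minimal sketch, assuming api is an authenticated tweepy.API (v3.x) using the default ModelParser, with a hypothetical five-page limit:

import tweepy

# Each page is one batch produced by a next() call like the ones above.
for page in tweepy.Cursor(api.user_timeline, count=200).pages(5):
    for status in page:
        print(status.id, status.text)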
Example #4
def collect_tweets():
    """Collect new tweets about Firefox."""
    with statsd.timer('customercare.tweets.time_elapsed'):
        auth = tweepy.OAuthHandler(settings.TWITTER_CONSUMER_KEY,
                                   settings.TWITTER_CONSUMER_SECRET,
                                   secure=True)

        auth.set_access_token(settings.TWITTER_ACCESS_TOKEN,
                              settings.TWITTER_ACCESS_TOKEN_SECRET)

        api = tweepy.API(auth, parser=RawParser())

        search_options = {
            'q': 'firefox OR #fxinput',
            'rpp': settings.CC_TWEETS_PERPAGE,  # Items per page.
            'result_type': 'recent',  # Retrieve tweets by date.
        }

        # If we already have some tweets, collect nothing older than what we have.
        try:
            latest_tweet = Tweet.latest()
        except Tweet.DoesNotExist:
            log.debug('No existing tweets. Retrieving %d tweets from search.' %
                      (settings.CC_TWEETS_PERPAGE))
        else:
            search_options['since_id'] = latest_tweet.tweet_id
            log.info('Retrieving tweets with id >= %s' % latest_tweet.tweet_id)

        # Retrieve Tweets
        try:
            raw_data = json.loads(str(api.search(**search_options)))
        except tweepy.TweepError as e:
            log.warning('Twitter request failed: %s' % e)
            return

        if not ('results' in raw_data and raw_data['results']):
            # Twitter returned 0 results.
            return

        # Drop tweets into DB
        for item in raw_data['results']:
            # Apply filters to tweet before saving
            # Allow links in #fxinput tweets
            statsd.incr('customercare.tweet.collected')
            item = _filter_tweet(item, allow_links='#fxinput' in item['text'])
            if not item:
                continue

            created_date = datetime.utcfromtimestamp(
                calendar.timegm(rfc822.parsedate(item['created_at'])))

            item_lang = item.get('iso_language_code', 'en')
            tweet = Tweet(tweet_id=item['id'],
                          raw_json=json.dumps(item),
                          locale=item_lang,
                          created=created_date)
            try:
                tweet.save()
                statsd.incr('customercare.tweet.saved')
            except IntegrityError:
                pass
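The created_date conversion above turns Twitter's RFC 2822-style created_at string into a UTC datetime. A standalone sketch of the same steps, using email.utils in place of the Python 2-only rfc822 module (the sample timestamp is made up):

import calendar
from datetime import datetime
from email.utils import parsedate

created_at = 'Fri, 27 Aug 2010 13:26:41 +0000'  # illustrative value in the old search API's format
time_tuple = parsedate(created_at)               # 9-tuple, already expressed in UTC here
created_date = datetime.utcfromtimestamp(calendar.timegm(time_tuple))
print(created_date)  # 2010-08-27 13:26:41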
Example #5
import codecs

import tweepy
from tweepy.parsers import RawParser

fhLog = codecs.open("LOG.txt", 'a', 'UTF-8')
def logPrint(s):
	fhLog.write("%s\n" % s)
	print(s)

#Update this line with the terms you want to search for
terms =  ["term1","term2","term3"]


from auth import TwitterAuth

auth = tweepy.OAuthHandler(TwitterAuth.consumer_key, TwitterAuth.consumer_secret)
auth.set_access_token(TwitterAuth.access_token, TwitterAuth.access_token_secret)

rawParser = RawParser()
api = tweepy.API(auth_handler=auth, parser=rawParser)

fhOverall=None
allTweets = {}

termCnt=0
for term in terms:
	termCnt+=1
	logPrint("Getting term %s (%s of %s)"%(term,termCnt,len(terms)))
	minid=None #Lowest id we've seen so far, start at None
	count=1
	while True:
		try:
			fh=open("output/"+term+"_" + str(count) + ".json","r")
			result=fh.read()
Example #6
    def upload_chunked(self, filename, *args, **kwargs):
        """ :reference https://dev.twitter.com/rest/reference/post/media/upload-chunked
            :allowed_param:
        """
        f = kwargs.pop('file', None)

        # Media category is dependent on whether media is attached to a tweet
        # or to a direct message. Assume tweet by default.
        is_direct_message = kwargs.pop('is_direct_message', False)

        # Initialize upload (Twitter cannot handle videos > 15 MB)
        headers, post_data, fp = API._chunk_media(
            'init',
            filename,
            self.max_size_chunked,
            form_field='media',
            f=f,
            is_direct_message=is_direct_message)
        kwargs.update({'headers': headers, 'post_data': post_data})

        # Send the INIT request
        media_info = bind_api(api=self,
                              path='/media/upload.json',
                              method='POST',
                              payload_type='media',
                              allowed_param=[],
                              require_auth=True,
                              upload_api=True)(*args, **kwargs)

        # If a media ID has been generated, we can send the file
        if media_info.media_id:
            # default chunk size is 1MB, can be overridden with keyword argument.
            # minimum chunk size is 16K, which keeps the maximum number of chunks under 999
            chunk_size = kwargs.pop('chunk_size', 1024 * 1024)
            chunk_size = max(chunk_size, 16 * 1024)

            fsize = getfilesize(filename, f)
            nloops = int(
                fsize / chunk_size) + (1 if fsize % chunk_size > 0 else 0)
            for i in range(nloops):
                headers, post_data, fp = API._chunk_media(
                    'append',
                    filename,
                    self.max_size_chunked,
                    chunk_size=chunk_size,
                    f=fp,
                    media_id=media_info.media_id,
                    segment_index=i,
                    is_direct_message=is_direct_message)
                kwargs.update({
                    'headers': headers,
                    'post_data': post_data,
                    'parser': RawParser()
                })
                # The APPEND command returns an empty response body
                bind_api(api=self,
                         path='/media/upload.json',
                         method='POST',
                         payload_type='media',
                         allowed_param=[],
                         require_auth=True,
                         upload_api=True)(*args, **kwargs)
            # When all chunks have been sent, we can finalize.
            headers, post_data, fp = API._chunk_media(
                'finalize',
                filename,
                self.max_size_chunked,
                media_id=media_info.media_id,
                is_direct_message=is_direct_message)
            kwargs = {'headers': headers, 'post_data': post_data}

            # The FINALIZE command returns media information
            return bind_api(api=self,
                            path='/media/upload.json',
                            method='POST',
                            payload_type='media',
                            allowed_param=[],
                            require_auth=True,
                            upload_api=True)(*args, **kwargs)
        else:
            return media_info
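The APPEND loop above issues one request per chunk, and nloops is simply a ceiling division of the file size by the chunk size. A small worked sketch of that calculation (the file size is a made-up value):

chunk_size = 1024 * 1024   # 1 MB, the default used above
fsize = 2_500_000          # hypothetical media size in bytes
nloops = int(fsize / chunk_size) + (1 if fsize % chunk_size > 0 else 0)
print(nloops)  # 3 -> segment_index runs 0, 1, 2 before the FINALIZE request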