class TwitterService(object):
    """Thin wrapper around TwitterSearch for looking up tweets near a point.

    A single search order is configured once in the constructor and reused
    (and mutated) by every call to ``get_tweets``.
    """

    def __init__(self):
        # Base search order: one empty keyword, a count of 100 and no
        # entities in the results.
        order = TwitterSearchOrder()
        order.set_keywords([''])
        order.set_count(100)
        order.set_include_entities(False)
        self.properties = order

        # API credentials come from the 'twitter' entry of DATASOURCES.
        credentials = DATASOURCES['twitter']
        self.api = TwitterSearch(
            consumer_key=credentials['api_key'],
            consumer_secret=credentials['api_secret'],
            access_token=credentials['access_token'],
            access_token_secret=credentials['access_token_secret'],
        )

    def get_tweets(self, lat, lng):
        """Search for tweets around the given coordinates.

        :param lat: Latitude; converted with ``float()``.
        :param lng: Longitude; converted with ``float()``.
        :return: Whatever ``TwitterSearch.search_tweets`` returns for the
            shared search order.
        """
        latitude, longitude = float(lat), float(lng)
        # Third argument is presumably the search radius (unit depends on
        # the library default) -- TODO confirm.
        self.properties.set_geocode(latitude, longitude, 1)
        return self.api.search_tweets(self.properties)
class TwitterService(object):
    """Geo-based tweet lookup built on the TwitterSearch client.

    The constructor prepares one reusable search order and one API client;
    ``get_tweets`` only updates the geocode on that order before searching.
    """

    def __init__(self):
        self.properties = TwitterSearchOrder()
        # Configure the shared order: a single empty keyword, a count of
        # 100, entities excluded.
        for configure, argument in (
                (self.properties.set_keywords, ['']),
                (self.properties.set_count, 100),
                (self.properties.set_include_entities, False)):
            configure(argument)

        # Map TwitterSearch keyword arguments onto the setting names used
        # in DATASOURCES['twitter'].
        settings = DATASOURCES['twitter']
        setting_names = {
            'consumer_key': 'api_key',
            'consumer_secret': 'api_secret',
            'access_token': 'access_token',
            'access_token_secret': 'access_token_secret',
        }
        client_kwargs = {}
        for keyword, setting in setting_names.items():
            client_kwargs[keyword] = settings[setting]
        self.api = TwitterSearch(**client_kwargs)

    def get_tweets(self, lat, lng):
        """Return the search response for tweets near ``lat``/``lng``.

        Both coordinates are passed through ``float()`` before being set
        as the geocode of the shared search order; the third geocode
        argument is the constant ``1`` (presumably a radius -- verify
        against the TwitterSearch documentation).
        """
        point = (float(lat), float(lng))
        self.properties.set_geocode(point[0], point[1], 1)
        result = self.api.search_tweets(self.properties)
        return result
def getTweets(politician_id, searchOnlySexistWords):
    """Search Twitter for English tweets mentioning a politician.

    The politician is looked up by primary key and searched for by both
    "<first_name> <last_name>" and ``username``.

    :param politician_id: Id passed to ``Politician.objects.get``.
    :param searchOnlySexistWords: When truthy, every word in
        ``CONFIG['SEXISTWORDS']`` is combined with every politician name and
        the results are returned as the iterable produced by
        ``search_tweets_iterable``. When falsy, only the politician names are
        searched and the statuses of a single ``search_tweets`` call are
        returned (the original author notes this is limited to 100 tweets).
    :return: An iterable of tweets, a list of statuses, or ``None`` when a
        ``TwitterSearchException`` was raised (the error is logged).
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        full_name = politician.first_name + " " + politician.last_name
        politician_names = [full_name, politician.username]

        tso = TwitterSearchOrder()
        if searchOnlySexistWords:
            searchTerms = [
                word + ' ' + politician_name
                for word in CONFIG['SEXISTWORDS']
                for politician_name in politician_names
            ]
        else:
            # Any falsy flag (False, None, 0, ...) means "search by name
            # only"; this mirrors the truthiness test used for the return
            # value below, so the two branches can no longer disagree.
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)

        # Rebuild the query URL with tweet_mode=extended appended.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " + str(full_name) + "**")

        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)

        # Single request only when not restricted to sexist words.
        tweets = ts.search_tweets(tso)
        return tweets['content']['statuses']
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of " + str(e))
        return None
class TwitterSearchSensor(PollingSensor):
    """Polling sensor that searches Twitter and dispatches one trigger per tweet.

    Configuration keys read from ``self._config``: ``consumer_key``,
    ``consumer_secret``, ``access_token``, ``access_token_secret``, ``query``
    (must be a list) and the optional ``language`` and ``count``.
    """

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(sensor_service=sensor_service,
                                                  config=config,
                                                  poll_interval=poll_interval)
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Create the Twitter client and validate the ``query`` setting.

        :raises ValueError: If ``query`` in the config is not a list.
        """
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret']
        )
        self._last_id = None

        if not isinstance(self._config['query'], list):
            # No exception is being handled here, so log with error() rather
            # than exception() (which would append a bogus empty traceback).
            self._logger.error('Twitter sensor failed. "query" config '
                               'value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Run one search and dispatch a trigger for every returned tweet."""
        tso = TwitterSearchOrder()
        # Second positional argument is passed as True (presumably the
        # library's or_operator flag -- verify against TwitterSearchOrder).
        tso.set_keywords(self._config['query'], True)

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        # Only ask for tweets newer than the last one already processed.
        last_id = self._get_last_id()
        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # Process in reverse of the order returned by the API; the last
        # element after reversal becomes the new "last id".
        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        """Return the cached last tweet id, loading it from the sensor
        service's ``get_value`` when it is not cached and that method exists."""
        if not self._last_id and hasattr(self._sensor_service, 'get_value'):
            self._last_id = self._sensor_service.get_value(name='last_id')

        return self._last_id

    def _set_last_id(self, last_id):
        """Cache ``last_id`` and persist it through the sensor service's
        ``set_value`` when that method exists."""
        self._last_id = last_id

        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Build the trigger payload for ``tweet`` and dispatch it."""
        trigger = self._trigger_ref

        url = '%s/%s/status/%s' % (BASE_URL, tweet['user']['screen_name'],
                                   tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': tweet['user']['screen_name'],
                'name': tweet['user']['name'],
                'location': tweet['user']['location'],
                'description': tweet['user']['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)
def twitter_search(params, start_time):
    """
    Retrieves most recent tweets since yesterday based on keywords.
    Retrieves as many tweets as api gives, up to the maximum set by max_tweets.

    :param params: The keywords to search for, formatted as list of strings.
        To search for a url, use this syntax:
            "url:\"gizmodo com\""
        in which the domain is separated by spaces instead of dots and the
        internal quotes are escaped with backslashes.
    :param start_time: Passed through unchanged to make_dict for every
        returned entry.
    :return: Returns list of dicts (built by make_dict) containing:
        - tweets: the number of tweets, since yesterday, about the specified
        keywords (up to a maximum count of max_tweets); None on api error
        - tweets_followers: the number of (unique) followers of those tweets
        (i.e., if the same person tweets ten times in one day, that person's
        followers are counted once, not ten times); None on api error
        - most_followed_name: the name of the tweeter who tweeted in 'tweets'
        (above) who has the most followers ('null' if none was determined)
        - most_followed_count: the count of the number of followers who follow
        the tweeter with the most followers (0 if none was determined)
    """
    print('starting twitter_search')

    # Set up flow control variables.
    max_tweets = 10000  # maximum number of tweets to retrieve from api
    more_tweets = True  # are there more tweets to retrieve?
    need_to_sleep = False  # tells to sleep (if approaching api rate limit)
    error = 'ok'

    # Set up counter variables. These are initialised before the try block so
    # that they are always bound when the result dicts are built below, even
    # if creating the TwitterSearch client itself raises.
    tweets = 0  # count of tweets about keywords, since yesterday
    unique_tweeters = {}  # dict of unique tweeters about keywords
    tweets_followers = 0  # count of followers of unique_tweeters
    min_id = 0  # next tweet for paginated results, when multiple api calls
    max_followers = (0, 'null')  # the tweeter with the most followers

    try:
        # create TwitterSearch object using this app's tokens.
        ts = TwitterSearch(
            consumer_key=tw.CONSUMER_KEY,
            consumer_secret=tw.CONSUMER_SECRET,
            access_token=tw.ACCESS_TOKEN,
            access_token_secret=tw.ACCESS_TOKEN_SECRET
        )

        # Create a TwitterSearchOrder object and add keywords to it.
        tso = TwitterSearchOrder()
        for param in params:
            tso.add_keyword(param)

        # Only search for tweets since yesterday (in UTC).
        yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(1)
        tso.set_since(yesterday)

        # Keep calling the api (for paginated results) until there are no
        # more tweets to retrieve, or until max_tweets limit has been reached.
        while more_tweets and tweets < max_tweets:
            # Sleep for 60 seconds, if needed, to avoid hitting api limit.
            if need_to_sleep:
                print("rate limit:", rate_limit)
                time.sleep(60)

            # Call the search api.
            response = ts.search_tweets(tso)
            statuses = response["content"]["statuses"]

            if not statuses:
                # There are no more tweets to retrieve.
                more_tweets = False
            else:
                # Iterate through the batch of tweets retrieved from this
                # api call. Count the tweet and track all the unique tweeters.
                for tweet in statuses:
                    if tweets >= max_tweets:
                        # Stop exactly at max_tweets (the previous '>' test
                        # allowed one tweet too many to be counted).
                        break
                    tweets += 1

                    if (min_id == 0) or (tweet["id"] < min_id):
                        # Set min_id to the id of this tweet. The api returns
                        # tweets in reverse chronological order (most recent
                        # is first), so min_id is a lowering "ceiling" of
                        # which tweet id to start from during subsequent api
                        # call.
                        min_id = tweet["id"]

                    # Add tweet's screen_name and followers_count to
                    # unique_tweeters, iff this is first time seeing
                    # this screen_name.
                    tweeter = tweet["user"]["screen_name"]
                    if tweeter not in unique_tweeters:
                        unique_tweeters[tweeter] = (
                            tweet["user"]["followers_count"])

                # Set the next paginated result's start point (subtract one
                # to avoid retrieving the last tweet from this batch twice).
                tso.set_max_id(min_id - 1)

            # If less than 15 api calls remaining then sleep during next loop.
            # (Search api free tier allows 180 calls per 15 minute period.)
            rate_limit = int(ts.get_metadata()["x-rate-limit-remaining"])
            need_to_sleep = rate_limit < 15

        # After all tweets have been retrieved (up to max_tweets), calculate
        # metrics on the followers of the tweeters in unique_tweeters.
        for tweeter, followers in unique_tweeters.items():
            # Count how many followers there are in all the unique_tweeters.
            tweets_followers += followers
            # Determine which tweeter from unique_tweeters has most followers.
            if followers > max_followers[0]:
                max_followers = (followers, tweeter)

    except TwitterSearchException as e:
        tweets = None
        tweets_followers = None
        error = format_exception(ValueError, e, e.__traceback__)

    tweets_entry = make_dict(
        value=tweets,
        data_name='tweets',
        start_time=start_time,
        status=error
    )
    tweets_followers_entry = make_dict(
        value=tweets_followers,
        data_name='tweets_followers',
        start_time=start_time,
        status=error
    )
    most_followed_name = make_dict(
        value=escape(max_followers[1], True),
        data_name='most_followed_name',
        start_time=start_time,
        status=error
    )
    most_followed_count = make_dict(
        value=max_followers[0],
        data_name='most_followed_count',
        start_time=start_time,
        status=error
    )

    return [tweets_entry, tweets_followers_entry, most_followed_name,
            most_followed_count]
class TwitterSearchSensor(PollingSensor):
    """Sensor that periodically searches Twitter for the configured query
    and dispatches the ``twitter.matched_tweet`` trigger for each result.

    Reads ``consumer_key``, ``consumer_secret``, ``access_token``,
    ``access_token_secret`` and ``query`` (a list) from the config, plus the
    optional ``language`` and ``count`` settings.
    """

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(sensor_service=sensor_service,
                                                  config=config,
                                                  poll_interval=poll_interval)
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Instantiate the TwitterSearch client and check the config.

        :raises ValueError: When the ``query`` config value is not a list.
        """
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret'])
        self._last_id = None

        if not isinstance(self._config['query'], list):
            # error() instead of exception(): there is no active exception
            # at this point, so exception() would log an empty traceback.
            self._logger.error('Twitter sensor failed. "query" config '
                               'value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Search for recent tweets and dispatch a trigger for each one."""
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config['query'])

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        # Restrict the search to tweets after the last processed id, if any.
        last_id = self._get_last_id()
        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # Reverse the API order before dispatching; the final element of
        # the reversed list is stored as the new last id.
        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        """Return the last processed tweet id, reading it via the sensor
        service's ``get_value`` if nothing is cached and the method exists."""
        if not self._last_id and hasattr(self._sensor_service, 'get_value'):
            self._last_id = self._sensor_service.get_value(name='last_id')

        return self._last_id

    def _set_last_id(self, last_id):
        """Remember ``last_id`` locally and, when the sensor service offers
        ``set_value``, store it there under the name ``last_id``."""
        self._last_id = last_id

        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Assemble the payload for one tweet and dispatch the trigger."""
        trigger = self._trigger_ref

        url = '%s/%s/status/%s' % (BASE_URL, tweet['user']['screen_name'],
                                   tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': tweet['user']['screen_name'],
                'name': tweet['user']['name'],
                'location': tweet['user']['location'],
                'description': tweet['user']['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)
class TwitterSearchSensor(PollingSensor):
    """Twitter search sensor: polls the search API and emits one trigger
    (``twitter.matched_tweet``) per tweet found.

    Config keys used: ``consumer_key``, ``consumer_secret``,
    ``access_token``, ``access_token_secret``, ``query`` (list) and the
    optional ``language`` and ``count``.
    """

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(
            sensor_service=sensor_service, config=config, poll_interval=poll_interval
        )
        self._trigger_ref = "twitter.matched_tweet"
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Build the API client and validate that ``query`` is a list.

        :raises ValueError: If the ``query`` config value is not a list.
        """
        self._client = TwitterSearch(
            consumer_key=self._config["consumer_key"],
            consumer_secret=self._config["consumer_secret"],
            access_token=self._config["access_token"],
            access_token_secret=self._config["access_token_secret"],
        )
        self._last_id = None

        if not isinstance(self._config["query"], list):
            # Use error(): exception() is only meaningful inside an except
            # block and would otherwise log an empty traceback here.
            self._logger.error(
                'Twitter sensor failed. "query" config value is not a list'
            )
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Query Twitter once and dispatch a trigger per returned tweet."""
        tso = TwitterSearchOrder()
        tso.set_keywords(self._config["query"])

        language = self._config.get("language", None)
        if language:
            tso.set_language(language)

        tso.set_result_type("recent")
        tso.set_count(self._config.get("count", 30))
        tso.set_include_entities(False)

        # Skip tweets at or before the last processed id when one is known.
        last_id = self._get_last_id()
        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets["content"]["statuses"]
        except Exception as e:
            self._logger.exception("Polling Twitter failed: %s" % (str(e)))
            return

        # Dispatch in reversed API order; the last reversed element's id is
        # recorded as the new last id.
        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]["id"])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        """Return the cached last id, fetching it with the sensor service's
        ``get_value`` when it is unset and that method is available."""
        if not self._last_id and hasattr(self._sensor_service, "get_value"):
            self._last_id = self._sensor_service.get_value(name="last_id")

        return self._last_id

    def _set_last_id(self, last_id):
        """Store ``last_id`` on the instance and through the sensor
        service's ``set_value`` when that method is available."""
        self._last_id = last_id

        if hasattr(self._sensor_service, "set_value"):
            self._sensor_service.set_value(name="last_id", value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Create the payload dict for ``tweet`` and dispatch the trigger."""
        trigger = self._trigger_ref

        url = "%s/%s/status/%s" % (BASE_URL, tweet["user"]["screen_name"], tweet["id"])
        payload = {
            "id": tweet["id"],
            "created_at": tweet["created_at"],
            "lang": tweet["lang"],
            "place": tweet["place"],
            "retweet_count": tweet["retweet_count"],
            "favorite_count": tweet["favorite_count"],
            "user": {
                "screen_name": tweet["user"]["screen_name"],
                "name": tweet["user"]["name"],
                "location": tweet["user"]["location"],
                "description": tweet["user"]["description"],
            },
            "text": tweet["text"],
            "url": url,
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)