Example #1
File: __init__.py Project: rybo449/tlc
# From the TwitterSearch library (pip install TwitterSearch); DATASOURCES is
# assumed to be the project's settings dict holding the Twitter credentials.
from TwitterSearch import TwitterSearch, TwitterSearchOrder


class TwitterService(object):
    def __init__(self):
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])
        self.properties.set_count(100)
        self.properties.set_include_entities(False)
        self.api = TwitterSearch(consumer_key=DATASOURCES['twitter']['api_key'],
                                 consumer_secret=DATASOURCES['twitter']['api_secret'],
                                 access_token=DATASOURCES['twitter']['access_token'],
                                 access_token_secret=DATASOURCES['twitter']['access_token_secret'])

    def get_tweets(self, lat, lng):
        self.properties.set_geocode(float(lat), float(lng), 1)
        response = self.api.search_tweets(self.properties)
        return response
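A minimal usage sketch for the snippet above (not part of the original project). It assumes DATASOURCES already holds valid Twitter credentials; the coordinates are purely illustrative, and the radius of 1 comes from the set_geocode call in get_tweets.

service = TwitterService()
# Search for recent tweets around the given latitude/longitude.
response = service.get_tweets(40.7128, -74.0060)
for status in response['content']['statuses']:
    print(status['text'])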
Example #2
class TwitterService(object):
    def __init__(self):
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])
        self.properties.set_count(100)
        self.properties.set_include_entities(False)
        self.api = TwitterSearch(
            consumer_key=DATASOURCES['twitter']['api_key'],
            consumer_secret=DATASOURCES['twitter']['api_secret'],
            access_token=DATASOURCES['twitter']['access_token'],
            access_token_secret=DATASOURCES['twitter']['access_token_secret'])

    def get_tweets(self, lat, lng):
        self.properties.set_geocode(float(lat), float(lng), 1)
        response = self.api.search_tweets(self.properties)
        return response
Example #3
# Assumed context for this snippet: TwitterSearch is the pip-installable
# library of the same name; Politician (a Django model) and CONFIG (a settings
# dict) are defined elsewhere in the surrounding project.
import logging

from TwitterSearch import (TwitterSearch, TwitterSearchOrder,
                           TwitterSearchException)


def getTweets(politician_id, searchOnlySexistWords):
    try:

        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.username
        ]

        tso = TwitterSearchOrder()
        searchTerms = []

        if searchOnlySexistWords:
            sexistWords = CONFIG['SEXISTWORDS']
            for word in sexistWords:
                for politician_name in politician_names:
                    searchTerms.append(word + ' ' + politician_name)
        elif searchOnlySexistWords is False:
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Append tweet_mode=extended to the generated query URL so the API
        # returns the full, untruncated tweet text.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")
        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " +
              str(politician.first_name + " " + politician.last_name) + "**")
        if searchOnlySexistWords:
            tweets = ts.search_tweets_iterable(tso)
            return tweets
        else:
            # A plain search_tweets call returns a single page, so results are
            # limited to 100 tweets when not searching only for sexist words.
            tweets = ts.search_tweets(tso)
            return tweets['content']['statuses']

    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
Example #4
class TwitterSearchSensor(PollingSensor):
    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(sensor_service=sensor_service,
                                                  config=config,
                                                  poll_interval=poll_interval)
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret']
        )
        self._last_id = None

        if type(self._config['query']) is not list:
            self._logger.exception('Twitter sensor failed. "query" config '
                                   'value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        tso = TwitterSearchOrder()
        # Second argument is the or_operator flag: match any of the configured
        # keywords rather than requiring all of them.
        tso.set_keywords(self._config['query'], True)

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        if not self._last_id and hasattr(self._sensor_service, 'get_value'):
            self._last_id = self._sensor_service.get_value(name='last_id')

        return self._last_id

    def _set_last_id(self, last_id):
        self._last_id = last_id

        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        trigger = self._trigger_ref

        url = '%s/%s/status/%s' % (BASE_URL, tweet['user']['screen_name'], tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': tweet['user']['screen_name'],
                'name': tweet['user']['name'],
                'location': tweet['user']['location'],
                'description': tweet['user']['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)
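A minimal local-driving sketch, not taken from the pack itself: it stubs out StackStorm's sensor_service and feeds the sensor an illustrative config. It assumes the snippet's own module-level imports (PollingSensor, TwitterSearch, TwitterSearchOrder, BASE_URL) are in place, and the credential values and query list are placeholders to be replaced before running.

import logging


class StubSensorService(object):
    # Bare-bones stand-in for StackStorm's sensor_service, for local experiments.
    def __init__(self):
        self._store = {}

    def get_logger(self, name):
        return logging.getLogger(name)

    def get_value(self, name):
        return self._store.get(name)

    def set_value(self, name, value):
        self._store[name] = value

    def dispatch(self, trigger, payload):
        print(trigger, payload['url'])


config = {
    'consumer_key': '...',                # placeholder credentials
    'consumer_secret': '...',
    'access_token': '...',
    'access_token_secret': '...',
    'query': ['stackstorm', 'chatops'],   # must be a list, per setup()
    'count': 30,
}

sensor = TwitterSearchSensor(sensor_service=StubSensorService(), config=config)
sensor.setup()
sensor.poll()  # one polling cycle; dispatches one trigger per new matching tweet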
Example #5
def twitter_search(params, start_time):
    """
    Retrieves most recent tweets since yesterday based on keywords.
    Retrieves as many tweets as api gives, up to the maximum set by max_tweets.
    :param params: The keywords to search for, formatted as list of 
    strings. To search for a url, use this syntax:
        "url:\"gizmodo com\""
    in which the domain is separated by spaces instead of dots and the 
    internal quotes are escaped with backspaces.
    :return: Returns list of dicts containing:
      - tweets: the number of tweets, since yesterday, about the specified
      keywords (up to a maximum count of max_tweets)
      - tweets_followers: the number of (unique) followers of those tweets
      (i.e., if the same person tweets ten times in one day, that person's
      followers are counted once, not ten times).
      - most_followed_name: the name of the tweeter who tweeted in 'tweets'
      (above) who has the most followers
      - most_followed_count: the count of the number of followers who follow
      the tweeter with the most followers
    """
    print('starting twitter_search')
    # Set up flow control variables.
    max_tweets = 10000  # maximum number of tweets to retrieve from api
    more_tweets = True  # are there more tweets to retrieve?
    need_to_sleep = False  # sleep before the next api call if nearing the rate limit

    error = 'ok'

    try:
        # create TwitterSearch object using this app's tokens.
        ts = TwitterSearch(
            consumer_key=tw.CONSUMER_KEY,
            consumer_secret=tw.CONSUMER_SECRET,
            access_token=tw.ACCESS_TOKEN,
            access_token_secret=tw.ACCESS_TOKEN_SECRET
        )

        # Create a TwitterSearchOrder object and add keywords to it.
        tso = TwitterSearchOrder()
        for param in params:
            tso.add_keyword(param)
        # Only search for tweets since yesterday (in UTC).
        yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(1)
        tso.set_since(yesterday)

        # Set up counter variables.
        tweets = 0  # count of tweets about keywords, since yesterday
        unique_tweeters = {}  # dict of unique tweeters about keywords
        tweets_followers = 0  # count of followers of unique_tweeters
        min_id = 0  # next tweet for paginated results, when multiple api calls
        max_followers = (0, 'null')  # the tweeter with the most followers

        # Keep calling the api (for paginated results) until there are no
        # more tweets to retrieve, or until max_tweets limit has been reached.
        while more_tweets and tweets < max_tweets:
            # Sleep for 60 seconds, if needed, to avoid hitting api limit.
            if need_to_sleep:
                print("rate limit:", rate_limit)
                time.sleep(60)
            # Call the search api.
            response = ts.search_tweets(tso)
            # Are there no more tweets to retrieve?
            if len(response["content"]["statuses"]) == 0:
                more_tweets = False
            else:  # there are more tweets to retrieve
                # Iterate through the batch of tweets retrieved from this
                # api call. Count the tweet and track all the unique tweeters.
                for tweet in response["content"]["statuses"]:
                    if tweets >= max_tweets:
                        break  # stop counting/tracking once max_tweets is reached
                    tweets += 1
                    if (min_id == 0) or (tweet["id"] < min_id):
                        # Set min_id to the id of this tweet. The api returns
                        # tweets in reverse chronological order (most recent is
                        # first), so min_id is a lowering "ceiling" of which
                        # tweet id to start from during subsequent api call.
                        min_id = tweet["id"]
                    # Can uncomment the following lines to see who is tweeting.
                    # print(str(tweets) + "\t" + str(tweet["id"])
                    #       + "\t" + tweet["user"]["screen_name"]
                    #       + "\t" + str(tweet["user"]["followers_count"]))
                    if tweet["user"]["screen_name"] not in unique_tweeters:
                        tweeter = tweet["user"]["screen_name"]
                        tweeters_followers = tweet["user"]["followers_count"]
                        # Add tweet's screen_name and followers_count to
                        # unique_tweeters, iff this is first time seeing
                        # this screen_name.
                        unique_tweeters[tweeter] = tweeters_followers
                # Set the next paginated result's start point (subtract one
                # to avoid retrieving the last tweet from this batch twice).
                tso.set_max_id(min_id - 1)
            # If less than 15 api calls remaining then sleep during next loop.
            # (Search api free tier allows 180 calls per 15 minute period.)
            rate_limit = int(ts.get_metadata()["x-rate-limit-remaining"])
            if rate_limit < 15:
                need_to_sleep = True
            else:
                need_to_sleep = False
        # After all tweets have been retrieved (up to max_tweets), calculate
        # metrics on the followers of the tweeters in unique_tweeters.
        for tweeter in unique_tweeters:
            # Count how many followers there are in all the unique_tweeters.
            tweets_followers += unique_tweeters[tweeter]
            # Determine which tweeter from unique_tweeters has most followers.
            if unique_tweeters[tweeter] > max_followers[0]:
                max_followers = (unique_tweeters[tweeter], tweeter)

    except TwitterSearchException as e:
        tweets = None
        tweets_followers = None
        error = format_exception(type(e), e, e.__traceback__)

    tweets = make_dict(
        value=tweets,
        data_name='tweets',
        start_time=start_time,
        status=error
    )

    tweets_followers = make_dict(
        value=tweets_followers,
        data_name='tweets_followers',
        start_time=start_time,
        status=error
    )

    most_followed_name = make_dict(
        value=escape(max_followers[1], True),
        data_name='most_followed_name',
        start_time=start_time,
        status=error
    )

    most_followed_count = make_dict(
        value=max_followers[0],
        data_name='most_followed_count',
        start_time=start_time,
        status=error
    )

    return [tweets, tweets_followers, most_followed_name, most_followed_count]
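A hypothetical invocation of the function above, assuming the module's own imports and a tw credentials module are in place; the keyword list mirrors the url syntax documented in the docstring, and start_time is simply a wall-clock timestamp here.

import time

# Metrics come back in the order: tweets, tweets_followers,
# most_followed_name, most_followed_count.
results = twitter_search(['gizmodo', 'url:"gizmodo com"'], time.time())
for metric in results:
    print(metric)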
Example #6
class TwitterSearchSensor(PollingSensor):
    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor,
              self).__init__(sensor_service=sensor_service,
                             config=config,
                             poll_interval=poll_interval)
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret'])
        self._last_id = None

        if type(self._config['query']) is not list:
            self._logger.exception('Twitter sensor failed. "query" config '
                                   'value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        tso = TwitterSearchOrder()
        # No or_operator flag here, so all configured keywords must appear
        # together in a matching tweet.
        tso.set_keywords(self._config['query'])

        language = self._config.get('language', None)
        if language:
            tso.set_language(language)

        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        last_id = self._get_last_id()

        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        if not self._last_id and hasattr(self._sensor_service, 'get_value'):
            self._last_id = self._sensor_service.get_value(name='last_id')

        return self._last_id

    def _set_last_id(self, last_id):
        self._last_id = last_id

        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        trigger = self._trigger_ref

        url = '%s/%s/status/%s' % (BASE_URL, tweet['user']['screen_name'],
                                   tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': tweet['user']['screen_name'],
                'name': tweet['user']['name'],
                'location': tweet['user']['location'],
                'description': tweet['user']['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)